Cycles: Avoid having ShaderData on the stack

This commit introduces an SSS-oriented intersection structure which replaces
the old logic of having separate arrays for intersections and shader data, and
encapsulates all the data needed for SSS evaluation.

This gives a huge stack memory saving on GPU. In my own experiments it gave a
25% memory usage reduction on GTX560Ti (722MB vs. 946MB).

Unfortunately, this causes a performance loss of about 20%, which only happens
on GPU. This is perhaps due to a different memory access pattern. Will be
solved in the future, hopefully.

Famous saying: won in memory - lost in time (which is also valid the other way
around).
This commit is contained in:
Sergey Sharybin
2015-11-22 15:00:29 +05:00
parent e6fff424db
commit 8bca34fe32
9 changed files with 290 additions and 109 deletions

View File

@@ -128,10 +128,16 @@ ccl_device void kernel_branched_path_subsurface_scatter(KernelGlobals *kg,
/* do subsurface scatter step with copy of shader data, this will
* replace the BSSRDF with a diffuse BSDF closure */
for(int j = 0; j < num_samples; j++) {
ShaderData bssrdf_sd[BSSRDF_MAX_HITS];
SubsurfaceIntersection ss_isect;
float bssrdf_u, bssrdf_v;
path_branched_rng_2D(kg, &bssrdf_rng, state, j, num_samples, PRNG_BSDF_U, &bssrdf_u, &bssrdf_v);
int num_hits = subsurface_scatter_multi_step(kg, sd, bssrdf_sd, state->flag, sc, &lcg_state, bssrdf_u, bssrdf_v, true);
int num_hits = subsurface_scatter_multi_intersect(kg,
&ss_isect,
sd,
sc,
&lcg_state,
bssrdf_u, bssrdf_v,
true);
#ifdef __VOLUME__
Ray volume_ray = *ray;
bool need_update_volume_stack = kernel_data.integrator.use_volumes &&
@@ -140,6 +146,15 @@ ccl_device void kernel_branched_path_subsurface_scatter(KernelGlobals *kg,
/* compute lighting with the BSDF closure */
for(int hit = 0; hit < num_hits; hit++) {
ShaderData bssrdf_sd = *sd;
subsurface_scatter_multi_setup(kg,
&ss_isect,
hit,
&bssrdf_sd,
state->flag,
sc,
true);
PathState hit_state = *state;
path_state_branch(&hit_state, j, num_samples);
@@ -147,7 +162,7 @@ ccl_device void kernel_branched_path_subsurface_scatter(KernelGlobals *kg,
#ifdef __VOLUME__
if(need_update_volume_stack) {
/* Setup ray from previous surface point to the new one. */
float3 P = ray_offset(bssrdf_sd[hit].P, -bssrdf_sd[hit].Ng);
float3 P = ray_offset(bssrdf_sd.P, -bssrdf_sd.Ng);
volume_ray.D = normalize_len(P - volume_ray.P,
&volume_ray.t);
@@ -165,15 +180,27 @@ ccl_device void kernel_branched_path_subsurface_scatter(KernelGlobals *kg,
/* direct light */
if(kernel_data.integrator.use_direct_light) {
bool all = kernel_data.integrator.sample_all_lights_direct;
kernel_branched_path_surface_connect_light(kg, rng,
&bssrdf_sd[hit], &hit_state, throughput, num_samples_inv, L, all);
kernel_branched_path_surface_connect_light(
kg,
rng,
&bssrdf_sd,
&hit_state,
throughput,
num_samples_inv,
L,
all);
}
#endif
/* indirect light */
kernel_branched_path_surface_indirect_light(kg, rng,
&bssrdf_sd[hit], throughput, num_samples_inv,
&hit_state, L);
kernel_branched_path_surface_indirect_light(
kg,
rng,
&bssrdf_sd,
throughput,
num_samples_inv,
&hit_state,
L);
}
}
}