Cycles: Avoid having ShaderData on the stack
This commit introduces an SSS-oriented intersection structure which replaces the old logic of having separate arrays for just intersections and shader data, and encapsulates all the data needed for SSS evaluation. This gives a huge stack memory saving on GPU. In my own experiments it gave a 25% memory usage reduction on a GTX560Ti (722MB vs. 946MB). Unfortunately, this caused a performance loss of 20%, which only happens on GPU. This is perhaps due to a different memory access pattern. It will be solved in the future, hopefully. Famous saying: won in memory - lost in time (which is also valid the other way around).
This commit is contained in:
@@ -128,10 +128,16 @@ ccl_device void kernel_branched_path_subsurface_scatter(KernelGlobals *kg,
|
||||
/* do subsurface scatter step with copy of shader data, this will
|
||||
* replace the BSSRDF with a diffuse BSDF closure */
|
||||
for(int j = 0; j < num_samples; j++) {
|
||||
ShaderData bssrdf_sd[BSSRDF_MAX_HITS];
|
||||
SubsurfaceIntersection ss_isect;
|
||||
float bssrdf_u, bssrdf_v;
|
||||
path_branched_rng_2D(kg, &bssrdf_rng, state, j, num_samples, PRNG_BSDF_U, &bssrdf_u, &bssrdf_v);
|
||||
int num_hits = subsurface_scatter_multi_step(kg, sd, bssrdf_sd, state->flag, sc, &lcg_state, bssrdf_u, bssrdf_v, true);
|
||||
int num_hits = subsurface_scatter_multi_intersect(kg,
|
||||
&ss_isect,
|
||||
sd,
|
||||
sc,
|
||||
&lcg_state,
|
||||
bssrdf_u, bssrdf_v,
|
||||
true);
|
||||
#ifdef __VOLUME__
|
||||
Ray volume_ray = *ray;
|
||||
bool need_update_volume_stack = kernel_data.integrator.use_volumes &&
|
||||
@@ -140,6 +146,15 @@ ccl_device void kernel_branched_path_subsurface_scatter(KernelGlobals *kg,
|
||||
|
||||
/* compute lighting with the BSDF closure */
|
||||
for(int hit = 0; hit < num_hits; hit++) {
|
||||
ShaderData bssrdf_sd = *sd;
|
||||
subsurface_scatter_multi_setup(kg,
|
||||
&ss_isect,
|
||||
hit,
|
||||
&bssrdf_sd,
|
||||
state->flag,
|
||||
sc,
|
||||
true);
|
||||
|
||||
PathState hit_state = *state;
|
||||
|
||||
path_state_branch(&hit_state, j, num_samples);
|
||||
@@ -147,7 +162,7 @@ ccl_device void kernel_branched_path_subsurface_scatter(KernelGlobals *kg,
|
||||
#ifdef __VOLUME__
|
||||
if(need_update_volume_stack) {
|
||||
/* Setup ray from previous surface point to the new one. */
|
||||
float3 P = ray_offset(bssrdf_sd[hit].P, -bssrdf_sd[hit].Ng);
|
||||
float3 P = ray_offset(bssrdf_sd.P, -bssrdf_sd.Ng);
|
||||
volume_ray.D = normalize_len(P - volume_ray.P,
|
||||
&volume_ray.t);
|
||||
|
||||
@@ -165,15 +180,27 @@ ccl_device void kernel_branched_path_subsurface_scatter(KernelGlobals *kg,
|
||||
/* direct light */
|
||||
if(kernel_data.integrator.use_direct_light) {
|
||||
bool all = kernel_data.integrator.sample_all_lights_direct;
|
||||
kernel_branched_path_surface_connect_light(kg, rng,
|
||||
&bssrdf_sd[hit], &hit_state, throughput, num_samples_inv, L, all);
|
||||
kernel_branched_path_surface_connect_light(
|
||||
kg,
|
||||
rng,
|
||||
&bssrdf_sd,
|
||||
&hit_state,
|
||||
throughput,
|
||||
num_samples_inv,
|
||||
L,
|
||||
all);
|
||||
}
|
||||
#endif
|
||||
|
||||
/* indirect light */
|
||||
kernel_branched_path_surface_indirect_light(kg, rng,
|
||||
&bssrdf_sd[hit], throughput, num_samples_inv,
|
||||
&hit_state, L);
|
||||
kernel_branched_path_surface_indirect_light(
|
||||
kg,
|
||||
rng,
|
||||
&bssrdf_sd,
|
||||
throughput,
|
||||
num_samples_inv,
|
||||
&hit_state,
|
||||
L);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Reference in New Issue
Block a user