Cycles: reduce closure memory usage for emission/shadow shader data.
On a Titan Xp, this reduces path tracing local memory usage from 1092 MB to 840 MB. Benchmark performance was within 1% on both the RX 480 and the Titan Xp. The original patch was implemented by Sergey. Differential Revision: https://developer.blender.org/D2249
This commit is contained in:
@@ -436,10 +436,12 @@ ccl_device void kernel_branched_path_integrate(KernelGlobals *kg,
|
||||
/* shader data memory used for both volumes and surfaces, saves stack space */
|
||||
ShaderData sd;
|
||||
/* shader data used by emission, shadows, volume stacks, indirect path */
|
||||
ShaderData emission_sd, indirect_sd;
|
||||
ShaderDataTinyStorage emission_sd_storage;
|
||||
ShaderData *emission_sd = AS_SHADER_DATA(&emission_sd_storage);
|
||||
ShaderData indirect_sd;
|
||||
|
||||
PathState state;
|
||||
path_state_init(kg, &emission_sd, &state, rng_hash, sample, &ray);
|
||||
path_state_init(kg, emission_sd, &state, rng_hash, sample, &ray);
|
||||
|
||||
/* Main Loop
|
||||
* Here we only handle transparency intersections from the camera ray.
|
||||
@@ -460,7 +462,7 @@ ccl_device void kernel_branched_path_integrate(KernelGlobals *kg,
|
||||
&isect,
|
||||
hit,
|
||||
&indirect_sd,
|
||||
&emission_sd,
|
||||
emission_sd,
|
||||
L);
|
||||
#endif /* __VOLUME__ */
|
||||
|
||||
@@ -472,7 +474,7 @@ ccl_device void kernel_branched_path_integrate(KernelGlobals *kg,
|
||||
|
||||
/* Setup and evaluate shader. */
|
||||
shader_setup_from_ray(kg, &sd, &isect, &ray);
|
||||
shader_eval_surface(kg, &sd, &state, state.flag);
|
||||
shader_eval_surface(kg, &sd, &state, state.flag, MAX_CLOSURE);
|
||||
shader_merge_closures(&sd);
|
||||
|
||||
/* Apply shadow catcher, holdout, emission. */
|
||||
@@ -481,7 +483,7 @@ ccl_device void kernel_branched_path_integrate(KernelGlobals *kg,
|
||||
&state,
|
||||
&ray,
|
||||
throughput,
|
||||
&emission_sd,
|
||||
emission_sd,
|
||||
L,
|
||||
buffer))
|
||||
{
|
||||
@@ -513,14 +515,14 @@ ccl_device void kernel_branched_path_integrate(KernelGlobals *kg,
|
||||
#ifdef __AO__
|
||||
/* ambient occlusion */
|
||||
if(kernel_data.integrator.use_ambient_occlusion || (sd.flag & SD_AO)) {
|
||||
kernel_branched_path_ao(kg, &sd, &emission_sd, L, &state, throughput);
|
||||
kernel_branched_path_ao(kg, &sd, emission_sd, L, &state, throughput);
|
||||
}
|
||||
#endif /* __AO__ */
|
||||
|
||||
#ifdef __SUBSURFACE__
|
||||
/* bssrdf scatter to a different location on the same object */
|
||||
if(sd.flag & SD_BSSRDF) {
|
||||
kernel_branched_path_subsurface_scatter(kg, &sd, &indirect_sd, &emission_sd,
|
||||
kernel_branched_path_subsurface_scatter(kg, &sd, &indirect_sd, emission_sd,
|
||||
L, &state, &ray, throughput);
|
||||
}
|
||||
#endif /* __SUBSURFACE__ */
|
||||
@@ -534,13 +536,13 @@ ccl_device void kernel_branched_path_integrate(KernelGlobals *kg,
|
||||
int all = (kernel_data.integrator.sample_all_lights_direct) ||
|
||||
(state.flag & PATH_RAY_SHADOW_CATCHER);
|
||||
kernel_branched_path_surface_connect_light(kg,
|
||||
&sd, &emission_sd, &hit_state, throughput, 1.0f, L, all);
|
||||
&sd, emission_sd, &hit_state, throughput, 1.0f, L, all);
|
||||
}
|
||||
#endif /* __EMISSION__ */
|
||||
|
||||
/* indirect light */
|
||||
kernel_branched_path_surface_indirect_light(kg,
|
||||
&sd, &indirect_sd, &emission_sd, throughput, 1.0f, &hit_state, L);
|
||||
&sd, &indirect_sd, emission_sd, throughput, 1.0f, &hit_state, L);
|
||||
|
||||
/* continue in case of transparency */
|
||||
throughput *= shader_bsdf_transparency(kg, &sd);
|
||||
|
Reference in New Issue
Block a user