Cycles: Delay shooting SSS indirect rays

The idea is to delay shooting indirect rays for SSS sampling and trace them after the main integration loop has finished. This reduces GPU stack usage even further, bringing it down to around 652MB (compared to 722MB before this change and 946MB in the previous stable release). It also fixes the speed regression introduced by the previous commit: a simple SSS scene (SSS Suzanne on the floor) now renders in 0:50 (compared to 1:16 with the previous commit and 1:03 with the official release).
2015-11-22 15:48:33 +05:00
parent 8bca34fe32
commit 2a5c1fc9cc
4 changed files with 353 additions and 112 deletions
--- a/intern/cycles/kernel/kernel_bake.h
+++ b/intern/cycles/kernel/kernel_bake.h
@@ -64,8 +64,19 @@ ccl_device void compute_light_pass(KernelGlobals *kg, ShaderData *sd, PathRadian
 		/* sample subsurface scattering */
 		if((is_combined || is_sss_sample) && (sd->flag & SD_BSSRDF)) {
 			/* when mixing BSSRDF and BSDF closures we should skip BSDF lighting if scattering was successful */
-			if(kernel_path_subsurface_scatter(kg, sd, &L_sample, &state, &rng, &ray, &throughput))
+			SubsurfaceIndirectRays ss_indirect;
+			if(kernel_path_subsurface_scatter(kg,
+			                                  sd,
+			                                  &L_sample,
+			                                  &state,
+			                                  &rng,
+			                                  &ray,
+			                                  &throughput,
+			                                  &ss_indirect))
+			{
+				kernel_path_subsurface_scatter_indirect(kg, &L_sample, &state, &rng, &ray, &ss_indirect);
 				is_sss_sample = true;
+			}
 		}
 #endif

@@ -84,7 +95,7 @@ ccl_device void compute_light_pass(KernelGlobals *kg, ShaderData *sd, PathRadian
 				state.ray_t = 0.0f;
 #endif
 				/* compute indirect light */
-				kernel_path_indirect(kg, &rng, ray, throughput, 1, state, &L_sample);
+				kernel_path_indirect(kg, &rng, &ray, throughput, 1, &state, &L_sample);

 				/* sum and reset indirect light pass variables for the next samples */
 				path_radiance_sum_indirect(&L_sample);