Cycles: Fully roll back to non-delayed SSS indirect rays for CPU

There are some issues still to be solved with the recent optimization we did
for the indirect rays of the SSS. Those issues will take a bit of time to be
fully resolved, and we need to unblock the Caminandes team now, so let's
revert some of the changes.

CUDA will still use delayed indirect rays since it's an experimental
feature.

For details about what still needs to be done, please refer to T46880.
This commit is contained in:
Sergey Sharybin
2015-11-27 17:12:44 +05:00
parent 175f00c89a
commit 20fc9c00fd
3 changed files with 45 additions and 4 deletions

View File

@@ -74,6 +74,7 @@ ccl_device void compute_light_pass(KernelGlobals *kg, ShaderData *sd, PathRadian
&throughput, &throughput,
&ss_indirect)) &ss_indirect))
{ {
# ifdef __SUBSURFACE_DELAYED_INDIRECT__
while(ss_indirect.num_rays) { while(ss_indirect.num_rays) {
kernel_path_subsurface_setup_indirect(kg, kernel_path_subsurface_setup_indirect(kg,
&ss_indirect, &ss_indirect,
@@ -90,6 +91,7 @@ ccl_device void compute_light_pass(KernelGlobals *kg, ShaderData *sd, PathRadian
&state, &state,
&L_sample); &L_sample);
} }
# endif /* __SUBSURFACE_DELAYED_INDIRECT__ */
is_sss_sample = true; is_sss_sample = true;
} }
} }

View File

@@ -508,7 +508,38 @@ ccl_device bool kernel_path_subsurface_scatter(
#ifdef __LAMP_MIS__ #ifdef __LAMP_MIS__
hit_state->ray_t = 0.0f; hit_state->ray_t = 0.0f;
#endif #endif
#ifdef __SUBSURFACE_DELAYED_INDIRECT__
ss_indirect->num_rays++; ss_indirect->num_rays++;
#else
# ifdef __VOLUME__
if(ss_indirect->need_update_volume_stack) {
Ray volume_ray = *ray;
/* Setup ray from previous surface point to the new one. */
volume_ray.D = normalize_len(hit_ray->P - volume_ray.P,
&volume_ray.t);
kernel_volume_stack_update_for_subsurface(kg,
&volume_ray,
hit_state->volume_stack);
}
# endif /* __VOLUME__ */
kernel_path_indirect(kg,
rng,
hit_ray,
*hit_tp,
hit_state->num_samples,
hit_state,
L);
/* For render passes, sum and reset indirect light pass variables
* for the next samples.
*/
path_radiance_sum_indirect(L);
path_radiance_reset_indirect(L);
#endif
} }
} }
return true; return true;
@@ -516,6 +547,7 @@ ccl_device bool kernel_path_subsurface_scatter(
return false; return false;
} }
#ifdef __SUBSURFACE_DELAYED_INDIRECT__
ccl_device void kernel_path_subsurface_setup_indirect( ccl_device void kernel_path_subsurface_setup_indirect(
KernelGlobals *kg, KernelGlobals *kg,
SubsurfaceIndirectRays *ss_indirect, SubsurfaceIndirectRays *ss_indirect,
@@ -545,7 +577,7 @@ ccl_device void kernel_path_subsurface_setup_indirect(
&volume_ray, &volume_ray,
state->volume_stack); state->volume_stack);
} }
#endif #endif /* __VOLUME__ */
*ray = *indirect_ray; *ray = *indirect_ray;
@@ -555,6 +587,8 @@ ccl_device void kernel_path_subsurface_setup_indirect(
path_radiance_sum_indirect(L); path_radiance_sum_indirect(L);
path_radiance_reset_indirect(L); path_radiance_reset_indirect(L);
} }
#endif /* __SUBSURFACE_DELAYED_INDIRECT__ */
#endif #endif
ccl_device float4 kernel_path_integrate(KernelGlobals *kg, RNG *rng, int sample, Ray ray, ccl_global float *buffer) ccl_device float4 kernel_path_integrate(KernelGlobals *kg, RNG *rng, int sample, Ray ray, ccl_global float *buffer)
@@ -578,6 +612,7 @@ ccl_device float4 kernel_path_integrate(KernelGlobals *kg, RNG *rng, int sample,
SubsurfaceIndirectRays ss_indirect; SubsurfaceIndirectRays ss_indirect;
ss_indirect.num_rays = 0; ss_indirect.num_rays = 0;
# ifdef __SUBSURFACE_DELAYED_INDIRECT__
/* TODO(sergey): Avoid having explicit copy of the pre-subsurface scatter /* TODO(sergey): Avoid having explicit copy of the pre-subsurface scatter
* ray by storing an updated version of state in the ss_indirect which will * ray by storing an updated version of state in the ss_indirect which will
* be updated to the new volume stack. * be updated to the new volume stack.
@@ -585,6 +620,7 @@ ccl_device float4 kernel_path_integrate(KernelGlobals *kg, RNG *rng, int sample,
Ray ss_orig_ray; Ray ss_orig_ray;
for(;;) { for(;;) {
# endif /* __SUBSURFACE_DELAYED_INDIRECT__ */
#endif #endif
/* path iteration */ /* path iteration */
@@ -833,11 +869,13 @@ ccl_device float4 kernel_path_integrate(KernelGlobals *kg, RNG *rng, int sample,
&throughput, &throughput,
&ss_indirect)) &ss_indirect))
{ {
# ifdef __SUBSURFACE_DELAYED_INDIRECT__
ss_orig_ray = ray; ss_orig_ray = ray;
# endif /* __SUBSURFACE_DELAYED_INDIRECT__ */
break; break;
} }
} }
#endif #endif /* __SUBSURFACE__ */
/* direct lighting */ /* direct lighting */
kernel_path_surface_connect_light(kg, rng, &sd, throughput, &state, &L); kernel_path_surface_connect_light(kg, rng, &sd, throughput, &state, &L);
@@ -847,7 +885,7 @@ ccl_device float4 kernel_path_integrate(KernelGlobals *kg, RNG *rng, int sample,
break; break;
} }
#ifdef __SUBSURFACE__ #ifdef __SUBSURFACE_DELAYED_INDIRECT__
/* Trace indirect subsurface rays by restarting the loop. this uses less /* Trace indirect subsurface rays by restarting the loop. this uses less
* stack memory than invoking kernel_path_indirect. * stack memory than invoking kernel_path_indirect.
*/ */
@@ -864,7 +902,7 @@ ccl_device float4 kernel_path_integrate(KernelGlobals *kg, RNG *rng, int sample,
break; break;
} }
} }
#endif #endif /* __SUBSURFACE_DELAYED_INDIRECT__ */
float3 L_sum = path_radiance_clamp_and_sum(kg, &L); float3 L_sum = path_radiance_clamp_and_sum(kg, &L);

View File

@@ -87,6 +87,7 @@ CCL_NAMESPACE_BEGIN
/* Experimental on GPU */ /* Experimental on GPU */
#ifdef __KERNEL_EXPERIMENTAL__ #ifdef __KERNEL_EXPERIMENTAL__
#define __SUBSURFACE__ #define __SUBSURFACE__
#define __SUBSURFACE_DELAYED_INDIRECT__
#define __CMJ__ #define __CMJ__
#endif #endif