Cycles CUDA: reduce stack memory by reusing ShaderData.

Stack memory usage is 57% lower for the path integrator and 48% lower for the branched path integrator.
This commit is contained in:
Brecht Van Lommel
2016-05-22 22:35:47 +02:00
parent af4a04eae0
commit 999d5a6785
14 changed files with 196 additions and 169 deletions

View File

@@ -30,6 +30,9 @@ ccl_device void compute_light_pass(KernelGlobals *kg, ShaderData *sd, PathRadian
Ray ray; Ray ray;
float3 throughput = make_float3(1.0f, 1.0f, 1.0f); float3 throughput = make_float3(1.0f, 1.0f, 1.0f);
/* emission shader data memory used by various functions */
ShaderData emission_sd;
ray.P = sd->P + sd->Ng; ray.P = sd->P + sd->Ng;
ray.D = -sd->Ng; ray.D = -sd->Ng;
ray.t = FLT_MAX; ray.t = FLT_MAX;
@@ -41,7 +44,7 @@ ccl_device void compute_light_pass(KernelGlobals *kg, ShaderData *sd, PathRadian
path_radiance_init(&L_sample, kernel_data.film.use_light_pass); path_radiance_init(&L_sample, kernel_data.film.use_light_pass);
/* init path state */ /* init path state */
path_state_init(kg, &state, &rng, sample, NULL); path_state_init(kg, &emission_sd, &state, &rng, sample, NULL);
/* evaluate surface shader */ /* evaluate surface shader */
float rbsdf = path_state_rng_1D(kg, &rng, &state, PRNG_BSDF); float rbsdf = path_state_rng_1D(kg, &rng, &state, PRNG_BSDF);
@@ -56,7 +59,7 @@ ccl_device void compute_light_pass(KernelGlobals *kg, ShaderData *sd, PathRadian
/* sample ambient occlusion */ /* sample ambient occlusion */
if(pass_filter & BAKE_FILTER_AO) { if(pass_filter & BAKE_FILTER_AO) {
kernel_path_ao(kg, sd, &L_sample, &state, &rng, throughput); kernel_path_ao(kg, sd, &emission_sd, &L_sample, &state, &rng, throughput);
} }
/* sample emission */ /* sample emission */
@@ -75,6 +78,7 @@ ccl_device void compute_light_pass(KernelGlobals *kg, ShaderData *sd, PathRadian
kernel_path_subsurface_init_indirect(&ss_indirect); kernel_path_subsurface_init_indirect(&ss_indirect);
if(kernel_path_subsurface_scatter(kg, if(kernel_path_subsurface_scatter(kg,
sd, sd,
&emission_sd,
&L_sample, &L_sample,
&state, &state,
&rng, &rng,
@@ -90,6 +94,7 @@ ccl_device void compute_light_pass(KernelGlobals *kg, ShaderData *sd, PathRadian
&L_sample, &L_sample,
&throughput); &throughput);
kernel_path_indirect(kg, kernel_path_indirect(kg,
&emission_sd,
&rng, &rng,
&ray, &ray,
throughput, throughput,
@@ -105,14 +110,14 @@ ccl_device void compute_light_pass(KernelGlobals *kg, ShaderData *sd, PathRadian
/* sample light and BSDF */ /* sample light and BSDF */
if(!is_sss_sample && (pass_filter & (BAKE_FILTER_DIRECT | BAKE_FILTER_INDIRECT))) { if(!is_sss_sample && (pass_filter & (BAKE_FILTER_DIRECT | BAKE_FILTER_INDIRECT))) {
kernel_path_surface_connect_light(kg, &rng, sd, throughput, &state, &L_sample); kernel_path_surface_connect_light(kg, &rng, sd, &emission_sd, throughput, &state, &L_sample);
if(kernel_path_surface_bounce(kg, &rng, sd, &throughput, &state, &L_sample, &ray)) { if(kernel_path_surface_bounce(kg, &rng, sd, &throughput, &state, &L_sample, &ray)) {
#ifdef __LAMP_MIS__ #ifdef __LAMP_MIS__
state.ray_t = 0.0f; state.ray_t = 0.0f;
#endif #endif
/* compute indirect light */ /* compute indirect light */
kernel_path_indirect(kg, &rng, &ray, throughput, 1, &state, &L_sample); kernel_path_indirect(kg, &emission_sd, &rng, &ray, throughput, 1, &state, &L_sample);
/* sum and reset indirect light pass variables for the next samples */ /* sum and reset indirect light pass variables for the next samples */
path_radiance_sum_indirect(&L_sample); path_radiance_sum_indirect(&L_sample);
@@ -126,7 +131,7 @@ ccl_device void compute_light_pass(KernelGlobals *kg, ShaderData *sd, PathRadian
/* sample ambient occlusion */ /* sample ambient occlusion */
if(pass_filter & BAKE_FILTER_AO) { if(pass_filter & BAKE_FILTER_AO) {
kernel_branched_path_ao(kg, sd, &L_sample, &state, &rng, throughput); kernel_branched_path_ao(kg, sd, &emission_sd, &L_sample, &state, &rng, throughput);
} }
/* sample emission */ /* sample emission */
@@ -139,7 +144,7 @@ ccl_device void compute_light_pass(KernelGlobals *kg, ShaderData *sd, PathRadian
/* sample subsurface scattering */ /* sample subsurface scattering */
if((pass_filter & BAKE_FILTER_SUBSURFACE) && (sd->flag & SD_BSSRDF)) { if((pass_filter & BAKE_FILTER_SUBSURFACE) && (sd->flag & SD_BSSRDF)) {
/* when mixing BSSRDF and BSDF closures we should skip BSDF lighting if scattering was successful */ /* when mixing BSSRDF and BSDF closures we should skip BSDF lighting if scattering was successful */
kernel_branched_path_subsurface_scatter(kg, sd, &L_sample, &state, &rng, &ray, throughput); kernel_branched_path_subsurface_scatter(kg, sd, &emission_sd, &L_sample, &state, &rng, &ray, throughput);
} }
#endif #endif
@@ -150,13 +155,13 @@ ccl_device void compute_light_pass(KernelGlobals *kg, ShaderData *sd, PathRadian
if(kernel_data.integrator.use_direct_light) { if(kernel_data.integrator.use_direct_light) {
int all = kernel_data.integrator.sample_all_lights_direct; int all = kernel_data.integrator.sample_all_lights_direct;
kernel_branched_path_surface_connect_light(kg, &rng, kernel_branched_path_surface_connect_light(kg, &rng,
sd, &state, throughput, 1.0f, &L_sample, all); sd, &emission_sd, &state, throughput, 1.0f, &L_sample, all);
} }
#endif #endif
/* indirect light */ /* indirect light */
kernel_branched_path_surface_indirect_light(kg, &rng, kernel_branched_path_surface_indirect_light(kg, &rng,
sd, throughput, 1.0f, &state, &L_sample); sd, &emission_sd, throughput, 1.0f, &state, &L_sample);
} }
} }
#endif #endif

View File

@@ -18,6 +18,7 @@ CCL_NAMESPACE_BEGIN
/* Direction Emission */ /* Direction Emission */
ccl_device_noinline float3 direct_emissive_eval(KernelGlobals *kg, ccl_device_noinline float3 direct_emissive_eval(KernelGlobals *kg,
ShaderData *emission_sd,
LightSample *ls, LightSample *ls,
ccl_addr_space PathState *state, ccl_addr_space PathState *state,
float3 I, float3 I,
@@ -26,12 +27,6 @@ ccl_device_noinline float3 direct_emissive_eval(KernelGlobals *kg,
float time) float time)
{ {
/* setup shading at emitter */ /* setup shading at emitter */
#ifdef __SPLIT_KERNEL__
ShaderData *sd = kg->sd_input;
#else
ShaderData sd_object;
ShaderData *sd = &sd_object;
#endif
float3 eval; float3 eval;
#ifdef __BACKGROUND_MIS__ #ifdef __BACKGROUND_MIS__
@@ -46,28 +41,28 @@ ccl_device_noinline float3 direct_emissive_eval(KernelGlobals *kg,
ray.dP = differential3_zero(); ray.dP = differential3_zero();
ray.dD = dI; ray.dD = dI;
shader_setup_from_background(kg, sd, &ray); shader_setup_from_background(kg, emission_sd, &ray);
path_state_modify_bounce(state, true); path_state_modify_bounce(state, true);
eval = shader_eval_background(kg, sd, state, 0, SHADER_CONTEXT_EMISSION); eval = shader_eval_background(kg, emission_sd, state, 0, SHADER_CONTEXT_EMISSION);
path_state_modify_bounce(state, false); path_state_modify_bounce(state, false);
} }
else else
#endif #endif
{ {
shader_setup_from_sample(kg, sd, ls->P, ls->Ng, I, ls->shader, ls->object, ls->prim, ls->u, ls->v, t, time); shader_setup_from_sample(kg, emission_sd, ls->P, ls->Ng, I, ls->shader, ls->object, ls->prim, ls->u, ls->v, t, time);
ls->Ng = ccl_fetch(sd, Ng); ls->Ng = ccl_fetch(emission_sd, Ng);
/* no path flag, we're evaluating this for all closures. that's weak but /* no path flag, we're evaluating this for all closures. that's weak but
* we'd have to do multiple evaluations otherwise */ * we'd have to do multiple evaluations otherwise */
path_state_modify_bounce(state, true); path_state_modify_bounce(state, true);
shader_eval_surface(kg, sd, state, 0.0f, 0, SHADER_CONTEXT_EMISSION); shader_eval_surface(kg, emission_sd, state, 0.0f, 0, SHADER_CONTEXT_EMISSION);
path_state_modify_bounce(state, false); path_state_modify_bounce(state, false);
/* evaluate emissive closure */ /* evaluate emissive closure */
if(ccl_fetch(sd, flag) & SD_EMISSION) if(ccl_fetch(emission_sd, flag) & SD_EMISSION)
eval = shader_emissive_eval(kg, sd); eval = shader_emissive_eval(kg, emission_sd);
else else
eval = make_float3(0.0f, 0.0f, 0.0f); eval = make_float3(0.0f, 0.0f, 0.0f);
} }
@@ -79,6 +74,7 @@ ccl_device_noinline float3 direct_emissive_eval(KernelGlobals *kg,
ccl_device_noinline bool direct_emission(KernelGlobals *kg, ccl_device_noinline bool direct_emission(KernelGlobals *kg,
ShaderData *sd, ShaderData *sd,
ShaderData *emission_sd,
LightSample *ls, LightSample *ls,
ccl_addr_space PathState *state, ccl_addr_space PathState *state,
Ray *ray, Ray *ray,
@@ -94,6 +90,7 @@ ccl_device_noinline bool direct_emission(KernelGlobals *kg,
/* evaluate closure */ /* evaluate closure */
float3 light_eval = direct_emissive_eval(kg, float3 light_eval = direct_emissive_eval(kg,
emission_sd,
ls, ls,
state, state,
-ls->D, -ls->D,
@@ -198,6 +195,7 @@ ccl_device_noinline float3 indirect_primitive_emission(KernelGlobals *kg, Shader
/* Indirect Lamp Emission */ /* Indirect Lamp Emission */
ccl_device_noinline bool indirect_lamp_emission(KernelGlobals *kg, ccl_device_noinline bool indirect_lamp_emission(KernelGlobals *kg,
ShaderData *emission_sd,
ccl_addr_space PathState *state, ccl_addr_space PathState *state,
Ray *ray, Ray *ray,
float3 *emission) float3 *emission)
@@ -225,6 +223,7 @@ ccl_device_noinline bool indirect_lamp_emission(KernelGlobals *kg,
#endif #endif
float3 L = direct_emissive_eval(kg, float3 L = direct_emissive_eval(kg,
emission_sd,
&ls, &ls,
state, state,
-ray->D, -ray->D,
@@ -238,7 +237,7 @@ ccl_device_noinline bool indirect_lamp_emission(KernelGlobals *kg,
Ray volume_ray = *ray; Ray volume_ray = *ray;
volume_ray.t = ls.t; volume_ray.t = ls.t;
float3 volume_tp = make_float3(1.0f, 1.0f, 1.0f); float3 volume_tp = make_float3(1.0f, 1.0f, 1.0f);
kernel_volume_shadow(kg, state, &volume_ray, &volume_tp); kernel_volume_shadow(kg, emission_sd, state, &volume_ray, &volume_tp);
L *= volume_tp; L *= volume_tp;
} }
#endif #endif
@@ -260,6 +259,7 @@ ccl_device_noinline bool indirect_lamp_emission(KernelGlobals *kg,
/* Indirect Background */ /* Indirect Background */
ccl_device_noinline float3 indirect_background(KernelGlobals *kg, ccl_device_noinline float3 indirect_background(KernelGlobals *kg,
ShaderData *emission_sd,
ccl_addr_space PathState *state, ccl_addr_space PathState *state,
ccl_addr_space Ray *ray) ccl_addr_space Ray *ray)
{ {
@@ -280,19 +280,14 @@ ccl_device_noinline float3 indirect_background(KernelGlobals *kg,
/* evaluate background closure */ /* evaluate background closure */
# ifdef __SPLIT_KERNEL__ # ifdef __SPLIT_KERNEL__
Ray priv_ray = *ray; Ray priv_ray = *ray;
shader_setup_from_background(kg, kg->sd_input, &priv_ray); shader_setup_from_background(kg, emission_sd, &priv_ray);
path_state_modify_bounce(state, true);
float3 L = shader_eval_background(kg, kg->sd_input, state, state->flag, SHADER_CONTEXT_EMISSION);
path_state_modify_bounce(state, false);
# else # else
ShaderData sd; shader_setup_from_background(kg, emission_sd, ray);
shader_setup_from_background(kg, &sd, ray); # endif
path_state_modify_bounce(state, true); path_state_modify_bounce(state, true);
float3 L = shader_eval_background(kg, &sd, state, state->flag, SHADER_CONTEXT_EMISSION); float3 L = shader_eval_background(kg, emission_sd, state, state->flag, SHADER_CONTEXT_EMISSION);
path_state_modify_bounce(state, false); path_state_modify_bounce(state, false);
# endif
#ifdef __BACKGROUND_MIS__ #ifdef __BACKGROUND_MIS__
/* check if background light exists or if we should skip pdf */ /* check if background light exists or if we should skip pdf */

View File

@@ -53,6 +53,7 @@
CCL_NAMESPACE_BEGIN CCL_NAMESPACE_BEGIN
ccl_device void kernel_path_indirect(KernelGlobals *kg, ccl_device void kernel_path_indirect(KernelGlobals *kg,
ShaderData *emission_sd,
RNG *rng, RNG *rng,
Ray *ray, Ray *ray,
float3 throughput, float3 throughput,
@@ -60,6 +61,9 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
PathState *state, PathState *state,
PathRadiance *L) PathRadiance *L)
{ {
/* shader data memory used for both volumes and surfaces, saves stack space */
ShaderData sd;
/* path iteration */ /* path iteration */
for(;;) { for(;;) {
/* intersect scene */ /* intersect scene */
@@ -87,7 +91,7 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
/* intersect with lamp */ /* intersect with lamp */
float3 emission; float3 emission;
if(indirect_lamp_emission(kg, state, &light_ray, &emission)) { if(indirect_lamp_emission(kg, emission_sd, state, &light_ray, &emission)) {
path_radiance_accum_emission(L, path_radiance_accum_emission(L,
throughput, throughput,
emission, emission,
@@ -115,15 +119,14 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
if(decoupled) { if(decoupled) {
/* cache steps along volume for repeated sampling */ /* cache steps along volume for repeated sampling */
VolumeSegment volume_segment; VolumeSegment volume_segment;
ShaderData volume_sd;
shader_setup_from_volume(kg, shader_setup_from_volume(kg,
&volume_sd, &sd,
&volume_ray); &volume_ray);
kernel_volume_decoupled_record(kg, kernel_volume_decoupled_record(kg,
state, state,
&volume_ray, &volume_ray,
&volume_sd, &sd,
&volume_segment, &volume_segment,
heterogeneous); heterogeneous);
@@ -146,7 +149,8 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
/* direct light sampling */ /* direct light sampling */
kernel_branched_path_volume_connect_light(kg, kernel_branched_path_volume_connect_light(kg,
rng, rng,
&volume_sd, &sd,
emission_sd,
throughput, throughput,
state, state,
L, L,
@@ -163,7 +167,7 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
result = kernel_volume_decoupled_scatter(kg, result = kernel_volume_decoupled_scatter(kg,
state, state,
&volume_ray, &volume_ray,
&volume_sd, &sd,
&throughput, &throughput,
rphase, rphase,
rscatter, rscatter,
@@ -178,7 +182,7 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
if(result == VOLUME_PATH_SCATTERED) { if(result == VOLUME_PATH_SCATTERED) {
if(kernel_path_volume_bounce(kg, if(kernel_path_volume_bounce(kg,
rng, rng,
&volume_sd, &sd,
&throughput, &throughput,
state, state,
L, L,
@@ -198,16 +202,16 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
# endif # endif
{ {
/* integrate along volume segment with distance sampling */ /* integrate along volume segment with distance sampling */
ShaderData volume_sd;
VolumeIntegrateResult result = kernel_volume_integrate( VolumeIntegrateResult result = kernel_volume_integrate(
kg, state, &volume_sd, &volume_ray, L, &throughput, rng, heterogeneous); kg, state, &sd, &volume_ray, L, &throughput, rng, heterogeneous);
# ifdef __VOLUME_SCATTER__ # ifdef __VOLUME_SCATTER__
if(result == VOLUME_PATH_SCATTERED) { if(result == VOLUME_PATH_SCATTERED) {
/* direct lighting */ /* direct lighting */
kernel_path_volume_connect_light(kg, kernel_path_volume_connect_light(kg,
rng, rng,
&volume_sd, &sd,
emission_sd,
throughput, throughput,
state, state,
L); L);
@@ -215,7 +219,7 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
/* indirect light bounce */ /* indirect light bounce */
if(kernel_path_volume_bounce(kg, if(kernel_path_volume_bounce(kg,
rng, rng,
&volume_sd, &sd,
&throughput, &throughput,
state, state,
L, L,
@@ -235,7 +239,7 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
if(!hit) { if(!hit) {
#ifdef __BACKGROUND__ #ifdef __BACKGROUND__
/* sample background shader */ /* sample background shader */
float3 L_background = indirect_background(kg, state, ray); float3 L_background = indirect_background(kg, emission_sd, state, ray);
path_radiance_accum_background(L, path_radiance_accum_background(L,
throughput, throughput,
L_background, L_background,
@@ -246,7 +250,6 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
} }
/* setup shading */ /* setup shading */
ShaderData sd;
shader_setup_from_ray(kg, shader_setup_from_ray(kg,
&sd, &sd,
&isect, &isect,
@@ -328,7 +331,7 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
light_ray.dP = sd.dP; light_ray.dP = sd.dP;
light_ray.dD = differential3_zero(); light_ray.dD = differential3_zero();
if(!shadow_blocked(kg, state, &light_ray, &ao_shadow)) { if(!shadow_blocked(kg, emission_sd, state, &light_ray, &ao_shadow)) {
path_radiance_accum_ao(L, path_radiance_accum_ao(L,
throughput, throughput,
ao_alpha, ao_alpha,
@@ -378,6 +381,7 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
kernel_branched_path_surface_connect_light(kg, kernel_branched_path_surface_connect_light(kg,
rng, rng,
&sd, &sd,
emission_sd,
state, state,
throughput, throughput,
1.0f, 1.0f,
@@ -393,6 +397,7 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
ccl_device_noinline void kernel_path_ao(KernelGlobals *kg, ccl_device_noinline void kernel_path_ao(KernelGlobals *kg,
ShaderData *sd, ShaderData *sd,
ShaderData *emission_sd,
PathRadiance *L, PathRadiance *L,
PathState *state, PathState *state,
RNG *rng, RNG *rng,
@@ -425,7 +430,7 @@ ccl_device_noinline void kernel_path_ao(KernelGlobals *kg,
light_ray.dP = ccl_fetch(sd, dP); light_ray.dP = ccl_fetch(sd, dP);
light_ray.dD = differential3_zero(); light_ray.dD = differential3_zero();
if(!shadow_blocked(kg, state, &light_ray, &ao_shadow)) if(!shadow_blocked(kg, emission_sd, state, &light_ray, &ao_shadow))
path_radiance_accum_ao(L, throughput, ao_alpha, ao_bsdf, ao_shadow, state->bounce); path_radiance_accum_ao(L, throughput, ao_alpha, ao_bsdf, ao_shadow, state->bounce);
} }
} }
@@ -435,6 +440,7 @@ ccl_device_noinline void kernel_path_ao(KernelGlobals *kg,
ccl_device bool kernel_path_subsurface_scatter( ccl_device bool kernel_path_subsurface_scatter(
KernelGlobals *kg, KernelGlobals *kg,
ShaderData *sd, ShaderData *sd,
ShaderData *emission_sd,
PathRadiance *L, PathRadiance *L,
PathState *state, PathState *state,
RNG *rng, RNG *rng,
@@ -503,7 +509,7 @@ ccl_device bool kernel_path_subsurface_scatter(
hit_L->direct_throughput = L->direct_throughput; hit_L->direct_throughput = L->direct_throughput;
path_radiance_copy_indirect(hit_L, L); path_radiance_copy_indirect(hit_L, L);
kernel_path_surface_connect_light(kg, rng, sd, *hit_tp, state, hit_L); kernel_path_surface_connect_light(kg, rng, sd, emission_sd, *hit_tp, state, hit_L);
if(kernel_path_surface_bounce(kg, if(kernel_path_surface_bounce(kg,
rng, rng,
@@ -526,6 +532,7 @@ ccl_device bool kernel_path_subsurface_scatter(
kernel_volume_stack_update_for_subsurface( kernel_volume_stack_update_for_subsurface(
kg, kg,
emission_sd,
&volume_ray, &volume_ray,
hit_state->volume_stack); hit_state->volume_stack);
} }
@@ -604,8 +611,13 @@ ccl_device_inline float4 kernel_path_integrate(KernelGlobals *kg,
path_radiance_init(&L, kernel_data.film.use_light_pass); path_radiance_init(&L, kernel_data.film.use_light_pass);
/* shader data memory used for both volumes and surfaces, saves stack space */
ShaderData sd;
/* shader data used by emission, shadows, volume stacks */
ShaderData emission_sd;
PathState state; PathState state;
path_state_init(kg, &state, rng, sample, &ray); path_state_init(kg, &emission_sd, &state, rng, sample, &ray);
#ifdef __KERNEL_DEBUG__ #ifdef __KERNEL_DEBUG__
DebugData debug_data; DebugData debug_data;
@@ -669,7 +681,7 @@ ccl_device_inline float4 kernel_path_integrate(KernelGlobals *kg,
/* intersect with lamp */ /* intersect with lamp */
float3 emission; float3 emission;
if(indirect_lamp_emission(kg, &state, &light_ray, &emission)) if(indirect_lamp_emission(kg, &emission_sd, &state, &light_ray, &emission))
path_radiance_accum_emission(&L, throughput, emission, state.bounce); path_radiance_accum_emission(&L, throughput, emission, state.bounce);
} }
#endif #endif
@@ -689,11 +701,10 @@ ccl_device_inline float4 kernel_path_integrate(KernelGlobals *kg,
if(decoupled) { if(decoupled) {
/* cache steps along volume for repeated sampling */ /* cache steps along volume for repeated sampling */
VolumeSegment volume_segment; VolumeSegment volume_segment;
ShaderData volume_sd;
shader_setup_from_volume(kg, &volume_sd, &volume_ray); shader_setup_from_volume(kg, &sd, &volume_ray);
kernel_volume_decoupled_record(kg, &state, kernel_volume_decoupled_record(kg, &state,
&volume_ray, &volume_sd, &volume_segment, heterogeneous); &volume_ray, &sd, &volume_segment, heterogeneous);
volume_segment.sampling_method = sampling_method; volume_segment.sampling_method = sampling_method;
@@ -708,8 +719,9 @@ ccl_device_inline float4 kernel_path_integrate(KernelGlobals *kg,
int all = false; int all = false;
/* direct light sampling */ /* direct light sampling */
kernel_branched_path_volume_connect_light(kg, rng, &volume_sd, kernel_branched_path_volume_connect_light(kg, rng, &sd,
throughput, &state, &L, all, &volume_ray, &volume_segment); &emission_sd, throughput, &state, &L, all,
&volume_ray, &volume_segment);
/* indirect sample. if we use distance sampling and take just /* indirect sample. if we use distance sampling and take just
* one sample for direct and indirect light, we could share * one sample for direct and indirect light, we could share
@@ -718,7 +730,7 @@ ccl_device_inline float4 kernel_path_integrate(KernelGlobals *kg,
float rscatter = path_state_rng_1D_for_decision(kg, rng, &state, PRNG_SCATTER_DISTANCE); float rscatter = path_state_rng_1D_for_decision(kg, rng, &state, PRNG_SCATTER_DISTANCE);
result = kernel_volume_decoupled_scatter(kg, result = kernel_volume_decoupled_scatter(kg,
&state, &volume_ray, &volume_sd, &throughput, &state, &volume_ray, &sd, &throughput,
rphase, rscatter, &volume_segment, NULL, true); rphase, rscatter, &volume_segment, NULL, true);
} }
@@ -726,7 +738,7 @@ ccl_device_inline float4 kernel_path_integrate(KernelGlobals *kg,
kernel_volume_decoupled_free(kg, &volume_segment); kernel_volume_decoupled_free(kg, &volume_segment);
if(result == VOLUME_PATH_SCATTERED) { if(result == VOLUME_PATH_SCATTERED) {
if(kernel_path_volume_bounce(kg, rng, &volume_sd, &throughput, &state, &L, &ray)) if(kernel_path_volume_bounce(kg, rng, &sd, &throughput, &state, &L, &ray))
continue; continue;
else else
break; break;
@@ -739,17 +751,16 @@ ccl_device_inline float4 kernel_path_integrate(KernelGlobals *kg,
# endif # endif
{ {
/* integrate along volume segment with distance sampling */ /* integrate along volume segment with distance sampling */
ShaderData volume_sd;
VolumeIntegrateResult result = kernel_volume_integrate( VolumeIntegrateResult result = kernel_volume_integrate(
kg, &state, &volume_sd, &volume_ray, &L, &throughput, rng, heterogeneous); kg, &state, &sd, &volume_ray, &L, &throughput, rng, heterogeneous);
# ifdef __VOLUME_SCATTER__ # ifdef __VOLUME_SCATTER__
if(result == VOLUME_PATH_SCATTERED) { if(result == VOLUME_PATH_SCATTERED) {
/* direct lighting */ /* direct lighting */
kernel_path_volume_connect_light(kg, rng, &volume_sd, throughput, &state, &L); kernel_path_volume_connect_light(kg, rng, &sd, &emission_sd, throughput, &state, &L);
/* indirect light bounce */ /* indirect light bounce */
if(kernel_path_volume_bounce(kg, rng, &volume_sd, &throughput, &state, &L, &ray)) if(kernel_path_volume_bounce(kg, rng, &sd, &throughput, &state, &L, &ray))
continue; continue;
else else
break; break;
@@ -772,7 +783,7 @@ ccl_device_inline float4 kernel_path_integrate(KernelGlobals *kg,
#ifdef __BACKGROUND__ #ifdef __BACKGROUND__
/* sample background shader */ /* sample background shader */
float3 L_background = indirect_background(kg, &state, &ray); float3 L_background = indirect_background(kg, &emission_sd, &state, &ray);
path_radiance_accum_background(&L, throughput, L_background, state.bounce); path_radiance_accum_background(&L, throughput, L_background, state.bounce);
#endif #endif
@@ -780,7 +791,6 @@ ccl_device_inline float4 kernel_path_integrate(KernelGlobals *kg,
} }
/* setup shading */ /* setup shading */
ShaderData sd;
shader_setup_from_ray(kg, &sd, &isect, &ray); shader_setup_from_ray(kg, &sd, &isect, &ray);
float rbsdf = path_state_rng_1D_for_decision(kg, rng, &state, PRNG_BSDF); float rbsdf = path_state_rng_1D_for_decision(kg, rng, &state, PRNG_BSDF);
shader_eval_surface(kg, &sd, &state, rbsdf, state.flag, SHADER_CONTEXT_MAIN); shader_eval_surface(kg, &sd, &state, rbsdf, state.flag, SHADER_CONTEXT_MAIN);
@@ -848,7 +858,7 @@ ccl_device_inline float4 kernel_path_integrate(KernelGlobals *kg,
#ifdef __AO__ #ifdef __AO__
/* ambient occlusion */ /* ambient occlusion */
if(kernel_data.integrator.use_ambient_occlusion || (sd.flag & SD_AO)) { if(kernel_data.integrator.use_ambient_occlusion || (sd.flag & SD_AO)) {
kernel_path_ao(kg, &sd, &L, &state, rng, throughput); kernel_path_ao(kg, &sd, &emission_sd, &L, &state, rng, throughput);
} }
#endif #endif
@@ -858,6 +868,7 @@ ccl_device_inline float4 kernel_path_integrate(KernelGlobals *kg,
if(sd.flag & SD_BSSRDF) { if(sd.flag & SD_BSSRDF) {
if(kernel_path_subsurface_scatter(kg, if(kernel_path_subsurface_scatter(kg,
&sd, &sd,
&emission_sd,
&L, &L,
&state, &state,
rng, rng,
@@ -871,7 +882,7 @@ ccl_device_inline float4 kernel_path_integrate(KernelGlobals *kg,
#endif /* __SUBSURFACE__ */ #endif /* __SUBSURFACE__ */
/* direct lighting */ /* direct lighting */
kernel_path_surface_connect_light(kg, rng, &sd, throughput, &state, &L); kernel_path_surface_connect_light(kg, rng, &sd, &emission_sd, throughput, &state, &L);
/* compute direct lighting and next bounce */ /* compute direct lighting and next bounce */
if(!kernel_path_surface_bounce(kg, rng, &sd, &throughput, &state, &L, &ray)) if(!kernel_path_surface_bounce(kg, rng, &sd, &throughput, &state, &L, &ray))

View File

@@ -18,7 +18,13 @@ CCL_NAMESPACE_BEGIN
#ifdef __BRANCHED_PATH__ #ifdef __BRANCHED_PATH__
ccl_device void kernel_branched_path_ao(KernelGlobals *kg, ShaderData *sd, PathRadiance *L, PathState *state, RNG *rng, float3 throughput) ccl_device void kernel_branched_path_ao(KernelGlobals *kg,
ShaderData *sd,
ShaderData *emission_sd,
PathRadiance *L,
PathState *state,
RNG *rng,
float3 throughput)
{ {
int num_samples = kernel_data.integrator.ao_samples; int num_samples = kernel_data.integrator.ao_samples;
float num_samples_inv = 1.0f/num_samples; float num_samples_inv = 1.0f/num_samples;
@@ -49,7 +55,7 @@ ccl_device void kernel_branched_path_ao(KernelGlobals *kg, ShaderData *sd, PathR
light_ray.dP = ccl_fetch(sd, dP); light_ray.dP = ccl_fetch(sd, dP);
light_ray.dD = differential3_zero(); light_ray.dD = differential3_zero();
if(!shadow_blocked(kg, state, &light_ray, &ao_shadow)) if(!shadow_blocked(kg, emission_sd, state, &light_ray, &ao_shadow))
path_radiance_accum_ao(L, throughput*num_samples_inv, ao_alpha, ao_bsdf, ao_shadow, state->bounce); path_radiance_accum_ao(L, throughput*num_samples_inv, ao_alpha, ao_bsdf, ao_shadow, state->bounce);
} }
} }
@@ -58,8 +64,8 @@ ccl_device void kernel_branched_path_ao(KernelGlobals *kg, ShaderData *sd, PathR
/* bounce off surface and integrate indirect light */ /* bounce off surface and integrate indirect light */
ccl_device_noinline void kernel_branched_path_surface_indirect_light(KernelGlobals *kg, ccl_device_noinline void kernel_branched_path_surface_indirect_light(KernelGlobals *kg,
RNG *rng, ShaderData *sd, float3 throughput, float num_samples_adjust, RNG *rng, ShaderData *sd, ShaderData *emission_sd, float3 throughput,
PathState *state, PathRadiance *L) float num_samples_adjust, PathState *state, PathRadiance *L)
{ {
for(int i = 0; i < ccl_fetch(sd, num_closure); i++) { for(int i = 0; i < ccl_fetch(sd, num_closure); i++) {
const ShaderClosure *sc = &ccl_fetch(sd, closure)[i]; const ShaderClosure *sc = &ccl_fetch(sd, closure)[i];
@@ -106,6 +112,7 @@ ccl_device_noinline void kernel_branched_path_surface_indirect_light(KernelGloba
} }
kernel_path_indirect(kg, kernel_path_indirect(kg,
emission_sd,
rng, rng,
&bsdf_ray, &bsdf_ray,
tp*num_samples_inv, tp*num_samples_inv,
@@ -124,6 +131,7 @@ ccl_device_noinline void kernel_branched_path_surface_indirect_light(KernelGloba
#ifdef __SUBSURFACE__ #ifdef __SUBSURFACE__
ccl_device void kernel_branched_path_subsurface_scatter(KernelGlobals *kg, ccl_device void kernel_branched_path_subsurface_scatter(KernelGlobals *kg,
ShaderData *sd, ShaderData *sd,
ShaderData *emission_sd,
PathRadiance *L, PathRadiance *L,
PathState *state, PathState *state,
RNG *rng, RNG *rng,
@@ -186,6 +194,7 @@ ccl_device void kernel_branched_path_subsurface_scatter(KernelGlobals *kg,
kernel_volume_stack_update_for_subsurface( kernel_volume_stack_update_for_subsurface(
kg, kg,
emission_sd,
&volume_ray, &volume_ray,
hit_state.volume_stack); hit_state.volume_stack);
} }
@@ -199,6 +208,7 @@ ccl_device void kernel_branched_path_subsurface_scatter(KernelGlobals *kg,
kg, kg,
rng, rng,
&bssrdf_sd, &bssrdf_sd,
emission_sd,
&hit_state, &hit_state,
throughput, throughput,
num_samples_inv, num_samples_inv,
@@ -212,6 +222,7 @@ ccl_device void kernel_branched_path_subsurface_scatter(KernelGlobals *kg,
kg, kg,
rng, rng,
&bssrdf_sd, &bssrdf_sd,
emission_sd,
throughput, throughput,
num_samples_inv, num_samples_inv,
&hit_state, &hit_state,
@@ -231,8 +242,13 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in
path_radiance_init(&L, kernel_data.film.use_light_pass); path_radiance_init(&L, kernel_data.film.use_light_pass);
/* shader data memory used for both volumes and surfaces, saves stack space */
ShaderData sd;
/* shader data used by emission, shadows, volume stacks */
ShaderData emission_sd;
PathState state; PathState state;
path_state_init(kg, &state, rng, sample, &ray); path_state_init(kg, &emission_sd, &state, rng, sample, &ray);
#ifdef __KERNEL_DEBUG__ #ifdef __KERNEL_DEBUG__
DebugData debug_data; DebugData debug_data;
@@ -287,11 +303,10 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in
/* cache steps along volume for repeated sampling */ /* cache steps along volume for repeated sampling */
VolumeSegment volume_segment; VolumeSegment volume_segment;
ShaderData volume_sd;
shader_setup_from_volume(kg, &volume_sd, &volume_ray); shader_setup_from_volume(kg, &sd, &volume_ray);
kernel_volume_decoupled_record(kg, &state, kernel_volume_decoupled_record(kg, &state,
&volume_ray, &volume_sd, &volume_segment, heterogeneous); &volume_ray, &sd, &volume_segment, heterogeneous);
/* direct light sampling */ /* direct light sampling */
if(volume_segment.closure_flag & SD_SCATTER) { if(volume_segment.closure_flag & SD_SCATTER) {
@@ -299,8 +314,9 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in
int all = kernel_data.integrator.sample_all_lights_direct; int all = kernel_data.integrator.sample_all_lights_direct;
kernel_branched_path_volume_connect_light(kg, rng, &volume_sd, kernel_branched_path_volume_connect_light(kg, rng, &sd,
throughput, &state, &L, all, &volume_ray, &volume_segment); &emission_sd, throughput, &state, &L, all,
&volume_ray, &volume_segment);
/* indirect light sampling */ /* indirect light sampling */
int num_samples = kernel_data.integrator.volume_samples; int num_samples = kernel_data.integrator.volume_samples;
@@ -326,20 +342,21 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in
float rscatter = path_state_rng_1D_for_decision(kg, &tmp_rng, &ps, PRNG_SCATTER_DISTANCE); float rscatter = path_state_rng_1D_for_decision(kg, &tmp_rng, &ps, PRNG_SCATTER_DISTANCE);
VolumeIntegrateResult result = kernel_volume_decoupled_scatter(kg, VolumeIntegrateResult result = kernel_volume_decoupled_scatter(kg,
&ps, &pray, &volume_sd, &tp, rphase, rscatter, &volume_segment, NULL, false); &ps, &pray, &sd, &tp, rphase, rscatter, &volume_segment, NULL, false);
(void)result; (void)result;
kernel_assert(result == VOLUME_PATH_SCATTERED); kernel_assert(result == VOLUME_PATH_SCATTERED);
if(kernel_path_volume_bounce(kg, if(kernel_path_volume_bounce(kg,
rng, rng,
&volume_sd, &sd,
&tp, &tp,
&ps, &ps,
&L, &L,
&pray)) &pray))
{ {
kernel_path_indirect(kg, kernel_path_indirect(kg,
&emission_sd,
rng, rng,
&pray, &pray,
tp*num_samples_inv, tp*num_samples_inv,
@@ -373,30 +390,30 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in
for(int j = 0; j < num_samples; j++) { for(int j = 0; j < num_samples; j++) {
PathState ps = state; PathState ps = state;
Ray pray = ray; Ray pray = ray;
ShaderData volume_sd;
float3 tp = throughput * num_samples_inv; float3 tp = throughput * num_samples_inv;
/* branch RNG state */ /* branch RNG state */
path_state_branch(&ps, j, num_samples); path_state_branch(&ps, j, num_samples);
VolumeIntegrateResult result = kernel_volume_integrate( VolumeIntegrateResult result = kernel_volume_integrate(
kg, &ps, &volume_sd, &volume_ray, &L, &tp, rng, heterogeneous); kg, &ps, &sd, &volume_ray, &L, &tp, rng, heterogeneous);
#ifdef __VOLUME_SCATTER__ #ifdef __VOLUME_SCATTER__
if(result == VOLUME_PATH_SCATTERED) { if(result == VOLUME_PATH_SCATTERED) {
/* todo: support equiangular, MIS and all light sampling. /* todo: support equiangular, MIS and all light sampling.
* alternatively get decoupled ray marching working on the GPU */ * alternatively get decoupled ray marching working on the GPU */
kernel_path_volume_connect_light(kg, rng, &volume_sd, tp, &state, &L); kernel_path_volume_connect_light(kg, rng, &sd, &emission_sd, tp, &state, &L);
if(kernel_path_volume_bounce(kg, if(kernel_path_volume_bounce(kg,
rng, rng,
&volume_sd, &sd,
&tp, &tp,
&ps, &ps,
&L, &L,
&pray)) &pray))
{ {
kernel_path_indirect(kg, kernel_path_indirect(kg,
&emission_sd,
rng, rng,
&pray, &pray,
tp, tp,
@@ -414,7 +431,7 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in
} }
/* todo: avoid this calculation using decoupled ray marching */ /* todo: avoid this calculation using decoupled ray marching */
kernel_volume_shadow(kg, &state, &volume_ray, &throughput); kernel_volume_shadow(kg, &emission_sd, &state, &volume_ray, &throughput);
#endif #endif
} }
#endif #endif
@@ -432,7 +449,7 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in
#ifdef __BACKGROUND__ #ifdef __BACKGROUND__
/* sample background shader */ /* sample background shader */
float3 L_background = indirect_background(kg, &state, &ray); float3 L_background = indirect_background(kg, &emission_sd, &state, &ray);
path_radiance_accum_background(&L, throughput, L_background, state.bounce); path_radiance_accum_background(&L, throughput, L_background, state.bounce);
#endif #endif
@@ -440,7 +457,6 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in
} }
/* setup shading */ /* setup shading */
ShaderData sd;
shader_setup_from_ray(kg, &sd, &isect, &ray); shader_setup_from_ray(kg, &sd, &isect, &ray);
shader_eval_surface(kg, &sd, &state, 0.0f, state.flag, SHADER_CONTEXT_MAIN); shader_eval_surface(kg, &sd, &state, 0.0f, state.flag, SHADER_CONTEXT_MAIN);
shader_merge_closures(&sd); shader_merge_closures(&sd);
@@ -499,14 +515,14 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in
#ifdef __AO__ #ifdef __AO__
/* ambient occlusion */ /* ambient occlusion */
if(kernel_data.integrator.use_ambient_occlusion || (sd.flag & SD_AO)) { if(kernel_data.integrator.use_ambient_occlusion || (sd.flag & SD_AO)) {
kernel_branched_path_ao(kg, &sd, &L, &state, rng, throughput); kernel_branched_path_ao(kg, &sd, &emission_sd, &L, &state, rng, throughput);
} }
#endif #endif
#ifdef __SUBSURFACE__ #ifdef __SUBSURFACE__
/* bssrdf scatter to a different location on the same object */ /* bssrdf scatter to a different location on the same object */
if(sd.flag & SD_BSSRDF) { if(sd.flag & SD_BSSRDF) {
kernel_branched_path_subsurface_scatter(kg, &sd, &L, &state, kernel_branched_path_subsurface_scatter(kg, &sd, &emission_sd, &L, &state,
rng, &ray, throughput); rng, &ray, throughput);
} }
#endif #endif
@@ -519,13 +535,13 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in
if(kernel_data.integrator.use_direct_light) { if(kernel_data.integrator.use_direct_light) {
int all = kernel_data.integrator.sample_all_lights_direct; int all = kernel_data.integrator.sample_all_lights_direct;
kernel_branched_path_surface_connect_light(kg, rng, kernel_branched_path_surface_connect_light(kg, rng,
&sd, &hit_state, throughput, 1.0f, &L, all); &sd, &emission_sd, &hit_state, throughput, 1.0f, &L, all);
} }
#endif #endif
/* indirect light */ /* indirect light */
kernel_branched_path_surface_indirect_light(kg, rng, kernel_branched_path_surface_indirect_light(kg, rng,
&sd, throughput, 1.0f, &hit_state, &L); &sd, &emission_sd, throughput, 1.0f, &hit_state, &L);
/* continue in case of transparency */ /* continue in case of transparency */
throughput *= shader_bsdf_transparency(kg, &sd); throughput *= shader_bsdf_transparency(kg, &sd);

View File

@@ -16,7 +16,12 @@
CCL_NAMESPACE_BEGIN CCL_NAMESPACE_BEGIN
ccl_device_inline void path_state_init(KernelGlobals *kg, ccl_addr_space PathState *state, ccl_addr_space RNG *rng, int sample, ccl_addr_space Ray *ray) ccl_device_inline void path_state_init(KernelGlobals *kg,
ShaderData *stack_sd,
ccl_addr_space PathState *state,
ccl_addr_space RNG *rng,
int sample,
ccl_addr_space Ray *ray)
{ {
state->flag = PATH_RAY_CAMERA|PATH_RAY_MIS_SKIP; state->flag = PATH_RAY_CAMERA|PATH_RAY_MIS_SKIP;
@@ -41,7 +46,7 @@ ccl_device_inline void path_state_init(KernelGlobals *kg, ccl_addr_space PathSta
if(kernel_data.integrator.use_volumes) { if(kernel_data.integrator.use_volumes) {
/* initialize volume stack with volume we are inside of */ /* initialize volume stack with volume we are inside of */
kernel_volume_stack_init(kg, ray, state->volume_stack); kernel_volume_stack_init(kg, stack_sd, ray, state->volume_stack);
/* seed RNG for cases where we can't use stratified samples */ /* seed RNG for cases where we can't use stratified samples */
state->rng_congruential = lcg_init(*rng + sample*0x51633e2d); state->rng_congruential = lcg_init(*rng + sample*0x51633e2d);
} }

View File

@@ -20,7 +20,8 @@ CCL_NAMESPACE_BEGIN
/* branched path tracing: connect path directly to position on one or more lights and add it to L */ /* branched path tracing: connect path directly to position on one or more lights and add it to L */
ccl_device_noinline void kernel_branched_path_surface_connect_light(KernelGlobals *kg, RNG *rng, ccl_device_noinline void kernel_branched_path_surface_connect_light(KernelGlobals *kg, RNG *rng,
ShaderData *sd, PathState *state, float3 throughput, float num_samples_adjust, PathRadiance *L, int sample_all_lights) ShaderData *sd, ShaderData *emission_sd, PathState *state, float3 throughput,
float num_samples_adjust, PathRadiance *L, int sample_all_lights)
{ {
#ifdef __EMISSION__ #ifdef __EMISSION__
/* sample illumination from lights to find path contribution */ /* sample illumination from lights to find path contribution */
@@ -55,11 +56,11 @@ ccl_device_noinline void kernel_branched_path_surface_connect_light(KernelGlobal
LightSample ls; LightSample ls;
lamp_light_sample(kg, i, light_u, light_v, ccl_fetch(sd, P), &ls); lamp_light_sample(kg, i, light_u, light_v, ccl_fetch(sd, P), &ls);
if(direct_emission(kg, sd, &ls, state, &light_ray, &L_light, &is_lamp)) { if(direct_emission(kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp)) {
/* trace shadow ray */ /* trace shadow ray */
float3 shadow; float3 shadow;
if(!shadow_blocked(kg, state, &light_ray, &shadow)) { if(!shadow_blocked(kg, emission_sd, state, &light_ray, &shadow)) {
/* accumulate */ /* accumulate */
path_radiance_accum_light(L, throughput*num_samples_inv, &L_light, shadow, num_samples_inv, state->bounce, is_lamp); path_radiance_accum_light(L, throughput*num_samples_inv, &L_light, shadow, num_samples_inv, state->bounce, is_lamp);
} }
@@ -87,11 +88,11 @@ ccl_device_noinline void kernel_branched_path_surface_connect_light(KernelGlobal
LightSample ls; LightSample ls;
light_sample(kg, light_t, light_u, light_v, ccl_fetch(sd, time), ccl_fetch(sd, P), state->bounce, &ls); light_sample(kg, light_t, light_u, light_v, ccl_fetch(sd, time), ccl_fetch(sd, P), state->bounce, &ls);
if(direct_emission(kg, sd, &ls, state, &light_ray, &L_light, &is_lamp)) { if(direct_emission(kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp)) {
/* trace shadow ray */ /* trace shadow ray */
float3 shadow; float3 shadow;
if(!shadow_blocked(kg, state, &light_ray, &shadow)) { if(!shadow_blocked(kg, emission_sd, state, &light_ray, &shadow)) {
/* accumulate */ /* accumulate */
path_radiance_accum_light(L, throughput*num_samples_inv, &L_light, shadow, num_samples_inv, state->bounce, is_lamp); path_radiance_accum_light(L, throughput*num_samples_inv, &L_light, shadow, num_samples_inv, state->bounce, is_lamp);
} }
@@ -109,11 +110,11 @@ ccl_device_noinline void kernel_branched_path_surface_connect_light(KernelGlobal
light_sample(kg, light_t, light_u, light_v, ccl_fetch(sd, time), ccl_fetch(sd, P), state->bounce, &ls); light_sample(kg, light_t, light_u, light_v, ccl_fetch(sd, time), ccl_fetch(sd, P), state->bounce, &ls);
/* sample random light */ /* sample random light */
if(direct_emission(kg, sd, &ls, state, &light_ray, &L_light, &is_lamp)) { if(direct_emission(kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp)) {
/* trace shadow ray */ /* trace shadow ray */
float3 shadow; float3 shadow;
if(!shadow_blocked(kg, state, &light_ray, &shadow)) { if(!shadow_blocked(kg, emission_sd, state, &light_ray, &shadow)) {
/* accumulate */ /* accumulate */
path_radiance_accum_light(L, throughput*num_samples_adjust, &L_light, shadow, num_samples_adjust, state->bounce, is_lamp); path_radiance_accum_light(L, throughput*num_samples_adjust, &L_light, shadow, num_samples_adjust, state->bounce, is_lamp);
} }
@@ -184,7 +185,8 @@ ccl_device bool kernel_branched_path_surface_bounce(KernelGlobals *kg, RNG *rng,
#ifndef __SPLIT_KERNEL__ #ifndef __SPLIT_KERNEL__
/* path tracing: connect path directly to position on a light and add it to L */ /* path tracing: connect path directly to position on a light and add it to L */
ccl_device_inline void kernel_path_surface_connect_light(KernelGlobals *kg, ccl_addr_space RNG *rng, ccl_device_inline void kernel_path_surface_connect_light(KernelGlobals *kg, ccl_addr_space RNG *rng,
ShaderData *sd, float3 throughput, ccl_addr_space PathState *state, PathRadiance *L) ShaderData *sd, ShaderData *emission_sd, float3 throughput, ccl_addr_space PathState *state,
PathRadiance *L)
{ {
#ifdef __EMISSION__ #ifdef __EMISSION__
if(!(kernel_data.integrator.use_direct_light && (ccl_fetch(sd, flag) & SD_BSDF_HAS_EVAL))) if(!(kernel_data.integrator.use_direct_light && (ccl_fetch(sd, flag) & SD_BSDF_HAS_EVAL)))
@@ -206,11 +208,11 @@ ccl_device_inline void kernel_path_surface_connect_light(KernelGlobals *kg, ccl_
LightSample ls; LightSample ls;
light_sample(kg, light_t, light_u, light_v, ccl_fetch(sd, time), ccl_fetch(sd, P), state->bounce, &ls); light_sample(kg, light_t, light_u, light_v, ccl_fetch(sd, time), ccl_fetch(sd, P), state->bounce, &ls);
if(direct_emission(kg, sd, &ls, state, &light_ray, &L_light, &is_lamp)) { if(direct_emission(kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp)) {
/* trace shadow ray */ /* trace shadow ray */
float3 shadow; float3 shadow;
if(!shadow_blocked(kg, state, &light_ray, &shadow)) { if(!shadow_blocked(kg, emission_sd, state, &light_ray, &shadow)) {
/* accumulate */ /* accumulate */
path_radiance_accum_light(L, throughput, &L_light, shadow, 1.0f, state->bounce, is_lamp); path_radiance_accum_light(L, throughput, &L_light, shadow, 1.0f, state->bounce, is_lamp);
} }

View File

@@ -19,7 +19,7 @@ CCL_NAMESPACE_BEGIN
#ifdef __VOLUME_SCATTER__ #ifdef __VOLUME_SCATTER__
ccl_device void kernel_path_volume_connect_light(KernelGlobals *kg, RNG *rng, ccl_device void kernel_path_volume_connect_light(KernelGlobals *kg, RNG *rng,
ShaderData *sd, float3 throughput, PathState *state, PathRadiance *L) ShaderData *sd, ShaderData *emission_sd, float3 throughput, PathState *state, PathRadiance *L)
{ {
#ifdef __EMISSION__ #ifdef __EMISSION__
if(!kernel_data.integrator.use_direct_light) if(!kernel_data.integrator.use_direct_light)
@@ -44,11 +44,11 @@ ccl_device void kernel_path_volume_connect_light(KernelGlobals *kg, RNG *rng,
if(ls.pdf == 0.0f) if(ls.pdf == 0.0f)
return; return;
if(direct_emission(kg, sd, &ls, state, &light_ray, &L_light, &is_lamp)) { if(direct_emission(kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp)) {
/* trace shadow ray */ /* trace shadow ray */
float3 shadow; float3 shadow;
if(!shadow_blocked(kg, state, &light_ray, &shadow)) { if(!shadow_blocked(kg, emission_sd, state, &light_ray, &shadow)) {
/* accumulate */ /* accumulate */
path_radiance_accum_light(L, throughput, &L_light, shadow, 1.0f, state->bounce, is_lamp); path_radiance_accum_light(L, throughput, &L_light, shadow, 1.0f, state->bounce, is_lamp);
} }
@@ -106,7 +106,7 @@ bool kernel_path_volume_bounce(KernelGlobals *kg, RNG *rng,
} }
ccl_device void kernel_branched_path_volume_connect_light(KernelGlobals *kg, RNG *rng, ccl_device void kernel_branched_path_volume_connect_light(KernelGlobals *kg, RNG *rng,
ShaderData *sd, float3 throughput, PathState *state, PathRadiance *L, ShaderData *sd, ShaderData *emission_sd, float3 throughput, PathState *state, PathRadiance *L,
bool sample_all_lights, Ray *ray, const VolumeSegment *segment) bool sample_all_lights, Ray *ray, const VolumeSegment *segment)
{ {
#ifdef __EMISSION__ #ifdef __EMISSION__
@@ -160,11 +160,11 @@ ccl_device void kernel_branched_path_volume_connect_light(KernelGlobals *kg, RNG
if(ls.pdf == 0.0f) if(ls.pdf == 0.0f)
continue; continue;
if(direct_emission(kg, sd, &ls, state, &light_ray, &L_light, &is_lamp)) { if(direct_emission(kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp)) {
/* trace shadow ray */ /* trace shadow ray */
float3 shadow; float3 shadow;
if(!shadow_blocked(kg, state, &light_ray, &shadow)) { if(!shadow_blocked(kg, emission_sd, state, &light_ray, &shadow)) {
/* accumulate */ /* accumulate */
path_radiance_accum_light(L, tp*num_samples_inv, &L_light, shadow, num_samples_inv, state->bounce, is_lamp); path_radiance_accum_light(L, tp*num_samples_inv, &L_light, shadow, num_samples_inv, state->bounce, is_lamp);
} }
@@ -211,11 +211,11 @@ ccl_device void kernel_branched_path_volume_connect_light(KernelGlobals *kg, RNG
if(ls.pdf == 0.0f) if(ls.pdf == 0.0f)
continue; continue;
if(direct_emission(kg, sd, &ls, state, &light_ray, &L_light, &is_lamp)) { if(direct_emission(kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp)) {
/* trace shadow ray */ /* trace shadow ray */
float3 shadow; float3 shadow;
if(!shadow_blocked(kg, state, &light_ray, &shadow)) { if(!shadow_blocked(kg, emission_sd, state, &light_ray, &shadow)) {
/* accumulate */ /* accumulate */
path_radiance_accum_light(L, tp*num_samples_inv, &L_light, shadow, num_samples_inv, state->bounce, is_lamp); path_radiance_accum_light(L, tp*num_samples_inv, &L_light, shadow, num_samples_inv, state->bounce, is_lamp);
} }
@@ -251,11 +251,11 @@ ccl_device void kernel_branched_path_volume_connect_light(KernelGlobals *kg, RNG
return; return;
/* sample random light */ /* sample random light */
if(direct_emission(kg, sd, &ls, state, &light_ray, &L_light, &is_lamp)) { if(direct_emission(kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp)) {
/* trace shadow ray */ /* trace shadow ray */
float3 shadow; float3 shadow;
if(!shadow_blocked(kg, state, &light_ray, &shadow)) { if(!shadow_blocked(kg, emission_sd, state, &light_ray, &shadow)) {
/* accumulate */ /* accumulate */
path_radiance_accum_light(L, tp, &L_light, shadow, 1.0f, state->bounce, is_lamp); path_radiance_accum_light(L, tp, &L_light, shadow, 1.0f, state->bounce, is_lamp);
} }

View File

@@ -41,7 +41,7 @@ CCL_NAMESPACE_BEGIN
#define STACK_MAX_HITS 64 #define STACK_MAX_HITS 64
ccl_device_inline bool shadow_blocked(KernelGlobals *kg, PathState *state, Ray *ray, float3 *shadow) ccl_device_inline bool shadow_blocked(KernelGlobals *kg, ShaderData *shadow_sd, PathState *state, Ray *ray, float3 *shadow)
{ {
*shadow = make_float3(1.0f, 1.0f, 1.0f); *shadow = make_float3(1.0f, 1.0f, 1.0f);
@@ -107,21 +107,20 @@ ccl_device_inline bool shadow_blocked(KernelGlobals *kg, PathState *state, Ray *
if(ps.volume_stack[0].shader != SHADER_NONE) { if(ps.volume_stack[0].shader != SHADER_NONE) {
Ray segment_ray = *ray; Ray segment_ray = *ray;
segment_ray.t = isect->t; segment_ray.t = isect->t;
kernel_volume_shadow(kg, &ps, &segment_ray, &throughput); kernel_volume_shadow(kg, shadow_sd, &ps, &segment_ray, &throughput);
} }
#endif #endif
/* setup shader data at surface */ /* setup shader data at surface */
ShaderData sd; shader_setup_from_ray(kg, shadow_sd, isect, ray);
shader_setup_from_ray(kg, &sd, isect, ray);
/* attenuation from transparent surface */ /* attenuation from transparent surface */
if(!(sd.flag & SD_HAS_ONLY_VOLUME)) { if(!(shadow_sd->flag & SD_HAS_ONLY_VOLUME)) {
path_state_modify_bounce(state, true); path_state_modify_bounce(state, true);
shader_eval_surface(kg, &sd, state, 0.0f, PATH_RAY_SHADOW, SHADER_CONTEXT_SHADOW); shader_eval_surface(kg, shadow_sd, state, 0.0f, PATH_RAY_SHADOW, SHADER_CONTEXT_SHADOW);
path_state_modify_bounce(state, false); path_state_modify_bounce(state, false);
throughput *= shader_bsdf_transparency(kg, &sd); throughput *= shader_bsdf_transparency(kg, shadow_sd);
} }
/* stop if all light is blocked */ /* stop if all light is blocked */
@@ -133,13 +132,13 @@ ccl_device_inline bool shadow_blocked(KernelGlobals *kg, PathState *state, Ray *
} }
/* move ray forward */ /* move ray forward */
ray->P = sd.P; ray->P = shadow_sd->P;
if(ray->t != FLT_MAX) if(ray->t != FLT_MAX)
ray->D = normalize_len(Pend - ray->P, &ray->t); ray->D = normalize_len(Pend - ray->P, &ray->t);
#ifdef __VOLUME__ #ifdef __VOLUME__
/* exit/enter volume */ /* exit/enter volume */
kernel_volume_stack_enter_exit(kg, &sd, ps.volume_stack); kernel_volume_stack_enter_exit(kg, shadow_sd, ps.volume_stack);
#endif #endif
bounce++; bounce++;
@@ -148,7 +147,7 @@ ccl_device_inline bool shadow_blocked(KernelGlobals *kg, PathState *state, Ray *
#ifdef __VOLUME__ #ifdef __VOLUME__
/* attenuation for last line segment towards light */ /* attenuation for last line segment towards light */
if(ps.volume_stack[0].shader != SHADER_NONE) if(ps.volume_stack[0].shader != SHADER_NONE)
kernel_volume_shadow(kg, &ps, ray, &throughput); kernel_volume_shadow(kg, shadow_sd, &ps, ray, &throughput);
#endif #endif
*shadow = throughput; *shadow = throughput;
@@ -164,7 +163,7 @@ ccl_device_inline bool shadow_blocked(KernelGlobals *kg, PathState *state, Ray *
#ifdef __VOLUME__ #ifdef __VOLUME__
if(!blocked && state->volume_stack[0].shader != SHADER_NONE) { if(!blocked && state->volume_stack[0].shader != SHADER_NONE) {
/* apply attenuation from current volume shader */ /* apply attenuation from current volume shader */
kernel_volume_shadow(kg, state, ray, shadow); kernel_volume_shadow(kg, shadow_sd, state, ray, shadow);
} }
#endif #endif
@@ -184,6 +183,7 @@ ccl_device_inline bool shadow_blocked(KernelGlobals *kg, PathState *state, Ray *
* one extra ray cast for the cases where we do want transparency. */ * one extra ray cast for the cases where we do want transparency. */
ccl_device_noinline bool shadow_blocked(KernelGlobals *kg, ccl_device_noinline bool shadow_blocked(KernelGlobals *kg,
ShaderData *shadow_sd,
ccl_addr_space PathState *state, ccl_addr_space PathState *state,
ccl_addr_space Ray *ray_input, ccl_addr_space Ray *ray_input,
float3 *shadow) float3 *shadow)
@@ -228,7 +228,7 @@ ccl_device_noinline bool shadow_blocked(KernelGlobals *kg,
#ifdef __VOLUME__ #ifdef __VOLUME__
/* attenuation for last line segment towards light */ /* attenuation for last line segment towards light */
if(ps.volume_stack[0].shader != SHADER_NONE) if(ps.volume_stack[0].shader != SHADER_NONE)
kernel_volume_shadow(kg, &ps, ray, &throughput); kernel_volume_shadow(kg, shadow_sd, &ps, ray, &throughput);
#endif #endif
*shadow *= throughput; *shadow *= throughput;
@@ -244,39 +244,33 @@ ccl_device_noinline bool shadow_blocked(KernelGlobals *kg,
if(ps.volume_stack[0].shader != SHADER_NONE) { if(ps.volume_stack[0].shader != SHADER_NONE) {
Ray segment_ray = *ray; Ray segment_ray = *ray;
segment_ray.t = isect->t; segment_ray.t = isect->t;
kernel_volume_shadow(kg, &ps, &segment_ray, &throughput); kernel_volume_shadow(kg, shadow_sd, &ps, &segment_ray, &throughput);
} }
#endif #endif
/* setup shader data at surface */ /* setup shader data at surface */
#ifdef __SPLIT_KERNEL__ shader_setup_from_ray(kg, shadow_sd, isect, ray);
ShaderData *sd = kg->sd_input;
#else
ShaderData sd_object;
ShaderData *sd = &sd_object;
#endif
shader_setup_from_ray(kg, sd, isect, ray);
/* attenuation from transparent surface */ /* attenuation from transparent surface */
if(!(ccl_fetch(sd, flag) & SD_HAS_ONLY_VOLUME)) { if(!(ccl_fetch(shadow_sd, flag) & SD_HAS_ONLY_VOLUME)) {
path_state_modify_bounce(state, true); path_state_modify_bounce(state, true);
shader_eval_surface(kg, sd, state, 0.0f, PATH_RAY_SHADOW, SHADER_CONTEXT_SHADOW); shader_eval_surface(kg, shadow_sd, state, 0.0f, PATH_RAY_SHADOW, SHADER_CONTEXT_SHADOW);
path_state_modify_bounce(state, false); path_state_modify_bounce(state, false);
throughput *= shader_bsdf_transparency(kg, sd); throughput *= shader_bsdf_transparency(kg, shadow_sd);
} }
if(is_zero(throughput)) if(is_zero(throughput))
return true; return true;
/* move ray forward */ /* move ray forward */
ray->P = ray_offset(ccl_fetch(sd, P), -ccl_fetch(sd, Ng)); ray->P = ray_offset(ccl_fetch(shadow_sd, P), -ccl_fetch(shadow_sd, Ng));
if(ray->t != FLT_MAX) if(ray->t != FLT_MAX)
ray->D = normalize_len(Pend - ray->P, &ray->t); ray->D = normalize_len(Pend - ray->P, &ray->t);
#ifdef __VOLUME__ #ifdef __VOLUME__
/* exit/enter volume */ /* exit/enter volume */
kernel_volume_stack_enter_exit(kg, sd, ps.volume_stack); kernel_volume_stack_enter_exit(kg, shadow_sd, ps.volume_stack);
#endif #endif
bounce++; bounce++;
@@ -286,7 +280,7 @@ ccl_device_noinline bool shadow_blocked(KernelGlobals *kg,
#ifdef __VOLUME__ #ifdef __VOLUME__
else if(!blocked && state->volume_stack[0].shader != SHADER_NONE) { else if(!blocked && state->volume_stack[0].shader != SHADER_NONE) {
/* apply attenuation from current volume shader */ /* apply attenuation from current volume shader */
kernel_volume_shadow(kg, state, ray, shadow); kernel_volume_shadow(kg, shadow_sd, state, ray, shadow);
} }
#endif #endif
#endif #endif

View File

@@ -219,15 +219,14 @@ ccl_device void kernel_volume_shadow_heterogeneous(KernelGlobals *kg, PathState
/* get the volume attenuation over line segment defined by ray, with the /* get the volume attenuation over line segment defined by ray, with the
* assumption that there are no surfaces blocking light between the endpoints */ * assumption that there are no surfaces blocking light between the endpoints */
ccl_device_noinline void kernel_volume_shadow(KernelGlobals *kg, PathState *state, Ray *ray, float3 *throughput) ccl_device_noinline void kernel_volume_shadow(KernelGlobals *kg, ShaderData *shadow_sd, PathState *state, Ray *ray, float3 *throughput)
{ {
ShaderData sd; shader_setup_from_volume(kg, shadow_sd, ray);
shader_setup_from_volume(kg, &sd, ray);
if(volume_stack_is_heterogeneous(kg, state->volume_stack)) if(volume_stack_is_heterogeneous(kg, state->volume_stack))
kernel_volume_shadow_heterogeneous(kg, state, ray, &sd, throughput); kernel_volume_shadow_heterogeneous(kg, state, ray, shadow_sd, throughput);
else else
kernel_volume_shadow_homogeneous(kg, state, ray, &sd, throughput); kernel_volume_shadow_homogeneous(kg, state, ray, shadow_sd, throughput);
} }
/* Equi-angular sampling as in: /* Equi-angular sampling as in:
@@ -1000,6 +999,7 @@ ccl_device bool kernel_volume_use_decoupled(KernelGlobals *kg, bool heterogeneou
* is inside of. */ * is inside of. */
ccl_device void kernel_volume_stack_init(KernelGlobals *kg, ccl_device void kernel_volume_stack_init(KernelGlobals *kg,
ShaderData *stack_sd,
Ray *ray, Ray *ray,
VolumeStack *stack) VolumeStack *stack)
{ {
@@ -1040,28 +1040,27 @@ ccl_device void kernel_volume_stack_init(KernelGlobals *kg,
qsort(hits, num_hits, sizeof(Intersection), intersections_compare); qsort(hits, num_hits, sizeof(Intersection), intersections_compare);
for(uint hit = 0; hit < num_hits; ++hit, ++isect) { for(uint hit = 0; hit < num_hits; ++hit, ++isect) {
ShaderData sd; shader_setup_from_ray(kg, stack_sd, isect, &volume_ray);
shader_setup_from_ray(kg, &sd, isect, &volume_ray); if(stack_sd->flag & SD_BACKFACING) {
if(sd.flag & SD_BACKFACING) {
bool need_add = true; bool need_add = true;
for(int i = 0; i < enclosed_index && need_add; ++i) { for(int i = 0; i < enclosed_index && need_add; ++i) {
/* If the ray exited the volume without ever entering it, /* If the ray exited the volume without ever entering it,
* it means that the camera is inside that volume. * it means that the camera is inside that volume.
*/ */
if(enclosed_volumes[i] == sd.object) { if(enclosed_volumes[i] == stack_sd->object) {
need_add = false; need_add = false;
} }
} }
for(int i = 0; i < stack_index && need_add; ++i) { for(int i = 0; i < stack_index && need_add; ++i) {
/* Don't add intersections twice. */ /* Don't add intersections twice. */
if(stack[i].object == sd.object) { if(stack[i].object == stack_sd->object) {
need_add = false; need_add = false;
break; break;
} }
} }
if(need_add) { if(need_add) {
stack[stack_index].object = sd.object; stack[stack_index].object = stack_sd->object;
stack[stack_index].shader = sd.shader; stack[stack_index].shader = stack_sd->shader;
++stack_index; ++stack_index;
} }
} }
@@ -1069,7 +1068,7 @@ ccl_device void kernel_volume_stack_init(KernelGlobals *kg,
/* If ray from camera enters the volume, this volume shouldn't /* If ray from camera enters the volume, this volume shouldn't
* be added to the stack on exit. * be added to the stack on exit.
*/ */
enclosed_volumes[enclosed_index++] = sd.object; enclosed_volumes[enclosed_index++] = stack_sd->object;
} }
} }
} }
@@ -1086,9 +1085,8 @@ ccl_device void kernel_volume_stack_init(KernelGlobals *kg,
break; break;
} }
ShaderData sd; shader_setup_from_ray(kg, stack_sd, &isect, &volume_ray);
shader_setup_from_ray(kg, &sd, &isect, &volume_ray); if(stack_sd->flag & SD_BACKFACING) {
if(sd.flag & SD_BACKFACING) {
/* If the ray exited the volume without ever entering it, /* If the ray exited the volume without ever entering it,
* it means that the camera is inside that volume. * it means that the camera is inside that volume.
*/ */
@@ -1097,20 +1095,20 @@ ccl_device void kernel_volume_stack_init(KernelGlobals *kg,
/* If the ray exited the volume without ever entering it, /* If the ray exited the volume without ever entering it,
* it means that the camera is inside that volume. * it means that the camera is inside that volume.
*/ */
if(enclosed_volumes[i] == sd.object) { if(enclosed_volumes[i] == stack_sd->object) {
need_add = false; need_add = false;
} }
} }
for(int i = 0; i < stack_index && need_add; ++i) { for(int i = 0; i < stack_index && need_add; ++i) {
/* Don't add intersections twice. */ /* Don't add intersections twice. */
if(stack[i].object == sd.object) { if(stack[i].object == stack_sd->object) {
need_add = false; need_add = false;
break; break;
} }
} }
if(need_add) { if(need_add) {
stack[stack_index].object = sd.object; stack[stack_index].object = stack_sd->object;
stack[stack_index].shader = sd.shader; stack[stack_index].shader = stack_sd->shader;
++stack_index; ++stack_index;
} }
} }
@@ -1118,11 +1116,11 @@ ccl_device void kernel_volume_stack_init(KernelGlobals *kg,
/* If ray from camera enters the volume, this volume shouldn't /* If ray from camera enters the volume, this volume shouldn't
* be added to the stack on exit. * be added to the stack on exit.
*/ */
enclosed_volumes[enclosed_index++] = sd.object; enclosed_volumes[enclosed_index++] = stack_sd->object;
} }
/* Move ray forward. */ /* Move ray forward. */
volume_ray.P = ray_offset(sd.P, -sd.Ng); volume_ray.P = ray_offset(stack_sd->P, -stack_sd->Ng);
++step; ++step;
} }
#endif #endif
@@ -1190,6 +1188,7 @@ ccl_device void kernel_volume_stack_enter_exit(KernelGlobals *kg, ShaderData *sd
#ifdef __SUBSURFACE__ #ifdef __SUBSURFACE__
ccl_device void kernel_volume_stack_update_for_subsurface(KernelGlobals *kg, ccl_device void kernel_volume_stack_update_for_subsurface(KernelGlobals *kg,
ShaderData *stack_sd,
Ray *ray, Ray *ray,
VolumeStack *stack) VolumeStack *stack)
{ {
@@ -1210,9 +1209,8 @@ ccl_device void kernel_volume_stack_update_for_subsurface(KernelGlobals *kg,
qsort(hits, num_hits, sizeof(Intersection), intersections_compare); qsort(hits, num_hits, sizeof(Intersection), intersections_compare);
for(uint hit = 0; hit < num_hits; ++hit, ++isect) { for(uint hit = 0; hit < num_hits; ++hit, ++isect) {
ShaderData sd; shader_setup_from_ray(kg, stack_sd, isect, &volume_ray);
shader_setup_from_ray(kg, &sd, isect, &volume_ray); kernel_volume_stack_enter_exit(kg, stack_sd, stack);
kernel_volume_stack_enter_exit(kg, &sd, stack);
} }
} }
# else # else
@@ -1224,13 +1222,12 @@ ccl_device void kernel_volume_stack_update_for_subsurface(KernelGlobals *kg,
&isect, &isect,
PATH_RAY_ALL_VISIBILITY)) PATH_RAY_ALL_VISIBILITY))
{ {
ShaderData sd; shader_setup_from_ray(kg, stack_sd, &isect, &volume_ray);
shader_setup_from_ray(kg, &sd, &isect, &volume_ray); kernel_volume_stack_enter_exit(kg, stack_sd, stack);
kernel_volume_stack_enter_exit(kg, &sd, stack);
/* Move ray forward. */ /* Move ray forward. */
volume_ray.P = ray_offset(sd.P, -sd.Ng); volume_ray.P = ray_offset(stack_sd->P, -stack_sd->Ng);
volume_ray.t -= sd.ray_length; volume_ray.t -= stack_sd->ray_length;
++step; ++step;
} }
# endif # endif

View File

@@ -157,7 +157,7 @@ ccl_device char kernel_background_buffer_update(
if(IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND)) { if(IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND)) {
#ifdef __BACKGROUND__ #ifdef __BACKGROUND__
/* sample background shader */ /* sample background shader */
float3 L_background = indirect_background(kg, state, ray); float3 L_background = indirect_background(kg, kg->sd_input, state, ray);
path_radiance_accum_background(L, (*throughput), L_background, state->bounce); path_radiance_accum_background(L, (*throughput), L_background, state->bounce);
#endif #endif
ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER); ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
@@ -226,7 +226,7 @@ ccl_device char kernel_background_buffer_update(
*throughput = make_float3(1.0f, 1.0f, 1.0f); *throughput = make_float3(1.0f, 1.0f, 1.0f);
*L_transparent = 0.0f; *L_transparent = 0.0f;
path_radiance_init(L, kernel_data.film.use_light_pass); path_radiance_init(L, kernel_data.film.use_light_pass);
path_state_init(kg, state, rng, sample, ray); path_state_init(kg, kg->sd_input, state, rng, sample, ray);
#ifdef __KERNEL_DEBUG__ #ifdef __KERNEL_DEBUG__
debug_data_init(debug_data); debug_data_init(debug_data);
#endif #endif

View File

@@ -207,6 +207,7 @@ ccl_device void kernel_data_init(
L_transparent_coop[ray_index] = 0.0f; L_transparent_coop[ray_index] = 0.0f;
path_radiance_init(&PathRadiance_coop[ray_index], kernel_data.film.use_light_pass); path_radiance_init(&PathRadiance_coop[ray_index], kernel_data.film.use_light_pass);
path_state_init(kg, path_state_init(kg,
kg->sd_input,
&PathState_coop[ray_index], &PathState_coop[ray_index],
&rng_coop[ray_index], &rng_coop[ray_index],
my_sample, my_sample,

View File

@@ -88,7 +88,7 @@ ccl_device char kernel_direct_lighting(
BsdfEval L_light; BsdfEval L_light;
bool is_lamp; bool is_lamp;
if(direct_emission(kg, sd, &ls, state, &light_ray, &L_light, &is_lamp)) { if(direct_emission(kg, sd, kg->sd_input, &ls, state, &light_ray, &L_light, &is_lamp)) {
/* Write intermediate data to global memory to access from /* Write intermediate data to global memory to access from
* the next kernel. * the next kernel.
*/ */

View File

@@ -74,7 +74,7 @@ ccl_device void kernel_lamp_emission(
/* intersect with lamp */ /* intersect with lamp */
float3 emission; float3 emission;
if(indirect_lamp_emission(kg, state, &light_ray, &emission)) { if(indirect_lamp_emission(kg, kg->sd_input, state, &light_ray, &emission)) {
path_radiance_accum_emission(L, throughput, emission, state->bounce); path_radiance_accum_emission(L, throughput, emission, state->bounce);
} }
} }

View File

@@ -71,6 +71,7 @@ ccl_device void kernel_shadow_blocked(
float3 shadow; float3 shadow;
update_path_radiance = !(shadow_blocked(kg, update_path_radiance = !(shadow_blocked(kg,
kg->sd_input,
state, state,
light_ray_global, light_ray_global,
&shadow)); &shadow));