Code cleanup: move rng into path state.

Also pass the RNG hash by value and don't write it back, now that it is just a
hash for seeding and no longer an LCG state. Together these changes make CUDA a
tiny bit faster in my tests, but mainly they simplify the code.
This commit is contained in:
Brecht Van Lommel
2017-08-19 04:11:25 +02:00
parent 1cc4033df8
commit cfa8b762e2
28 changed files with 192 additions and 271 deletions

View File

@@ -55,14 +55,13 @@ ccl_device_noinline void kernel_path_ao(KernelGlobals *kg,
ShaderData *emission_sd,
PathRadiance *L,
ccl_addr_space PathState *state,
RNG *rng,
float3 throughput,
float3 ao_alpha)
{
/* todo: solve correlation */
float bsdf_u, bsdf_v;
path_state_rng_2D(kg, rng, state, PRNG_BSDF_U, &bsdf_u, &bsdf_v);
path_state_rng_2D(kg, state, PRNG_BSDF_U, &bsdf_u, &bsdf_v);
float ao_factor = kernel_data.background.ao_factor;
float3 ao_N;
@@ -101,7 +100,6 @@ ccl_device_noinline void kernel_path_ao(KernelGlobals *kg,
ccl_device void kernel_path_indirect(KernelGlobals *kg,
ShaderData *sd,
ShaderData *emission_sd,
RNG *rng,
Ray *ray,
float3 throughput,
int num_samples,
@@ -200,7 +198,6 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
/* direct light sampling */
kernel_branched_path_volume_connect_light(kg,
rng,
sd,
emission_sd,
throughput,
@@ -213,8 +210,8 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
/* indirect sample. if we use distance sampling and take just
* one sample for direct and indirect light, we could share
* this computation, but makes code a bit complex */
float rphase = path_state_rng_1D_for_decision(kg, rng, state, PRNG_PHASE);
float rscatter = path_state_rng_1D_for_decision(kg, rng, state, PRNG_SCATTER_DISTANCE);
float rphase = path_state_rng_1D_for_decision(kg, state, PRNG_PHASE);
float rscatter = path_state_rng_1D_for_decision(kg, state, PRNG_SCATTER_DISTANCE);
result = kernel_volume_decoupled_scatter(kg,
state,
@@ -233,7 +230,6 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
if(result == VOLUME_PATH_SCATTERED) {
if(kernel_path_volume_bounce(kg,
rng,
sd,
&throughput,
state,
@@ -255,13 +251,12 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
{
/* integrate along volume segment with distance sampling */
VolumeIntegrateResult result = kernel_volume_integrate(
kg, state, sd, &volume_ray, L, &throughput, rng, heterogeneous);
kg, state, sd, &volume_ray, L, &throughput, heterogeneous);
# ifdef __VOLUME_SCATTER__
if(result == VOLUME_PATH_SCATTERED) {
/* direct lighting */
kernel_path_volume_connect_light(kg,
rng,
sd,
emission_sd,
throughput,
@@ -270,7 +265,6 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
/* indirect light bounce */
if(kernel_path_volume_bounce(kg,
rng,
sd,
&throughput,
state,
@@ -309,8 +303,8 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
sd,
&isect,
ray);
float rbsdf = path_state_rng_1D_for_decision(kg, rng, state, PRNG_BSDF);
shader_eval_surface(kg, sd, rng, state, rbsdf, state->flag, SHADER_CONTEXT_INDIRECT);
float rbsdf = path_state_rng_1D_for_decision(kg, state, PRNG_BSDF);
shader_eval_surface(kg, sd, state, rbsdf, state->flag, SHADER_CONTEXT_INDIRECT);
#ifdef __BRANCHED_PATH__
shader_merge_closures(sd);
#endif /* __BRANCHED_PATH__ */
@@ -360,7 +354,7 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
break;
}
else if(probability != 1.0f) {
float terminate = path_state_rng_1D_for_decision(kg, rng, state, PRNG_TERMINATE);
float terminate = path_state_rng_1D_for_decision(kg, state, PRNG_TERMINATE);
if(terminate >= probability)
break;
@@ -373,7 +367,7 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
#ifdef __AO__
/* ambient occlusion */
if(kernel_data.integrator.use_ambient_occlusion || (sd->flag & SD_AO)) {
kernel_path_ao(kg, sd, emission_sd, L, state, rng, throughput, make_float3(0.0f, 0.0f, 0.0f));
kernel_path_ao(kg, sd, emission_sd, L, state, throughput, make_float3(0.0f, 0.0f, 0.0f));
}
#endif /* __AO__ */
@@ -389,11 +383,10 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
/* do bssrdf scatter step if we picked a bssrdf closure */
if(sc) {
uint lcg_state = lcg_state_init(rng, state->rng_offset, state->sample, 0x68bc21eb);
uint lcg_state = lcg_state_init(state, 0x68bc21eb);
float bssrdf_u, bssrdf_v;
path_state_rng_2D(kg,
rng,
state,
PRNG_BSDF_U,
&bssrdf_u, &bssrdf_v);
@@ -414,7 +407,6 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
int all = (kernel_data.integrator.sample_all_lights_indirect) ||
(state->flag & PATH_RAY_SHADOW_CATCHER);
kernel_branched_path_surface_connect_light(kg,
rng,
sd,
emission_sd,
state,
@@ -425,7 +417,7 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
}
#endif /* defined(__EMISSION__) */
if(!kernel_path_surface_bounce(kg, rng, sd, &throughput, state, L, ray))
if(!kernel_path_surface_bounce(kg, sd, &throughput, state, L, ray))
break;
}
}
@@ -433,7 +425,7 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
#endif /* defined(__BRANCHED_PATH__) || defined(__BAKING__) */
ccl_device_inline void kernel_path_integrate(KernelGlobals *kg,
RNG *rng,
uint rng_hash,
int sample,
Ray ray,
ccl_global float *buffer,
@@ -451,7 +443,7 @@ ccl_device_inline void kernel_path_integrate(KernelGlobals *kg,
ShaderData emission_sd;
PathState state;
path_state_init(kg, &emission_sd, &state, rng, sample, &ray);
path_state_init(kg, &emission_sd, &state, rng_hash, sample, &ray);
#ifdef __SUBSURFACE__
SubsurfaceIndirectRays ss_indirect;
@@ -478,7 +470,7 @@ ccl_device_inline void kernel_path_integrate(KernelGlobals *kg,
}
extmax = kernel_data.curve.maximum_width;
lcg_state = lcg_state_init(rng, state.rng_offset, state.sample, 0x51633e2d);
lcg_state = lcg_state_init(&state, 0x51633e2d);
}
if(state.bounce > kernel_data.integrator.ao_bounces) {
@@ -558,15 +550,15 @@ ccl_device_inline void kernel_path_integrate(KernelGlobals *kg,
int all = false;
/* direct light sampling */
kernel_branched_path_volume_connect_light(kg, rng, &sd,
kernel_branched_path_volume_connect_light(kg, &sd,
&emission_sd, throughput, &state, L, all,
&volume_ray, &volume_segment);
/* indirect sample. if we use distance sampling and take just
* one sample for direct and indirect light, we could share
* this computation, but makes code a bit complex */
float rphase = path_state_rng_1D_for_decision(kg, rng, &state, PRNG_PHASE);
float rscatter = path_state_rng_1D_for_decision(kg, rng, &state, PRNG_SCATTER_DISTANCE);
float rphase = path_state_rng_1D_for_decision(kg, &state, PRNG_PHASE);
float rscatter = path_state_rng_1D_for_decision(kg, &state, PRNG_SCATTER_DISTANCE);
result = kernel_volume_decoupled_scatter(kg,
&state, &volume_ray, &sd, &throughput,
@@ -577,7 +569,7 @@ ccl_device_inline void kernel_path_integrate(KernelGlobals *kg,
kernel_volume_decoupled_free(kg, &volume_segment);
if(result == VOLUME_PATH_SCATTERED) {
if(kernel_path_volume_bounce(kg, rng, &sd, &throughput, &state, L, &ray))
if(kernel_path_volume_bounce(kg, &sd, &throughput, &state, L, &ray))
continue;
else
break;
@@ -591,15 +583,15 @@ ccl_device_inline void kernel_path_integrate(KernelGlobals *kg,
{
/* integrate along volume segment with distance sampling */
VolumeIntegrateResult result = kernel_volume_integrate(
kg, &state, &sd, &volume_ray, L, &throughput, rng, heterogeneous);
kg, &state, &sd, &volume_ray, L, &throughput, heterogeneous);
# ifdef __VOLUME_SCATTER__
if(result == VOLUME_PATH_SCATTERED) {
/* direct lighting */
kernel_path_volume_connect_light(kg, rng, &sd, &emission_sd, throughput, &state, L);
kernel_path_volume_connect_light(kg, &sd, &emission_sd, throughput, &state, L);
/* indirect light bounce */
if(kernel_path_volume_bounce(kg, rng, &sd, &throughput, &state, L, &ray))
if(kernel_path_volume_bounce(kg, &sd, &throughput, &state, L, &ray))
continue;
else
break;
@@ -634,8 +626,8 @@ ccl_device_inline void kernel_path_integrate(KernelGlobals *kg,
/* setup shading */
shader_setup_from_ray(kg, &sd, &isect, &ray);
float rbsdf = path_state_rng_1D_for_decision(kg, rng, &state, PRNG_BSDF);
shader_eval_surface(kg, &sd, rng, &state, rbsdf, state.flag, SHADER_CONTEXT_MAIN);
float rbsdf = path_state_rng_1D_for_decision(kg, &state, PRNG_BSDF);
shader_eval_surface(kg, &sd, &state, rbsdf, state.flag, SHADER_CONTEXT_MAIN);
#ifdef __SHADOW_TRICKS__
if((sd.object_flag & SD_OBJECT_SHADOW_CATCHER)) {
@@ -713,7 +705,7 @@ ccl_device_inline void kernel_path_integrate(KernelGlobals *kg,
break;
}
else if(probability != 1.0f) {
float terminate = path_state_rng_1D_for_decision(kg, rng, &state, PRNG_TERMINATE);
float terminate = path_state_rng_1D_for_decision(kg, &state, PRNG_TERMINATE);
if(terminate >= probability)
break;
@@ -725,7 +717,7 @@ ccl_device_inline void kernel_path_integrate(KernelGlobals *kg,
#ifdef __AO__
/* ambient occlusion */
if(kernel_data.integrator.use_ambient_occlusion || (sd.flag & SD_AO)) {
kernel_path_ao(kg, &sd, &emission_sd, L, &state, rng, throughput, shader_bsdf_alpha(kg, &sd));
kernel_path_ao(kg, &sd, &emission_sd, L, &state, throughput, shader_bsdf_alpha(kg, &sd));
}
#endif /* __AO__ */
@@ -738,7 +730,6 @@ ccl_device_inline void kernel_path_integrate(KernelGlobals *kg,
&emission_sd,
L,
&state,
rng,
&ray,
&throughput,
&ss_indirect))
@@ -749,10 +740,10 @@ ccl_device_inline void kernel_path_integrate(KernelGlobals *kg,
#endif /* __SUBSURFACE__ */
/* direct lighting */
kernel_path_surface_connect_light(kg, rng, &sd, &emission_sd, throughput, &state, L);
kernel_path_surface_connect_light(kg, &sd, &emission_sd, throughput, &state, L);
/* compute direct lighting and next bounce */
if(!kernel_path_surface_bounce(kg, rng, &sd, &throughput, &state, L, &ray))
if(!kernel_path_surface_bounce(kg, &sd, &throughput, &state, L, &ray))
break;
}
@@ -793,17 +784,17 @@ ccl_device void kernel_path_trace(KernelGlobals *kg,
buffer += index*pass_stride;
/* initialize random numbers and ray */
RNG rng;
uint rng_hash;
Ray ray;
kernel_path_trace_setup(kg, rng_state, sample, x, y, &rng, &ray);
kernel_path_trace_setup(kg, rng_state, sample, x, y, &rng_hash, &ray);
/* integrate */
PathRadiance L;
bool is_shadow_catcher;
if(ray.t != 0.0f) {
kernel_path_integrate(kg, &rng, sample, ray, buffer, &L, &is_shadow_catcher);
kernel_path_integrate(kg, rng_hash, sample, ray, buffer, &L, &is_shadow_catcher);
kernel_write_result(kg, buffer, sample, &L, is_shadow_catcher);
}
else {