Cycles: experimental correlated multi-jittered sampling pattern that can be used

instead of sobol. So far one doesn't seem to be consistently better or worse than
the other for the same number of samples but more testing is needed.

The random number generator itself is slower than sobol for most number of samples,
except 16, 64, 256, .. because they can be computed faster. This can probably be
optimized, but we can do that when/if this actually turns out to be useful.

Paper this implementation is based on:
http://graphics.pixar.com/library/MultiJitteredSampling/

Also includes some refactoring of RNG code, fixing a Sobol correlation issue with
the first BSDF and < 16 samples, skipping some unneeded RNG calls and using a
simpler unit square to unit disk function.
This commit is contained in:
Brecht Van Lommel
2013-06-07 16:06:22 +00:00
parent d835d2f4e6
commit b20a7e01d0
13 changed files with 417 additions and 110 deletions

View File

@@ -233,7 +233,7 @@ __device_inline bool shadow_blocked(KernelGlobals *kg, PathState *state, Ray *ra
return result;
}
__device float4 kernel_path_progressive(KernelGlobals *kg, RNG *rng, int sample, Ray ray, __global float *buffer)
__device float4 kernel_path_progressive(KernelGlobals *kg, RNG rng, int sample, Ray ray, __global float *buffer)
{
/* initialize */
PathRadiance L;
@@ -249,6 +249,7 @@ __device float4 kernel_path_progressive(KernelGlobals *kg, RNG *rng, int sample,
#endif
PathState state;
int rng_offset = PRNG_BASE_NUM;
int num_samples = kernel_data.integrator.aa_samples;
path_state_init(&state);
@@ -270,7 +271,7 @@ __device float4 kernel_path_progressive(KernelGlobals *kg, RNG *rng, int sample,
}
extmax = kernel_data.curve_kernel_data.maximum_width;
lcg_state = lcg_init(*rng + rng_offset + sample*0x51633e2d);
lcg_state = lcg_init(rng + rng_offset + sample*0x51633e2d);
}
bool hit = scene_intersect(kg, &ray, visibility, &isect, &lcg_state, difl, extmax);
@@ -292,7 +293,7 @@ __device float4 kernel_path_progressive(KernelGlobals *kg, RNG *rng, int sample,
light_ray.dP = ray.dP;
/* intersect with lamp */
float light_t = path_rng(kg, rng, sample, rng_offset + PRNG_LIGHT);
float light_t = path_rng_1D(kg, rng, sample, num_samples, rng_offset + PRNG_LIGHT);
float3 emission;
if(indirect_lamp_emission(kg, &light_ray, state.flag, ray_pdf, light_t, &emission))
@@ -323,7 +324,7 @@ __device float4 kernel_path_progressive(KernelGlobals *kg, RNG *rng, int sample,
/* setup shading */
ShaderData sd;
shader_setup_from_ray(kg, &sd, &isect, &ray);
float rbsdf = path_rng(kg, rng, sample, rng_offset + PRNG_BSDF);
float rbsdf = path_rng_1D(kg, rng, sample, num_samples, rng_offset + PRNG_BSDF);
shader_eval_surface(kg, &sd, rbsdf, state.flag, SHADER_CONTEXT_MAIN);
/* holdout */
@@ -373,12 +374,18 @@ __device float4 kernel_path_progressive(KernelGlobals *kg, RNG *rng, int sample,
* mainly due to the mixed in MIS that we use. gives too many unneeded
* shader evaluations, only need emission if we are going to terminate */
float probability = path_state_terminate_probability(kg, &state, throughput);
float terminate = path_rng(kg, rng, sample, rng_offset + PRNG_TERMINATE);
if(terminate >= probability)
if(probability == 0.0f) {
break;
}
else if(probability != 1.0f) {
float terminate = path_rng_1D(kg, rng, sample, num_samples, rng_offset + PRNG_TERMINATE);
throughput /= probability;
if(terminate >= probability)
break;
throughput /= probability;
}
#ifdef __SUBSURFACE__
/* bssrdf scatter to a different location on the same object, replacing
@@ -392,7 +399,7 @@ __device float4 kernel_path_progressive(KernelGlobals *kg, RNG *rng, int sample,
/* do bssrdf scatter step if we picked a bssrdf closure */
if(sc) {
uint lcg_state = lcg_init(*rng + rng_offset + sample*0x68bc21eb);
uint lcg_state = lcg_init(rng + rng_offset + sample*0x68bc21eb);
subsurface_scatter_step(kg, &sd, state.flag, sc, &lcg_state, false);
}
}
@@ -402,8 +409,9 @@ __device float4 kernel_path_progressive(KernelGlobals *kg, RNG *rng, int sample,
/* ambient occlusion */
if(kernel_data.integrator.use_ambient_occlusion || (sd.flag & SD_AO)) {
/* todo: solve correlation */
float bsdf_u = path_rng(kg, rng, sample, rng_offset + PRNG_BSDF_U);
float bsdf_v = path_rng(kg, rng, sample, rng_offset + PRNG_BSDF_V);
float2 bsdf_uv = path_rng_2D(kg, rng, sample, num_samples, rng_offset + PRNG_BSDF_U);
float bsdf_u = bsdf_uv.x;
float bsdf_v = bsdf_uv.y;
float ao_factor = kernel_data.background.ao_factor;
float3 ao_N;
@@ -436,10 +444,15 @@ __device float4 kernel_path_progressive(KernelGlobals *kg, RNG *rng, int sample,
if(kernel_data.integrator.use_direct_light) {
/* sample illumination from lights to find path contribution */
if(sd.flag & SD_BSDF_HAS_EVAL) {
float light_t = path_rng(kg, rng, sample, rng_offset + PRNG_LIGHT);
float light_o = path_rng(kg, rng, sample, rng_offset + PRNG_LIGHT_F);
float light_u = path_rng(kg, rng, sample, rng_offset + PRNG_LIGHT_U);
float light_v = path_rng(kg, rng, sample, rng_offset + PRNG_LIGHT_V);
float light_t = path_rng_1D(kg, rng, sample, num_samples, rng_offset + PRNG_LIGHT);
#ifdef __MULTI_CLOSURE__
float light_o = 0.0f;
#else
float light_o = path_rng_1D(kg, rng, sample, num_samples, rng_offset + PRNG_LIGHT_F);
#endif
float2 light_uv = path_rng_2D(kg, rng, sample, num_samples, rng_offset + PRNG_LIGHT_U);
float light_u = light_uv.x;
float light_v = light_uv.y;
Ray light_ray;
BsdfEval L_light;
@@ -471,8 +484,9 @@ __device float4 kernel_path_progressive(KernelGlobals *kg, RNG *rng, int sample,
BsdfEval bsdf_eval;
float3 bsdf_omega_in;
differential3 bsdf_domega_in;
float bsdf_u = path_rng(kg, rng, sample, rng_offset + PRNG_BSDF_U);
float bsdf_v = path_rng(kg, rng, sample, rng_offset + PRNG_BSDF_V);
float2 bsdf_uv = path_rng_2D(kg, rng, sample, num_samples, rng_offset + PRNG_BSDF_U);
float bsdf_u = bsdf_uv.x;
float bsdf_v = bsdf_uv.y;
int label;
label = shader_bsdf_sample(kg, &sd, bsdf_u, bsdf_v, &bsdf_eval,
@@ -524,8 +538,8 @@ __device float4 kernel_path_progressive(KernelGlobals *kg, RNG *rng, int sample,
#ifdef __NON_PROGRESSIVE__
__device void kernel_path_indirect(KernelGlobals *kg, RNG *rng, int sample, Ray ray, __global float *buffer,
float3 throughput, float num_samples_adjust,
__device void kernel_path_indirect(KernelGlobals *kg, RNG rng, int sample, Ray ray, __global float *buffer,
float3 throughput, int num_samples, int num_total_samples,
float min_ray_pdf, float ray_pdf, PathState state, int rng_offset, PathRadiance *L)
{
#ifdef __LAMP_MIS__
@@ -557,7 +571,7 @@ __device void kernel_path_indirect(KernelGlobals *kg, RNG *rng, int sample, Ray
light_ray.dP = ray.dP;
/* intersect with lamp */
float light_t = path_rng(kg, rng, sample, rng_offset + PRNG_LIGHT);
float light_t = path_rng_1D(kg, rng, sample, num_total_samples, rng_offset + PRNG_LIGHT);
float3 emission;
if(indirect_lamp_emission(kg, &light_ray, state.flag, ray_pdf, light_t, &emission))
@@ -578,7 +592,7 @@ __device void kernel_path_indirect(KernelGlobals *kg, RNG *rng, int sample, Ray
/* setup shading */
ShaderData sd;
shader_setup_from_ray(kg, &sd, &isect, &ray);
float rbsdf = path_rng(kg, rng, sample, rng_offset + PRNG_BSDF);
float rbsdf = path_rng_1D(kg, rng, sample, num_total_samples, rng_offset + PRNG_BSDF);
shader_eval_surface(kg, &sd, rbsdf, state.flag, SHADER_CONTEXT_INDIRECT);
shader_merge_closures(kg, &sd);
@@ -604,13 +618,19 @@ __device void kernel_path_indirect(KernelGlobals *kg, RNG *rng, int sample, Ray
/* path termination. this is a strange place to put the termination, it's
* mainly due to the mixed in MIS that we use. gives too many unneeded
* shader evaluations, only need emission if we are going to terminate */
float probability = path_state_terminate_probability(kg, &state, throughput*num_samples_adjust);
float terminate = path_rng(kg, rng, sample, rng_offset + PRNG_TERMINATE);
float probability = path_state_terminate_probability(kg, &state, throughput*num_samples);
if(terminate >= probability)
if(probability == 0.0f) {
break;
}
else if(probability != 1.0f) {
float terminate = path_rng_1D(kg, rng, sample, num_total_samples, rng_offset + PRNG_TERMINATE);
throughput /= probability;
if(terminate >= probability)
break;
throughput /= probability;
}
#ifdef __SUBSURFACE__
/* bssrdf scatter to a different location on the same object, replacing
@@ -624,7 +644,7 @@ __device void kernel_path_indirect(KernelGlobals *kg, RNG *rng, int sample, Ray
/* do bssrdf scatter step if we picked a bssrdf closure */
if(sc) {
uint lcg_state = lcg_init(*rng + rng_offset + sample*0x68bc21eb);
uint lcg_state = lcg_init(rng + rng_offset + sample*0x68bc21eb);
subsurface_scatter_step(kg, &sd, state.flag, sc, &lcg_state, false);
}
}
@@ -634,8 +654,9 @@ __device void kernel_path_indirect(KernelGlobals *kg, RNG *rng, int sample, Ray
/* ambient occlusion */
if(kernel_data.integrator.use_ambient_occlusion || (sd.flag & SD_AO)) {
/* todo: solve correlation */
float bsdf_u = path_rng(kg, rng, sample, rng_offset + PRNG_BSDF_U);
float bsdf_v = path_rng(kg, rng, sample, rng_offset + PRNG_BSDF_V);
float2 bsdf_uv = path_rng_2D(kg, rng, sample, num_total_samples, rng_offset + PRNG_BSDF_U);
float bsdf_u = bsdf_uv.x;
float bsdf_v = bsdf_uv.y;
float ao_factor = kernel_data.background.ao_factor;
float3 ao_N;
@@ -668,10 +689,15 @@ __device void kernel_path_indirect(KernelGlobals *kg, RNG *rng, int sample, Ray
if(kernel_data.integrator.use_direct_light) {
/* sample illumination from lights to find path contribution */
if(sd.flag & SD_BSDF_HAS_EVAL) {
float light_t = path_rng(kg, rng, sample, rng_offset + PRNG_LIGHT);
float light_o = path_rng(kg, rng, sample, rng_offset + PRNG_LIGHT_F);
float light_u = path_rng(kg, rng, sample, rng_offset + PRNG_LIGHT_U);
float light_v = path_rng(kg, rng, sample, rng_offset + PRNG_LIGHT_V);
float light_t = path_rng_1D(kg, rng, sample, num_total_samples, rng_offset + PRNG_LIGHT);
#ifdef __MULTI_CLOSURE__
float light_o = 0.0f;
#else
float light_o = path_rng_1D(kg, rng, sample, num_total_samples, rng_offset + PRNG_LIGHT_F);
#endif
float2 light_uv = path_rng_2D(kg, rng, sample, num_total_samples, rng_offset + PRNG_LIGHT_U);
float light_u = light_uv.x;
float light_v = light_uv.y;
Ray light_ray;
BsdfEval L_light;
@@ -704,8 +730,9 @@ __device void kernel_path_indirect(KernelGlobals *kg, RNG *rng, int sample, Ray
BsdfEval bsdf_eval;
float3 bsdf_omega_in;
differential3 bsdf_domega_in;
float bsdf_u = path_rng(kg, rng, sample, rng_offset + PRNG_BSDF_U);
float bsdf_v = path_rng(kg, rng, sample, rng_offset + PRNG_BSDF_V);
float2 bsdf_uv = path_rng_2D(kg, rng, sample, num_total_samples, rng_offset + PRNG_BSDF_U);
float bsdf_u = bsdf_uv.x;
float bsdf_v = bsdf_uv.y;
int label;
label = shader_bsdf_sample(kg, &sd, bsdf_u, bsdf_v, &bsdf_eval,
@@ -740,15 +767,17 @@ __device void kernel_path_indirect(KernelGlobals *kg, RNG *rng, int sample, Ray
}
}
__device_noinline void kernel_path_non_progressive_lighting(KernelGlobals *kg, RNG *rng, int sample,
__device_noinline void kernel_path_non_progressive_lighting(KernelGlobals *kg, RNG rng, int sample,
ShaderData *sd, float3 throughput, float num_samples_adjust,
float min_ray_pdf, float ray_pdf, PathState state,
int rng_offset, PathRadiance *L, __global float *buffer)
{
int aa_samples = kernel_data.integrator.aa_samples;
#ifdef __AO__
/* ambient occlusion */
if(kernel_data.integrator.use_ambient_occlusion || (sd->flag & SD_AO)) {
int num_samples = ceil(kernel_data.integrator.ao_samples*num_samples_adjust);
int num_samples = ceil_to_int(kernel_data.integrator.ao_samples*num_samples_adjust);
float num_samples_inv = num_samples_adjust/num_samples;
float ao_factor = kernel_data.background.ao_factor;
float3 ao_N;
@@ -756,8 +785,9 @@ __device_noinline void kernel_path_non_progressive_lighting(KernelGlobals *kg, R
for(int j = 0; j < num_samples; j++) {
/* todo: solve correlation */
float bsdf_u = path_rng(kg, rng, sample*num_samples + j, rng_offset + PRNG_BSDF_U);
float bsdf_v = path_rng(kg, rng, sample*num_samples + j, rng_offset + PRNG_BSDF_V);
float2 bsdf_uv = path_rng_2D(kg, rng, sample*num_samples + j, aa_samples*num_samples, rng_offset + PRNG_BSDF_U);
float bsdf_u = bsdf_uv.x;
float bsdf_v = bsdf_uv.y;
float3 ao_D;
float ao_pdf;
@@ -798,15 +828,17 @@ __device_noinline void kernel_path_non_progressive_lighting(KernelGlobals *kg, R
/* lamp sampling */
for(int i = 0; i < kernel_data.integrator.num_all_lights; i++) {
int num_samples = ceil(num_samples_adjust*light_select_num_samples(kg, i));
int num_samples = ceil_to_int(num_samples_adjust*light_select_num_samples(kg, i));
float num_samples_inv = num_samples_adjust/(num_samples*kernel_data.integrator.num_all_lights);
RNG lamp_rng = cmj_hash(rng, i);
if(kernel_data.integrator.pdf_triangles != 0.0f)
num_samples_inv *= 0.5f;
for(int j = 0; j < num_samples; j++) {
float light_u = path_rng(kg, rng, sample*num_samples + j, rng_offset + PRNG_LIGHT_U);
float light_v = path_rng(kg, rng, sample*num_samples + j, rng_offset + PRNG_LIGHT_V);
float2 light_uv = path_rng_2D(kg, lamp_rng, sample*num_samples + j, aa_samples*num_samples, rng_offset + PRNG_LIGHT_U);
float light_u = light_uv.x;
float light_v = light_uv.y;
if(direct_emission(kg, sd, i, 0.0f, 0.0f, light_u, light_v, &light_ray, &L_light, &is_lamp)) {
/* trace shadow ray */
@@ -822,16 +854,17 @@ __device_noinline void kernel_path_non_progressive_lighting(KernelGlobals *kg, R
/* mesh light sampling */
if(kernel_data.integrator.pdf_triangles != 0.0f) {
int num_samples = ceil(num_samples_adjust*kernel_data.integrator.mesh_light_samples);
int num_samples = ceil_to_int(num_samples_adjust*kernel_data.integrator.mesh_light_samples);
float num_samples_inv = num_samples_adjust/num_samples;
if(kernel_data.integrator.num_all_lights)
num_samples_inv *= 0.5f;
for(int j = 0; j < num_samples; j++) {
float light_t = path_rng(kg, rng, sample*num_samples + j, rng_offset + PRNG_LIGHT);
float light_u = path_rng(kg, rng, sample*num_samples + j, rng_offset + PRNG_LIGHT_U);
float light_v = path_rng(kg, rng, sample*num_samples + j, rng_offset + PRNG_LIGHT_V);
float light_t = path_rng_1D(kg, rng, sample*num_samples + j, aa_samples*num_samples, rng_offset + PRNG_LIGHT);
float2 light_uv = path_rng_2D(kg, rng, sample*num_samples + j, aa_samples*num_samples, rng_offset + PRNG_LIGHT_U);
float light_u = light_uv.x;
float light_v = light_uv.y;
/* only sample triangle lights */
if(kernel_data.integrator.num_all_lights)
@@ -869,9 +902,10 @@ __device_noinline void kernel_path_non_progressive_lighting(KernelGlobals *kg, R
else
num_samples = kernel_data.integrator.transmission_samples;
num_samples = ceil(num_samples_adjust*num_samples);
num_samples = ceil_to_int(num_samples_adjust*num_samples);
float num_samples_inv = num_samples_adjust/num_samples;
RNG bsdf_rng = cmj_hash(rng, i);
for(int j = 0; j < num_samples; j++) {
/* sample BSDF */
@@ -879,8 +913,9 @@ __device_noinline void kernel_path_non_progressive_lighting(KernelGlobals *kg, R
BsdfEval bsdf_eval;
float3 bsdf_omega_in;
differential3 bsdf_domega_in;
float bsdf_u = path_rng(kg, rng, sample*num_samples + j, rng_offset + PRNG_BSDF_U);
float bsdf_v = path_rng(kg, rng, sample*num_samples + j, rng_offset + PRNG_BSDF_V);
float2 bsdf_uv = path_rng_2D(kg, bsdf_rng, sample*num_samples + j, aa_samples*num_samples, rng_offset + PRNG_BSDF_U);
float bsdf_u = bsdf_uv.x;
float bsdf_v = bsdf_uv.y;
int label;
label = shader_bsdf_sample_closure(kg, sd, sc, bsdf_u, bsdf_v, &bsdf_eval,
@@ -918,7 +953,7 @@ __device_noinline void kernel_path_non_progressive_lighting(KernelGlobals *kg, R
#endif
kernel_path_indirect(kg, rng, sample*num_samples + j, bsdf_ray, buffer,
tp*num_samples_inv, num_samples,
tp*num_samples_inv, num_samples, aa_samples*num_samples,
min_ray_pdf, bsdf_pdf, ps, rng_offset+PRNG_BOUNCE_NUM, L);
/* for render passes, sum and reset indirect light pass variables
@@ -929,7 +964,7 @@ __device_noinline void kernel_path_non_progressive_lighting(KernelGlobals *kg, R
}
}
__device float4 kernel_path_non_progressive(KernelGlobals *kg, RNG *rng, int sample, Ray ray, __global float *buffer)
__device float4 kernel_path_non_progressive(KernelGlobals *kg, RNG rng, int sample, Ray ray, __global float *buffer)
{
/* initialize */
PathRadiance L;
@@ -941,6 +976,7 @@ __device float4 kernel_path_non_progressive(KernelGlobals *kg, RNG *rng, int sam
float ray_pdf = 0.0f;
PathState state;
int rng_offset = PRNG_BASE_NUM;
int aa_samples = kernel_data.integrator.aa_samples;
path_state_init(&state);
@@ -961,7 +997,7 @@ __device float4 kernel_path_non_progressive(KernelGlobals *kg, RNG *rng, int sam
}
extmax = kernel_data.curve_kernel_data.maximum_width;
lcg_state = lcg_init(*rng + rng_offset + sample*0x51633e2d);
lcg_state = lcg_init(rng + rng_offset + sample*0x51633e2d);
}
if(!scene_intersect(kg, &ray, visibility, &isect, &lcg_state, difl, extmax)) {
@@ -990,8 +1026,7 @@ __device float4 kernel_path_non_progressive(KernelGlobals *kg, RNG *rng, int sam
/* setup shading */
ShaderData sd;
shader_setup_from_ray(kg, &sd, &isect, &ray);
float rbsdf = path_rng(kg, rng, sample, rng_offset + PRNG_BSDF);
shader_eval_surface(kg, &sd, rbsdf, state.flag, SHADER_CONTEXT_MAIN);
shader_eval_surface(kg, &sd, 0.0f, state.flag, SHADER_CONTEXT_MAIN);
shader_merge_closures(kg, &sd);
/* holdout */
@@ -1031,12 +1066,18 @@ __device float4 kernel_path_non_progressive(KernelGlobals *kg, RNG *rng, int sam
* mainly due to the mixed in MIS that we use. gives too many unneeded
* shader evaluations, only need emission if we are going to terminate */
float probability = path_state_terminate_probability(kg, &state, throughput);
float terminate = path_rng(kg, rng, sample, rng_offset + PRNG_TERMINATE);
if(terminate >= probability)
if(probability == 0.0f) {
break;
}
else if(probability != 1.0f) {
float terminate = path_rng_1D(kg, rng, sample, aa_samples, rng_offset + PRNG_TERMINATE);
throughput /= probability;
if(terminate >= probability)
break;
throughput /= probability;
}
}
#ifdef __SUBSURFACE__
@@ -1049,7 +1090,7 @@ __device float4 kernel_path_non_progressive(KernelGlobals *kg, RNG *rng, int sam
continue;
/* set up random number generator */
uint lcg_state = lcg_init(*rng + rng_offset + sample*0x68bc21eb);
uint lcg_state = lcg_init(rng + rng_offset + sample*0x68bc21eb);
int num_samples = kernel_data.integrator.subsurface_samples;
float num_samples_inv = 1.0f/num_samples;
@@ -1112,22 +1153,26 @@ __device void kernel_path_trace(KernelGlobals *kg,
float filter_u;
float filter_v;
int num_samples = kernel_data.integrator.aa_samples;
path_rng_init(kg, rng_state, sample, &rng, x, y, &filter_u, &filter_v);
path_rng_init(kg, rng_state, sample, num_samples, &rng, x, y, &filter_u, &filter_v);
/* sample camera ray */
Ray ray;
float lens_u = 0.0f, lens_v = 0.0f;
float time = 0.0f;
if(kernel_data.cam.aperturesize > 0.0f) {
lens_u = path_rng(kg, &rng, sample, PRNG_LENS_U);
lens_v = path_rng(kg, &rng, sample, PRNG_LENS_V);
float2 lens_uv = path_rng_2D(kg, rng, sample, num_samples, PRNG_LENS_U);
lens_u = lens_uv.x;
lens_v = lens_uv.y;
}
float time = 0.0f;
#ifdef __CAMERA_MOTION__
if(kernel_data.cam.shuttertime != -1.0f)
time = path_rng(kg, &rng, sample, PRNG_TIME);
time = path_rng_1D(kg, rng, sample, num_samples, PRNG_TIME);
#endif
camera_sample(kg, x, y, filter_u, filter_v, lens_u, lens_v, time, &ray);
@@ -1139,10 +1184,10 @@ __device void kernel_path_trace(KernelGlobals *kg,
#ifdef __NON_PROGRESSIVE__
if(kernel_data.integrator.progressive)
#endif
L = kernel_path_progressive(kg, &rng, sample, ray, buffer);
L = kernel_path_progressive(kg, rng, sample, ray, buffer);
#ifdef __NON_PROGRESSIVE__
else
L = kernel_path_non_progressive(kg, &rng, sample, ray, buffer);
L = kernel_path_non_progressive(kg, rng, sample, ray, buffer);
#endif
}
else