Cycles: experimental correlated multi-jittered sampling pattern that can be used
instead of Sobol. So far neither seems consistently better or worse than the
other for the same number of samples, but more testing is needed.

The random number generator itself is slower than Sobol for most sample counts,
except power-of-four counts (16, 64, 256, ...), which can be computed faster.
This can probably be optimized, but we can do that when/if this actually turns
out to be useful.

Paper this implementation is based on:
http://graphics.pixar.com/library/MultiJitteredSampling/
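
In short, the paper's construction treats the N samples for a given pattern seed
as an m x n grid (m*n = N), gives each sample a distinct substratum in x and in y
through a seeded pseudo-random permutation, and then jitters the sample inside
that substratum. The sketch below follows that construction, written in the same
kernel style (uint, float2, __device_inline); the hash and permutation helpers
are simplified stand-ins, so this is illustrative only and not the
cmj_sample_1D/cmj_sample_2D code added in kernel_jitter.h.

/* Sketch only: s is the sample index within the pixel, num_samples = m*n,
 * and p is a pattern seed (e.g. hashed from the RNG state and dimension). */

/* Stand-in hash to [0, 1); the paper and kernel_jitter.h use different mixing. */
__device_inline float cmj_randfloat_sketch(uint i, uint p)
{
	i ^= p;
	i ^= i >> 17;
	i *= 0xed5ad4bb;
	i ^= i >> 11;
	i *= 0xac4c1b51;
	i ^= i >> 15;
	return (float)i * (1.0f/(float)0xFFFFFFFF);
}

/* Stand-in seeded permutation of [0, l): cycle-walk an affine bijection of the
 * next power of two until the value lands back inside [0, l). */
__device_inline uint cmj_permute_sketch(uint i, uint l, uint p)
{
	uint w = l - 1;

	w |= w >> 1; w |= w >> 2; w |= w >> 4; w |= w >> 8; w |= w >> 16;

	do {
		i = (i*0x9e3779b1 + p) & w;
	} while(i >= l);

	return i;
}

/* Correlated multi-jittered 2D sample: the stratum comes directly from the
 * sample index, the substratum within that row/column is permuted per pattern,
 * and the final position is jittered inside the substratum. */
__device_inline float2 cmj_sample_2D_sketch(int s, int num_samples, int p)
{
	int m = (int)sqrtf((float)num_samples + 0.5f); /* assumes a square sample count */
	int n = num_samples/m;

	uint sx = cmj_permute_sketch(s%m, m, p*0xa511e9b3);
	uint sy = cmj_permute_sketch(s/m, n, p*0x63d83595);

	float jx = cmj_randfloat_sketch(s, p*0xa399d265);
	float jy = cmj_randfloat_sketch(s, p*0x711ad6a5);

	float x = (s%m + (sy + jx)/n)/m;
	float y = (s/m + (sx + jy)/m)/n;

	return make_float2(x, y);
}

The 1D variant is the same idea with a single permuted stratum plus jitter.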

Also includes some refactoring of the RNG code: fixing a Sobol correlation issue
with the first BSDF and fewer than 16 samples, skipping some unneeded RNG calls,
and using a simpler unit square to unit disk function.
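
For illustration, the "simpler unit square to unit disk" mapping is presumably
along the lines of the standard polar transform sketched below; this is not
necessarily the exact function used in the commit.

/* Map (u, v) in [0,1)^2 to the unit disk with uniform area density, using
 * r = sqrt(v) and phi = 2*pi*u, operating in place like the kernel code. */
__device_inline void to_unit_disk_sketch(float *x, float *y)
{
	float phi = M_2PI_F*(*x);
	float r = sqrtf(*y);

	*x = r*cosf(phi);
	*y = r*sinf(phi);
}
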
Author: Brecht Van Lommel
Date: 2013-06-07 16:06:22 +00:00
parent d835d2f4e6
commit b20a7e01d0

13 changed files with 417 additions and 110 deletions

@@ -16,6 +16,8 @@
  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  */

+#include "kernel_jitter.h"
+
 CCL_NAMESPACE_BEGIN

 typedef uint RNG;
@@ -100,10 +102,10 @@ __device uint sobol_lookup(const uint m, const uint frame, const uint ex, const
 	return index;
 }

-__device_inline float path_rng(KernelGlobals *kg, RNG *rng, int sample, int dimension)
+__device_inline float path_rng(KernelGlobals *kg, RNG rng, int sample, int dimension)
 {
 #ifdef __SOBOL_FULL_SCREEN__
-	uint result = sobol_dimension(kg, *rng, dimension);
+	uint result = sobol_dimension(kg, rng, dimension);
 	float r = (float)result * (1.0f/(float)0xFFFFFFFF);
 	return r;
 #else
@@ -115,15 +117,44 @@ __device_inline float path_rng(KernelGlobals *kg, RNG *rng, int sample, int dime
 	float shift;

 	if(dimension & 1)
-		shift = (*rng >> 16)/((float)0xFFFF);
+		shift = (rng >> 16)*(1.0f/(float)0xFFFF);
 	else
-		shift = (*rng & 0xFFFF)/((float)0xFFFF);
+		shift = (rng & 0xFFFF)*(1.0f/(float)0xFFFF);

 	return r + shift - floorf(r + shift);
 #endif
 }

-__device_inline void path_rng_init(KernelGlobals *kg, __global uint *rng_state, int sample, RNG *rng, int x, int y, float *fx, float *fy)
+__device_inline float path_rng_1D(KernelGlobals *kg, RNG rng, int sample, int num_samples, int dimension)
+{
+#ifdef __CMJ__
+	if(kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_CMJ) {
+		/* correlated multi-jittered */
+		int p = rng + dimension;
+		return cmj_sample_1D(sample, num_samples, p);
+	}
+#endif
+
+	/* sobol */
+	return path_rng(kg, rng, sample, dimension);
+}
+
+__device_inline float2 path_rng_2D(KernelGlobals *kg, RNG rng, int sample, int num_samples, int dimension)
+{
+#ifdef __CMJ__
+	if(kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_CMJ) {
+		/* correlated multi-jittered */
+		int p = rng + dimension;
+		return cmj_sample_2D(sample, num_samples, p);
+	}
+#endif
+
+	/* sobol */
+	return make_float2(path_rng(kg, rng, sample, dimension),
+	                   path_rng(kg, rng, sample, dimension + 1));
+}
+
+__device_inline void path_rng_init(KernelGlobals *kg, __global uint *rng_state, int sample, int num_samples, RNG *rng, int x, int y, float *fx, float *fy)
 {
 #ifdef __SOBOL_FULL_SCREEN__
 	uint px, py;
@@ -153,8 +184,10 @@ __device_inline void path_rng_init(KernelGlobals *kg, __global uint *rng_state,
 		*fy = 0.5f;
 	}
 	else {
-		*fx = path_rng(kg, rng, sample, PRNG_FILTER_U);
-		*fy = path_rng(kg, rng, sample, PRNG_FILTER_V);
+		float2 fxy = path_rng_2D(kg, *rng, sample, num_samples, PRNG_FILTER_U);
+
+		*fx = fxy.x;
+		*fy = fxy.y;
 	}
 #endif
 }
@@ -168,14 +201,25 @@ __device void path_rng_end(KernelGlobals *kg, __global uint *rng_state, RNG rng)
 /* Linear Congruential Generator */

-__device float path_rng(KernelGlobals *kg, RNG *rng, int sample, int dimension)
+__device float path_rng(KernelGlobals *kg, RNG& rng, int sample, int dimension)
 {
 	/* implicit mod 2^32 */
-	*rng = (1103515245*(*rng) + 12345);
-	return (float)*rng * (1.0f/(float)0xFFFFFFFF);
+	rng = (1103515245*(rng) + 12345);
+	return (float)rng * (1.0f/(float)0xFFFFFFFF);
 }

-__device void path_rng_init(KernelGlobals *kg, __global uint *rng_state, int sample, RNG *rng, int x, int y, float *fx, float *fy)
+__device_inline float path_rng_1D(KernelGlobals *kg, RNG& rng, int sample, int num_samples, int dimension)
+{
+	return path_rng(kg, rng, sample, dimension);
+}
+
+__device_inline float2 path_rng_2D(KernelGlobals *kg, RNG& rng, int sample, int num_samples, int dimension)
+{
+	return make_float2(path_rng(kg, rng, sample, dimension),
+	                   path_rng(kg, rng, sample, dimension + 1));
+}
+
+__device void path_rng_init(KernelGlobals *kg, __global uint *rng_state, int sample, int num_samples, RNG *rng, int x, int y, float *fx, float *fy)
 {
 	/* load state */
 	*rng = *rng_state;

@@ -187,8 +231,10 @@ __device void path_rng_init(KernelGlobals *kg, __global uint *rng_state, int sam
 		*fy = 0.5f;
 	}
 	else {
-		*fx = path_rng(kg, rng, sample, PRNG_FILTER_U);
-		*fy = path_rng(kg, rng, sample, PRNG_FILTER_V);
+		float2 fxy = path_rng_2D(kg, rng, sample, num_samples, PRNG_FILTER_U);
+
+		*fx = fxy.x;
+		*fy = fxy.y;
 	}
 }
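
To show how these wrappers are meant to be used, here is a hypothetical call
site (not part of this commit): it draws the pixel filter offset via
path_rng_init and a 2D BSDF value via path_rng_2D, so whether the numbers come
from Sobol or CMJ is decided entirely inside path_rng_1D/path_rng_2D.
PRNG_BSDF_U is the existing dimension enum from kernel_types.h; the function
name and surrounding structure are only illustrative.

/* Hypothetical usage sketch: initialize the per-pixel RNG, pull samples
 * through the pattern-agnostic wrappers, then store the state back. */
__device void kernel_sample_pixel_example(KernelGlobals *kg, __global uint *rng_state,
                                          int sample, int num_samples, int x, int y)
{
	RNG rng;
	float filter_u, filter_v;

	/* also fills in the antialiasing filter offset for this sample */
	path_rng_init(kg, rng_state, sample, num_samples, &rng, x, y, &filter_u, &filter_v);

	/* e.g. a 2D BSDF sample; the sampling pattern is hidden behind the wrapper */
	float2 bsdf_uv = path_rng_2D(kg, rng, sample, num_samples, PRNG_BSDF_U);

	/* ... trace the path using filter_u/filter_v and bsdf_uv ... */

	path_rng_end(kg, rng_state, rng);
}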