Code refactor: remove rng_state buffer and compute hash on the fly.
A little faster on some benchmark scenes, a little slower on others, seems about performance neutral on average and saves a little memory.
This commit is contained in:
@@ -171,7 +171,7 @@ public:
|
|||||||
|
|
||||||
DeviceRequestedFeatures requested_features;
|
DeviceRequestedFeatures requested_features;
|
||||||
|
|
||||||
KernelFunctions<void(*)(KernelGlobals *, float *, unsigned int *, int, int, int, int, int)> path_trace_kernel;
|
KernelFunctions<void(*)(KernelGlobals *, float *, int, int, int, int, int)> path_trace_kernel;
|
||||||
KernelFunctions<void(*)(KernelGlobals *, uchar4 *, float *, float, int, int, int, int)> convert_to_half_float_kernel;
|
KernelFunctions<void(*)(KernelGlobals *, uchar4 *, float *, float, int, int, int, int)> convert_to_half_float_kernel;
|
||||||
KernelFunctions<void(*)(KernelGlobals *, uchar4 *, float *, float, int, int, int, int)> convert_to_byte_kernel;
|
KernelFunctions<void(*)(KernelGlobals *, uchar4 *, float *, float, int, int, int, int)> convert_to_byte_kernel;
|
||||||
KernelFunctions<void(*)(KernelGlobals *, uint4 *, float4 *, float*, int, int, int, int, int)> shader_kernel;
|
KernelFunctions<void(*)(KernelGlobals *, uint4 *, float4 *, float*, int, int, int, int, int)> shader_kernel;
|
||||||
@@ -192,7 +192,7 @@ public:
|
|||||||
KernelFunctions<void(*)(int, int, int, int, int, float*, int*, float*, float3*, int*, int)> filter_finalize_kernel;
|
KernelFunctions<void(*)(int, int, int, int, int, float*, int*, float*, float3*, int*, int)> filter_finalize_kernel;
|
||||||
|
|
||||||
KernelFunctions<void(*)(KernelGlobals *, ccl_constant KernelData*, ccl_global void*, int, ccl_global char*,
|
KernelFunctions<void(*)(KernelGlobals *, ccl_constant KernelData*, ccl_global void*, int, ccl_global char*,
|
||||||
ccl_global uint*, int, int, int, int, int, int, int, int, ccl_global int*, int,
|
int, int, int, int, int, int, int, int, ccl_global int*, int,
|
||||||
ccl_global char*, ccl_global unsigned int*, unsigned int, ccl_global float*)> data_init_kernel;
|
ccl_global char*, ccl_global unsigned int*, unsigned int, ccl_global float*)> data_init_kernel;
|
||||||
unordered_map<string, KernelFunctions<void(*)(KernelGlobals*, KernelData*)> > split_kernels;
|
unordered_map<string, KernelFunctions<void(*)(KernelGlobals*, KernelData*)> > split_kernels;
|
||||||
|
|
||||||
@@ -617,7 +617,6 @@ public:
|
|||||||
void path_trace(DeviceTask &task, RenderTile &tile, KernelGlobals *kg)
|
void path_trace(DeviceTask &task, RenderTile &tile, KernelGlobals *kg)
|
||||||
{
|
{
|
||||||
float *render_buffer = (float*)tile.buffer;
|
float *render_buffer = (float*)tile.buffer;
|
||||||
uint *rng_state = (uint*)tile.rng_state;
|
|
||||||
int start_sample = tile.start_sample;
|
int start_sample = tile.start_sample;
|
||||||
int end_sample = tile.start_sample + tile.num_samples;
|
int end_sample = tile.start_sample + tile.num_samples;
|
||||||
|
|
||||||
@@ -629,7 +628,7 @@ public:
|
|||||||
|
|
||||||
for(int y = tile.y; y < tile.y + tile.h; y++) {
|
for(int y = tile.y; y < tile.y + tile.h; y++) {
|
||||||
for(int x = tile.x; x < tile.x + tile.w; x++) {
|
for(int x = tile.x; x < tile.x + tile.w; x++) {
|
||||||
path_trace_kernel()(kg, render_buffer, rng_state,
|
path_trace_kernel()(kg, render_buffer,
|
||||||
sample, x, y, tile.offset, tile.stride);
|
sample, x, y, tile.offset, tile.stride);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -913,7 +912,6 @@ bool CPUSplitKernel::enqueue_split_kernel_data_init(const KernelDimensions& dim,
|
|||||||
(void*)split_data.device_pointer,
|
(void*)split_data.device_pointer,
|
||||||
num_global_elements,
|
num_global_elements,
|
||||||
(char*)ray_state.device_pointer,
|
(char*)ray_state.device_pointer,
|
||||||
(uint*)rtile.rng_state,
|
|
||||||
rtile.start_sample,
|
rtile.start_sample,
|
||||||
rtile.start_sample + rtile.num_samples,
|
rtile.start_sample + rtile.num_samples,
|
||||||
rtile.x,
|
rtile.x,
|
||||||
|
@@ -1322,7 +1322,6 @@ public:
|
|||||||
wtile->start_sample = sample;
|
wtile->start_sample = sample;
|
||||||
wtile->num_samples = 1;
|
wtile->num_samples = 1;
|
||||||
wtile->buffer = (float*)cuda_device_ptr(rtile.buffer);
|
wtile->buffer = (float*)cuda_device_ptr(rtile.buffer);
|
||||||
wtile->rng_state = (uint*)cuda_device_ptr(rtile.rng_state);
|
|
||||||
|
|
||||||
mem_alloc("work_tiles", work_tiles, MEM_READ_ONLY);
|
mem_alloc("work_tiles", work_tiles, MEM_READ_ONLY);
|
||||||
mem_copy_to(work_tiles);
|
mem_copy_to(work_tiles);
|
||||||
@@ -1945,7 +1944,6 @@ bool CUDASplitKernel::enqueue_split_kernel_data_init(const KernelDimensions& dim
|
|||||||
CUdeviceptr d_use_queues_flag = device->cuda_device_ptr(use_queues_flag.device_pointer);
|
CUdeviceptr d_use_queues_flag = device->cuda_device_ptr(use_queues_flag.device_pointer);
|
||||||
CUdeviceptr d_work_pool_wgs = device->cuda_device_ptr(work_pool_wgs.device_pointer);
|
CUdeviceptr d_work_pool_wgs = device->cuda_device_ptr(work_pool_wgs.device_pointer);
|
||||||
|
|
||||||
CUdeviceptr d_rng_state = device->cuda_device_ptr(rtile.rng_state);
|
|
||||||
CUdeviceptr d_buffer = device->cuda_device_ptr(rtile.buffer);
|
CUdeviceptr d_buffer = device->cuda_device_ptr(rtile.buffer);
|
||||||
|
|
||||||
int end_sample = rtile.start_sample + rtile.num_samples;
|
int end_sample = rtile.start_sample + rtile.num_samples;
|
||||||
@@ -1955,7 +1953,6 @@ bool CUDASplitKernel::enqueue_split_kernel_data_init(const KernelDimensions& dim
|
|||||||
CUdeviceptr* split_data_buffer;
|
CUdeviceptr* split_data_buffer;
|
||||||
int* num_elements;
|
int* num_elements;
|
||||||
CUdeviceptr* ray_state;
|
CUdeviceptr* ray_state;
|
||||||
CUdeviceptr* rng_state;
|
|
||||||
int* start_sample;
|
int* start_sample;
|
||||||
int* end_sample;
|
int* end_sample;
|
||||||
int* sx;
|
int* sx;
|
||||||
@@ -1976,7 +1973,6 @@ bool CUDASplitKernel::enqueue_split_kernel_data_init(const KernelDimensions& dim
|
|||||||
&d_split_data,
|
&d_split_data,
|
||||||
&num_global_elements,
|
&num_global_elements,
|
||||||
&d_ray_state,
|
&d_ray_state,
|
||||||
&d_rng_state,
|
|
||||||
&rtile.start_sample,
|
&rtile.start_sample,
|
||||||
&end_sample,
|
&end_sample,
|
||||||
&rtile.x,
|
&rtile.x,
|
||||||
|
@@ -281,7 +281,6 @@ public:
|
|||||||
foreach(SubDevice& sub, devices) {
|
foreach(SubDevice& sub, devices) {
|
||||||
if(sub.device == sub_device) {
|
if(sub.device == sub_device) {
|
||||||
if(tile.buffer) tile.buffer = sub.ptr_map[tile.buffer];
|
if(tile.buffer) tile.buffer = sub.ptr_map[tile.buffer];
|
||||||
if(tile.rng_state) tile.rng_state = sub.ptr_map[tile.rng_state];
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@@ -737,7 +737,6 @@ protected:
|
|||||||
tile = entry.tile;
|
tile = entry.tile;
|
||||||
|
|
||||||
if(tile.buffer) tile.buffer = ptr_map[tile.buffer];
|
if(tile.buffer) tile.buffer = ptr_map[tile.buffer];
|
||||||
if(tile.rng_state) tile.rng_state = ptr_map[tile.rng_state];
|
|
||||||
|
|
||||||
result = true;
|
result = true;
|
||||||
break;
|
break;
|
||||||
@@ -769,7 +768,6 @@ protected:
|
|||||||
thread_scoped_lock acquire_lock(acquire_mutex);
|
thread_scoped_lock acquire_lock(acquire_mutex);
|
||||||
|
|
||||||
if(tile.buffer) tile.buffer = ptr_imap[tile.buffer];
|
if(tile.buffer) tile.buffer = ptr_imap[tile.buffer];
|
||||||
if(tile.rng_state) tile.rng_state = ptr_imap[tile.rng_state];
|
|
||||||
|
|
||||||
{
|
{
|
||||||
thread_scoped_lock lock(rpc_lock);
|
thread_scoped_lock lock(rpc_lock);
|
||||||
|
@@ -142,7 +142,7 @@ public:
|
|||||||
archive & tile.x & tile.y & tile.w & tile.h;
|
archive & tile.x & tile.y & tile.w & tile.h;
|
||||||
archive & tile.start_sample & tile.num_samples & tile.sample;
|
archive & tile.start_sample & tile.num_samples & tile.sample;
|
||||||
archive & tile.resolution & tile.offset & tile.stride;
|
archive & tile.resolution & tile.offset & tile.stride;
|
||||||
archive & tile.buffer & tile.rng_state;
|
archive & tile.buffer;
|
||||||
}
|
}
|
||||||
|
|
||||||
void write()
|
void write()
|
||||||
@@ -303,7 +303,7 @@ public:
|
|||||||
*archive & tile.x & tile.y & tile.w & tile.h;
|
*archive & tile.x & tile.y & tile.w & tile.h;
|
||||||
*archive & tile.start_sample & tile.num_samples & tile.sample;
|
*archive & tile.start_sample & tile.num_samples & tile.sample;
|
||||||
*archive & tile.resolution & tile.offset & tile.stride;
|
*archive & tile.resolution & tile.offset & tile.stride;
|
||||||
*archive & tile.buffer & tile.rng_state;
|
*archive & tile.buffer;
|
||||||
|
|
||||||
tile.buffers = NULL;
|
tile.buffers = NULL;
|
||||||
}
|
}
|
||||||
|
@@ -62,7 +62,6 @@ public:
|
|||||||
/* Cast arguments to cl types. */
|
/* Cast arguments to cl types. */
|
||||||
cl_mem d_data = CL_MEM_PTR(const_mem_map["__data"]->device_pointer);
|
cl_mem d_data = CL_MEM_PTR(const_mem_map["__data"]->device_pointer);
|
||||||
cl_mem d_buffer = CL_MEM_PTR(rtile.buffer);
|
cl_mem d_buffer = CL_MEM_PTR(rtile.buffer);
|
||||||
cl_mem d_rng_state = CL_MEM_PTR(rtile.rng_state);
|
|
||||||
cl_int d_x = rtile.x;
|
cl_int d_x = rtile.x;
|
||||||
cl_int d_y = rtile.y;
|
cl_int d_y = rtile.y;
|
||||||
cl_int d_w = rtile.w;
|
cl_int d_w = rtile.w;
|
||||||
@@ -79,8 +78,7 @@ public:
|
|||||||
kernel_set_args(ckPathTraceKernel,
|
kernel_set_args(ckPathTraceKernel,
|
||||||
0,
|
0,
|
||||||
d_data,
|
d_data,
|
||||||
d_buffer,
|
d_buffer);
|
||||||
d_rng_state);
|
|
||||||
|
|
||||||
set_kernel_arg_buffers(ckPathTraceKernel, &start_arg_index);
|
set_kernel_arg_buffers(ckPathTraceKernel, &start_arg_index);
|
||||||
|
|
||||||
|
@@ -192,7 +192,6 @@ struct CachedSplitMemory {
|
|||||||
int id;
|
int id;
|
||||||
device_memory *split_data;
|
device_memory *split_data;
|
||||||
device_memory *ray_state;
|
device_memory *ray_state;
|
||||||
device_ptr *rng_state;
|
|
||||||
device_memory *queue_index;
|
device_memory *queue_index;
|
||||||
device_memory *use_queues_flag;
|
device_memory *use_queues_flag;
|
||||||
device_memory *work_pools;
|
device_memory *work_pools;
|
||||||
@@ -225,8 +224,7 @@ public:
|
|||||||
kg,
|
kg,
|
||||||
data,
|
data,
|
||||||
*cached_memory.split_data,
|
*cached_memory.split_data,
|
||||||
*cached_memory.ray_state,
|
*cached_memory.ray_state);
|
||||||
*cached_memory.rng_state);
|
|
||||||
|
|
||||||
device->set_kernel_arg_buffers(program(), &start_arg_index);
|
device->set_kernel_arg_buffers(program(), &start_arg_index);
|
||||||
|
|
||||||
@@ -356,8 +354,7 @@ public:
|
|||||||
kernel_data,
|
kernel_data,
|
||||||
split_data,
|
split_data,
|
||||||
num_global_elements,
|
num_global_elements,
|
||||||
ray_state,
|
ray_state);
|
||||||
rtile.rng_state);
|
|
||||||
|
|
||||||
device->set_kernel_arg_buffers(device->program_data_init(), &start_arg_index);
|
device->set_kernel_arg_buffers(device->program_data_init(), &start_arg_index);
|
||||||
|
|
||||||
@@ -401,7 +398,6 @@ public:
|
|||||||
|
|
||||||
cached_memory.split_data = &split_data;
|
cached_memory.split_data = &split_data;
|
||||||
cached_memory.ray_state = &ray_state;
|
cached_memory.ray_state = &ray_state;
|
||||||
cached_memory.rng_state = &rtile.rng_state;
|
|
||||||
cached_memory.queue_index = &queue_index;
|
cached_memory.queue_index = &queue_index;
|
||||||
cached_memory.use_queues_flag = &use_queues_flag;
|
cached_memory.use_queues_flag = &use_queues_flag;
|
||||||
cached_memory.work_pools = &work_pool_wgs;
|
cached_memory.work_pools = &work_pool_wgs;
|
||||||
|
@@ -672,21 +672,20 @@ ccl_device_forceinline void kernel_path_integrate(
|
|||||||
}
|
}
|
||||||
|
|
||||||
ccl_device void kernel_path_trace(KernelGlobals *kg,
|
ccl_device void kernel_path_trace(KernelGlobals *kg,
|
||||||
ccl_global float *buffer, ccl_global uint *rng_state,
|
ccl_global float *buffer,
|
||||||
int sample, int x, int y, int offset, int stride)
|
int sample, int x, int y, int offset, int stride)
|
||||||
{
|
{
|
||||||
/* buffer offset */
|
/* buffer offset */
|
||||||
int index = offset + x + y*stride;
|
int index = offset + x + y*stride;
|
||||||
int pass_stride = kernel_data.film.pass_stride;
|
int pass_stride = kernel_data.film.pass_stride;
|
||||||
|
|
||||||
rng_state += index;
|
|
||||||
buffer += index*pass_stride;
|
buffer += index*pass_stride;
|
||||||
|
|
||||||
/* Initialize random numbers and sample ray. */
|
/* Initialize random numbers and sample ray. */
|
||||||
uint rng_hash;
|
uint rng_hash;
|
||||||
Ray ray;
|
Ray ray;
|
||||||
|
|
||||||
kernel_path_trace_setup(kg, rng_state, sample, x, y, &rng_hash, &ray);
|
kernel_path_trace_setup(kg, sample, x, y, &rng_hash, &ray);
|
||||||
|
|
||||||
if(ray.t == 0.0f) {
|
if(ray.t == 0.0f) {
|
||||||
kernel_write_result(kg, buffer, sample, NULL);
|
kernel_write_result(kg, buffer, sample, NULL);
|
||||||
|
@@ -538,21 +538,20 @@ ccl_device void kernel_branched_path_integrate(KernelGlobals *kg,
|
|||||||
}
|
}
|
||||||
|
|
||||||
ccl_device void kernel_branched_path_trace(KernelGlobals *kg,
|
ccl_device void kernel_branched_path_trace(KernelGlobals *kg,
|
||||||
ccl_global float *buffer, ccl_global uint *rng_state,
|
ccl_global float *buffer,
|
||||||
int sample, int x, int y, int offset, int stride)
|
int sample, int x, int y, int offset, int stride)
|
||||||
{
|
{
|
||||||
/* buffer offset */
|
/* buffer offset */
|
||||||
int index = offset + x + y*stride;
|
int index = offset + x + y*stride;
|
||||||
int pass_stride = kernel_data.film.pass_stride;
|
int pass_stride = kernel_data.film.pass_stride;
|
||||||
|
|
||||||
rng_state += index;
|
|
||||||
buffer += index*pass_stride;
|
buffer += index*pass_stride;
|
||||||
|
|
||||||
/* initialize random numbers and ray */
|
/* initialize random numbers and ray */
|
||||||
uint rng_hash;
|
uint rng_hash;
|
||||||
Ray ray;
|
Ray ray;
|
||||||
|
|
||||||
kernel_path_trace_setup(kg, rng_state, sample, x, y, &rng_hash, &ray);
|
kernel_path_trace_setup(kg, sample, x, y, &rng_hash, &ray);
|
||||||
|
|
||||||
/* integrate */
|
/* integrate */
|
||||||
PathRadiance L;
|
PathRadiance L;
|
||||||
|
@@ -19,7 +19,6 @@
|
|||||||
CCL_NAMESPACE_BEGIN
|
CCL_NAMESPACE_BEGIN
|
||||||
|
|
||||||
ccl_device_inline void kernel_path_trace_setup(KernelGlobals *kg,
|
ccl_device_inline void kernel_path_trace_setup(KernelGlobals *kg,
|
||||||
ccl_global uint *rng_state,
|
|
||||||
int sample,
|
int sample,
|
||||||
int x, int y,
|
int x, int y,
|
||||||
uint *rng_hash,
|
uint *rng_hash,
|
||||||
@@ -30,11 +29,7 @@ ccl_device_inline void kernel_path_trace_setup(KernelGlobals *kg,
|
|||||||
|
|
||||||
int num_samples = kernel_data.integrator.aa_samples;
|
int num_samples = kernel_data.integrator.aa_samples;
|
||||||
|
|
||||||
if(sample == kernel_data.integrator.start_sample) {
|
path_rng_init(kg, sample, num_samples, rng_hash, x, y, &filter_u, &filter_v);
|
||||||
*rng_state = hash_int_2d(x, y);
|
|
||||||
}
|
|
||||||
|
|
||||||
path_rng_init(kg, rng_state, sample, num_samples, rng_hash, x, y, &filter_u, &filter_v);
|
|
||||||
|
|
||||||
/* sample camera ray */
|
/* sample camera ray */
|
||||||
|
|
||||||
|
@@ -15,6 +15,7 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
#include "kernel/kernel_jitter.h"
|
#include "kernel/kernel_jitter.h"
|
||||||
|
#include "util/util_hash.h"
|
||||||
|
|
||||||
CCL_NAMESPACE_BEGIN
|
CCL_NAMESPACE_BEGIN
|
||||||
|
|
||||||
@@ -115,14 +116,13 @@ ccl_device_forceinline void path_rng_2D(KernelGlobals *kg,
|
|||||||
}
|
}
|
||||||
|
|
||||||
ccl_device_inline void path_rng_init(KernelGlobals *kg,
|
ccl_device_inline void path_rng_init(KernelGlobals *kg,
|
||||||
ccl_global uint *rng_state,
|
|
||||||
int sample, int num_samples,
|
int sample, int num_samples,
|
||||||
uint *rng_hash,
|
uint *rng_hash,
|
||||||
int x, int y,
|
int x, int y,
|
||||||
float *fx, float *fy)
|
float *fx, float *fy)
|
||||||
{
|
{
|
||||||
/* load state */
|
/* load state */
|
||||||
*rng_hash = *rng_state;
|
*rng_hash = hash_int_2d(x, y);
|
||||||
*rng_hash ^= kernel_data.integrator.seed;
|
*rng_hash ^= kernel_data.integrator.seed;
|
||||||
|
|
||||||
#ifdef __DEBUG_CORRELATION__
|
#ifdef __DEBUG_CORRELATION__
|
||||||
|
@@ -1460,7 +1460,6 @@ typedef struct WorkTile {
|
|||||||
uint stride;
|
uint stride;
|
||||||
|
|
||||||
ccl_global float *buffer;
|
ccl_global float *buffer;
|
||||||
ccl_global uint *rng_state;
|
|
||||||
} WorkTile;
|
} WorkTile;
|
||||||
|
|
||||||
CCL_NAMESPACE_END
|
CCL_NAMESPACE_END
|
||||||
|
@@ -18,7 +18,6 @@
|
|||||||
|
|
||||||
void KERNEL_FUNCTION_FULL_NAME(path_trace)(KernelGlobals *kg,
|
void KERNEL_FUNCTION_FULL_NAME(path_trace)(KernelGlobals *kg,
|
||||||
float *buffer,
|
float *buffer,
|
||||||
unsigned int *rng_state,
|
|
||||||
int sample,
|
int sample,
|
||||||
int x, int y,
|
int x, int y,
|
||||||
int offset,
|
int offset,
|
||||||
@@ -57,7 +56,6 @@ void KERNEL_FUNCTION_FULL_NAME(data_init)(
|
|||||||
ccl_global void *split_data_buffer,
|
ccl_global void *split_data_buffer,
|
||||||
int num_elements,
|
int num_elements,
|
||||||
ccl_global char *ray_state,
|
ccl_global char *ray_state,
|
||||||
ccl_global uint *rng_state,
|
|
||||||
int start_sample,
|
int start_sample,
|
||||||
int end_sample,
|
int end_sample,
|
||||||
int sx, int sy, int sw, int sh, int offset, int stride,
|
int sx, int sy, int sw, int sh, int offset, int stride,
|
||||||
|
@@ -75,7 +75,6 @@ CCL_NAMESPACE_BEGIN
|
|||||||
|
|
||||||
void KERNEL_FUNCTION_FULL_NAME(path_trace)(KernelGlobals *kg,
|
void KERNEL_FUNCTION_FULL_NAME(path_trace)(KernelGlobals *kg,
|
||||||
float *buffer,
|
float *buffer,
|
||||||
unsigned int *rng_state,
|
|
||||||
int sample,
|
int sample,
|
||||||
int x, int y,
|
int x, int y,
|
||||||
int offset,
|
int offset,
|
||||||
@@ -88,7 +87,6 @@ void KERNEL_FUNCTION_FULL_NAME(path_trace)(KernelGlobals *kg,
|
|||||||
if(kernel_data.integrator.branched) {
|
if(kernel_data.integrator.branched) {
|
||||||
kernel_branched_path_trace(kg,
|
kernel_branched_path_trace(kg,
|
||||||
buffer,
|
buffer,
|
||||||
rng_state,
|
|
||||||
sample,
|
sample,
|
||||||
x, y,
|
x, y,
|
||||||
offset,
|
offset,
|
||||||
@@ -97,7 +95,7 @@ void KERNEL_FUNCTION_FULL_NAME(path_trace)(KernelGlobals *kg,
|
|||||||
else
|
else
|
||||||
# endif
|
# endif
|
||||||
{
|
{
|
||||||
kernel_path_trace(kg, buffer, rng_state, sample, x, y, offset, stride);
|
kernel_path_trace(kg, buffer, sample, x, y, offset, stride);
|
||||||
}
|
}
|
||||||
#endif /* KERNEL_STUB */
|
#endif /* KERNEL_STUB */
|
||||||
}
|
}
|
||||||
|
@@ -42,7 +42,7 @@ kernel_cuda_path_trace(WorkTile *tile, uint total_work_size)
|
|||||||
get_work_pixel(tile, work_index, &x, &y, &sample);
|
get_work_pixel(tile, work_index, &x, &y, &sample);
|
||||||
|
|
||||||
KernelGlobals kg;
|
KernelGlobals kg;
|
||||||
kernel_path_trace(&kg, tile->buffer, tile->rng_state, sample, x, y, tile->offset, tile->stride);
|
kernel_path_trace(&kg, tile->buffer, sample, x, y, tile->offset, tile->stride);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -58,7 +58,7 @@ kernel_cuda_branched_path_trace(WorkTile *tile, uint total_work_size)
|
|||||||
get_work_pixel(tile, work_index, &x, &y, &sample);
|
get_work_pixel(tile, work_index, &x, &y, &sample);
|
||||||
|
|
||||||
KernelGlobals kg;
|
KernelGlobals kg;
|
||||||
kernel_branched_path_trace(&kg, tile->buffer, tile->rng_state, sample, x, y, tile->offset, tile->stride);
|
kernel_branched_path_trace(&kg, tile->buffer, sample, x, y, tile->offset, tile->stride);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
@@ -60,7 +60,6 @@ kernel_cuda_path_trace_data_init(
|
|||||||
ccl_global void *split_data_buffer,
|
ccl_global void *split_data_buffer,
|
||||||
int num_elements,
|
int num_elements,
|
||||||
ccl_global char *ray_state,
|
ccl_global char *ray_state,
|
||||||
ccl_global uint *rng_state,
|
|
||||||
int start_sample,
|
int start_sample,
|
||||||
int end_sample,
|
int end_sample,
|
||||||
int sx, int sy, int sw, int sh, int offset, int stride,
|
int sx, int sy, int sw, int sh, int offset, int stride,
|
||||||
@@ -76,7 +75,6 @@ kernel_cuda_path_trace_data_init(
|
|||||||
split_data_buffer,
|
split_data_buffer,
|
||||||
num_elements,
|
num_elements,
|
||||||
ray_state,
|
ray_state,
|
||||||
rng_state,
|
|
||||||
start_sample,
|
start_sample,
|
||||||
end_sample,
|
end_sample,
|
||||||
sx, sy, sw, sh, offset, stride,
|
sx, sy, sw, sh, offset, stride,
|
||||||
|
@@ -50,7 +50,6 @@
|
|||||||
__kernel void kernel_ocl_path_trace(
|
__kernel void kernel_ocl_path_trace(
|
||||||
ccl_constant KernelData *data,
|
ccl_constant KernelData *data,
|
||||||
ccl_global float *buffer,
|
ccl_global float *buffer,
|
||||||
ccl_global uint *rng_state,
|
|
||||||
|
|
||||||
KERNEL_BUFFER_PARAMS,
|
KERNEL_BUFFER_PARAMS,
|
||||||
|
|
||||||
@@ -68,7 +67,7 @@ __kernel void kernel_ocl_path_trace(
|
|||||||
int y = sy + ccl_global_id(1);
|
int y = sy + ccl_global_id(1);
|
||||||
|
|
||||||
if(x < sx + sw && y < sy + sh)
|
if(x < sx + sw && y < sy + sh)
|
||||||
kernel_path_trace(kg, buffer, rng_state, sample, x, y, offset, stride);
|
kernel_path_trace(kg, buffer, sample, x, y, offset, stride);
|
||||||
}
|
}
|
||||||
|
|
||||||
#else /* __COMPILE_ONLY_MEGAKERNEL__ */
|
#else /* __COMPILE_ONLY_MEGAKERNEL__ */
|
||||||
|
@@ -24,7 +24,6 @@ __kernel void kernel_ocl_path_trace_data_init(
|
|||||||
ccl_global void *split_data_buffer,
|
ccl_global void *split_data_buffer,
|
||||||
int num_elements,
|
int num_elements,
|
||||||
ccl_global char *ray_state,
|
ccl_global char *ray_state,
|
||||||
ccl_global uint *rng_state,
|
|
||||||
KERNEL_BUFFER_PARAMS,
|
KERNEL_BUFFER_PARAMS,
|
||||||
int start_sample,
|
int start_sample,
|
||||||
int end_sample,
|
int end_sample,
|
||||||
@@ -41,7 +40,6 @@ __kernel void kernel_ocl_path_trace_data_init(
|
|||||||
split_data_buffer,
|
split_data_buffer,
|
||||||
num_elements,
|
num_elements,
|
||||||
ray_state,
|
ray_state,
|
||||||
rng_state,
|
|
||||||
KERNEL_BUFFER_ARGS,
|
KERNEL_BUFFER_ARGS,
|
||||||
start_sample,
|
start_sample,
|
||||||
end_sample,
|
end_sample,
|
||||||
|
@@ -23,7 +23,6 @@ __kernel void KERNEL_NAME_EVAL(kernel_ocl_path_trace, KERNEL_NAME)(
|
|||||||
|
|
||||||
ccl_global void *split_data_buffer,
|
ccl_global void *split_data_buffer,
|
||||||
ccl_global char *ray_state,
|
ccl_global char *ray_state,
|
||||||
ccl_global uint *rng_state,
|
|
||||||
|
|
||||||
KERNEL_BUFFER_PARAMS,
|
KERNEL_BUFFER_PARAMS,
|
||||||
|
|
||||||
@@ -42,7 +41,6 @@ __kernel void KERNEL_NAME_EVAL(kernel_ocl_path_trace, KERNEL_NAME)(
|
|||||||
if(ccl_local_id(0) + ccl_local_id(1) == 0) {
|
if(ccl_local_id(0) + ccl_local_id(1) == 0) {
|
||||||
kg->data = data;
|
kg->data = data;
|
||||||
|
|
||||||
kernel_split_params.tile.rng_state = rng_state;
|
|
||||||
kernel_split_params.queue_index = queue_index;
|
kernel_split_params.queue_index = queue_index;
|
||||||
kernel_split_params.use_queues_flag = use_queues_flag;
|
kernel_split_params.use_queues_flag = use_queues_flag;
|
||||||
kernel_split_params.work_pools = work_pools;
|
kernel_split_params.work_pools = work_pools;
|
||||||
|
@@ -108,10 +108,6 @@ ccl_device void kernel_buffer_update(KernelGlobals *kg,
|
|||||||
uint x, y, sample;
|
uint x, y, sample;
|
||||||
get_work_pixel(tile, work_index, &x, &y, &sample);
|
get_work_pixel(tile, work_index, &x, &y, &sample);
|
||||||
|
|
||||||
/* Remap rng_state to current pixel. */
|
|
||||||
ccl_global uint *rng_state = kernel_split_params.tile.rng_state;
|
|
||||||
rng_state += tile->offset + x + y*tile->stride;
|
|
||||||
|
|
||||||
/* Store buffer offset for writing to passes. */
|
/* Store buffer offset for writing to passes. */
|
||||||
uint buffer_offset = (tile->offset + x + y*tile->stride) * kernel_data.film.pass_stride;
|
uint buffer_offset = (tile->offset + x + y*tile->stride) * kernel_data.film.pass_stride;
|
||||||
ccl_global float *buffer = tile->buffer + buffer_offset;
|
ccl_global float *buffer = tile->buffer + buffer_offset;
|
||||||
@@ -119,7 +115,7 @@ ccl_device void kernel_buffer_update(KernelGlobals *kg,
|
|||||||
|
|
||||||
/* Initialize random numbers and ray. */
|
/* Initialize random numbers and ray. */
|
||||||
uint rng_hash;
|
uint rng_hash;
|
||||||
kernel_path_trace_setup(kg, rng_state, sample, x, y, &rng_hash, ray);
|
kernel_path_trace_setup(kg, sample, x, y, &rng_hash, ray);
|
||||||
|
|
||||||
if(ray->t != 0.0f) {
|
if(ray->t != 0.0f) {
|
||||||
/* Initialize throughput, path radiance, Ray, PathState;
|
/* Initialize throughput, path radiance, Ray, PathState;
|
||||||
|
@@ -49,7 +49,6 @@ void KERNEL_FUNCTION_FULL_NAME(data_init)(
|
|||||||
ccl_global void *split_data_buffer,
|
ccl_global void *split_data_buffer,
|
||||||
int num_elements,
|
int num_elements,
|
||||||
ccl_global char *ray_state,
|
ccl_global char *ray_state,
|
||||||
ccl_global uint *rng_state,
|
|
||||||
|
|
||||||
#ifdef __KERNEL_OPENCL__
|
#ifdef __KERNEL_OPENCL__
|
||||||
KERNEL_BUFFER_PARAMS,
|
KERNEL_BUFFER_PARAMS,
|
||||||
@@ -84,7 +83,6 @@ void KERNEL_FUNCTION_FULL_NAME(data_init)(
|
|||||||
kernel_split_params.tile.offset = offset;
|
kernel_split_params.tile.offset = offset;
|
||||||
kernel_split_params.tile.stride = stride;
|
kernel_split_params.tile.stride = stride;
|
||||||
|
|
||||||
kernel_split_params.tile.rng_state = rng_state;
|
|
||||||
kernel_split_params.tile.buffer = buffer;
|
kernel_split_params.tile.buffer = buffer;
|
||||||
|
|
||||||
kernel_split_params.total_work_size = sw * sh * num_samples;
|
kernel_split_params.total_work_size = sw * sh * num_samples;
|
||||||
@@ -122,7 +120,7 @@ void KERNEL_FUNCTION_FULL_NAME(data_init)(
|
|||||||
*use_queues_flag = 0;
|
*use_queues_flag = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* zero the tiles pixels and initialize rng_state if this is the first sample */
|
/* zero the tiles pixels if this is the first sample */
|
||||||
if(start_sample == 0) {
|
if(start_sample == 0) {
|
||||||
int pass_stride = kernel_data.film.pass_stride;
|
int pass_stride = kernel_data.film.pass_stride;
|
||||||
|
|
||||||
@@ -130,9 +128,6 @@ void KERNEL_FUNCTION_FULL_NAME(data_init)(
|
|||||||
for(int y = sy; y < sy + sh; y++) {
|
for(int y = sy; y < sy + sh; y++) {
|
||||||
int index = offset + y * stride;
|
int index = offset + y * stride;
|
||||||
memset(buffer + (sx + index) * pass_stride, 0, sizeof(float) * pass_stride * sw);
|
memset(buffer + (sx + index) * pass_stride, 0, sizeof(float) * pass_stride * sw);
|
||||||
for(int x = sx; x < sx + sw; x++) {
|
|
||||||
rng_state[index + x] = hash_int_2d(x, y);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
parallel_for(kg, i, sw * sh * pass_stride) {
|
parallel_for(kg, i, sw * sh * pass_stride) {
|
||||||
@@ -146,14 +141,6 @@ void KERNEL_FUNCTION_FULL_NAME(data_init)(
|
|||||||
|
|
||||||
*(buffer + index) = 0.0f;
|
*(buffer + index) = 0.0f;
|
||||||
}
|
}
|
||||||
|
|
||||||
parallel_for(kg, i, sw * sh) {
|
|
||||||
int x = sx + i % sw;
|
|
||||||
int y = sy + i / sw;
|
|
||||||
|
|
||||||
int index = (offset + x + y*stride);
|
|
||||||
*(rng_state + index) = hash_int_2d(x, y);
|
|
||||||
}
|
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -45,10 +45,6 @@ ccl_device void kernel_path_init(KernelGlobals *kg) {
|
|||||||
uint x, y, sample;
|
uint x, y, sample;
|
||||||
get_work_pixel(tile, work_index, &x, &y, &sample);
|
get_work_pixel(tile, work_index, &x, &y, &sample);
|
||||||
|
|
||||||
/* Remap rng_state and buffer to current pixel. */
|
|
||||||
ccl_global uint *rng_state = kernel_split_params.tile.rng_state;
|
|
||||||
rng_state += tile->offset + x + y*tile->stride;
|
|
||||||
|
|
||||||
/* Store buffer offset for writing to passes. */
|
/* Store buffer offset for writing to passes. */
|
||||||
uint buffer_offset = (tile->offset + x + y*tile->stride) * kernel_data.film.pass_stride;
|
uint buffer_offset = (tile->offset + x + y*tile->stride) * kernel_data.film.pass_stride;
|
||||||
ccl_global float *buffer = tile->buffer + buffer_offset;
|
ccl_global float *buffer = tile->buffer + buffer_offset;
|
||||||
@@ -57,7 +53,6 @@ ccl_device void kernel_path_init(KernelGlobals *kg) {
|
|||||||
/* Initialize random numbers and ray. */
|
/* Initialize random numbers and ray. */
|
||||||
uint rng_hash;
|
uint rng_hash;
|
||||||
kernel_path_trace_setup(kg,
|
kernel_path_trace_setup(kg,
|
||||||
rng_state,
|
|
||||||
sample,
|
sample,
|
||||||
x, y,
|
x, y,
|
||||||
&rng_hash,
|
&rng_hash,
|
||||||
|
@@ -108,7 +108,6 @@ RenderTile::RenderTile()
|
|||||||
stride = 0;
|
stride = 0;
|
||||||
|
|
||||||
buffer = 0;
|
buffer = 0;
|
||||||
rng_state = 0;
|
|
||||||
|
|
||||||
buffers = NULL;
|
buffers = NULL;
|
||||||
}
|
}
|
||||||
@@ -131,11 +130,6 @@ void RenderBuffers::device_free()
|
|||||||
device->mem_free(buffer);
|
device->mem_free(buffer);
|
||||||
buffer.clear();
|
buffer.clear();
|
||||||
}
|
}
|
||||||
|
|
||||||
if(rng_state.device_pointer) {
|
|
||||||
device->mem_free(rng_state);
|
|
||||||
rng_state.clear();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void RenderBuffers::reset(Device *device, BufferParams& params_)
|
void RenderBuffers::reset(Device *device, BufferParams& params_)
|
||||||
@@ -149,11 +143,6 @@ void RenderBuffers::reset(Device *device, BufferParams& params_)
|
|||||||
buffer.resize(params.width*params.height*params.get_passes_size());
|
buffer.resize(params.width*params.height*params.get_passes_size());
|
||||||
device->mem_alloc("render_buffer", buffer, MEM_READ_WRITE);
|
device->mem_alloc("render_buffer", buffer, MEM_READ_WRITE);
|
||||||
device->mem_zero(buffer);
|
device->mem_zero(buffer);
|
||||||
|
|
||||||
/* allocate rng state */
|
|
||||||
rng_state.resize(params.width, params.height);
|
|
||||||
|
|
||||||
device->mem_alloc("rng_state", rng_state, MEM_READ_WRITE);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
bool RenderBuffers::copy_from_device(Device *from_device)
|
bool RenderBuffers::copy_from_device(Device *from_device)
|
||||||
|
@@ -74,8 +74,6 @@ public:
|
|||||||
|
|
||||||
/* float buffer */
|
/* float buffer */
|
||||||
device_vector<float> buffer;
|
device_vector<float> buffer;
|
||||||
/* random number generator state */
|
|
||||||
device_vector<uint> rng_state;
|
|
||||||
|
|
||||||
Device *device;
|
Device *device;
|
||||||
|
|
||||||
@@ -149,7 +147,6 @@ public:
|
|||||||
int tile_index;
|
int tile_index;
|
||||||
|
|
||||||
device_ptr buffer;
|
device_ptr buffer;
|
||||||
device_ptr rng_state;
|
|
||||||
|
|
||||||
RenderBuffers *buffers;
|
RenderBuffers *buffers;
|
||||||
|
|
||||||
|
@@ -384,7 +384,6 @@ bool Session::acquire_tile(Device *tile_device, RenderTile& rtile)
|
|||||||
tile_manager.state.buffer.get_offset_stride(rtile.offset, rtile.stride);
|
tile_manager.state.buffer.get_offset_stride(rtile.offset, rtile.stride);
|
||||||
|
|
||||||
rtile.buffer = buffers->buffer.device_pointer;
|
rtile.buffer = buffers->buffer.device_pointer;
|
||||||
rtile.rng_state = buffers->rng_state.device_pointer;
|
|
||||||
rtile.buffers = buffers;
|
rtile.buffers = buffers;
|
||||||
tile->buffers = buffers;
|
tile->buffers = buffers;
|
||||||
|
|
||||||
@@ -442,7 +441,6 @@ bool Session::acquire_tile(Device *tile_device, RenderTile& rtile)
|
|||||||
tile->buffers->params.get_offset_stride(rtile.offset, rtile.stride);
|
tile->buffers->params.get_offset_stride(rtile.offset, rtile.stride);
|
||||||
|
|
||||||
rtile.buffer = tile->buffers->buffer.device_pointer;
|
rtile.buffer = tile->buffers->buffer.device_pointer;
|
||||||
rtile.rng_state = tile->buffers->rng_state.device_pointer;
|
|
||||||
rtile.buffers = tile->buffers;
|
rtile.buffers = tile->buffers;
|
||||||
rtile.sample = 0;
|
rtile.sample = 0;
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user