Cycles: Define ccl_local variables in kernel functions
Declaring ccl_local in a device function is not supported by certain compilers.
This commit is contained in:
@@ -168,21 +168,28 @@ void KERNEL_FUNCTION_FULL_NAME(shader)(KernelGlobals *kg,
|
|||||||
kernel_##name(kg); \
|
kernel_##name(kg); \
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#define DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(name, type) \
|
||||||
|
void KERNEL_FUNCTION_FULL_NAME(name)(KernelGlobals *kg, KernelData* /*data*/) \
|
||||||
|
{ \
|
||||||
|
ccl_local type locals; \
|
||||||
|
kernel_##name(kg, &locals); \
|
||||||
|
}
|
||||||
|
|
||||||
DEFINE_SPLIT_KERNEL_FUNCTION(path_init)
|
DEFINE_SPLIT_KERNEL_FUNCTION(path_init)
|
||||||
DEFINE_SPLIT_KERNEL_FUNCTION(scene_intersect)
|
DEFINE_SPLIT_KERNEL_FUNCTION(scene_intersect)
|
||||||
DEFINE_SPLIT_KERNEL_FUNCTION(lamp_emission)
|
DEFINE_SPLIT_KERNEL_FUNCTION(lamp_emission)
|
||||||
DEFINE_SPLIT_KERNEL_FUNCTION(do_volume)
|
DEFINE_SPLIT_KERNEL_FUNCTION(do_volume)
|
||||||
DEFINE_SPLIT_KERNEL_FUNCTION(queue_enqueue)
|
DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(queue_enqueue, QueueEnqueueLocals)
|
||||||
DEFINE_SPLIT_KERNEL_FUNCTION(indirect_background)
|
DEFINE_SPLIT_KERNEL_FUNCTION(indirect_background)
|
||||||
DEFINE_SPLIT_KERNEL_FUNCTION(shader_eval)
|
DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(shader_eval, uint)
|
||||||
DEFINE_SPLIT_KERNEL_FUNCTION(holdout_emission_blurring_pathtermination_ao)
|
DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(holdout_emission_blurring_pathtermination_ao, BackgroundAOLocals)
|
||||||
DEFINE_SPLIT_KERNEL_FUNCTION(subsurface_scatter)
|
DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(subsurface_scatter, uint)
|
||||||
DEFINE_SPLIT_KERNEL_FUNCTION(direct_lighting)
|
DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(direct_lighting, uint)
|
||||||
DEFINE_SPLIT_KERNEL_FUNCTION(shadow_blocked_ao)
|
DEFINE_SPLIT_KERNEL_FUNCTION(shadow_blocked_ao)
|
||||||
DEFINE_SPLIT_KERNEL_FUNCTION(shadow_blocked_dl)
|
DEFINE_SPLIT_KERNEL_FUNCTION(shadow_blocked_dl)
|
||||||
DEFINE_SPLIT_KERNEL_FUNCTION(next_iteration_setup)
|
DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(next_iteration_setup, uint)
|
||||||
DEFINE_SPLIT_KERNEL_FUNCTION(indirect_subsurface)
|
DEFINE_SPLIT_KERNEL_FUNCTION(indirect_subsurface)
|
||||||
DEFINE_SPLIT_KERNEL_FUNCTION(buffer_update)
|
DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(buffer_update, uint)
|
||||||
|
|
||||||
void KERNEL_FUNCTION_FULL_NAME(register_functions)(void(*reg)(const char* name, void* func))
|
void KERNEL_FUNCTION_FULL_NAME(register_functions)(void(*reg)(const char* name, void* func))
|
||||||
{
|
{
|
||||||
|
@@ -93,21 +93,30 @@ kernel_cuda_path_trace_data_init(
|
|||||||
kernel_##name(NULL); \
|
kernel_##name(NULL); \
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#define DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(name, type) \
|
||||||
|
extern "C" __global__ void \
|
||||||
|
CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS) \
|
||||||
|
kernel_cuda_##name() \
|
||||||
|
{ \
|
||||||
|
ccl_local type locals; \
|
||||||
|
kernel_##name(NULL, &locals); \
|
||||||
|
}
|
||||||
|
|
||||||
DEFINE_SPLIT_KERNEL_FUNCTION(path_init)
|
DEFINE_SPLIT_KERNEL_FUNCTION(path_init)
|
||||||
DEFINE_SPLIT_KERNEL_FUNCTION(scene_intersect)
|
DEFINE_SPLIT_KERNEL_FUNCTION(scene_intersect)
|
||||||
DEFINE_SPLIT_KERNEL_FUNCTION(lamp_emission)
|
DEFINE_SPLIT_KERNEL_FUNCTION(lamp_emission)
|
||||||
DEFINE_SPLIT_KERNEL_FUNCTION(do_volume)
|
DEFINE_SPLIT_KERNEL_FUNCTION(do_volume)
|
||||||
DEFINE_SPLIT_KERNEL_FUNCTION(queue_enqueue)
|
DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(queue_enqueue, QueueEnqueueLocals)
|
||||||
DEFINE_SPLIT_KERNEL_FUNCTION(indirect_background)
|
DEFINE_SPLIT_KERNEL_FUNCTION(indirect_background)
|
||||||
DEFINE_SPLIT_KERNEL_FUNCTION(shader_eval)
|
DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(shader_eval, uint)
|
||||||
DEFINE_SPLIT_KERNEL_FUNCTION(holdout_emission_blurring_pathtermination_ao)
|
DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(holdout_emission_blurring_pathtermination_ao, BackgroundAOLocals)
|
||||||
DEFINE_SPLIT_KERNEL_FUNCTION(subsurface_scatter)
|
DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(subsurface_scatter, uint)
|
||||||
DEFINE_SPLIT_KERNEL_FUNCTION(direct_lighting)
|
DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(direct_lighting, uint)
|
||||||
DEFINE_SPLIT_KERNEL_FUNCTION(shadow_blocked_ao)
|
DEFINE_SPLIT_KERNEL_FUNCTION(shadow_blocked_ao)
|
||||||
DEFINE_SPLIT_KERNEL_FUNCTION(shadow_blocked_dl)
|
DEFINE_SPLIT_KERNEL_FUNCTION(shadow_blocked_dl)
|
||||||
DEFINE_SPLIT_KERNEL_FUNCTION(next_iteration_setup)
|
DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(next_iteration_setup, uint)
|
||||||
DEFINE_SPLIT_KERNEL_FUNCTION(indirect_subsurface)
|
DEFINE_SPLIT_KERNEL_FUNCTION(indirect_subsurface)
|
||||||
DEFINE_SPLIT_KERNEL_FUNCTION(buffer_update)
|
DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(buffer_update, uint)
|
||||||
|
|
||||||
extern "C" __global__ void
|
extern "C" __global__ void
|
||||||
CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
|
CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
|
||||||
|
@@ -22,5 +22,6 @@ __kernel void kernel_ocl_path_trace_buffer_update(
|
|||||||
ccl_global char *kg,
|
ccl_global char *kg,
|
||||||
ccl_constant KernelData *data)
|
ccl_constant KernelData *data)
|
||||||
{
|
{
|
||||||
kernel_buffer_update((KernelGlobals*)kg);
|
ccl_local unsigned int local_queue_atomics;
|
||||||
|
kernel_buffer_update((KernelGlobals*)kg, &local_queue_atomics);
|
||||||
}
|
}
|
||||||
|
@@ -22,5 +22,6 @@ __kernel void kernel_ocl_path_trace_direct_lighting(
|
|||||||
ccl_global char *kg,
|
ccl_global char *kg,
|
||||||
ccl_constant KernelData *data)
|
ccl_constant KernelData *data)
|
||||||
{
|
{
|
||||||
kernel_direct_lighting((KernelGlobals*)kg);
|
ccl_local unsigned int local_queue_atomics;
|
||||||
|
kernel_direct_lighting((KernelGlobals*)kg, &local_queue_atomics);
|
||||||
}
|
}
|
||||||
|
@@ -22,5 +22,8 @@ __kernel void kernel_ocl_path_trace_holdout_emission_blurring_pathtermination_ao
|
|||||||
ccl_global char *kg,
|
ccl_global char *kg,
|
||||||
ccl_constant KernelData *data)
|
ccl_constant KernelData *data)
|
||||||
{
|
{
|
||||||
kernel_holdout_emission_blurring_pathtermination_ao((KernelGlobals*)kg);
|
ccl_local BackgroundAOLocals locals;
|
||||||
|
kernel_holdout_emission_blurring_pathtermination_ao(
|
||||||
|
(KernelGlobals*)kg,
|
||||||
|
&locals);
|
||||||
}
|
}
|
||||||
|
@@ -22,5 +22,6 @@ __kernel void kernel_ocl_path_trace_next_iteration_setup(
|
|||||||
ccl_global char *kg,
|
ccl_global char *kg,
|
||||||
ccl_constant KernelData *data)
|
ccl_constant KernelData *data)
|
||||||
{
|
{
|
||||||
kernel_next_iteration_setup((KernelGlobals*)kg);
|
ccl_local unsigned int local_queue_atomics;
|
||||||
|
kernel_next_iteration_setup((KernelGlobals*)kg, &local_queue_atomics);
|
||||||
}
|
}
|
||||||
|
@@ -22,5 +22,6 @@ __kernel void kernel_ocl_path_trace_queue_enqueue(
|
|||||||
ccl_global char *kg,
|
ccl_global char *kg,
|
||||||
ccl_constant KernelData *data)
|
ccl_constant KernelData *data)
|
||||||
{
|
{
|
||||||
kernel_queue_enqueue((KernelGlobals*)kg);
|
ccl_local QueueEnqueueLocals locals;
|
||||||
|
kernel_queue_enqueue((KernelGlobals*)kg, &locals);
|
||||||
}
|
}
|
||||||
|
@@ -22,5 +22,6 @@ __kernel void kernel_ocl_path_trace_shader_eval(
|
|||||||
ccl_global char *kg,
|
ccl_global char *kg,
|
||||||
ccl_constant KernelData *data)
|
ccl_constant KernelData *data)
|
||||||
{
|
{
|
||||||
kernel_shader_eval((KernelGlobals*)kg);
|
ccl_local unsigned int local_queue_atomics;
|
||||||
|
kernel_shader_eval((KernelGlobals*)kg, &local_queue_atomics);
|
||||||
}
|
}
|
||||||
|
@@ -22,5 +22,6 @@ __kernel void kernel_ocl_path_trace_subsurface_scatter(
|
|||||||
ccl_global char *kg,
|
ccl_global char *kg,
|
||||||
ccl_constant KernelData *data)
|
ccl_constant KernelData *data)
|
||||||
{
|
{
|
||||||
kernel_subsurface_scatter((KernelGlobals*)kg);
|
ccl_local unsigned int local_queue_atomics;
|
||||||
|
kernel_subsurface_scatter((KernelGlobals*)kg, &local_queue_atomics);
|
||||||
}
|
}
|
||||||
|
@@ -38,11 +38,11 @@ CCL_NAMESPACE_BEGIN
|
|||||||
* RAY_REGENERATED rays.
|
* RAY_REGENERATED rays.
|
||||||
* - QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS will be empty.
|
* - QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS will be empty.
|
||||||
*/
|
*/
|
||||||
ccl_device void kernel_buffer_update(KernelGlobals *kg)
|
ccl_device void kernel_buffer_update(KernelGlobals *kg,
|
||||||
|
ccl_local_param unsigned int *local_queue_atomics)
|
||||||
{
|
{
|
||||||
ccl_local unsigned int local_queue_atomics;
|
|
||||||
if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
|
if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
|
||||||
local_queue_atomics = 0;
|
*local_queue_atomics = 0;
|
||||||
}
|
}
|
||||||
ccl_barrier(CCL_LOCAL_MEM_FENCE);
|
ccl_barrier(CCL_LOCAL_MEM_FENCE);
|
||||||
|
|
||||||
@@ -188,7 +188,7 @@ ccl_device void kernel_buffer_update(KernelGlobals *kg)
|
|||||||
QUEUE_ACTIVE_AND_REGENERATED_RAYS,
|
QUEUE_ACTIVE_AND_REGENERATED_RAYS,
|
||||||
enqueue_flag,
|
enqueue_flag,
|
||||||
kernel_split_params.queue_size,
|
kernel_split_params.queue_size,
|
||||||
&local_queue_atomics,
|
local_queue_atomics,
|
||||||
kernel_split_state.queue_data,
|
kernel_split_state.queue_data,
|
||||||
kernel_split_params.queue_index);
|
kernel_split_params.queue_index);
|
||||||
}
|
}
|
||||||
|
@@ -40,11 +40,11 @@ CCL_NAMESPACE_BEGIN
|
|||||||
* shadow_blocked function must be executed, after this kernel call
|
* shadow_blocked function must be executed, after this kernel call
|
||||||
* Before this kernel call the QUEUE_SHADOW_RAY_CAST_DL_RAYS will be empty.
|
* Before this kernel call the QUEUE_SHADOW_RAY_CAST_DL_RAYS will be empty.
|
||||||
*/
|
*/
|
||||||
ccl_device void kernel_direct_lighting(KernelGlobals *kg)
|
ccl_device void kernel_direct_lighting(KernelGlobals *kg,
|
||||||
|
ccl_local_param unsigned int *local_queue_atomics)
|
||||||
{
|
{
|
||||||
ccl_local unsigned int local_queue_atomics;
|
|
||||||
if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
|
if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
|
||||||
local_queue_atomics = 0;
|
*local_queue_atomics = 0;
|
||||||
}
|
}
|
||||||
ccl_barrier(CCL_LOCAL_MEM_FENCE);
|
ccl_barrier(CCL_LOCAL_MEM_FENCE);
|
||||||
|
|
||||||
@@ -130,7 +130,7 @@ ccl_device void kernel_direct_lighting(KernelGlobals *kg)
|
|||||||
QUEUE_SHADOW_RAY_CAST_DL_RAYS,
|
QUEUE_SHADOW_RAY_CAST_DL_RAYS,
|
||||||
enqueue_flag,
|
enqueue_flag,
|
||||||
kernel_split_params.queue_size,
|
kernel_split_params.queue_size,
|
||||||
&local_queue_atomics,
|
local_queue_atomics,
|
||||||
kernel_split_state.queue_data,
|
kernel_split_state.queue_data,
|
||||||
kernel_split_params.queue_index);
|
kernel_split_params.queue_index);
|
||||||
#endif
|
#endif
|
||||||
|
@@ -52,13 +52,13 @@ CCL_NAMESPACE_BEGIN
|
|||||||
* - QUEUE_SHADOW_RAY_CAST_AO_RAYS will be filled with rays marked with
|
* - QUEUE_SHADOW_RAY_CAST_AO_RAYS will be filled with rays marked with
|
||||||
* flag RAY_SHADOW_RAY_CAST_AO
|
* flag RAY_SHADOW_RAY_CAST_AO
|
||||||
*/
|
*/
|
||||||
ccl_device void kernel_holdout_emission_blurring_pathtermination_ao(KernelGlobals *kg)
|
ccl_device void kernel_holdout_emission_blurring_pathtermination_ao(
|
||||||
|
KernelGlobals *kg,
|
||||||
|
ccl_local_param BackgroundAOLocals *locals)
|
||||||
{
|
{
|
||||||
ccl_local unsigned int local_queue_atomics_bg;
|
|
||||||
ccl_local unsigned int local_queue_atomics_ao;
|
|
||||||
if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
|
if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
|
||||||
local_queue_atomics_bg = 0;
|
locals->queue_atomics_bg = 0;
|
||||||
local_queue_atomics_ao = 0;
|
locals->queue_atomics_ao = 0;
|
||||||
}
|
}
|
||||||
ccl_barrier(CCL_LOCAL_MEM_FENCE);
|
ccl_barrier(CCL_LOCAL_MEM_FENCE);
|
||||||
|
|
||||||
@@ -253,7 +253,7 @@ ccl_device void kernel_holdout_emission_blurring_pathtermination_ao(KernelGlobal
|
|||||||
QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
|
QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
|
||||||
enqueue_flag,
|
enqueue_flag,
|
||||||
kernel_split_params.queue_size,
|
kernel_split_params.queue_size,
|
||||||
&local_queue_atomics_bg,
|
&locals->queue_atomics_bg,
|
||||||
kernel_split_state.queue_data,
|
kernel_split_state.queue_data,
|
||||||
kernel_split_params.queue_index);
|
kernel_split_params.queue_index);
|
||||||
|
|
||||||
@@ -263,7 +263,7 @@ ccl_device void kernel_holdout_emission_blurring_pathtermination_ao(KernelGlobal
|
|||||||
QUEUE_SHADOW_RAY_CAST_AO_RAYS,
|
QUEUE_SHADOW_RAY_CAST_AO_RAYS,
|
||||||
enqueue_flag_AO_SHADOW_RAY_CAST,
|
enqueue_flag_AO_SHADOW_RAY_CAST,
|
||||||
kernel_split_params.queue_size,
|
kernel_split_params.queue_size,
|
||||||
&local_queue_atomics_ao,
|
&locals->queue_atomics_bg,
|
||||||
kernel_split_state.queue_data,
|
kernel_split_state.queue_data,
|
||||||
kernel_split_params.queue_index);
|
kernel_split_params.queue_index);
|
||||||
#endif
|
#endif
|
||||||
|
@@ -18,7 +18,6 @@ CCL_NAMESPACE_BEGIN
|
|||||||
|
|
||||||
ccl_device void kernel_indirect_background(KernelGlobals *kg)
|
ccl_device void kernel_indirect_background(KernelGlobals *kg)
|
||||||
{
|
{
|
||||||
|
|
||||||
ccl_global char *ray_state = kernel_split_state.ray_state;
|
ccl_global char *ray_state = kernel_split_state.ray_state;
|
||||||
|
|
||||||
int thread_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
|
int thread_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
|
||||||
|
@@ -44,11 +44,11 @@ CCL_NAMESPACE_BEGIN
|
|||||||
* - QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS will be filled with
|
* - QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS will be filled with
|
||||||
* RAY_TO_REGENERATE and more RAY_UPDATE_BUFFER rays.
|
* RAY_TO_REGENERATE and more RAY_UPDATE_BUFFER rays.
|
||||||
*/
|
*/
|
||||||
ccl_device void kernel_next_iteration_setup(KernelGlobals *kg)
|
ccl_device void kernel_next_iteration_setup(KernelGlobals *kg,
|
||||||
|
ccl_local_param unsigned int *local_queue_atomics)
|
||||||
{
|
{
|
||||||
ccl_local unsigned int local_queue_atomics;
|
|
||||||
if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
|
if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
|
||||||
local_queue_atomics = 0;
|
*local_queue_atomics = 0;
|
||||||
}
|
}
|
||||||
ccl_barrier(CCL_LOCAL_MEM_FENCE);
|
ccl_barrier(CCL_LOCAL_MEM_FENCE);
|
||||||
|
|
||||||
@@ -161,7 +161,7 @@ ccl_device void kernel_next_iteration_setup(KernelGlobals *kg)
|
|||||||
QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
|
QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
|
||||||
enqueue_flag,
|
enqueue_flag,
|
||||||
kernel_split_params.queue_size,
|
kernel_split_params.queue_size,
|
||||||
&local_queue_atomics,
|
local_queue_atomics,
|
||||||
kernel_split_state.queue_data,
|
kernel_split_state.queue_data,
|
||||||
kernel_split_params.queue_index);
|
kernel_split_params.queue_index);
|
||||||
}
|
}
|
||||||
|
@@ -35,17 +35,16 @@ CCL_NAMESPACE_BEGIN
|
|||||||
* - QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS will be filled with
|
* - QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS will be filled with
|
||||||
* RAY_TO_REGENERATE, RAY_UPDATE_BUFFER, RAY_HIT_BACKGROUND rays.
|
* RAY_TO_REGENERATE, RAY_UPDATE_BUFFER, RAY_HIT_BACKGROUND rays.
|
||||||
*/
|
*/
|
||||||
ccl_device void kernel_queue_enqueue(KernelGlobals *kg)
|
ccl_device void kernel_queue_enqueue(KernelGlobals *kg,
|
||||||
|
ccl_local_param QueueEnqueueLocals *locals)
|
||||||
{
|
{
|
||||||
/* We have only 2 cases (Hit/Not-Hit) */
|
/* We have only 2 cases (Hit/Not-Hit) */
|
||||||
ccl_local unsigned int local_queue_atomics[2];
|
|
||||||
|
|
||||||
int lidx = ccl_local_id(1) * ccl_local_size(0) + ccl_local_id(0);
|
int lidx = ccl_local_id(1) * ccl_local_size(0) + ccl_local_id(0);
|
||||||
int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
|
int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
|
||||||
|
|
||||||
if(lidx == 0) {
|
if(lidx == 0) {
|
||||||
local_queue_atomics[0] = 0;
|
locals->queue_atomics[0] = 0;
|
||||||
local_queue_atomics[1] = 0;
|
locals->queue_atomics[1] = 0;
|
||||||
}
|
}
|
||||||
ccl_barrier(CCL_LOCAL_MEM_FENCE);
|
ccl_barrier(CCL_LOCAL_MEM_FENCE);
|
||||||
|
|
||||||
@@ -62,18 +61,18 @@ ccl_device void kernel_queue_enqueue(KernelGlobals *kg)
|
|||||||
|
|
||||||
unsigned int my_lqidx;
|
unsigned int my_lqidx;
|
||||||
if(queue_number != -1) {
|
if(queue_number != -1) {
|
||||||
my_lqidx = get_local_queue_index(queue_number, local_queue_atomics);
|
my_lqidx = get_local_queue_index(queue_number, locals->queue_atomics);
|
||||||
}
|
}
|
||||||
ccl_barrier(CCL_LOCAL_MEM_FENCE);
|
ccl_barrier(CCL_LOCAL_MEM_FENCE);
|
||||||
|
|
||||||
if(lidx == 0) {
|
if(lidx == 0) {
|
||||||
local_queue_atomics[QUEUE_ACTIVE_AND_REGENERATED_RAYS] =
|
locals->queue_atomics[QUEUE_ACTIVE_AND_REGENERATED_RAYS] =
|
||||||
get_global_per_queue_offset(QUEUE_ACTIVE_AND_REGENERATED_RAYS,
|
get_global_per_queue_offset(QUEUE_ACTIVE_AND_REGENERATED_RAYS,
|
||||||
local_queue_atomics,
|
locals->queue_atomics,
|
||||||
kernel_split_params.queue_index);
|
kernel_split_params.queue_index);
|
||||||
local_queue_atomics[QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS] =
|
locals->queue_atomics[QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS] =
|
||||||
get_global_per_queue_offset(QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
|
get_global_per_queue_offset(QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
|
||||||
local_queue_atomics,
|
locals->queue_atomics,
|
||||||
kernel_split_params.queue_index);
|
kernel_split_params.queue_index);
|
||||||
}
|
}
|
||||||
ccl_barrier(CCL_LOCAL_MEM_FENCE);
|
ccl_barrier(CCL_LOCAL_MEM_FENCE);
|
||||||
@@ -83,7 +82,7 @@ ccl_device void kernel_queue_enqueue(KernelGlobals *kg)
|
|||||||
my_gqidx = get_global_queue_index(queue_number,
|
my_gqidx = get_global_queue_index(queue_number,
|
||||||
kernel_split_params.queue_size,
|
kernel_split_params.queue_size,
|
||||||
my_lqidx,
|
my_lqidx,
|
||||||
local_queue_atomics);
|
locals->queue_atomics);
|
||||||
kernel_split_state.queue_data[my_gqidx] = ray_index;
|
kernel_split_state.queue_data[my_gqidx] = ray_index;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@@ -22,12 +22,12 @@ CCL_NAMESPACE_BEGIN
|
|||||||
* It also identifies the rays of state RAY_TO_REGENERATE and enqueues them
|
* It also identifies the rays of state RAY_TO_REGENERATE and enqueues them
|
||||||
* in QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS queue.
|
* in QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS queue.
|
||||||
*/
|
*/
|
||||||
ccl_device void kernel_shader_eval(KernelGlobals *kg)
|
ccl_device void kernel_shader_eval(KernelGlobals *kg,
|
||||||
|
ccl_local_param unsigned int *local_queue_atomics)
|
||||||
{
|
{
|
||||||
/* Enqeueue RAY_TO_REGENERATE rays into QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS queue. */
|
/* Enqeueue RAY_TO_REGENERATE rays into QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS queue. */
|
||||||
ccl_local unsigned int local_queue_atomics;
|
|
||||||
if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
|
if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
|
||||||
local_queue_atomics = 0;
|
*local_queue_atomics = 0;
|
||||||
}
|
}
|
||||||
ccl_barrier(CCL_LOCAL_MEM_FENCE);
|
ccl_barrier(CCL_LOCAL_MEM_FENCE);
|
||||||
|
|
||||||
@@ -47,7 +47,7 @@ ccl_device void kernel_shader_eval(KernelGlobals *kg)
|
|||||||
QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
|
QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
|
||||||
enqueue_flag,
|
enqueue_flag,
|
||||||
kernel_split_params.queue_size,
|
kernel_split_params.queue_size,
|
||||||
&local_queue_atomics,
|
local_queue_atomics,
|
||||||
kernel_split_state.queue_data,
|
kernel_split_state.queue_data,
|
||||||
kernel_split_params.queue_index);
|
kernel_split_params.queue_index);
|
||||||
|
|
||||||
|
@@ -111,6 +111,17 @@ __device__ SplitParams __split_param_data;
|
|||||||
# define kernel_split_params (__split_param_data)
|
# define kernel_split_params (__split_param_data)
|
||||||
#endif /* __KERNEL_CUDA__ */
|
#endif /* __KERNEL_CUDA__ */
|
||||||
|
|
||||||
|
/* Local storage for queue_enqueue kernel. */
|
||||||
|
typedef struct QueueEnqueueLocals {
|
||||||
|
uint queue_atomics[2];
|
||||||
|
} QueueEnqueueLocals;
|
||||||
|
|
||||||
|
/* Local storage for holdout_emission_blurring_pathtermination_ao kernel. */
|
||||||
|
typedef struct BackgroundAOLocals {
|
||||||
|
uint queue_atomics_bg;
|
||||||
|
uint queue_atomics_ao;
|
||||||
|
} BackgroundAOLocals;
|
||||||
|
|
||||||
CCL_NAMESPACE_END
|
CCL_NAMESPACE_END
|
||||||
|
|
||||||
#endif /* __KERNEL_SPLIT_DATA_TYPES_H__ */
|
#endif /* __KERNEL_SPLIT_DATA_TYPES_H__ */
|
||||||
|
@@ -17,13 +17,12 @@
|
|||||||
CCL_NAMESPACE_BEGIN
|
CCL_NAMESPACE_BEGIN
|
||||||
|
|
||||||
|
|
||||||
ccl_device void kernel_subsurface_scatter(KernelGlobals *kg)
|
ccl_device void kernel_subsurface_scatter(KernelGlobals *kg,
|
||||||
|
ccl_local_param unsigned int* local_queue_atomics)
|
||||||
{
|
{
|
||||||
#ifdef __SUBSURFACE__
|
#ifdef __SUBSURFACE__
|
||||||
|
|
||||||
ccl_local unsigned int local_queue_atomics;
|
|
||||||
if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
|
if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
|
||||||
local_queue_atomics = 0;
|
*local_queue_atomics = 0;
|
||||||
}
|
}
|
||||||
ccl_barrier(CCL_LOCAL_MEM_FENCE);
|
ccl_barrier(CCL_LOCAL_MEM_FENCE);
|
||||||
|
|
||||||
@@ -89,7 +88,7 @@ ccl_device void kernel_subsurface_scatter(KernelGlobals *kg)
|
|||||||
QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
|
QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
|
||||||
enqueue_flag,
|
enqueue_flag,
|
||||||
kernel_split_params.queue_size,
|
kernel_split_params.queue_size,
|
||||||
&local_queue_atomics,
|
local_queue_atomics,
|
||||||
kernel_split_state.queue_data,
|
kernel_split_state.queue_data,
|
||||||
kernel_split_params.queue_index);
|
kernel_split_params.queue_index);
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user