Cycles: Add kernel to enqueue inactive rays
The queue will be used to reuse inactive threads and keep the GPU busier.
This commit is contained in:
@@ -47,6 +47,7 @@ DeviceSplitKernel::DeviceSplitKernel(Device *device) : device(device)
|
||||
kernel_direct_lighting = NULL;
|
||||
kernel_shadow_blocked_ao = NULL;
|
||||
kernel_shadow_blocked_dl = NULL;
|
||||
kernel_enqueue_inactive = NULL;
|
||||
kernel_next_iteration_setup = NULL;
|
||||
kernel_indirect_subsurface = NULL;
|
||||
kernel_buffer_update = NULL;
|
||||
@@ -74,6 +75,7 @@ DeviceSplitKernel::~DeviceSplitKernel()
|
||||
delete kernel_direct_lighting;
|
||||
delete kernel_shadow_blocked_ao;
|
||||
delete kernel_shadow_blocked_dl;
|
||||
delete kernel_enqueue_inactive;
|
||||
delete kernel_next_iteration_setup;
|
||||
delete kernel_indirect_subsurface;
|
||||
delete kernel_buffer_update;
|
||||
@@ -101,6 +103,7 @@ bool DeviceSplitKernel::load_kernels(const DeviceRequestedFeatures& requested_fe
|
||||
LOAD_KERNEL(direct_lighting);
|
||||
LOAD_KERNEL(shadow_blocked_ao);
|
||||
LOAD_KERNEL(shadow_blocked_dl);
|
||||
LOAD_KERNEL(enqueue_inactive);
|
||||
LOAD_KERNEL(next_iteration_setup);
|
||||
LOAD_KERNEL(indirect_subsurface);
|
||||
LOAD_KERNEL(buffer_update);
|
||||
|
@@ -69,6 +69,7 @@ private:
|
||||
SplitKernelFunction *kernel_direct_lighting;
|
||||
SplitKernelFunction *kernel_shadow_blocked_ao;
|
||||
SplitKernelFunction *kernel_shadow_blocked_dl;
|
||||
SplitKernelFunction *kernel_enqueue_inactive;
|
||||
SplitKernelFunction *kernel_next_iteration_setup;
|
||||
SplitKernelFunction *kernel_indirect_subsurface;
|
||||
SplitKernelFunction *kernel_buffer_update;
|
||||
|
@@ -45,6 +45,7 @@ set(SRC
|
||||
kernels/opencl/kernel_direct_lighting.cl
|
||||
kernels/opencl/kernel_shadow_blocked_ao.cl
|
||||
kernels/opencl/kernel_shadow_blocked_dl.cl
|
||||
kernels/opencl/kernel_enqueue_inactive.cl
|
||||
kernels/opencl/kernel_next_iteration_setup.cl
|
||||
kernels/opencl/kernel_indirect_subsurface.cl
|
||||
kernels/opencl/kernel_buffer_update.cl
|
||||
@@ -278,6 +279,7 @@ set(SRC_SPLIT_HEADERS
|
||||
split/kernel_data_init.h
|
||||
split/kernel_direct_lighting.h
|
||||
split/kernel_do_volume.h
|
||||
split/kernel_enqueue_inactive.h
|
||||
split/kernel_holdout_emission_blurring_pathtermination_ao.h
|
||||
split/kernel_indirect_background.h
|
||||
split/kernel_indirect_subsurface.h
|
||||
@@ -490,6 +492,7 @@ delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_subsurface_sc
|
||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_direct_lighting.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl)
|
||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_shadow_blocked_ao.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl)
|
||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_shadow_blocked_dl.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl)
|
||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_enqueue_inactive.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl)
|
||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_next_iteration_setup.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl)
|
||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_indirect_subsurface.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl)
|
||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_buffer_update.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl)
|
||||
|
@@ -1387,6 +1387,8 @@ enum QueueNumber {
|
||||
#ifdef __BRANCHED_PATH__
|
||||
/* All rays moving to next iteration of the indirect loop for light */
|
||||
QUEUE_LIGHT_INDIRECT_ITER,
|
||||
/* Queue of all inactive rays. These are candidates for sharing work of indirect loops */
|
||||
QUEUE_INACTIVE_RAYS,
|
||||
# ifdef __VOLUME__
|
||||
/* All rays moving to next iteration of the indirect loop for volumes */
|
||||
QUEUE_VOLUME_INDIRECT_ITER,
|
||||
|
@@ -85,6 +85,7 @@ DECLARE_SPLIT_KERNEL_FUNCTION(subsurface_scatter)
|
||||
DECLARE_SPLIT_KERNEL_FUNCTION(direct_lighting)
|
||||
DECLARE_SPLIT_KERNEL_FUNCTION(shadow_blocked_ao)
|
||||
DECLARE_SPLIT_KERNEL_FUNCTION(shadow_blocked_dl)
|
||||
DECLARE_SPLIT_KERNEL_FUNCTION(enqueue_inactive)
|
||||
DECLARE_SPLIT_KERNEL_FUNCTION(next_iteration_setup)
|
||||
DECLARE_SPLIT_KERNEL_FUNCTION(indirect_subsurface)
|
||||
DECLARE_SPLIT_KERNEL_FUNCTION(buffer_update)
|
||||
|
@@ -53,6 +53,7 @@
|
||||
# include "kernel/split/kernel_direct_lighting.h"
|
||||
# include "kernel/split/kernel_shadow_blocked_ao.h"
|
||||
# include "kernel/split/kernel_shadow_blocked_dl.h"
|
||||
# include "kernel/split/kernel_enqueue_inactive.h"
|
||||
# include "kernel/split/kernel_next_iteration_setup.h"
|
||||
# include "kernel/split/kernel_indirect_subsurface.h"
|
||||
# include "kernel/split/kernel_buffer_update.h"
|
||||
@@ -230,6 +231,7 @@ DEFINE_SPLIT_KERNEL_FUNCTION(subsurface_scatter)
|
||||
DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(direct_lighting, uint)
|
||||
DEFINE_SPLIT_KERNEL_FUNCTION(shadow_blocked_ao)
|
||||
DEFINE_SPLIT_KERNEL_FUNCTION(shadow_blocked_dl)
|
||||
DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(enqueue_inactive, uint)
|
||||
DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(next_iteration_setup, uint)
|
||||
DEFINE_SPLIT_KERNEL_FUNCTION(indirect_subsurface)
|
||||
DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(buffer_update, uint)
|
||||
|
@@ -39,6 +39,7 @@
|
||||
#include "kernel/split/kernel_direct_lighting.h"
|
||||
#include "kernel/split/kernel_shadow_blocked_ao.h"
|
||||
#include "kernel/split/kernel_shadow_blocked_dl.h"
|
||||
#include "kernel/split/kernel_enqueue_inactive.h"
|
||||
#include "kernel/split/kernel_next_iteration_setup.h"
|
||||
#include "kernel/split/kernel_indirect_subsurface.h"
|
||||
#include "kernel/split/kernel_buffer_update.h"
|
||||
@@ -118,6 +119,7 @@ DEFINE_SPLIT_KERNEL_FUNCTION(subsurface_scatter)
|
||||
DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(direct_lighting, uint)
|
||||
DEFINE_SPLIT_KERNEL_FUNCTION(shadow_blocked_ao)
|
||||
DEFINE_SPLIT_KERNEL_FUNCTION(shadow_blocked_dl)
|
||||
DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(enqueue_inactive, uint)
|
||||
DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(next_iteration_setup, uint)
|
||||
DEFINE_SPLIT_KERNEL_FUNCTION(indirect_subsurface)
|
||||
DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(buffer_update, uint)
|
||||
|
@@ -0,0 +1,27 @@
|
||||
/*
|
||||
* Copyright 2011-2017 Blender Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "kernel/kernel_compat_opencl.h"
|
||||
#include "kernel/split/kernel_split_common.h"
|
||||
#include "kernel/split/kernel_enqueue_inactive.h"
|
||||
|
||||
/* OpenCL entry point for the enqueue-inactive split kernel.
 *
 * Declares the work-group-local counter and forwards to
 * kernel_enqueue_inactive(), passing `kg` reinterpreted as KernelGlobals*.
 * `data` is not referenced in this body.
 */
__kernel void kernel_ocl_path_trace_enqueue_inactive(
|
||||
ccl_global char *kg,
|
||||
ccl_constant KernelData *data)
|
||||
{
|
||||
/* Left uninitialized here; kernel_enqueue_inactive() resets it to 0 when __BRANCHED_PATH__ is defined. */
ccl_local unsigned int local_queue_atomics;
|
||||
kernel_enqueue_inactive((KernelGlobals*)kg, &local_queue_atomics);
|
||||
}
|
@@ -31,6 +31,7 @@
|
||||
#include "kernel/kernels/opencl/kernel_direct_lighting.cl"
|
||||
#include "kernel/kernels/opencl/kernel_shadow_blocked_ao.cl"
|
||||
#include "kernel/kernels/opencl/kernel_shadow_blocked_dl.cl"
|
||||
#include "kernel/kernels/opencl/kernel_enqueue_inactive.cl"
|
||||
#include "kernel/kernels/opencl/kernel_next_iteration_setup.cl"
|
||||
#include "kernel/kernels/opencl/kernel_indirect_subsurface.cl"
|
||||
#include "kernel/kernels/opencl/kernel_buffer_update.cl"
|
||||
|
46
intern/cycles/kernel/split/kernel_enqueue_inactive.h
Normal file
46
intern/cycles/kernel/split/kernel_enqueue_inactive.h
Normal file
@@ -0,0 +1,46 @@
|
||||
/*
|
||||
* Copyright 2011-2017 Blender Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
/* Enqueue this work-item's ray into QUEUE_INACTIVE_RAYS when its state is
 * RAY_INACTIVE.
 *
 * The whole body is compiled only when __BRANCHED_PATH__ is defined;
 * otherwise the function does nothing.
 *
 * kg:                  kernel globals; not named directly in this body
 *                      (NOTE(review): presumably consumed by the
 *                      kernel_split_state / kernel_split_params macros - confirm).
 * local_queue_atomics: caller-provided local counter; reset to 0 here and
 *                      then handed to enqueue_ray_index_local().
 */
ccl_device void kernel_enqueue_inactive(KernelGlobals *kg,
|
||||
ccl_local_param unsigned int *local_queue_atomics)
|
||||
{
|
||||
#ifdef __BRANCHED_PATH__
|
||||
/* Enqueue RAY_INACTIVE rays into QUEUE_INACTIVE_RAYS queue. */
|
||||
/* Only the work-item with local id (0, 0) resets the counter. */
if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
|
||||
*local_queue_atomics = 0;
|
||||
}
|
||||
/* Barrier sits between the counter reset above and the enqueue call below. */
ccl_barrier(CCL_LOCAL_MEM_FENCE);
|
||||
|
|
||||
/* Flatten the 2D global id into a single ray index. */
int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
|
||||
|
|
||||
/* Flag is 1 only when this ray's state is RAY_INACTIVE. */
char enqueue_flag = 0;
|
||||
if(IS_STATE(kernel_split_state.ray_state, ray_index, RAY_INACTIVE)) {
|
||||
enqueue_flag = 1;
|
||||
}
|
||||
|
|
||||
/* Called unconditionally by every work-item; enqueue_flag is passed along.
 * NOTE(review): presumably the helper skips rays whose flag is 0 - confirm
 * against enqueue_ray_index_local(). */
enqueue_ray_index_local(ray_index,
|
||||
QUEUE_INACTIVE_RAYS,
|
||||
enqueue_flag,
|
||||
kernel_split_params.queue_size,
|
||||
local_queue_atomics,
|
||||
kernel_split_state.queue_data,
|
||||
kernel_split_params.queue_index);
|
||||
#endif /* __BRANCHED_PATH__ */
|
||||
}
|
||||
|
||||
CCL_NAMESPACE_END
|
Reference in New Issue
Block a user