Code refactor: split displace/background into separate kernels, remove luma.

This commit is contained in:
Brecht Van Lommel
2017-10-05 15:17:09 +02:00
parent d8509b349d
commit fb99ea79f8
19 changed files with 129 additions and 125 deletions

View File

@@ -174,7 +174,7 @@ public:
KernelFunctions<void(*)(KernelGlobals *, float *, int, int, int, int, int)> path_trace_kernel; KernelFunctions<void(*)(KernelGlobals *, float *, int, int, int, int, int)> path_trace_kernel;
KernelFunctions<void(*)(KernelGlobals *, uchar4 *, float *, float, int, int, int, int)> convert_to_half_float_kernel; KernelFunctions<void(*)(KernelGlobals *, uchar4 *, float *, float, int, int, int, int)> convert_to_half_float_kernel;
KernelFunctions<void(*)(KernelGlobals *, uchar4 *, float *, float, int, int, int, int)> convert_to_byte_kernel; KernelFunctions<void(*)(KernelGlobals *, uchar4 *, float *, float, int, int, int, int)> convert_to_byte_kernel;
KernelFunctions<void(*)(KernelGlobals *, uint4 *, float4 *, float*, int, int, int, int, int)> shader_kernel; KernelFunctions<void(*)(KernelGlobals *, uint4 *, float4 *, int, int, int, int, int)> shader_kernel;
KernelFunctions<void(*)(int, TilesInfo*, int, int, float*, float*, float*, float*, float*, int*, int, int)> filter_divide_shadow_kernel; KernelFunctions<void(*)(int, TilesInfo*, int, int, float*, float*, float*, float*, float*, int*, int, int)> filter_divide_shadow_kernel;
KernelFunctions<void(*)(int, TilesInfo*, int, int, int, int, float*, float*, int*, int, int)> filter_get_feature_kernel; KernelFunctions<void(*)(int, TilesInfo*, int, int, int, int, float*, float*, int*, int, int)> filter_get_feature_kernel;
@@ -756,7 +756,6 @@ public:
shader_kernel()(&kg, shader_kernel()(&kg,
(uint4*)task.shader_input, (uint4*)task.shader_input,
(float4*)task.shader_output, (float4*)task.shader_output,
(float*)task.shader_output_luma,
task.shader_eval_type, task.shader_eval_type,
task.shader_filter, task.shader_filter,
x, x,

View File

@@ -1424,14 +1424,16 @@ public:
CUfunction cuShader; CUfunction cuShader;
CUdeviceptr d_input = cuda_device_ptr(task.shader_input); CUdeviceptr d_input = cuda_device_ptr(task.shader_input);
CUdeviceptr d_output = cuda_device_ptr(task.shader_output); CUdeviceptr d_output = cuda_device_ptr(task.shader_output);
CUdeviceptr d_output_luma = cuda_device_ptr(task.shader_output_luma);
/* get kernel function */ /* get kernel function */
if(task.shader_eval_type >= SHADER_EVAL_BAKE) { if(task.shader_eval_type >= SHADER_EVAL_BAKE) {
cuda_assert(cuModuleGetFunction(&cuShader, cuModule, "kernel_cuda_bake")); cuda_assert(cuModuleGetFunction(&cuShader, cuModule, "kernel_cuda_bake"));
} }
else if(task.shader_eval_type == SHADER_EVAL_DISPLACE) {
cuda_assert(cuModuleGetFunction(&cuShader, cuModule, "kernel_cuda_displace"));
}
else { else {
cuda_assert(cuModuleGetFunction(&cuShader, cuModule, "kernel_cuda_shader")); cuda_assert(cuModuleGetFunction(&cuShader, cuModule, "kernel_cuda_background"));
} }
/* do tasks in smaller chunks, so we can cancel it */ /* do tasks in smaller chunks, so we can cancel it */
@@ -1450,9 +1452,6 @@ public:
int arg = 0; int arg = 0;
args[arg++] = &d_input; args[arg++] = &d_input;
args[arg++] = &d_output; args[arg++] = &d_output;
if(task.shader_eval_type < SHADER_EVAL_BAKE) {
args[arg++] = &d_output_luma;
}
args[arg++] = &task.shader_eval_type; args[arg++] = &task.shader_eval_type;
if(task.shader_eval_type >= SHADER_EVAL_BAKE) { if(task.shader_eval_type >= SHADER_EVAL_BAKE) {
args[arg++] = &task.shader_filter; args[arg++] = &task.shader_filter;

View File

@@ -383,7 +383,6 @@ public:
if(task.rgba_half) subtask.rgba_half = sub.ptr_map[task.rgba_half]; if(task.rgba_half) subtask.rgba_half = sub.ptr_map[task.rgba_half];
if(task.shader_input) subtask.shader_input = sub.ptr_map[task.shader_input]; if(task.shader_input) subtask.shader_input = sub.ptr_map[task.shader_input];
if(task.shader_output) subtask.shader_output = sub.ptr_map[task.shader_output]; if(task.shader_output) subtask.shader_output = sub.ptr_map[task.shader_output];
if(task.shader_output_luma) subtask.shader_output_luma = sub.ptr_map[task.shader_output_luma];
sub.device->task_add(subtask); sub.device->task_add(subtask);
} }

View File

@@ -660,10 +660,6 @@ protected:
if(task.shader_output) if(task.shader_output)
task.shader_output = device_ptr_from_client_pointer(task.shader_output); task.shader_output = device_ptr_from_client_pointer(task.shader_output);
if(task.shader_output_luma)
task.shader_output_luma = device_ptr_from_client_pointer(task.shader_output_luma);
task.acquire_tile = function_bind(&DeviceServer::task_acquire_tile, this, _1, _2); task.acquire_tile = function_bind(&DeviceServer::task_acquire_tile, this, _1, _2);
task.release_tile = function_bind(&DeviceServer::task_release_tile, this, _1); task.release_tile = function_bind(&DeviceServer::task_release_tile, this, _1);
task.update_progress_sample = function_bind(&DeviceServer::task_update_progress_sample, this); task.update_progress_sample = function_bind(&DeviceServer::task_update_progress_sample, this);

View File

@@ -132,7 +132,7 @@ public:
archive & type & task.x & task.y & task.w & task.h; archive & type & task.x & task.y & task.w & task.h;
archive & task.rgba_byte & task.rgba_half & task.buffer & task.sample & task.num_samples; archive & task.rgba_byte & task.rgba_half & task.buffer & task.sample & task.num_samples;
archive & task.offset & task.stride; archive & task.offset & task.stride;
archive & task.shader_input & task.shader_output & task.shader_output_luma & task.shader_eval_type; archive & task.shader_input & task.shader_output & task.shader_eval_type;
archive & task.shader_x & task.shader_w; archive & task.shader_x & task.shader_w;
archive & task.need_finish_queue; archive & task.need_finish_queue;
} }
@@ -291,7 +291,7 @@ public:
*archive & type & task.x & task.y & task.w & task.h; *archive & type & task.x & task.y & task.w & task.h;
*archive & task.rgba_byte & task.rgba_half & task.buffer & task.sample & task.num_samples; *archive & task.rgba_byte & task.rgba_half & task.buffer & task.sample & task.num_samples;
*archive & task.offset & task.stride; *archive & task.offset & task.stride;
*archive & task.shader_input & task.shader_output & task.shader_output_luma & task.shader_eval_type; *archive & task.shader_input & task.shader_output & task.shader_eval_type;
*archive & task.shader_x & task.shader_w; *archive & task.shader_x & task.shader_w;
*archive & task.need_finish_queue; *archive & task.need_finish_queue;

View File

@@ -31,7 +31,7 @@ CCL_NAMESPACE_BEGIN
DeviceTask::DeviceTask(Type type_) DeviceTask::DeviceTask(Type type_)
: type(type_), x(0), y(0), w(0), h(0), rgba_byte(0), rgba_half(0), buffer(0), : type(type_), x(0), y(0), w(0), h(0), rgba_byte(0), rgba_half(0), buffer(0),
sample(0), num_samples(1), sample(0), num_samples(1),
shader_input(0), shader_output(0), shader_output_luma(0), shader_input(0), shader_output(0),
shader_eval_type(0), shader_filter(0), shader_x(0), shader_w(0) shader_eval_type(0), shader_filter(0), shader_x(0), shader_w(0)
{ {
last_update_time = time_dt(); last_update_time = time_dt();

View File

@@ -46,7 +46,7 @@ public:
int offset, stride; int offset, stride;
device_ptr shader_input; device_ptr shader_input;
device_ptr shader_output, shader_output_luma; device_ptr shader_output;
int shader_eval_type; int shader_eval_type;
int shader_filter; int shader_filter;
int shader_x, shader_w; int shader_x, shader_w;

View File

@@ -228,7 +228,8 @@ bool OpenCLDeviceBase::load_kernels(const DeviceRequestedFeatures& requested_fea
base_program = OpenCLProgram(this, "base", "kernel.cl", build_options_for_base_program(requested_features)); base_program = OpenCLProgram(this, "base", "kernel.cl", build_options_for_base_program(requested_features));
base_program.add_kernel(ustring("convert_to_byte")); base_program.add_kernel(ustring("convert_to_byte"));
base_program.add_kernel(ustring("convert_to_half_float")); base_program.add_kernel(ustring("convert_to_half_float"));
base_program.add_kernel(ustring("shader")); base_program.add_kernel(ustring("displace"));
base_program.add_kernel(ustring("background"));
base_program.add_kernel(ustring("bake")); base_program.add_kernel(ustring("bake"));
base_program.add_kernel(ustring("zero_buffer")); base_program.add_kernel(ustring("zero_buffer"));
@@ -1112,7 +1113,6 @@ void OpenCLDeviceBase::shader(DeviceTask& task)
cl_mem d_data = CL_MEM_PTR(const_mem_map["__data"]->device_pointer); cl_mem d_data = CL_MEM_PTR(const_mem_map["__data"]->device_pointer);
cl_mem d_input = CL_MEM_PTR(task.shader_input); cl_mem d_input = CL_MEM_PTR(task.shader_input);
cl_mem d_output = CL_MEM_PTR(task.shader_output); cl_mem d_output = CL_MEM_PTR(task.shader_output);
cl_mem d_output_luma = CL_MEM_PTR(task.shader_output_luma);
cl_int d_shader_eval_type = task.shader_eval_type; cl_int d_shader_eval_type = task.shader_eval_type;
cl_int d_shader_filter = task.shader_filter; cl_int d_shader_filter = task.shader_filter;
cl_int d_shader_x = task.shader_x; cl_int d_shader_x = task.shader_x;
@@ -1121,10 +1121,15 @@ void OpenCLDeviceBase::shader(DeviceTask& task)
cl_kernel kernel; cl_kernel kernel;
if(task.shader_eval_type >= SHADER_EVAL_BAKE) if(task.shader_eval_type >= SHADER_EVAL_BAKE) {
kernel = base_program(ustring("bake")); kernel = base_program(ustring("bake"));
else }
kernel = base_program(ustring("shader")); else if(task.shader_eval_type >= SHADER_EVAL_DISPLACE) {
kernel = base_program(ustring("displace"));
}
else {
kernel = base_program(ustring("background"));
}
cl_uint start_arg_index = cl_uint start_arg_index =
kernel_set_args(kernel, kernel_set_args(kernel,
@@ -1133,12 +1138,6 @@ void OpenCLDeviceBase::shader(DeviceTask& task)
d_input, d_input,
d_output); d_output);
if(task.shader_eval_type < SHADER_EVAL_BAKE) {
start_arg_index += kernel_set_args(kernel,
start_arg_index,
d_output_luma);
}
set_kernel_arg_buffers(kernel, &start_arg_index); set_kernel_arg_buffers(kernel, &start_arg_index);
start_arg_index += kernel_set_args(kernel, start_arg_index += kernel_set_args(kernel,

View File

@@ -493,20 +493,15 @@ ccl_device void kernel_bake_evaluate(KernelGlobals *kg, ccl_global uint4 *input,
#endif /* __BAKING__ */ #endif /* __BAKING__ */
ccl_device void kernel_shader_evaluate(KernelGlobals *kg, ccl_device void kernel_displace_evaluate(KernelGlobals *kg,
ccl_global uint4 *input, ccl_global uint4 *input,
ccl_global float4 *output, ccl_global float4 *output,
ccl_global float *output_luma, int i)
ShaderEvalType type,
int i,
int sample)
{ {
ShaderData sd; ShaderData sd;
PathState state = {0}; PathState state = {0};
uint4 in = input[i]; uint4 in = input[i];
float3 out;
if(type == SHADER_EVAL_DISPLACE) {
/* setup shader data */ /* setup shader data */
int object = in.x; int object = in.x;
int prim = in.y; int prim = in.y;
@@ -518,11 +513,23 @@ ccl_device void kernel_shader_evaluate(KernelGlobals *kg,
/* evaluate */ /* evaluate */
float3 P = sd.P; float3 P = sd.P;
shader_eval_displacement(kg, &sd, &state); shader_eval_displacement(kg, &sd, &state);
out = sd.P - P; float3 D = sd.P - P;
object_inverse_dir_transform(kg, &sd, &out); object_inverse_dir_transform(kg, &sd, &D);
/* write output */
output[i] += make_float4(D.x, D.y, D.z, 0.0f);
} }
else { // SHADER_EVAL_BACKGROUND
ccl_device void kernel_background_evaluate(KernelGlobals *kg,
ccl_global uint4 *input,
ccl_global float4 *output,
int i)
{
ShaderData sd;
PathState state = {0};
uint4 in = input[i];
/* setup ray */ /* setup ray */
Ray ray; Ray ray;
float u = __uint_as_float(in.x); float u = __uint_as_float(in.x);
@@ -545,26 +552,10 @@ ccl_device void kernel_shader_evaluate(KernelGlobals *kg,
/* evaluate */ /* evaluate */
int flag = 0; /* we can't know which type of BSDF this is for */ int flag = 0; /* we can't know which type of BSDF this is for */
out = shader_eval_background(kg, &sd, &state, flag); float3 color = shader_eval_background(kg, &sd, &state, flag);
}
/* write output */ /* write output */
if(sample == 0) { output[i] += make_float4(color.x, color.y, color.z, 0.0f);
if(output != NULL) {
output[i] = make_float4(out.x, out.y, out.z, 0.0f);
}
if(output_luma != NULL) {
output_luma[i] = average(out);
}
}
else {
if(output != NULL) {
output[i] += make_float4(out.x, out.y, out.z, 0.0f);
}
if(output_luma != NULL) {
output_luma[i] += average(out);
}
}
} }
CCL_NAMESPACE_END CCL_NAMESPACE_END

View File

@@ -1204,7 +1204,7 @@ ccl_device void shader_eval_displacement(KernelGlobals *kg, ShaderData *sd, ccl_
#ifdef __SVM__ #ifdef __SVM__
# ifdef __OSL__ # ifdef __OSL__
if(kg->osl) if(kg->osl)
OSLShader::eval_displacement(kg, sd); OSLShader::eval_displacement(kg, sd, state);
else else
# endif # endif
{ {

View File

@@ -41,7 +41,6 @@ void KERNEL_FUNCTION_FULL_NAME(convert_to_half_float)(KernelGlobals *kg,
void KERNEL_FUNCTION_FULL_NAME(shader)(KernelGlobals *kg, void KERNEL_FUNCTION_FULL_NAME(shader)(KernelGlobals *kg,
uint4 *input, uint4 *input,
float4 *output, float4 *output,
float *output_luma,
int type, int type,
int filter, int filter,
int i, int i,

View File

@@ -149,7 +149,6 @@ void KERNEL_FUNCTION_FULL_NAME(convert_to_half_float)(KernelGlobals *kg,
void KERNEL_FUNCTION_FULL_NAME(shader)(KernelGlobals *kg, void KERNEL_FUNCTION_FULL_NAME(shader)(KernelGlobals *kg,
uint4 *input, uint4 *input,
float4 *output, float4 *output,
float *output_luma,
int type, int type,
int filter, int filter,
int i, int i,
@@ -160,7 +159,6 @@ void KERNEL_FUNCTION_FULL_NAME(shader)(KernelGlobals *kg,
STUB_ASSERT(KERNEL_ARCH, shader); STUB_ASSERT(KERNEL_ARCH, shader);
#else #else
if(type >= SHADER_EVAL_BAKE) { if(type >= SHADER_EVAL_BAKE) {
kernel_assert(output_luma == NULL);
# ifdef __BAKING__ # ifdef __BAKING__
kernel_bake_evaluate(kg, kernel_bake_evaluate(kg,
input, input,
@@ -172,14 +170,11 @@ void KERNEL_FUNCTION_FULL_NAME(shader)(KernelGlobals *kg,
sample); sample);
# endif # endif
} }
else if(type == SHADER_EVAL_DISPLACE) {
kernel_displace_evaluate(kg, input, output, i);
}
else { else {
kernel_shader_evaluate(kg, kernel_background_evaluate(kg, input, output, i);
input,
output,
output_luma,
(ShaderEvalType)type,
i,
sample);
} }
#endif /* KERNEL_STUB */ #endif /* KERNEL_STUB */
} }

View File

@@ -91,9 +91,8 @@ kernel_cuda_convert_to_half_float(uchar4 *rgba, float *buffer, float sample_scal
extern "C" __global__ void extern "C" __global__ void
CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS) CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
kernel_cuda_shader(uint4 *input, kernel_cuda_displace(uint4 *input,
float4 *output, float4 *output,
float *output_luma,
int type, int type,
int sx, int sx,
int sw, int sw,
@@ -104,13 +103,25 @@ kernel_cuda_shader(uint4 *input,
if(x < sx + sw) { if(x < sx + sw) {
KernelGlobals kg; KernelGlobals kg;
kernel_shader_evaluate(&kg, kernel_displace_evaluate(&kg, input, output, x);
input, }
output, }
output_luma,
(ShaderEvalType)type, extern "C" __global__ void
x, CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
sample); kernel_cuda_background(uint4 *input,
float4 *output,
int type,
int sx,
int sw,
int offset,
int sample)
{
int x = sx + blockDim.x*blockIdx.x + threadIdx.x;
if(x < sx + sw) {
KernelGlobals kg;
kernel_background_evaluate(&kg, input, output, x);
} }
} }

View File

@@ -72,11 +72,10 @@ __kernel void kernel_ocl_path_trace(
#else /* __COMPILE_ONLY_MEGAKERNEL__ */ #else /* __COMPILE_ONLY_MEGAKERNEL__ */
__kernel void kernel_ocl_shader( __kernel void kernel_ocl_displace(
ccl_constant KernelData *data, ccl_constant KernelData *data,
ccl_global uint4 *input, ccl_global uint4 *input,
ccl_global float4 *output, ccl_global float4 *output,
ccl_global float *output_luma,
KERNEL_BUFFER_PARAMS, KERNEL_BUFFER_PARAMS,
@@ -92,13 +91,29 @@ __kernel void kernel_ocl_shader(
int x = sx + ccl_global_id(0); int x = sx + ccl_global_id(0);
if(x < sx + sw) { if(x < sx + sw) {
kernel_shader_evaluate(kg, kernel_displace_evaluate(kg, input, output, x);
input, }
output, }
output_luma, __kernel void kernel_ocl_background(
(ShaderEvalType)type, ccl_constant KernelData *data,
x, ccl_global uint4 *input,
sample); ccl_global float4 *output,
KERNEL_BUFFER_PARAMS,
int type, int sx, int sw, int offset, int sample)
{
KernelGlobals kglobals, *kg = &kglobals;
kg->data = data;
kernel_set_buffer_pointers(kg, KERNEL_BUFFER_ARGS);
kernel_set_buffer_info(kg);
int x = sx + ccl_global_id(0);
if(x < sx + sw) {
kernel_background_evaluate(kg, input, output, x);
} }
} }

View File

@@ -348,14 +348,12 @@ void OSLShader::eval_volume(KernelGlobals *kg, ShaderData *sd, PathState *state,
/* Displacement */ /* Displacement */
void OSLShader::eval_displacement(KernelGlobals *kg, ShaderData *sd) void OSLShader::eval_displacement(KernelGlobals *kg, ShaderData *sd, PathState *state)
{ {
/* setup shader globals from shader data */ /* setup shader globals from shader data */
OSLThreadData *tdata = kg->osl_tdata; OSLThreadData *tdata = kg->osl_tdata;
PathState state = {0}; shaderdata_to_shaderglobals(kg, sd, state, 0, tdata);
shaderdata_to_shaderglobals(kg, sd, &state, 0, tdata);
/* execute shader */ /* execute shader */
OSL::ShadingSystem *ss = (OSL::ShadingSystem*)kg->osl_ss; OSL::ShadingSystem *ss = (OSL::ShadingSystem*)kg->osl_ss;

View File

@@ -56,7 +56,7 @@ public:
static void eval_surface(KernelGlobals *kg, ShaderData *sd, PathState *state, int path_flag); static void eval_surface(KernelGlobals *kg, ShaderData *sd, PathState *state, int path_flag);
static void eval_background(KernelGlobals *kg, ShaderData *sd, PathState *state, int path_flag); static void eval_background(KernelGlobals *kg, ShaderData *sd, PathState *state, int path_flag);
static void eval_volume(KernelGlobals *kg, ShaderData *sd, PathState *state, int path_flag); static void eval_volume(KernelGlobals *kg, ShaderData *sd, PathState *state, int path_flag);
static void eval_displacement(KernelGlobals *kg, ShaderData *sd); static void eval_displacement(KernelGlobals *kg, ShaderData *sd, PathState *state);
/* attributes */ /* attributes */
static int find_attribute(KernelGlobals *kg, const ShaderData *sd, uint id, AttributeDescriptor *desc); static int find_attribute(KernelGlobals *kg, const ShaderData *sd, uint id, AttributeDescriptor *desc);

View File

@@ -174,6 +174,7 @@ bool BakeManager::bake(Device *device, DeviceScene *dscene, Scene *scene, Progre
device->mem_alloc("bake_input", d_input, MEM_READ_ONLY); device->mem_alloc("bake_input", d_input, MEM_READ_ONLY);
device->mem_copy_to(d_input); device->mem_copy_to(d_input);
device->mem_alloc("bake_output", d_output, MEM_READ_WRITE); device->mem_alloc("bake_output", d_output, MEM_READ_WRITE);
device->mem_zero(d_output);
DeviceTask task(DeviceTask::SHADER); DeviceTask task(DeviceTask::SHADER);
task.shader_input = d_input.device_pointer; task.shader_input = d_input.device_pointer;

View File

@@ -60,6 +60,7 @@ static void shade_background_pixels(Device *device, DeviceScene *dscene, int res
device->mem_alloc("shade_background_pixels_input", d_input, MEM_READ_ONLY); device->mem_alloc("shade_background_pixels_input", d_input, MEM_READ_ONLY);
device->mem_copy_to(d_input); device->mem_copy_to(d_input);
device->mem_alloc("shade_background_pixels_output", d_output, MEM_WRITE_ONLY); device->mem_alloc("shade_background_pixels_output", d_output, MEM_WRITE_ONLY);
device->mem_zero(d_output);
DeviceTask main_task(DeviceTask::SHADER); DeviceTask main_task(DeviceTask::SHADER);
main_task.shader_input = d_input.device_pointer; main_task.shader_input = d_input.device_pointer;

View File

@@ -124,6 +124,7 @@ bool MeshManager::displace(Device *device, DeviceScene *dscene, Scene *scene, Me
device->mem_alloc("displace_input", d_input, MEM_READ_ONLY); device->mem_alloc("displace_input", d_input, MEM_READ_ONLY);
device->mem_copy_to(d_input); device->mem_copy_to(d_input);
device->mem_alloc("displace_output", d_output, MEM_WRITE_ONLY); device->mem_alloc("displace_output", d_output, MEM_WRITE_ONLY);
device->mem_zero(d_output);
DeviceTask task(DeviceTask::SHADER); DeviceTask task(DeviceTask::SHADER);
task.shader_input = d_input.device_pointer; task.shader_input = d_input.device_pointer;