Code refactor: split displace/background into separate kernels, remove luma.
This commit is contained in:
@@ -493,78 +493,69 @@ ccl_device void kernel_bake_evaluate(KernelGlobals *kg, ccl_global uint4 *input,
|
||||
|
||||
#endif /* __BAKING__ */
|
||||
|
||||
ccl_device void kernel_shader_evaluate(KernelGlobals *kg,
|
||||
ccl_global uint4 *input,
|
||||
ccl_global float4 *output,
|
||||
ccl_global float *output_luma,
|
||||
ShaderEvalType type,
|
||||
int i,
|
||||
int sample)
|
||||
ccl_device void kernel_displace_evaluate(KernelGlobals *kg,
|
||||
ccl_global uint4 *input,
|
||||
ccl_global float4 *output,
|
||||
int i)
|
||||
{
|
||||
ShaderData sd;
|
||||
PathState state = {0};
|
||||
uint4 in = input[i];
|
||||
float3 out;
|
||||
|
||||
if(type == SHADER_EVAL_DISPLACE) {
|
||||
/* setup shader data */
|
||||
int object = in.x;
|
||||
int prim = in.y;
|
||||
float u = __uint_as_float(in.z);
|
||||
float v = __uint_as_float(in.w);
|
||||
/* setup shader data */
|
||||
int object = in.x;
|
||||
int prim = in.y;
|
||||
float u = __uint_as_float(in.z);
|
||||
float v = __uint_as_float(in.w);
|
||||
|
||||
shader_setup_from_displace(kg, &sd, object, prim, u, v);
|
||||
shader_setup_from_displace(kg, &sd, object, prim, u, v);
|
||||
|
||||
/* evaluate */
|
||||
float3 P = sd.P;
|
||||
shader_eval_displacement(kg, &sd, &state);
|
||||
out = sd.P - P;
|
||||
/* evaluate */
|
||||
float3 P = sd.P;
|
||||
shader_eval_displacement(kg, &sd, &state);
|
||||
float3 D = sd.P - P;
|
||||
|
||||
object_inverse_dir_transform(kg, &sd, &out);
|
||||
}
|
||||
else { // SHADER_EVAL_BACKGROUND
|
||||
/* setup ray */
|
||||
Ray ray;
|
||||
float u = __uint_as_float(in.x);
|
||||
float v = __uint_as_float(in.y);
|
||||
object_inverse_dir_transform(kg, &sd, &D);
|
||||
|
||||
ray.P = make_float3(0.0f, 0.0f, 0.0f);
|
||||
ray.D = equirectangular_to_direction(u, v);
|
||||
ray.t = 0.0f;
|
||||
/* write output */
|
||||
output[i] += make_float4(D.x, D.y, D.z, 0.0f);
|
||||
}
|
||||
|
||||
ccl_device void kernel_background_evaluate(KernelGlobals *kg,
|
||||
ccl_global uint4 *input,
|
||||
ccl_global float4 *output,
|
||||
int i)
|
||||
{
|
||||
ShaderData sd;
|
||||
PathState state = {0};
|
||||
uint4 in = input[i];
|
||||
|
||||
/* setup ray */
|
||||
Ray ray;
|
||||
float u = __uint_as_float(in.x);
|
||||
float v = __uint_as_float(in.y);
|
||||
|
||||
ray.P = make_float3(0.0f, 0.0f, 0.0f);
|
||||
ray.D = equirectangular_to_direction(u, v);
|
||||
ray.t = 0.0f;
|
||||
#ifdef __CAMERA_MOTION__
|
||||
ray.time = 0.5f;
|
||||
ray.time = 0.5f;
|
||||
#endif
|
||||
|
||||
#ifdef __RAY_DIFFERENTIALS__
|
||||
ray.dD = differential3_zero();
|
||||
ray.dP = differential3_zero();
|
||||
ray.dD = differential3_zero();
|
||||
ray.dP = differential3_zero();
|
||||
#endif
|
||||
|
||||
/* setup shader data */
|
||||
shader_setup_from_background(kg, &sd, &ray);
|
||||
/* setup shader data */
|
||||
shader_setup_from_background(kg, &sd, &ray);
|
||||
|
||||
/* evaluate */
|
||||
int flag = 0; /* we can't know which type of BSDF this is for */
|
||||
float3 color = shader_eval_background(kg, &sd, &state, flag);
|
||||
|
||||
/* evaluate */
|
||||
int flag = 0; /* we can't know which type of BSDF this is for */
|
||||
out = shader_eval_background(kg, &sd, &state, flag);
|
||||
}
|
||||
|
||||
/* write output */
|
||||
if(sample == 0) {
|
||||
if(output != NULL) {
|
||||
output[i] = make_float4(out.x, out.y, out.z, 0.0f);
|
||||
}
|
||||
if(output_luma != NULL) {
|
||||
output_luma[i] = average(out);
|
||||
}
|
||||
}
|
||||
else {
|
||||
if(output != NULL) {
|
||||
output[i] += make_float4(out.x, out.y, out.z, 0.0f);
|
||||
}
|
||||
if(output_luma != NULL) {
|
||||
output_luma[i] += average(out);
|
||||
}
|
||||
}
|
||||
output[i] += make_float4(color.x, color.y, color.z, 0.0f);
|
||||
}
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
@@ -1204,7 +1204,7 @@ ccl_device void shader_eval_displacement(KernelGlobals *kg, ShaderData *sd, ccl_
|
||||
#ifdef __SVM__
|
||||
# ifdef __OSL__
|
||||
if(kg->osl)
|
||||
OSLShader::eval_displacement(kg, sd);
|
||||
OSLShader::eval_displacement(kg, sd, state);
|
||||
else
|
||||
# endif
|
||||
{
|
||||
|
@@ -41,7 +41,6 @@ void KERNEL_FUNCTION_FULL_NAME(convert_to_half_float)(KernelGlobals *kg,
|
||||
void KERNEL_FUNCTION_FULL_NAME(shader)(KernelGlobals *kg,
|
||||
uint4 *input,
|
||||
float4 *output,
|
||||
float *output_luma,
|
||||
int type,
|
||||
int filter,
|
||||
int i,
|
||||
|
@@ -149,7 +149,6 @@ void KERNEL_FUNCTION_FULL_NAME(convert_to_half_float)(KernelGlobals *kg,
|
||||
void KERNEL_FUNCTION_FULL_NAME(shader)(KernelGlobals *kg,
|
||||
uint4 *input,
|
||||
float4 *output,
|
||||
float *output_luma,
|
||||
int type,
|
||||
int filter,
|
||||
int i,
|
||||
@@ -160,7 +159,6 @@ void KERNEL_FUNCTION_FULL_NAME(shader)(KernelGlobals *kg,
|
||||
STUB_ASSERT(KERNEL_ARCH, shader);
|
||||
#else
|
||||
if(type >= SHADER_EVAL_BAKE) {
|
||||
kernel_assert(output_luma == NULL);
|
||||
# ifdef __BAKING__
|
||||
kernel_bake_evaluate(kg,
|
||||
input,
|
||||
@@ -172,14 +170,11 @@ void KERNEL_FUNCTION_FULL_NAME(shader)(KernelGlobals *kg,
|
||||
sample);
|
||||
# endif
|
||||
}
|
||||
else if(type == SHADER_EVAL_DISPLACE) {
|
||||
kernel_displace_evaluate(kg, input, output, i);
|
||||
}
|
||||
else {
|
||||
kernel_shader_evaluate(kg,
|
||||
input,
|
||||
output,
|
||||
output_luma,
|
||||
(ShaderEvalType)type,
|
||||
i,
|
||||
sample);
|
||||
kernel_background_evaluate(kg, input, output, i);
|
||||
}
|
||||
#endif /* KERNEL_STUB */
|
||||
}
|
||||
|
@@ -91,26 +91,37 @@ kernel_cuda_convert_to_half_float(uchar4 *rgba, float *buffer, float sample_scal
|
||||
|
||||
extern "C" __global__ void
|
||||
CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
|
||||
kernel_cuda_shader(uint4 *input,
|
||||
float4 *output,
|
||||
float *output_luma,
|
||||
int type,
|
||||
int sx,
|
||||
int sw,
|
||||
int offset,
|
||||
int sample)
|
||||
kernel_cuda_displace(uint4 *input,
|
||||
float4 *output,
|
||||
int type,
|
||||
int sx,
|
||||
int sw,
|
||||
int offset,
|
||||
int sample)
|
||||
{
|
||||
int x = sx + blockDim.x*blockIdx.x + threadIdx.x;
|
||||
|
||||
if(x < sx + sw) {
|
||||
KernelGlobals kg;
|
||||
kernel_shader_evaluate(&kg,
|
||||
input,
|
||||
output,
|
||||
output_luma,
|
||||
(ShaderEvalType)type,
|
||||
x,
|
||||
sample);
|
||||
kernel_displace_evaluate(&kg, input, output, x);
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" __global__ void
|
||||
CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
|
||||
kernel_cuda_background(uint4 *input,
|
||||
float4 *output,
|
||||
int type,
|
||||
int sx,
|
||||
int sw,
|
||||
int offset,
|
||||
int sample)
|
||||
{
|
||||
int x = sx + blockDim.x*blockIdx.x + threadIdx.x;
|
||||
|
||||
if(x < sx + sw) {
|
||||
KernelGlobals kg;
|
||||
kernel_background_evaluate(&kg, input, output, x);
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -72,11 +72,10 @@ __kernel void kernel_ocl_path_trace(
|
||||
|
||||
#else /* __COMPILE_ONLY_MEGAKERNEL__ */
|
||||
|
||||
__kernel void kernel_ocl_shader(
|
||||
__kernel void kernel_ocl_displace(
|
||||
ccl_constant KernelData *data,
|
||||
ccl_global uint4 *input,
|
||||
ccl_global float4 *output,
|
||||
ccl_global float *output_luma,
|
||||
|
||||
KERNEL_BUFFER_PARAMS,
|
||||
|
||||
@@ -92,13 +91,29 @@ __kernel void kernel_ocl_shader(
|
||||
int x = sx + ccl_global_id(0);
|
||||
|
||||
if(x < sx + sw) {
|
||||
kernel_shader_evaluate(kg,
|
||||
input,
|
||||
output,
|
||||
output_luma,
|
||||
(ShaderEvalType)type,
|
||||
x,
|
||||
sample);
|
||||
kernel_displace_evaluate(kg, input, output, x);
|
||||
}
|
||||
}
|
||||
__kernel void kernel_ocl_background(
|
||||
ccl_constant KernelData *data,
|
||||
ccl_global uint4 *input,
|
||||
ccl_global float4 *output,
|
||||
|
||||
KERNEL_BUFFER_PARAMS,
|
||||
|
||||
int type, int sx, int sw, int offset, int sample)
|
||||
{
|
||||
KernelGlobals kglobals, *kg = &kglobals;
|
||||
|
||||
kg->data = data;
|
||||
|
||||
kernel_set_buffer_pointers(kg, KERNEL_BUFFER_ARGS);
|
||||
kernel_set_buffer_info(kg);
|
||||
|
||||
int x = sx + ccl_global_id(0);
|
||||
|
||||
if(x < sx + sw) {
|
||||
kernel_background_evaluate(kg, input, output, x);
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -348,14 +348,12 @@ void OSLShader::eval_volume(KernelGlobals *kg, ShaderData *sd, PathState *state,
|
||||
|
||||
/* Displacement */
|
||||
|
||||
void OSLShader::eval_displacement(KernelGlobals *kg, ShaderData *sd)
|
||||
void OSLShader::eval_displacement(KernelGlobals *kg, ShaderData *sd, PathState *state)
|
||||
{
|
||||
/* setup shader globals from shader data */
|
||||
OSLThreadData *tdata = kg->osl_tdata;
|
||||
|
||||
PathState state = {0};
|
||||
|
||||
shaderdata_to_shaderglobals(kg, sd, &state, 0, tdata);
|
||||
shaderdata_to_shaderglobals(kg, sd, state, 0, tdata);
|
||||
|
||||
/* execute shader */
|
||||
OSL::ShadingSystem *ss = (OSL::ShadingSystem*)kg->osl_ss;
|
||||
|
@@ -56,7 +56,7 @@ public:
|
||||
static void eval_surface(KernelGlobals *kg, ShaderData *sd, PathState *state, int path_flag);
|
||||
static void eval_background(KernelGlobals *kg, ShaderData *sd, PathState *state, int path_flag);
|
||||
static void eval_volume(KernelGlobals *kg, ShaderData *sd, PathState *state, int path_flag);
|
||||
static void eval_displacement(KernelGlobals *kg, ShaderData *sd);
|
||||
static void eval_displacement(KernelGlobals *kg, ShaderData *sd, PathState *state);
|
||||
|
||||
/* attributes */
|
||||
static int find_attribute(KernelGlobals *kg, const ShaderData *sd, uint id, AttributeDescriptor *desc);
|
||||
|
Reference in New Issue
Block a user