Cycles: viewport render now takes scene color management settings into account,

except for curves, that's still missing from the OpenColorIO GLSL shader.

The pixels are stored in a half float texture, converterd from full float with
native GPU instructions and SIMD on the CPU, so it should be pretty quick.
Using a GLSL shader is useful for GPU render because it avoids a copy through
CPU memory.
This commit is contained in:
Brecht Van Lommel
2013-08-30 23:49:38 +00:00
parent 60ff60dcdc
commit 29f6616d60
25 changed files with 379 additions and 115 deletions

View File

@@ -588,7 +588,15 @@ bool BlenderSession::draw(int w, int h)
/* draw */ /* draw */
BufferParams buffer_params = BlenderSync::get_buffer_params(b_render, b_scene, b_v3d, b_rv3d, scene->camera, width, height); BufferParams buffer_params = BlenderSync::get_buffer_params(b_render, b_scene, b_v3d, b_rv3d, scene->camera, width, height);
return !session->draw(buffer_params); if(session->params.display_buffer_linear)
b_engine.bind_display_space_shader(b_scene);
bool draw_ok = !session->draw(buffer_params);
if(session->params.display_buffer_linear)
b_engine.unbind_display_space_shader();
return draw_ok;
} }
void BlenderSession::get_status(string& status, string& substatus) void BlenderSession::get_status(string& status, string& substatus)

View File

@@ -492,6 +492,9 @@ SessionParams BlenderSync::get_session_params(BL::RenderEngine b_engine, BL::Use
params.shadingsystem = SessionParams::SVM; params.shadingsystem = SessionParams::SVM;
else if(shadingsystem == 1) else if(shadingsystem == 1)
params.shadingsystem = SessionParams::OSL; params.shadingsystem = SessionParams::OSL;
/* color managagement */
params.display_buffer_linear = b_engine.support_display_space_shader(b_scene);
return params; return params;
} }

View File

@@ -41,7 +41,10 @@ void Device::pixels_alloc(device_memory& mem)
void Device::pixels_copy_from(device_memory& mem, int y, int w, int h) void Device::pixels_copy_from(device_memory& mem, int y, int w, int h)
{ {
mem_copy_from(mem, y, w, h, sizeof(uint8_t)*4); if(mem.data_type == TYPE_HALF)
mem_copy_from(mem, y, w, h, sizeof(half4));
else
mem_copy_from(mem, y, w, h, sizeof(uchar4));
} }
void Device::pixels_free(device_memory& mem) void Device::pixels_free(device_memory& mem)
@@ -53,27 +56,49 @@ void Device::draw_pixels(device_memory& rgba, int y, int w, int h, int dy, int w
{ {
pixels_copy_from(rgba, y, w, h); pixels_copy_from(rgba, y, w, h);
GLuint texid;
glGenTextures(1, &texid);
glBindTexture(GL_TEXTURE_2D, texid);
if(rgba.data_type == TYPE_HALF)
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA16F_ARB, w, h, 0, GL_RGBA, GL_HALF_FLOAT, (void*)rgba.data_pointer);
else
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, w, h, 0, GL_RGBA, GL_UNSIGNED_BYTE, (void*)rgba.data_pointer);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
glEnable(GL_TEXTURE_2D);
if(transparent) { if(transparent) {
glEnable(GL_BLEND); glEnable(GL_BLEND);
glBlendFunc(GL_ONE, GL_ONE_MINUS_SRC_ALPHA); glBlendFunc(GL_ONE, GL_ONE_MINUS_SRC_ALPHA);
} }
glPixelZoom((float)width/(float)w, (float)height/(float)h); glColor3f(1.0f, 1.0f, 1.0f);
glRasterPos2f(0, dy);
uint8_t *pixels = (uint8_t*)rgba.data_pointer; glPushMatrix();
glTranslatef(0.0f, (float)dy, 0.0f);
/* for multi devices, this assumes the ineffecient method that we allocate glBegin(GL_QUADS);
* all pixels on the device even though we only render to a subset */
pixels += 4*y*w; glTexCoord2f(0.0f, 0.0f);
glVertex2f(0.0f, 0.0f);
glTexCoord2f(1.0f, 0.0f);
glVertex2f((float)width, 0.0f);
glTexCoord2f(1.0f, 1.0f);
glVertex2f((float)width, (float)height);
glTexCoord2f(0.0f, 1.0f);
glVertex2f(0.0f, (float)height);
glDrawPixels(w, h, GL_RGBA, GL_UNSIGNED_BYTE, pixels); glEnd();
glRasterPos2f(0.0f, 0.0f); glPopMatrix();
glPixelZoom(1.0f, 1.0f);
if(transparent) if(transparent)
glDisable(GL_BLEND); glDisable(GL_BLEND);
glBindTexture(GL_TEXTURE_2D, 0);
glDisable(GL_TEXTURE_2D);
glDeleteTextures(1, &texid);
} }
Device *Device::create(DeviceInfo& info, Stats &stats, bool background) Device *Device::create(DeviceInfo& info, Stats &stats, bool background)

View File

@@ -127,8 +127,8 @@ public:
{ {
if(task->type == DeviceTask::PATH_TRACE) if(task->type == DeviceTask::PATH_TRACE)
thread_path_trace(*task); thread_path_trace(*task);
else if(task->type == DeviceTask::TONEMAP) else if(task->type == DeviceTask::FILM_CONVERT)
thread_tonemap(*task); thread_film_convert(*task);
else if(task->type == DeviceTask::SHADER) else if(task->type == DeviceTask::SHADER)
thread_shader(*task); thread_shader(*task);
} }
@@ -237,28 +237,55 @@ public:
#endif #endif
} }
void thread_tonemap(DeviceTask& task) void thread_film_convert(DeviceTask& task)
{ {
float sample_scale = 1.0f/(task.sample + 1);
if(task.rgba_half) {
#ifdef WITH_OPTIMIZED_KERNEL #ifdef WITH_OPTIMIZED_KERNEL
if(system_cpu_support_sse3()) { if(system_cpu_support_sse3()) {
for(int y = task.y; y < task.y + task.h; y++) for(int y = task.y; y < task.y + task.h; y++)
for(int x = task.x; x < task.x + task.w; x++) for(int x = task.x; x < task.x + task.w; x++)
kernel_cpu_sse3_tonemap(&kernel_globals, (uchar4*)task.rgba, (float*)task.buffer, kernel_cpu_sse3_convert_to_half_float(&kernel_globals, (uchar4*)task.rgba_half, (float*)task.buffer,
task.sample, x, y, task.offset, task.stride); sample_scale, x, y, task.offset, task.stride);
} }
else if(system_cpu_support_sse2()) { else if(system_cpu_support_sse2()) {
for(int y = task.y; y < task.y + task.h; y++) for(int y = task.y; y < task.y + task.h; y++)
for(int x = task.x; x < task.x + task.w; x++) for(int x = task.x; x < task.x + task.w; x++)
kernel_cpu_sse2_tonemap(&kernel_globals, (uchar4*)task.rgba, (float*)task.buffer, kernel_cpu_sse2_convert_to_half_float(&kernel_globals, (uchar4*)task.rgba_half, (float*)task.buffer,
task.sample, x, y, task.offset, task.stride); sample_scale, x, y, task.offset, task.stride);
} }
else else
#endif #endif
{ {
for(int y = task.y; y < task.y + task.h; y++) for(int y = task.y; y < task.y + task.h; y++)
for(int x = task.x; x < task.x + task.w; x++) for(int x = task.x; x < task.x + task.w; x++)
kernel_cpu_tonemap(&kernel_globals, (uchar4*)task.rgba, (float*)task.buffer, kernel_cpu_convert_to_half_float(&kernel_globals, (uchar4*)task.rgba_half, (float*)task.buffer,
task.sample, x, y, task.offset, task.stride); sample_scale, x, y, task.offset, task.stride);
}
}
else {
#ifdef WITH_OPTIMIZED_KERNEL
if(system_cpu_support_sse3()) {
for(int y = task.y; y < task.y + task.h; y++)
for(int x = task.x; x < task.x + task.w; x++)
kernel_cpu_sse3_convert_to_byte(&kernel_globals, (uchar4*)task.rgba_byte, (float*)task.buffer,
sample_scale, x, y, task.offset, task.stride);
}
else if(system_cpu_support_sse2()) {
for(int y = task.y; y < task.y + task.h; y++)
for(int x = task.x; x < task.x + task.w; x++)
kernel_cpu_sse2_convert_to_byte(&kernel_globals, (uchar4*)task.rgba_byte, (float*)task.buffer,
sample_scale, x, y, task.offset, task.stride);
}
else
#endif
{
for(int y = task.y; y < task.y + task.h; y++)
for(int x = task.x; x < task.x + task.w; x++)
kernel_cpu_convert_to_byte(&kernel_globals, (uchar4*)task.rgba_byte, (float*)task.buffer,
sample_scale, x, y, task.offset, task.stride);
}
} }
} }

View File

@@ -625,7 +625,7 @@ public:
cuda_pop_context(); cuda_pop_context();
} }
void tonemap(DeviceTask& task, device_ptr buffer, device_ptr rgba) void film_convert(DeviceTask& task, device_ptr buffer, device_ptr rgba_byte, device_ptr rgba_half)
{ {
if(have_error()) if(have_error())
return; return;
@@ -633,11 +633,14 @@ public:
cuda_push_context(); cuda_push_context();
CUfunction cuFilmConvert; CUfunction cuFilmConvert;
CUdeviceptr d_rgba = map_pixels(rgba); CUdeviceptr d_rgba = map_pixels((rgba_byte)? rgba_byte: rgba_half);
CUdeviceptr d_buffer = cuda_device_ptr(buffer); CUdeviceptr d_buffer = cuda_device_ptr(buffer);
/* get kernel function */ /* get kernel function */
cuda_assert(cuModuleGetFunction(&cuFilmConvert, cuModule, "kernel_cuda_tonemap")) if(rgba_half)
cuda_assert(cuModuleGetFunction(&cuFilmConvert, cuModule, "kernel_cuda_convert_to_half_float"))
else
cuda_assert(cuModuleGetFunction(&cuFilmConvert, cuModule, "kernel_cuda_convert_to_byte"))
/* pass in parameters */ /* pass in parameters */
int offset = 0; int offset = 0;
@@ -648,11 +651,11 @@ public:
cuda_assert(cuParamSetv(cuFilmConvert, offset, &d_buffer, sizeof(d_buffer))) cuda_assert(cuParamSetv(cuFilmConvert, offset, &d_buffer, sizeof(d_buffer)))
offset += sizeof(d_buffer); offset += sizeof(d_buffer);
int sample = task.sample; float sample_scale = 1.0f/(task.sample + 1);
offset = align_up(offset, __alignof(sample)); offset = align_up(offset, __alignof(sample_scale));
cuda_assert(cuParamSeti(cuFilmConvert, offset, task.sample)) cuda_assert(cuParamSetf(cuFilmConvert, offset, sample_scale))
offset += sizeof(task.sample); offset += sizeof(sample_scale);
cuda_assert(cuParamSeti(cuFilmConvert, offset, task.x)) cuda_assert(cuParamSeti(cuFilmConvert, offset, task.x))
offset += sizeof(task.x); offset += sizeof(task.x);
@@ -684,7 +687,7 @@ public:
cuda_assert(cuFuncSetBlockShape(cuFilmConvert, xthreads, ythreads, 1)) cuda_assert(cuFuncSetBlockShape(cuFilmConvert, xthreads, ythreads, 1))
cuda_assert(cuLaunchGrid(cuFilmConvert, xblocks, yblocks)) cuda_assert(cuLaunchGrid(cuFilmConvert, xblocks, yblocks))
unmap_pixels(task.rgba); unmap_pixels((rgba_byte)? rgba_byte: rgba_half);
cuda_pop_context(); cuda_pop_context();
} }
@@ -771,13 +774,19 @@ public:
glGenBuffers(1, &pmem.cuPBO); glGenBuffers(1, &pmem.cuPBO);
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pmem.cuPBO); glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pmem.cuPBO);
glBufferData(GL_PIXEL_UNPACK_BUFFER, pmem.w*pmem.h*sizeof(GLfloat)*3, NULL, GL_DYNAMIC_DRAW); if(mem.data_type == TYPE_HALF)
glBufferData(GL_PIXEL_UNPACK_BUFFER, pmem.w*pmem.h*sizeof(GLhalf)*4, NULL, GL_DYNAMIC_DRAW);
else
glBufferData(GL_PIXEL_UNPACK_BUFFER, pmem.w*pmem.h*sizeof(uint8_t)*4, NULL, GL_DYNAMIC_DRAW);
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
glGenTextures(1, &pmem.cuTexId); glGenTextures(1, &pmem.cuTexId);
glBindTexture(GL_TEXTURE_2D, pmem.cuTexId); glBindTexture(GL_TEXTURE_2D, pmem.cuTexId);
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, pmem.w, pmem.h, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL); if(mem.data_type == TYPE_HALF)
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA16F_ARB, pmem.w, pmem.h, 0, GL_RGBA, GL_HALF_FLOAT, NULL);
else
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, pmem.w, pmem.h, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
glBindTexture(GL_TEXTURE_2D, 0); glBindTexture(GL_TEXTURE_2D, 0);
@@ -865,11 +874,19 @@ public:
/* for multi devices, this assumes the ineffecient method that we allocate /* for multi devices, this assumes the ineffecient method that we allocate
* all pixels on the device even though we only render to a subset */ * all pixels on the device even though we only render to a subset */
size_t offset = sizeof(uint8_t)*4*y*w; size_t offset = 4*y*w;
if(mem.data_type == TYPE_HALF)
offset *= sizeof(GLhalf);
else
offset *= sizeof(uint8_t);
glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, pmem.cuPBO); glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, pmem.cuPBO);
glBindTexture(GL_TEXTURE_2D, pmem.cuTexId); glBindTexture(GL_TEXTURE_2D, pmem.cuTexId);
glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, w, h, GL_RGBA, GL_UNSIGNED_BYTE, (void*)offset); if(mem.data_type == TYPE_HALF)
glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, w, h, GL_RGBA, GL_HALF_FLOAT, (void*)offset);
else
glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, w, h, GL_RGBA, GL_UNSIGNED_BYTE, (void*)offset);
glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0); glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0);
glEnable(GL_TEXTURE_2D); glEnable(GL_TEXTURE_2D);
@@ -961,9 +978,9 @@ public:
void task_add(DeviceTask& task) void task_add(DeviceTask& task)
{ {
if(task.type == DeviceTask::TONEMAP) { if(task.type == DeviceTask::FILM_CONVERT) {
/* must be done in main thread due to opengl access */ /* must be done in main thread due to opengl access */
tonemap(task, task.buffer, task.rgba); film_convert(task, task.buffer, task.rgba_byte, task.rgba_half);
cuda_push_context(); cuda_push_context();
cuda_assert(cuCtxSynchronize()) cuda_assert(cuCtxSynchronize())

View File

@@ -46,7 +46,8 @@ enum DataType {
TYPE_UCHAR, TYPE_UCHAR,
TYPE_UINT, TYPE_UINT,
TYPE_INT, TYPE_INT,
TYPE_FLOAT TYPE_FLOAT,
TYPE_HALF
}; };
static inline size_t datatype_size(DataType datatype) static inline size_t datatype_size(DataType datatype)
@@ -56,6 +57,7 @@ static inline size_t datatype_size(DataType datatype)
case TYPE_FLOAT: return sizeof(float); case TYPE_FLOAT: return sizeof(float);
case TYPE_UINT: return sizeof(uint); case TYPE_UINT: return sizeof(uint);
case TYPE_INT: return sizeof(int); case TYPE_INT: return sizeof(int);
case TYPE_HALF: return sizeof(half);
default: return 0; default: return 0;
} }
} }
@@ -147,6 +149,11 @@ template<> struct device_type_traits<float4> {
static const int num_elements = 4; static const int num_elements = 4;
}; };
template<> struct device_type_traits<half4> {
static const DataType data_type = TYPE_HALF;
static const int num_elements = 4;
};
/* Device Memory */ /* Device Memory */
class device_memory class device_memory

View File

@@ -261,7 +261,6 @@ public:
if(sub.device == sub_device) { if(sub.device == sub_device) {
if(tile.buffer) tile.buffer = sub.ptr_map[tile.buffer]; if(tile.buffer) tile.buffer = sub.ptr_map[tile.buffer];
if(tile.rng_state) tile.rng_state = sub.ptr_map[tile.rng_state]; if(tile.rng_state) tile.rng_state = sub.ptr_map[tile.rng_state];
if(tile.rgba) tile.rgba = sub.ptr_map[tile.rgba];
} }
} }
} }
@@ -290,7 +289,8 @@ public:
tasks.pop_front(); tasks.pop_front();
if(task.buffer) subtask.buffer = sub.ptr_map[task.buffer]; if(task.buffer) subtask.buffer = sub.ptr_map[task.buffer];
if(task.rgba) subtask.rgba = sub.ptr_map[task.rgba]; if(task.rgba_byte) subtask.rgba_byte = sub.ptr_map[task.rgba_byte];
if(task.rgba_half) subtask.rgba_half = sub.ptr_map[task.rgba_half];
if(task.shader_input) subtask.shader_input = sub.ptr_map[task.shader_input]; if(task.shader_input) subtask.shader_input = sub.ptr_map[task.shader_input];
if(task.shader_output) subtask.shader_output = sub.ptr_map[task.shader_output]; if(task.shader_output) subtask.shader_output = sub.ptr_map[task.shader_output];

View File

@@ -408,7 +408,8 @@ protected:
rcv.read(task); rcv.read(task);
if(task.buffer) task.buffer = ptr_map[task.buffer]; if(task.buffer) task.buffer = ptr_map[task.buffer];
if(task.rgba) task.rgba = ptr_map[task.rgba]; if(task.rgba_byte) task.rgba_byte = ptr_map[task.rgba_byte];
if(task.rgba_half) task.rgba_half = ptr_map[task.rgba_half];
if(task.shader_input) task.shader_input = ptr_map[task.shader_input]; if(task.shader_input) task.shader_input = ptr_map[task.shader_input];
if(task.shader_output) task.shader_output = ptr_map[task.shader_output]; if(task.shader_output) task.shader_output = ptr_map[task.shader_output];
@@ -448,7 +449,6 @@ protected:
if(tile.buffer) tile.buffer = ptr_map[tile.buffer]; if(tile.buffer) tile.buffer = ptr_map[tile.buffer];
if(tile.rng_state) tile.rng_state = ptr_map[tile.rng_state]; if(tile.rng_state) tile.rng_state = ptr_map[tile.rng_state];
if(tile.rgba) tile.rgba = ptr_map[tile.rgba];
result = true; result = true;
break; break;
@@ -478,7 +478,6 @@ protected:
if(tile.buffer) tile.buffer = ptr_imap[tile.buffer]; if(tile.buffer) tile.buffer = ptr_imap[tile.buffer];
if(tile.rng_state) tile.rng_state = ptr_imap[tile.rng_state]; if(tile.rng_state) tile.rng_state = ptr_imap[tile.rng_state];
if(tile.rgba) tile.rgba = ptr_imap[tile.rgba];
RPCSend snd(socket, "release_tile"); RPCSend snd(socket, "release_tile");
snd.add(tile); snd.add(tile);

View File

@@ -94,7 +94,7 @@ public:
int type = (int)task.type; int type = (int)task.type;
archive & type & task.x & task.y & task.w & task.h; archive & type & task.x & task.y & task.w & task.h;
archive & task.rgba & task.buffer & task.sample & task.num_samples; archive & task.rgba_byte & task.rgba_half & task.buffer & task.sample & task.num_samples;
archive & task.offset & task.stride; archive & task.offset & task.stride;
archive & task.shader_input & task.shader_output & task.shader_eval_type; archive & task.shader_input & task.shader_output & task.shader_eval_type;
archive & task.shader_x & task.shader_w; archive & task.shader_x & task.shader_w;
@@ -105,7 +105,7 @@ public:
archive & tile.x & tile.y & tile.w & tile.h; archive & tile.x & tile.y & tile.w & tile.h;
archive & tile.start_sample & tile.num_samples & tile.sample; archive & tile.start_sample & tile.num_samples & tile.sample;
archive & tile.offset & tile.stride; archive & tile.offset & tile.stride;
archive & tile.buffer & tile.rng_state & tile.rgba; archive & tile.buffer & tile.rng_state;
} }
void write() void write()
@@ -234,7 +234,7 @@ public:
int type; int type;
*archive & type & task.x & task.y & task.w & task.h; *archive & type & task.x & task.y & task.w & task.h;
*archive & task.rgba & task.buffer & task.sample & task.num_samples; *archive & task.rgba_byte & task.rgba_half & task.buffer & task.sample & task.num_samples;
*archive & task.resolution & task.offset & task.stride; *archive & task.resolution & task.offset & task.stride;
*archive & task.shader_input & task.shader_output & task.shader_eval_type; *archive & task.shader_input & task.shader_output & task.shader_eval_type;
*archive & task.shader_x & task.shader_w; *archive & task.shader_x & task.shader_w;
@@ -247,7 +247,7 @@ public:
*archive & tile.x & tile.y & tile.w & tile.h; *archive & tile.x & tile.y & tile.w & tile.h;
*archive & tile.start_sample & tile.num_samples & tile.sample; *archive & tile.start_sample & tile.num_samples & tile.sample;
*archive & tile.resolution & tile.offset & tile.stride; *archive & tile.resolution & tile.offset & tile.stride;
*archive & tile.buffer & tile.rng_state & tile.rgba; *archive & tile.buffer & tile.rng_state & tile.rgba_byte & tile.rgba_half;
tile.buffers = NULL; tile.buffers = NULL;
} }

View File

@@ -321,7 +321,8 @@ public:
cl_device_id cdDevice; cl_device_id cdDevice;
cl_program cpProgram; cl_program cpProgram;
cl_kernel ckPathTraceKernel; cl_kernel ckPathTraceKernel;
cl_kernel ckFilmConvertKernel; cl_kernel ckFilmConvertByteKernel;
cl_kernel ckFilmConvertHalfFloatKernel;
cl_kernel ckShaderKernel; cl_kernel ckShaderKernel;
cl_int ciErr; cl_int ciErr;
@@ -431,7 +432,8 @@ public:
cqCommandQueue = NULL; cqCommandQueue = NULL;
cpProgram = NULL; cpProgram = NULL;
ckPathTraceKernel = NULL; ckPathTraceKernel = NULL;
ckFilmConvertKernel = NULL; ckFilmConvertByteKernel = NULL;
ckFilmConvertHalfFloatKernel = NULL;
ckShaderKernel = NULL; ckShaderKernel = NULL;
null_mem = 0; null_mem = 0;
device_initialized = false; device_initialized = false;
@@ -762,7 +764,11 @@ public:
if(opencl_error(ciErr)) if(opencl_error(ciErr))
return false; return false;
ckFilmConvertKernel = clCreateKernel(cpProgram, "kernel_ocl_tonemap", &ciErr); ckFilmConvertByteKernel = clCreateKernel(cpProgram, "kernel_ocl_convert_to_byte", &ciErr);
if(opencl_error(ciErr))
return false;
ckFilmConvertHalfFloatKernel = clCreateKernel(cpProgram, "kernel_ocl_convert_to_half_float", &ciErr);
if(opencl_error(ciErr)) if(opencl_error(ciErr))
return false; return false;
@@ -788,8 +794,10 @@ public:
if(ckPathTraceKernel) if(ckPathTraceKernel)
clReleaseKernel(ckPathTraceKernel); clReleaseKernel(ckPathTraceKernel);
if(ckFilmConvertKernel) if(ckFilmConvertByteKernel)
clReleaseKernel(ckFilmConvertKernel); clReleaseKernel(ckFilmConvertByteKernel);
if(ckFilmConvertHalfFloatKernel)
clReleaseKernel(ckFilmConvertHalfFloatKernel);
if(cpProgram) if(cpProgram)
clReleaseProgram(cpProgram); clReleaseProgram(cpProgram);
if(cqCommandQueue) if(cqCommandQueue)
@@ -980,17 +988,17 @@ public:
return err; return err;
} }
void tonemap(DeviceTask& task, device_ptr buffer, device_ptr rgba) void film_convert(DeviceTask& task, device_ptr buffer, device_ptr rgba_byte, device_ptr rgba_half)
{ {
/* cast arguments to cl types */ /* cast arguments to cl types */
cl_mem d_data = CL_MEM_PTR(const_mem_map["__data"]->device_pointer); cl_mem d_data = CL_MEM_PTR(const_mem_map["__data"]->device_pointer);
cl_mem d_rgba = CL_MEM_PTR(rgba); cl_mem d_rgba = (rgba_byte)? CL_MEM_PTR(rgba_byte): CL_MEM_PTR(rgba_half);
cl_mem d_buffer = CL_MEM_PTR(buffer); cl_mem d_buffer = CL_MEM_PTR(buffer);
cl_int d_x = task.x; cl_int d_x = task.x;
cl_int d_y = task.y; cl_int d_y = task.y;
cl_int d_w = task.w; cl_int d_w = task.w;
cl_int d_h = task.h; cl_int d_h = task.h;
cl_int d_sample = task.sample; cl_float d_sample_scale = 1.0f/(task.sample + 1);
cl_int d_offset = task.offset; cl_int d_offset = task.offset;
cl_int d_stride = task.stride; cl_int d_stride = task.stride;
@@ -998,6 +1006,8 @@ public:
cl_uint narg = 0; cl_uint narg = 0;
ciErr = 0; ciErr = 0;
cl_kernel ckFilmConvertKernel = (rgba_byte)? ckFilmConvertByteKernel: ckFilmConvertHalfFloatKernel;
ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_data), (void*)&d_data); ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_data), (void*)&d_data);
ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_rgba), (void*)&d_rgba); ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_rgba), (void*)&d_rgba);
ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_buffer), (void*)&d_buffer); ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_buffer), (void*)&d_buffer);
@@ -1006,7 +1016,7 @@ public:
ciErr |= set_kernel_arg_mem(ckFilmConvertKernel, &narg, #name); ciErr |= set_kernel_arg_mem(ckFilmConvertKernel, &narg, #name);
#include "kernel_textures.h" #include "kernel_textures.h"
ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_sample), (void*)&d_sample); ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_sample_scale), (void*)&d_sample_scale);
ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_x), (void*)&d_x); ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_x), (void*)&d_x);
ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_y), (void*)&d_y); ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_y), (void*)&d_y);
ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_w), (void*)&d_w); ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_w), (void*)&d_w);
@@ -1052,8 +1062,8 @@ public:
void thread_run(DeviceTask *task) void thread_run(DeviceTask *task)
{ {
if(task->type == DeviceTask::TONEMAP) { if(task->type == DeviceTask::FILM_CONVERT) {
tonemap(*task, task->buffer, task->rgba); film_convert(*task, task->buffer, task->rgba_byte, task->rgba_half);
} }
else if(task->type == DeviceTask::SHADER) { else if(task->type == DeviceTask::SHADER) {
shader(*task); shader(*task);

View File

@@ -27,7 +27,7 @@ CCL_NAMESPACE_BEGIN
/* Device Task */ /* Device Task */
DeviceTask::DeviceTask(Type type_) DeviceTask::DeviceTask(Type type_)
: type(type_), x(0), y(0), w(0), h(0), rgba(0), buffer(0), : type(type_), x(0), y(0), w(0), h(0), rgba_byte(0), rgba_half(0), buffer(0),
sample(0), num_samples(1), sample(0), num_samples(1),
shader_input(0), shader_output(0), shader_input(0), shader_output(0),
shader_eval_type(0), shader_x(0), shader_w(0) shader_eval_type(0), shader_x(0), shader_w(0)

View File

@@ -34,11 +34,12 @@ class Tile;
class DeviceTask : public Task { class DeviceTask : public Task {
public: public:
typedef enum { PATH_TRACE, TONEMAP, SHADER } Type; typedef enum { PATH_TRACE, FILM_CONVERT, SHADER } Type;
Type type; Type type;
int x, y, w, h; int x, y, w, h;
device_ptr rgba; device_ptr rgba_byte;
device_ptr rgba_half;
device_ptr buffer; device_ptr buffer;
int sample; int sample;
int num_samples; int num_samples;

View File

@@ -52,7 +52,7 @@ __kernel void kernel_ocl_path_trace(
kernel_path_trace(kg, buffer, rng_state, sample, x, y, offset, stride); kernel_path_trace(kg, buffer, rng_state, sample, x, y, offset, stride);
} }
__kernel void kernel_ocl_tonemap( __kernel void kernel_ocl_convert_to_byte(
__constant KernelData *data, __constant KernelData *data,
__global uchar4 *rgba, __global uchar4 *rgba,
__global float *buffer, __global float *buffer,
@@ -61,7 +61,7 @@ __kernel void kernel_ocl_tonemap(
__global type *name, __global type *name,
#include "kernel_textures.h" #include "kernel_textures.h"
int sample, float sample_scale,
int sx, int sy, int sw, int sh, int offset, int stride) int sx, int sy, int sw, int sh, int offset, int stride)
{ {
KernelGlobals kglobals, *kg = &kglobals; KernelGlobals kglobals, *kg = &kglobals;
@@ -76,7 +76,34 @@ __kernel void kernel_ocl_tonemap(
int y = sy + get_global_id(1); int y = sy + get_global_id(1);
if(x < sx + sw && y < sy + sh) if(x < sx + sw && y < sy + sh)
kernel_film_tonemap(kg, rgba, buffer, sample, x, y, offset, stride); kernel_film_convert_to_byte(kg, rgba, buffer, sample_scale, x, y, offset, stride);
}
__kernel void kernel_ocl_convert_to_half_float(
__constant KernelData *data,
__global uchar4 *rgba,
__global float *buffer,
#define KERNEL_TEX(type, ttype, name) \
__global type *name,
#include "kernel_textures.h"
float sample_scale,
int sx, int sy, int sw, int sh, int offset, int stride)
{
KernelGlobals kglobals, *kg = &kglobals;
kg->data = data;
#define KERNEL_TEX(type, ttype, name) \
kg->name = name;
#include "kernel_textures.h"
int x = sx + get_global_id(0);
int y = sy + get_global_id(1);
if(x < sx + sw && y < sy + sh)
kernel_film_convert_to_half_float(kg, rgba, buffer, sample_scale, x, y, offset, stride);
} }
__kernel void kernel_ocl_shader( __kernel void kernel_ocl_shader(

View File

@@ -96,11 +96,16 @@ void kernel_cpu_path_trace(KernelGlobals *kg, float *buffer, unsigned int *rng_s
kernel_path_trace(kg, buffer, rng_state, sample, x, y, offset, stride); kernel_path_trace(kg, buffer, rng_state, sample, x, y, offset, stride);
} }
/* Tonemapping */ /* Film */
void kernel_cpu_tonemap(KernelGlobals *kg, uchar4 *rgba, float *buffer, int sample, int x, int y, int offset, int stride) void kernel_cpu_convert_to_byte(KernelGlobals *kg, uchar4 *rgba, float *buffer, float sample_scale, int x, int y, int offset, int stride)
{ {
kernel_film_tonemap(kg, rgba, buffer, sample, x, y, offset, stride); kernel_film_convert_to_byte(kg, rgba, buffer, sample_scale, x, y, offset, stride);
}
void kernel_cpu_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float *buffer, float sample_scale, int x, int y, int offset, int stride)
{
kernel_film_convert_to_half_float(kg, rgba, buffer, sample_scale, x, y, offset, stride);
} }
/* Shader Evaluation */ /* Shader Evaluation */

View File

@@ -44,13 +44,22 @@ extern "C" __global__ void kernel_cuda_branched_path_trace(float *buffer, uint *
} }
#endif #endif
extern "C" __global__ void kernel_cuda_tonemap(uchar4 *rgba, float *buffer, int sample, int sx, int sy, int sw, int sh, int offset, int stride) extern "C" __global__ void kernel_cuda_convert_to_byte(uchar4 *rgba, float *buffer, float sample_scale, int sx, int sy, int sw, int sh, int offset, int stride)
{ {
int x = sx + blockDim.x*blockIdx.x + threadIdx.x; int x = sx + blockDim.x*blockIdx.x + threadIdx.x;
int y = sy + blockDim.y*blockIdx.y + threadIdx.y; int y = sy + blockDim.y*blockIdx.y + threadIdx.y;
if(x < sx + sw && y < sy + sh) if(x < sx + sw && y < sy + sh)
kernel_film_tonemap(NULL, rgba, buffer, sample, x, y, offset, stride); kernel_film_convert_to_byte(NULL, rgba, buffer, sample_scale, x, y, offset, stride);
}
extern "C" __global__ void kernel_cuda_convert_to_half_float(uchar4 *rgba, float *buffer, float sample_scale, int sx, int sy, int sw, int sh, int offset, int stride)
{
int x = sx + blockDim.x*blockIdx.x + threadIdx.x;
int y = sy + blockDim.y*blockIdx.y + threadIdx.y;
if(x < sx + sw && y < sy + sh)
kernel_film_convert_to_half_float(NULL, rgba, buffer, sample_scale, x, y, offset, stride);
} }
extern "C" __global__ void kernel_cuda_shader(uint4 *input, float4 *output, int type, int sx) extern "C" __global__ void kernel_cuda_shader(uint4 *input, float4 *output, int type, int sx)

View File

@@ -36,23 +36,29 @@ void kernel_tex_copy(KernelGlobals *kg, const char *name, device_ptr mem, size_t
void kernel_cpu_path_trace(KernelGlobals *kg, float *buffer, unsigned int *rng_state, void kernel_cpu_path_trace(KernelGlobals *kg, float *buffer, unsigned int *rng_state,
int sample, int x, int y, int offset, int stride); int sample, int x, int y, int offset, int stride);
void kernel_cpu_tonemap(KernelGlobals *kg, uchar4 *rgba, float *buffer, void kernel_cpu_convert_to_byte(KernelGlobals *kg, uchar4 *rgba, float *buffer,
int sample, int x, int y, int offset, int stride); float sample_scale, int x, int y, int offset, int stride);
void kernel_cpu_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float *buffer,
float sample_scale, int x, int y, int offset, int stride);
void kernel_cpu_shader(KernelGlobals *kg, uint4 *input, float4 *output, void kernel_cpu_shader(KernelGlobals *kg, uint4 *input, float4 *output,
int type, int i); int type, int i);
#ifdef WITH_OPTIMIZED_KERNEL #ifdef WITH_OPTIMIZED_KERNEL
void kernel_cpu_sse2_path_trace(KernelGlobals *kg, float *buffer, unsigned int *rng_state, void kernel_cpu_sse2_path_trace(KernelGlobals *kg, float *buffer, unsigned int *rng_state,
int sample, int x, int y, int offset, int stride); int sample, int x, int y, int offset, int stride);
void kernel_cpu_sse2_tonemap(KernelGlobals *kg, uchar4 *rgba, float *buffer, void kernel_cpu_sse2_convert_to_byte(KernelGlobals *kg, uchar4 *rgba, float *buffer,
int sample, int x, int y, int offset, int stride); float sample_scale, int x, int y, int offset, int stride);
void kernel_cpu_sse2_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float *buffer,
float sample_scale, int x, int y, int offset, int stride);
void kernel_cpu_sse2_shader(KernelGlobals *kg, uint4 *input, float4 *output, void kernel_cpu_sse2_shader(KernelGlobals *kg, uint4 *input, float4 *output,
int type, int i); int type, int i);
void kernel_cpu_sse3_path_trace(KernelGlobals *kg, float *buffer, unsigned int *rng_state, void kernel_cpu_sse3_path_trace(KernelGlobals *kg, float *buffer, unsigned int *rng_state,
int sample, int x, int y, int offset, int stride); int sample, int x, int y, int offset, int stride);
void kernel_cpu_sse3_tonemap(KernelGlobals *kg, uchar4 *rgba, float *buffer, void kernel_cpu_sse3_convert_to_byte(KernelGlobals *kg, uchar4 *rgba, float *buffer,
int sample, int x, int y, int offset, int stride); float sample_scale, int x, int y, int offset, int stride);
void kernel_cpu_sse3_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float *buffer,
float sample_scale, int x, int y, int offset, int stride);
void kernel_cpu_sse3_shader(KernelGlobals *kg, uint4 *input, float4 *output, void kernel_cpu_sse3_shader(KernelGlobals *kg, uint4 *input, float4 *output,
int type, int i); int type, int i);
#endif #endif

View File

@@ -25,8 +25,6 @@
#include <cuda.h> #include <cuda.h>
#include <float.h> #include <float.h>
#include "util_types.h"
/* Qualifier wrappers for different names on different devices */ /* Qualifier wrappers for different names on different devices */
#define __device __device__ __inline__ #define __device __device__ __inline__
@@ -41,6 +39,10 @@
#define kernel_assert(cond) #define kernel_assert(cond)
/* Types */
#include "util_types.h"
/* Textures */ /* Textures */
typedef texture<float4, 1> texture_float4; typedef texture<float4, 1> texture_float4;

View File

@@ -16,9 +16,8 @@
CCL_NAMESPACE_BEGIN CCL_NAMESPACE_BEGIN
__device float4 film_map(KernelGlobals *kg, float4 irradiance, int sample) __device float4 film_map(KernelGlobals *kg, float4 irradiance, float scale)
{ {
float scale = 1.0f/(float)(sample+1);
float exposure = kernel_data.film.exposure; float exposure = kernel_data.film.exposure;
float4 result = irradiance*scale; float4 result = irradiance*scale;
@@ -46,9 +45,9 @@ __device uchar4 film_float_to_byte(float4 color)
return result; return result;
} }
__device void kernel_film_tonemap(KernelGlobals *kg, __device void kernel_film_convert_to_byte(KernelGlobals *kg,
__global uchar4 *rgba, __global float *buffer, __global uchar4 *rgba, __global float *buffer,
int sample, int x, int y, int offset, int stride) float sample_scale, int x, int y, int offset, int stride)
{ {
/* buffer offset */ /* buffer offset */
int index = offset + x + y*stride; int index = offset + x + y*stride;
@@ -58,11 +57,25 @@ __device void kernel_film_tonemap(KernelGlobals *kg,
/* map colors */ /* map colors */
float4 irradiance = *((__global float4*)buffer); float4 irradiance = *((__global float4*)buffer);
float4 float_result = film_map(kg, irradiance, sample); float4 float_result = film_map(kg, irradiance, sample_scale);
uchar4 byte_result = film_float_to_byte(float_result); uchar4 byte_result = film_float_to_byte(float_result);
*rgba = byte_result; *rgba = byte_result;
} }
__device void kernel_film_convert_to_half_float(KernelGlobals *kg,
__global uchar4 *rgba, __global float *buffer,
float sample_scale, int x, int y, int offset, int stride)
{
/* buffer offset */
int index = offset + x + y*stride;
float4 *in = (__global float4*)(buffer + index*kernel_data.film.pass_stride);
half *out = (half*)rgba + index*4;
float scale = kernel_data.film.exposure*sample_scale;
float4_store_half(out, in, scale);
}
CCL_NAMESPACE_END CCL_NAMESPACE_END

View File

@@ -45,11 +45,16 @@ void kernel_cpu_sse2_path_trace(KernelGlobals *kg, float *buffer, unsigned int *
kernel_path_trace(kg, buffer, rng_state, sample, x, y, offset, stride); kernel_path_trace(kg, buffer, rng_state, sample, x, y, offset, stride);
} }
/* Tonemapping */ /* Film */
void kernel_cpu_sse2_tonemap(KernelGlobals *kg, uchar4 *rgba, float *buffer, int sample, int x, int y, int offset, int stride) void kernel_cpu_sse2_convert_to_byte(KernelGlobals *kg, uchar4 *rgba, float *buffer, float sample_scale, int x, int y, int offset, int stride)
{ {
kernel_film_tonemap(kg, rgba, buffer, sample, x, y, offset, stride); kernel_film_convert_to_byte(kg, rgba, buffer, sample_scale, x, y, offset, stride);
}
void kernel_cpu_sse2_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float *buffer, float sample_scale, int x, int y, int offset, int stride)
{
kernel_film_convert_to_half_float(kg, rgba, buffer, sample_scale, x, y, offset, stride);
} }
/* Shader Evaluate */ /* Shader Evaluate */

View File

@@ -47,11 +47,16 @@ void kernel_cpu_sse3_path_trace(KernelGlobals *kg, float *buffer, unsigned int *
kernel_path_trace(kg, buffer, rng_state, sample, x, y, offset, stride); kernel_path_trace(kg, buffer, rng_state, sample, x, y, offset, stride);
} }
/* Tonemapping */ /* Film */
void kernel_cpu_sse3_tonemap(KernelGlobals *kg, uchar4 *rgba, float *buffer, int sample, int x, int y, int offset, int stride) void kernel_cpu_sse3_convert_to_byte(KernelGlobals *kg, uchar4 *rgba, float *buffer, float sample_scale, int x, int y, int offset, int stride)
{ {
kernel_film_tonemap(kg, rgba, buffer, sample, x, y, offset, stride); kernel_film_convert_to_byte(kg, rgba, buffer, sample_scale, x, y, offset, stride);
}
void kernel_cpu_sse3_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float *buffer, float sample_scale, int x, int y, int offset, int stride)
{
kernel_film_convert_to_half_float(kg, rgba, buffer, sample_scale, x, y, offset, stride);
} }
/* Shader Evaluate */ /* Shader Evaluate */

View File

@@ -91,7 +91,6 @@ RenderTile::RenderTile()
buffer = 0; buffer = 0;
rng_state = 0; rng_state = 0;
rgba = 0;
buffers = NULL; buffers = NULL;
} }
@@ -298,12 +297,13 @@ bool RenderBuffers::get_pass_rect(PassType type, float exposure, int sample, int
/* Display Buffer */ /* Display Buffer */
DisplayBuffer::DisplayBuffer(Device *device_) DisplayBuffer::DisplayBuffer(Device *device_, bool linear)
{ {
device = device_; device = device_;
draw_width = 0; draw_width = 0;
draw_height = 0; draw_height = 0;
transparent = true; /* todo: determine from background */ transparent = true; /* todo: determine from background */
half_float = linear;
} }
DisplayBuffer::~DisplayBuffer() DisplayBuffer::~DisplayBuffer()
@@ -313,9 +313,13 @@ DisplayBuffer::~DisplayBuffer()
void DisplayBuffer::device_free() void DisplayBuffer::device_free()
{ {
if(rgba.device_pointer) { if(rgba_byte.device_pointer) {
device->pixels_free(rgba); device->pixels_free(rgba_byte);
rgba.clear(); rgba_byte.clear();
}
if(rgba_half.device_pointer) {
device->pixels_free(rgba_half);
rgba_half.clear();
} }
} }
@@ -330,8 +334,14 @@ void DisplayBuffer::reset(Device *device, BufferParams& params_)
device_free(); device_free();
/* allocate display pixels */ /* allocate display pixels */
rgba.resize(params.width, params.height); if(half_float) {
device->pixels_alloc(rgba); rgba_half.resize(params.width, params.height);
device->pixels_alloc(rgba_half);
}
else {
rgba_byte.resize(params.width, params.height);
device->pixels_alloc(rgba_byte);
}
} }
void DisplayBuffer::draw_set(int width, int height) void DisplayBuffer::draw_set(int width, int height)
@@ -347,6 +357,7 @@ void DisplayBuffer::draw(Device *device)
if(draw_width != 0 && draw_height != 0) { if(draw_width != 0 && draw_height != 0) {
glPushMatrix(); glPushMatrix();
glTranslatef(params.full_x, params.full_y, 0.0f); glTranslatef(params.full_x, params.full_y, 0.0f);
device_memory& rgba = rgba_data();
device->draw_pixels(rgba, 0, draw_width, draw_height, 0, params.width, params.height, transparent); device->draw_pixels(rgba, 0, draw_width, draw_height, 0, params.width, params.height, transparent);
@@ -366,8 +377,12 @@ void DisplayBuffer::write(Device *device, const string& filename)
if(w == 0 || h == 0) if(w == 0 || h == 0)
return; return;
if(half_float)
return;
/* read buffer from device */ /* read buffer from device */
device_memory& rgba = rgba_data();
device->pixels_copy_from(rgba, 0, w, h); device->pixels_copy_from(rgba, 0, w, h);
/* write image */ /* write image */
@@ -389,5 +404,13 @@ void DisplayBuffer::write(Device *device, const string& filename)
delete out; delete out;
} }
device_memory& DisplayBuffer::rgba_data()
{
if(half_float)
return rgba_half;
else
return rgba_byte;
}
CCL_NAMESPACE_END CCL_NAMESPACE_END

View File

@@ -87,8 +87,8 @@ protected:
/* Display Buffer /* Display Buffer
* *
* The buffer used for drawing during render, filled by tonemapping the render * The buffer used for drawing during render, filled by converting the render
* buffers and converting to uchar4 storage. */ * buffers to byte of half float storage */
class DisplayBuffer { class DisplayBuffer {
public: public:
@@ -100,10 +100,13 @@ public:
int draw_width, draw_height; int draw_width, draw_height;
/* draw alpha channel? */ /* draw alpha channel? */
bool transparent; bool transparent;
/* byte buffer for tonemapped result */ /* use half float? */
device_vector<uchar4> rgba; bool half_float;
/* byte buffer for converted result */
device_vector<uchar4> rgba_byte;
device_vector<half4> rgba_half;
DisplayBuffer(Device *device); DisplayBuffer(Device *device, bool linear = false);
~DisplayBuffer(); ~DisplayBuffer();
void reset(Device *device, BufferParams& params); void reset(Device *device, BufferParams& params);
@@ -113,6 +116,8 @@ public:
void draw(Device *device); void draw(Device *device);
bool draw_ready(); bool draw_ready();
device_memory& rgba_data();
protected: protected:
void device_free(); void device_free();
@@ -134,7 +139,6 @@ public:
device_ptr buffer; device_ptr buffer;
device_ptr rng_state; device_ptr rng_state;
device_ptr rgba;
RenderBuffers *buffers; RenderBuffers *buffers;

View File

@@ -56,7 +56,7 @@ Session::Session(const SessionParams& params_)
} }
else { else {
buffers = new RenderBuffers(device); buffers = new RenderBuffers(device);
display = new DisplayBuffer(device); display = new DisplayBuffer(device, params.display_buffer_linear);
} }
session_thread = NULL; session_thread = NULL;
@@ -371,7 +371,6 @@ bool Session::acquire_tile(Device *tile_device, RenderTile& rtile)
rtile.buffer = buffers->buffer.device_pointer; rtile.buffer = buffers->buffer.device_pointer;
rtile.rng_state = buffers->rng_state.device_pointer; rtile.rng_state = buffers->rng_state.device_pointer;
rtile.rgba = display->rgba.device_pointer;
rtile.buffers = buffers; rtile.buffers = buffers;
device->map_tile(tile_device, rtile); device->map_tile(tile_device, rtile);
@@ -415,7 +414,6 @@ bool Session::acquire_tile(Device *tile_device, RenderTile& rtile)
rtile.buffer = tilebuffers->buffer.device_pointer; rtile.buffer = tilebuffers->buffer.device_pointer;
rtile.rng_state = tilebuffers->rng_state.device_pointer; rtile.rng_state = tilebuffers->rng_state.device_pointer;
rtile.rgba = 0;
rtile.buffers = tilebuffers; rtile.buffers = tilebuffers;
/* this will tag tile as IN PROGRESS in blender-side render pipeline, /* this will tag tile as IN PROGRESS in blender-side render pipeline,
@@ -838,13 +836,14 @@ void Session::path_trace()
void Session::tonemap() void Session::tonemap()
{ {
/* add tonemap task */ /* add tonemap task */
DeviceTask task(DeviceTask::TONEMAP); DeviceTask task(DeviceTask::FILM_CONVERT);
task.x = tile_manager.state.buffer.full_x; task.x = tile_manager.state.buffer.full_x;
task.y = tile_manager.state.buffer.full_y; task.y = tile_manager.state.buffer.full_y;
task.w = tile_manager.state.buffer.width; task.w = tile_manager.state.buffer.width;
task.h = tile_manager.state.buffer.height; task.h = tile_manager.state.buffer.height;
task.rgba = display->rgba.device_pointer; task.rgba_byte = display->rgba_byte.device_pointer;
task.rgba_half = display->rgba_half.device_pointer;
task.buffer = buffers->buffer.device_pointer; task.buffer = buffers->buffer.device_pointer;
task.sample = tile_manager.state.sample; task.sample = tile_manager.state.sample;
tile_manager.state.buffer.get_offset_stride(task.offset, task.stride); tile_manager.state.buffer.get_offset_stride(task.offset, task.stride);

View File

@@ -53,6 +53,8 @@ public:
int start_resolution; int start_resolution;
int threads; int threads;
bool display_buffer_linear;
double cancel_timeout; double cancel_timeout;
double reset_timeout; double reset_timeout;
double text_timeout; double text_timeout;
@@ -72,6 +74,8 @@ public:
start_resolution = INT_MAX; start_resolution = INT_MAX;
threads = 0; threads = 0;
display_buffer_linear = false;
cancel_timeout = 0.1; cancel_timeout = 0.1;
reset_timeout = 0.1; reset_timeout = 0.1;
text_timeout = 1.0; text_timeout = 1.0;
@@ -91,6 +95,7 @@ public:
&& tile_size == params.tile_size && tile_size == params.tile_size
&& start_resolution == params.start_resolution && start_resolution == params.start_resolution
&& threads == params.threads && threads == params.threads
&& display_buffer_linear == params.display_buffer_linear
&& cancel_timeout == params.cancel_timeout && cancel_timeout == params.cancel_timeout
&& reset_timeout == params.reset_timeout && reset_timeout == params.reset_timeout
&& text_timeout == params.text_timeout && text_timeout == params.text_timeout

View File

@@ -541,6 +541,70 @@ template<size_t i0, size_t i1, size_t i2, size_t i3> __device_inline const __m12
} }
#endif #endif
/* Half Floats */
#ifdef __KERNEL_OPENCL__
__device_inline void float4_store_half(half *h, const float4 *f, float scale)
{
vstore_half4(*f * scale, 0, h);
}
#else
typedef unsigned short half;
struct half4 { half x, y, z, w; };
#ifdef __KERNEL_CUDA__
__device_inline void float4_store_half(half *h, const float4 *f, float scale)
{
h[0] = __float2half_rn(f->x * scale);
h[1] = __float2half_rn(f->y * scale);
h[2] = __float2half_rn(f->z * scale);
h[3] = __float2half_rn(f->w * scale);
}
#else
__device_inline void float4_store_half(half *h, const float4 *f, float scale)
{
#ifndef __KERNEL_SSE2__
for(int i = 0; i < 4; i++) {
/* optimized float to half for pixels:
* assumes no negative, no nan, no inf, and sets denormal to 0 */
union { uint i; float f; } in;
in.f = ((*f)[i] > 0.0f)? (*f)[i] * scale: 0.0f;
int x = in.i;
int absolute = x & 0x7FFFFFFF;
int Z = absolute + 0xC8000000;
int result = (absolute < 0x38800000)? 0: Z;
h[i] = ((result >> 13) & 0x7FFF);
}
#else
/* same as above with SSE */
const __m128 mm_scale = _mm_set_ps1(scale);
const __m128i mm_38800000 = _mm_set1_epi32(0x38800000);
const __m128i mm_7FFF = _mm_set1_epi32(0x7FFF);
const __m128i mm_7FFFFFFF = _mm_set1_epi32(0x7FFFFFFF);
const __m128i mm_C8000000 = _mm_set1_epi32(0xC8000000);
__m128i x = _mm_castps_si128(_mm_max_ps(_mm_mul_ps(*(__m128*)f, mm_scale), _mm_set_ps1(0.0f)));
__m128i absolute = _mm_and_si128(x, mm_7FFFFFFF);
__m128i Z = _mm_add_epi32(absolute, mm_C8000000);
__m128i result = _mm_andnot_si128(_mm_cmplt_epi32(absolute, mm_38800000), Z);
__m128i rh = _mm_and_si128(_mm_srai_epi32(result, 13), mm_7FFF);
_mm_storel_pi((__m64*)h, _mm_castsi128_ps(_mm_packs_epi32(rh, rh)));
#endif
}
#endif
#endif
CCL_NAMESPACE_END CCL_NAMESPACE_END
#endif /* __UTIL_TYPES_H__ */ #endif /* __UTIL_TYPES_H__ */