Fix #33375: OSL geom:trianglevertices gave wrong coordinates for static BVH.

Also some simple OSL optimization, passing thread data pointer directly instead
of via thread local storage, and creating ustrings for attribute lookup.
This commit is contained in:
Brecht Van Lommel
2012-12-01 19:15:05 +00:00
parent 807fd448a5
commit 7c0a0bae79
19 changed files with 276 additions and 282 deletions

View File

@@ -23,9 +23,12 @@
#include "device_intern.h"
#include "kernel.h"
#include "kernel_compat_cpu.h"
#include "kernel_types.h"
#include "kernel_globals.h"
#include "osl_shader.h"
#include "osl_globals.h"
#include "buffers.h"
@@ -43,11 +46,16 @@ class CPUDevice : public Device
{
public:
TaskPool task_pool;
KernelGlobals *kg;
KernelGlobals kernel_globals;
#ifdef WITH_OSL
OSLGlobals osl_globals;
#endif
CPUDevice(Stats &stats) : Device(stats)
{
kg = kernel_globals_create();
#ifdef WITH_OSL
kernel_globals.osl = &osl_globals;
#endif
/* do now to avoid thread issues */
system_cpu_support_optimized();
@@ -56,7 +64,6 @@ public:
~CPUDevice()
{
task_pool.stop();
kernel_globals_free(kg);
}
bool support_advanced_shading()
@@ -95,12 +102,12 @@ public:
void const_copy_to(const char *name, void *host, size_t size)
{
kernel_const_copy(kg, name, host, size);
kernel_const_copy(&kernel_globals, name, host, size);
}
void tex_alloc(const char *name, device_memory& mem, bool interpolation, bool periodic)
{
kernel_tex_copy(kg, name, mem.data_pointer, mem.data_width, mem.data_height);
kernel_tex_copy(&kernel_globals, name, mem.data_pointer, mem.data_width, mem.data_height);
mem.device_pointer = mem.data_pointer;
stats.mem_alloc(mem.memory_size());
@@ -116,7 +123,7 @@ public:
void *osl_memory()
{
#ifdef WITH_OSL
return kernel_osl_memory(kg);
return &osl_globals;
#else
return NULL;
#endif
@@ -148,9 +155,10 @@ public:
return;
}
KernelGlobals kg = kernel_globals;
#ifdef WITH_OSL
if(kernel_osl_use(kg))
OSLShader::thread_init(kg);
OSLShader::thread_init(&kg, &kernel_globals, &osl_globals);
#endif
RenderTile tile;
@@ -171,7 +179,7 @@ public:
for(int y = tile.y; y < tile.y + tile.h; y++) {
for(int x = tile.x; x < tile.x + tile.w; x++) {
kernel_cpu_optimized_path_trace(kg, render_buffer, rng_state,
kernel_cpu_optimized_path_trace(&kg, render_buffer, rng_state,
sample, x, y, tile.offset, tile.stride);
}
}
@@ -192,7 +200,7 @@ public:
for(int y = tile.y; y < tile.y + tile.h; y++) {
for(int x = tile.x; x < tile.x + tile.w; x++) {
kernel_cpu_path_trace(kg, render_buffer, rng_state,
kernel_cpu_path_trace(&kg, render_buffer, rng_state,
sample, x, y, tile.offset, tile.stride);
}
}
@@ -212,8 +220,7 @@ public:
}
#ifdef WITH_OSL
if(kernel_osl_use(kg))
OSLShader::thread_free(kg);
OSLShader::thread_free(&kg);
#endif
}
@@ -223,7 +230,7 @@ public:
if(system_cpu_support_optimized()) {
for(int y = task.y; y < task.y + task.h; y++)
for(int x = task.x; x < task.x + task.w; x++)
kernel_cpu_optimized_tonemap(kg, (uchar4*)task.rgba, (float*)task.buffer,
kernel_cpu_optimized_tonemap(&kernel_globals, (uchar4*)task.rgba, (float*)task.buffer,
task.sample, task.resolution, x, y, task.offset, task.stride);
}
else
@@ -231,22 +238,23 @@ public:
{
for(int y = task.y; y < task.y + task.h; y++)
for(int x = task.x; x < task.x + task.w; x++)
kernel_cpu_tonemap(kg, (uchar4*)task.rgba, (float*)task.buffer,
kernel_cpu_tonemap(&kernel_globals, (uchar4*)task.rgba, (float*)task.buffer,
task.sample, task.resolution, x, y, task.offset, task.stride);
}
}
void thread_shader(DeviceTask& task)
{
KernelGlobals kg = kernel_globals;
#ifdef WITH_OSL
if(kernel_osl_use(kg))
OSLShader::thread_init(kg);
OSLShader::thread_init(&kg, &kernel_globals, &osl_globals);
#endif
#ifdef WITH_OPTIMIZED_KERNEL
if(system_cpu_support_optimized()) {
for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) {
kernel_cpu_optimized_shader(kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x);
kernel_cpu_optimized_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x);
if(task_pool.cancelled())
break;
@@ -256,7 +264,7 @@ public:
#endif
{
for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) {
kernel_cpu_shader(kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x);
kernel_cpu_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x);
if(task_pool.cancelled())
break;
@@ -264,8 +272,7 @@ public:
}
#ifdef WITH_OSL
if(kernel_osl_use(kg))
OSLShader::thread_free(kg);
OSLShader::thread_free(&kg);
#endif
}