Fix #33375: OSL geom:trianglevertices gave wrong coordinates for static BVH.
Also includes some simple OSL optimizations: passing the thread data pointer directly instead of via thread-local storage, and creating ustrings for attribute lookup.
This commit is contained in:
@@ -23,9 +23,12 @@
|
||||
#include "device_intern.h"
|
||||
|
||||
#include "kernel.h"
|
||||
#include "kernel_compat_cpu.h"
|
||||
#include "kernel_types.h"
|
||||
#include "kernel_globals.h"
|
||||
|
||||
#include "osl_shader.h"
|
||||
#include "osl_globals.h"
|
||||
|
||||
#include "buffers.h"
|
||||
|
||||
@@ -43,11 +46,16 @@ class CPUDevice : public Device
|
||||
{
|
||||
public:
|
||||
TaskPool task_pool;
|
||||
KernelGlobals *kg;
|
||||
KernelGlobals kernel_globals;
|
||||
#ifdef WITH_OSL
|
||||
OSLGlobals osl_globals;
|
||||
#endif
|
||||
|
||||
CPUDevice(Stats &stats) : Device(stats)
|
||||
{
|
||||
kg = kernel_globals_create();
|
||||
#ifdef WITH_OSL
|
||||
kernel_globals.osl = &osl_globals;
|
||||
#endif
|
||||
|
||||
/* do now to avoid thread issues */
|
||||
system_cpu_support_optimized();
|
||||
@@ -56,7 +64,6 @@ public:
|
||||
~CPUDevice()
|
||||
{
|
||||
task_pool.stop();
|
||||
kernel_globals_free(kg);
|
||||
}
|
||||
|
||||
bool support_advanced_shading()
|
||||
@@ -95,12 +102,12 @@ public:
|
||||
|
||||
void const_copy_to(const char *name, void *host, size_t size)
|
||||
{
|
||||
kernel_const_copy(kg, name, host, size);
|
||||
kernel_const_copy(&kernel_globals, name, host, size);
|
||||
}
|
||||
|
||||
void tex_alloc(const char *name, device_memory& mem, bool interpolation, bool periodic)
|
||||
{
|
||||
kernel_tex_copy(kg, name, mem.data_pointer, mem.data_width, mem.data_height);
|
||||
kernel_tex_copy(&kernel_globals, name, mem.data_pointer, mem.data_width, mem.data_height);
|
||||
mem.device_pointer = mem.data_pointer;
|
||||
|
||||
stats.mem_alloc(mem.memory_size());
|
||||
@@ -116,7 +123,7 @@ public:
|
||||
void *osl_memory()
|
||||
{
|
||||
#ifdef WITH_OSL
|
||||
return kernel_osl_memory(kg);
|
||||
return &osl_globals;
|
||||
#else
|
||||
return NULL;
|
||||
#endif
|
||||
@@ -148,9 +155,10 @@ public:
|
||||
return;
|
||||
}
|
||||
|
||||
KernelGlobals kg = kernel_globals;
|
||||
|
||||
#ifdef WITH_OSL
|
||||
if(kernel_osl_use(kg))
|
||||
OSLShader::thread_init(kg);
|
||||
OSLShader::thread_init(&kg, &kernel_globals, &osl_globals);
|
||||
#endif
|
||||
|
||||
RenderTile tile;
|
||||
@@ -171,7 +179,7 @@ public:
|
||||
|
||||
for(int y = tile.y; y < tile.y + tile.h; y++) {
|
||||
for(int x = tile.x; x < tile.x + tile.w; x++) {
|
||||
kernel_cpu_optimized_path_trace(kg, render_buffer, rng_state,
|
||||
kernel_cpu_optimized_path_trace(&kg, render_buffer, rng_state,
|
||||
sample, x, y, tile.offset, tile.stride);
|
||||
}
|
||||
}
|
||||
@@ -192,7 +200,7 @@ public:
|
||||
|
||||
for(int y = tile.y; y < tile.y + tile.h; y++) {
|
||||
for(int x = tile.x; x < tile.x + tile.w; x++) {
|
||||
kernel_cpu_path_trace(kg, render_buffer, rng_state,
|
||||
kernel_cpu_path_trace(&kg, render_buffer, rng_state,
|
||||
sample, x, y, tile.offset, tile.stride);
|
||||
}
|
||||
}
|
||||
@@ -212,8 +220,7 @@ public:
|
||||
}
|
||||
|
||||
#ifdef WITH_OSL
|
||||
if(kernel_osl_use(kg))
|
||||
OSLShader::thread_free(kg);
|
||||
OSLShader::thread_free(&kg);
|
||||
#endif
|
||||
}
|
||||
|
||||
@@ -223,7 +230,7 @@ public:
|
||||
if(system_cpu_support_optimized()) {
|
||||
for(int y = task.y; y < task.y + task.h; y++)
|
||||
for(int x = task.x; x < task.x + task.w; x++)
|
||||
kernel_cpu_optimized_tonemap(kg, (uchar4*)task.rgba, (float*)task.buffer,
|
||||
kernel_cpu_optimized_tonemap(&kernel_globals, (uchar4*)task.rgba, (float*)task.buffer,
|
||||
task.sample, task.resolution, x, y, task.offset, task.stride);
|
||||
}
|
||||
else
|
||||
@@ -231,22 +238,23 @@ public:
|
||||
{
|
||||
for(int y = task.y; y < task.y + task.h; y++)
|
||||
for(int x = task.x; x < task.x + task.w; x++)
|
||||
kernel_cpu_tonemap(kg, (uchar4*)task.rgba, (float*)task.buffer,
|
||||
kernel_cpu_tonemap(&kernel_globals, (uchar4*)task.rgba, (float*)task.buffer,
|
||||
task.sample, task.resolution, x, y, task.offset, task.stride);
|
||||
}
|
||||
}
|
||||
|
||||
void thread_shader(DeviceTask& task)
|
||||
{
|
||||
KernelGlobals kg = kernel_globals;
|
||||
|
||||
#ifdef WITH_OSL
|
||||
if(kernel_osl_use(kg))
|
||||
OSLShader::thread_init(kg);
|
||||
OSLShader::thread_init(&kg, &kernel_globals, &osl_globals);
|
||||
#endif
|
||||
|
||||
#ifdef WITH_OPTIMIZED_KERNEL
|
||||
if(system_cpu_support_optimized()) {
|
||||
for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) {
|
||||
kernel_cpu_optimized_shader(kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x);
|
||||
kernel_cpu_optimized_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x);
|
||||
|
||||
if(task_pool.cancelled())
|
||||
break;
|
||||
@@ -256,7 +264,7 @@ public:
|
||||
#endif
|
||||
{
|
||||
for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) {
|
||||
kernel_cpu_shader(kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x);
|
||||
kernel_cpu_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x);
|
||||
|
||||
if(task_pool.cancelled())
|
||||
break;
|
||||
@@ -264,8 +272,7 @@ public:
|
||||
}
|
||||
|
||||
#ifdef WITH_OSL
|
||||
if(kernel_osl_use(kg))
|
||||
OSLShader::thread_free(kg);
|
||||
OSLShader::thread_free(&kg);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
Reference in New Issue
Block a user