Cycles OSL: image texture lookup optimization, acquire the per-thread handle

for the texture system in advance. Patch by Martijn Berger, with some tweaks.

There was about a 10% performance improvement on OS X in my tests with the
images.blend test file. This may be less on other platforms because OS X has
particularly slow mutex locks.
This commit is contained in:
Brecht Van Lommel
2013-08-05 12:49:15 +00:00
parent 83617429cf
commit 3bf175f270
3 changed files with 33 additions and 7 deletions

View File

@@ -87,9 +87,10 @@ struct OSLTraceData {
/* thread key for thread specific data lookup
 *
 * Per-thread OSL state. An instance is filled in by OSLShader::thread_init(),
 * stored on KernelGlobals as osl_tdata, and deleted in
 * OSLShader::thread_free(). */
struct OSLThreadData {
/* zeroed in thread_init(); its tracedata pointer is set to &tracedata below */
OSL::ShaderGlobals globals;
/* NOTE(review): both thread_info and osl_thread_info are listed here; this
 * text looks like a diff with its +/- markers stripped, so thread_info is
 * presumably the pre-rename name of osl_thread_info -- confirm. */
OSL::PerThreadInfo *thread_info;
/* created with ss->create_thread_info() in thread_init() and handed back to
 * ss->destroy_thread_info() in thread_free() */
OSL::PerThreadInfo *osl_thread_info;
/* storage that globals.tracedata points at */
OSLTraceData tracedata;
/* SHADER_CONTEXT_NUM contexts obtained with ss->get_context() in
 * thread_init() and released with ss->release_context() in thread_free() */
OSL::ShadingContext *context[SHADER_CONTEXT_NUM];
/* result of osl_globals->ts->get_perthread_info(), fetched in thread_init();
 * the OSLRenderServices texture lookups pass it to get_texture_handle() and
 * to the handle-based texture calls */
OIIO::TextureSystem::Perthread *oiio_thread_info;
};
CCL_NAMESPACE_END

View File

@@ -775,7 +775,15 @@ bool OSLRenderServices::texture(ustring filename, TextureOpt &options,
float dsdy, float dtdy, float *result)
{
OSL::TextureSystem *ts = osl_ts;
bool status = ts->texture(filename, options, s, t, dsdx, dtdx, dsdy, dtdy, result);
ShaderData *sd = (ShaderData *)(sg->renderstate);
KernelGlobals *kg = sd->osl_globals;
OSLThreadData *tdata = kg->osl_tdata;
OIIO::TextureSystem::Perthread *thread_info = tdata->oiio_thread_info;
OIIO::TextureSystem::TextureHandle *th = ts->get_texture_handle(filename, thread_info);
bool status = ts->texture(th, thread_info,
options, s, t, dsdx, dtdx, dsdy, dtdy, result);
if(!status) {
if(options.nchannels == 3 || options.nchannels == 4) {
@@ -797,7 +805,15 @@ bool OSLRenderServices::texture3d(ustring filename, TextureOpt &options,
const OSL::Vec3 &dPdz, float *result)
{
OSL::TextureSystem *ts = osl_ts;
bool status = ts->texture3d(filename, options, P, dPdx, dPdy, dPdz, result);
ShaderData *sd = (ShaderData *)(sg->renderstate);
KernelGlobals *kg = sd->osl_globals;
OSLThreadData *tdata = kg->osl_tdata;
OIIO::TextureSystem::Perthread *thread_info = tdata->oiio_thread_info;
OIIO::TextureSystem::TextureHandle *th = ts->get_texture_handle(filename, thread_info);
bool status = ts->texture3d(th, thread_info,
options, P, dPdx, dPdy, dPdz, result);
if(!status) {
if(options.nchannels == 3 || options.nchannels == 4) {
@@ -819,7 +835,14 @@ bool OSLRenderServices::environment(ustring filename, TextureOpt &options,
const OSL::Vec3 &dRdx, const OSL::Vec3 &dRdy, float *result)
{
OSL::TextureSystem *ts = osl_ts;
bool status = ts->environment(filename, options, R, dRdx, dRdy, result);
ShaderData *sd = (ShaderData *)(sg->renderstate);
KernelGlobals *kg = sd->osl_globals;
OSLThreadData *tdata = kg->osl_tdata;
OIIO::TextureSystem::Perthread *thread_info = tdata->oiio_thread_info;
OIIO::TextureSystem::TextureHandle *th = ts->get_texture_handle(filename, thread_info);
bool status = ts->environment(th, thread_info,
options, R, dRdx, dRdy, result);
if(!status) {
if(options.nchannels == 3 || options.nchannels == 4) {

View File

@@ -55,10 +55,12 @@ void OSLShader::thread_init(KernelGlobals *kg, KernelGlobals *kernel_globals, OS
memset(&tdata->globals, 0, sizeof(OSL::ShaderGlobals));
tdata->globals.tracedata = &tdata->tracedata;
tdata->globals.flipHandedness = false;
tdata->thread_info = ss->create_thread_info();
tdata->osl_thread_info = ss->create_thread_info();
for(int i = 0; i < SHADER_CONTEXT_NUM; i++)
tdata->context[i] = ss->get_context(tdata->thread_info);
tdata->context[i] = ss->get_context(tdata->osl_thread_info);
tdata->oiio_thread_info = osl_globals->ts->get_perthread_info();
kg->osl_ss = (OSLShadingSystem*)ss;
kg->osl_tdata = tdata;
@@ -75,7 +77,7 @@ void OSLShader::thread_free(KernelGlobals *kg)
for(int i = 0; i < SHADER_CONTEXT_NUM; i++)
ss->release_context(tdata->context[i]);
ss->destroy_thread_info(tdata->thread_info);
ss->destroy_thread_info(tdata->osl_thread_info);
delete tdata;