Cycles: avoid int->float conversions for pixel lookups
Gives a ~3% speedup for the image.blend test, and ~6% for an image-heavy file. The overall speedup in real-world use is likely much smaller.
This commit is contained in:
@@ -61,9 +61,7 @@ void kernel_tex_copy(KernelGlobals *kg, const char *name, device_ptr mem, size_t
|
|||||||
|
|
||||||
if(tex) {
|
if(tex) {
|
||||||
tex->data = (float4*)mem;
|
tex->data = (float4*)mem;
|
||||||
tex->width = width;
|
tex->dimensions_set(width, height, depth);
|
||||||
tex->height = height;
|
|
||||||
tex->depth = depth;
|
|
||||||
tex->interpolation = interpolation;
|
tex->interpolation = interpolation;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -78,9 +76,7 @@ void kernel_tex_copy(KernelGlobals *kg, const char *name, device_ptr mem, size_t
|
|||||||
|
|
||||||
if(tex) {
|
if(tex) {
|
||||||
tex->data = (uchar4*)mem;
|
tex->data = (uchar4*)mem;
|
||||||
tex->width = width;
|
tex->dimensions_set(width, height, depth);
|
||||||
tex->height = height;
|
|
||||||
tex->depth = depth;
|
|
||||||
tex->interpolation = interpolation;
|
tex->interpolation = interpolation;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@@ -95,14 +95,14 @@ template<typename T> struct texture_image {
|
|||||||
|
|
||||||
ccl_always_inline float4 interp(float x, float y, bool periodic = true)
|
ccl_always_inline float4 interp(float x, float y, bool periodic = true)
|
||||||
{
|
{
|
||||||
if(!data)
|
if(UNLIKELY(!data))
|
||||||
return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
|
return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
|
||||||
|
|
||||||
int ix, iy, nix, niy;
|
int ix, iy, nix, niy;
|
||||||
|
|
||||||
if(interpolation == INTERPOLATION_CLOSEST) {
|
if(interpolation == INTERPOLATION_CLOSEST) {
|
||||||
frac(x*width, &ix);
|
frac(x*width_fl, &ix);
|
||||||
frac(y*height, &iy);
|
frac(y*height_fl, &iy);
|
||||||
if(periodic) {
|
if(periodic) {
|
||||||
ix = wrap_periodic(ix, width);
|
ix = wrap_periodic(ix, width);
|
||||||
iy = wrap_periodic(iy, height);
|
iy = wrap_periodic(iy, height);
|
||||||
@@ -115,8 +115,8 @@ template<typename T> struct texture_image {
|
|||||||
return read(data[ix + iy*width]);
|
return read(data[ix + iy*width]);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
float tx = frac(x*width - 0.5f, &ix);
|
float tx = frac(x*width_fl - 0.5f, &ix);
|
||||||
float ty = frac(y*height - 0.5f, &iy);
|
float ty = frac(y*height_fl - 0.5f, &iy);
|
||||||
|
|
||||||
if(periodic) {
|
if(periodic) {
|
||||||
ix = wrap_periodic(ix, width);
|
ix = wrap_periodic(ix, width);
|
||||||
@@ -144,15 +144,15 @@ template<typename T> struct texture_image {
|
|||||||
|
|
||||||
ccl_always_inline float4 interp_3d(float x, float y, float z, bool periodic = false)
|
ccl_always_inline float4 interp_3d(float x, float y, float z, bool periodic = false)
|
||||||
{
|
{
|
||||||
if(!data)
|
if(UNLIKELY(!data))
|
||||||
return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
|
return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
|
||||||
|
|
||||||
int ix, iy, iz, nix, niy, niz;
|
int ix, iy, iz, nix, niy, niz;
|
||||||
|
|
||||||
if(interpolation == INTERPOLATION_CLOSEST) {
|
if(interpolation == INTERPOLATION_CLOSEST) {
|
||||||
frac(x*width, &ix);
|
frac(x*width_fl, &ix);
|
||||||
frac(y*height, &iy);
|
frac(y*height_fl, &iy);
|
||||||
frac(z*depth, &iz);
|
frac(z*depth_fl, &iz);
|
||||||
|
|
||||||
if(periodic) {
|
if(periodic) {
|
||||||
ix = wrap_periodic(ix, width);
|
ix = wrap_periodic(ix, width);
|
||||||
@@ -168,9 +168,9 @@ template<typename T> struct texture_image {
|
|||||||
return read(data[ix + iy*width + iz*width*height]);
|
return read(data[ix + iy*width + iz*width*height]);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
float tx = frac(x*width - 0.5f, &ix);
|
float tx = frac(x*width_fl - 0.5f, &ix);
|
||||||
float ty = frac(y*height - 0.5f, &iy);
|
float ty = frac(y*height_fl - 0.5f, &iy);
|
||||||
float tz = frac(z*depth - 0.5f, &iz);
|
float tz = frac(z*depth_fl - 0.5f, &iz);
|
||||||
|
|
||||||
if(periodic) {
|
if(periodic) {
|
||||||
ix = wrap_periodic(ix, width);
|
ix = wrap_periodic(ix, width);
|
||||||
@@ -207,9 +207,23 @@ template<typename T> struct texture_image {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Set the image dimensions, storing each one both as an int (width, height,
 * depth) and as a float copy (width_fl, height_fl, depth_fl).
 *
 * The float copies are what interp() and interp_3d() multiply the lookup
 * coordinates by, so the int -> float conversion happens once here rather
 * than on every lookup. Setting all six members through this one function
 * keeps the int and float values in sync. */
ccl_always_inline void dimensions_set(int width_, int height_, int depth_)
|
||||||
|
{
|
||||||
|
width = width_;
|
||||||
|
height = height_;
|
||||||
|
depth = depth_;
|
||||||
|

||||||
|
width_fl = (float)width_;  /* float copies of the int members set above */
|
||||||
|
height_fl = (float)height_;
|
||||||
|
depth_fl = (float)depth_;
|
||||||
|
}
|
||||||
|
|
||||||
T *data;
|
T *data;
|
||||||
int interpolation;
|
int interpolation;
|
||||||
int width, height, depth;
|
int width, height, depth;
|
||||||
|
|
||||||
|
/* avoid int/float conversion */
|
||||||
|
float width_fl, height_fl, depth_fl;
|
||||||
};
|
};
|
||||||
|
|
||||||
typedef texture<float4> texture_float4;
|
typedef texture<float4> texture_float4;
|
||||||
|
Reference in New Issue
Block a user