Cycles: optimize CPU texture sampler interpolation
Use templates so that the CPU texture sampler interpolates single-component data types as float, instead of converting every type to float4 before interpolating.

Differential Revision: https://developer.blender.org/D14424
This commit is contained in:

committed by
Brecht Van Lommel

parent
d67f9820b8
commit
4e56e738a8
@@ -31,7 +31,18 @@ ccl_device_inline float frac(float x, int *ix)
|
|||||||
return x - (float)i;
|
return x - (float)i;
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename T> struct TextureInterpolator {
|
template<typename TexT, typename OutT = float4> struct TextureInterpolator {
|
||||||
|
template<typename ZeroT> static ccl_always_inline ZeroT zero();
|
||||||
|
|
||||||
|
template<> static ccl_always_inline float zero()
|
||||||
|
{
|
||||||
|
return 0.0f;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<> static ccl_always_inline float4 zero()
|
||||||
|
{
|
||||||
|
return zero_float4();
|
||||||
|
}
|
||||||
|
|
||||||
static ccl_always_inline float4 read(float4 r)
|
static ccl_always_inline float4 read(float4 r)
|
||||||
{
|
{
|
||||||
@@ -40,21 +51,18 @@ template<typename T> struct TextureInterpolator {
|
|||||||
|
|
||||||
static ccl_always_inline float4 read(uchar4 r)
|
static ccl_always_inline float4 read(uchar4 r)
|
||||||
{
|
{
|
||||||
float f = 1.0f / 255.0f;
|
const float f = 1.0f / 255.0f;
|
||||||
return make_float4(r.x * f, r.y * f, r.z * f, r.w * f);
|
return make_float4(r.x * f, r.y * f, r.z * f, r.w * f);
|
||||||
}
|
}
|
||||||
|
|
||||||
static ccl_always_inline float4 read(uchar r)
|
static ccl_always_inline float read(uchar r)
|
||||||
{
|
{
|
||||||
float f = r * (1.0f / 255.0f);
|
return r * (1.0f / 255.0f);
|
||||||
return make_float4(f, f, f, 1.0f);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static ccl_always_inline float4 read(float r)
|
static ccl_always_inline float read(float r)
|
||||||
{
|
{
|
||||||
/* TODO(dingto): Optimize this, so interpolation
|
return r;
|
||||||
* happens on float instead of float4 */
|
|
||||||
return make_float4(r, r, r, 1.0f);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static ccl_always_inline float4 read(half4 r)
|
static ccl_always_inline float4 read(half4 r)
|
||||||
@@ -62,63 +70,61 @@ template<typename T> struct TextureInterpolator {
|
|||||||
return half4_to_float4_image(r);
|
return half4_to_float4_image(r);
|
||||||
}
|
}
|
||||||
|
|
||||||
static ccl_always_inline float4 read(half r)
|
static ccl_always_inline float read(half r)
|
||||||
{
|
{
|
||||||
float f = half_to_float_image(r);
|
return half_to_float_image(r);
|
||||||
return make_float4(f, f, f, 1.0f);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static ccl_always_inline float4 read(uint16_t r)
|
static ccl_always_inline float read(uint16_t r)
|
||||||
{
|
{
|
||||||
float f = r * (1.0f / 65535.0f);
|
return r * (1.0f / 65535.0f);
|
||||||
return make_float4(f, f, f, 1.0f);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static ccl_always_inline float4 read(ushort4 r)
|
static ccl_always_inline float4 read(ushort4 r)
|
||||||
{
|
{
|
||||||
float f = 1.0f / 65535.0f;
|
const float f = 1.0f / 65535.0f;
|
||||||
return make_float4(r.x * f, r.y * f, r.z * f, r.w * f);
|
return make_float4(r.x * f, r.y * f, r.z * f, r.w * f);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Read 2D Texture Data
|
/* Read 2D Texture Data
|
||||||
* Does not check if data request is in bounds. */
|
* Does not check if data request is in bounds. */
|
||||||
static ccl_always_inline float4 read(const T *data, int x, int y, int width, int height)
|
static ccl_always_inline OutT read(const TexT *data, int x, int y, int width, int height)
|
||||||
{
|
{
|
||||||
return read(data[y * width + x]);
|
return read(data[y * width + x]);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Read 2D Texture Data Clip
|
/* Read 2D Texture Data Clip
|
||||||
* Returns transparent black if data request is out of bounds. */
|
* Returns transparent black if data request is out of bounds. */
|
||||||
static ccl_always_inline float4 read_clip(const T *data, int x, int y, int width, int height)
|
static ccl_always_inline OutT read_clip(const TexT *data, int x, int y, int width, int height)
|
||||||
{
|
{
|
||||||
if (x < 0 || x >= width || y < 0 || y >= height) {
|
if (x < 0 || x >= width || y < 0 || y >= height) {
|
||||||
return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
|
return zero<OutT>();
|
||||||
}
|
}
|
||||||
return read(data[y * width + x]);
|
return read(data[y * width + x]);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Read 3D Texture Data
|
/* Read 3D Texture Data
|
||||||
* Does not check if data request is in bounds. */
|
* Does not check if data request is in bounds. */
|
||||||
static ccl_always_inline float4
|
static ccl_always_inline OutT
|
||||||
read(const T *data, int x, int y, int z, int width, int height, int depth)
|
read(const TexT *data, int x, int y, int z, int width, int height, int depth)
|
||||||
{
|
{
|
||||||
return read(data[x + y * width + z * width * height]);
|
return read(data[x + y * width + z * width * height]);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Read 3D Texture Data Clip
|
/* Read 3D Texture Data Clip
|
||||||
* Returns transparent black if data request is out of bounds. */
|
* Returns transparent black if data request is out of bounds. */
|
||||||
static ccl_always_inline float4
|
static ccl_always_inline OutT
|
||||||
read_clip(const T *data, int x, int y, int z, int width, int height, int depth)
|
read_clip(const TexT *data, int x, int y, int z, int width, int height, int depth)
|
||||||
{
|
{
|
||||||
if (x < 0 || x >= width || y < 0 || y >= height || z < 0 || z >= depth) {
|
if (x < 0 || x >= width || y < 0 || y >= height || z < 0 || z >= depth) {
|
||||||
return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
|
return zero<OutT>();
|
||||||
}
|
}
|
||||||
return read(data[x + y * width + z * width * height]);
|
return read(data[x + y * width + z * width * height]);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Trilinear Interpolation */
|
/* Trilinear Interpolation */
|
||||||
static ccl_always_inline float4
|
static ccl_always_inline OutT
|
||||||
trilinear_lookup(const T *data,
|
trilinear_lookup(const TexT *data,
|
||||||
float tx,
|
float tx,
|
||||||
float ty,
|
float ty,
|
||||||
float tz,
|
float tz,
|
||||||
@@ -131,10 +137,10 @@ template<typename T> struct TextureInterpolator {
|
|||||||
int width,
|
int width,
|
||||||
int height,
|
int height,
|
||||||
int depth,
|
int depth,
|
||||||
float4 read(const T *, int, int, int, int, int, int))
|
OutT read(const TexT *, int, int, int, int, int, int))
|
||||||
{
|
{
|
||||||
float4 r;
|
OutT r = (1.0f - tz) * (1.0f - ty) * (1.0f - tx) *
|
||||||
r = (1.0f - tz) * (1.0f - ty) * (1.0f - tx) * read(data, ix, iy, iz, width, height, depth);
|
read(data, ix, iy, iz, width, height, depth);
|
||||||
r += (1.0f - tz) * (1.0f - ty) * tx * read(data, nix, iy, iz, width, height, depth);
|
r += (1.0f - tz) * (1.0f - ty) * tx * read(data, nix, iy, iz, width, height, depth);
|
||||||
r += (1.0f - tz) * ty * (1.0f - tx) * read(data, ix, niy, iz, width, height, depth);
|
r += (1.0f - tz) * ty * (1.0f - tx) * read(data, ix, niy, iz, width, height, depth);
|
||||||
r += (1.0f - tz) * ty * tx * read(data, nix, niy, iz, width, height, depth);
|
r += (1.0f - tz) * ty * tx * read(data, nix, niy, iz, width, height, depth);
|
||||||
@@ -147,8 +153,8 @@ template<typename T> struct TextureInterpolator {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/** Tricubic Interpolation */
|
/** Tricubic Interpolation */
|
||||||
static ccl_always_inline float4
|
static ccl_always_inline OutT
|
||||||
tricubic_lookup(const T *data,
|
tricubic_lookup(const TexT *data,
|
||||||
float tx,
|
float tx,
|
||||||
float ty,
|
float ty,
|
||||||
float tz,
|
float tz,
|
||||||
@@ -158,7 +164,7 @@ template<typename T> struct TextureInterpolator {
|
|||||||
int width,
|
int width,
|
||||||
int height,
|
int height,
|
||||||
int depth,
|
int depth,
|
||||||
float4 read(const T *, int, int, int, int, int, int))
|
OutT read(const TexT *, int, int, int, int, int, int))
|
||||||
{
|
{
|
||||||
float u[4], v[4], w[4];
|
float u[4], v[4], w[4];
|
||||||
|
|
||||||
@@ -199,7 +205,7 @@ template<typename T> struct TextureInterpolator {
|
|||||||
|
|
||||||
/* ******** 2D interpolation ******** */
|
/* ******** 2D interpolation ******** */
|
||||||
|
|
||||||
static ccl_always_inline float4 interp_closest(const TextureInfo &info, float x, float y)
|
static ccl_always_inline OutT interp_closest(const TextureInfo &info, float x, float y)
|
||||||
{
|
{
|
||||||
const int width = info.width;
|
const int width = info.width;
|
||||||
const int height = info.height;
|
const int height = info.height;
|
||||||
@@ -214,7 +220,7 @@ template<typename T> struct TextureInterpolator {
|
|||||||
case EXTENSION_CLIP:
|
case EXTENSION_CLIP:
|
||||||
/* No samples are inside the clip region. */
|
/* No samples are inside the clip region. */
|
||||||
if (ix < 0 || ix >= width || iy < 0 || iy >= height) {
|
if (ix < 0 || ix >= width || iy < 0 || iy >= height) {
|
||||||
return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
|
return zero<OutT>();
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case EXTENSION_EXTEND:
|
case EXTENSION_EXTEND:
|
||||||
@@ -223,14 +229,14 @@ template<typename T> struct TextureInterpolator {
|
|||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
kernel_assert(0);
|
kernel_assert(0);
|
||||||
return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
|
return zero<OutT>();
|
||||||
}
|
}
|
||||||
|
|
||||||
const T *data = (const T *)info.data;
|
const TexT *data = (const TexT *)info.data;
|
||||||
return read((const T *)data, ix, iy, width, height);
|
return read((const TexT *)data, ix, iy, width, height);
|
||||||
}
|
}
|
||||||
|
|
||||||
static ccl_always_inline float4 interp_linear(const TextureInfo &info, float x, float y)
|
static ccl_always_inline OutT interp_linear(const TextureInfo &info, float x, float y)
|
||||||
{
|
{
|
||||||
const int width = info.width;
|
const int width = info.width;
|
||||||
const int height = info.height;
|
const int height = info.height;
|
||||||
@@ -252,7 +258,7 @@ template<typename T> struct TextureInterpolator {
|
|||||||
case EXTENSION_CLIP:
|
case EXTENSION_CLIP:
|
||||||
/* No linear samples are inside the clip region. */
|
/* No linear samples are inside the clip region. */
|
||||||
if (ix < -1 || ix >= width || iy < -1 || iy >= height) {
|
if (ix < -1 || ix >= width || iy < -1 || iy >= height) {
|
||||||
return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
|
return zero<OutT>();
|
||||||
}
|
}
|
||||||
nix = ix + 1;
|
nix = ix + 1;
|
||||||
niy = iy + 1;
|
niy = iy + 1;
|
||||||
@@ -265,17 +271,17 @@ template<typename T> struct TextureInterpolator {
|
|||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
kernel_assert(0);
|
kernel_assert(0);
|
||||||
return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
|
return zero<OutT>();
|
||||||
}
|
}
|
||||||
|
|
||||||
const T *data = (const T *)info.data;
|
const TexT *data = (const TexT *)info.data;
|
||||||
return (1.0f - ty) * (1.0f - tx) * read_clip(data, ix, iy, width, height) +
|
return (1.0f - ty) * (1.0f - tx) * read_clip(data, ix, iy, width, height) +
|
||||||
(1.0f - ty) * tx * read_clip(data, nix, iy, width, height) +
|
(1.0f - ty) * tx * read_clip(data, nix, iy, width, height) +
|
||||||
ty * (1.0f - tx) * read_clip(data, ix, niy, width, height) +
|
ty * (1.0f - tx) * read_clip(data, ix, niy, width, height) +
|
||||||
ty * tx * read_clip(data, nix, niy, width, height);
|
ty * tx * read_clip(data, nix, niy, width, height);
|
||||||
}
|
}
|
||||||
|
|
||||||
static ccl_always_inline float4 interp_cubic(const TextureInfo &info, float x, float y)
|
static ccl_always_inline OutT interp_cubic(const TextureInfo &info, float x, float y)
|
||||||
{
|
{
|
||||||
const int width = info.width;
|
const int width = info.width;
|
||||||
const int height = info.height;
|
const int height = info.height;
|
||||||
@@ -304,7 +310,7 @@ template<typename T> struct TextureInterpolator {
|
|||||||
case EXTENSION_CLIP:
|
case EXTENSION_CLIP:
|
||||||
/* No cubic samples are inside the clip region. */
|
/* No cubic samples are inside the clip region. */
|
||||||
if (ix < -2 || ix > width || iy < -2 || iy > height) {
|
if (ix < -2 || ix > width || iy < -2 || iy > height) {
|
||||||
return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
|
return zero<OutT>();
|
||||||
}
|
}
|
||||||
|
|
||||||
pix = ix - 1;
|
pix = ix - 1;
|
||||||
@@ -328,10 +334,10 @@ template<typename T> struct TextureInterpolator {
|
|||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
kernel_assert(0);
|
kernel_assert(0);
|
||||||
return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
|
return zero<OutT>();
|
||||||
}
|
}
|
||||||
|
|
||||||
const T *data = (const T *)info.data;
|
const TexT *data = (const TexT *)info.data;
|
||||||
const int xc[4] = {pix, ix, nix, nnix};
|
const int xc[4] = {pix, ix, nix, nnix};
|
||||||
const int yc[4] = {piy, iy, niy, nniy};
|
const int yc[4] = {piy, iy, niy, nniy};
|
||||||
float u[4], v[4];
|
float u[4], v[4];
|
||||||
@@ -353,11 +359,8 @@ template<typename T> struct TextureInterpolator {
|
|||||||
#undef DATA
|
#undef DATA
|
||||||
}
|
}
|
||||||
|
|
||||||
static ccl_always_inline float4 interp(const TextureInfo &info, float x, float y)
|
static ccl_always_inline OutT interp(const TextureInfo &info, float x, float y)
|
||||||
{
|
{
|
||||||
if (UNLIKELY(!info.data)) {
|
|
||||||
return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
|
|
||||||
}
|
|
||||||
switch (info.interpolation) {
|
switch (info.interpolation) {
|
||||||
case INTERPOLATION_CLOSEST:
|
case INTERPOLATION_CLOSEST:
|
||||||
return interp_closest(info, x, y);
|
return interp_closest(info, x, y);
|
||||||
@@ -370,7 +373,7 @@ template<typename T> struct TextureInterpolator {
|
|||||||
|
|
||||||
/* ******** 3D interpolation ******** */
|
/* ******** 3D interpolation ******** */
|
||||||
|
|
||||||
static ccl_always_inline float4 interp_3d_closest(const TextureInfo &info,
|
static ccl_always_inline OutT interp_3d_closest(const TextureInfo &info,
|
||||||
float x,
|
float x,
|
||||||
float y,
|
float y,
|
||||||
float z)
|
float z)
|
||||||
@@ -393,7 +396,7 @@ template<typename T> struct TextureInterpolator {
|
|||||||
case EXTENSION_CLIP:
|
case EXTENSION_CLIP:
|
||||||
/* No samples are inside the clip region. */
|
/* No samples are inside the clip region. */
|
||||||
if (ix < 0 || ix >= width || iy < 0 || iy >= height || iz < 0 || iz >= depth) {
|
if (ix < 0 || ix >= width || iy < 0 || iy >= height || iz < 0 || iz >= depth) {
|
||||||
return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
|
return zero<OutT>();
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case EXTENSION_EXTEND:
|
case EXTENSION_EXTEND:
|
||||||
@@ -403,14 +406,14 @@ template<typename T> struct TextureInterpolator {
|
|||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
kernel_assert(0);
|
kernel_assert(0);
|
||||||
return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
|
return zero<OutT>();
|
||||||
}
|
}
|
||||||
|
|
||||||
const T *data = (const T *)info.data;
|
const TexT *data = (const TexT *)info.data;
|
||||||
return read(data, ix, iy, iz, width, height, depth);
|
return read(data, ix, iy, iz, width, height, depth);
|
||||||
}
|
}
|
||||||
|
|
||||||
static ccl_always_inline float4 interp_3d_linear(const TextureInfo &info,
|
static ccl_always_inline OutT interp_3d_linear(const TextureInfo &info,
|
||||||
float x,
|
float x,
|
||||||
float y,
|
float y,
|
||||||
float z)
|
float z)
|
||||||
@@ -440,7 +443,7 @@ template<typename T> struct TextureInterpolator {
|
|||||||
case EXTENSION_CLIP:
|
case EXTENSION_CLIP:
|
||||||
/* No linear samples are inside the clip region. */
|
/* No linear samples are inside the clip region. */
|
||||||
if (ix < -1 || ix >= width || iy < -1 || iy >= height || iz < -1 || iz >= depth) {
|
if (ix < -1 || ix >= width || iy < -1 || iy >= height || iz < -1 || iz >= depth) {
|
||||||
return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
|
return zero<OutT>();
|
||||||
}
|
}
|
||||||
|
|
||||||
nix = ix + 1;
|
nix = ix + 1;
|
||||||
@@ -454,7 +457,7 @@ template<typename T> struct TextureInterpolator {
|
|||||||
|
|
||||||
/* The linear samples span the clip border.
|
/* The linear samples span the clip border.
|
||||||
* #read_clip is used to ensure proper interpolation across the clip border. */
|
* #read_clip is used to ensure proper interpolation across the clip border. */
|
||||||
return trilinear_lookup((const T *)info.data,
|
return trilinear_lookup((const TexT *)info.data,
|
||||||
tx,
|
tx,
|
||||||
ty,
|
ty,
|
||||||
tz,
|
tz,
|
||||||
@@ -480,11 +483,23 @@ template<typename T> struct TextureInterpolator {
|
|||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
kernel_assert(0);
|
kernel_assert(0);
|
||||||
return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
|
return zero<OutT>();
|
||||||
}
|
}
|
||||||
|
|
||||||
return trilinear_lookup(
|
return trilinear_lookup((const TexT *)info.data,
|
||||||
(const T *)info.data, tx, ty, tz, ix, iy, iz, nix, niy, niz, width, height, depth, read);
|
tx,
|
||||||
|
ty,
|
||||||
|
tz,
|
||||||
|
ix,
|
||||||
|
iy,
|
||||||
|
iz,
|
||||||
|
nix,
|
||||||
|
niy,
|
||||||
|
niz,
|
||||||
|
width,
|
||||||
|
height,
|
||||||
|
depth,
|
||||||
|
read);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Tricubic b-spline interpolation.
|
/* Tricubic b-spline interpolation.
|
||||||
@@ -500,7 +515,7 @@ template<typename T> struct TextureInterpolator {
|
|||||||
#else
|
#else
|
||||||
static ccl_never_inline
|
static ccl_never_inline
|
||||||
#endif
|
#endif
|
||||||
float4
|
OutT
|
||||||
interp_3d_cubic(const TextureInfo &info, float x, float y, float z)
|
interp_3d_cubic(const TextureInfo &info, float x, float y, float z)
|
||||||
{
|
{
|
||||||
int width = info.width;
|
int width = info.width;
|
||||||
@@ -537,7 +552,7 @@ template<typename T> struct TextureInterpolator {
|
|||||||
case EXTENSION_CLIP: {
|
case EXTENSION_CLIP: {
|
||||||
/* No cubic samples are inside the clip region. */
|
/* No cubic samples are inside the clip region. */
|
||||||
if (ix < -2 || ix > width || iy < -2 || iy > height || iz < -2 || iz > depth) {
|
if (ix < -2 || ix > width || iy < -2 || iy > height || iz < -2 || iz > depth) {
|
||||||
return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
|
return zero<OutT>();
|
||||||
}
|
}
|
||||||
|
|
||||||
pix = ix - 1;
|
pix = ix - 1;
|
||||||
@@ -563,7 +578,7 @@ template<typename T> struct TextureInterpolator {
|
|||||||
const int yc[4] = {piy, iy, niy, nniy};
|
const int yc[4] = {piy, iy, niy, nniy};
|
||||||
const int zc[4] = {piz, iz, niz, nniz};
|
const int zc[4] = {piz, iz, niz, nniz};
|
||||||
return tricubic_lookup(
|
return tricubic_lookup(
|
||||||
(const T *)info.data, tx, ty, tz, xc, yc, zc, width, height, depth, read_clip);
|
(const TexT *)info.data, tx, ty, tz, xc, yc, zc, width, height, depth, read_clip);
|
||||||
}
|
}
|
||||||
case EXTENSION_EXTEND:
|
case EXTENSION_EXTEND:
|
||||||
pix = wrap_clamp(ix - 1, width);
|
pix = wrap_clamp(ix - 1, width);
|
||||||
@@ -583,21 +598,18 @@ template<typename T> struct TextureInterpolator {
|
|||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
kernel_assert(0);
|
kernel_assert(0);
|
||||||
return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
|
return zero<OutT>();
|
||||||
}
|
}
|
||||||
const int xc[4] = {pix, ix, nix, nnix};
|
const int xc[4] = {pix, ix, nix, nnix};
|
||||||
const int yc[4] = {piy, iy, niy, nniy};
|
const int yc[4] = {piy, iy, niy, nniy};
|
||||||
const int zc[4] = {piz, iz, niz, nniz};
|
const int zc[4] = {piz, iz, niz, nniz};
|
||||||
const T *data = (const T *)info.data;
|
const TexT *data = (const TexT *)info.data;
|
||||||
return tricubic_lookup(data, tx, ty, tz, xc, yc, zc, width, height, depth, read);
|
return tricubic_lookup(data, tx, ty, tz, xc, yc, zc, width, height, depth, read);
|
||||||
}
|
}
|
||||||
|
|
||||||
static ccl_always_inline float4
|
static ccl_always_inline OutT
|
||||||
interp_3d(const TextureInfo &info, float x, float y, float z, InterpolationType interp)
|
interp_3d(const TextureInfo &info, float x, float y, float z, InterpolationType interp)
|
||||||
{
|
{
|
||||||
if (UNLIKELY(!info.data))
|
|
||||||
return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
|
|
||||||
|
|
||||||
switch ((interp == INTERPOLATION_NONE) ? info.interpolation : interp) {
|
switch ((interp == INTERPOLATION_NONE) ? info.interpolation : interp) {
|
||||||
case INTERPOLATION_CLOSEST:
|
case INTERPOLATION_CLOSEST:
|
||||||
return interp_3d_closest(info, x, y, z);
|
return interp_3d_closest(info, x, y, z);
|
||||||
@@ -610,13 +622,13 @@ template<typename T> struct TextureInterpolator {
|
|||||||
};
|
};
|
||||||
|
|
||||||
#ifdef WITH_NANOVDB
|
#ifdef WITH_NANOVDB
|
||||||
template<typename T> struct NanoVDBInterpolator {
|
template<typename TexT, typename OutT = float4> struct NanoVDBInterpolator {
|
||||||
|
|
||||||
typedef typename nanovdb::NanoGrid<T>::AccessorType AccessorType;
|
typedef typename nanovdb::NanoGrid<TexT>::AccessorType AccessorType;
|
||||||
|
|
||||||
static ccl_always_inline float4 read(float r)
|
static ccl_always_inline float read(float r)
|
||||||
{
|
{
|
||||||
return make_float4(r, r, r, 1.0f);
|
return r;
|
||||||
}
|
}
|
||||||
|
|
||||||
static ccl_always_inline float4 read(nanovdb::Vec3f r)
|
static ccl_always_inline float4 read(nanovdb::Vec3f r)
|
||||||
@@ -624,7 +636,7 @@ template<typename T> struct NanoVDBInterpolator {
|
|||||||
return make_float4(r[0], r[1], r[2], 1.0f);
|
return make_float4(r[0], r[1], r[2], 1.0f);
|
||||||
}
|
}
|
||||||
|
|
||||||
static ccl_always_inline float4 interp_3d_closest(const AccessorType &acc,
|
static ccl_always_inline OutT interp_3d_closest(const AccessorType &acc,
|
||||||
float x,
|
float x,
|
||||||
float y,
|
float y,
|
||||||
float z)
|
float z)
|
||||||
@@ -633,7 +645,7 @@ template<typename T> struct NanoVDBInterpolator {
|
|||||||
return read(nanovdb::SampleFromVoxels<AccessorType, 0, false>(acc)(xyz));
|
return read(nanovdb::SampleFromVoxels<AccessorType, 0, false>(acc)(xyz));
|
||||||
}
|
}
|
||||||
|
|
||||||
static ccl_always_inline float4 interp_3d_linear(const AccessorType &acc,
|
static ccl_always_inline OutT interp_3d_linear(const AccessorType &acc,
|
||||||
float x,
|
float x,
|
||||||
float y,
|
float y,
|
||||||
float z)
|
float z)
|
||||||
@@ -648,7 +660,7 @@ template<typename T> struct NanoVDBInterpolator {
|
|||||||
# else
|
# else
|
||||||
static ccl_never_inline
|
static ccl_never_inline
|
||||||
# endif
|
# endif
|
||||||
float4
|
OutT
|
||||||
interp_3d_cubic(const AccessorType &acc, float x, float y, float z)
|
interp_3d_cubic(const AccessorType &acc, float x, float y, float z)
|
||||||
{
|
{
|
||||||
int ix, iy, iz;
|
int ix, iy, iz;
|
||||||
@@ -698,12 +710,12 @@ template<typename T> struct NanoVDBInterpolator {
|
|||||||
# undef DATA
|
# undef DATA
|
||||||
}
|
}
|
||||||
|
|
||||||
static ccl_always_inline float4
|
static ccl_always_inline OutT
|
||||||
interp_3d(const TextureInfo &info, float x, float y, float z, InterpolationType interp)
|
interp_3d(const TextureInfo &info, float x, float y, float z, InterpolationType interp)
|
||||||
{
|
{
|
||||||
using namespace nanovdb;
|
using namespace nanovdb;
|
||||||
|
|
||||||
NanoGrid<T> *const grid = (NanoGrid<T> *)info.data;
|
NanoGrid<TexT> *const grid = (NanoGrid<TexT> *)info.data;
|
||||||
AccessorType acc = grid->getAccessor();
|
AccessorType acc = grid->getAccessor();
|
||||||
|
|
||||||
switch ((interp == INTERPOLATION_NONE) ? info.interpolation : interp) {
|
switch ((interp == INTERPOLATION_NONE) ? info.interpolation : interp) {
|
||||||
@@ -724,15 +736,27 @@ ccl_device float4 kernel_tex_image_interp(KernelGlobals kg, int id, float x, flo
|
|||||||
{
|
{
|
||||||
const TextureInfo &info = kernel_tex_fetch(__texture_info, id);
|
const TextureInfo &info = kernel_tex_fetch(__texture_info, id);
|
||||||
|
|
||||||
|
if (UNLIKELY(!info.data)) {
|
||||||
|
return zero_float4();
|
||||||
|
}
|
||||||
|
|
||||||
switch (info.data_type) {
|
switch (info.data_type) {
|
||||||
case IMAGE_DATA_TYPE_HALF:
|
case IMAGE_DATA_TYPE_HALF: {
|
||||||
return TextureInterpolator<half>::interp(info, x, y);
|
const float f = TextureInterpolator<half, float>::interp(info, x, y);
|
||||||
case IMAGE_DATA_TYPE_BYTE:
|
return make_float4(f, f, f, 1.0f);
|
||||||
return TextureInterpolator<uchar>::interp(info, x, y);
|
}
|
||||||
case IMAGE_DATA_TYPE_USHORT:
|
case IMAGE_DATA_TYPE_BYTE: {
|
||||||
return TextureInterpolator<uint16_t>::interp(info, x, y);
|
const float f = TextureInterpolator<uchar, float>::interp(info, x, y);
|
||||||
case IMAGE_DATA_TYPE_FLOAT:
|
return make_float4(f, f, f, 1.0f);
|
||||||
return TextureInterpolator<float>::interp(info, x, y);
|
}
|
||||||
|
case IMAGE_DATA_TYPE_USHORT: {
|
||||||
|
const float f = TextureInterpolator<uint16_t, float>::interp(info, x, y);
|
||||||
|
return make_float4(f, f, f, 1.0f);
|
||||||
|
}
|
||||||
|
case IMAGE_DATA_TYPE_FLOAT: {
|
||||||
|
const float f = TextureInterpolator<float, float>::interp(info, x, y);
|
||||||
|
return make_float4(f, f, f, 1.0f);
|
||||||
|
}
|
||||||
case IMAGE_DATA_TYPE_HALF4:
|
case IMAGE_DATA_TYPE_HALF4:
|
||||||
return TextureInterpolator<half4>::interp(info, x, y);
|
return TextureInterpolator<half4>::interp(info, x, y);
|
||||||
case IMAGE_DATA_TYPE_BYTE4:
|
case IMAGE_DATA_TYPE_BYTE4:
|
||||||
@@ -755,19 +779,30 @@ ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals kg,
|
|||||||
{
|
{
|
||||||
const TextureInfo &info = kernel_tex_fetch(__texture_info, id);
|
const TextureInfo &info = kernel_tex_fetch(__texture_info, id);
|
||||||
|
|
||||||
|
if (UNLIKELY(!info.data)) {
|
||||||
|
return zero_float4();
|
||||||
|
}
|
||||||
|
|
||||||
if (info.use_transform_3d) {
|
if (info.use_transform_3d) {
|
||||||
P = transform_point(&info.transform_3d, P);
|
P = transform_point(&info.transform_3d, P);
|
||||||
}
|
}
|
||||||
|
|
||||||
switch (info.data_type) {
|
switch (info.data_type) {
|
||||||
case IMAGE_DATA_TYPE_HALF:
|
case IMAGE_DATA_TYPE_HALF: {
|
||||||
return TextureInterpolator<half>::interp_3d(info, P.x, P.y, P.z, interp);
|
const float f = TextureInterpolator<half, float>::interp_3d(info, P.x, P.y, P.z, interp);
|
||||||
case IMAGE_DATA_TYPE_BYTE:
|
return make_float4(f, f, f, 1.0f);
|
||||||
return TextureInterpolator<uchar>::interp_3d(info, P.x, P.y, P.z, interp);
|
}
|
||||||
case IMAGE_DATA_TYPE_USHORT:
|
case IMAGE_DATA_TYPE_BYTE: {
|
||||||
return TextureInterpolator<uint16_t>::interp_3d(info, P.x, P.y, P.z, interp);
|
const float f = TextureInterpolator<uchar, float>::interp_3d(info, P.x, P.y, P.z, interp);
|
||||||
case IMAGE_DATA_TYPE_FLOAT:
|
return make_float4(f, f, f, 1.0f);
|
||||||
return TextureInterpolator<float>::interp_3d(info, P.x, P.y, P.z, interp);
|
}
|
||||||
|
case IMAGE_DATA_TYPE_USHORT: {
|
||||||
|
const float f = TextureInterpolator<uint16_t, float>::interp_3d(info, P.x, P.y, P.z, interp);
|
||||||
|
return make_float4(f, f, f, 1.0f);
|
||||||
|
}
|
||||||
|
case IMAGE_DATA_TYPE_FLOAT: {
|
||||||
|
const float f = TextureInterpolator<float, float>::interp_3d(info, P.x, P.y, P.z, interp);
|
||||||
|
return make_float4(f, f, f, 1.0f);
|
||||||
|
}
|
||||||
case IMAGE_DATA_TYPE_HALF4:
|
case IMAGE_DATA_TYPE_HALF4:
|
||||||
return TextureInterpolator<half4>::interp_3d(info, P.x, P.y, P.z, interp);
|
return TextureInterpolator<half4>::interp_3d(info, P.x, P.y, P.z, interp);
|
||||||
case IMAGE_DATA_TYPE_BYTE4:
|
case IMAGE_DATA_TYPE_BYTE4:
|
||||||
@@ -777,8 +812,10 @@ ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals kg,
|
|||||||
case IMAGE_DATA_TYPE_FLOAT4:
|
case IMAGE_DATA_TYPE_FLOAT4:
|
||||||
return TextureInterpolator<float4>::interp_3d(info, P.x, P.y, P.z, interp);
|
return TextureInterpolator<float4>::interp_3d(info, P.x, P.y, P.z, interp);
|
||||||
#ifdef WITH_NANOVDB
|
#ifdef WITH_NANOVDB
|
||||||
case IMAGE_DATA_TYPE_NANOVDB_FLOAT:
|
case IMAGE_DATA_TYPE_NANOVDB_FLOAT: {
|
||||||
return NanoVDBInterpolator<float>::interp_3d(info, P.x, P.y, P.z, interp);
|
const float f = NanoVDBInterpolator<float, float>::interp_3d(info, P.x, P.y, P.z, interp);
|
||||||
|
return make_float4(f, f, f, 1.0f);
|
||||||
|
}
|
||||||
case IMAGE_DATA_TYPE_NANOVDB_FLOAT3:
|
case IMAGE_DATA_TYPE_NANOVDB_FLOAT3:
|
||||||
return NanoVDBInterpolator<nanovdb::Vec3f>::interp_3d(info, P.x, P.y, P.z, interp);
|
return NanoVDBInterpolator<nanovdb::Vec3f>::interp_3d(info, P.x, P.y, P.z, interp);
|
||||||
#endif
|
#endif
|
||||||
|
@@ -45,6 +45,7 @@ ccl_device_inline float4 make_float4(const int4 &i);
|
|||||||
ccl_device_inline void print_float4(const char *label, const float4 &a);
|
ccl_device_inline void print_float4(const char *label, const float4 &a);
|
||||||
#endif /* __KERNEL_GPU__ */
|
#endif /* __KERNEL_GPU__ */
|
||||||
|
|
||||||
|
ccl_device_inline float4 make_float4(float f);
|
||||||
CCL_NAMESPACE_END
|
CCL_NAMESPACE_END
|
||||||
|
|
||||||
#endif /* __UTIL_TYPES_FLOAT4_H__ */
|
#endif /* __UTIL_TYPES_FLOAT4_H__ */
|
||||||
|
@@ -89,6 +89,11 @@ ccl_device_inline void print_float4(const char *label, const float4 &a)
|
|||||||
{
|
{
|
||||||
printf("%s: %.8f %.8f %.8f %.8f\n", label, (double)a.x, (double)a.y, (double)a.z, (double)a.w);
|
printf("%s: %.8f %.8f %.8f %.8f\n", label, (double)a.x, (double)a.y, (double)a.z, (double)a.w);
|
||||||
}
|
}
|
||||||
|
#else
|
||||||
|
ccl_device_inline float4 make_float4(float f)
|
||||||
|
{
|
||||||
|
return make_float4(f, f, f, f);
|
||||||
|
}
|
||||||
#endif /* __KERNEL_GPU__ */
|
#endif /* __KERNEL_GPU__ */
|
||||||
|
|
||||||
CCL_NAMESPACE_END
|
CCL_NAMESPACE_END
|
||||||
|
Reference in New Issue
Block a user