Cycles: Add support for float4 textures on OpenCL.

The title says it all: this adds OpenCL float4 texture support.

There is still a bug in the code: I get an "Out of resources" error on NVIDIA hardware here, and I am not sure what's wrong yet.
Will investigate further, but maybe someone else has an idea. :)

Reviewers: #cycles, brecht

Subscribers: brecht, candreacchio

Differential Revision: https://developer.blender.org/D1983
This commit is contained in:
Thomas Dinges
2016-05-09 17:06:22 +02:00
parent dc82c2cd48
commit 76481eaeff
5 changed files with 78 additions and 31 deletions

View File

@@ -236,7 +236,8 @@ KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_149)
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_150) KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_150)
/* packed image (opencl) */ /* packed image (opencl) */
KERNEL_TEX(uchar4, texture_uchar4, __tex_image_packed) KERNEL_TEX(uchar4, texture_uchar4, __tex_image_byte4_packed)
KERNEL_TEX(float4, texture_float4, __tex_image_float4_packed)
KERNEL_TEX(uint4, texture_uint4, __tex_image_packed_info) KERNEL_TEX(uint4, texture_uint4, __tex_image_packed_info)
#undef KERNEL_TEX #undef KERNEL_TEX

View File

@@ -30,11 +30,16 @@ CCL_NAMESPACE_BEGIN
/* For OpenCL all images are packed in a single array, and we do manual lookup /* For OpenCL all images are packed in a single array, and we do manual lookup
* and interpolation. */ * and interpolation. */
ccl_device_inline float4 svm_image_texture_read(KernelGlobals *kg, int offset) ccl_device_inline float4 svm_image_texture_read(KernelGlobals *kg, int id, int offset)
{ {
uchar4 r = kernel_tex_fetch(__tex_image_packed, offset); if(id >= TEX_NUM_FLOAT4_IMAGES) {
uchar4 r = kernel_tex_fetch(__tex_image_byte4_packed, offset);
float f = 1.0f/255.0f; float f = 1.0f/255.0f;
return make_float4(r.x*f, r.y*f, r.z*f, r.w*f); return make_float4(r.x*f, r.y*f, r.z*f, r.w*f);
}
else {
return kernel_tex_fetch(__tex_image_float4_packed, offset);
}
} }
ccl_device_inline int svm_image_texture_wrap_periodic(int x, int width) ccl_device_inline int svm_image_texture_wrap_periodic(int x, int width)
@@ -81,7 +86,7 @@ ccl_device float4 svm_image_texture(KernelGlobals *kg, int id, float x, float y,
iy = svm_image_texture_wrap_clamp(iy, height); iy = svm_image_texture_wrap_clamp(iy, height);
} }
r = svm_image_texture_read(kg, offset + ix + iy*width); r = svm_image_texture_read(kg, id, offset + ix + iy*width);
} }
else { /* We default to linear interpolation if it is not closest */ else { /* We default to linear interpolation if it is not closest */
float tx = svm_image_texture_frac(x*width, &ix); float tx = svm_image_texture_frac(x*width, &ix);
@@ -103,10 +108,10 @@ ccl_device float4 svm_image_texture(KernelGlobals *kg, int id, float x, float y,
} }
r = (1.0f - ty)*(1.0f - tx)*svm_image_texture_read(kg, offset + ix + iy*width); r = (1.0f - ty)*(1.0f - tx)*svm_image_texture_read(kg, id, offset + ix + iy*width);
r += (1.0f - ty)*tx*svm_image_texture_read(kg, offset + nix + iy*width); r += (1.0f - ty)*tx*svm_image_texture_read(kg, id, offset + nix + iy*width);
r += ty*(1.0f - tx)*svm_image_texture_read(kg, offset + ix + niy*width); r += ty*(1.0f - tx)*svm_image_texture_read(kg, id, offset + ix + niy*width);
r += ty*tx*svm_image_texture_read(kg, offset + nix + niy*width); r += ty*tx*svm_image_texture_read(kg, id, offset + nix + niy*width);
} }
if(use_alpha && r.w != 1.0f && r.w != 0.0f) { if(use_alpha && r.w != 1.0f && r.w != 0.0f) {

View File

@@ -223,7 +223,7 @@ int ImageManager::add_image(const string& filename,
size_t slot; size_t slot;
/* Load image info and find out if we need a float texture. */ /* Load image info and find out if we need a float texture. */
is_float = (pack_images)? false: is_float_image(filename, builtin_data, is_linear); is_float = is_float_image(filename, builtin_data, is_linear);
ImageDataType type = is_float? IMAGE_DATA_TYPE_FLOAT4 : IMAGE_DATA_TYPE_BYTE4; ImageDataType type = is_float? IMAGE_DATA_TYPE_FLOAT4 : IMAGE_DATA_TYPE_BYTE4;
@@ -803,12 +803,16 @@ void ImageManager::device_pack_images(Device *device,
DeviceScene *dscene, DeviceScene *dscene,
Progress& /*progess*/) Progress& /*progess*/)
{ {
/* for OpenCL, we pack all image textures inside a single big texture, and /* For OpenCL, we pack all image textures into a single large texture, and
* will do our own interpolation in the kernel */ * do our own interpolation in the kernel. */
size_t size = 0; size_t size = 0, offset = 0;
ImageDataType type;
/* Only byte textures are supported atm */ int info_size = tex_num_images[IMAGE_DATA_TYPE_FLOAT4] + tex_num_images[IMAGE_DATA_TYPE_BYTE4];
ImageDataType type = IMAGE_DATA_TYPE_BYTE4; uint4 *info = dscene->tex_image_packed_info.resize(info_size);
/* Byte Textures*/
type = IMAGE_DATA_TYPE_BYTE4;
for(size_t slot = 0; slot < images[type].size(); slot++) { for(size_t slot = 0; slot < images[type].size(); slot++) {
if(!images[type][slot]) if(!images[type][slot])
@@ -818,10 +822,7 @@ void ImageManager::device_pack_images(Device *device,
size += tex_img.size(); size += tex_img.size();
} }
uint4 *info = dscene->tex_image_packed_info.resize(images[type].size()); uchar4 *pixels_byte = dscene->tex_image_byte4_packed.resize(size);
uchar4 *pixels = dscene->tex_image_packed.resize(size);
size_t offset = 0;
for(size_t slot = 0; slot < images[type].size(); slot++) { for(size_t slot = 0; slot < images[type].size(); slot++) {
if(!images[type][slot]) if(!images[type][slot])
@@ -829,24 +830,61 @@ void ImageManager::device_pack_images(Device *device,
device_vector<uchar4>& tex_img = dscene->tex_byte4_image[slot]; device_vector<uchar4>& tex_img = dscene->tex_byte4_image[slot];
/* The image options are packed
bit 0 -> periodic
bit 1 + 2 -> interpolation type */
uint8_t interpolation = (images[type][slot]->interpolation << 1) + 1;
info[type_index_to_flattened_slot(slot, type)] = make_uint4(tex_img.data_width, tex_img.data_height, offset, interpolation);
memcpy(pixels_byte+offset, (void*)tex_img.data_pointer, tex_img.memory_size());
offset += tex_img.size();
}
/* Float Textures*/
type = IMAGE_DATA_TYPE_FLOAT4;
size = 0, offset = 0;
for(size_t slot = 0; slot < images[type].size(); slot++) {
if(!images[type][slot])
continue;
device_vector<float4>& tex_img = dscene->tex_float4_image[slot];
size += tex_img.size();
}
float4 *pixels_float = dscene->tex_image_float4_packed.resize(size);
for(size_t slot = 0; slot < images[type].size(); slot++) {
if(!images[type][slot])
continue;
device_vector<float4>& tex_img = dscene->tex_float4_image[slot];
/* todo: support 3D textures, only CPU for now */ /* todo: support 3D textures, only CPU for now */
/* The image options are packed /* The image options are packed
bit 0 -> periodic bit 0 -> periodic
bit 1 + 2 -> interpolation type */ bit 1 + 2 -> interpolation type */
uint8_t interpolation = (images[type][slot]->interpolation << 1) + 1; uint8_t interpolation = (images[type][slot]->interpolation << 1) + 1;
info[slot] = make_uint4(tex_img.data_width, tex_img.data_height, offset, interpolation); info[type_index_to_flattened_slot(slot, type)] = make_uint4(tex_img.data_width, tex_img.data_height, offset, interpolation);
memcpy(pixels+offset, (void*)tex_img.data_pointer, tex_img.memory_size()); memcpy(pixels_float+offset, (void*)tex_img.data_pointer, tex_img.memory_size());
offset += tex_img.size(); offset += tex_img.size();
} }
if(dscene->tex_image_packed.size()) { if(dscene->tex_image_byte4_packed.size()) {
if(dscene->tex_image_packed.device_pointer) { if(dscene->tex_image_byte4_packed.device_pointer) {
thread_scoped_lock device_lock(device_mutex); thread_scoped_lock device_lock(device_mutex);
device->tex_free(dscene->tex_image_packed); device->tex_free(dscene->tex_image_byte4_packed);
} }
device->tex_alloc("__tex_image_packed", dscene->tex_image_packed); device->tex_alloc("__tex_image_byte4_packed", dscene->tex_image_byte4_packed);
}
if(dscene->tex_image_float4_packed.size()) {
if(dscene->tex_image_float4_packed.device_pointer) {
thread_scoped_lock device_lock(device_mutex);
device->tex_free(dscene->tex_image_float4_packed);
}
device->tex_alloc("__tex_image_float4_packed", dscene->tex_image_float4_packed);
} }
if(dscene->tex_image_packed_info.size()) { if(dscene->tex_image_packed_info.size()) {
if(dscene->tex_image_packed_info.device_pointer) { if(dscene->tex_image_packed_info.device_pointer) {
@@ -876,10 +914,12 @@ void ImageManager::device_free(Device *device, DeviceScene *dscene)
images[type].clear(); images[type].clear();
} }
device->tex_free(dscene->tex_image_packed); device->tex_free(dscene->tex_image_byte4_packed);
device->tex_free(dscene->tex_image_float4_packed);
device->tex_free(dscene->tex_image_packed_info); device->tex_free(dscene->tex_image_packed_info);
dscene->tex_image_packed.clear(); dscene->tex_image_byte4_packed.clear();
dscene->tex_image_float4_packed.clear();
dscene->tex_image_packed_info.clear(); dscene->tex_image_packed_info.clear();
} }

View File

@@ -113,7 +113,8 @@ public:
device_vector<float4> tex_float4_image[TEX_NUM_FLOAT4_IMAGES_CPU]; device_vector<float4> tex_float4_image[TEX_NUM_FLOAT4_IMAGES_CPU];
/* opencl images */ /* opencl images */
device_vector<uchar4> tex_image_packed; device_vector<uchar4> tex_image_byte4_packed;
device_vector<float4> tex_image_float4_packed;
device_vector<uint4> tex_image_packed_info; device_vector<uint4> tex_image_packed_info;
KernelData data; KernelData data;

View File

@@ -38,7 +38,7 @@ CCL_NAMESPACE_BEGIN
/* OpenCL */ /* OpenCL */
#define TEX_NUM_BYTE4_IMAGES_OPENCL 1024 #define TEX_NUM_BYTE4_IMAGES_OPENCL 1024
#define TEX_NUM_FLOAT4_IMAGES_OPENCL 0 #define TEX_NUM_FLOAT4_IMAGES_OPENCL 1024
#define TEX_IMAGE_BYTE4_START_OPENCL TEX_NUM_FLOAT4_IMAGES_OPENCL #define TEX_IMAGE_BYTE4_START_OPENCL TEX_NUM_FLOAT4_IMAGES_OPENCL