Cycles: OpenCL image texture support, fix an attribute node issue, and refactor the feature-enabling #defines a bit.
This commit is contained in:
Brecht Van Lommel
2012-05-13 12:32:44 +00:00
parent f964292630
commit dd9c1b7fbf
19 changed files with 222 additions and 23 deletions

View File

@@ -53,6 +53,7 @@ public:
int num; int num;
bool display_device; bool display_device;
bool advanced_shading; bool advanced_shading;
bool pack_images;
vector<DeviceInfo> multi_devices; vector<DeviceInfo> multi_devices;
DeviceInfo() DeviceInfo()
@@ -62,6 +63,7 @@ public:
num = 0; num = 0;
display_device = false; display_device = false;
advanced_shading = true; advanced_shading = true;
pack_images = false;
} }
}; };

View File

@@ -260,6 +260,7 @@ void device_cpu_info(vector<DeviceInfo>& devices)
info.id = "CPU"; info.id = "CPU";
info.num = 0; info.num = 0;
info.advanced_shading = true; info.advanced_shading = true;
info.pack_images = false;
devices.insert(devices.begin(), info); devices.insert(devices.begin(), info);
} }

View File

@@ -877,6 +877,7 @@ void device_cuda_info(vector<DeviceInfo>& devices)
int major, minor; int major, minor;
cuDeviceComputeCapability(&major, &minor, num); cuDeviceComputeCapability(&major, &minor, num);
info.advanced_shading = (major >= 2); info.advanced_shading = (major >= 2);
info.pack_images = false;
/* if device has a kernel timeout, assume it is used for display */ /* if device has a kernel timeout, assume it is used for display */
if(cuDeviceGetAttribute(&attr, CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT, num) == CUDA_SUCCESS && attr == 1) { if(cuDeviceGetAttribute(&attr, CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT, num) == CUDA_SUCCESS && attr == 1) {

View File

@@ -304,6 +304,7 @@ static bool device_multi_add(vector<DeviceInfo>& devices, DeviceType type, bool
int num_added = 0, num_display = 0; int num_added = 0, num_display = 0;
info.advanced_shading = with_advanced_shading; info.advanced_shading = with_advanced_shading;
info.pack_images = false;
foreach(DeviceInfo& subinfo, devices) { foreach(DeviceInfo& subinfo, devices) {
if(subinfo.type == type) { if(subinfo.type == type) {
@@ -326,6 +327,7 @@ static bool device_multi_add(vector<DeviceInfo>& devices, DeviceType type, bool
info.multi_devices.push_back(subinfo); info.multi_devices.push_back(subinfo);
if(subinfo.display_device) if(subinfo.display_device)
info.display_device = true; info.display_device = true;
info.pack_images = info.pack_images || subinfo.pack_images;
num_added++; num_added++;
} }
} }

View File

@@ -212,7 +212,7 @@ public:
{ {
char version[256]; char version[256];
int major, minor, req_major = 1, req_minor = 1; int major, minor, req_major = 1, req_minor = 0;
clGetPlatformInfo(cpPlatform, CL_PLATFORM_VERSION, sizeof(version), &version, NULL); clGetPlatformInfo(cpPlatform, CL_PLATFORM_VERSION, sizeof(version), &version, NULL);
@@ -300,15 +300,15 @@ public:
/* Multi Closure for nVidia cards */ /* Multi Closure for nVidia cards */
if(platform_name == "NVIDIA CUDA") if(platform_name == "NVIDIA CUDA")
build_options += "-D__KERNEL_SHADING__ -D__MULTI_CLOSURE__ -cl-nv-maxrregcount=24 -cl-nv-verbose "; build_options += "-D__KERNEL_SHADING__ -D__KERNEL_OPENCL_NVIDIA__ -cl-nv-maxrregcount=24 -cl-nv-verbose ";
/* No Float3 for Apple */ /* No Float3 for Apple */
else if(platform_name == "Apple") else if(platform_name == "Apple")
build_options += "-D__CL_NO_FLOAT3__ "; build_options += "-D__CL_NO_FLOAT3__ -D__KERNEL_OPENCL_APPLE__ ";
/* Basic shading for AMD cards (non Apple) */ /* Basic shading for AMD cards (non Apple) */
else if(platform_name == "AMD Accelerated Parallel Processing") else if(platform_name == "AMD Accelerated Parallel Processing")
build_options += "-D__KERNEL_SHADING__ -D__CL_NO_FLOAT3__ "; build_options += "-D__CL_NO_FLOAT3__ -D__KERNEL_OPENCL_AMD__ ";
return build_options; return build_options;
} }
@@ -743,6 +743,7 @@ void device_opencl_info(vector<DeviceInfo>& devices)
/* we don't know if it's used for display, but assume it is */ /* we don't know if it's used for display, but assume it is */
info.display_device = true; info.display_device = true;
info.advanced_shading = false; info.advanced_shading = false;
info.pack_images = true;
devices.push_back(info); devices.push_back(info);
} }

View File

@@ -146,6 +146,7 @@ typedef texture<float> texture_float;
typedef texture<uint> texture_uint; typedef texture<uint> texture_uint;
typedef texture<int> texture_int; typedef texture<int> texture_int;
typedef texture<uint4> texture_uint4; typedef texture<uint4> texture_uint4;
typedef texture<uchar4> texture_uchar4;
typedef texture_image<float4> texture_image_float4; typedef texture_image<float4> texture_image_float4;
typedef texture_image<uchar4> texture_image_uchar4; typedef texture_image<uchar4> texture_image_uchar4;

View File

@@ -50,6 +50,7 @@ typedef texture<float, 1> texture_float;
typedef texture<uint, 1> texture_uint; typedef texture<uint, 1> texture_uint;
typedef texture<int, 1> texture_int; typedef texture<int, 1> texture_int;
typedef texture<uint4, 1> texture_uint4; typedef texture<uint4, 1> texture_uint4;
typedef texture<uchar4, 1> texture_uchar4;
typedef texture<float4, 2> texture_image_float4; typedef texture<float4, 2> texture_image_float4;
typedef texture<uchar4, 2, cudaReadModeNormalizedFloat> texture_image_uchar4; typedef texture<uchar4, 2, cudaReadModeNormalizedFloat> texture_image_uchar4;

View File

@@ -59,6 +59,7 @@ __device float3 area_light_sample(float3 axisu, float3 axisv, float randu, float
return axisu*randu + axisv*randv; return axisu*randu + axisv*randv;
} }
#ifdef __BACKGROUND_MIS__
__device float3 background_light_sample(KernelGlobals *kg, float randu, float randv, float *pdf) __device float3 background_light_sample(KernelGlobals *kg, float randu, float randv, float *pdf)
{ {
/* for the following, the CDF values are actually a pair of floats, with the /* for the following, the CDF values are actually a pair of floats, with the
@@ -165,6 +166,7 @@ __device float background_light_pdf(KernelGlobals *kg, float3 direction)
return pdf * kernel_data.integrator.pdf_lights; return pdf * kernel_data.integrator.pdf_lights;
} }
#endif
__device void regular_light_sample(KernelGlobals *kg, int point, __device void regular_light_sample(KernelGlobals *kg, int point,
float randu, float randv, float3 P, LightSample *ls, float *pdf) float randu, float randv, float3 P, LightSample *ls, float *pdf)

View File

@@ -7,7 +7,6 @@
#define KERNEL_IMAGE_TEX(type, ttype, name) #define KERNEL_IMAGE_TEX(type, ttype, name)
#endif #endif
/* bvh */ /* bvh */
KERNEL_TEX(float4, texture_float4, __bvh_nodes) KERNEL_TEX(float4, texture_float4, __bvh_nodes)
KERNEL_TEX(float4, texture_float4, __tri_woop) KERNEL_TEX(float4, texture_float4, __tri_woop)
@@ -151,6 +150,10 @@ KERNEL_IMAGE_TEX(float4, texture_image_float4, __tex_image_float_097)
KERNEL_IMAGE_TEX(float4, texture_image_float4, __tex_image_float_098) KERNEL_IMAGE_TEX(float4, texture_image_float4, __tex_image_float_098)
KERNEL_IMAGE_TEX(float4, texture_image_float4, __tex_image_float_099) KERNEL_IMAGE_TEX(float4, texture_image_float4, __tex_image_float_099)
/* packed image (opencl) */
KERNEL_TEX(uchar4, texture_uchar4, __tex_image_packed)
KERNEL_TEX(uint4, texture_uint4, __tex_image_packed_info)
#undef KERNEL_TEX #undef KERNEL_TEX
#undef KERNEL_IMAGE_TEX #undef KERNEL_IMAGE_TEX

View File

@@ -49,8 +49,30 @@ CCL_NAMESPACE_BEGIN
#endif #endif
#ifdef __KERNEL_OPENCL__ #ifdef __KERNEL_OPENCL__
//#define __KERNEL_SHADING__
//#define __KERNEL_ADV_SHADING__ #ifdef __KERNEL_OPENCL_NVIDIA__
#define __KERNEL_SHADING__
#define __MULTI_CLOSURE__
#endif
#ifdef __KERNEL_OPENCL_APPLE__
//#define __SVM__
//#define __EMISSION__
//#define __IMAGE_TEXTURES__
//#define __HOLDOUT__
//#define __PROCEDURAL_TEXTURES__
//#define __EXTRA_NODES__
#endif
#ifdef __KERNEL_OPENCL_AMD__
#define __SVM__
#define __EMISSION__
#define __IMAGE_TEXTURES__
#define __HOLDOUT__
#define __PROCEDURAL_TEXTURES__
#define __EXTRA_NODES__
#endif
#endif #endif
/* kernel features */ /* kernel features */
@@ -69,7 +91,9 @@ CCL_NAMESPACE_BEGIN
#ifdef __KERNEL_SHADING__ #ifdef __KERNEL_SHADING__
#define __SVM__ #define __SVM__
#define __EMISSION__ #define __EMISSION__
#define __TEXTURES__ #define __PROCEDURAL_TEXTURES__
#define __IMAGE_TEXTURES__
#define __EXTRA_NODES__
#define __HOLDOUT__ #define __HOLDOUT__
#endif #endif
@@ -85,7 +109,6 @@ CCL_NAMESPACE_BEGIN
//#define __MULTI_LIGHT__ //#define __MULTI_LIGHT__
//#define __OSL__ //#define __OSL__
//#define __SOBOL_FULL_SCREEN__ //#define __SOBOL_FULL_SCREEN__
//#define __MODIFY_TP__
//#define __QBVH__ //#define __QBVH__
/* Shader Evaluation */ /* Shader Evaluation */

View File

@@ -216,13 +216,15 @@ __device_noinline void svm_eval_nodes(KernelGlobals *kg, ShaderData *sd, ShaderT
case NODE_JUMP: case NODE_JUMP:
offset = node.y; offset = node.y;
break; break;
#ifdef __TEXTURES__ #ifdef __IMAGE_TEXTURES__
case NODE_TEX_IMAGE: case NODE_TEX_IMAGE:
svm_node_tex_image(kg, sd, stack, node); svm_node_tex_image(kg, sd, stack, node);
break; break;
case NODE_TEX_ENVIRONMENT: case NODE_TEX_ENVIRONMENT:
svm_node_tex_environment(kg, sd, stack, node); svm_node_tex_environment(kg, sd, stack, node);
break; break;
#endif
#ifdef __PROCEDURAL_TEXTURES__
case NODE_TEX_SKY: case NODE_TEX_SKY:
svm_node_tex_sky(kg, sd, stack, node.y, node.z); svm_node_tex_sky(kg, sd, stack, node.y, node.z);
break; break;
@@ -254,6 +256,7 @@ __device_noinline void svm_eval_nodes(KernelGlobals *kg, ShaderData *sd, ShaderT
case NODE_GEOMETRY: case NODE_GEOMETRY:
svm_node_geometry(sd, stack, node.y, node.z); svm_node_geometry(sd, stack, node.y, node.z);
break; break;
#ifdef __EXTRA_NODES__
case NODE_GEOMETRY_BUMP_DX: case NODE_GEOMETRY_BUMP_DX:
svm_node_geometry_bump_dx(sd, stack, node.y, node.z); svm_node_geometry_bump_dx(sd, stack, node.y, node.z);
break; break;
@@ -263,6 +266,7 @@ __device_noinline void svm_eval_nodes(KernelGlobals *kg, ShaderData *sd, ShaderT
case NODE_LIGHT_PATH: case NODE_LIGHT_PATH:
svm_node_light_path(sd, stack, node.y, node.z, path_flag); svm_node_light_path(sd, stack, node.y, node.z, path_flag);
break; break;
#endif
case NODE_CONVERT: case NODE_CONVERT:
svm_node_convert(sd, stack, node.y, node.z, node.w); svm_node_convert(sd, stack, node.y, node.z, node.w);
break; break;
@@ -272,6 +276,7 @@ __device_noinline void svm_eval_nodes(KernelGlobals *kg, ShaderData *sd, ShaderT
case NODE_VALUE_V: case NODE_VALUE_V:
svm_node_value_v(kg, sd, stack, node.y, &offset); svm_node_value_v(kg, sd, stack, node.y, &offset);
break; break;
#ifdef __EXTRA_NODES__
case NODE_INVERT: case NODE_INVERT:
svm_node_invert(sd, stack, node.y, node.z, node.w); svm_node_invert(sd, stack, node.y, node.z, node.w);
break; break;
@@ -293,21 +298,25 @@ __device_noinline void svm_eval_nodes(KernelGlobals *kg, ShaderData *sd, ShaderT
case NODE_HSV: case NODE_HSV:
svm_node_hsv(kg, sd, stack, node.y, node.z, node.w, &offset); svm_node_hsv(kg, sd, stack, node.y, node.z, node.w, &offset);
break; break;
#endif
case NODE_ATTR: case NODE_ATTR:
svm_node_attr(kg, sd, stack, node); svm_node_attr(kg, sd, stack, node);
break; break;
#ifdef __EXTRA_NODES__
case NODE_ATTR_BUMP_DX: case NODE_ATTR_BUMP_DX:
svm_node_attr_bump_dx(kg, sd, stack, node); svm_node_attr_bump_dx(kg, sd, stack, node);
break; break;
case NODE_ATTR_BUMP_DY: case NODE_ATTR_BUMP_DY:
svm_node_attr_bump_dy(kg, sd, stack, node); svm_node_attr_bump_dy(kg, sd, stack, node);
break; break;
#endif
case NODE_FRESNEL: case NODE_FRESNEL:
svm_node_fresnel(sd, stack, node.y, node.z, node.w); svm_node_fresnel(sd, stack, node.y, node.z, node.w);
break; break;
case NODE_LAYER_WEIGHT: case NODE_LAYER_WEIGHT:
svm_node_layer_weight(sd, stack, node); svm_node_layer_weight(sd, stack, node);
break; break;
#ifdef __EXTRA_NODES__
case NODE_SET_DISPLACEMENT: case NODE_SET_DISPLACEMENT:
svm_node_set_displacement(sd, stack, node.y); svm_node_set_displacement(sd, stack, node.y);
break; break;
@@ -323,6 +332,7 @@ __device_noinline void svm_eval_nodes(KernelGlobals *kg, ShaderData *sd, ShaderT
case NODE_NORMAL: case NODE_NORMAL:
svm_node_normal(kg, sd, stack, node.y, node.z, node.w, &offset); svm_node_normal(kg, sd, stack, node.y, node.z, node.w, &offset);
break; break;
#endif
case NODE_MAPPING: case NODE_MAPPING:
svm_node_mapping(kg, sd, stack, node.y, node.z, &offset); svm_node_mapping(kg, sd, stack, node.y, node.z, &offset);
break; break;
@@ -332,15 +342,18 @@ __device_noinline void svm_eval_nodes(KernelGlobals *kg, ShaderData *sd, ShaderT
case NODE_TEX_COORD: case NODE_TEX_COORD:
svm_node_tex_coord(kg, sd, stack, node.y, node.z); svm_node_tex_coord(kg, sd, stack, node.y, node.z);
break; break;
#ifdef __EXTRA_NODES__
case NODE_TEX_COORD_BUMP_DX: case NODE_TEX_COORD_BUMP_DX:
svm_node_tex_coord_bump_dx(kg, sd, stack, node.y, node.z); svm_node_tex_coord_bump_dx(kg, sd, stack, node.y, node.z);
break; break;
case NODE_TEX_COORD_BUMP_DY: case NODE_TEX_COORD_BUMP_DY:
svm_node_tex_coord_bump_dy(kg, sd, stack, node.y, node.z); svm_node_tex_coord_bump_dy(kg, sd, stack, node.y, node.z);
break; break;
#endif
case NODE_EMISSION_SET_WEIGHT_TOTAL: case NODE_EMISSION_SET_WEIGHT_TOTAL:
svm_node_emission_set_weight_total(kg, sd, node.y, node.z, node.w); svm_node_emission_set_weight_total(kg, sd, node.y, node.z, node.w);
break; break;
#ifdef __EXTRA_NODES__
case NODE_RGB_RAMP: case NODE_RGB_RAMP:
svm_node_rgb_ramp(kg, sd, stack, node, &offset); svm_node_rgb_ramp(kg, sd, stack, node, &offset);
break; break;
@@ -350,6 +363,7 @@ __device_noinline void svm_eval_nodes(KernelGlobals *kg, ShaderData *sd, ShaderT
case NODE_LIGHT_FALLOFF: case NODE_LIGHT_FALLOFF:
svm_node_light_falloff(sd, stack, node); svm_node_light_falloff(sd, stack, node);
break; break;
#endif
case NODE_END: case NODE_END:
default: default:
#ifndef __MULTI_CLOSURE__ #ifndef __MULTI_CLOSURE__

View File

@@ -22,7 +22,7 @@ CCL_NAMESPACE_BEGIN
__device void svm_node_attr_init(KernelGlobals *kg, ShaderData *sd, __device void svm_node_attr_init(KernelGlobals *kg, ShaderData *sd,
uint4 node, NodeAttributeType *type, uint4 node, NodeAttributeType *type,
NodeAttributeType *mesh_type, AttributeElement *elem, uint *offset, uint *out_offset) NodeAttributeType *mesh_type, AttributeElement *elem, int *offset, uint *out_offset)
{ {
if(sd->object != ~0) { if(sd->object != ~0) {
/* find attribute by unique id */ /* find attribute by unique id */
@@ -35,7 +35,7 @@ __device void svm_node_attr_init(KernelGlobals *kg, ShaderData *sd,
/* return result */ /* return result */
*elem = (AttributeElement)attr_map.y; *elem = (AttributeElement)attr_map.y;
*offset = attr_map.z; *offset = as_int(attr_map.z);
*mesh_type = (NodeAttributeType)attr_map.w; *mesh_type = (NodeAttributeType)attr_map.w;
} }
else { else {
@@ -53,7 +53,8 @@ __device void svm_node_attr(KernelGlobals *kg, ShaderData *sd, float *stack, uin
{ {
NodeAttributeType type, mesh_type; NodeAttributeType type, mesh_type;
AttributeElement elem; AttributeElement elem;
uint offset, out_offset; uint out_offset;
int offset;
svm_node_attr_init(kg, sd, node, &type, &mesh_type, &elem, &offset, &out_offset); svm_node_attr_init(kg, sd, node, &type, &mesh_type, &elem, &offset, &out_offset);
@@ -84,7 +85,8 @@ __device void svm_node_attr_bump_dx(KernelGlobals *kg, ShaderData *sd, float *st
{ {
NodeAttributeType type, mesh_type; NodeAttributeType type, mesh_type;
AttributeElement elem; AttributeElement elem;
uint offset, out_offset; uint out_offset;
int offset;
svm_node_attr_init(kg, sd, node, &type, &mesh_type, &elem, &offset, &out_offset); svm_node_attr_init(kg, sd, node, &type, &mesh_type, &elem, &offset, &out_offset);
@@ -119,7 +121,8 @@ __device void svm_node_attr_bump_dy(KernelGlobals *kg, ShaderData *sd, float *st
{ {
NodeAttributeType type, mesh_type; NodeAttributeType type, mesh_type;
AttributeElement elem; AttributeElement elem;
uint offset, out_offset; uint out_offset;
int offset;
svm_node_attr_init(kg, sd, node, &type, &mesh_type, &elem, &offset, &out_offset); svm_node_attr_init(kg, sd, node, &type, &mesh_type, &elem, &offset, &out_offset);

View File

@@ -18,6 +18,75 @@
CCL_NAMESPACE_BEGIN CCL_NAMESPACE_BEGIN
#ifdef __KERNEL_OPENCL__
/* For OpenCL all images are packed in a single array, and we do manual lookup
* and interpolation. */
/* Fetch one texel from the packed 8-bit image array and convert it to float.
 *
 * offset: index of the texel inside __tex_image_packed.
 * Returns the four channels, each multiplied by 1/255. */
__device_inline float4 svm_image_texture_read(KernelGlobals *kg, int offset)
{
	const float scale = 1.0f/255.0f;
	uchar4 texel = kernel_tex_fetch(__tex_image_packed, offset);

	return make_float4(texel.x*scale, texel.y*scale, texel.z*scale, texel.w*scale);
}
/* Wrap a texel coordinate into [0, width) for repeating images.
 *
 * The % operator may yield a negative remainder for negative x, so that
 * case is shifted up by one period. */
__device_inline int svm_image_texture_wrap_periodic(int x, int width)
{
	int wrapped = x % width;

	return (wrapped < 0)? wrapped + width: wrapped;
}
/* Limit a texel coordinate to the valid index range [0, width-1] for
 * non-repeating images.
 * NOTE(review): the result for width <= 0 depends on how clamp() behaves
 * when its lower bound exceeds its upper bound -- confirm callers never
 * pass an empty image. */
__device_inline int svm_image_texture_wrap_clamp(int x, int width)
{
return clamp(x, 0, width-1);
}
/* Split x into an integer texel index and a fractional interpolation weight.
 *
 * The index is written to *ix and the remainder x - index is returned.
 * For negative x the truncated value is lowered by one so the returned
 * remainder is not negative; note that a negative x that is already a
 * whole number therefore yields index x-1 and a remainder of 1.0. */
__device_inline float svm_image_texture_frac(float x, int *ix)
{
	int cell = (int)x;

	if(x < 0.0f)
		cell -= 1;

	*ix = cell;
	return x - (float)cell;
}
/* Packed-image texture lookup with manual bilinear interpolation.
 *
 * id:   index into __tex_image_packed_info selecting the image.
 * x, y: lookup coordinates; they are scaled by the image width and height
 *       to obtain texel positions.
 * Returns the weighted sum of the four texels surrounding the position. */
__device float4 svm_image_texture(KernelGlobals *kg, int id, float x, float y)
{
/* per-image record: x = width, y = height, z = offset of the first texel
 * in __tex_image_packed, w = non-zero for periodic wrapping */
uint4 info = kernel_tex_fetch(__tex_image_packed_info, id);
uint width = info.x;
uint height = info.y;
uint offset = info.z;
uint periodic = info.w;
int ix, iy, nix, niy;
/* integer texel index goes to ix/iy, fractional weight to tx/ty */
float tx = svm_image_texture_frac(x*width, &ix);
float ty = svm_image_texture_frac(y*height, &iy);
/* ix/iy are wrapped first; the neighbours nix/niy are then derived from
 * the already wrapped values, so the statement order matters here */
if(periodic) {
ix = svm_image_texture_wrap_periodic(ix, width);
iy = svm_image_texture_wrap_periodic(iy, height);
nix = svm_image_texture_wrap_periodic(ix+1, width);
niy = svm_image_texture_wrap_periodic(iy+1, height);
}
else {
ix = svm_image_texture_wrap_clamp(ix, width);
iy = svm_image_texture_wrap_clamp(iy, height);
nix = svm_image_texture_wrap_clamp(ix+1, width);
niy = svm_image_texture_wrap_clamp(iy+1, height);
}
/* bilinear blend; texels are addressed row by row as
 * offset + column + row*width */
float4 r = (1.0f - ty)*(1.0f - tx)*svm_image_texture_read(kg, offset + ix + iy*width);
r += (1.0f - ty)*tx*svm_image_texture_read(kg, offset + nix + iy*width);
r += ty*(1.0f - tx)*svm_image_texture_read(kg, offset + ix + niy*width);
r += ty*tx*svm_image_texture_read(kg, offset + nix + niy*width);
return r;
}
#else
__device float4 svm_image_texture(KernelGlobals *kg, int id, float x, float y) __device float4 svm_image_texture(KernelGlobals *kg, int id, float x, float y)
{ {
float4 r; float4 r;
@@ -31,9 +100,6 @@ __device float4 svm_image_texture(KernelGlobals *kg, int id, float x, float y)
also note that cuda has 128 textures limit, we use 100 now, since also note that cuda has 128 textures limit, we use 100 now, since
we still need some for other storage */ we still need some for other storage */
#ifdef __KERNEL_OPENCL__
r = make_float4(0.0f, 0.0f, 0.0f, 0.0f); /* todo */
#else
switch(id) { switch(id) {
case 0: r = kernel_tex_image_interp(__tex_image_000, x, y); break; case 0: r = kernel_tex_image_interp(__tex_image_000, x, y); break;
case 1: r = kernel_tex_image_interp(__tex_image_001, x, y); break; case 1: r = kernel_tex_image_interp(__tex_image_001, x, y); break;
@@ -139,11 +205,12 @@ __device float4 svm_image_texture(KernelGlobals *kg, int id, float x, float y)
kernel_assert(0); kernel_assert(0);
return make_float4(0.0f, 0.0f, 0.0f, 0.0f); return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
} }
#endif
return r; return r;
} }
#endif
__device void svm_node_tex_image(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node) __device void svm_node_tex_image(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node)
{ {
uint id = node.y; uint id = node.y;

View File

@@ -34,6 +34,7 @@ CCL_NAMESPACE_BEGIN
ImageManager::ImageManager() ImageManager::ImageManager()
{ {
need_update = true; need_update = true;
pack_images = false;
osl_texture_system = NULL; osl_texture_system = NULL;
} }
@@ -45,6 +46,11 @@ ImageManager::~ImageManager()
assert(!float_images[slot]); assert(!float_images[slot]);
} }
/* Store whether images are to be packed into a single array.
 * The flag is read by add_image(), device_load_image() and device_update(). */
void ImageManager::set_pack_images(bool pack_images_)
{
pack_images = pack_images_;
}
void ImageManager::set_osl_texture_system(void *texture_system) void ImageManager::set_osl_texture_system(void *texture_system)
{ {
osl_texture_system = texture_system; osl_texture_system = texture_system;
@@ -84,7 +90,7 @@ int ImageManager::add_image(const string& filename, bool& is_float)
size_t slot; size_t slot;
/* load image info and find out if we need a float texture */ /* load image info and find out if we need a float texture */
is_float = is_float_image(filename); is_float = (pack_images)? false: is_float_image(filename);
if(is_float) { if(is_float) {
/* find existing image */ /* find existing image */
@@ -361,7 +367,8 @@ void ImageManager::device_load_image(Device *device, DeviceScene *dscene, int sl
if(slot >= 10) name = string_printf("__tex_image_float_0%d", slot); if(slot >= 10) name = string_printf("__tex_image_float_0%d", slot);
else name = string_printf("__tex_image_float_00%d", slot); else name = string_printf("__tex_image_float_00%d", slot);
device->tex_alloc(name.c_str(), tex_img, true, true); if(!pack_images)
device->tex_alloc(name.c_str(), tex_img, true, true);
} }
else { else {
string filename = path_filename(images[slot]->filename); string filename = path_filename(images[slot]->filename);
@@ -387,7 +394,8 @@ void ImageManager::device_load_image(Device *device, DeviceScene *dscene, int sl
if(slot >= 10) name = string_printf("__tex_image_0%d", slot); if(slot >= 10) name = string_printf("__tex_image_0%d", slot);
else name = string_printf("__tex_image_00%d", slot); else name = string_printf("__tex_image_00%d", slot);
device->tex_alloc(name.c_str(), tex_img, true, true); if(!pack_images)
device->tex_alloc(name.c_str(), tex_img, true, true);
} }
img->need_load = false; img->need_load = false;
@@ -466,9 +474,49 @@ void ImageManager::device_update(Device *device, DeviceScene *dscene, Progress&
pool.wait_work(); pool.wait_work();
if(pack_images)
device_pack_images(device, dscene, progress);
need_update = false; need_update = false;
} }
/* Concatenate all loaded byte images into dscene->tex_image_packed and write
 * one record per slot into dscene->tex_image_packed_info, then hand both
 * arrays to the device.
 * NOTE(review): the progess parameter is not used in this function. */
void ImageManager::device_pack_images(Device *device, DeviceScene *dscene, Progress& progess)
{
/* for OpenCL, we pack all image textures inside a single big texture, and
 * will do our own interpolation in the kernel */
/* first pass: total size of all used slots */
size_t size = 0;
for(size_t slot = 0; slot < images.size(); slot++) {
if(!images[slot])
continue;
device_vector<uchar4>& tex_img = dscene->tex_image[slot];
size += tex_img.size();
}
/* one info record per slot (used or not), one uchar4 per packed texel */
uint4 *info = dscene->tex_image_packed_info.resize(images.size());
uchar4 *pixels = dscene->tex_image_packed.resize(size);
/* second pass: copy pixels and record where each image starts; offset is
 * counted in uchar4 elements since it is added to the pixels pointer */
size_t offset = 0;
for(size_t slot = 0; slot < images.size(); slot++) {
/* records of empty slots are not written here */
if(!images[slot])
continue;
device_vector<uchar4>& tex_img = dscene->tex_image[slot];
/* record layout: width, height, start offset, and a last component
 * that is always set to 1 here */
info[slot] = make_uint4(tex_img.data_width, tex_img.data_height, offset, 1);
/* NOTE(review): copies memory_size() bytes but advances by size();
 * presumably byte count vs. element count -- confirm they agree */
memcpy(pixels+offset, (void*)tex_img.data_pointer, tex_img.memory_size());
offset += tex_img.size();
}
/* only allocate on the device when there is something to upload */
if(dscene->tex_image_packed.size())
device->tex_alloc("__tex_image_packed", dscene->tex_image_packed);
if(dscene->tex_image_packed_info.size())
device->tex_alloc("__tex_image_packed_info", dscene->tex_image_packed_info);
}
void ImageManager::device_free(Device *device, DeviceScene *dscene) void ImageManager::device_free(Device *device, DeviceScene *dscene)
{ {
for(size_t slot = 0; slot < images.size(); slot++) for(size_t slot = 0; slot < images.size(); slot++)
@@ -476,6 +524,12 @@ void ImageManager::device_free(Device *device, DeviceScene *dscene)
for(size_t slot = 0; slot < float_images.size(); slot++) for(size_t slot = 0; slot < float_images.size(); slot++)
device_free_image(device, dscene, slot + TEX_IMAGE_FLOAT_START); device_free_image(device, dscene, slot + TEX_IMAGE_FLOAT_START);
device->tex_free(dscene->tex_image_packed);
dscene->tex_image_packed.clear();
device->tex_free(dscene->tex_image_packed_info);
dscene->tex_image_packed_info.clear();
images.clear(); images.clear();
float_images.clear(); float_images.clear();
} }

View File

@@ -47,6 +47,7 @@ public:
void device_free(Device *device, DeviceScene *dscene); void device_free(Device *device, DeviceScene *dscene);
void set_osl_texture_system(void *texture_system); void set_osl_texture_system(void *texture_system);
void set_pack_images(bool pack_images_);
bool need_update; bool need_update;
@@ -61,12 +62,15 @@ private:
vector<Image*> images; vector<Image*> images;
vector<Image*> float_images; vector<Image*> float_images;
void *osl_texture_system; void *osl_texture_system;
bool pack_images;
bool file_load_image(Image *img, device_vector<uchar4>& tex_img); bool file_load_image(Image *img, device_vector<uchar4>& tex_img);
bool file_load_float_image(Image *img, device_vector<float4>& tex_img); bool file_load_float_image(Image *img, device_vector<float4>& tex_img);
void device_load_image(Device *device, DeviceScene *dscene, int slot, Progress *progess); void device_load_image(Device *device, DeviceScene *dscene, int slot, Progress *progess);
void device_free_image(Device *device, DeviceScene *dscene, int slot); void device_free_image(Device *device, DeviceScene *dscene, int slot);
void device_pack_images(Device *device, DeviceScene *dscene, Progress& progess);
}; };
CCL_NAMESPACE_END CCL_NAMESPACE_END

View File

@@ -421,7 +421,7 @@ void MeshManager::update_svm_attributes(Device *device, DeviceScene *dscene, Sce
attr_map[index].x = id; attr_map[index].x = id;
attr_map[index].y = req.element; attr_map[index].y = req.element;
attr_map[index].z = req.offset; attr_map[index].z = as_uint(req.offset);
if(req.type == TypeDesc::TypeFloat) if(req.type == TypeDesc::TypeFloat)
attr_map[index].w = NODE_ATTR_FLOAT; attr_map[index].w = NODE_ATTR_FLOAT;

View File

@@ -111,6 +111,8 @@ void Scene::device_update(Device *device_, Progress& progress)
* - Displacement shader must have all shader data available. * - Displacement shader must have all shader data available.
* - Light manager needs final mesh data to compute emission CDF. * - Light manager needs final mesh data to compute emission CDF.
*/ */
image_manager->set_pack_images(device->info.pack_images);
progress.set_status("Updating Background"); progress.set_status("Updating Background");
background->device_update(device, &dscene, this); background->device_update(device, &dscene, this);

View File

@@ -97,6 +97,10 @@ public:
device_vector<uchar4> tex_image[TEX_NUM_IMAGES]; device_vector<uchar4> tex_image[TEX_NUM_IMAGES];
device_vector<float4> tex_float_image[TEX_NUM_FLOAT_IMAGES]; device_vector<float4> tex_float_image[TEX_NUM_FLOAT_IMAGES];
/* opencl images */
device_vector<uchar4> tex_image_packed;
device_vector<uint4> tex_image_packed_info;
KernelData data; KernelData data;
}; };

View File

@@ -965,6 +965,20 @@ __device_inline void print_int4(const char *label, const int4& a)
#ifndef __KERNEL_OPENCL__ #ifndef __KERNEL_OPENCL__
/* Reinterpret the bits of an unsigned integer as a signed integer.
 *
 * The value is written to the unsigned member of a union and read back
 * through the signed member. The result is returned as int: returning it
 * as unsigned int would convert the reinterpreted value straight back and
 * leave callers to rely on an implicit unsigned-to-signed conversion. */
__device_inline int as_int(uint i)
{
	union { unsigned int ui; int i; } u;
	u.ui = i;
	return u.i;
}
/* Reinterpret the bits of a signed integer as an unsigned integer by
 * writing one union member and reading the other. */
__device_inline unsigned int as_uint(int i)
{
	union { int signed_value; unsigned int unsigned_value; } pun;

	pun.signed_value = i;
	return pun.unsigned_value;
}
__device_inline unsigned int as_uint(float f) __device_inline unsigned int as_uint(float f)
{ {
union { unsigned int i; float f; } u; union { unsigned int i; float f; } u;