Cycles: OpenCL image texture support, fix an attribute node issue and refactor
feature enabling #defines a bit.
This commit is contained in:
@@ -53,6 +53,7 @@ public:
|
|||||||
int num;
|
int num;
|
||||||
bool display_device;
|
bool display_device;
|
||||||
bool advanced_shading;
|
bool advanced_shading;
|
||||||
|
bool pack_images;
|
||||||
vector<DeviceInfo> multi_devices;
|
vector<DeviceInfo> multi_devices;
|
||||||
|
|
||||||
DeviceInfo()
|
DeviceInfo()
|
||||||
@@ -62,6 +63,7 @@ public:
|
|||||||
num = 0;
|
num = 0;
|
||||||
display_device = false;
|
display_device = false;
|
||||||
advanced_shading = true;
|
advanced_shading = true;
|
||||||
|
pack_images = false;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@@ -260,6 +260,7 @@ void device_cpu_info(vector<DeviceInfo>& devices)
|
|||||||
info.id = "CPU";
|
info.id = "CPU";
|
||||||
info.num = 0;
|
info.num = 0;
|
||||||
info.advanced_shading = true;
|
info.advanced_shading = true;
|
||||||
|
info.pack_images = false;
|
||||||
|
|
||||||
devices.insert(devices.begin(), info);
|
devices.insert(devices.begin(), info);
|
||||||
}
|
}
|
||||||
|
@@ -877,6 +877,7 @@ void device_cuda_info(vector<DeviceInfo>& devices)
|
|||||||
int major, minor;
|
int major, minor;
|
||||||
cuDeviceComputeCapability(&major, &minor, num);
|
cuDeviceComputeCapability(&major, &minor, num);
|
||||||
info.advanced_shading = (major >= 2);
|
info.advanced_shading = (major >= 2);
|
||||||
|
info.pack_images = false;
|
||||||
|
|
||||||
/* if device has a kernel timeout, assume it is used for display */
|
/* if device has a kernel timeout, assume it is used for display */
|
||||||
if(cuDeviceGetAttribute(&attr, CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT, num) == CUDA_SUCCESS && attr == 1) {
|
if(cuDeviceGetAttribute(&attr, CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT, num) == CUDA_SUCCESS && attr == 1) {
|
||||||
|
@@ -304,6 +304,7 @@ static bool device_multi_add(vector<DeviceInfo>& devices, DeviceType type, bool
|
|||||||
int num_added = 0, num_display = 0;
|
int num_added = 0, num_display = 0;
|
||||||
|
|
||||||
info.advanced_shading = with_advanced_shading;
|
info.advanced_shading = with_advanced_shading;
|
||||||
|
info.pack_images = false;
|
||||||
|
|
||||||
foreach(DeviceInfo& subinfo, devices) {
|
foreach(DeviceInfo& subinfo, devices) {
|
||||||
if(subinfo.type == type) {
|
if(subinfo.type == type) {
|
||||||
@@ -326,6 +327,7 @@ static bool device_multi_add(vector<DeviceInfo>& devices, DeviceType type, bool
|
|||||||
info.multi_devices.push_back(subinfo);
|
info.multi_devices.push_back(subinfo);
|
||||||
if(subinfo.display_device)
|
if(subinfo.display_device)
|
||||||
info.display_device = true;
|
info.display_device = true;
|
||||||
|
info.pack_images = info.pack_images || subinfo.pack_images;
|
||||||
num_added++;
|
num_added++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@@ -212,7 +212,7 @@ public:
|
|||||||
{
|
{
|
||||||
char version[256];
|
char version[256];
|
||||||
|
|
||||||
int major, minor, req_major = 1, req_minor = 1;
|
int major, minor, req_major = 1, req_minor = 0;
|
||||||
|
|
||||||
clGetPlatformInfo(cpPlatform, CL_PLATFORM_VERSION, sizeof(version), &version, NULL);
|
clGetPlatformInfo(cpPlatform, CL_PLATFORM_VERSION, sizeof(version), &version, NULL);
|
||||||
|
|
||||||
@@ -300,15 +300,15 @@ public:
|
|||||||
|
|
||||||
/* Multi Closure for nVidia cards */
|
/* Multi Closure for nVidia cards */
|
||||||
if(platform_name == "NVIDIA CUDA")
|
if(platform_name == "NVIDIA CUDA")
|
||||||
build_options += "-D__KERNEL_SHADING__ -D__MULTI_CLOSURE__ -cl-nv-maxrregcount=24 -cl-nv-verbose ";
|
build_options += "-D__KERNEL_SHADING__ -D__KERNEL_OPENCL_NVIDIA__ -cl-nv-maxrregcount=24 -cl-nv-verbose ";
|
||||||
|
|
||||||
/* No Float3 for Apple */
|
/* No Float3 for Apple */
|
||||||
else if(platform_name == "Apple")
|
else if(platform_name == "Apple")
|
||||||
build_options += "-D__CL_NO_FLOAT3__ ";
|
build_options += "-D__CL_NO_FLOAT3__ -D__KERNEL_OPENCL_APPLE__ ";
|
||||||
|
|
||||||
/* Basic shading for AMD cards (non Apple) */
|
/* Basic shading for AMD cards (non Apple) */
|
||||||
else if(platform_name == "AMD Accelerated Parallel Processing")
|
else if(platform_name == "AMD Accelerated Parallel Processing")
|
||||||
build_options += "-D__KERNEL_SHADING__ -D__CL_NO_FLOAT3__ ";
|
build_options += "-D__CL_NO_FLOAT3__ -D__KERNEL_OPENCL_AMD__ ";
|
||||||
|
|
||||||
return build_options;
|
return build_options;
|
||||||
}
|
}
|
||||||
@@ -743,6 +743,7 @@ void device_opencl_info(vector<DeviceInfo>& devices)
|
|||||||
/* we don't know if it's used for display, but assume it is */
|
/* we don't know if it's used for display, but assume it is */
|
||||||
info.display_device = true;
|
info.display_device = true;
|
||||||
info.advanced_shading = false;
|
info.advanced_shading = false;
|
||||||
|
info.pack_images = true;
|
||||||
|
|
||||||
devices.push_back(info);
|
devices.push_back(info);
|
||||||
}
|
}
|
||||||
|
@@ -146,6 +146,7 @@ typedef texture<float> texture_float;
|
|||||||
typedef texture<uint> texture_uint;
|
typedef texture<uint> texture_uint;
|
||||||
typedef texture<int> texture_int;
|
typedef texture<int> texture_int;
|
||||||
typedef texture<uint4> texture_uint4;
|
typedef texture<uint4> texture_uint4;
|
||||||
|
typedef texture<uchar4> texture_uchar4;
|
||||||
typedef texture_image<float4> texture_image_float4;
|
typedef texture_image<float4> texture_image_float4;
|
||||||
typedef texture_image<uchar4> texture_image_uchar4;
|
typedef texture_image<uchar4> texture_image_uchar4;
|
||||||
|
|
||||||
|
@@ -50,6 +50,7 @@ typedef texture<float, 1> texture_float;
|
|||||||
typedef texture<uint, 1> texture_uint;
|
typedef texture<uint, 1> texture_uint;
|
||||||
typedef texture<int, 1> texture_int;
|
typedef texture<int, 1> texture_int;
|
||||||
typedef texture<uint4, 1> texture_uint4;
|
typedef texture<uint4, 1> texture_uint4;
|
||||||
|
typedef texture<uchar4, 1> texture_uchar4;
|
||||||
typedef texture<float4, 2> texture_image_float4;
|
typedef texture<float4, 2> texture_image_float4;
|
||||||
typedef texture<uchar4, 2, cudaReadModeNormalizedFloat> texture_image_uchar4;
|
typedef texture<uchar4, 2, cudaReadModeNormalizedFloat> texture_image_uchar4;
|
||||||
|
|
||||||
|
@@ -59,6 +59,7 @@ __device float3 area_light_sample(float3 axisu, float3 axisv, float randu, float
|
|||||||
return axisu*randu + axisv*randv;
|
return axisu*randu + axisv*randv;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef __BACKGROUND_MIS__
|
||||||
__device float3 background_light_sample(KernelGlobals *kg, float randu, float randv, float *pdf)
|
__device float3 background_light_sample(KernelGlobals *kg, float randu, float randv, float *pdf)
|
||||||
{
|
{
|
||||||
/* for the following, the CDF values are actually a pair of floats, with the
|
/* for the following, the CDF values are actually a pair of floats, with the
|
||||||
@@ -165,6 +166,7 @@ __device float background_light_pdf(KernelGlobals *kg, float3 direction)
|
|||||||
|
|
||||||
return pdf * kernel_data.integrator.pdf_lights;
|
return pdf * kernel_data.integrator.pdf_lights;
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
__device void regular_light_sample(KernelGlobals *kg, int point,
|
__device void regular_light_sample(KernelGlobals *kg, int point,
|
||||||
float randu, float randv, float3 P, LightSample *ls, float *pdf)
|
float randu, float randv, float3 P, LightSample *ls, float *pdf)
|
||||||
|
@@ -7,7 +7,6 @@
|
|||||||
#define KERNEL_IMAGE_TEX(type, ttype, name)
|
#define KERNEL_IMAGE_TEX(type, ttype, name)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
/* bvh */
|
/* bvh */
|
||||||
KERNEL_TEX(float4, texture_float4, __bvh_nodes)
|
KERNEL_TEX(float4, texture_float4, __bvh_nodes)
|
||||||
KERNEL_TEX(float4, texture_float4, __tri_woop)
|
KERNEL_TEX(float4, texture_float4, __tri_woop)
|
||||||
@@ -151,6 +150,10 @@ KERNEL_IMAGE_TEX(float4, texture_image_float4, __tex_image_float_097)
|
|||||||
KERNEL_IMAGE_TEX(float4, texture_image_float4, __tex_image_float_098)
|
KERNEL_IMAGE_TEX(float4, texture_image_float4, __tex_image_float_098)
|
||||||
KERNEL_IMAGE_TEX(float4, texture_image_float4, __tex_image_float_099)
|
KERNEL_IMAGE_TEX(float4, texture_image_float4, __tex_image_float_099)
|
||||||
|
|
||||||
|
/* packed image (opencl) */
|
||||||
|
KERNEL_TEX(uchar4, texture_uchar4, __tex_image_packed)
|
||||||
|
KERNEL_TEX(uint4, texture_uint4, __tex_image_packed_info)
|
||||||
|
|
||||||
#undef KERNEL_TEX
|
#undef KERNEL_TEX
|
||||||
#undef KERNEL_IMAGE_TEX
|
#undef KERNEL_IMAGE_TEX
|
||||||
|
|
||||||
|
@@ -49,8 +49,30 @@ CCL_NAMESPACE_BEGIN
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef __KERNEL_OPENCL__
|
#ifdef __KERNEL_OPENCL__
|
||||||
//#define __KERNEL_SHADING__
|
|
||||||
//#define __KERNEL_ADV_SHADING__
|
#ifdef __KERNEL_OPENCL_NVIDIA__
|
||||||
|
#define __KERNEL_SHADING__
|
||||||
|
#define __MULTI_CLOSURE__
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef __KERNEL_OPENCL_APPLE__
|
||||||
|
//#define __SVM__
|
||||||
|
//#define __EMISSION__
|
||||||
|
//#define __IMAGE_TEXTURES__
|
||||||
|
//#define __HOLDOUT__
|
||||||
|
//#define __PROCEDURAL_TEXTURES__
|
||||||
|
//#define __EXTRA_NODES__
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef __KERNEL_OPENCL_AMD__
|
||||||
|
#define __SVM__
|
||||||
|
#define __EMISSION__
|
||||||
|
#define __IMAGE_TEXTURES__
|
||||||
|
#define __HOLDOUT__
|
||||||
|
#define __PROCEDURAL_TEXTURES__
|
||||||
|
#define __EXTRA_NODES__
|
||||||
|
#endif
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* kernel features */
|
/* kernel features */
|
||||||
@@ -69,7 +91,9 @@ CCL_NAMESPACE_BEGIN
|
|||||||
#ifdef __KERNEL_SHADING__
|
#ifdef __KERNEL_SHADING__
|
||||||
#define __SVM__
|
#define __SVM__
|
||||||
#define __EMISSION__
|
#define __EMISSION__
|
||||||
#define __TEXTURES__
|
#define __PROCEDURAL_TEXTURES__
|
||||||
|
#define __IMAGE_TEXTURES__
|
||||||
|
#define __EXTRA_NODES__
|
||||||
#define __HOLDOUT__
|
#define __HOLDOUT__
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@@ -85,7 +109,6 @@ CCL_NAMESPACE_BEGIN
|
|||||||
//#define __MULTI_LIGHT__
|
//#define __MULTI_LIGHT__
|
||||||
//#define __OSL__
|
//#define __OSL__
|
||||||
//#define __SOBOL_FULL_SCREEN__
|
//#define __SOBOL_FULL_SCREEN__
|
||||||
//#define __MODIFY_TP__
|
|
||||||
//#define __QBVH__
|
//#define __QBVH__
|
||||||
|
|
||||||
/* Shader Evaluation */
|
/* Shader Evaluation */
|
||||||
|
@@ -216,13 +216,15 @@ __device_noinline void svm_eval_nodes(KernelGlobals *kg, ShaderData *sd, ShaderT
|
|||||||
case NODE_JUMP:
|
case NODE_JUMP:
|
||||||
offset = node.y;
|
offset = node.y;
|
||||||
break;
|
break;
|
||||||
#ifdef __TEXTURES__
|
#ifdef __IMAGE_TEXTURES__
|
||||||
case NODE_TEX_IMAGE:
|
case NODE_TEX_IMAGE:
|
||||||
svm_node_tex_image(kg, sd, stack, node);
|
svm_node_tex_image(kg, sd, stack, node);
|
||||||
break;
|
break;
|
||||||
case NODE_TEX_ENVIRONMENT:
|
case NODE_TEX_ENVIRONMENT:
|
||||||
svm_node_tex_environment(kg, sd, stack, node);
|
svm_node_tex_environment(kg, sd, stack, node);
|
||||||
break;
|
break;
|
||||||
|
#endif
|
||||||
|
#ifdef __PROCEDURAL_TEXTURES__
|
||||||
case NODE_TEX_SKY:
|
case NODE_TEX_SKY:
|
||||||
svm_node_tex_sky(kg, sd, stack, node.y, node.z);
|
svm_node_tex_sky(kg, sd, stack, node.y, node.z);
|
||||||
break;
|
break;
|
||||||
@@ -254,6 +256,7 @@ __device_noinline void svm_eval_nodes(KernelGlobals *kg, ShaderData *sd, ShaderT
|
|||||||
case NODE_GEOMETRY:
|
case NODE_GEOMETRY:
|
||||||
svm_node_geometry(sd, stack, node.y, node.z);
|
svm_node_geometry(sd, stack, node.y, node.z);
|
||||||
break;
|
break;
|
||||||
|
#ifdef __EXTRA_NODES__
|
||||||
case NODE_GEOMETRY_BUMP_DX:
|
case NODE_GEOMETRY_BUMP_DX:
|
||||||
svm_node_geometry_bump_dx(sd, stack, node.y, node.z);
|
svm_node_geometry_bump_dx(sd, stack, node.y, node.z);
|
||||||
break;
|
break;
|
||||||
@@ -263,6 +266,7 @@ __device_noinline void svm_eval_nodes(KernelGlobals *kg, ShaderData *sd, ShaderT
|
|||||||
case NODE_LIGHT_PATH:
|
case NODE_LIGHT_PATH:
|
||||||
svm_node_light_path(sd, stack, node.y, node.z, path_flag);
|
svm_node_light_path(sd, stack, node.y, node.z, path_flag);
|
||||||
break;
|
break;
|
||||||
|
#endif
|
||||||
case NODE_CONVERT:
|
case NODE_CONVERT:
|
||||||
svm_node_convert(sd, stack, node.y, node.z, node.w);
|
svm_node_convert(sd, stack, node.y, node.z, node.w);
|
||||||
break;
|
break;
|
||||||
@@ -272,6 +276,7 @@ __device_noinline void svm_eval_nodes(KernelGlobals *kg, ShaderData *sd, ShaderT
|
|||||||
case NODE_VALUE_V:
|
case NODE_VALUE_V:
|
||||||
svm_node_value_v(kg, sd, stack, node.y, &offset);
|
svm_node_value_v(kg, sd, stack, node.y, &offset);
|
||||||
break;
|
break;
|
||||||
|
#ifdef __EXTRA_NODES__
|
||||||
case NODE_INVERT:
|
case NODE_INVERT:
|
||||||
svm_node_invert(sd, stack, node.y, node.z, node.w);
|
svm_node_invert(sd, stack, node.y, node.z, node.w);
|
||||||
break;
|
break;
|
||||||
@@ -293,21 +298,25 @@ __device_noinline void svm_eval_nodes(KernelGlobals *kg, ShaderData *sd, ShaderT
|
|||||||
case NODE_HSV:
|
case NODE_HSV:
|
||||||
svm_node_hsv(kg, sd, stack, node.y, node.z, node.w, &offset);
|
svm_node_hsv(kg, sd, stack, node.y, node.z, node.w, &offset);
|
||||||
break;
|
break;
|
||||||
|
#endif
|
||||||
case NODE_ATTR:
|
case NODE_ATTR:
|
||||||
svm_node_attr(kg, sd, stack, node);
|
svm_node_attr(kg, sd, stack, node);
|
||||||
break;
|
break;
|
||||||
|
#ifdef __EXTRA_NODES__
|
||||||
case NODE_ATTR_BUMP_DX:
|
case NODE_ATTR_BUMP_DX:
|
||||||
svm_node_attr_bump_dx(kg, sd, stack, node);
|
svm_node_attr_bump_dx(kg, sd, stack, node);
|
||||||
break;
|
break;
|
||||||
case NODE_ATTR_BUMP_DY:
|
case NODE_ATTR_BUMP_DY:
|
||||||
svm_node_attr_bump_dy(kg, sd, stack, node);
|
svm_node_attr_bump_dy(kg, sd, stack, node);
|
||||||
break;
|
break;
|
||||||
|
#endif
|
||||||
case NODE_FRESNEL:
|
case NODE_FRESNEL:
|
||||||
svm_node_fresnel(sd, stack, node.y, node.z, node.w);
|
svm_node_fresnel(sd, stack, node.y, node.z, node.w);
|
||||||
break;
|
break;
|
||||||
case NODE_LAYER_WEIGHT:
|
case NODE_LAYER_WEIGHT:
|
||||||
svm_node_layer_weight(sd, stack, node);
|
svm_node_layer_weight(sd, stack, node);
|
||||||
break;
|
break;
|
||||||
|
#ifdef __EXTRA_NODES__
|
||||||
case NODE_SET_DISPLACEMENT:
|
case NODE_SET_DISPLACEMENT:
|
||||||
svm_node_set_displacement(sd, stack, node.y);
|
svm_node_set_displacement(sd, stack, node.y);
|
||||||
break;
|
break;
|
||||||
@@ -323,6 +332,7 @@ __device_noinline void svm_eval_nodes(KernelGlobals *kg, ShaderData *sd, ShaderT
|
|||||||
case NODE_NORMAL:
|
case NODE_NORMAL:
|
||||||
svm_node_normal(kg, sd, stack, node.y, node.z, node.w, &offset);
|
svm_node_normal(kg, sd, stack, node.y, node.z, node.w, &offset);
|
||||||
break;
|
break;
|
||||||
|
#endif
|
||||||
case NODE_MAPPING:
|
case NODE_MAPPING:
|
||||||
svm_node_mapping(kg, sd, stack, node.y, node.z, &offset);
|
svm_node_mapping(kg, sd, stack, node.y, node.z, &offset);
|
||||||
break;
|
break;
|
||||||
@@ -332,15 +342,18 @@ __device_noinline void svm_eval_nodes(KernelGlobals *kg, ShaderData *sd, ShaderT
|
|||||||
case NODE_TEX_COORD:
|
case NODE_TEX_COORD:
|
||||||
svm_node_tex_coord(kg, sd, stack, node.y, node.z);
|
svm_node_tex_coord(kg, sd, stack, node.y, node.z);
|
||||||
break;
|
break;
|
||||||
|
#ifdef __EXTRA_NODES__
|
||||||
case NODE_TEX_COORD_BUMP_DX:
|
case NODE_TEX_COORD_BUMP_DX:
|
||||||
svm_node_tex_coord_bump_dx(kg, sd, stack, node.y, node.z);
|
svm_node_tex_coord_bump_dx(kg, sd, stack, node.y, node.z);
|
||||||
break;
|
break;
|
||||||
case NODE_TEX_COORD_BUMP_DY:
|
case NODE_TEX_COORD_BUMP_DY:
|
||||||
svm_node_tex_coord_bump_dy(kg, sd, stack, node.y, node.z);
|
svm_node_tex_coord_bump_dy(kg, sd, stack, node.y, node.z);
|
||||||
break;
|
break;
|
||||||
|
#endif
|
||||||
case NODE_EMISSION_SET_WEIGHT_TOTAL:
|
case NODE_EMISSION_SET_WEIGHT_TOTAL:
|
||||||
svm_node_emission_set_weight_total(kg, sd, node.y, node.z, node.w);
|
svm_node_emission_set_weight_total(kg, sd, node.y, node.z, node.w);
|
||||||
break;
|
break;
|
||||||
|
#ifdef __EXTRA_NODES__
|
||||||
case NODE_RGB_RAMP:
|
case NODE_RGB_RAMP:
|
||||||
svm_node_rgb_ramp(kg, sd, stack, node, &offset);
|
svm_node_rgb_ramp(kg, sd, stack, node, &offset);
|
||||||
break;
|
break;
|
||||||
@@ -350,6 +363,7 @@ __device_noinline void svm_eval_nodes(KernelGlobals *kg, ShaderData *sd, ShaderT
|
|||||||
case NODE_LIGHT_FALLOFF:
|
case NODE_LIGHT_FALLOFF:
|
||||||
svm_node_light_falloff(sd, stack, node);
|
svm_node_light_falloff(sd, stack, node);
|
||||||
break;
|
break;
|
||||||
|
#endif
|
||||||
case NODE_END:
|
case NODE_END:
|
||||||
default:
|
default:
|
||||||
#ifndef __MULTI_CLOSURE__
|
#ifndef __MULTI_CLOSURE__
|
||||||
|
@@ -22,7 +22,7 @@ CCL_NAMESPACE_BEGIN
|
|||||||
|
|
||||||
__device void svm_node_attr_init(KernelGlobals *kg, ShaderData *sd,
|
__device void svm_node_attr_init(KernelGlobals *kg, ShaderData *sd,
|
||||||
uint4 node, NodeAttributeType *type,
|
uint4 node, NodeAttributeType *type,
|
||||||
NodeAttributeType *mesh_type, AttributeElement *elem, uint *offset, uint *out_offset)
|
NodeAttributeType *mesh_type, AttributeElement *elem, int *offset, uint *out_offset)
|
||||||
{
|
{
|
||||||
if(sd->object != ~0) {
|
if(sd->object != ~0) {
|
||||||
/* find attribute by unique id */
|
/* find attribute by unique id */
|
||||||
@@ -35,7 +35,7 @@ __device void svm_node_attr_init(KernelGlobals *kg, ShaderData *sd,
|
|||||||
|
|
||||||
/* return result */
|
/* return result */
|
||||||
*elem = (AttributeElement)attr_map.y;
|
*elem = (AttributeElement)attr_map.y;
|
||||||
*offset = attr_map.z;
|
*offset = as_int(attr_map.z);
|
||||||
*mesh_type = (NodeAttributeType)attr_map.w;
|
*mesh_type = (NodeAttributeType)attr_map.w;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
@@ -53,7 +53,8 @@ __device void svm_node_attr(KernelGlobals *kg, ShaderData *sd, float *stack, uin
|
|||||||
{
|
{
|
||||||
NodeAttributeType type, mesh_type;
|
NodeAttributeType type, mesh_type;
|
||||||
AttributeElement elem;
|
AttributeElement elem;
|
||||||
uint offset, out_offset;
|
uint out_offset;
|
||||||
|
int offset;
|
||||||
|
|
||||||
svm_node_attr_init(kg, sd, node, &type, &mesh_type, &elem, &offset, &out_offset);
|
svm_node_attr_init(kg, sd, node, &type, &mesh_type, &elem, &offset, &out_offset);
|
||||||
|
|
||||||
@@ -84,7 +85,8 @@ __device void svm_node_attr_bump_dx(KernelGlobals *kg, ShaderData *sd, float *st
|
|||||||
{
|
{
|
||||||
NodeAttributeType type, mesh_type;
|
NodeAttributeType type, mesh_type;
|
||||||
AttributeElement elem;
|
AttributeElement elem;
|
||||||
uint offset, out_offset;
|
uint out_offset;
|
||||||
|
int offset;
|
||||||
|
|
||||||
svm_node_attr_init(kg, sd, node, &type, &mesh_type, &elem, &offset, &out_offset);
|
svm_node_attr_init(kg, sd, node, &type, &mesh_type, &elem, &offset, &out_offset);
|
||||||
|
|
||||||
@@ -119,7 +121,8 @@ __device void svm_node_attr_bump_dy(KernelGlobals *kg, ShaderData *sd, float *st
|
|||||||
{
|
{
|
||||||
NodeAttributeType type, mesh_type;
|
NodeAttributeType type, mesh_type;
|
||||||
AttributeElement elem;
|
AttributeElement elem;
|
||||||
uint offset, out_offset;
|
uint out_offset;
|
||||||
|
int offset;
|
||||||
|
|
||||||
svm_node_attr_init(kg, sd, node, &type, &mesh_type, &elem, &offset, &out_offset);
|
svm_node_attr_init(kg, sd, node, &type, &mesh_type, &elem, &offset, &out_offset);
|
||||||
|
|
||||||
|
@@ -18,6 +18,75 @@
|
|||||||
|
|
||||||
CCL_NAMESPACE_BEGIN
|
CCL_NAMESPACE_BEGIN
|
||||||
|
|
||||||
|
#ifdef __KERNEL_OPENCL__
|
||||||
|
|
||||||
|
/* For OpenCL all images are packed in a single array, and we do manual lookup
|
||||||
|
* and interpolation. */
|
||||||
|
|
||||||
|
/* Fetch one texel from the packed image array at the given offset and
 * convert its four 8-bit channels to floats by scaling with 1/255. */
__device_inline float4 svm_image_texture_read(KernelGlobals *kg, int offset)
{
	float scale = 1.0f/255.0f;
	uchar4 texel = kernel_tex_fetch(__tex_image_packed, offset);

	return make_float4(texel.x*scale, texel.y*scale, texel.z*scale, texel.w*scale);
}
|
||||||
|
|
||||||
|
/* Wrap a texel coordinate into [0, width) for repeating images. The
 * remainder of a negative x is negative, so it is shifted up by width. */
__device_inline int svm_image_texture_wrap_periodic(int x, int width)
{
	int wrapped = x % width;

	return (wrapped < 0)? wrapped + width: wrapped;
}
|
||||||
|
|
||||||
|
/* Clamp a texel coordinate to the valid range [0, width-1]. */
__device_inline int svm_image_texture_wrap_clamp(int x, int width)
{
	int last = width-1;

	return clamp(x, 0, last);
}
|
||||||
|
|
||||||
|
/* Split x into an integer part, stored in *ix, and the remainder x - *ix,
 * which is returned. The integer part is the truncated value, lowered by
 * one for negative x so that the remainder is not negative. */
__device_inline float svm_image_texture_frac(float x, int *ix)
{
	int whole = (int)x;

	if(x < 0.0f)
		whole -= 1;

	*ix = whole;
	return x - (float)whole;
}
|
||||||
|
|
||||||
|
/* Bilinearly interpolated lookup into the packed image array.
 *
 * id selects an entry of __tex_image_packed_info, which holds the image
 * width, height, offset of its first texel in __tex_image_packed and a
 * periodic flag. x and y are scaled by width and height to find the texel
 * position; the four surrounding texels are blended by the fractional part. */
__device float4 svm_image_texture(KernelGlobals *kg, int id, float x, float y)
{
	uint4 desc = kernel_tex_fetch(__tex_image_packed_info, id);
	uint width = desc.x;
	uint height = desc.y;
	uint base = desc.z;
	uint periodic = desc.w;

	/* integer texel coordinates and fractional blend factors */
	int x0, y0, x1, y1;
	float tx = svm_image_texture_frac(x*width, &x0);
	float ty = svm_image_texture_frac(y*height, &y0);

	if(!periodic) {
		/* clamp to the image; the neighbour is derived from the already
		 * clamped coordinate */
		x0 = svm_image_texture_wrap_clamp(x0, width);
		x1 = svm_image_texture_wrap_clamp(x0+1, width);

		y0 = svm_image_texture_wrap_clamp(y0, height);
		y1 = svm_image_texture_wrap_clamp(y0+1, height);
	}
	else {
		/* repeat the image in both directions */
		x0 = svm_image_texture_wrap_periodic(x0, width);
		x1 = svm_image_texture_wrap_periodic(x0+1, width);

		y0 = svm_image_texture_wrap_periodic(y0, height);
		y1 = svm_image_texture_wrap_periodic(y0+1, height);
	}

	/* offsets of the first texel of the two rows involved */
	uint row0 = base + y0*width;
	uint row1 = base + y1*width;

	/* weights of the four corner texels */
	float w00 = (1.0f - ty)*(1.0f - tx);
	float w10 = (1.0f - ty)*tx;
	float w01 = ty*(1.0f - tx);
	float w11 = ty*tx;

	float4 r = w00*svm_image_texture_read(kg, row0 + x0);
	r += w10*svm_image_texture_read(kg, row0 + x1);
	r += w01*svm_image_texture_read(kg, row1 + x0);
	r += w11*svm_image_texture_read(kg, row1 + x1);

	return r;
}
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
__device float4 svm_image_texture(KernelGlobals *kg, int id, float x, float y)
|
__device float4 svm_image_texture(KernelGlobals *kg, int id, float x, float y)
|
||||||
{
|
{
|
||||||
float4 r;
|
float4 r;
|
||||||
@@ -31,9 +100,6 @@ __device float4 svm_image_texture(KernelGlobals *kg, int id, float x, float y)
|
|||||||
also note that cuda has 128 textures limit, we use 100 now, since
|
also note that cuda has 128 textures limit, we use 100 now, since
|
||||||
we still need some for other storage */
|
we still need some for other storage */
|
||||||
|
|
||||||
#ifdef __KERNEL_OPENCL__
|
|
||||||
r = make_float4(0.0f, 0.0f, 0.0f, 0.0f); /* todo */
|
|
||||||
#else
|
|
||||||
switch(id) {
|
switch(id) {
|
||||||
case 0: r = kernel_tex_image_interp(__tex_image_000, x, y); break;
|
case 0: r = kernel_tex_image_interp(__tex_image_000, x, y); break;
|
||||||
case 1: r = kernel_tex_image_interp(__tex_image_001, x, y); break;
|
case 1: r = kernel_tex_image_interp(__tex_image_001, x, y); break;
|
||||||
@@ -139,11 +205,12 @@ __device float4 svm_image_texture(KernelGlobals *kg, int id, float x, float y)
|
|||||||
kernel_assert(0);
|
kernel_assert(0);
|
||||||
return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
|
return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
|
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
__device void svm_node_tex_image(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node)
|
__device void svm_node_tex_image(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node)
|
||||||
{
|
{
|
||||||
uint id = node.y;
|
uint id = node.y;
|
||||||
|
@@ -34,6 +34,7 @@ CCL_NAMESPACE_BEGIN
|
|||||||
ImageManager::ImageManager()
|
ImageManager::ImageManager()
|
||||||
{
|
{
|
||||||
need_update = true;
|
need_update = true;
|
||||||
|
pack_images = false;
|
||||||
osl_texture_system = NULL;
|
osl_texture_system = NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -45,6 +46,11 @@ ImageManager::~ImageManager()
|
|||||||
assert(!float_images[slot]);
|
assert(!float_images[slot]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void ImageManager::set_pack_images(bool pack_images_)
|
||||||
|
{
|
||||||
|
pack_images = pack_images_;
|
||||||
|
}
|
||||||
|
|
||||||
void ImageManager::set_osl_texture_system(void *texture_system)
|
void ImageManager::set_osl_texture_system(void *texture_system)
|
||||||
{
|
{
|
||||||
osl_texture_system = texture_system;
|
osl_texture_system = texture_system;
|
||||||
@@ -84,7 +90,7 @@ int ImageManager::add_image(const string& filename, bool& is_float)
|
|||||||
size_t slot;
|
size_t slot;
|
||||||
|
|
||||||
/* load image info and find out if we need a float texture */
|
/* load image info and find out if we need a float texture */
|
||||||
is_float = is_float_image(filename);
|
is_float = (pack_images)? false: is_float_image(filename);
|
||||||
|
|
||||||
if(is_float) {
|
if(is_float) {
|
||||||
/* find existing image */
|
/* find existing image */
|
||||||
@@ -361,7 +367,8 @@ void ImageManager::device_load_image(Device *device, DeviceScene *dscene, int sl
|
|||||||
if(slot >= 10) name = string_printf("__tex_image_float_0%d", slot);
|
if(slot >= 10) name = string_printf("__tex_image_float_0%d", slot);
|
||||||
else name = string_printf("__tex_image_float_00%d", slot);
|
else name = string_printf("__tex_image_float_00%d", slot);
|
||||||
|
|
||||||
device->tex_alloc(name.c_str(), tex_img, true, true);
|
if(!pack_images)
|
||||||
|
device->tex_alloc(name.c_str(), tex_img, true, true);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
string filename = path_filename(images[slot]->filename);
|
string filename = path_filename(images[slot]->filename);
|
||||||
@@ -387,7 +394,8 @@ void ImageManager::device_load_image(Device *device, DeviceScene *dscene, int sl
|
|||||||
if(slot >= 10) name = string_printf("__tex_image_0%d", slot);
|
if(slot >= 10) name = string_printf("__tex_image_0%d", slot);
|
||||||
else name = string_printf("__tex_image_00%d", slot);
|
else name = string_printf("__tex_image_00%d", slot);
|
||||||
|
|
||||||
device->tex_alloc(name.c_str(), tex_img, true, true);
|
if(!pack_images)
|
||||||
|
device->tex_alloc(name.c_str(), tex_img, true, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
img->need_load = false;
|
img->need_load = false;
|
||||||
@@ -466,9 +474,49 @@ void ImageManager::device_update(Device *device, DeviceScene *dscene, Progress&
|
|||||||
|
|
||||||
pool.wait_work();
|
pool.wait_work();
|
||||||
|
|
||||||
|
if(pack_images)
|
||||||
|
device_pack_images(device, dscene, progress);
|
||||||
|
|
||||||
need_update = false;
|
need_update = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* For OpenCL: copy every loaded byte image into one contiguous texel array
 * (tex_image_packed) and record per slot where it lives and how large it is
 * (tex_image_packed_info). The kernel does its own lookup and interpolation
 * on this packed data. */
void ImageManager::device_pack_images(Device *device, DeviceScene *dscene, Progress& progess)
{
	const size_t num_slots = images.size();

	/* first pass: total number of texels over all occupied slots */
	size_t total_texels = 0;

	for(size_t slot = 0; slot < num_slots; slot++) {
		if(images[slot])
			total_texels += dscene->tex_image[slot].size();
	}

	uint4 *info = dscene->tex_image_packed_info.resize(num_slots);
	uchar4 *pixels = dscene->tex_image_packed.resize(total_texels);

	/* second pass: append each image and write its info entry; entries of
	 * empty slots are not written here */
	size_t cursor = 0;

	for(size_t slot = 0; slot < num_slots; slot++) {
		if(!images[slot])
			continue;

		device_vector<uchar4>& src = dscene->tex_image[slot];

		/* (width, height, offset of first texel, 1); the kernel lookup reads
		 * the last component as its periodic flag */
		info[slot] = make_uint4(src.data_width, src.data_height, cursor, 1);

		memcpy(pixels+cursor, (void*)src.data_pointer, src.memory_size());
		cursor += src.size();
	}

	/* only allocate on the device when there is something to upload */
	/* NOTE(review): nothing here frees a previous allocation before
	 * tex_alloc - confirm repeated updates do not leak device memory */
	if(dscene->tex_image_packed.size())
		device->tex_alloc("__tex_image_packed", dscene->tex_image_packed);
	if(dscene->tex_image_packed_info.size())
		device->tex_alloc("__tex_image_packed_info", dscene->tex_image_packed_info);
}
|
||||||
|
|
||||||
void ImageManager::device_free(Device *device, DeviceScene *dscene)
|
void ImageManager::device_free(Device *device, DeviceScene *dscene)
|
||||||
{
|
{
|
||||||
for(size_t slot = 0; slot < images.size(); slot++)
|
for(size_t slot = 0; slot < images.size(); slot++)
|
||||||
@@ -476,6 +524,12 @@ void ImageManager::device_free(Device *device, DeviceScene *dscene)
|
|||||||
for(size_t slot = 0; slot < float_images.size(); slot++)
|
for(size_t slot = 0; slot < float_images.size(); slot++)
|
||||||
device_free_image(device, dscene, slot + TEX_IMAGE_FLOAT_START);
|
device_free_image(device, dscene, slot + TEX_IMAGE_FLOAT_START);
|
||||||
|
|
||||||
|
device->tex_free(dscene->tex_image_packed);
|
||||||
|
dscene->tex_image_packed.clear();
|
||||||
|
|
||||||
|
device->tex_free(dscene->tex_image_packed_info);
|
||||||
|
dscene->tex_image_packed_info.clear();
|
||||||
|
|
||||||
images.clear();
|
images.clear();
|
||||||
float_images.clear();
|
float_images.clear();
|
||||||
}
|
}
|
||||||
|
@@ -47,6 +47,7 @@ public:
|
|||||||
void device_free(Device *device, DeviceScene *dscene);
|
void device_free(Device *device, DeviceScene *dscene);
|
||||||
|
|
||||||
void set_osl_texture_system(void *texture_system);
|
void set_osl_texture_system(void *texture_system);
|
||||||
|
void set_pack_images(bool pack_images_);
|
||||||
|
|
||||||
bool need_update;
|
bool need_update;
|
||||||
|
|
||||||
@@ -61,12 +62,15 @@ private:
|
|||||||
vector<Image*> images;
|
vector<Image*> images;
|
||||||
vector<Image*> float_images;
|
vector<Image*> float_images;
|
||||||
void *osl_texture_system;
|
void *osl_texture_system;
|
||||||
|
bool pack_images;
|
||||||
|
|
||||||
bool file_load_image(Image *img, device_vector<uchar4>& tex_img);
|
bool file_load_image(Image *img, device_vector<uchar4>& tex_img);
|
||||||
bool file_load_float_image(Image *img, device_vector<float4>& tex_img);
|
bool file_load_float_image(Image *img, device_vector<float4>& tex_img);
|
||||||
|
|
||||||
void device_load_image(Device *device, DeviceScene *dscene, int slot, Progress *progess);
|
void device_load_image(Device *device, DeviceScene *dscene, int slot, Progress *progess);
|
||||||
void device_free_image(Device *device, DeviceScene *dscene, int slot);
|
void device_free_image(Device *device, DeviceScene *dscene, int slot);
|
||||||
|
|
||||||
|
void device_pack_images(Device *device, DeviceScene *dscene, Progress& progess);
|
||||||
};
|
};
|
||||||
|
|
||||||
CCL_NAMESPACE_END
|
CCL_NAMESPACE_END
|
||||||
|
@@ -421,7 +421,7 @@ void MeshManager::update_svm_attributes(Device *device, DeviceScene *dscene, Sce
|
|||||||
|
|
||||||
attr_map[index].x = id;
|
attr_map[index].x = id;
|
||||||
attr_map[index].y = req.element;
|
attr_map[index].y = req.element;
|
||||||
attr_map[index].z = req.offset;
|
attr_map[index].z = as_uint(req.offset);
|
||||||
|
|
||||||
if(req.type == TypeDesc::TypeFloat)
|
if(req.type == TypeDesc::TypeFloat)
|
||||||
attr_map[index].w = NODE_ATTR_FLOAT;
|
attr_map[index].w = NODE_ATTR_FLOAT;
|
||||||
|
@@ -111,6 +111,8 @@ void Scene::device_update(Device *device_, Progress& progress)
|
|||||||
* - Displacement shader must have all shader data available.
|
* - Displacement shader must have all shader data available.
|
||||||
* - Light manager needs final mesh data to compute emission CDF.
|
* - Light manager needs final mesh data to compute emission CDF.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
image_manager->set_pack_images(device->info.pack_images);
|
||||||
|
|
||||||
progress.set_status("Updating Background");
|
progress.set_status("Updating Background");
|
||||||
background->device_update(device, &dscene, this);
|
background->device_update(device, &dscene, this);
|
||||||
|
@@ -97,6 +97,10 @@ public:
|
|||||||
device_vector<uchar4> tex_image[TEX_NUM_IMAGES];
|
device_vector<uchar4> tex_image[TEX_NUM_IMAGES];
|
||||||
device_vector<float4> tex_float_image[TEX_NUM_FLOAT_IMAGES];
|
device_vector<float4> tex_float_image[TEX_NUM_FLOAT_IMAGES];
|
||||||
|
|
||||||
|
/* opencl images */
|
||||||
|
device_vector<uchar4> tex_image_packed;
|
||||||
|
device_vector<uint4> tex_image_packed_info;
|
||||||
|
|
||||||
KernelData data;
|
KernelData data;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@@ -965,6 +965,20 @@ __device_inline void print_int4(const char *label, const int4& a)
|
|||||||
|
|
||||||
#ifndef __KERNEL_OPENCL__
|
#ifndef __KERNEL_OPENCL__
|
||||||
|
|
||||||
|
/* Reinterpret the bits of an unsigned integer as a signed integer, without
 * any numeric conversion. Counterpart of as_uint(int).
 *
 * The result is the signed member of the union, so the function returns
 * int; declaring it unsigned would convert the value straight back. This
 * definition is only compiled when __KERNEL_OPENCL__ is not defined, as
 * OpenCL C provides as_int() itself. */
__device_inline int as_int(uint i)
{
	union { unsigned int ui; int i; } u;
	u.ui = i;
	return u.i;
}
|
||||||
|
|
||||||
|
/* Reinterpret the bits of a signed integer as an unsigned integer, without
 * any numeric conversion. */
__device_inline unsigned int as_uint(int i)
{
	union { int si; unsigned int bits; } pun;
	pun.si = i;
	return pun.bits;
}
|
||||||
|
|
||||||
__device_inline unsigned int as_uint(float f)
|
__device_inline unsigned int as_uint(float f)
|
||||||
{
|
{
|
||||||
union { unsigned int i; float f; } u;
|
union { unsigned int i; float f; } u;
|
||||||
|
Reference in New Issue
Block a user