Cycles: Initial support of 3D textures for CUDA rendering
Supports both smoke/fire and point density textures now. Reduces the number of textures available for sm_20 and sm_21, but you have to compromise somewhere on such limited hardware. Currently limited to linear interpolation only, and decoupled ray marching is not supported yet. I think those could be considered just further improvements. A quick example: https://developer.blender.org/F282934 The code is minimal and we can fully consider it a fix for the missing support of 3D textures with CUDA. Reviewers: lukasstockner97, brecht, juicyfruit, dingto Reviewed By: brecht, juicyfruit, dingto Subscribers: mib2berlin Differential Revision: https://developer.blender.org/D1806
This commit is contained in:
@@ -474,9 +474,20 @@ public:
|
|||||||
InterpolationType interpolation,
|
InterpolationType interpolation,
|
||||||
ExtensionType extension)
|
ExtensionType extension)
|
||||||
{
|
{
|
||||||
/* todo: support 3D textures, only CPU for now */
|
|
||||||
VLOG(1) << "Texture allocate: " << name << ", " << mem.memory_size() << " bytes.";
|
VLOG(1) << "Texture allocate: " << name << ", " << mem.memory_size() << " bytes.";
|
||||||
|
|
||||||
|
string bind_name = name;
|
||||||
|
if(mem.data_depth > 1) {
|
||||||
|
/* Kernel uses different bind names for 2d and 3d float textures,
|
||||||
|
* so we have to adjust couple of things here.
|
||||||
|
*/
|
||||||
|
vector<string> tokens;
|
||||||
|
string_split(tokens, name, "_");
|
||||||
|
bind_name = string_printf("__tex_image_%s3d_%s",
|
||||||
|
tokens[2].c_str(),
|
||||||
|
tokens[3].c_str());
|
||||||
|
}
|
||||||
|
|
||||||
/* determine format */
|
/* determine format */
|
||||||
CUarray_format_enum format;
|
CUarray_format_enum format;
|
||||||
size_t dsize = datatype_size(mem.data_type);
|
size_t dsize = datatype_size(mem.data_type);
|
||||||
@@ -496,7 +507,7 @@ public:
|
|||||||
CUtexref texref = NULL;
|
CUtexref texref = NULL;
|
||||||
|
|
||||||
cuda_push_context();
|
cuda_push_context();
|
||||||
cuda_assert(cuModuleGetTexRef(&texref, cuModule, name));
|
cuda_assert(cuModuleGetTexRef(&texref, cuModule, bind_name.c_str()));
|
||||||
|
|
||||||
if(!texref) {
|
if(!texref) {
|
||||||
cuda_pop_context();
|
cuda_pop_context();
|
||||||
@@ -505,20 +516,49 @@ public:
|
|||||||
|
|
||||||
if(interpolation != INTERPOLATION_NONE) {
|
if(interpolation != INTERPOLATION_NONE) {
|
||||||
CUarray handle = NULL;
|
CUarray handle = NULL;
|
||||||
CUDA_ARRAY_DESCRIPTOR desc;
|
|
||||||
|
|
||||||
desc.Width = mem.data_width;
|
if(mem.data_depth > 1) {
|
||||||
desc.Height = mem.data_height;
|
CUDA_ARRAY3D_DESCRIPTOR desc;
|
||||||
desc.Format = format;
|
|
||||||
desc.NumChannels = mem.data_elements;
|
|
||||||
|
|
||||||
cuda_assert(cuArrayCreate(&handle, &desc));
|
desc.Width = mem.data_width;
|
||||||
|
desc.Height = mem.data_height;
|
||||||
|
desc.Depth = mem.data_depth;
|
||||||
|
desc.Format = format;
|
||||||
|
desc.NumChannels = mem.data_elements;
|
||||||
|
desc.Flags = 0;
|
||||||
|
|
||||||
|
cuda_assert(cuArray3DCreate(&handle, &desc));
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
CUDA_ARRAY_DESCRIPTOR desc;
|
||||||
|
|
||||||
|
desc.Width = mem.data_width;
|
||||||
|
desc.Height = mem.data_height;
|
||||||
|
desc.Format = format;
|
||||||
|
desc.NumChannels = mem.data_elements;
|
||||||
|
|
||||||
|
cuda_assert(cuArrayCreate(&handle, &desc));
|
||||||
|
}
|
||||||
|
|
||||||
if(!handle) {
|
if(!handle) {
|
||||||
cuda_pop_context();
|
cuda_pop_context();
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if(mem.data_depth > 1) {
|
||||||
|
CUDA_MEMCPY3D param;
|
||||||
|
memset(&param, 0, sizeof(param));
|
||||||
|
param.dstMemoryType = CU_MEMORYTYPE_ARRAY;
|
||||||
|
param.dstArray = handle;
|
||||||
|
param.srcMemoryType = CU_MEMORYTYPE_HOST;
|
||||||
|
param.srcHost = (void*)mem.data_pointer;
|
||||||
|
param.srcPitch = mem.data_width*dsize*mem.data_elements;
|
||||||
|
param.WidthInBytes = param.srcPitch;
|
||||||
|
param.Height = mem.data_height;
|
||||||
|
param.Depth = mem.data_depth;
|
||||||
|
|
||||||
|
cuda_assert(cuMemcpy3D(&param));
|
||||||
|
}
|
||||||
if(mem.data_height > 1) {
|
if(mem.data_height > 1) {
|
||||||
CUDA_MEMCPY2D param;
|
CUDA_MEMCPY2D param;
|
||||||
memset(&param, 0, sizeof(param));
|
memset(&param, 0, sizeof(param));
|
||||||
@@ -595,7 +635,7 @@ public:
|
|||||||
CUdeviceptr cumem;
|
CUdeviceptr cumem;
|
||||||
size_t cubytes;
|
size_t cubytes;
|
||||||
|
|
||||||
cuda_assert(cuModuleGetGlobal(&cumem, &cubytes, cuModule, name));
|
cuda_assert(cuModuleGetGlobal(&cumem, &cubytes, cuModule, bind_name.c_str()));
|
||||||
|
|
||||||
if(cubytes == 8) {
|
if(cubytes == 8) {
|
||||||
/* 64 bit device pointer */
|
/* 64 bit device pointer */
|
||||||
|
@@ -29,6 +29,21 @@ CCL_NAMESPACE_BEGIN
|
|||||||
|
|
||||||
/* Return position normalized to 0..1 in mesh bounds */
|
/* Return position normalized to 0..1 in mesh bounds */
|
||||||
|
|
||||||
|
#ifdef __KERNEL_GPU__
|
||||||
|
ccl_device float4 volume_image_texture_3d(int id, float x, float y, float z)
|
||||||
|
{
|
||||||
|
float4 r;
|
||||||
|
switch(id) {
|
||||||
|
case 0: r = kernel_tex_image_interp_3d(__tex_image_float3d_000, x, y, z); break;
|
||||||
|
case 1: r = kernel_tex_image_interp_3d(__tex_image_float3d_001, x, y, z); break;
|
||||||
|
case 2: r = kernel_tex_image_interp_3d(__tex_image_float3d_002, x, y, z); break;
|
||||||
|
case 3: r = kernel_tex_image_interp_3d(__tex_image_float3d_003, x, y, z); break;
|
||||||
|
case 4: r = kernel_tex_image_interp_3d(__tex_image_float3d_004, x, y, z); break;
|
||||||
|
}
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
#endif /* __KERNEL_GPU__ */
|
||||||
|
|
||||||
ccl_device float3 volume_normalized_position(KernelGlobals *kg, const ShaderData *sd, float3 P)
|
ccl_device float3 volume_normalized_position(KernelGlobals *kg, const ShaderData *sd, float3 P)
|
||||||
{
|
{
|
||||||
/* todo: optimize this so it's just a single matrix multiplication when
|
/* todo: optimize this so it's just a single matrix multiplication when
|
||||||
@@ -50,7 +65,7 @@ ccl_device float volume_attribute_float(KernelGlobals *kg, const ShaderData *sd,
|
|||||||
{
|
{
|
||||||
float3 P = volume_normalized_position(kg, sd, sd->P);
|
float3 P = volume_normalized_position(kg, sd, sd->P);
|
||||||
#ifdef __KERNEL_GPU__
|
#ifdef __KERNEL_GPU__
|
||||||
float4 r = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
|
float4 r = volume_image_texture_3d(id, P.x, P.y, P.z);
|
||||||
#else
|
#else
|
||||||
float4 r;
|
float4 r;
|
||||||
if(sd->flag & SD_VOLUME_CUBIC)
|
if(sd->flag & SD_VOLUME_CUBIC)
|
||||||
@@ -70,7 +85,7 @@ ccl_device float3 volume_attribute_float3(KernelGlobals *kg, const ShaderData *s
|
|||||||
{
|
{
|
||||||
float3 P = volume_normalized_position(kg, sd, sd->P);
|
float3 P = volume_normalized_position(kg, sd, sd->P);
|
||||||
#ifdef __KERNEL_GPU__
|
#ifdef __KERNEL_GPU__
|
||||||
float4 r = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
|
float4 r = volume_image_texture_3d(id, P.x, P.y, P.z);
|
||||||
#else
|
#else
|
||||||
float4 r;
|
float4 r;
|
||||||
if(sd->flag & SD_VOLUME_CUBIC)
|
if(sd->flag & SD_VOLUME_CUBIC)
|
||||||
|
@@ -62,6 +62,7 @@ typedef texture<int, 1> texture_int;
|
|||||||
typedef texture<uint4, 1> texture_uint4;
|
typedef texture<uint4, 1> texture_uint4;
|
||||||
typedef texture<uchar4, 1> texture_uchar4;
|
typedef texture<uchar4, 1> texture_uchar4;
|
||||||
typedef texture<float4, 2> texture_image_float4;
|
typedef texture<float4, 2> texture_image_float4;
|
||||||
|
typedef texture<float4, 3> texture_image3d_float4;
|
||||||
typedef texture<uchar4, 2, cudaReadModeNormalizedFloat> texture_image_uchar4;
|
typedef texture<uchar4, 2, cudaReadModeNormalizedFloat> texture_image_uchar4;
|
||||||
|
|
||||||
/* Macros to handle different memory storage on different devices */
|
/* Macros to handle different memory storage on different devices */
|
||||||
@@ -79,6 +80,7 @@ typedef texture<uchar4, 2, cudaReadModeNormalizedFloat> texture_image_uchar4;
|
|||||||
#define kernel_tex_fetch(t, index) t[(index)]
|
#define kernel_tex_fetch(t, index) t[(index)]
|
||||||
#endif
|
#endif
|
||||||
#define kernel_tex_image_interp(t, x, y) tex2D(t, x, y)
|
#define kernel_tex_image_interp(t, x, y) tex2D(t, x, y)
|
||||||
|
#define kernel_tex_image_interp_3d(t, x, y, z) tex3D(t, x, y, z)
|
||||||
|
|
||||||
#define kernel_data __data
|
#define kernel_data __data
|
||||||
|
|
||||||
|
@@ -79,6 +79,12 @@ KERNEL_IMAGE_TEX(float4, texture_image_float4, __tex_image_float_002)
|
|||||||
KERNEL_IMAGE_TEX(float4, texture_image_float4, __tex_image_float_003)
|
KERNEL_IMAGE_TEX(float4, texture_image_float4, __tex_image_float_003)
|
||||||
KERNEL_IMAGE_TEX(float4, texture_image_float4, __tex_image_float_004)
|
KERNEL_IMAGE_TEX(float4, texture_image_float4, __tex_image_float_004)
|
||||||
|
|
||||||
|
KERNEL_IMAGE_TEX(float4, texture_image3d_float4, __tex_image_float3d_000)
|
||||||
|
KERNEL_IMAGE_TEX(float4, texture_image3d_float4, __tex_image_float3d_001)
|
||||||
|
KERNEL_IMAGE_TEX(float4, texture_image3d_float4, __tex_image_float3d_002)
|
||||||
|
KERNEL_IMAGE_TEX(float4, texture_image3d_float4, __tex_image_float3d_003)
|
||||||
|
KERNEL_IMAGE_TEX(float4, texture_image3d_float4, __tex_image_float3d_004)
|
||||||
|
|
||||||
/* image */
|
/* image */
|
||||||
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_005)
|
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_005)
|
||||||
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_006)
|
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_006)
|
||||||
|
@@ -447,11 +447,11 @@ ccl_device_noinline void svm_eval_nodes(KernelGlobals *kg, ShaderData *sd, ccl_a
|
|||||||
svm_node_blackbody(kg, sd, stack, node.y, node.z);
|
svm_node_blackbody(kg, sd, stack, node.y, node.z);
|
||||||
break;
|
break;
|
||||||
# endif /* __EXTRA_NODES__ */
|
# endif /* __EXTRA_NODES__ */
|
||||||
# if NODES_FEATURE(NODE_FEATURE_VOLUME) && !defined(__KERNEL_GPU__)
|
# if NODES_FEATURE(NODE_FEATURE_VOLUME)
|
||||||
case NODE_TEX_VOXEL:
|
case NODE_TEX_VOXEL:
|
||||||
svm_node_tex_voxel(kg, sd, stack, node, &offset);
|
svm_node_tex_voxel(kg, sd, stack, node, &offset);
|
||||||
break;
|
break;
|
||||||
# endif /* NODES_FEATURE(NODE_FEATURE_VOLUME) && !defined(__KERNEL_GPU__) */
|
# endif /* NODES_FEATURE(NODE_FEATURE_VOLUME) */
|
||||||
#endif /* NODES_GROUP(NODE_GROUP_LEVEL_3) */
|
#endif /* NODES_GROUP(NODE_GROUP_LEVEL_3) */
|
||||||
case NODE_END:
|
case NODE_END:
|
||||||
return;
|
return;
|
||||||
|
@@ -246,13 +246,13 @@ ccl_device float4 svm_image_texture(KernelGlobals *kg, int id, float x, float y,
|
|||||||
case 90: r = kernel_tex_image_interp(__tex_image_090, x, y); break;
|
case 90: r = kernel_tex_image_interp(__tex_image_090, x, y); break;
|
||||||
case 91: r = kernel_tex_image_interp(__tex_image_091, x, y); break;
|
case 91: r = kernel_tex_image_interp(__tex_image_091, x, y); break;
|
||||||
case 92: r = kernel_tex_image_interp(__tex_image_092, x, y); break;
|
case 92: r = kernel_tex_image_interp(__tex_image_092, x, y); break;
|
||||||
|
|
||||||
|
#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 300)
|
||||||
case 93: r = kernel_tex_image_interp(__tex_image_093, x, y); break;
|
case 93: r = kernel_tex_image_interp(__tex_image_093, x, y); break;
|
||||||
case 94: r = kernel_tex_image_interp(__tex_image_094, x, y); break;
|
case 94: r = kernel_tex_image_interp(__tex_image_094, x, y); break;
|
||||||
case 95: r = kernel_tex_image_interp(__tex_image_095, x, y); break;
|
case 95: r = kernel_tex_image_interp(__tex_image_095, x, y); break;
|
||||||
case 96: r = kernel_tex_image_interp(__tex_image_096, x, y); break;
|
case 96: r = kernel_tex_image_interp(__tex_image_096, x, y); break;
|
||||||
case 97: r = kernel_tex_image_interp(__tex_image_097, x, y); break;
|
case 97: r = kernel_tex_image_interp(__tex_image_097, x, y); break;
|
||||||
|
|
||||||
#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 300)
|
|
||||||
case 98: r = kernel_tex_image_interp(__tex_image_098, x, y); break;
|
case 98: r = kernel_tex_image_interp(__tex_image_098, x, y); break;
|
||||||
case 99: r = kernel_tex_image_interp(__tex_image_099, x, y); break;
|
case 99: r = kernel_tex_image_interp(__tex_image_099, x, y); break;
|
||||||
case 100: r = kernel_tex_image_interp(__tex_image_100, x, y); break;
|
case 100: r = kernel_tex_image_interp(__tex_image_100, x, y); break;
|
||||||
|
@@ -16,8 +16,6 @@
|
|||||||
|
|
||||||
CCL_NAMESPACE_BEGIN
|
CCL_NAMESPACE_BEGIN
|
||||||
|
|
||||||
#if !defined(__KERNEL_GPU__)
|
|
||||||
|
|
||||||
/* TODO(sergey): Think of making it more generic volume-type attribute
|
/* TODO(sergey): Think of making it more generic volume-type attribute
|
||||||
* sampler.
|
* sampler.
|
||||||
*/
|
*/
|
||||||
@@ -43,13 +41,15 @@ ccl_device void svm_node_tex_voxel(KernelGlobals *kg,
|
|||||||
tfm.w = read_node_float(kg, offset);
|
tfm.w = read_node_float(kg, offset);
|
||||||
co = transform_point(&tfm, co);
|
co = transform_point(&tfm, co);
|
||||||
}
|
}
|
||||||
|
#if defined(__KERNEL_GPU__)
|
||||||
|
float4 r = volume_image_texture_3d(id, co.x, co.y, co.z);
|
||||||
|
#else
|
||||||
float4 r = kernel_tex_image_interp_3d(id, co.x, co.y, co.z);
|
float4 r = kernel_tex_image_interp_3d(id, co.x, co.y, co.z);
|
||||||
|
#endif
|
||||||
if (stack_valid(density_out_offset))
|
if (stack_valid(density_out_offset))
|
||||||
stack_store_float(stack, density_out_offset, r.w);
|
stack_store_float(stack, density_out_offset, r.w);
|
||||||
if (stack_valid(color_out_offset))
|
if (stack_valid(color_out_offset))
|
||||||
stack_store_float3(stack, color_out_offset, make_float3(r.x, r.y, r.z));
|
stack_store_float3(stack, color_out_offset, make_float3(r.x, r.y, r.z));
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif /* !defined(__KERNEL_GPU__) */
|
|
||||||
|
|
||||||
CCL_NAMESPACE_END
|
CCL_NAMESPACE_END
|
||||||
|
@@ -29,7 +29,7 @@
|
|||||||
CCL_NAMESPACE_BEGIN
|
CCL_NAMESPACE_BEGIN
|
||||||
|
|
||||||
/* generic */
|
/* generic */
|
||||||
#define TEX_NUM_IMAGES 94
|
#define TEX_NUM_IMAGES 88
|
||||||
#define TEX_IMAGE_BYTE_START TEX_NUM_FLOAT_IMAGES
|
#define TEX_IMAGE_BYTE_START TEX_NUM_FLOAT_IMAGES
|
||||||
|
|
||||||
/* extended gpu */
|
/* extended gpu */
|
||||||
|
Reference in New Issue
Block a user