Cycles / GPU Image Textures:
* On NVIDIA Kepler GPUs (sm_30 and above), 145 byte images are now available instead of 95. This could be extended to about 200 if needed. I could not test this myself, as I don't have a Kepler GPU, so feedback on this would be appreciated. Thanks to Brecht for the review and some fixes. :)
This commit is contained in:
@@ -53,6 +53,7 @@ public:
|
|||||||
string description;
|
string description;
|
||||||
string id;
|
string id;
|
||||||
int num;
|
int num;
|
||||||
|
int extended_images;
|
||||||
bool display_device;
|
bool display_device;
|
||||||
bool advanced_shading;
|
bool advanced_shading;
|
||||||
bool pack_images;
|
bool pack_images;
|
||||||
@@ -63,6 +64,7 @@ public:
|
|||||||
type = DEVICE_CPU;
|
type = DEVICE_CPU;
|
||||||
id = "CPU";
|
id = "CPU";
|
||||||
num = 0;
|
num = 0;
|
||||||
|
extended_images = false;
|
||||||
display_device = false;
|
display_device = false;
|
||||||
advanced_shading = true;
|
advanced_shading = true;
|
||||||
pack_images = false;
|
pack_images = false;
|
||||||
|
@@ -340,6 +340,7 @@ void device_cpu_info(vector<DeviceInfo>& devices)
|
|||||||
info.description = system_cpu_brand_string();
|
info.description = system_cpu_brand_string();
|
||||||
info.id = "CPU";
|
info.id = "CPU";
|
||||||
info.num = 0;
|
info.num = 0;
|
||||||
|
info.extended_images = true;
|
||||||
info.advanced_shading = true;
|
info.advanced_shading = true;
|
||||||
info.pack_images = false;
|
info.pack_images = false;
|
||||||
|
|
||||||
|
@@ -1035,6 +1035,7 @@ void device_cuda_info(vector<DeviceInfo>& devices)
|
|||||||
int major, minor;
|
int major, minor;
|
||||||
cuDeviceComputeCapability(&major, &minor, num);
|
cuDeviceComputeCapability(&major, &minor, num);
|
||||||
info.advanced_shading = (major >= 2);
|
info.advanced_shading = (major >= 2);
|
||||||
|
info.extended_images = (major >= 3);
|
||||||
info.pack_images = false;
|
info.pack_images = false;
|
||||||
|
|
||||||
/* if device has a kernel timeout, assume it is used for display */
|
/* if device has a kernel timeout, assume it is used for display */
|
||||||
|
@@ -330,6 +330,7 @@ static bool device_multi_add(vector<DeviceInfo>& devices, DeviceType type, bool
|
|||||||
|
|
||||||
info.advanced_shading = with_advanced_shading;
|
info.advanced_shading = with_advanced_shading;
|
||||||
info.pack_images = false;
|
info.pack_images = false;
|
||||||
|
info.extended_images = true;
|
||||||
|
|
||||||
foreach(DeviceInfo& subinfo, devices) {
|
foreach(DeviceInfo& subinfo, devices) {
|
||||||
if(subinfo.type == type) {
|
if(subinfo.type == type) {
|
||||||
@@ -353,6 +354,7 @@ static bool device_multi_add(vector<DeviceInfo>& devices, DeviceType type, bool
|
|||||||
if(subinfo.display_device)
|
if(subinfo.display_device)
|
||||||
info.display_device = true;
|
info.display_device = true;
|
||||||
info.pack_images = info.pack_images || subinfo.pack_images;
|
info.pack_images = info.pack_images || subinfo.pack_images;
|
||||||
|
info.extended_images = info.extended_images && subinfo.extended_images;
|
||||||
num_added++;
|
num_added++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@@ -176,6 +176,61 @@ KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_097)
|
|||||||
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_098)
|
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_098)
|
||||||
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_099)
|
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_099)
|
||||||
|
|
||||||
|
/* Kepler and above */
|
||||||
|
#if defined(__KERNEL_CUDA__) && __CUDA_ARCH__ >= 300
|
||||||
|
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_100)
|
||||||
|
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_101)
|
||||||
|
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_102)
|
||||||
|
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_103)
|
||||||
|
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_104)
|
||||||
|
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_105)
|
||||||
|
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_106)
|
||||||
|
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_107)
|
||||||
|
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_108)
|
||||||
|
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_109)
|
||||||
|
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_110)
|
||||||
|
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_111)
|
||||||
|
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_112)
|
||||||
|
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_113)
|
||||||
|
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_114)
|
||||||
|
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_115)
|
||||||
|
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_116)
|
||||||
|
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_117)
|
||||||
|
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_118)
|
||||||
|
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_119)
|
||||||
|
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_120)
|
||||||
|
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_121)
|
||||||
|
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_122)
|
||||||
|
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_123)
|
||||||
|
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_124)
|
||||||
|
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_125)
|
||||||
|
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_126)
|
||||||
|
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_127)
|
||||||
|
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_128)
|
||||||
|
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_129)
|
||||||
|
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_130)
|
||||||
|
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_131)
|
||||||
|
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_132)
|
||||||
|
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_133)
|
||||||
|
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_134)
|
||||||
|
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_135)
|
||||||
|
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_136)
|
||||||
|
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_137)
|
||||||
|
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_138)
|
||||||
|
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_139)
|
||||||
|
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_140)
|
||||||
|
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_141)
|
||||||
|
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_142)
|
||||||
|
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_143)
|
||||||
|
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_144)
|
||||||
|
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_145)
|
||||||
|
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_146)
|
||||||
|
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_147)
|
||||||
|
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_148)
|
||||||
|
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_149)
|
||||||
|
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_150)
|
||||||
|
#endif
|
||||||
|
|
||||||
/* packed image (opencl) */
|
/* packed image (opencl) */
|
||||||
KERNEL_TEX(uchar4, texture_uchar4, __tex_image_packed)
|
KERNEL_TEX(uchar4, texture_uchar4, __tex_image_packed)
|
||||||
KERNEL_TEX(uint4, texture_uint4, __tex_image_packed_info)
|
KERNEL_TEX(uint4, texture_uint4, __tex_image_packed_info)
|
||||||
|
@@ -229,6 +229,60 @@ __device float4 svm_image_texture(KernelGlobals *kg, int id, float x, float y, u
|
|||||||
case 97: r = kernel_tex_image_interp(__tex_image_097, x, y); break;
|
case 97: r = kernel_tex_image_interp(__tex_image_097, x, y); break;
|
||||||
case 98: r = kernel_tex_image_interp(__tex_image_098, x, y); break;
|
case 98: r = kernel_tex_image_interp(__tex_image_098, x, y); break;
|
||||||
case 99: r = kernel_tex_image_interp(__tex_image_099, x, y); break;
|
case 99: r = kernel_tex_image_interp(__tex_image_099, x, y); break;
|
||||||
|
#if defined(__KERNEL_CUDA__) && __CUDA_ARCH__ >= 300
|
||||||
|
case 100: r = kernel_tex_image_interp(__tex_image_100, x, y); break;
|
||||||
|
case 101: r = kernel_tex_image_interp(__tex_image_101, x, y); break;
|
||||||
|
case 102: r = kernel_tex_image_interp(__tex_image_102, x, y); break;
|
||||||
|
case 103: r = kernel_tex_image_interp(__tex_image_103, x, y); break;
|
||||||
|
case 104: r = kernel_tex_image_interp(__tex_image_104, x, y); break;
|
||||||
|
case 105: r = kernel_tex_image_interp(__tex_image_105, x, y); break;
|
||||||
|
case 106: r = kernel_tex_image_interp(__tex_image_106, x, y); break;
|
||||||
|
case 107: r = kernel_tex_image_interp(__tex_image_107, x, y); break;
|
||||||
|
case 108: r = kernel_tex_image_interp(__tex_image_108, x, y); break;
|
||||||
|
case 109: r = kernel_tex_image_interp(__tex_image_109, x, y); break;
|
||||||
|
case 110: r = kernel_tex_image_interp(__tex_image_110, x, y); break;
|
||||||
|
case 111: r = kernel_tex_image_interp(__tex_image_111, x, y); break;
|
||||||
|
case 112: r = kernel_tex_image_interp(__tex_image_112, x, y); break;
|
||||||
|
case 113: r = kernel_tex_image_interp(__tex_image_113, x, y); break;
|
||||||
|
case 114: r = kernel_tex_image_interp(__tex_image_114, x, y); break;
|
||||||
|
case 115: r = kernel_tex_image_interp(__tex_image_115, x, y); break;
|
||||||
|
case 116: r = kernel_tex_image_interp(__tex_image_116, x, y); break;
|
||||||
|
case 117: r = kernel_tex_image_interp(__tex_image_117, x, y); break;
|
||||||
|
case 118: r = kernel_tex_image_interp(__tex_image_118, x, y); break;
|
||||||
|
case 119: r = kernel_tex_image_interp(__tex_image_119, x, y); break;
|
||||||
|
case 120: r = kernel_tex_image_interp(__tex_image_120, x, y); break;
|
||||||
|
case 121: r = kernel_tex_image_interp(__tex_image_121, x, y); break;
|
||||||
|
case 122: r = kernel_tex_image_interp(__tex_image_122, x, y); break;
|
||||||
|
case 123: r = kernel_tex_image_interp(__tex_image_123, x, y); break;
|
||||||
|
case 124: r = kernel_tex_image_interp(__tex_image_124, x, y); break;
|
||||||
|
case 125: r = kernel_tex_image_interp(__tex_image_125, x, y); break;
|
||||||
|
case 126: r = kernel_tex_image_interp(__tex_image_126, x, y); break;
|
||||||
|
case 127: r = kernel_tex_image_interp(__tex_image_127, x, y); break;
|
||||||
|
case 128: r = kernel_tex_image_interp(__tex_image_128, x, y); break;
|
||||||
|
case 129: r = kernel_tex_image_interp(__tex_image_129, x, y); break;
|
||||||
|
case 130: r = kernel_tex_image_interp(__tex_image_130, x, y); break;
|
||||||
|
case 131: r = kernel_tex_image_interp(__tex_image_131, x, y); break;
|
||||||
|
case 132: r = kernel_tex_image_interp(__tex_image_132, x, y); break;
|
||||||
|
case 133: r = kernel_tex_image_interp(__tex_image_133, x, y); break;
|
||||||
|
case 134: r = kernel_tex_image_interp(__tex_image_134, x, y); break;
|
||||||
|
case 135: r = kernel_tex_image_interp(__tex_image_135, x, y); break;
|
||||||
|
case 136: r = kernel_tex_image_interp(__tex_image_136, x, y); break;
|
||||||
|
case 137: r = kernel_tex_image_interp(__tex_image_137, x, y); break;
|
||||||
|
case 138: r = kernel_tex_image_interp(__tex_image_138, x, y); break;
|
||||||
|
case 139: r = kernel_tex_image_interp(__tex_image_139, x, y); break;
|
||||||
|
case 140: r = kernel_tex_image_interp(__tex_image_140, x, y); break;
|
||||||
|
case 141: r = kernel_tex_image_interp(__tex_image_141, x, y); break;
|
||||||
|
case 142: r = kernel_tex_image_interp(__tex_image_142, x, y); break;
|
||||||
|
case 143: r = kernel_tex_image_interp(__tex_image_143, x, y); break;
|
||||||
|
case 144: r = kernel_tex_image_interp(__tex_image_144, x, y); break;
|
||||||
|
case 145: r = kernel_tex_image_interp(__tex_image_145, x, y); break;
|
||||||
|
case 146: r = kernel_tex_image_interp(__tex_image_146, x, y); break;
|
||||||
|
case 147: r = kernel_tex_image_interp(__tex_image_147, x, y); break;
|
||||||
|
case 148: r = kernel_tex_image_interp(__tex_image_148, x, y); break;
|
||||||
|
case 149: r = kernel_tex_image_interp(__tex_image_149, x, y); break;
|
||||||
|
case 150: r = kernel_tex_image_interp(__tex_image_150, x, y); break;
|
||||||
|
#endif
|
||||||
|
|
||||||
default:
|
default:
|
||||||
kernel_assert(0);
|
kernel_assert(0);
|
||||||
return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
|
return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
|
||||||
|
@@ -61,11 +61,16 @@ void ImageManager::set_osl_texture_system(void *texture_system)
|
|||||||
osl_texture_system = texture_system;
|
osl_texture_system = texture_system;
|
||||||
}
|
}
|
||||||
|
|
||||||
void ImageManager::set_extended_image_limits(void)
|
void ImageManager::set_extended_image_limits(const DeviceInfo& info)
|
||||||
{
|
{
|
||||||
tex_num_images = TEX_EXTENDED_NUM_IMAGES;
|
if(info.type == DEVICE_CPU) {
|
||||||
|
tex_num_images = TEX_EXTENDED_NUM_IMAGES_CPU;
|
||||||
tex_num_float_images = TEX_EXTENDED_NUM_FLOAT_IMAGES;
|
tex_num_float_images = TEX_EXTENDED_NUM_FLOAT_IMAGES;
|
||||||
tex_image_byte_start = TEX_EXTENDED_IMAGE_BYTE_START;
|
tex_image_byte_start = TEX_EXTENDED_IMAGE_BYTE_START;
|
||||||
|
}
|
||||||
|
else if ((info.type == DEVICE_CUDA || info.type == DEVICE_MULTI) && info.extended_images) {
|
||||||
|
tex_num_images = TEX_EXTENDED_NUM_IMAGES_GPU;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bool ImageManager::set_animation_frame_update(int frame)
|
bool ImageManager::set_animation_frame_update(int frame)
|
||||||
|
@@ -19,6 +19,7 @@
|
|||||||
#ifndef __IMAGE_H__
|
#ifndef __IMAGE_H__
|
||||||
#define __IMAGE_H__
|
#define __IMAGE_H__
|
||||||
|
|
||||||
|
#include "device.h"
|
||||||
#include "device_memory.h"
|
#include "device_memory.h"
|
||||||
|
|
||||||
#include "util_string.h"
|
#include "util_string.h"
|
||||||
@@ -29,12 +30,15 @@
|
|||||||
|
|
||||||
CCL_NAMESPACE_BEGIN
|
CCL_NAMESPACE_BEGIN
|
||||||
|
|
||||||
|
/* Normal Image amount */
|
||||||
#define TEX_NUM_IMAGES 95
|
#define TEX_NUM_IMAGES 95
|
||||||
#define TEX_IMAGE_BYTE_START TEX_NUM_FLOAT_IMAGES
|
#define TEX_IMAGE_BYTE_START TEX_NUM_FLOAT_IMAGES
|
||||||
|
|
||||||
|
/* Extended Image amount*/
|
||||||
#define TEX_EXTENDED_NUM_FLOAT_IMAGES 5
|
#define TEX_EXTENDED_NUM_FLOAT_IMAGES 5
|
||||||
#define TEX_EXTENDED_NUM_IMAGES 512
|
|
||||||
#define TEX_EXTENDED_IMAGE_BYTE_START TEX_EXTENDED_NUM_FLOAT_IMAGES
|
#define TEX_EXTENDED_IMAGE_BYTE_START TEX_EXTENDED_NUM_FLOAT_IMAGES
|
||||||
|
#define TEX_EXTENDED_NUM_IMAGES_CPU 512
|
||||||
|
#define TEX_EXTENDED_NUM_IMAGES_GPU 145
|
||||||
|
|
||||||
/* color to use when textures are not found */
|
/* color to use when textures are not found */
|
||||||
#define TEX_IMAGE_MISSING_R 1
|
#define TEX_IMAGE_MISSING_R 1
|
||||||
@@ -60,7 +64,7 @@ public:
|
|||||||
|
|
||||||
void set_osl_texture_system(void *texture_system);
|
void set_osl_texture_system(void *texture_system);
|
||||||
void set_pack_images(bool pack_images_);
|
void set_pack_images(bool pack_images_);
|
||||||
void set_extended_image_limits(void);
|
void set_extended_image_limits(const DeviceInfo& info);
|
||||||
bool set_animation_frame_update(int frame);
|
bool set_animation_frame_update(int frame);
|
||||||
|
|
||||||
bool need_update;
|
bool need_update;
|
||||||
|
@@ -63,8 +63,8 @@ Scene::Scene(const SceneParams& params_, const DeviceInfo& device_info_)
|
|||||||
else
|
else
|
||||||
shader_manager = ShaderManager::create(this, SceneParams::SVM);
|
shader_manager = ShaderManager::create(this, SceneParams::SVM);
|
||||||
|
|
||||||
if (device_info_.type == DEVICE_CPU)
|
/* Extended Image limits for CPU and Kepler GPUs */
|
||||||
image_manager->set_extended_image_limits();
|
image_manager->set_extended_image_limits(device_info_);
|
||||||
}
|
}
|
||||||
|
|
||||||
Scene::~Scene()
|
Scene::~Scene()
|
||||||
|
@@ -105,8 +105,8 @@ public:
|
|||||||
/* integrator */
|
/* integrator */
|
||||||
device_vector<uint> sobol_directions;
|
device_vector<uint> sobol_directions;
|
||||||
|
|
||||||
/* images */
|
/* CPU images */
|
||||||
device_vector<uchar4> tex_image[TEX_EXTENDED_NUM_IMAGES];
|
device_vector<uchar4> tex_image[TEX_EXTENDED_NUM_IMAGES_CPU];
|
||||||
device_vector<float4> tex_float_image[TEX_EXTENDED_NUM_FLOAT_IMAGES];
|
device_vector<float4> tex_float_image[TEX_EXTENDED_NUM_FLOAT_IMAGES];
|
||||||
|
|
||||||
/* opencl images */
|
/* opencl images */
|
||||||
|
Reference in New Issue
Block a user