Cycles: Remove Fermi texture code.
This should be the last Fermi removal commit, unless I missed something. It's been a pleasure, Fermi!
This commit is contained in:
@@ -359,7 +359,6 @@ DeviceInfo Device::get_multi_device(const vector<DeviceInfo>& subdevices, int th
|
||||
info.description = "Multi Device";
|
||||
info.num = 0;
|
||||
|
||||
info.has_fermi_limits = false;
|
||||
info.has_half_images = true;
|
||||
info.has_volume_decoupled = true;
|
||||
info.bvh_layout_mask = BVH_LAYOUT_ALL;
|
||||
@@ -395,8 +394,6 @@ DeviceInfo Device::get_multi_device(const vector<DeviceInfo>& subdevices, int th
|
||||
}
|
||||
|
||||
/* Accumulate device info. */
|
||||
info.has_fermi_limits = info.has_fermi_limits ||
|
||||
device.has_fermi_limits;
|
||||
info.has_half_images &= device.has_half_images;
|
||||
info.has_volume_decoupled &= device.has_volume_decoupled;
|
||||
info.bvh_layout_mask = device.bvh_layout_mask & info.bvh_layout_mask;
|
||||
|
@@ -56,7 +56,6 @@ public:
|
||||
int num;
|
||||
bool display_device; /* GPU is used as a display device. */
|
||||
bool advanced_shading; /* Supports full shading system. */
|
||||
bool has_fermi_limits; /* Fixed number of textures limit. */
|
||||
bool has_half_images; /* Support half-float textures. */
|
||||
bool has_volume_decoupled; /* Decoupled volume shading. */
|
||||
BVHLayoutMask bvh_layout_mask; /* Bitmask of supported BVH layouts. */
|
||||
@@ -73,7 +72,6 @@ public:
|
||||
cpu_threads = 0;
|
||||
display_device = false;
|
||||
advanced_shading = true;
|
||||
has_fermi_limits = false;
|
||||
has_half_images = false;
|
||||
has_volume_decoupled = false;
|
||||
bvh_layout_mask = BVH_LAYOUT_NONE;
|
||||
|
@@ -309,9 +309,7 @@ public:
|
||||
|
||||
delete split_kernel;
|
||||
|
||||
if(!info.has_fermi_limits) {
|
||||
texture_info.free();
|
||||
}
|
||||
|
||||
cuda_assert(cuCtxDestroy(cuContext));
|
||||
}
|
||||
@@ -680,7 +678,7 @@ public:
|
||||
|
||||
void load_texture_info()
|
||||
{
|
||||
if(!info.has_fermi_limits && need_texture_info) {
|
||||
if(need_texture_info) {
|
||||
texture_info.copy_to_device();
|
||||
need_texture_info = false;
|
||||
}
|
||||
@@ -1018,9 +1016,6 @@ public:
|
||||
{
|
||||
CUDAContextScope scope(this);
|
||||
|
||||
/* Check if we are on sm_30 or above, for bindless textures. */
|
||||
bool has_fermi_limits = info.has_fermi_limits;
|
||||
|
||||
/* General variables for both architectures */
|
||||
string bind_name = mem.name;
|
||||
size_t dsize = datatype_size(mem.data_type);
|
||||
@@ -1076,25 +1071,6 @@ public:
|
||||
/* Image Texture Storage */
|
||||
CUtexref texref = NULL;
|
||||
|
||||
if(has_fermi_limits) {
|
||||
if(mem.data_depth > 1) {
|
||||
/* Kernel uses different bind names for 2d and 3d float textures,
|
||||
* so we have to adjust a couple of things here.
|
||||
*/
|
||||
vector<string> tokens;
|
||||
string_split(tokens, mem.name, "_");
|
||||
bind_name = string_printf("__tex_image_%s_3d_%s",
|
||||
tokens[2].c_str(),
|
||||
tokens[3].c_str());
|
||||
}
|
||||
|
||||
cuda_assert(cuModuleGetTexRef(&texref, cuModule, bind_name.c_str()));
|
||||
|
||||
if(!texref) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
CUarray_format_enum format;
|
||||
switch(mem.data_type) {
|
||||
case TYPE_UCHAR: format = CU_AD_FORMAT_UNSIGNED_INT8; break;
|
||||
@@ -1187,7 +1163,6 @@ public:
|
||||
cuda_assert(cuMemcpyHtoD(mem.device_pointer, mem.host_pointer, size));
|
||||
}
|
||||
|
||||
if(!has_fermi_limits) {
|
||||
/* Kepler+, bindless textures. */
|
||||
int flat_slot = 0;
|
||||
if(string_startswith(mem.name, "__tex_image")) {
|
||||
@@ -1251,34 +1226,6 @@ public:
|
||||
info.depth = mem.data_depth;
|
||||
need_texture_info = true;
|
||||
}
|
||||
else {
|
||||
/* Fermi, fixed texture slots. */
|
||||
if(array_3d) {
|
||||
cuda_assert(cuTexRefSetArray(texref, array_3d, CU_TRSA_OVERRIDE_FORMAT));
|
||||
}
|
||||
else if(mem.data_height > 0) {
|
||||
CUDA_ARRAY_DESCRIPTOR array_desc;
|
||||
array_desc.Format = format;
|
||||
array_desc.Height = mem.data_height;
|
||||
array_desc.Width = mem.data_width;
|
||||
array_desc.NumChannels = mem.data_elements;
|
||||
cuda_assert(cuTexRefSetAddress2D_v3(texref, &array_desc, mem.device_pointer, dst_pitch));
|
||||
}
|
||||
else {
|
||||
cuda_assert(cuTexRefSetAddress(NULL, texref, cuda_device_ptr(mem.device_pointer), size));
|
||||
}
|
||||
|
||||
/* Attach to texture reference. */
|
||||
cuda_assert(cuTexRefSetFilterMode(texref, filter_mode));
|
||||
cuda_assert(cuTexRefSetFlags(texref, CU_TRSF_NORMALIZED_COORDINATES));
|
||||
cuda_assert(cuTexRefSetFormat(texref, format, mem.data_elements));
|
||||
cuda_assert(cuTexRefSetAddressMode(texref, 0, address_mode));
|
||||
cuda_assert(cuTexRefSetAddressMode(texref, 1, address_mode));
|
||||
if(mem.data_depth > 1) {
|
||||
cuda_assert(cuTexRefSetAddressMode(texref, 2, address_mode));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void tex_free(device_memory& mem)
|
||||
{
|
||||
@@ -2545,7 +2492,6 @@ void device_cuda_info(vector<DeviceInfo>& devices)
|
||||
info.num = num;
|
||||
|
||||
info.advanced_shading = (major >= 3);
|
||||
info.has_fermi_limits = !(major >= 3);
|
||||
info.has_half_images = (major >= 3);
|
||||
info.has_volume_decoupled = false;
|
||||
info.bvh_layout_mask = BVH_LAYOUT_BVH2;
|
||||
|
@@ -135,18 +135,9 @@ ccl_device_inline uint ccl_num_groups(uint d)
|
||||
|
||||
/* Textures */
|
||||
|
||||
/* Use arrays for regular data. This is a little slower than textures on Fermi,
|
||||
* but allows for cleaner code and we will stop supporting Fermi soon. */
|
||||
/* Use arrays for regular data. */
|
||||
#define kernel_tex_fetch(t, index) t[(index)]
|
||||
|
||||
/* On Kepler (6xx) and above, we use Bindless Textures for images.
|
||||
* On Fermi cards (4xx and 5xx), we have to use regular textures. */
|
||||
#if __CUDA_ARCH__ < 300
|
||||
typedef texture<float4, 2> texture_image_float4;
|
||||
typedef texture<float4, 3> texture_image3d_float4;
|
||||
typedef texture<uchar4, 2, cudaReadModeNormalizedFloat> texture_image_uchar4;
|
||||
#endif
|
||||
|
||||
#define kernel_data __data
|
||||
|
||||
/* Use fast math functions */
|
||||
|
@@ -78,111 +78,8 @@ KERNEL_TEX(float, __lookup_table)
|
||||
/* sobol */
|
||||
KERNEL_TEX(uint, __sobol_directions)
|
||||
|
||||
#if !defined(__KERNEL_CUDA__) || __CUDA_ARCH__ >= 300
|
||||
/* image textures */
|
||||
KERNEL_TEX(TextureInfo, __texture_info)
|
||||
#else
|
||||
/* full-float image */
|
||||
KERNEL_IMAGE_TEX(float4, texture_image_float4, __tex_image_float4_000)
|
||||
KERNEL_IMAGE_TEX(float4, texture_image_float4, __tex_image_float4_008)
|
||||
KERNEL_IMAGE_TEX(float4, texture_image_float4, __tex_image_float4_016)
|
||||
KERNEL_IMAGE_TEX(float4, texture_image_float4, __tex_image_float4_024)
|
||||
KERNEL_IMAGE_TEX(float4, texture_image_float4, __tex_image_float4_032)
|
||||
|
||||
KERNEL_IMAGE_TEX(float4, texture_image3d_float4, __tex_image_float4_3d_000)
|
||||
KERNEL_IMAGE_TEX(float4, texture_image3d_float4, __tex_image_float4_3d_008)
|
||||
KERNEL_IMAGE_TEX(float4, texture_image3d_float4, __tex_image_float4_3d_016)
|
||||
KERNEL_IMAGE_TEX(float4, texture_image3d_float4, __tex_image_float4_3d_024)
|
||||
KERNEL_IMAGE_TEX(float4, texture_image3d_float4, __tex_image_float4_3d_032)
|
||||
|
||||
/* image
|
||||
* These texture names are encoded to their flattened slots as
|
||||
* ImageManager::type_index_to_flattened_slot() returns them. */
|
||||
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_001)
|
||||
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_009)
|
||||
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_017)
|
||||
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_025)
|
||||
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_033)
|
||||
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_041)
|
||||
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_049)
|
||||
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_057)
|
||||
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_065)
|
||||
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_073)
|
||||
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_081)
|
||||
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_089)
|
||||
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_097)
|
||||
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_105)
|
||||
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_113)
|
||||
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_121)
|
||||
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_129)
|
||||
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_137)
|
||||
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_145)
|
||||
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_153)
|
||||
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_161)
|
||||
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_169)
|
||||
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_177)
|
||||
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_185)
|
||||
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_193)
|
||||
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_201)
|
||||
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_209)
|
||||
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_217)
|
||||
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_225)
|
||||
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_233)
|
||||
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_241)
|
||||
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_249)
|
||||
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_257)
|
||||
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_265)
|
||||
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_273)
|
||||
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_281)
|
||||
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_289)
|
||||
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_297)
|
||||
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_305)
|
||||
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_313)
|
||||
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_321)
|
||||
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_329)
|
||||
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_337)
|
||||
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_345)
|
||||
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_353)
|
||||
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_361)
|
||||
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_369)
|
||||
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_377)
|
||||
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_385)
|
||||
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_393)
|
||||
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_401)
|
||||
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_409)
|
||||
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_417)
|
||||
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_425)
|
||||
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_433)
|
||||
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_441)
|
||||
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_449)
|
||||
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_457)
|
||||
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_465)
|
||||
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_473)
|
||||
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_481)
|
||||
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_489)
|
||||
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_497)
|
||||
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_505)
|
||||
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_513)
|
||||
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_521)
|
||||
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_529)
|
||||
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_537)
|
||||
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_545)
|
||||
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_553)
|
||||
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_561)
|
||||
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_569)
|
||||
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_577)
|
||||
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_585)
|
||||
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_593)
|
||||
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_601)
|
||||
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_609)
|
||||
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_617)
|
||||
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_625)
|
||||
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_633)
|
||||
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_641)
|
||||
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_649)
|
||||
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_657)
|
||||
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_665)
|
||||
#endif /* defined(__KERNEL_CUDA__) && __CUDA_ARCH__ < 300 */
|
||||
|
||||
#undef KERNEL_TEX
|
||||
#undef KERNEL_IMAGE_TEX
|
||||
|
@@ -14,10 +14,6 @@
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#if __CUDA_ARCH__ >= 300
|
||||
|
||||
/* Kepler */
|
||||
|
||||
/* w0, w1, w2, and w3 are the four cubic B-spline basis functions. */
|
||||
ccl_device float cubic_w0(float a)
|
||||
{
|
||||
@@ -191,120 +187,3 @@ ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals *kg, int id, float x,
|
||||
}
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
/* Fermi */
|
||||
|
||||
ccl_device float4 kernel_tex_image_interp(KernelGlobals *kg, int id, float x, float y)
|
||||
{
|
||||
float4 r;
|
||||
switch(id) {
|
||||
case 0: r = tex2D(__tex_image_float4_000, x, y); break;
|
||||
case 8: r = tex2D(__tex_image_float4_008, x, y); break;
|
||||
case 16: r = tex2D(__tex_image_float4_016, x, y); break;
|
||||
case 24: r = tex2D(__tex_image_float4_024, x, y); break;
|
||||
case 32: r = tex2D(__tex_image_float4_032, x, y); break;
|
||||
case 1: r = tex2D(__tex_image_byte4_001, x, y); break;
|
||||
case 9: r = tex2D(__tex_image_byte4_009, x, y); break;
|
||||
case 17: r = tex2D(__tex_image_byte4_017, x, y); break;
|
||||
case 25: r = tex2D(__tex_image_byte4_025, x, y); break;
|
||||
case 33: r = tex2D(__tex_image_byte4_033, x, y); break;
|
||||
case 41: r = tex2D(__tex_image_byte4_041, x, y); break;
|
||||
case 49: r = tex2D(__tex_image_byte4_049, x, y); break;
|
||||
case 57: r = tex2D(__tex_image_byte4_057, x, y); break;
|
||||
case 65: r = tex2D(__tex_image_byte4_065, x, y); break;
|
||||
case 73: r = tex2D(__tex_image_byte4_073, x, y); break;
|
||||
case 81: r = tex2D(__tex_image_byte4_081, x, y); break;
|
||||
case 89: r = tex2D(__tex_image_byte4_089, x, y); break;
|
||||
case 97: r = tex2D(__tex_image_byte4_097, x, y); break;
|
||||
case 105: r = tex2D(__tex_image_byte4_105, x, y); break;
|
||||
case 113: r = tex2D(__tex_image_byte4_113, x, y); break;
|
||||
case 121: r = tex2D(__tex_image_byte4_121, x, y); break;
|
||||
case 129: r = tex2D(__tex_image_byte4_129, x, y); break;
|
||||
case 137: r = tex2D(__tex_image_byte4_137, x, y); break;
|
||||
case 145: r = tex2D(__tex_image_byte4_145, x, y); break;
|
||||
case 153: r = tex2D(__tex_image_byte4_153, x, y); break;
|
||||
case 161: r = tex2D(__tex_image_byte4_161, x, y); break;
|
||||
case 169: r = tex2D(__tex_image_byte4_169, x, y); break;
|
||||
case 177: r = tex2D(__tex_image_byte4_177, x, y); break;
|
||||
case 185: r = tex2D(__tex_image_byte4_185, x, y); break;
|
||||
case 193: r = tex2D(__tex_image_byte4_193, x, y); break;
|
||||
case 201: r = tex2D(__tex_image_byte4_201, x, y); break;
|
||||
case 209: r = tex2D(__tex_image_byte4_209, x, y); break;
|
||||
case 217: r = tex2D(__tex_image_byte4_217, x, y); break;
|
||||
case 225: r = tex2D(__tex_image_byte4_225, x, y); break;
|
||||
case 233: r = tex2D(__tex_image_byte4_233, x, y); break;
|
||||
case 241: r = tex2D(__tex_image_byte4_241, x, y); break;
|
||||
case 249: r = tex2D(__tex_image_byte4_249, x, y); break;
|
||||
case 257: r = tex2D(__tex_image_byte4_257, x, y); break;
|
||||
case 265: r = tex2D(__tex_image_byte4_265, x, y); break;
|
||||
case 273: r = tex2D(__tex_image_byte4_273, x, y); break;
|
||||
case 281: r = tex2D(__tex_image_byte4_281, x, y); break;
|
||||
case 289: r = tex2D(__tex_image_byte4_289, x, y); break;
|
||||
case 297: r = tex2D(__tex_image_byte4_297, x, y); break;
|
||||
case 305: r = tex2D(__tex_image_byte4_305, x, y); break;
|
||||
case 313: r = tex2D(__tex_image_byte4_313, x, y); break;
|
||||
case 321: r = tex2D(__tex_image_byte4_321, x, y); break;
|
||||
case 329: r = tex2D(__tex_image_byte4_329, x, y); break;
|
||||
case 337: r = tex2D(__tex_image_byte4_337, x, y); break;
|
||||
case 345: r = tex2D(__tex_image_byte4_345, x, y); break;
|
||||
case 353: r = tex2D(__tex_image_byte4_353, x, y); break;
|
||||
case 361: r = tex2D(__tex_image_byte4_361, x, y); break;
|
||||
case 369: r = tex2D(__tex_image_byte4_369, x, y); break;
|
||||
case 377: r = tex2D(__tex_image_byte4_377, x, y); break;
|
||||
case 385: r = tex2D(__tex_image_byte4_385, x, y); break;
|
||||
case 393: r = tex2D(__tex_image_byte4_393, x, y); break;
|
||||
case 401: r = tex2D(__tex_image_byte4_401, x, y); break;
|
||||
case 409: r = tex2D(__tex_image_byte4_409, x, y); break;
|
||||
case 417: r = tex2D(__tex_image_byte4_417, x, y); break;
|
||||
case 425: r = tex2D(__tex_image_byte4_425, x, y); break;
|
||||
case 433: r = tex2D(__tex_image_byte4_433, x, y); break;
|
||||
case 441: r = tex2D(__tex_image_byte4_441, x, y); break;
|
||||
case 449: r = tex2D(__tex_image_byte4_449, x, y); break;
|
||||
case 457: r = tex2D(__tex_image_byte4_457, x, y); break;
|
||||
case 465: r = tex2D(__tex_image_byte4_465, x, y); break;
|
||||
case 473: r = tex2D(__tex_image_byte4_473, x, y); break;
|
||||
case 481: r = tex2D(__tex_image_byte4_481, x, y); break;
|
||||
case 489: r = tex2D(__tex_image_byte4_489, x, y); break;
|
||||
case 497: r = tex2D(__tex_image_byte4_497, x, y); break;
|
||||
case 505: r = tex2D(__tex_image_byte4_505, x, y); break;
|
||||
case 513: r = tex2D(__tex_image_byte4_513, x, y); break;
|
||||
case 521: r = tex2D(__tex_image_byte4_521, x, y); break;
|
||||
case 529: r = tex2D(__tex_image_byte4_529, x, y); break;
|
||||
case 537: r = tex2D(__tex_image_byte4_537, x, y); break;
|
||||
case 545: r = tex2D(__tex_image_byte4_545, x, y); break;
|
||||
case 553: r = tex2D(__tex_image_byte4_553, x, y); break;
|
||||
case 561: r = tex2D(__tex_image_byte4_561, x, y); break;
|
||||
case 569: r = tex2D(__tex_image_byte4_569, x, y); break;
|
||||
case 577: r = tex2D(__tex_image_byte4_577, x, y); break;
|
||||
case 585: r = tex2D(__tex_image_byte4_585, x, y); break;
|
||||
case 593: r = tex2D(__tex_image_byte4_593, x, y); break;
|
||||
case 601: r = tex2D(__tex_image_byte4_601, x, y); break;
|
||||
case 609: r = tex2D(__tex_image_byte4_609, x, y); break;
|
||||
case 617: r = tex2D(__tex_image_byte4_617, x, y); break;
|
||||
case 625: r = tex2D(__tex_image_byte4_625, x, y); break;
|
||||
case 633: r = tex2D(__tex_image_byte4_633, x, y); break;
|
||||
case 641: r = tex2D(__tex_image_byte4_641, x, y); break;
|
||||
case 649: r = tex2D(__tex_image_byte4_649, x, y); break;
|
||||
case 657: r = tex2D(__tex_image_byte4_657, x, y); break;
|
||||
case 665: r = tex2D(__tex_image_byte4_665, x, y); break;
|
||||
default: r = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
|
||||
}
|
||||
return r;
|
||||
}
|
||||
|
||||
ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals *kg, int id, float x, float y, float z, InterpolationType interp)
|
||||
{
|
||||
float4 r;
|
||||
switch(id) {
|
||||
case 0: r = tex3D(__tex_image_float4_3d_000, x, y, z); break;
|
||||
case 8: r = tex3D(__tex_image_float4_3d_008, x, y, z); break;
|
||||
case 16: r = tex3D(__tex_image_float4_3d_016, x, y, z); break;
|
||||
case 24: r = tex3D(__tex_image_float4_3d_024, x, y, z); break;
|
||||
case 32: r = tex3D(__tex_image_float4_3d_032, x, y, z); break;
|
||||
}
|
||||
return r;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
|
@@ -49,7 +49,6 @@ ImageManager::ImageManager(const DeviceInfo& info)
|
||||
/* Set image limits */
|
||||
max_num_images = TEX_NUM_MAX;
|
||||
has_half_images = info.has_half_images;
|
||||
cuda_fermi_limits = info.has_fermi_limits;
|
||||
|
||||
for(size_t type = 0; type < IMAGE_DATA_NUM_TYPES; type++) {
|
||||
tex_num_images[type] = 0;
|
||||
@@ -255,7 +254,7 @@ int ImageManager::add_image(const string& filename,
|
||||
/* Check whether it's a float texture. */
|
||||
is_float = (type == IMAGE_DATA_TYPE_FLOAT || type == IMAGE_DATA_TYPE_FLOAT4);
|
||||
|
||||
/* No single channel and half textures on CUDA (Fermi) and no half on OpenCL, use available slots */
|
||||
/* No half textures on OpenCL, use available slots */
|
||||
if(!has_half_images) {
|
||||
if(type == IMAGE_DATA_TYPE_HALF4) {
|
||||
type = IMAGE_DATA_TYPE_FLOAT4;
|
||||
@@ -265,15 +264,6 @@ int ImageManager::add_image(const string& filename,
|
||||
}
|
||||
}
|
||||
|
||||
if(cuda_fermi_limits) {
|
||||
if(type == IMAGE_DATA_TYPE_FLOAT) {
|
||||
type = IMAGE_DATA_TYPE_FLOAT4;
|
||||
}
|
||||
else if(type == IMAGE_DATA_TYPE_BYTE) {
|
||||
type = IMAGE_DATA_TYPE_BYTE4;
|
||||
}
|
||||
}
|
||||
|
||||
/* Find existing image. */
|
||||
for(slot = 0; slot < images[type].size(); slot++) {
|
||||
img = images[type][slot];
|
||||
@@ -303,18 +293,8 @@ int ImageManager::add_image(const string& filename,
|
||||
break;
|
||||
}
|
||||
|
||||
/* Count if we're over the limit */
|
||||
if(cuda_fermi_limits) {
|
||||
if(tex_num_images[IMAGE_DATA_TYPE_BYTE4] == TEX_NUM_BYTE4_CUDA
|
||||
|| tex_num_images[IMAGE_DATA_TYPE_FLOAT4] == TEX_NUM_FLOAT4_CUDA)
|
||||
{
|
||||
printf("ImageManager::add_image: Reached %s image limit (%d), skipping '%s'\n",
|
||||
name_from_type(type).c_str(), tex_num_images[type], filename.c_str());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
else {
|
||||
/* Very unlikely, since max_num_images is insanely big. But better safe than sorry. */
|
||||
/* Count if we're over the limit.
|
||||
* Very unlikely, since max_num_images is insanely big. But better safe than sorry. */
|
||||
int tex_count = 0;
|
||||
for(int type = 0; type < IMAGE_DATA_NUM_TYPES; type++) {
|
||||
tex_count += tex_num_images[type];
|
||||
@@ -324,7 +304,6 @@ int ImageManager::add_image(const string& filename,
|
||||
max_num_images, filename.c_str());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
if(slot == images[type].size()) {
|
||||
images[type].resize(images[type].size() + 1);
|
||||
|
@@ -121,7 +121,6 @@ private:
|
||||
int tex_num_images[IMAGE_DATA_NUM_TYPES];
|
||||
int max_num_images;
|
||||
bool has_half_images;
|
||||
bool cuda_fermi_limits;
|
||||
|
||||
thread_mutex device_mutex;
|
||||
int animation_frame;
|
||||
|
@@ -20,22 +20,6 @@
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
/* Texture limits on devices. */
|
||||
|
||||
/* CUDA (Geforce 4xx and 5xx) */
|
||||
#define TEX_NUM_FLOAT4_CUDA 5
|
||||
#define TEX_NUM_BYTE4_CUDA 84
|
||||
#define TEX_NUM_HALF4_CUDA 0
|
||||
#define TEX_NUM_FLOAT_CUDA 0
|
||||
#define TEX_NUM_BYTE_CUDA 0
|
||||
#define TEX_NUM_HALF_CUDA 0
|
||||
#define TEX_START_FLOAT4_CUDA 0
|
||||
#define TEX_START_BYTE4_CUDA TEX_NUM_FLOAT4_CUDA
|
||||
#define TEX_START_HALF4_CUDA (TEX_NUM_FLOAT4_CUDA + TEX_NUM_BYTE4_CUDA)
|
||||
#define TEX_START_FLOAT_CUDA (TEX_NUM_FLOAT4_CUDA + TEX_NUM_BYTE4_CUDA + TEX_NUM_HALF4_CUDA)
|
||||
#define TEX_START_BYTE_CUDA (TEX_NUM_FLOAT4_CUDA + TEX_NUM_BYTE4_CUDA + TEX_NUM_HALF4_CUDA + TEX_NUM_FLOAT_CUDA)
|
||||
#define TEX_START_HALF_CUDA (TEX_NUM_FLOAT4_CUDA + TEX_NUM_BYTE4_CUDA + TEX_NUM_HALF4_CUDA + TEX_NUM_FLOAT_CUDA + TEX_NUM_BYTE_CUDA)
|
||||
|
||||
/* Any architecture other than old CUDA cards */
|
||||
#define TEX_NUM_MAX (INT_MAX >> 4)
|
||||
|
||||
/* Color to use when textures are not found. */
|
||||
@@ -44,11 +28,8 @@ CCL_NAMESPACE_BEGIN
|
||||
#define TEX_IMAGE_MISSING_B 1
|
||||
#define TEX_IMAGE_MISSING_A 1
|
||||
|
||||
#if defined (__KERNEL_CUDA__) && (__CUDA_ARCH__ < 300)
|
||||
# define kernel_tex_type(tex) (tex < TEX_START_BYTE4_CUDA ? IMAGE_DATA_TYPE_FLOAT4 : IMAGE_DATA_TYPE_BYTE4)
|
||||
#else
|
||||
# define kernel_tex_type(tex) (tex & IMAGE_DATA_TYPE_MASK)
|
||||
#endif
|
||||
/* Texture type. */
|
||||
#define kernel_tex_type(tex) (tex & IMAGE_DATA_TYPE_MASK)
|
||||
|
||||
/* Interpolation types for textures
|
||||
* CUDA also uses texture space to store other objects */
|
||||
|
Reference in New Issue
Block a user