Cycles / CUDA:
* Remove support for CUDA Toolkit 4.x; only Toolkit 5.0 and above is supported now.
* Remove support for sm_1x cards (older than Fermi) for good. These cards have not been officially supported for several releases already; this removes the special-case code that was still there for them.
This commit is contained in:
@@ -227,14 +227,12 @@ public:
|
||||
|
||||
bool support_device(bool experimental)
|
||||
{
|
||||
if(!experimental) {
|
||||
int major, minor;
|
||||
cuDeviceComputeCapability(&major, &minor, cuDevId);
|
||||
int major, minor;
|
||||
cuDeviceComputeCapability(&major, &minor, cuDevId);
|
||||
|
||||
if(major < 2) {
|
||||
cuda_error_message(string_printf("CUDA device supported only with compute capability 2.0 or up, found %d.%d.", major, minor));
|
||||
return false;
|
||||
}
|
||||
if(major < 2) {
|
||||
cuda_error_message(string_printf("CUDA device supported only with compute capability 2.0 or up, found %d.%d.", major, minor));
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
@@ -286,8 +284,12 @@ public:
|
||||
cuda_error_message("CUDA nvcc compiler version could not be parsed.");
|
||||
return "";
|
||||
}
|
||||
if(cuda_version < 50) {
|
||||
printf("Unsupported CUDA version %d.%d detected, you need CUDA 5.0.\n", cuda_version/10, cuda_version%10);
|
||||
return "";
|
||||
}
|
||||
|
||||
if(cuda_version != 50)
|
||||
else if(cuda_version > 50)
|
||||
printf("CUDA version %d.%d detected, build may succeed but only CUDA 5.0 is officially supported.\n", cuda_version/10, cuda_version%10);
|
||||
|
||||
/* compile */
|
||||
@@ -296,36 +298,14 @@ public:
|
||||
const int machine = system_cpu_bits();
|
||||
string arch_flags;
|
||||
|
||||
/* build flags depending on CUDA version and arch */
|
||||
if(cuda_version < 50) {
|
||||
/* CUDA 4.x */
|
||||
if(major == 1) {
|
||||
/* sm_1x */
|
||||
arch_flags = "--maxrregcount=24 --opencc-options -OPT:Olimit=0";
|
||||
}
|
||||
else if(major == 2) {
|
||||
/* sm_2x */
|
||||
arch_flags = "--maxrregcount=24";
|
||||
}
|
||||
else {
|
||||
/* sm_3x */
|
||||
arch_flags = "--maxrregcount=32";
|
||||
}
|
||||
/* CUDA 5.x build flags for different archs */
|
||||
if(major == 2) {
|
||||
/* sm_2x */
|
||||
arch_flags = "--maxrregcount=32 --use_fast_math";
|
||||
}
|
||||
else {
|
||||
/* CUDA 5.x */
|
||||
if(major == 1) {
|
||||
/* sm_1x */
|
||||
arch_flags = "--maxrregcount=24 --opencc-options -OPT:Olimit=0 --use_fast_math";
|
||||
}
|
||||
else if(major == 2) {
|
||||
/* sm_2x */
|
||||
arch_flags = "--maxrregcount=32 --use_fast_math";
|
||||
}
|
||||
else {
|
||||
/* sm_3x */
|
||||
arch_flags = "--maxrregcount=32 --use_fast_math";
|
||||
}
|
||||
else if(major == 3) {
|
||||
/* sm_3x */
|
||||
arch_flags = "--maxrregcount=32 --use_fast_math";
|
||||
}
|
||||
|
||||
double starttime = time_dt();
|
||||
|
@@ -151,37 +151,17 @@ if(WITH_CYCLES_CUDA_BINARIES)
|
||||
|
||||
set(cuda_version_flags "-D__KERNEL_CUDA_VERSION__=${CUDA_VERSION}")
|
||||
|
||||
# build flags depending on CUDA version and arch
|
||||
if(CUDA_VERSION LESS 50)
|
||||
# CUDA 4.x
|
||||
if(${arch} MATCHES "sm_1[0-9]")
|
||||
# sm_1x
|
||||
set(cuda_arch_flags "--maxrregcount=24 --opencc-options -OPT:Olimit=0")
|
||||
elseif(${arch} MATCHES "sm_2[0-9]")
|
||||
# sm_2x
|
||||
set(cuda_arch_flags "--maxrregcount=24")
|
||||
else()
|
||||
# sm_3x
|
||||
set(cuda_arch_flags "--maxrregcount=32")
|
||||
endif()
|
||||
|
||||
set(cuda_math_flags "")
|
||||
else()
|
||||
# CUDA 5.x
|
||||
if(${arch} MATCHES "sm_1[0-9]")
|
||||
# sm_1x
|
||||
set(cuda_arch_flags "--maxrregcount=24 --opencc-options -OPT:Olimit=0")
|
||||
elseif(${arch} MATCHES "sm_2[0-9]")
|
||||
# sm_2x
|
||||
set(cuda_arch_flags "--maxrregcount=32")
|
||||
else()
|
||||
# sm_3x
|
||||
set(cuda_arch_flags "--maxrregcount=32")
|
||||
endif()
|
||||
|
||||
set(cuda_math_flags "--use_fast_math")
|
||||
# CUDA 5.x build flags for different archs
|
||||
if(${arch} MATCHES "sm_2[0-9]")
|
||||
# sm_2x
|
||||
set(cuda_arch_flags "--maxrregcount=32")
|
||||
elseif(${arch} MATCHES "sm_3[0-9]")
|
||||
# sm_3x
|
||||
set(cuda_arch_flags "--maxrregcount=32")
|
||||
endif()
|
||||
|
||||
set(cuda_math_flags "--use_fast_math")
|
||||
|
||||
if(CUDA_VERSION LESS 50 AND ${arch} MATCHES "sm_35")
|
||||
message(WARNING "Can't build kernel for CUDA sm_35 architecture, skipping")
|
||||
else()
|
||||
|
@@ -86,33 +86,13 @@ if env['WITH_BF_CYCLES_CUDA_BINARIES']:
|
||||
for arch in cuda_archs:
|
||||
cubin_file = os.path.join(build_dir, "kernel_%s.cubin" % arch)
|
||||
|
||||
# build flags depending on CUDA version and arch
|
||||
if cuda_version < 50:
|
||||
if arch == "sm_35":
|
||||
print("Can't build kernel for CUDA sm_35 architecture, skipping")
|
||||
continue
|
||||
|
||||
# CUDA 4.x
|
||||
if arch.startswith("sm_1"):
|
||||
# sm_1x
|
||||
cuda_arch_flags = "--maxrregcount=24 --opencc-options -OPT:Olimit=0"
|
||||
elif arch.startswith("sm_2"):
|
||||
# sm_2x
|
||||
cuda_arch_flags = "--maxrregcount=24"
|
||||
else:
|
||||
# sm_3x
|
||||
cuda_arch_flags = "--maxrregcount=32"
|
||||
else:
|
||||
# CUDA 5.x
|
||||
if arch.startswith("sm_1"):
|
||||
# sm_1x
|
||||
cuda_arch_flags = "--maxrregcount=24 --opencc-options -OPT:Olimit=0 --use_fast_math"
|
||||
elif arch.startswith("sm_2"):
|
||||
# sm_2x
|
||||
cuda_arch_flags = "--maxrregcount=32 --use_fast_math"
|
||||
else:
|
||||
# sm_3x
|
||||
cuda_arch_flags = "--maxrregcount=32 --use_fast_math"
|
||||
# CUDA 5.x build flags for different archs
|
||||
if arch.startswith("sm_2"):
|
||||
# sm_2x
|
||||
cuda_arch_flags = "--maxrregcount=32 --use_fast_math"
|
||||
elif arch.startswith("sm_3"):
|
||||
# sm_3x
|
||||
cuda_arch_flags = "--maxrregcount=32 --use_fast_math"
|
||||
|
||||
command = "\"%s\" -arch=%s %s %s \"%s\" -o \"%s\"" % (nvcc, arch, nvcc_flags, cuda_arch_flags, kernel_file, cubin_file)
|
||||
|
||||
|
@@ -36,15 +36,8 @@ CCL_NAMESPACE_BEGIN
|
||||
/* ShaderData setup from incoming ray */
|
||||
|
||||
#ifdef __OBJECT_MOTION__
|
||||
#if defined(__KERNEL_CUDA_VERSION__) && __KERNEL_CUDA_VERSION__ <= 42
|
||||
__device_noinline
|
||||
#else
|
||||
__device
|
||||
#endif
|
||||
void shader_setup_object_transforms(KernelGlobals *kg, ShaderData *sd, float time)
|
||||
__device void shader_setup_object_transforms(KernelGlobals *kg, ShaderData *sd, float time)
|
||||
{
|
||||
/* note that this is a separate non-inlined function to work around crash
|
||||
* on CUDA sm 2.0, otherwise kernel execution crashes (compiler bug?) */
|
||||
if(sd->flag & SD_OBJECT_MOTION) {
|
||||
sd->ob_tfm = object_fetch_transform_motion(kg, sd->object, time);
|
||||
sd->ob_itfm= transform_quick_inverse(sd->ob_tfm);
|
||||
@@ -56,12 +49,7 @@ void shader_setup_object_transforms(KernelGlobals *kg, ShaderData *sd, float tim
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(__KERNEL_CUDA_VERSION__) && __KERNEL_CUDA_VERSION__ <= 42
|
||||
__device_noinline
|
||||
#else
|
||||
__device
|
||||
#endif
|
||||
void shader_setup_from_ray(KernelGlobals *kg, ShaderData *sd,
|
||||
__device void shader_setup_from_ray(KernelGlobals *kg, ShaderData *sd,
|
||||
const Intersection *isect, const Ray *ray, int bounce)
|
||||
{
|
||||
#ifdef __INSTANCING__
|
||||
@@ -249,12 +237,7 @@ __device_inline void shader_setup_from_subsurface(KernelGlobals *kg, ShaderData
|
||||
|
||||
/* ShaderData setup from position sampled on mesh */
|
||||
|
||||
#if defined(__KERNEL_CUDA_VERSION__) && __KERNEL_CUDA_VERSION__ <= 42
|
||||
__device_noinline
|
||||
#else
|
||||
__device
|
||||
#endif
|
||||
void shader_setup_from_sample(KernelGlobals *kg, ShaderData *sd,
|
||||
__device void shader_setup_from_sample(KernelGlobals *kg, ShaderData *sd,
|
||||
const float3 P, const float3 Ng, const float3 I,
|
||||
int shader, int object, int prim, float u, float v, float t, float time, int bounce, int segment)
|
||||
{
|
||||
|
Reference in New Issue
Block a user