Cycles / CUDA:

* Remove support for CUDA Toolkit 4.x; only Toolkit 5.0 and above are supported now.
* Remove support for sm_1x cards (< Fermi) for good. These cards have not been officially supported for a few releases already; this now removes the special-case code that was still there.
This commit is contained in:
Thomas Dinges
2013-10-08 15:29:28 +00:00
parent dfe1610504
commit b5a5773fa9
4 changed files with 36 additions and 113 deletions

View File

@@ -227,14 +227,12 @@ public:
bool support_device(bool experimental) bool support_device(bool experimental)
{ {
if(!experimental) { int major, minor;
int major, minor; cuDeviceComputeCapability(&major, &minor, cuDevId);
cuDeviceComputeCapability(&major, &minor, cuDevId);
if(major < 2) { if(major < 2) {
cuda_error_message(string_printf("CUDA device supported only with compute capability 2.0 or up, found %d.%d.", major, minor)); cuda_error_message(string_printf("CUDA device supported only with compute capability 2.0 or up, found %d.%d.", major, minor));
return false; return false;
}
} }
return true; return true;
@@ -286,8 +284,12 @@ public:
cuda_error_message("CUDA nvcc compiler version could not be parsed."); cuda_error_message("CUDA nvcc compiler version could not be parsed.");
return ""; return "";
} }
if(cuda_version < 50) {
printf("Unsupported CUDA version %d.%d detected, you need CUDA 5.0.\n", cuda_version/10, cuda_version%10);
return "";
}
if(cuda_version != 50) else if(cuda_version > 50)
printf("CUDA version %d.%d detected, build may succeed but only CUDA 5.0 is officially supported.\n", cuda_version/10, cuda_version%10); printf("CUDA version %d.%d detected, build may succeed but only CUDA 5.0 is officially supported.\n", cuda_version/10, cuda_version%10);
/* compile */ /* compile */
@@ -296,36 +298,14 @@ public:
const int machine = system_cpu_bits(); const int machine = system_cpu_bits();
string arch_flags; string arch_flags;
/* build flags depending on CUDA version and arch */ /* CUDA 5.x build flags for different archs */
if(cuda_version < 50) { if(major == 2) {
/* CUDA 4.x */ /* sm_2x */
if(major == 1) { arch_flags = "--maxrregcount=32 --use_fast_math";
/* sm_1x */
arch_flags = "--maxrregcount=24 --opencc-options -OPT:Olimit=0";
}
else if(major == 2) {
/* sm_2x */
arch_flags = "--maxrregcount=24";
}
else {
/* sm_3x */
arch_flags = "--maxrregcount=32";
}
} }
else { else if(major == 3) {
/* CUDA 5.x */ /* sm_3x */
if(major == 1) { arch_flags = "--maxrregcount=32 --use_fast_math";
/* sm_1x */
arch_flags = "--maxrregcount=24 --opencc-options -OPT:Olimit=0 --use_fast_math";
}
else if(major == 2) {
/* sm_2x */
arch_flags = "--maxrregcount=32 --use_fast_math";
}
else {
/* sm_3x */
arch_flags = "--maxrregcount=32 --use_fast_math";
}
} }
double starttime = time_dt(); double starttime = time_dt();

View File

@@ -151,36 +151,16 @@ if(WITH_CYCLES_CUDA_BINARIES)
set(cuda_version_flags "-D__KERNEL_CUDA_VERSION__=${CUDA_VERSION}") set(cuda_version_flags "-D__KERNEL_CUDA_VERSION__=${CUDA_VERSION}")
# build flags depending on CUDA version and arch # CUDA 5.x build flags for different archs
if(CUDA_VERSION LESS 50) if(${arch} MATCHES "sm_2[0-9]")
# CUDA 4.x # sm_2x
if(${arch} MATCHES "sm_1[0-9]") set(cuda_arch_flags "--maxrregcount=32")
# sm_1x elseif(${arch} MATCHES "sm_3[0-9]")
set(cuda_arch_flags "--maxrregcount=24 --opencc-options -OPT:Olimit=0") # sm_3x
elseif(${arch} MATCHES "sm_2[0-9]") set(cuda_arch_flags "--maxrregcount=32")
# sm_2x
set(cuda_arch_flags "--maxrregcount=24")
else()
# sm_3x
set(cuda_arch_flags "--maxrregcount=32")
endif()
set(cuda_math_flags "")
else()
# CUDA 5.x
if(${arch} MATCHES "sm_1[0-9]")
# sm_1x
set(cuda_arch_flags "--maxrregcount=24 --opencc-options -OPT:Olimit=0")
elseif(${arch} MATCHES "sm_2[0-9]")
# sm_2x
set(cuda_arch_flags "--maxrregcount=32")
else()
# sm_3x
set(cuda_arch_flags "--maxrregcount=32")
endif()
set(cuda_math_flags "--use_fast_math")
endif() endif()
set(cuda_math_flags "--use_fast_math")
if(CUDA_VERSION LESS 50 AND ${arch} MATCHES "sm_35") if(CUDA_VERSION LESS 50 AND ${arch} MATCHES "sm_35")
message(WARNING "Can't build kernel for CUDA sm_35 architecture, skipping") message(WARNING "Can't build kernel for CUDA sm_35 architecture, skipping")

View File

@@ -86,33 +86,13 @@ if env['WITH_BF_CYCLES_CUDA_BINARIES']:
for arch in cuda_archs: for arch in cuda_archs:
cubin_file = os.path.join(build_dir, "kernel_%s.cubin" % arch) cubin_file = os.path.join(build_dir, "kernel_%s.cubin" % arch)
# build flags depending on CUDA version and arch # CUDA 5.x build flags for different archs
if cuda_version < 50: if arch.startswith("sm_2"):
if arch == "sm_35": # sm_2x
print("Can't build kernel for CUDA sm_35 architecture, skipping") cuda_arch_flags = "--maxrregcount=32 --use_fast_math"
continue elif arch.startswith("sm_3"):
# sm_3x
# CUDA 4.x cuda_arch_flags = "--maxrregcount=32 --use_fast_math"
if arch.startswith("sm_1"):
# sm_1x
cuda_arch_flags = "--maxrregcount=24 --opencc-options -OPT:Olimit=0"
elif arch.startswith("sm_2"):
# sm_2x
cuda_arch_flags = "--maxrregcount=24"
else:
# sm_3x
cuda_arch_flags = "--maxrregcount=32"
else:
# CUDA 5.x
if arch.startswith("sm_1"):
# sm_1x
cuda_arch_flags = "--maxrregcount=24 --opencc-options -OPT:Olimit=0 --use_fast_math"
elif arch.startswith("sm_2"):
# sm_2x
cuda_arch_flags = "--maxrregcount=32 --use_fast_math"
else:
# sm_3x
cuda_arch_flags = "--maxrregcount=32 --use_fast_math"
command = "\"%s\" -arch=%s %s %s \"%s\" -o \"%s\"" % (nvcc, arch, nvcc_flags, cuda_arch_flags, kernel_file, cubin_file) command = "\"%s\" -arch=%s %s %s \"%s\" -o \"%s\"" % (nvcc, arch, nvcc_flags, cuda_arch_flags, kernel_file, cubin_file)

View File

@@ -36,15 +36,8 @@ CCL_NAMESPACE_BEGIN
/* ShaderData setup from incoming ray */ /* ShaderData setup from incoming ray */
#ifdef __OBJECT_MOTION__ #ifdef __OBJECT_MOTION__
#if defined(__KERNEL_CUDA_VERSION__) && __KERNEL_CUDA_VERSION__ <= 42 __device void shader_setup_object_transforms(KernelGlobals *kg, ShaderData *sd, float time)
__device_noinline
#else
__device
#endif
void shader_setup_object_transforms(KernelGlobals *kg, ShaderData *sd, float time)
{ {
/* note that this is a separate non-inlined function to work around crash
* on CUDA sm 2.0, otherwise kernel execution crashes (compiler bug?) */
if(sd->flag & SD_OBJECT_MOTION) { if(sd->flag & SD_OBJECT_MOTION) {
sd->ob_tfm = object_fetch_transform_motion(kg, sd->object, time); sd->ob_tfm = object_fetch_transform_motion(kg, sd->object, time);
sd->ob_itfm= transform_quick_inverse(sd->ob_tfm); sd->ob_itfm= transform_quick_inverse(sd->ob_tfm);
@@ -56,12 +49,7 @@ void shader_setup_object_transforms(KernelGlobals *kg, ShaderData *sd, float tim
} }
#endif #endif
#if defined(__KERNEL_CUDA_VERSION__) && __KERNEL_CUDA_VERSION__ <= 42 __device void shader_setup_from_ray(KernelGlobals *kg, ShaderData *sd,
__device_noinline
#else
__device
#endif
void shader_setup_from_ray(KernelGlobals *kg, ShaderData *sd,
const Intersection *isect, const Ray *ray, int bounce) const Intersection *isect, const Ray *ray, int bounce)
{ {
#ifdef __INSTANCING__ #ifdef __INSTANCING__
@@ -249,12 +237,7 @@ __device_inline void shader_setup_from_subsurface(KernelGlobals *kg, ShaderData
/* ShaderData setup from position sampled on mesh */ /* ShaderData setup from position sampled on mesh */
#if defined(__KERNEL_CUDA_VERSION__) && __KERNEL_CUDA_VERSION__ <= 42 __device void shader_setup_from_sample(KernelGlobals *kg, ShaderData *sd,
__device_noinline
#else
__device
#endif
void shader_setup_from_sample(KernelGlobals *kg, ShaderData *sd,
const float3 P, const float3 Ng, const float3 I, const float3 P, const float3 Ng, const float3 I,
int shader, int object, int prim, float u, float v, float t, float time, int bounce, int segment) int shader, int object, int prim, float u, float v, float t, float time, int bounce, int segment)
{ {