From b5a5773fa98b5ddf18dc68bc77df15cc79211ef5 Mon Sep 17 00:00:00 2001 From: Thomas Dinges Date: Tue, 8 Oct 2013 15:29:28 +0000 Subject: [PATCH] Cycles / CUDA: * Remove support for CUDA Toolkit 4.x, only Toolkit 5.0 and above are supported now. * Remove support for sm_1x cards (< Fermi) for good. We didn't officially support those cards for a few releases already, now remove some special code that was still there. --- intern/cycles/device/device_cuda.cpp | 54 +++++++++------------------- intern/cycles/kernel/CMakeLists.txt | 38 +++++--------------- intern/cycles/kernel/SConscript | 34 ++++-------------- intern/cycles/kernel/kernel_shader.h | 23 ++---------- 4 files changed, 36 insertions(+), 113 deletions(-) diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp index 5440bd91987..4ce7f6fd729 100644 --- a/intern/cycles/device/device_cuda.cpp +++ b/intern/cycles/device/device_cuda.cpp @@ -227,14 +227,12 @@ public: bool support_device(bool experimental) { - if(!experimental) { - int major, minor; - cuDeviceComputeCapability(&major, &minor, cuDevId); + int major, minor; + cuDeviceComputeCapability(&major, &minor, cuDevId); - if(major < 2) { - cuda_error_message(string_printf("CUDA device supported only with compute capability 2.0 or up, found %d.%d.", major, minor)); - return false; - } + if(major < 2) { + cuda_error_message(string_printf("CUDA device supported only with compute capability 2.0 or up, found %d.%d.", major, minor)); + return false; } return true; @@ -286,8 +284,12 @@ public: cuda_error_message("CUDA nvcc compiler version could not be parsed."); return ""; } + if(cuda_version < 50) { + printf("Unsupported CUDA version %d.%d detected, you need CUDA 5.0.\n", cuda_version/10, cuda_version%10); + return ""; + } - if(cuda_version != 50) + else if(cuda_version > 50) printf("CUDA version %d.%d detected, build may succeed but only CUDA 5.0 is officially supported.\n", cuda_version/10, cuda_version%10); /* compile */ @@ -296,36 +298,14 @@ public: const int machine = system_cpu_bits(); string arch_flags; - /* build flags depending on CUDA version and arch */ - if(cuda_version < 50) { - /* CUDA 4.x */ - if(major == 1) { - /* sm_1x */ - arch_flags = "--maxrregcount=24 --opencc-options -OPT:Olimit=0"; - } - else if(major == 2) { - /* sm_2x */ - arch_flags = "--maxrregcount=24"; - } - else { - /* sm_3x */ - arch_flags = "--maxrregcount=32"; - } + /* CUDA 5.x build flags for different archs */ + if(major == 2) { + /* sm_2x */ + arch_flags = "--maxrregcount=32 --use_fast_math"; } - else { - /* CUDA 5.x */ - if(major == 1) { - /* sm_1x */ - arch_flags = "--maxrregcount=24 --opencc-options -OPT:Olimit=0 --use_fast_math"; - } - else if(major == 2) { - /* sm_2x */ - arch_flags = "--maxrregcount=32 --use_fast_math"; - } - else { - /* sm_3x */ - arch_flags = "--maxrregcount=32 --use_fast_math"; - } + else if(major == 3) { + /* sm_3x */ + arch_flags = "--maxrregcount=32 --use_fast_math"; } double starttime = time_dt(); diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt index eaa4e304ebb..56ba0e08743 100644 --- a/intern/cycles/kernel/CMakeLists.txt +++ b/intern/cycles/kernel/CMakeLists.txt @@ -151,36 +151,16 @@ if(WITH_CYCLES_CUDA_BINARIES) set(cuda_version_flags "-D__KERNEL_CUDA_VERSION__=${CUDA_VERSION}") - # build flags depending on CUDA version and arch - if(CUDA_VERSION LESS 50) - # CUDA 4.x - if(${arch} MATCHES "sm_1[0-9]") - # sm_1x - set(cuda_arch_flags "--maxrregcount=24 --opencc-options -OPT:Olimit=0") - elseif(${arch} MATCHES "sm_2[0-9]") - # sm_2x - set(cuda_arch_flags "--maxrregcount=24") - else() - # sm_3x - set(cuda_arch_flags "--maxrregcount=32") - endif() - - set(cuda_math_flags "") - else() - # CUDA 5.x - if(${arch} MATCHES "sm_1[0-9]") - # sm_1x - set(cuda_arch_flags "--maxrregcount=24 --opencc-options -OPT:Olimit=0") - elseif(${arch} MATCHES "sm_2[0-9]") - # sm_2x - set(cuda_arch_flags "--maxrregcount=32") - else() - # sm_3x - set(cuda_arch_flags "--maxrregcount=32") - endif() - - set(cuda_math_flags "--use_fast_math") + # CUDA 5.x build flags for different archs + if(${arch} MATCHES "sm_2[0-9]") + # sm_2x + set(cuda_arch_flags "--maxrregcount=32") + elseif(${arch} MATCHES "sm_3[0-9]") + # sm_3x + set(cuda_arch_flags "--maxrregcount=32") endif() + + set(cuda_math_flags "--use_fast_math") if(CUDA_VERSION LESS 50 AND ${arch} MATCHES "sm_35") message(WARNING "Can't build kernel for CUDA sm_35 architecture, skipping") diff --git a/intern/cycles/kernel/SConscript b/intern/cycles/kernel/SConscript index 6459c3ed183..a0522d9ba8e 100644 --- a/intern/cycles/kernel/SConscript +++ b/intern/cycles/kernel/SConscript @@ -86,33 +86,13 @@ if env['WITH_BF_CYCLES_CUDA_BINARIES']: for arch in cuda_archs: cubin_file = os.path.join(build_dir, "kernel_%s.cubin" % arch) - # build flags depending on CUDA version and arch - if cuda_version < 50: - if arch == "sm_35": - print("Can't build kernel for CUDA sm_35 architecture, skipping") - continue - - # CUDA 4.x - if arch.startswith("sm_1"): - # sm_1x - cuda_arch_flags = "--maxrregcount=24 --opencc-options -OPT:Olimit=0" - elif arch.startswith("sm_2"): - # sm_2x - cuda_arch_flags = "--maxrregcount=24" - else: - # sm_3x - cuda_arch_flags = "--maxrregcount=32" - else: - # CUDA 5.x - if arch.startswith("sm_1"): - # sm_1x - cuda_arch_flags = "--maxrregcount=24 --opencc-options -OPT:Olimit=0 --use_fast_math" - elif arch.startswith("sm_2"): - # sm_2x - cuda_arch_flags = "--maxrregcount=32 --use_fast_math" - else: - # sm_3x - cuda_arch_flags = "--maxrregcount=32 --use_fast_math" + # CUDA 5.x build flags for different archs + if arch.startswith("sm_2"): + # sm_2x + cuda_arch_flags = "--maxrregcount=32 --use_fast_math" + elif arch.startswith("sm_3"): + # sm_3x + cuda_arch_flags = "--maxrregcount=32 --use_fast_math" command = "\"%s\" -arch=%s %s %s \"%s\" -o \"%s\"" % (nvcc, arch, nvcc_flags, cuda_arch_flags, kernel_file, cubin_file) diff --git a/intern/cycles/kernel/kernel_shader.h b/intern/cycles/kernel/kernel_shader.h index ee71a0cfcf4..81630caed9a 100644 --- a/intern/cycles/kernel/kernel_shader.h +++ b/intern/cycles/kernel/kernel_shader.h @@ -36,15 +36,8 @@ CCL_NAMESPACE_BEGIN /* ShaderData setup from incoming ray */ #ifdef __OBJECT_MOTION__ -#if defined(__KERNEL_CUDA_VERSION__) && __KERNEL_CUDA_VERSION__ <= 42 -__device_noinline -#else -__device -#endif -void shader_setup_object_transforms(KernelGlobals *kg, ShaderData *sd, float time) +__device void shader_setup_object_transforms(KernelGlobals *kg, ShaderData *sd, float time) { - /* note that this is a separate non-inlined function to work around crash - * on CUDA sm 2.0, otherwise kernel execution crashes (compiler bug?) */ if(sd->flag & SD_OBJECT_MOTION) { sd->ob_tfm = object_fetch_transform_motion(kg, sd->object, time); sd->ob_itfm= transform_quick_inverse(sd->ob_tfm); @@ -56,12 +49,7 @@ void shader_setup_object_transforms(KernelGlobals *kg, ShaderData *sd, float tim } #endif -#if defined(__KERNEL_CUDA_VERSION__) && __KERNEL_CUDA_VERSION__ <= 42 -__device_noinline -#else -__device -#endif -void shader_setup_from_ray(KernelGlobals *kg, ShaderData *sd, +__device void shader_setup_from_ray(KernelGlobals *kg, ShaderData *sd, const Intersection *isect, const Ray *ray, int bounce) { #ifdef __INSTANCING__ @@ -249,12 +237,7 @@ __device_inline void shader_setup_from_subsurface(KernelGlobals *kg, ShaderData /* ShaderData setup from position sampled on mesh */ -#if defined(__KERNEL_CUDA_VERSION__) && __KERNEL_CUDA_VERSION__ <= 42 -__device_noinline -#else -__device -#endif -void shader_setup_from_sample(KernelGlobals *kg, ShaderData *sd, +__device void shader_setup_from_sample(KernelGlobals *kg, ShaderData *sd, const float3 P, const float3 Ng, const float3 I, int shader, int object, int prim, float u, float v, float t, float time, int bounce, int segment) {