Cycles / CUDA:

* Remove support for CUDA Toolkit 4.x; only Toolkit 5.0 and above is supported now.
* Remove support for sm_1x cards (< Fermi) for good. We haven't officially supported those cards for a few releases already; now remove some special code that was still there.
2013-10-08 15:29:28 +00:00
parent dfe1610504
commit b5a5773fa9
4 changed files with 36 additions and 113 deletions
--- a/intern/cycles/device/device_cuda.cpp
+++ b/intern/cycles/device/device_cuda.cpp
@@ -227,14 +227,12 @@ public:
 	bool support_device(bool experimental)
 	{
-		if(!experimental) {
+		int major, minor;
-			int major, minor;
+		cuDeviceComputeCapability(&major, &minor, cuDevId);
 			cuDeviceComputeCapability(&major, &minor, cuDevId);
-			if(major < 2) {
+		if(major < 2) {
-				cuda_error_message(string_printf("CUDA device supported only with compute capability 2.0 or up, found %d.%d.", major, minor));
+			cuda_error_message(string_printf("CUDA device supported only with compute capability 2.0 or up, found %d.%d.", major, minor));
-				return false;
+			return false;
 			}
 		}
 		return true;
@@ -286,8 +284,12 @@ public:
 			cuda_error_message("CUDA nvcc compiler version could not be parsed.");
 			return "";
 		}
 		if(cuda_version < 50) {
 			printf("Unsupported CUDA version %d.%d detected, you need CUDA 5.0.\n", cuda_version/10, cuda_version%10);
 			return "";
 		}
-		if(cuda_version != 50)
+		else if(cuda_version > 50)
 			printf("CUDA version %d.%d detected, build may succeed but only CUDA 5.0 is officially supported.\n", cuda_version/10, cuda_version%10);
 		/* compile */
@@ -296,36 +298,14 @@ public:
 		const int machine = system_cpu_bits();
 		string arch_flags;
-		/* build flags depending on CUDA version and arch */
+		/* CUDA 5.x build flags for different archs */
-		if(cuda_version < 50) {
+		if(major == 2) {
-			/* CUDA 4.x */
+			/* sm_2x */
-			if(major == 1) {
+			arch_flags = "--maxrregcount=32 --use_fast_math";
 				/* sm_1x */
 				arch_flags = "--maxrregcount=24 --opencc-options -OPT:Olimit=0";
 			}
 			else if(major == 2) {
 				/* sm_2x */
 				arch_flags = "--maxrregcount=24";
 			}
 			else {
 				/* sm_3x */
 				arch_flags = "--maxrregcount=32";
 			}
 		}
-		else {
+		else if(major == 3) {
-			/* CUDA 5.x */
+			/* sm_3x */
-			if(major == 1) {
+			arch_flags = "--maxrregcount=32 --use_fast_math";
 				/* sm_1x */
 				arch_flags = "--maxrregcount=24 --opencc-options -OPT:Olimit=0 --use_fast_math";
 			}
 			else if(major == 2) {
 				/* sm_2x */
 				arch_flags = "--maxrregcount=32 --use_fast_math";
 			}
 			else {
 				/* sm_3x */
 				arch_flags = "--maxrregcount=32 --use_fast_math";
 			}
 		}
 		double starttime = time_dt();
--- a/intern/cycles/kernel/CMakeLists.txt
+++ b/intern/cycles/kernel/CMakeLists.txt
@@ -151,36 +151,16 @@ if(WITH_CYCLES_CUDA_BINARIES)
 		set(cuda_version_flags "-D__KERNEL_CUDA_VERSION__=${CUDA_VERSION}")
-		# build flags depending on CUDA version and arch
+		# CUDA 5.x build flags for different archs
-		if(CUDA_VERSION LESS 50)
+		if(${arch} MATCHES "sm_2[0-9]")
-			# CUDA 4.x
+			# sm_2x
-			if(${arch} MATCHES "sm_1[0-9]")
+			set(cuda_arch_flags "--maxrregcount=32")
-				# sm_1x
+		elseif(${arch} MATCHES "sm_3[0-9]")
-				set(cuda_arch_flags "--maxrregcount=24 --opencc-options -OPT:Olimit=0")
+			# sm_3x
-			elseif(${arch} MATCHES "sm_2[0-9]")
+			set(cuda_arch_flags "--maxrregcount=32")
 				# sm_2x
 				set(cuda_arch_flags "--maxrregcount=24")
 			else()
 				# sm_3x
 				set(cuda_arch_flags "--maxrregcount=32")
 			endif()
 			set(cuda_math_flags "")
 		else()
 			# CUDA 5.x
 			if(${arch} MATCHES "sm_1[0-9]")
 				# sm_1x
 				set(cuda_arch_flags "--maxrregcount=24 --opencc-options -OPT:Olimit=0")
 			elseif(${arch} MATCHES "sm_2[0-9]")
 				# sm_2x
 				set(cuda_arch_flags "--maxrregcount=32")
 			else()
 				# sm_3x
 				set(cuda_arch_flags "--maxrregcount=32")
 			endif()
 			set(cuda_math_flags "--use_fast_math")
 		endif()
 		set(cuda_math_flags "--use_fast_math")
 		if(CUDA_VERSION LESS 50 AND ${arch} MATCHES "sm_35")
 			message(WARNING "Can't build kernel for CUDA sm_35 architecture, skipping")
--- a/intern/cycles/kernel/SConscript
+++ b/intern/cycles/kernel/SConscript
@@ -86,33 +86,13 @@ if env['WITH_BF_CYCLES_CUDA_BINARIES']:
    for arch in cuda_archs:
        cubin_file = os.path.join(build_dir, "kernel_%s.cubin" % arch)
-		# build flags depending on CUDA version and arch
+        # CUDA 5.x build flags for different archs
-        if cuda_version < 50:
+        if arch.startswith("sm_2"):
-            if arch == "sm_35":
+            # sm_2x
-                print("Can't build kernel for CUDA sm_35 architecture, skipping")
+            cuda_arch_flags = "--maxrregcount=32 --use_fast_math"
-                continue
+        elif arch.startswith("sm_3"):
-
+            # sm_3x
-            # CUDA 4.x
+            cuda_arch_flags = "--maxrregcount=32 --use_fast_math"
            if arch.startswith("sm_1"):
                # sm_1x
                cuda_arch_flags = "--maxrregcount=24 --opencc-options -OPT:Olimit=0"
            elif arch.startswith("sm_2"):
                # sm_2x
                cuda_arch_flags = "--maxrregcount=24"
            else:
                # sm_3x
                cuda_arch_flags = "--maxrregcount=32"
        else:
            # CUDA 5.x
            if arch.startswith("sm_1"):
                # sm_1x
                cuda_arch_flags = "--maxrregcount=24 --opencc-options -OPT:Olimit=0 --use_fast_math"
            elif arch.startswith("sm_2"):
                # sm_2x
                cuda_arch_flags = "--maxrregcount=32 --use_fast_math"
            else:
                # sm_3x
                cuda_arch_flags = "--maxrregcount=32 --use_fast_math"
        command = "\"%s\" -arch=%s %s %s \"%s\" -o \"%s\"" % (nvcc, arch, nvcc_flags, cuda_arch_flags, kernel_file, cubin_file)
--- a/intern/cycles/kernel/kernel_shader.h
+++ b/intern/cycles/kernel/kernel_shader.h
@@ -36,15 +36,8 @@ CCL_NAMESPACE_BEGIN
 /* ShaderData setup from incoming ray */
 #ifdef __OBJECT_MOTION__
-#if defined(__KERNEL_CUDA_VERSION__) && __KERNEL_CUDA_VERSION__ <= 42
+__device void shader_setup_object_transforms(KernelGlobals *kg, ShaderData *sd, float time)
 __device_noinline
 #else
 __device
 #endif
 void shader_setup_object_transforms(KernelGlobals *kg, ShaderData *sd, float time)
 {
 	/* note that this is a separate non-inlined function to work around crash
 	 * on CUDA sm 2.0, otherwise kernel execution crashes (compiler bug?) */
 	if(sd->flag & SD_OBJECT_MOTION) {
 		sd->ob_tfm = object_fetch_transform_motion(kg, sd->object, time);
 		sd->ob_itfm= transform_quick_inverse(sd->ob_tfm);
@@ -56,12 +49,7 @@ void shader_setup_object_transforms(KernelGlobals *kg, ShaderData *sd, float tim
 }
 #endif
-#if defined(__KERNEL_CUDA_VERSION__) && __KERNEL_CUDA_VERSION__ <= 42
+__device void shader_setup_from_ray(KernelGlobals *kg, ShaderData *sd,
 __device_noinline
 #else
 __device
 #endif
 void shader_setup_from_ray(KernelGlobals *kg, ShaderData *sd,
 	const Intersection *isect, const Ray *ray, int bounce)
 {
 #ifdef __INSTANCING__
@@ -249,12 +237,7 @@ __device_inline void shader_setup_from_subsurface(KernelGlobals *kg, ShaderData
 /* ShaderData setup from position sampled on mesh */
-#if defined(__KERNEL_CUDA_VERSION__) && __KERNEL_CUDA_VERSION__ <= 42
+__device void shader_setup_from_sample(KernelGlobals *kg, ShaderData *sd,
 __device_noinline
 #else
 __device
 #endif
 void shader_setup_from_sample(KernelGlobals *kg, ShaderData *sd,
 	const float3 P, const float3 Ng, const float3 I,
 	int shader, int object, int prim, float u, float v, float t, float time, int bounce, int segment)
 {