Cycles: Make CUDA adaptive feature compile a Debug flag.

If the CUDA Toolkit is installed and the user is on Linux, adaptive, feature-based CUDA runtime compilation can now be enabled via either: * the environment flag CYCLES_CUDA_ADAPTIVE_COMPILE, or * the Debug menu (Debug value 256) in the Cycles UI.
2016-05-06 22:34:15 +02:00
parent bd335f13fe
commit 734d1aec3f
6 changed files with 61 additions and 19 deletions
--- a/intern/cycles/blender/addon/properties.py
+++ b/intern/cycles/blender/addon/properties.py
@@ -594,6 +594,8 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
        cls.debug_use_cpu_sse2 = BoolProperty(name="SSE2", default=True)
        cls.debug_use_qbvh = BoolProperty(name="QBVH", default=True)

+        cls.debug_use_cuda_adaptive_compile = BoolProperty(name="Adaptive Compile", default=False)
+
        cls.debug_opencl_kernel_type = EnumProperty(
            name="OpenCL Kernel Type",
            default='DEFAULT',
--- a/intern/cycles/blender/addon/ui.py
+++ b/intern/cycles/blender/addon/ui.py
@@ -1553,6 +1553,10 @@ class CyclesRender_PT_debug(CyclesButtonsPanel, Panel):
        row.prop(cscene, "debug_use_cpu_avx2", toggle=True)
        col.prop(cscene, "debug_use_qbvh")

+        col = layout.column()
+        col.label('CUDA Flags:')
+        col.prop(cscene, "debug_use_cuda_adaptive_compile")
+
        col = layout.column()
        col.label('OpenCL Flags:')
        col.prop(cscene, "debug_opencl_kernel_type", text="Kernel")
--- a/intern/cycles/blender/blender_python.cpp
+++ b/intern/cycles/blender/blender_python.cpp
@@ -70,6 +70,8 @@ bool debug_flags_sync_from_scene(BL::Scene b_scene)
 	flags.cpu.sse3 = get_boolean(cscene, "debug_use_cpu_sse3");
 	flags.cpu.sse2 = get_boolean(cscene, "debug_use_cpu_sse2");
 	flags.cpu.qbvh = get_boolean(cscene, "debug_use_qbvh");
+	/* Synchronize CUDA flags. */
+	flags.cuda.adaptive_compile = get_boolean(cscene, "debug_use_cuda_adaptive_compile");
 	/* Synchronize OpenCL kernel type. */
 	switch(get_enum(cscene, "debug_opencl_kernel_type")) {
 		case 0:
--- a/intern/cycles/device/device_cuda.cpp
+++ b/intern/cycles/device/device_cuda.cpp
@@ -41,11 +41,6 @@
 #include "util_types.h"
 #include "util_time.h"

-/* use feature-adaptive kernel compilation.
- * Requires CUDA toolkit to be installed and currently only works on Linux.
- */
-/* #define KERNEL_USE_ADAPTIVE */
-
 CCL_NAMESPACE_BEGIN

 #ifndef WITH_CUDA_DYNLOAD
@@ -245,6 +240,11 @@ public:
 		return true;
 	}

+	bool use_adaptive_compilation()
+	{
+		return DebugFlags().cuda.adaptive_compile;  /* Read from DebugFlags() on every call; nothing is cached here. */
+	}
+
 	string compile_kernel(const DeviceRequestedFeatures& requested_features)
 	{
 		/* compute cubin name */
@@ -252,6 +252,9 @@ public:
 		cuDeviceComputeCapability(&major, &minor, cuDevId);
 		string cubin;

+		/* adaptive compile */
+		bool use_adaptive_compile = use_adaptive_compilation();
+
 		/* attempt to use kernel provided with blender */
 		cubin = path_get(string_printf("lib/kernel_sm_%d%d.cubin", major, minor));
 		VLOG(1) << "Testing for pre-compiled kernel " << cubin;
@@ -264,17 +267,19 @@ public:
 		string kernel_path = path_get("kernel");
 		string md5 = path_files_md5_hash(kernel_path);

-#ifdef KERNEL_USE_ADAPTIVE
-		string feature_build_options = requested_features.get_build_options();
+		string feature_build_options;
+		if(use_adaptive_compile) {
+			feature_build_options = requested_features.get_build_options();
 			string device_md5 = util_md5_string(feature_build_options);
 			cubin = string_printf("cycles_kernel_%s_sm%d%d_%s.cubin",
 		                          device_md5.c_str(),
 		                          major, minor,
 		                          md5.c_str());
-#else
+		}
+		else {
 			(void)requested_features;
 			cubin = string_printf("cycles_kernel_sm%d%d_%s.cubin", major, minor, md5.c_str());
-#endif
+		}

 		cubin = path_user_get(path_join("cache", cubin));
 		VLOG(1) << "Testing for locally compiled kernel " << cubin;
@@ -331,9 +336,8 @@ public:
 			"-DNVCC -D__KERNEL_CUDA_VERSION__=%d",
 			nvcc, major, minor, machine, kernel.c_str(), cubin.c_str(), include.c_str(), cuda_version);

-#ifdef KERNEL_USE_ADAPTIVE
+		if(use_adaptive_compile)
 			command += " " + feature_build_options;
-#endif

 		const char* extra_cflags = getenv("CYCLES_CUDA_EXTRA_CFLAGS");
 		if(extra_cflags) {
--- a/intern/cycles/util/util_debug.cpp
+++ b/intern/cycles/util/util_debug.cpp
@@ -57,6 +57,18 @@ void DebugFlags::CPU::reset()
 	qbvh = true;
 }

+DebugFlags::CUDA::CUDA()
+  : adaptive_compile(false)  /* Adaptive compile starts disabled. */
+{
+	reset();  /* Switches the flag on when CYCLES_CUDA_ADAPTIVE_COMPILE is set. */
+}
+
+void DebugFlags::CUDA::reset()
+{
+	/* Default: on only if CYCLES_CUDA_ADAPTIVE_COMPILE is set. Always assign, so a previously enabled flag is cleared. */
+	adaptive_compile = (getenv("CYCLES_CUDA_ADAPTIVE_COMPILE") != NULL);
+}
+
 DebugFlags::OpenCL::OpenCL()
  : device_type(DebugFlags::OpenCL::DEVICE_ALL),
    kernel_type(DebugFlags::OpenCL::KERNEL_DEFAULT),
@@ -123,6 +135,9 @@ std::ostream& operator <<(std::ostream &os,
 	   << "  SSE3   : " << string_from_bool(debug_flags.cpu.sse3)  << "\n"
 	   << "  SSE2   : " << string_from_bool(debug_flags.cpu.sse2)  << "\n";

+	os << "CUDA flags:\n"
+	   << " Adaptive Compile: " << string_from_bool(debug_flags.cuda.adaptive_compile) << "\n";
+
 	const char *opencl_device_type,
 	           *opencl_kernel_type;
 	switch(debug_flags.opencl.device_type) {
--- a/intern/cycles/util/util_debug.h
+++ b/intern/cycles/util/util_debug.h
@@ -46,6 +46,18 @@ public:
 		bool qbvh;
 	};

+	/* Descriptor of CUDA feature-set to be used. */
+	struct CUDA {
+		CUDA();
+
+		/* Reset flags to their defaults; reads the CYCLES_CUDA_ADAPTIVE_COMPILE environment variable. */
+		void reset();
+
+		/* Whether adaptive, feature-based runtime compilation is enabled or not.
+		 * Requires the CUDA Toolkit and only works on Linux at the moment. */
+		bool adaptive_compile;
+	};
+
 	/* Descriptor of OpenCL feature-set to be used. */
 	struct OpenCL {
 		OpenCL();
@@ -107,6 +119,9 @@ public:
 	/* Requested CPU flags. */
 	CPU cpu;

+	/* Requested CUDA flags. */
+	CUDA cuda;
+
 	/* Requested OpenCL flags. */
 	OpenCL opencl;