Cycles: Make CUDA adaptive feature compile a Debug flag.
If the CUDA Toolkit is installed and the user is on Linux, adaptive, feature-based CUDA runtime compilation can now be enabled via: * the environment variable CYCLES_CUDA_ADAPTIVE_COMPILE, or * the Debug panel (debug value 256) in the Cycles UI.
This commit is contained in:
@@ -594,6 +594,8 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
|
||||
cls.debug_use_cpu_sse2 = BoolProperty(name="SSE2", default=True)
|
||||
cls.debug_use_qbvh = BoolProperty(name="QBVH", default=True)
|
||||
|
||||
cls.debug_use_cuda_adaptive_compile = BoolProperty(name="Adaptive Compile", default=False)
|
||||
|
||||
cls.debug_opencl_kernel_type = EnumProperty(
|
||||
name="OpenCL Kernel Type",
|
||||
default='DEFAULT',
|
||||
|
@@ -1553,6 +1553,10 @@ class CyclesRender_PT_debug(CyclesButtonsPanel, Panel):
|
||||
row.prop(cscene, "debug_use_cpu_avx2", toggle=True)
|
||||
col.prop(cscene, "debug_use_qbvh")
|
||||
|
||||
col = layout.column()
|
||||
col.label('CUDA Flags:')
|
||||
col.prop(cscene, "debug_use_cuda_adaptive_compile")
|
||||
|
||||
col = layout.column()
|
||||
col.label('OpenCL Flags:')
|
||||
col.prop(cscene, "debug_opencl_kernel_type", text="Kernel")
|
||||
|
@@ -70,6 +70,8 @@ bool debug_flags_sync_from_scene(BL::Scene b_scene)
|
||||
flags.cpu.sse3 = get_boolean(cscene, "debug_use_cpu_sse3");
|
||||
flags.cpu.sse2 = get_boolean(cscene, "debug_use_cpu_sse2");
|
||||
flags.cpu.qbvh = get_boolean(cscene, "debug_use_qbvh");
|
||||
/* Synchronize CUDA flags. */
|
||||
flags.cuda.adaptive_compile = get_boolean(cscene, "debug_use_cuda_adaptive_compile");
|
||||
/* Synchronize OpenCL kernel type. */
|
||||
switch(get_enum(cscene, "debug_opencl_kernel_type")) {
|
||||
case 0:
|
||||
|
@@ -41,11 +41,6 @@
|
||||
#include "util_types.h"
|
||||
#include "util_time.h"
|
||||
|
||||
/* use feature-adaptive kernel compilation.
|
||||
* Requires CUDA toolkit to be installed and currently only works on Linux.
|
||||
*/
|
||||
/* #define KERNEL_USE_ADAPTIVE */
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
#ifndef WITH_CUDA_DYNLOAD
|
||||
@@ -245,6 +240,11 @@ public:
|
||||
return true;
|
||||
}
|
||||
|
||||
bool use_adaptive_compilation()
|
||||
{
|
||||
return DebugFlags().cuda.adaptive_compile;
|
||||
}
|
||||
|
||||
string compile_kernel(const DeviceRequestedFeatures& requested_features)
|
||||
{
|
||||
/* compute cubin name */
|
||||
@@ -252,6 +252,9 @@ public:
|
||||
cuDeviceComputeCapability(&major, &minor, cuDevId);
|
||||
string cubin;
|
||||
|
||||
/* adaptive compile */
|
||||
bool use_adaptive_compile = use_adaptive_compilation();
|
||||
|
||||
/* attempt to use kernel provided with blender */
|
||||
cubin = path_get(string_printf("lib/kernel_sm_%d%d.cubin", major, minor));
|
||||
VLOG(1) << "Testing for pre-compiled kernel " << cubin;
|
||||
@@ -264,17 +267,19 @@ public:
|
||||
string kernel_path = path_get("kernel");
|
||||
string md5 = path_files_md5_hash(kernel_path);
|
||||
|
||||
#ifdef KERNEL_USE_ADAPTIVE
|
||||
string feature_build_options = requested_features.get_build_options();
|
||||
string feature_build_options;
|
||||
if(use_adaptive_compile) {
|
||||
feature_build_options = requested_features.get_build_options();
|
||||
string device_md5 = util_md5_string(feature_build_options);
|
||||
cubin = string_printf("cycles_kernel_%s_sm%d%d_%s.cubin",
|
||||
device_md5.c_str(),
|
||||
major, minor,
|
||||
md5.c_str());
|
||||
#else
|
||||
}
|
||||
else {
|
||||
(void)requested_features;
|
||||
cubin = string_printf("cycles_kernel_sm%d%d_%s.cubin", major, minor, md5.c_str());
|
||||
#endif
|
||||
}
|
||||
|
||||
cubin = path_user_get(path_join("cache", cubin));
|
||||
VLOG(1) << "Testing for locally compiled kernel " << cubin;
|
||||
@@ -331,9 +336,8 @@ public:
|
||||
"-DNVCC -D__KERNEL_CUDA_VERSION__=%d",
|
||||
nvcc, major, minor, machine, kernel.c_str(), cubin.c_str(), include.c_str(), cuda_version);
|
||||
|
||||
#ifdef KERNEL_USE_ADAPTIVE
|
||||
if(use_adaptive_compile)
|
||||
command += " " + feature_build_options;
|
||||
#endif
|
||||
|
||||
const char* extra_cflags = getenv("CYCLES_CUDA_EXTRA_CFLAGS");
|
||||
if(extra_cflags) {
|
||||
|
@@ -57,6 +57,18 @@ void DebugFlags::CPU::reset()
|
||||
qbvh = true;
|
||||
}
|
||||
|
||||
/* Build the CUDA flag group: adaptive compile starts out disabled, then
 * reset() is given the chance to adjust it. */
DebugFlags::CUDA::CUDA()
{
	adaptive_compile = false;
	reset();
}
|
||||
|
||||
void DebugFlags::CUDA::reset()
|
||||
{
|
||||
if(getenv("CYCLES_CUDA_ADAPTIVE_COMPILE") != NULL)
|
||||
adaptive_compile = true;
|
||||
}
|
||||
|
||||
DebugFlags::OpenCL::OpenCL()
|
||||
: device_type(DebugFlags::OpenCL::DEVICE_ALL),
|
||||
kernel_type(DebugFlags::OpenCL::KERNEL_DEFAULT),
|
||||
@@ -123,6 +135,9 @@ std::ostream& operator <<(std::ostream &os,
|
||||
<< " SSE3 : " << string_from_bool(debug_flags.cpu.sse3) << "\n"
|
||||
<< " SSE2 : " << string_from_bool(debug_flags.cpu.sse2) << "\n";
|
||||
|
||||
os << "CUDA flags:\n"
|
||||
<< " Adaptive Compile: " << string_from_bool(debug_flags.cuda.adaptive_compile) << "\n";
|
||||
|
||||
const char *opencl_device_type,
|
||||
*opencl_kernel_type;
|
||||
switch(debug_flags.opencl.device_type) {
|
||||
|
@@ -46,6 +46,18 @@ public:
|
||||
bool qbvh;
|
||||
};
|
||||
|
||||
/* Group of debug flags describing the CUDA feature-set to be used. */
struct CUDA {
	CUDA();

	/* Put every flag of this group back to its default value. */
	void reset();

	/* True when adaptive, feature based runtime compilation is enabled.
	 * Needs the CUDA Toolkit to be installed; Linux only at the moment. */
	bool adaptive_compile;
};
|
||||
|
||||
/* Descriptor of OpenCL feature-set to be used. */
|
||||
struct OpenCL {
|
||||
OpenCL();
|
||||
@@ -107,6 +119,9 @@ public:
|
||||
/* Requested CPU flags. */
|
||||
CPU cpu;
|
||||
|
||||
/* Requested CUDA flags. */
|
||||
CUDA cuda;
|
||||
|
||||
/* Requested OpenCL flags. */
|
||||
OpenCL opencl;
|
||||
|
||||
|
Reference in New Issue
Block a user