Cycles: Add support for adaptive kernel compilation to OptiX device

This slightly modifies the common CUDA implementation of adaptive kernel compilation to support both CUBIN and PTX output (the latter of which is then used by the OptiX device). It also fixes adaptive kernel compilation on Windows.

Reviewed By: brecht

Differential Revision: https://developer.blender.org/D6851
This commit is contained in:
Patrick Mours
2020-02-17 13:35:31 +01:00
parent 12b6ddaf95
commit 2278aa0da9
6 changed files with 122 additions and 134 deletions

View File

@@ -683,23 +683,23 @@ static int cuewNvrtcInit(void) {
int cuewInit(cuuint32_t flags) { int cuewInit(cuuint32_t flags) {
int result = CUEW_SUCCESS; int result = CUEW_SUCCESS;
if (flags & CUEW_INIT_CUDA) { if (flags & CUEW_INIT_CUDA) {
result = cuewCudaInit(); result = cuewCudaInit();
if (result != CUEW_SUCCESS) { if (result != CUEW_SUCCESS) {
return result; return result;
} }
} }
if (flags & CUEW_INIT_NVRTC) { if (flags & CUEW_INIT_NVRTC) {
result = cuewNvrtcInit(); result = cuewNvrtcInit();
if (result != CUEW_SUCCESS) { if (result != CUEW_SUCCESS) {
return result; return result;
} }
} }
return result; return result;
} }
@@ -798,7 +798,10 @@ static int path_exists(const char *path) {
const char *cuewCompilerPath(void) { const char *cuewCompilerPath(void) {
#ifdef _WIN32 #ifdef _WIN32
const char *defaultpaths[] = {"C:/CUDA/bin", NULL}; const char *defaultpaths[] = {
"C:/CUDA/bin",
"C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.1/bin",
NULL};
const char *executable = "nvcc.exe"; const char *executable = "nvcc.exe";
#else #else
const char *defaultpaths[] = { const char *defaultpaths[] = {
@@ -832,9 +835,12 @@ const char *cuewCompilerPath(void) {
} }
} }
#ifndef _WIN32
{ {
#ifdef _WIN32
FILE *handle = popen("where nvcc", "r");
#else
FILE *handle = popen("which nvcc", "r"); FILE *handle = popen("which nvcc", "r");
#endif
if (handle) { if (handle) {
char buffer[4096] = {0}; char buffer[4096] = {0};
int len = fread(buffer, 1, sizeof(buffer) - 1, handle); int len = fread(buffer, 1, sizeof(buffer) - 1, handle);
@@ -845,7 +851,6 @@ const char *cuewCompilerPath(void) {
} }
} }
} }
#endif
return NULL; return NULL;
} }
@@ -859,23 +864,6 @@ int cuewNvrtcVersion(void) {
return 0; return 0;
} }
static size_t safe_strnlen(const char *s, size_t maxlen) {
size_t length;
for (length = 0; length < maxlen; s++, length++) {
if (*s == '\0') {
break;
}
}
return length;
}
static char *safe_strncpy(char *dest, const char *src, size_t n) {
const size_t src_len = safe_strnlen(src, n - 1);
memcpy(dest, src, src_len);
dest[src_len] = '\0';
return dest;
}
int cuewCompilerVersion(void) { int cuewCompilerVersion(void) {
const char *path = cuewCompilerPath(); const char *path = cuewCompilerPath();
const char *marker = "Cuda compilation tools, release "; const char *marker = "Cuda compilation tools, release ";
@@ -891,8 +879,9 @@ int cuewCompilerVersion(void) {
} }
/* get --version output */ /* get --version output */
safe_strncpy(command, path, sizeof(command)); strncat(command, "\"", 1);
strncat(command, " --version", sizeof(command) - strlen(path)); strncat(command, path, sizeof(command) - 1);
strncat(command, "\" --version", sizeof(command) - strlen(path) - 1);
pipe = popen(command, "r"); pipe = popen(command, "r");
if (!pipe) { if (!pipe) {
fprintf(stderr, "CUDA: failed to run compiler to retrieve version"); fprintf(stderr, "CUDA: failed to run compiler to retrieve version");
@@ -922,4 +911,3 @@ int cuewCompilerVersion(void) {
return 10 * major + minor; return 10 * major + minor;
} }

View File

@@ -228,11 +228,8 @@ if(WITH_CYCLES_DEVICE_OPTIX)
SYSTEM SYSTEM
${OPTIX_INCLUDE_DIR} ${OPTIX_INCLUDE_DIR}
) )
# Need pre-compiled CUDA binaries in the OptiX device
set(WITH_CYCLES_CUDA_BINARIES ON)
else() else()
message(STATUS "Optix not found, disabling it from Cycles") message(STATUS "OptiX not found, disabling it from Cycles")
set(WITH_CYCLES_DEVICE_OPTIX OFF) set(WITH_CYCLES_DEVICE_OPTIX OFF)
endif() endif()
endif() endif()

View File

@@ -109,15 +109,13 @@ class CUDADevice : public Device {
bool use_split_kernel(); bool use_split_kernel();
string compile_kernel_get_common_cflags(const DeviceRequestedFeatures &requested_features, virtual string compile_kernel_get_common_cflags(
bool filter = false, const DeviceRequestedFeatures &requested_features, bool filter = false, bool split = false);
bool split = false);
bool compile_check_compiler();
string compile_kernel(const DeviceRequestedFeatures &requested_features, string compile_kernel(const DeviceRequestedFeatures &requested_features,
bool filter = false, const char *name,
bool split = false); const char *base = "cuda",
bool force_ptx = false);
virtual bool load_kernels(const DeviceRequestedFeatures &requested_features); virtual bool load_kernels(const DeviceRequestedFeatures &requested_features);

View File

@@ -329,70 +329,27 @@ string CUDADevice::compile_kernel_get_common_cflags(
return cflags; return cflags;
} }
bool CUDADevice::compile_check_compiler()
{
const char *nvcc = cuewCompilerPath();
if (nvcc == NULL) {
cuda_error_message(
"CUDA nvcc compiler not found. "
"Install CUDA toolkit in default location.");
return false;
}
const int cuda_version = cuewCompilerVersion();
VLOG(1) << "Found nvcc " << nvcc << ", CUDA version " << cuda_version << ".";
const int major = cuda_version / 10, minor = cuda_version % 10;
if (cuda_version == 0) {
cuda_error_message("CUDA nvcc compiler version could not be parsed.");
return false;
}
if (cuda_version < 80) {
printf(
"Unsupported CUDA version %d.%d detected, "
"you need CUDA 8.0 or newer.\n",
major,
minor);
return false;
}
else if (cuda_version != 101) {
printf(
"CUDA version %d.%d detected, build may succeed but only "
"CUDA 10.1 is officially supported.\n",
major,
minor);
}
return true;
}
string CUDADevice::compile_kernel(const DeviceRequestedFeatures &requested_features, string CUDADevice::compile_kernel(const DeviceRequestedFeatures &requested_features,
bool filter, const char *name,
bool split) const char *base,
bool force_ptx)
{ {
const char *name, *source; /* Compute kernel name. */
if (filter) {
name = "filter";
source = "filter.cu";
}
else if (split) {
name = "kernel_split";
source = "kernel_split.cu";
}
else {
name = "kernel";
source = "kernel.cu";
}
/* Compute cubin name. */
int major, minor; int major, minor;
cuDeviceGetAttribute(&major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, cuDevId); cuDeviceGetAttribute(&major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, cuDevId);
cuDeviceGetAttribute(&minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, cuDevId); cuDeviceGetAttribute(&minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, cuDevId);
/* Attempt to use kernel provided with Blender. */ /* Attempt to use kernel provided with Blender. */
if (!use_adaptive_compilation()) { if (!use_adaptive_compilation()) {
const string cubin = path_get(string_printf("lib/%s_sm_%d%d.cubin", name, major, minor)); if (!force_ptx) {
VLOG(1) << "Testing for pre-compiled kernel " << cubin << "."; const string cubin = path_get(string_printf("lib/%s_sm_%d%d.cubin", name, major, minor));
if (path_exists(cubin)) { VLOG(1) << "Testing for pre-compiled kernel " << cubin << ".";
VLOG(1) << "Using precompiled kernel."; if (path_exists(cubin)) {
return cubin; VLOG(1) << "Using precompiled kernel.";
return cubin;
}
} }
const string ptx = path_get(string_printf("lib/%s_compute_%d%d.ptx", name, major, minor)); const string ptx = path_get(string_printf("lib/%s_compute_%d%d.ptx", name, major, minor));
VLOG(1) << "Testing for pre-compiled kernel " << ptx << "."; VLOG(1) << "Testing for pre-compiled kernel " << ptx << ".";
if (path_exists(ptx)) { if (path_exists(ptx)) {
@@ -401,19 +358,21 @@ string CUDADevice::compile_kernel(const DeviceRequestedFeatures &requested_featu
} }
} }
const string common_cflags = compile_kernel_get_common_cflags(requested_features, filter, split);
/* Try to use locally compiled kernel. */ /* Try to use locally compiled kernel. */
const string source_path = path_get("source"); string source_path = path_get("source");
const string kernel_md5 = path_files_md5_hash(source_path); const string source_md5 = path_files_md5_hash(source_path);
/* We include cflags into md5 so changing cuda toolkit or changing other /* We include cflags into md5 so changing cuda toolkit or changing other
* compiler command line arguments makes sure cubin gets re-built. * compiler command line arguments makes sure cubin gets re-built.
*/ */
const string cubin_md5 = util_md5_string(kernel_md5 + common_cflags); string common_cflags = compile_kernel_get_common_cflags(
requested_features, strstr(name, "filter") != NULL, strstr(name, "split") != NULL);
const string kernel_md5 = util_md5_string(source_md5 + common_cflags);
const char *const kernel_ext = force_ptx ? "ptx" : "cubin";
const char *const kernel_arch = force_ptx ? "compute" : "sm";
const string cubin_file = string_printf( const string cubin_file = string_printf(
"cycles_%s_sm%d%d_%s.cubin", name, major, minor, cubin_md5.c_str()); "cycles_%s_%s_%d%d_%s.%s", name, kernel_arch, major, minor, kernel_md5.c_str(), kernel_ext);
const string cubin = path_cache_get(path_join("kernels", cubin_file)); const string cubin = path_cache_get(path_join("kernels", cubin_file));
VLOG(1) << "Testing for locally compiled kernel " << cubin << "."; VLOG(1) << "Testing for locally compiled kernel " << cubin << ".";
if (path_exists(cubin)) { if (path_exists(cubin)) {
@@ -422,7 +381,7 @@ string CUDADevice::compile_kernel(const DeviceRequestedFeatures &requested_featu
} }
# ifdef _WIN32 # ifdef _WIN32
if (have_precompiled_kernels()) { if (!use_adaptive_compilation() && have_precompiled_kernels()) {
if (major < 3) { if (major < 3) {
cuda_error_message( cuda_error_message(
string_printf("CUDA device requires compute capability 3.0 or up, " string_printf("CUDA device requires compute capability 3.0 or up, "
@@ -437,42 +396,69 @@ string CUDADevice::compile_kernel(const DeviceRequestedFeatures &requested_featu
major, major,
minor)); minor));
} }
return ""; return string();
} }
# endif # endif
/* Compile. */ /* Compile. */
if (!compile_check_compiler()) { const char *const nvcc = cuewCompilerPath();
return ""; if (nvcc == NULL) {
cuda_error_message(
"CUDA nvcc compiler not found. "
"Install CUDA toolkit in default location.");
return string();
} }
const char *nvcc = cuewCompilerPath();
const string kernel = path_join(path_join(source_path, "kernel"), const int nvcc_cuda_version = cuewCompilerVersion();
path_join("kernels", path_join("cuda", source))); VLOG(1) << "Found nvcc " << nvcc << ", CUDA version " << nvcc_cuda_version << ".";
if (nvcc_cuda_version < 80) {
printf(
"Unsupported CUDA version %d.%d detected, "
"you need CUDA 8.0 or newer.\n",
nvcc_cuda_version / 10,
nvcc_cuda_version % 10);
return string();
}
else if (nvcc_cuda_version != 101) {
printf(
"CUDA version %d.%d detected, build may succeed but only "
"CUDA 10.1 is officially supported.\n",
nvcc_cuda_version / 10,
nvcc_cuda_version % 10);
}
double starttime = time_dt(); double starttime = time_dt();
printf("Compiling CUDA kernel ...\n");
path_create_directories(cubin); path_create_directories(cubin);
source_path = path_join(path_join(source_path, "kernel"),
path_join("kernels", path_join(base, string_printf("%s.cu", name))));
string command = string_printf( string command = string_printf(
"\"%s\" " "\"%s\" "
"-arch=sm_%d%d " "-arch=%s_%d%d "
"--cubin \"%s\" " "--%s \"%s\" "
"-o \"%s\" " "-o \"%s\" "
"%s ", "%s",
nvcc, nvcc,
kernel_arch,
major, major,
minor, minor,
kernel.c_str(), kernel_ext,
source_path.c_str(),
cubin.c_str(), cubin.c_str(),
common_cflags.c_str()); common_cflags.c_str());
printf("%s\n", command.c_str()); printf("Compiling CUDA kernel ...\n%s\n", command.c_str());
if (system(command.c_str()) == -1) { #ifdef _WIN32
command = "call " + command;
#endif
if (system(command.c_str()) != 0) {
cuda_error_message( cuda_error_message(
"Failed to execute compilation command, " "Failed to execute compilation command, "
"see console for details."); "see console for details.");
return ""; return string();
} }
/* Verify if compilation succeeded */ /* Verify if compilation succeeded */
@@ -480,7 +466,7 @@ string CUDADevice::compile_kernel(const DeviceRequestedFeatures &requested_featu
cuda_error_message( cuda_error_message(
"CUDA kernel compilation failed, " "CUDA kernel compilation failed, "
"see console for details."); "see console for details.");
return ""; return string();
} }
printf("Kernel compilation finished in %.2lfs.\n", time_dt() - starttime); printf("Kernel compilation finished in %.2lfs.\n", time_dt() - starttime);
@@ -509,12 +495,14 @@ bool CUDADevice::load_kernels(const DeviceRequestedFeatures &requested_features)
return false; return false;
/* get kernel */ /* get kernel */
string cubin = compile_kernel(requested_features, false, use_split_kernel()); const char *kernel_name = use_split_kernel() ? "kernel_split" : "kernel";
if (cubin == "") string cubin = compile_kernel(requested_features, kernel_name);
if (cubin.empty())
return false; return false;
string filter_cubin = compile_kernel(requested_features, true, false); const char *filter_name = "filter";
if (filter_cubin == "") string filter_cubin = compile_kernel(requested_features, filter_name);
if (filter_cubin.empty())
return false; return false;
/* open module */ /* open module */

View File

@@ -43,7 +43,6 @@ bool device_cuda_init()
VLOG(1) << "Found precompiled kernels"; VLOG(1) << "Found precompiled kernels";
result = true; result = true;
} }
# ifndef _WIN32
else if (cuewCompilerPath() != NULL) { else if (cuewCompilerPath() != NULL) {
VLOG(1) << "Found CUDA compiler " << cuewCompilerPath(); VLOG(1) << "Found CUDA compiler " << cuewCompilerPath();
result = true; result = true;
@@ -52,7 +51,6 @@ bool device_cuda_init()
VLOG(1) << "Neither precompiled kernels nor CUDA compiler was found," VLOG(1) << "Neither precompiled kernels nor CUDA compiler was found,"
<< " unable to use CUDA"; << " unable to use CUDA";
} }
# endif
} }
else { else {
VLOG(1) << "CUEW initialization failed: " VLOG(1) << "CUEW initialization failed: "

View File

@@ -293,6 +293,23 @@ class OptiXDevice : public CUDADevice {
return BVH_LAYOUT_OPTIX; return BVH_LAYOUT_OPTIX;
} }
string compile_kernel_get_common_cflags(const DeviceRequestedFeatures &requested_features,
bool filter,
bool /*split*/) override
{
// Split kernel is not supported in OptiX
string common_cflags = CUDADevice::compile_kernel_get_common_cflags(
requested_features, filter, false);
// Add OptiX SDK include directory to include paths
const char *optix_sdk_path = getenv("OPTIX_ROOT_DIR");
if (optix_sdk_path) {
common_cflags += string_printf(" -I\"%s/include\"", optix_sdk_path);
}
return common_cflags;
}
bool load_kernels(const DeviceRequestedFeatures &requested_features) override bool load_kernels(const DeviceRequestedFeatures &requested_features) override
{ {
if (have_error()) { if (have_error()) {
@@ -367,9 +384,11 @@ class OptiXDevice : public CUDADevice {
} }
{ // Load and compile PTX module with OptiX kernels { // Load and compile PTX module with OptiX kernels
string ptx_data; string ptx_data, ptx_filename = path_get("lib/kernel_optix.ptx");
const string ptx_filename = "lib/kernel_optix.ptx"; if (use_adaptive_compilation()) {
if (!path_read_text(path_get(ptx_filename), ptx_data)) { ptx_filename = compile_kernel(requested_features, "kernel_optix", "optix", true);
}
if (ptx_filename.empty() || !path_read_text(ptx_filename, ptx_data)) {
set_error("Failed loading OptiX kernel " + ptx_filename + "."); set_error("Failed loading OptiX kernel " + ptx_filename + ".");
return false; return false;
} }