Cycles: various fixes for HIP and compilation of HIP binaries

* Additional structs added to the hipew loader for device props
* Adds hipRTC functions to the loader for future usage
* Enables CPU+GPU usage for HIP
* Cleanup to the adaptive kernel compilation process
* Fix for kernel compilation failures with HIP with latest master

Ref T92393, D12958
This commit is contained in:
Sayak Biswas
2021-10-21 20:57:17 +02:00
committed by Brecht Van Lommel
parent d1fcf93f03
commit d092933abb
6 changed files with 208 additions and 43 deletions

View File

@@ -1329,7 +1329,7 @@ class CyclesPreferences(bpy.types.AddonPreferences):
elif entry.type == 'CPU':
cpu_devices.append(entry)
# Extend all GPU devices with CPU.
if compute_device_type != 'CPU' and compute_device_type != 'HIP':
if compute_device_type != 'CPU':
devices.extend(cpu_devices)
return devices

View File

@@ -208,7 +208,7 @@ bool HIPDevice::use_adaptive_compilation()
return DebugFlags().hip.adaptive_compile;
}
/* Common NVCC flags which stays the same regardless of shading model,
/* Common HIPCC flags which stays the same regardless of shading model,
* kernel sources md5 and only depends on compiler or compilation settings.
*/
string HIPDevice::compile_kernel_get_common_cflags(const uint kernel_features)
@@ -239,11 +239,13 @@ string HIPDevice::compile_kernel(const uint kernel_features,
int major, minor;
hipDeviceGetAttribute(&major, hipDeviceAttributeComputeCapabilityMajor, hipDevId);
hipDeviceGetAttribute(&minor, hipDeviceAttributeComputeCapabilityMinor, hipDevId);
hipDeviceProp_t props;
hipGetDeviceProperties(&props, hipDevId);
/* Attempt to use kernel provided with Blender. */
if (!use_adaptive_compilation()) {
if (!force_ptx) {
const string fatbin = path_get(string_printf("lib/%s_sm_%d%d.cubin", name, major, minor));
const string fatbin = path_get(string_printf("lib/%s_%s.fatbin", name, props.gcnArchName));
VLOG(1) << "Testing for pre-compiled kernel " << fatbin << ".";
if (path_exists(fatbin)) {
VLOG(1) << "Using precompiled kernel.";
@@ -283,17 +285,21 @@ string HIPDevice::compile_kernel(const uint kernel_features,
const string kernel_md5 = util_md5_string(source_md5 + common_cflags);
const char *const kernel_ext = "genco";
std::string options;
# ifdef _WIN32
const char *const options =
"save-temps -Wno-parentheses-equality -Wno-unused-value --hipcc-func-supp";
options.append("Wno-parentheses-equality -Wno-unused-value --hipcc-func-supp -ffast-math");
# else
const char *const options =
"save-temps -Wno-parentheses-equality -Wno-unused-value --hipcc-func-supp -O3 -ggdb";
options.append("Wno-parentheses-equality -Wno-unused-value --hipcc-func-supp -O3 -ffast-math");
# endif
# ifdef _DEBUG
options.append(" -save-temps");
# endif
options.append(" --amdgpu-target=").append(props.gcnArchName);
const string include_path = source_path;
const char *const kernel_arch = force_ptx ? "compute" : "sm";
const char *const kernel_arch = props.gcnArchName;
const string fatbin_file = string_printf(
"cycles_%s_%s_%d%d_%s", name, kernel_arch, major, minor, kernel_md5.c_str());
"cycles_%s_%s_%s", name, kernel_arch, kernel_md5.c_str());
const string fatbin = path_cache_get(path_join("kernels", fatbin_file));
VLOG(1) << "Testing for locally compiled kernel " << fatbin << ".";
if (path_exists(fatbin)) {
@@ -350,7 +356,7 @@ string HIPDevice::compile_kernel(const uint kernel_features,
string command = string_printf("%s -%s -I %s --%s %s -o \"%s\"",
hipcc,
options,
options.c_str(),
include_path.c_str(),
kernel_ext,
source_path.c_str(),

View File

@@ -487,9 +487,6 @@ endif()
# HIP module
if(WITH_CYCLES_HIP_BINARIES AND WITH_CYCLES_DEVICE_HIP)
# 64 bit only
set(HIP_BITS 64)
# build for each arch
set(hip_sources device/hip/kernel.cpp
${SRC_HEADERS}
@@ -504,32 +501,41 @@ if(WITH_CYCLES_HIP_BINARIES AND WITH_CYCLES_DEVICE_HIP)
set(hip_fatbins)
macro(CYCLES_HIP_KERNEL_ADD arch prev_arch name flags sources experimental)
if(${arch} MATCHES "compute_.*")
set(format "ptx")
else()
set(format "fatbin")
endif()
set(format "fatbin")
set(hip_file ${name}_${arch}.${format})
set(kernel_sources ${sources})
if(NOT ${prev_arch} STREQUAL "none")
if(${prev_arch} MATCHES "compute_.*")
set(kernel_sources ${kernel_sources} ${name}_${prev_arch}.ptx)
else()
set(kernel_sources ${kernel_sources} ${name}_${prev_arch}.fatbin)
endif()
set(kernel_sources ${kernel_sources} ${name}_${prev_arch}.fatbin)
endif()
set(hip_kernel_src "/device/hip/${name}.cpp")
set(hip_flags ${flags}
if(WIN32)
set(hip_command ${CMAKE_COMMAND})
set(hip_flags
-E env "HIP_PATH=${HIP_ROOT_DIR}" "PATH=${HIP_PERL_PATH}"
${HIP_HIPCC_EXECUTABLE}.bat)
else()
set(hip_command ${HIP_HIPCC_EXECUTABLE})
set(hip_flags)
endif()
set(hip_flags
${hip_flags}
--amdgpu-target=${arch}
${HIP_HIPCC_FLAGS}
--genco
${CMAKE_CURRENT_SOURCE_DIR}${hip_kernel_src}
${flags}
-D CCL_NAMESPACE_BEGIN=
-D CCL_NAMESPACE_END=
-D HIPCC
-m ${HIP_BITS}
-I ${CMAKE_CURRENT_SOURCE_DIR}/..
-I ${CMAKE_CURRENT_SOURCE_DIR}/device/hip
--use_fast_math
-Wno-parentheses-equality
-Wno-unused-value
--hipcc-func-supp
-ffast-math
-o ${CMAKE_CURRENT_BINARY_DIR}/${hip_file})
if(${experimental})
@@ -541,20 +547,9 @@ if(WITH_CYCLES_HIP_BINARIES AND WITH_CYCLES_DEVICE_HIP)
set(hip_flags ${hip_flags} -D __KERNEL_DEBUG__)
endif()
if(WITH_NANOVDB)
set(hip_flags ${hip_flags}
-D WITH_NANOVDB
-I "${NANOVDB_INCLUDE_DIR}")
endif()
add_custom_command(
OUTPUT ${hip_file}
COMMAND ${HIP_HIPCC_EXECUTABLE}
-arch=${arch}
${HIP_HIPCC_FLAGS}
--${format}
${CMAKE_CURRENT_SOURCE_DIR}${hip_kernel_src}
${hip_flags}
add_custom_target(
${hip_file}
COMMAND ${hip_command} ${hip_flags}
DEPENDS ${kernel_sources})
delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${hip_file}" ${CYCLES_INSTALL_PATH}/lib)
list(APPEND hip_fatbins ${hip_file})

View File

@@ -27,10 +27,10 @@ CCL_NAMESPACE_BEGIN
/* Not actually used, just a NULL pointer that gets passed everywhere, which we
* hope gets optimized out by the compiler. */
struct KernelGlobals {
/* NOTE: Keep the size in sync with SHADOW_STACK_MAX_HITS. */
struct KernelGlobalsGPU {
int unused[1];
};
typedef ccl_global const KernelGlobalsGPU *ccl_restrict KernelGlobals;
/* Global scene data and textures */
__constant__ KernelData __data;