Cycles: various fixes for HIP and compilation of HIP binaries
* Additional structs added to the hipew loader for device props
* Adds hipRTC functions to the loader for future usage
* Enables CPU+GPU usage for HIP
* Cleanup to the adaptive kernel compilation process
* Fix for kernel compilation failures with HIP with latest master

Ref T92393, D12958
This commit is contained in:

committed by
Brecht Van Lommel

parent
d1fcf93f03
commit
d092933abb
@@ -1329,7 +1329,7 @@ class CyclesPreferences(bpy.types.AddonPreferences):
|
||||
elif entry.type == 'CPU':
|
||||
cpu_devices.append(entry)
|
||||
# Extend all GPU devices with CPU.
|
||||
if compute_device_type != 'CPU' and compute_device_type != 'HIP':
|
||||
if compute_device_type != 'CPU':
|
||||
devices.extend(cpu_devices)
|
||||
return devices
|
||||
|
||||
|
@@ -208,7 +208,7 @@ bool HIPDevice::use_adaptive_compilation()
|
||||
return DebugFlags().hip.adaptive_compile;
|
||||
}
|
||||
|
||||
/* Common NVCC flags which stays the same regardless of shading model,
|
||||
/* Common HIPCC flags which stays the same regardless of shading model,
|
||||
* kernel sources md5 and only depends on compiler or compilation settings.
|
||||
*/
|
||||
string HIPDevice::compile_kernel_get_common_cflags(const uint kernel_features)
|
||||
@@ -239,11 +239,13 @@ string HIPDevice::compile_kernel(const uint kernel_features,
|
||||
int major, minor;
|
||||
hipDeviceGetAttribute(&major, hipDeviceAttributeComputeCapabilityMajor, hipDevId);
|
||||
hipDeviceGetAttribute(&minor, hipDeviceAttributeComputeCapabilityMinor, hipDevId);
|
||||
hipDeviceProp_t props;
|
||||
hipGetDeviceProperties(&props, hipDevId);
|
||||
|
||||
/* Attempt to use kernel provided with Blender. */
|
||||
if (!use_adaptive_compilation()) {
|
||||
if (!force_ptx) {
|
||||
const string fatbin = path_get(string_printf("lib/%s_sm_%d%d.cubin", name, major, minor));
|
||||
const string fatbin = path_get(string_printf("lib/%s_%s.fatbin", name, props.gcnArchName));
|
||||
VLOG(1) << "Testing for pre-compiled kernel " << fatbin << ".";
|
||||
if (path_exists(fatbin)) {
|
||||
VLOG(1) << "Using precompiled kernel.";
|
||||
@@ -283,17 +285,21 @@ string HIPDevice::compile_kernel(const uint kernel_features,
|
||||
const string kernel_md5 = util_md5_string(source_md5 + common_cflags);
|
||||
|
||||
const char *const kernel_ext = "genco";
|
||||
std::string options;
|
||||
# ifdef _WIN32
|
||||
const char *const options =
|
||||
"save-temps -Wno-parentheses-equality -Wno-unused-value --hipcc-func-supp";
|
||||
options.append("Wno-parentheses-equality -Wno-unused-value --hipcc-func-supp -ffast-math");
|
||||
# else
|
||||
const char *const options =
|
||||
"save-temps -Wno-parentheses-equality -Wno-unused-value --hipcc-func-supp -O3 -ggdb";
|
||||
options.append("Wno-parentheses-equality -Wno-unused-value --hipcc-func-supp -O3 -ffast-math");
|
||||
# endif
|
||||
# ifdef _DEBUG
|
||||
options.append(" -save-temps");
|
||||
# endif
|
||||
options.append(" --amdgpu-target=").append(props.gcnArchName);
|
||||
|
||||
const string include_path = source_path;
|
||||
const char *const kernel_arch = force_ptx ? "compute" : "sm";
|
||||
const char *const kernel_arch = props.gcnArchName;
|
||||
const string fatbin_file = string_printf(
|
||||
"cycles_%s_%s_%d%d_%s", name, kernel_arch, major, minor, kernel_md5.c_str());
|
||||
"cycles_%s_%s_%s", name, kernel_arch, kernel_md5.c_str());
|
||||
const string fatbin = path_cache_get(path_join("kernels", fatbin_file));
|
||||
VLOG(1) << "Testing for locally compiled kernel " << fatbin << ".";
|
||||
if (path_exists(fatbin)) {
|
||||
@@ -350,7 +356,7 @@ string HIPDevice::compile_kernel(const uint kernel_features,
|
||||
|
||||
string command = string_printf("%s -%s -I %s --%s %s -o \"%s\"",
|
||||
hipcc,
|
||||
options,
|
||||
options.c_str(),
|
||||
include_path.c_str(),
|
||||
kernel_ext,
|
||||
source_path.c_str(),
|
||||
|
@@ -487,9 +487,6 @@ endif()
|
||||
# HIP module
|
||||
|
||||
if(WITH_CYCLES_HIP_BINARIES AND WITH_CYCLES_DEVICE_HIP)
|
||||
# 64 bit only
|
||||
set(HIP_BITS 64)
|
||||
|
||||
# build for each arch
|
||||
set(hip_sources device/hip/kernel.cpp
|
||||
${SRC_HEADERS}
|
||||
@@ -504,32 +501,41 @@ if(WITH_CYCLES_HIP_BINARIES AND WITH_CYCLES_DEVICE_HIP)
|
||||
set(hip_fatbins)
|
||||
|
||||
macro(CYCLES_HIP_KERNEL_ADD arch prev_arch name flags sources experimental)
|
||||
if(${arch} MATCHES "compute_.*")
|
||||
set(format "ptx")
|
||||
else()
|
||||
set(format "fatbin")
|
||||
endif()
|
||||
set(format "fatbin")
|
||||
set(hip_file ${name}_${arch}.${format})
|
||||
|
||||
set(kernel_sources ${sources})
|
||||
if(NOT ${prev_arch} STREQUAL "none")
|
||||
if(${prev_arch} MATCHES "compute_.*")
|
||||
set(kernel_sources ${kernel_sources} ${name}_${prev_arch}.ptx)
|
||||
else()
|
||||
set(kernel_sources ${kernel_sources} ${name}_${prev_arch}.fatbin)
|
||||
endif()
|
||||
set(kernel_sources ${kernel_sources} ${name}_${prev_arch}.fatbin)
|
||||
endif()
|
||||
|
||||
set(hip_kernel_src "/device/hip/${name}.cpp")
|
||||
|
||||
set(hip_flags ${flags}
|
||||
if(WIN32)
|
||||
set(hip_command ${CMAKE_COMMAND})
|
||||
set(hip_flags
|
||||
-E env "HIP_PATH=${HIP_ROOT_DIR}" "PATH=${HIP_PERL_PATH}"
|
||||
${HIP_HIPCC_EXECUTABLE}.bat)
|
||||
else()
|
||||
set(hip_command ${HIP_HIPCC_EXECUTABLE})
|
||||
set(hip_flags)
|
||||
endif()
|
||||
|
||||
set(hip_flags
|
||||
${hip_flags}
|
||||
--amdgpu-target=${arch}
|
||||
${HIP_HIPCC_FLAGS}
|
||||
--genco
|
||||
${CMAKE_CURRENT_SOURCE_DIR}${hip_kernel_src}
|
||||
${flags}
|
||||
-D CCL_NAMESPACE_BEGIN=
|
||||
-D CCL_NAMESPACE_END=
|
||||
-D HIPCC
|
||||
-m ${HIP_BITS}
|
||||
-I ${CMAKE_CURRENT_SOURCE_DIR}/..
|
||||
-I ${CMAKE_CURRENT_SOURCE_DIR}/device/hip
|
||||
--use_fast_math
|
||||
-Wno-parentheses-equality
|
||||
-Wno-unused-value
|
||||
--hipcc-func-supp
|
||||
-ffast-math
|
||||
-o ${CMAKE_CURRENT_BINARY_DIR}/${hip_file})
|
||||
|
||||
if(${experimental})
|
||||
@@ -541,20 +547,9 @@ if(WITH_CYCLES_HIP_BINARIES AND WITH_CYCLES_DEVICE_HIP)
|
||||
set(hip_flags ${hip_flags} -D __KERNEL_DEBUG__)
|
||||
endif()
|
||||
|
||||
if(WITH_NANOVDB)
|
||||
set(hip_flags ${hip_flags}
|
||||
-D WITH_NANOVDB
|
||||
-I "${NANOVDB_INCLUDE_DIR}")
|
||||
endif()
|
||||
|
||||
add_custom_command(
|
||||
OUTPUT ${hip_file}
|
||||
COMMAND ${HIP_HIPCC_EXECUTABLE}
|
||||
-arch=${arch}
|
||||
${HIP_HIPCC_FLAGS}
|
||||
--${format}
|
||||
${CMAKE_CURRENT_SOURCE_DIR}${hip_kernel_src}
|
||||
${hip_flags}
|
||||
add_custom_target(
|
||||
${hip_file}
|
||||
COMMAND ${hip_command} ${hip_flags}
|
||||
DEPENDS ${kernel_sources})
|
||||
delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${hip_file}" ${CYCLES_INSTALL_PATH}/lib)
|
||||
list(APPEND hip_fatbins ${hip_file})
|
||||
|
@@ -27,10 +27,10 @@ CCL_NAMESPACE_BEGIN
|
||||
|
||||
/* Not actually used, just a NULL pointer that gets passed everywhere, which we
|
||||
* hope gets optimized out by the compiler. */
|
||||
struct KernelGlobals {
|
||||
/* NOTE: Keep the size in sync with SHADOW_STACK_MAX_HITS. */
|
||||
struct KernelGlobalsGPU {
|
||||
int unused[1];
|
||||
};
|
||||
typedef ccl_global const KernelGlobalsGPU *ccl_restrict KernelGlobals;
|
||||
|
||||
/* Global scene data and textures */
|
||||
__constant__ KernelData __data;
|
||||
|
Reference in New Issue
Block a user