Cycles: update build configurations to include the CUDA sm_35 architecture. When building
with a CUDA toolkit older than 5.0, the build prints a warning and skips this architecture.
This commit is contained in:
@@ -264,7 +264,7 @@ option(WITH_CYCLES "Enable cycles Render Engine" ON)
|
|||||||
option(WITH_CYCLES_TEST "Build cycles test application" OFF)
|
option(WITH_CYCLES_TEST "Build cycles test application" OFF)
|
||||||
option(WITH_CYCLES_OSL "Build Cycles with OSL support" OFF)
|
option(WITH_CYCLES_OSL "Build Cycles with OSL support" OFF)
|
||||||
option(WITH_CYCLES_CUDA_BINARIES "Build cycles CUDA binaries" OFF)
|
option(WITH_CYCLES_CUDA_BINARIES "Build cycles CUDA binaries" OFF)
|
||||||
set(CYCLES_CUDA_BINARIES_ARCH sm_20 sm_21 sm_30 CACHE STRING "CUDA architectures to build binaries for")
|
set(CYCLES_CUDA_BINARIES_ARCH sm_20 sm_21 sm_30 sm_35 CACHE STRING "CUDA architectures to build binaries for")
|
||||||
mark_as_advanced(CYCLES_CUDA_BINARIES_ARCH)
|
mark_as_advanced(CYCLES_CUDA_BINARIES_ARCH)
|
||||||
unset(PLATFORM_DEFAULT)
|
unset(PLATFORM_DEFAULT)
|
||||||
|
|
||||||
|
@@ -2,4 +2,4 @@ BF_BUILDDIR = '../blender-build/linux-glibc211-i686'
|
|||||||
BF_INSTALLDIR = '../blender-install/linux-glibc211-i686'
|
BF_INSTALLDIR = '../blender-install/linux-glibc211-i686'
|
||||||
BF_NUMJOBS = 1
|
BF_NUMJOBS = 1
|
||||||
|
|
||||||
BF_CYCLES_CUDA_BINARIES_ARCH = ['sm_20', 'sm_21', 'sm_30']
|
BF_CYCLES_CUDA_BINARIES_ARCH = ['sm_20', 'sm_21', 'sm_30', 'sm_35']
|
||||||
|
@@ -2,4 +2,4 @@ BF_BUILDDIR = '../blender-build/linux-glibc211-x86_64'
|
|||||||
BF_INSTALLDIR = '../blender-install/linux-glibc211-x86_64'
|
BF_INSTALLDIR = '../blender-install/linux-glibc211-x86_64'
|
||||||
BF_NUMJOBS = 1
|
BF_NUMJOBS = 1
|
||||||
|
|
||||||
BF_CYCLES_CUDA_BINARIES_ARCH = ['sm_20', 'sm_21', 'sm_30']
|
BF_CYCLES_CUDA_BINARIES_ARCH = ['sm_20', 'sm_21', 'sm_30', 'sm_35']
|
||||||
|
@@ -315,7 +315,7 @@ BF_BOOST_LIBPATH = '${BF_BOOST}/lib'
|
|||||||
|
|
||||||
WITH_BF_CYCLES_CUDA_BINARIES = True
|
WITH_BF_CYCLES_CUDA_BINARIES = True
|
||||||
BF_CYCLES_CUDA_NVCC = '/usr/local/cuda/bin/nvcc'
|
BF_CYCLES_CUDA_NVCC = '/usr/local/cuda/bin/nvcc'
|
||||||
BF_CYCLES_CUDA_BINARIES_ARCH = ['sm_20', 'sm_21', 'sm_30']
|
BF_CYCLES_CUDA_BINARIES_ARCH = ['sm_20', 'sm_21', 'sm_30', 'sm_35']
|
||||||
|
|
||||||
#Freestyle
|
#Freestyle
|
||||||
WITH_BF_FREESTYLE = True
|
WITH_BF_FREESTYLE = True
|
||||||
|
@@ -315,7 +315,7 @@ BF_BOOST_LIBPATH = '${BF_BOOST}/lib'
|
|||||||
|
|
||||||
WITH_BF_CYCLES_CUDA_BINARIES = True
|
WITH_BF_CYCLES_CUDA_BINARIES = True
|
||||||
BF_CYCLES_CUDA_NVCC = '/usr/local/cuda/bin/nvcc'
|
BF_CYCLES_CUDA_NVCC = '/usr/local/cuda/bin/nvcc'
|
||||||
BF_CYCLES_CUDA_BINARIES_ARCH = ['sm_20', 'sm_21', 'sm_30']
|
BF_CYCLES_CUDA_BINARIES_ARCH = ['sm_20', 'sm_21', 'sm_30', 'sm_35']
|
||||||
|
|
||||||
#Freestyle
|
#Freestyle
|
||||||
WITH_BF_FREESTYLE = True
|
WITH_BF_FREESTYLE = True
|
||||||
|
@@ -315,7 +315,7 @@ BF_BOOST_LIBPATH = '${BF_BOOST}/lib'
|
|||||||
|
|
||||||
WITH_BF_CYCLES_CUDA_BINARIES = False
|
WITH_BF_CYCLES_CUDA_BINARIES = False
|
||||||
BF_CYCLES_CUDA_NVCC = '/usr/local/cuda/bin/nvcc'
|
BF_CYCLES_CUDA_NVCC = '/usr/local/cuda/bin/nvcc'
|
||||||
BF_CYCLES_CUDA_BINARIES_ARCH = ['sm_20', 'sm_21', 'sm_30']
|
BF_CYCLES_CUDA_BINARIES_ARCH = ['sm_20', 'sm_21', 'sm_30', 'sm_35']
|
||||||
|
|
||||||
#Freestyle
|
#Freestyle
|
||||||
WITH_BF_FREESTYLE = True
|
WITH_BF_FREESTYLE = True
|
||||||
|
@@ -210,7 +210,7 @@ WITH_BF_CYCLES = WITH_BF_OIIO and WITH_BF_BOOST
|
|||||||
|
|
||||||
WITH_BF_CYCLES_CUDA_BINARIES = False
|
WITH_BF_CYCLES_CUDA_BINARIES = False
|
||||||
BF_CYCLES_CUDA_NVCC = '/usr/local/cuda/bin/nvcc'
|
BF_CYCLES_CUDA_NVCC = '/usr/local/cuda/bin/nvcc'
|
||||||
BF_CYCLES_CUDA_BINARIES_ARCH = ['sm_20', 'sm_21', 'sm_30']
|
BF_CYCLES_CUDA_BINARIES_ARCH = ['sm_20', 'sm_21', 'sm_30', 'sm_35']
|
||||||
|
|
||||||
WITH_BF_OPENMP = True
|
WITH_BF_OPENMP = True
|
||||||
|
|
||||||
|
@@ -149,7 +149,7 @@ BF_OPENCOLLADA_LIBPATH = '${BF_OPENCOLLADA}/lib/opencollada'
|
|||||||
WITH_BF_CYCLES = True
|
WITH_BF_CYCLES = True
|
||||||
WITH_BF_CYCLES_CUDA_BINARIES = False
|
WITH_BF_CYCLES_CUDA_BINARIES = False
|
||||||
BF_CYCLES_CUDA_NVCC = "" # Path to the NVIDIA CUDA compiler
|
BF_CYCLES_CUDA_NVCC = "" # Path to the NVIDIA CUDA compiler
|
||||||
BF_CYCLES_CUDA_BINARIES_ARCH = ['sm_20', 'sm_21', 'sm_30']
|
BF_CYCLES_CUDA_BINARIES_ARCH = ['sm_20', 'sm_21', 'sm_30', 'sm_35']
|
||||||
|
|
||||||
WITH_BF_OIIO = True
|
WITH_BF_OIIO = True
|
||||||
BF_OIIO = LIBDIR + '/openimageio'
|
BF_OIIO = LIBDIR + '/openimageio'
|
||||||
|
@@ -215,7 +215,7 @@ BF_BOOST_LIBPATH = '${BF_BOOST}/lib'
|
|||||||
#CUDA
|
#CUDA
|
||||||
WITH_BF_CYCLES_CUDA_BINARIES = False
|
WITH_BF_CYCLES_CUDA_BINARIES = False
|
||||||
#BF_CYCLES_CUDA_NVCC = "" # Path to the nvidia compiler
|
#BF_CYCLES_CUDA_NVCC = "" # Path to the nvidia compiler
|
||||||
BF_CYCLES_CUDA_BINARIES_ARCH = ['sm_20', 'sm_21', 'sm_30']
|
BF_CYCLES_CUDA_BINARIES_ARCH = ['sm_20', 'sm_21', 'sm_30', 'sm_35']
|
||||||
|
|
||||||
#Ray trace optimization
|
#Ray trace optimization
|
||||||
WITH_BF_RAYOPTIMIZATION = True
|
WITH_BF_RAYOPTIMIZATION = True
|
||||||
|
@@ -146,7 +146,7 @@ BF_OPENCOLLADA_LIBPATH = '${BF_OPENCOLLADA}/lib/opencollada'
|
|||||||
WITH_BF_CYCLES = True
|
WITH_BF_CYCLES = True
|
||||||
WITH_BF_CYCLES_CUDA_BINARIES = False
|
WITH_BF_CYCLES_CUDA_BINARIES = False
|
||||||
BF_CYCLES_CUDA_NVCC = "" # Path to the NVIDIA CUDA compiler
|
BF_CYCLES_CUDA_NVCC = "" # Path to the NVIDIA CUDA compiler
|
||||||
BF_CYCLES_CUDA_BINARIES_ARCH = ['sm_20', 'sm_21', 'sm_30']
|
BF_CYCLES_CUDA_BINARIES_ARCH = ['sm_20', 'sm_21', 'sm_30', 'sm_35']
|
||||||
|
|
||||||
WITH_BF_OIIO = True
|
WITH_BF_OIIO = True
|
||||||
BF_OIIO = LIBDIR + '/openimageio'
|
BF_OIIO = LIBDIR + '/openimageio'
|
||||||
|
@@ -212,7 +212,7 @@ BF_BOOST_LIBPATH = '${BF_BOOST}/lib'
|
|||||||
#CUDA
|
#CUDA
|
||||||
WITH_BF_CYCLES_CUDA_BINARIES = False
|
WITH_BF_CYCLES_CUDA_BINARIES = False
|
||||||
#BF_CYCLES_CUDA_NVCC = "" # Path to the nvidia compiler
|
#BF_CYCLES_CUDA_NVCC = "" # Path to the nvidia compiler
|
||||||
BF_CYCLES_CUDA_BINARIES_ARCH = ['sm_20', 'sm_21', 'sm_30']
|
BF_CYCLES_CUDA_BINARIES_ARCH = ['sm_20', 'sm_21', 'sm_30', 'sm_35']
|
||||||
|
|
||||||
#Ray trace optimization
|
#Ray trace optimization
|
||||||
WITH_BF_RAYOPTIMIZATION = True
|
WITH_BF_RAYOPTIMIZATION = True
|
||||||
|
@@ -304,7 +304,7 @@ public:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
/* CUDA 4.x */
|
/* CUDA 5.x */
|
||||||
if(major == 1) {
|
if(major == 1) {
|
||||||
/* sm_1x */
|
/* sm_1x */
|
||||||
arch_flags = "--maxrregcount=24 --opencc-options -OPT:Olimit=0 --use_fast_math";
|
arch_flags = "--maxrregcount=24 --opencc-options -OPT:Olimit=0 --use_fast_math";
|
||||||
|
@@ -130,6 +130,12 @@ if(WITH_CYCLES_CUDA_BINARIES)
|
|||||||
string(REGEX REPLACE ".*release ([0-9]+)\\.([0-9]+).*" "\\2" CUDA_VERSION_MINOR ${NVCC_OUT})
|
string(REGEX REPLACE ".*release ([0-9]+)\\.([0-9]+).*" "\\2" CUDA_VERSION_MINOR ${NVCC_OUT})
|
||||||
set(CUDA_VERSION "${CUDA_VERSION_MAJOR}${CUDA_VERSION_MINOR}")
|
set(CUDA_VERSION "${CUDA_VERSION_MAJOR}${CUDA_VERSION_MINOR}")
|
||||||
|
|
||||||
|
# warn for other versions
|
||||||
|
if(CUDA_VERSION MATCHES "50")
|
||||||
|
else()
|
||||||
|
message(WARNING "CUDA version ${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR} detected, build may succeed but only CUDA 5.0 is officially supported")
|
||||||
|
endif()
|
||||||
|
|
||||||
# build for each arch
|
# build for each arch
|
||||||
set(cuda_sources kernel.cu ${SRC_HEADERS} ${SRC_SVM_HEADERS} ${SRC_CLOSURE_HEADERS} ${SRC_UTIL_HEADERS})
|
set(cuda_sources kernel.cu ${SRC_HEADERS} ${SRC_SVM_HEADERS} ${SRC_CLOSURE_HEADERS} ${SRC_UTIL_HEADERS})
|
||||||
set(cuda_cubins)
|
set(cuda_cubins)
|
||||||
@@ -139,12 +145,6 @@ if(WITH_CYCLES_CUDA_BINARIES)
|
|||||||
|
|
||||||
set(cuda_version_flags "-D__KERNEL_CUDA_VERSION__=${CUDA_VERSION}")
|
set(cuda_version_flags "-D__KERNEL_CUDA_VERSION__=${CUDA_VERSION}")
|
||||||
|
|
||||||
# warn for other versions
|
|
||||||
if(CUDA_VERSION MATCHES "50")
|
|
||||||
else()
|
|
||||||
message(STATUS "CUDA version ${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR} detected, build may succeed but only CUDA 5.0 is officially supported")
|
|
||||||
endif()
|
|
||||||
|
|
||||||
# build flags depending on CUDA version and arch
|
# build flags depending on CUDA version and arch
|
||||||
if(CUDA_VERSION LESS 50)
|
if(CUDA_VERSION LESS 50)
|
||||||
# CUDA 4.x
|
# CUDA 4.x
|
||||||
@@ -176,6 +176,9 @@ if(WITH_CYCLES_CUDA_BINARIES)
|
|||||||
set(cuda_math_flags "--use_fast_math")
|
set(cuda_math_flags "--use_fast_math")
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
if(CUDA_VERSION LESS 50 AND ${arch} MATCHES "sm_35")
|
||||||
|
message(WARNING "Can't build kernel for CUDA sm_35 architecture, skipping")
|
||||||
|
else()
|
||||||
add_custom_command(
|
add_custom_command(
|
||||||
OUTPUT ${cuda_cubin}
|
OUTPUT ${cuda_cubin}
|
||||||
COMMAND ${CUDA_NVCC_EXECUTABLE} -arch=${arch} -m${CUDA_BITS} --cubin ${CMAKE_CURRENT_SOURCE_DIR}/kernel.cu -o ${CMAKE_CURRENT_BINARY_DIR}/${cuda_cubin} --ptxas-options="-v" ${cuda_arch_flags} ${cuda_version_flags} ${cuda_math_flags} -I${CMAKE_CURRENT_SOURCE_DIR}/../util -I${CMAKE_CURRENT_SOURCE_DIR}/svm -DCCL_NAMESPACE_BEGIN= -DCCL_NAMESPACE_END= -DNVCC
|
COMMAND ${CUDA_NVCC_EXECUTABLE} -arch=${arch} -m${CUDA_BITS} --cubin ${CMAKE_CURRENT_SOURCE_DIR}/kernel.cu -o ${CMAKE_CURRENT_BINARY_DIR}/${cuda_cubin} --ptxas-options="-v" ${cuda_arch_flags} ${cuda_version_flags} ${cuda_math_flags} -I${CMAKE_CURRENT_SOURCE_DIR}/../util -I${CMAKE_CURRENT_SOURCE_DIR}/svm -DCCL_NAMESPACE_BEGIN= -DCCL_NAMESPACE_END= -DNVCC
|
||||||
@@ -183,6 +186,7 @@ if(WITH_CYCLES_CUDA_BINARIES)
|
|||||||
|
|
||||||
delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${cuda_cubin}" ${CYCLES_INSTALL_PATH}/lib)
|
delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${cuda_cubin}" ${CYCLES_INSTALL_PATH}/lib)
|
||||||
list(APPEND cuda_cubins ${cuda_cubin})
|
list(APPEND cuda_cubins ${cuda_cubin})
|
||||||
|
endif()
|
||||||
endforeach()
|
endforeach()
|
||||||
|
|
||||||
add_custom_target(cycles_kernel_cuda ALL DEPENDS ${cuda_cubins})
|
add_custom_target(cycles_kernel_cuda ALL DEPENDS ${cuda_cubins})
|
||||||
|
@@ -88,6 +88,10 @@ if env['WITH_BF_CYCLES_CUDA_BINARIES']:
|
|||||||
|
|
||||||
# build flags depending on CUDA version and arch
|
# build flags depending on CUDA version and arch
|
||||||
if cuda_version < 50:
|
if cuda_version < 50:
|
||||||
|
if arch == "sm_35":
|
||||||
|
print("Can't build kernel for CUDA sm_35 architecture, skipping")
|
||||||
|
continue
|
||||||
|
|
||||||
# CUDA 4.x
|
# CUDA 4.x
|
||||||
if arch.startswith("sm_1"):
|
if arch.startswith("sm_1"):
|
||||||
# sm_1x
|
# sm_1x
|
||||||
|
@@ -517,7 +517,7 @@ __device_inline const __m128 shuffle_swap(const __m128& a, const shuffle_swap_t&
|
|||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
/* somewhat slower version for SSE3 */
|
/* somewhat slower version for SSE2 */
|
||||||
typedef int shuffle_swap_t;
|
typedef int shuffle_swap_t;
|
||||||
|
|
||||||
__device_inline const shuffle_swap_t shuffle_swap_identity(void)
|
__device_inline const shuffle_swap_t shuffle_swap_identity(void)
|
||||||
|
Reference in New Issue
Block a user