Cycles: update build configurations to include CUDA sm_35 architecture. When using

a compiler older than CUDA 5.0 it will give a warning and skip this architecture.
This commit is contained in:
Brecht Van Lommel
2013-06-20 13:10:47 +00:00
parent a92468c7d7
commit 8d6e5e2fee
15 changed files with 33 additions and 25 deletions

View File

@@ -264,7 +264,7 @@ option(WITH_CYCLES "Enable cycles Render Engine" ON)
option(WITH_CYCLES_TEST "Build cycles test application" OFF)
option(WITH_CYCLES_OSL "Build Cycles with OSL support" OFF)
option(WITH_CYCLES_CUDA_BINARIES "Build cycles CUDA binaries" OFF)
set(CYCLES_CUDA_BINARIES_ARCH sm_20 sm_21 sm_30 CACHE STRING "CUDA architectures to build binaries for")
set(CYCLES_CUDA_BINARIES_ARCH sm_20 sm_21 sm_30 sm_35 CACHE STRING "CUDA architectures to build binaries for")
mark_as_advanced(CYCLES_CUDA_BINARIES_ARCH)
unset(PLATFORM_DEFAULT)

View File

@@ -2,4 +2,4 @@ BF_BUILDDIR = '../blender-build/linux-glibc211-i686'
BF_INSTALLDIR = '../blender-install/linux-glibc211-i686'
BF_NUMJOBS = 1
BF_CYCLES_CUDA_BINARIES_ARCH = ['sm_20', 'sm_21', 'sm_30']
BF_CYCLES_CUDA_BINARIES_ARCH = ['sm_20', 'sm_21', 'sm_30', 'sm_35']

View File

@@ -2,4 +2,4 @@ BF_BUILDDIR = '../blender-build/linux-glibc211-x86_64'
BF_INSTALLDIR = '../blender-install/linux-glibc211-x86_64'
BF_NUMJOBS = 1
BF_CYCLES_CUDA_BINARIES_ARCH = ['sm_20', 'sm_21', 'sm_30']
BF_CYCLES_CUDA_BINARIES_ARCH = ['sm_20', 'sm_21', 'sm_30', 'sm_35']

View File

@@ -315,7 +315,7 @@ BF_BOOST_LIBPATH = '${BF_BOOST}/lib'
WITH_BF_CYCLES_CUDA_BINARIES = True
BF_CYCLES_CUDA_NVCC = '/usr/local/cuda/bin/nvcc'
BF_CYCLES_CUDA_BINARIES_ARCH = ['sm_20', 'sm_21', 'sm_30']
BF_CYCLES_CUDA_BINARIES_ARCH = ['sm_20', 'sm_21', 'sm_30', 'sm_35']
#Freestyle
WITH_BF_FREESTYLE = True

View File

@@ -315,7 +315,7 @@ BF_BOOST_LIBPATH = '${BF_BOOST}/lib'
WITH_BF_CYCLES_CUDA_BINARIES = True
BF_CYCLES_CUDA_NVCC = '/usr/local/cuda/bin/nvcc'
BF_CYCLES_CUDA_BINARIES_ARCH = ['sm_20', 'sm_21', 'sm_30']
BF_CYCLES_CUDA_BINARIES_ARCH = ['sm_20', 'sm_21', 'sm_30', 'sm_35']
#Freestyle
WITH_BF_FREESTYLE = True

View File

@@ -315,7 +315,7 @@ BF_BOOST_LIBPATH = '${BF_BOOST}/lib'
WITH_BF_CYCLES_CUDA_BINARIES = False
BF_CYCLES_CUDA_NVCC = '/usr/local/cuda/bin/nvcc'
BF_CYCLES_CUDA_BINARIES_ARCH = ['sm_20', 'sm_21', 'sm_30']
BF_CYCLES_CUDA_BINARIES_ARCH = ['sm_20', 'sm_21', 'sm_30', 'sm_35']
#Freestyle
WITH_BF_FREESTYLE = True

View File

@@ -210,7 +210,7 @@ WITH_BF_CYCLES = WITH_BF_OIIO and WITH_BF_BOOST
WITH_BF_CYCLES_CUDA_BINARIES = False
BF_CYCLES_CUDA_NVCC = '/usr/local/cuda/bin/nvcc'
BF_CYCLES_CUDA_BINARIES_ARCH = ['sm_20', 'sm_21', 'sm_30']
BF_CYCLES_CUDA_BINARIES_ARCH = ['sm_20', 'sm_21', 'sm_30', 'sm_35']
WITH_BF_OPENMP = True

View File

@@ -149,7 +149,7 @@ BF_OPENCOLLADA_LIBPATH = '${BF_OPENCOLLADA}/lib/opencollada'
WITH_BF_CYCLES = True
WITH_BF_CYCLES_CUDA_BINARIES = False
BF_CYCLES_CUDA_NVCC = "" # Path to the NVIDIA CUDA compiler
BF_CYCLES_CUDA_BINARIES_ARCH = ['sm_20', 'sm_21', 'sm_30']
BF_CYCLES_CUDA_BINARIES_ARCH = ['sm_20', 'sm_21', 'sm_30', 'sm_35']
WITH_BF_OIIO = True
BF_OIIO = LIBDIR + '/openimageio'

View File

@@ -215,7 +215,7 @@ BF_BOOST_LIBPATH = '${BF_BOOST}/lib'
#CUDA
WITH_BF_CYCLES_CUDA_BINARIES = False
#BF_CYCLES_CUDA_NVCC = "" # Path to the nvidia compiler
BF_CYCLES_CUDA_BINARIES_ARCH = ['sm_20', 'sm_21', 'sm_30']
BF_CYCLES_CUDA_BINARIES_ARCH = ['sm_20', 'sm_21', 'sm_30', 'sm_35']
#Ray trace optimization
WITH_BF_RAYOPTIMIZATION = True

View File

@@ -146,7 +146,7 @@ BF_OPENCOLLADA_LIBPATH = '${BF_OPENCOLLADA}/lib/opencollada'
WITH_BF_CYCLES = True
WITH_BF_CYCLES_CUDA_BINARIES = False
BF_CYCLES_CUDA_NVCC = "" # Path to the NVIDIA CUDA compiler
BF_CYCLES_CUDA_BINARIES_ARCH = ['sm_20', 'sm_21', 'sm_30']
BF_CYCLES_CUDA_BINARIES_ARCH = ['sm_20', 'sm_21', 'sm_30', 'sm_35']
WITH_BF_OIIO = True
BF_OIIO = LIBDIR + '/openimageio'

View File

@@ -212,7 +212,7 @@ BF_BOOST_LIBPATH = '${BF_BOOST}/lib'
#CUDA
WITH_BF_CYCLES_CUDA_BINARIES = False
#BF_CYCLES_CUDA_NVCC = "" # Path to the nvidia compiler
BF_CYCLES_CUDA_BINARIES_ARCH = ['sm_20', 'sm_21', 'sm_30']
BF_CYCLES_CUDA_BINARIES_ARCH = ['sm_20', 'sm_21', 'sm_30', 'sm_35']
#Ray trace optimization
WITH_BF_RAYOPTIMIZATION = True

View File

@@ -304,7 +304,7 @@ public:
}
}
else {
/* CUDA 4.x */
/* CUDA 5.x */
if(major == 1) {
/* sm_1x */
arch_flags = "--maxrregcount=24 --opencc-options -OPT:Olimit=0 --use_fast_math";

View File

@@ -130,6 +130,12 @@ if(WITH_CYCLES_CUDA_BINARIES)
string(REGEX REPLACE ".*release ([0-9]+)\\.([0-9]+).*" "\\2" CUDA_VERSION_MINOR ${NVCC_OUT})
set(CUDA_VERSION "${CUDA_VERSION_MAJOR}${CUDA_VERSION_MINOR}")
# warn for other versions
if(CUDA_VERSION MATCHES "50")
else()
message(WARNING "CUDA version ${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR} detected, build may succeed but only CUDA 5.0 is officially supported")
endif()
# build for each arch
set(cuda_sources kernel.cu ${SRC_HEADERS} ${SRC_SVM_HEADERS} ${SRC_CLOSURE_HEADERS} ${SRC_UTIL_HEADERS})
set(cuda_cubins)
@@ -139,12 +145,6 @@ if(WITH_CYCLES_CUDA_BINARIES)
set(cuda_version_flags "-D__KERNEL_CUDA_VERSION__=${CUDA_VERSION}")
# warn for other versions
if(CUDA_VERSION MATCHES "50")
else()
message(STATUS "CUDA version ${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR} detected, build may succeed but only CUDA 5.0 is officially supported")
endif()
# build flags depending on CUDA version and arch
if(CUDA_VERSION LESS 50)
# CUDA 4.x
@@ -176,13 +176,17 @@ if(WITH_CYCLES_CUDA_BINARIES)
set(cuda_math_flags "--use_fast_math")
endif()
add_custom_command(
OUTPUT ${cuda_cubin}
COMMAND ${CUDA_NVCC_EXECUTABLE} -arch=${arch} -m${CUDA_BITS} --cubin ${CMAKE_CURRENT_SOURCE_DIR}/kernel.cu -o ${CMAKE_CURRENT_BINARY_DIR}/${cuda_cubin} --ptxas-options="-v" ${cuda_arch_flags} ${cuda_version_flags} ${cuda_math_flags} -I${CMAKE_CURRENT_SOURCE_DIR}/../util -I${CMAKE_CURRENT_SOURCE_DIR}/svm -DCCL_NAMESPACE_BEGIN= -DCCL_NAMESPACE_END= -DNVCC
DEPENDS ${cuda_sources})
if(CUDA_VERSION LESS 50 AND ${arch} MATCHES "sm_35")
message(WARNING "Can't build kernel for CUDA sm_35 architecture, skipping")
else()
add_custom_command(
OUTPUT ${cuda_cubin}
COMMAND ${CUDA_NVCC_EXECUTABLE} -arch=${arch} -m${CUDA_BITS} --cubin ${CMAKE_CURRENT_SOURCE_DIR}/kernel.cu -o ${CMAKE_CURRENT_BINARY_DIR}/${cuda_cubin} --ptxas-options="-v" ${cuda_arch_flags} ${cuda_version_flags} ${cuda_math_flags} -I${CMAKE_CURRENT_SOURCE_DIR}/../util -I${CMAKE_CURRENT_SOURCE_DIR}/svm -DCCL_NAMESPACE_BEGIN= -DCCL_NAMESPACE_END= -DNVCC
DEPENDS ${cuda_sources})
delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${cuda_cubin}" ${CYCLES_INSTALL_PATH}/lib)
list(APPEND cuda_cubins ${cuda_cubin})
delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${cuda_cubin}" ${CYCLES_INSTALL_PATH}/lib)
list(APPEND cuda_cubins ${cuda_cubin})
endif()
endforeach()
add_custom_target(cycles_kernel_cuda ALL DEPENDS ${cuda_cubins})

View File

@@ -88,6 +88,10 @@ if env['WITH_BF_CYCLES_CUDA_BINARIES']:
# build flags depending on CUDA version and arch
if cuda_version < 50:
if arch == "sm_35":
print("Can't build kernel for CUDA sm_35 architecture, skipping")
continue
# CUDA 4.x
if arch.startswith("sm_1"):
# sm_1x

View File

@@ -517,7 +517,7 @@ __device_inline const __m128 shuffle_swap(const __m128& a, const shuffle_swap_t&
#else
/* somewhat slower version for SSE3 */
/* somewhat slower version for SSE2 */
typedef int shuffle_swap_t;
__device_inline const shuffle_swap_t shuffle_swap_identity(void)