Cycles: Use native float->half conversion instructions for Haswell CPUs.
This makes OCIO viewport color correction a little bit faster (saving about 0.5s for 100 samples). Also set the max half float value to 65504.0 to conform with IEEE 754.
This commit is contained in:
@@ -64,7 +64,7 @@ elseif(CMAKE_COMPILER_IS_GNUCC)
|
||||
set(CYCLES_AVX_KERNEL_FLAGS "-ffast-math -msse -msse2 -msse3 -mssse3 -msse4.1 -mavx -mfpmath=sse")
|
||||
endif()
|
||||
if(CXX_HAS_AVX2)
|
||||
set(CYCLES_AVX2_KERNEL_FLAGS "-ffast-math -msse -msse2 -msse3 -mssse3 -msse4.1 -mavx -mavx2 -mfma -mlzcnt -mbmi -mbmi2 -mfpmath=sse")
|
||||
set(CYCLES_AVX2_KERNEL_FLAGS "-ffast-math -msse -msse2 -msse3 -mssse3 -msse4.1 -mavx -mavx2 -mfma -mlzcnt -mbmi -mbmi2 -mf16c -mfpmath=sse")
|
||||
endif()
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ffast-math")
|
||||
elseif(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
|
||||
@@ -80,7 +80,7 @@ elseif(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
|
||||
set(CYCLES_AVX_KERNEL_FLAGS "-ffast-math -msse -msse2 -msse3 -mssse3 -msse4.1 -mavx")
|
||||
endif()
|
||||
if(CXX_HAS_AVX2)
|
||||
set(CYCLES_AVX2_KERNEL_FLAGS "-ffast-math -msse -msse2 -msse3 -mssse3 -msse4.1 -mavx -mavx2 -mfma -mlzcnt -mbmi -mbmi2")
|
||||
set(CYCLES_AVX2_KERNEL_FLAGS "-ffast-math -msse -msse2 -msse3 -mssse3 -msse4.1 -mavx -mavx2 -mfma -mlzcnt -mbmi -mbmi2 -mf16c")
|
||||
endif()
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ffast-math")
|
||||
endif()
|
||||
|
Reference in New Issue
Block a user