Cycles: Use native float->half conversion instructions for Haswell CPUs.

This makes OCIO viewport color correction a little bit faster (about -0.5s for 100 samples)
Also set max half float value to 65504.0 to conform with IEEE 754.
This commit is contained in:
Sv. Lockal
2015-05-10 16:20:04 +00:00
parent 3a2c0ccdd0
commit 2ec221aa28
2 changed files with 16 additions and 18 deletions

View File

@@ -64,7 +64,7 @@ elseif(CMAKE_COMPILER_IS_GNUCC)
set(CYCLES_AVX_KERNEL_FLAGS "-ffast-math -msse -msse2 -msse3 -mssse3 -msse4.1 -mavx -mfpmath=sse")
endif()
if(CXX_HAS_AVX2)
set(CYCLES_AVX2_KERNEL_FLAGS "-ffast-math -msse -msse2 -msse3 -mssse3 -msse4.1 -mavx -mavx2 -mfma -mlzcnt -mbmi -mbmi2 -mfpmath=sse")
set(CYCLES_AVX2_KERNEL_FLAGS "-ffast-math -msse -msse2 -msse3 -mssse3 -msse4.1 -mavx -mavx2 -mfma -mlzcnt -mbmi -mbmi2 -mf16c -mfpmath=sse")
endif()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ffast-math")
elseif(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
@@ -80,7 +80,7 @@ elseif(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
set(CYCLES_AVX_KERNEL_FLAGS "-ffast-math -msse -msse2 -msse3 -mssse3 -msse4.1 -mavx")
endif()
if(CXX_HAS_AVX2)
set(CYCLES_AVX2_KERNEL_FLAGS "-ffast-math -msse -msse2 -msse3 -mssse3 -msse4.1 -mavx -mavx2 -mfma -mlzcnt -mbmi -mbmi2")
set(CYCLES_AVX2_KERNEL_FLAGS "-ffast-math -msse -msse2 -msse3 -mssse3 -msse4.1 -mavx -mavx2 -mfma -mlzcnt -mbmi -mbmi2 -mf16c")
endif()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ffast-math")
endif()