# Patch summary (commentary only; this text precedes the first "diff --git" header and belongs to no hunk):
# - CMakeLists.txt / SConscript: append /Ox and /GS- to the MSVC flag strings used for the SSE2/SSE3 kernels.
#   The SConscript hunk spells the switch /GS- so both build systems pass the same option (MSVC switches are
#   case-sensitive: /GS is the buffer security check, /Gs is the stack-probe option).
# - bvh.cpp: reserve BVHParams::MAX_DEPTH*2 stack entries before the first push_back in both pack_nodes().
# - kernel_jitter.h / util_math.h: take the __KERNEL_SSE2__ branches only when _MSC_VER is not defined.
# - util_types.h: remove the MSVC-only __builtin_ctz/__builtin_clz wrappers and the MSVC >= 1700 include.
diff --git a/intern/cycles/CMakeLists.txt b/intern/cycles/CMakeLists.txt index 9f067bede33..a8293ac683c 100644 --- a/intern/cycles/CMakeLists.txt +++ b/intern/cycles/CMakeLists.txt @@ -13,9 +13,10 @@ if(WITH_RAYOPTIMIZATION AND SUPPORT_SSE_BUILD) endif() if(WIN32 AND MSVC) - set(CYCLES_SSE2_KERNEL_FLAGS "/arch:SSE2 -D_CRT_SECURE_NO_WARNINGS /fp:fast /EHsc") - set(CYCLES_SSE3_KERNEL_FLAGS "/arch:SSE2 -D_CRT_SECURE_NO_WARNINGS /fp:fast /EHsc") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /fp:fast -D_CRT_SECURE_NO_WARNINGS /EHsc") + set(CYCLES_SSE2_KERNEL_FLAGS "/arch:SSE2 -D_CRT_SECURE_NO_WARNINGS /fp:fast /EHsc /Ox /GS-") + # there is no /arch:SSE3, but intrinsics are available anyway + set(CYCLES_SSE3_KERNEL_FLAGS "/arch:SSE2 -D_CRT_SECURE_NO_WARNINGS /fp:fast /EHsc /Ox /GS-") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /fp:fast -D_CRT_SECURE_NO_WARNINGS /EHsc /Ox /GS-") elseif(CMAKE_COMPILER_IS_GNUCC) set(CYCLES_SSE2_KERNEL_FLAGS "-ffast-math -msse -msse2 -mfpmath=sse") set(CYCLES_SSE3_KERNEL_FLAGS "-ffast-math -msse -msse2 -msse3 -mssse3 -mfpmath=sse") diff --git a/intern/cycles/SConscript b/intern/cycles/SConscript index 43f838157b9..57f44ae9d52 100644 --- a/intern/cycles/SConscript +++ b/intern/cycles/SConscript @@ -79,11 +79,12 @@ if env['WITH_BF_RAYOPTIMIZATION']: sse3_cxxflags = Split(env['CXXFLAGS']) if env['OURPLATFORM'] == 'win32-vc': - sse2_cxxflags.append('/arch:SSE2 -D_CRT_SECURE_NO_WARNINGS /fp:fast /EHsc'.split()) - sse3_cxxflags.append('/arch:SSE2 -D_CRT_SECURE_NO_WARNINGS /fp:fast /EHsc'.split()) + sse2_cxxflags.append('/arch:SSE /arch:SSE2 -D_CRT_SECURE_NO_WARNINGS /fp:fast /EHsc /Ox /GS-'.split()) + # there is no /arch:SSE3, but intrinsics are available anyway + sse3_cxxflags.append('/arch:SSE /arch:SSE2 -D_CRT_SECURE_NO_WARNINGS /fp:fast /EHsc /Ox /GS-'.split()) elif env['OURPLATFORM'] == 'win64-vc': - sse2_cxxflags.append('-D_CRT_SECURE_NO_WARNINGS /fp:fast /EHsc'.split()) - sse3_cxxflags.append('-D_CRT_SECURE_NO_WARNINGS /fp:fast /EHsc'.split()) + 
sse2_cxxflags.append('-D_CRT_SECURE_NO_WARNINGS /fp:fast /EHsc /Ox /GS-'.split()) + sse3_cxxflags.append('-D_CRT_SECURE_NO_WARNINGS /fp:fast /EHsc /Ox /GS-'.split()) else: sse2_cxxflags.append('-ffast-math -msse -msse2 -mfpmath=sse'.split()) sse3_cxxflags.append('-ffast-math -msse -msse2 -msse3 -mssse3 -mfpmath=sse'.split()) diff --git a/intern/cycles/bvh/bvh.cpp b/intern/cycles/bvh/bvh.cpp index 69ccf2588c9..dd647fec2d3 100644 --- a/intern/cycles/bvh/bvh.cpp +++ b/intern/cycles/bvh/bvh.cpp @@ -579,6 +579,7 @@ void RegularBVH::pack_nodes(const array& prims, const BVHNode *root) int nextNodeIdx = 0; vector stack; + stack.reserve(BVHParams::MAX_DEPTH*2); stack.push_back(BVHStackEntry(root, nextNodeIdx++)); while(stack.size()) { @@ -776,6 +777,7 @@ void QBVH::pack_nodes(const array& prims, const BVHNode *root) int nextNodeIdx = 0; vector stack; + stack.reserve(BVHParams::MAX_DEPTH*2); stack.push_back(BVHStackEntry(root, nextNodeIdx++)); while(stack.size()) { diff --git a/intern/cycles/kernel/kernel_jitter.h b/intern/cycles/kernel/kernel_jitter.h index 15d2151228f..17704b48cee 100644 --- a/intern/cycles/kernel/kernel_jitter.h +++ b/intern/cycles/kernel/kernel_jitter.h @@ -37,7 +37,7 @@ __device_inline int cmj_fast_mod_pow2(int a, int b) /* a must be > 0 and b must be > 1 */ __device_inline int cmj_fast_div_pow2(int a, int b) { -#ifdef __KERNEL_SSE2__ +#if defined(__KERNEL_SSE2__) && !defined(_MSC_VER) return a >> __builtin_ctz(b); #else return a/b; @@ -46,7 +46,7 @@ __device_inline int cmj_fast_div_pow2(int a, int b) __device_inline uint cmj_w_mask(uint w) { -#ifdef __KERNEL_SSE2__ +#if defined(__KERNEL_SSE2__) && !defined(_MSC_VER) return ((1 << (32 - __builtin_clz(w))) - 1); #else w |= w >> 1; diff --git a/intern/cycles/util/util_math.h b/intern/cycles/util/util_math.h index 68668d88d44..05ed8a2d3f6 100644 --- a/intern/cycles/util/util_math.h +++ b/intern/cycles/util/util_math.h @@ -165,7 +165,7 @@ __device_inline float clamp(float a, float mn, float mx) 
__device_inline int float_to_int(float f) { -#ifdef __KERNEL_SSE2__ +#if defined(__KERNEL_SSE2__) && !defined(_MSC_VER) return _mm_cvtt_ss2si(_mm_load_ss(&f)); #else return (int)f; diff --git a/intern/cycles/util/util_types.h b/intern/cycles/util/util_types.h index 32529e47c23..7ad0403e6c6 100644 --- a/intern/cycles/util/util_types.h +++ b/intern/cycles/util/util_types.h @@ -23,11 +23,6 @@ #include - // Needed for _BistScanForward and _BitScanReverse on MSVC 2012 -# if defined(_MSC_VER) && _MSC_VER >= 1700 -# include -# endif - #endif /* Qualifiers for kernel code shared by CPU and GPU */ @@ -511,26 +506,6 @@ template __device_inline const __m12 } #endif -#if defined(__KERNEL_SSE2__) && defined(_MSC_VER) - -/* count zeros from start or end of integer bits */ - -__device_inline uint32_t __builtin_ctz(uint32_t i) -{ - unsigned long r = 0; - _BitScanForward(&r, i); - return (uint32_t)r; -} - -__device_inline uint32_t __builtin_clz(uint32_t i) -{ - unsigned long r = 0; - _BitScanReverse(&r, i); - return (uint32_t)r; -} - -#endif - CCL_NAMESPACE_END #endif /* __UTIL_TYPES_H__ */