Cycles: prepare to make CUDA 5.0 the official version we use

* Add CUDA compiler version detection to cmake/scons/runtime
* Remove noinline in kernel_shader.h and reenable --use_fast_math if CUDA 5.x
  is used, these were workarounds for CUDA 4.2 bugs
* Change max number of registers to 32 for sm 2.x (based on performance tests
  from Martijn Berger and confirmed here), and also for NVidia OpenCL.

Overall it seems that with these changes and the latest CUDA 5.0 download, that
performance is as good as or better than the 2.67b release with the scenes and
graphics cards I tested.
This commit is contained in:
Brecht Van Lommel
2013-06-19 17:54:23 +00:00
parent a7416641e6
commit 16204bd647
8 changed files with 209 additions and 22 deletions

View File

@@ -38,7 +38,12 @@ CCL_NAMESPACE_BEGIN
/* ShaderData setup from incoming ray */
#ifdef __OBJECT_MOTION__
__device_noinline void shader_setup_object_transforms(KernelGlobals *kg, ShaderData *sd, float time)
#if defined(__KERNEL_CUDA_VERSION__) && __KERNEL_CUDA_VERSION__ <= 42
__device_noinline
#else
__device
#endif
void shader_setup_object_transforms(KernelGlobals *kg, ShaderData *sd, float time)
{
/* note that this is a separate non-inlined function to work around crash
* on CUDA sm 2.0, otherwise kernel execution crashes (compiler bug?) */
@@ -53,7 +58,12 @@ __device_noinline void shader_setup_object_transforms(KernelGlobals *kg, ShaderD
}
#endif
__device_noinline void shader_setup_from_ray(KernelGlobals *kg, ShaderData *sd,
#if defined(__KERNEL_CUDA_VERSION__) && __KERNEL_CUDA_VERSION__ <= 42
__device_noinline
#else
__device
#endif
void shader_setup_from_ray(KernelGlobals *kg, ShaderData *sd,
const Intersection *isect, const Ray *ray)
{
#ifdef __INSTANCING__
@@ -260,7 +270,12 @@ __device_inline void shader_setup_from_subsurface(KernelGlobals *kg, ShaderData
/* ShaderData setup from position sampled on mesh */
__device_noinline void shader_setup_from_sample(KernelGlobals *kg, ShaderData *sd,
#if defined(__KERNEL_CUDA_VERSION__) && __KERNEL_CUDA_VERSION__ <= 42
__device_noinline
#else
__device
#endif
void shader_setup_from_sample(KernelGlobals *kg, ShaderData *sd,
const float3 P, const float3 Ng, const float3 I,
int shader, int object, int prim, float u, float v, float t, float time, int segment)
{