Cycles: Tweaks for 32-bit CUDA binaries
Tweak some inline policies. Nothing totally crazy yet, and in fact we now have one fewer ifdef statement.
This commit is contained in:
@@ -178,7 +178,12 @@ ccl_device_inline float3 motion_triangle_refine(KernelGlobals *kg, ShaderData *s
|
|||||||
/* Same as above, except that isect->t is assumed to be in object space for instancing */
|
/* Same as above, except that isect->t is assumed to be in object space for instancing */
|
||||||
|
|
||||||
#ifdef __SUBSURFACE__
|
#ifdef __SUBSURFACE__
|
||||||
ccl_device_inline float3 motion_triangle_refine_subsurface(KernelGlobals *kg, ShaderData *sd, const Intersection *isect, const Ray *ray, float3 verts[3])
|
# if defined(__KERNEL_CUDA__) && (defined(i386) || defined(_M_IX86))
|
||||||
|
ccl_device_noinline
|
||||||
|
# else
|
||||||
|
ccl_device_inline
|
||||||
|
# endif
|
||||||
|
float3 motion_triangle_refine_subsurface(KernelGlobals *kg, ShaderData *sd, const Intersection *isect, const Ray *ray, float3 verts[3])
|
||||||
{
|
{
|
||||||
float3 P = ray->P;
|
float3 P = ray->P;
|
||||||
float3 D = ray->D;
|
float3 D = ray->D;
|
||||||
|
@@ -49,13 +49,12 @@ typedef struct IsectPrecalc {
|
|||||||
float Sx, Sy, Sz;
|
float Sx, Sy, Sz;
|
||||||
} IsectPrecalc;
|
} IsectPrecalc;
|
||||||
|
|
||||||
#if defined(__KERNEL_CUDA__)
|
#if (defined(__KERNEL_OPENCL_APPLE__)) || \
|
||||||
ccl_device_inline
|
(defined(__KERNEL_CUDA__) && (defined(i386) || defined(_M_IX86)))
|
||||||
#elif defined(__KERNEL_OPENCL_APPLE__)
|
|
||||||
ccl_device_noinline
|
ccl_device_noinline
|
||||||
#else /* defined(__KERNEL_OPENCL_APPLE__) */
|
#else
|
||||||
ccl_device_inline
|
ccl_device_inline
|
||||||
#endif /* defined(__KERNEL_OPENCL_APPLE__) */
|
#endif
|
||||||
void triangle_intersect_precalc(float3 dir,
|
void triangle_intersect_precalc(float3 dir,
|
||||||
IsectPrecalc *isect_precalc)
|
IsectPrecalc *isect_precalc)
|
||||||
{
|
{
|
||||||
|
@@ -55,7 +55,7 @@ ccl_device_inline void bsdf_eval_init(BsdfEval *eval, ClosureType type, float3 v
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* TODO(sergey): This is just a workaround for annoying 6.5 compiler bug. */
|
/* TODO(sergey): This is just a workaround for annoying 6.5 compiler bug. */
|
||||||
#if !defined(__KERNEL_CUDA__) || __CUDA_ARCH__ < 500
|
#if !defined(__KERNEL_CUDA__) || __CUDA_ARCH__ < 520
|
||||||
ccl_device_inline
|
ccl_device_inline
|
||||||
#else
|
#else
|
||||||
ccl_device_noinline
|
ccl_device_noinline
|
||||||
|
@@ -620,12 +620,7 @@ ccl_device void lamp_light_sample(KernelGlobals *kg, int lamp,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#if defined(__KERNEL_CUDA__) && (__CUDA_ARCH__ >= 500) && (defined(i386) || defined(_M_IX86))
|
ccl_device bool lamp_light_eval(KernelGlobals *kg, int lamp, float3 P, float3 D, float t, LightSample *ls)
|
||||||
ccl_device_noinline
|
|
||||||
#else
|
|
||||||
ccl_device
|
|
||||||
#endif
|
|
||||||
bool lamp_light_eval(KernelGlobals *kg, int lamp, float3 P, float3 D, float t, LightSample *ls)
|
|
||||||
{
|
{
|
||||||
float4 data0 = kernel_tex_fetch(__light_data, lamp*LIGHT_SIZE + 0);
|
float4 data0 = kernel_tex_fetch(__light_data, lamp*LIGHT_SIZE + 0);
|
||||||
float4 data1 = kernel_tex_fetch(__light_data, lamp*LIGHT_SIZE + 1);
|
float4 data1 = kernel_tex_fetch(__light_data, lamp*LIGHT_SIZE + 1);
|
||||||
|
Reference in New Issue
Block a user