Attempted fix for issue with latest CUDA kernel for 32 bit windows/linux/mac,

there seems to be some sort of compiler bug in CUDA toolkit 4.2, uninlining a
few functions seems to avoid it.
This commit is contained in:
Brecht Van Lommel
2013-02-11 22:41:11 +00:00
parent 1c4f704d5c
commit 9307565269
5 changed files with 11 additions and 10 deletions

View File

@@ -20,7 +20,7 @@ CCL_NAMESPACE_BEGIN
/* Direction Emission */
__device float3 direct_emissive_eval(KernelGlobals *kg, float rando,
__device_noinline float3 direct_emissive_eval(KernelGlobals *kg, float rando,
LightSample *ls, float u, float v, float3 I, float t, float time)
{
/* setup shading at emitter */
@@ -74,7 +74,7 @@ __device float3 direct_emissive_eval(KernelGlobals *kg, float rando,
return eval;
}
__device bool direct_emission(KernelGlobals *kg, ShaderData *sd, int lindex,
__device_noinline bool direct_emission(KernelGlobals *kg, ShaderData *sd, int lindex,
float randt, float rando, float randu, float randv, Ray *ray, BsdfEval *eval,
bool *is_lamp)
{
@@ -148,7 +148,7 @@ __device bool direct_emission(KernelGlobals *kg, ShaderData *sd, int lindex,
/* Indirect Primitive Emission */
__device float3 indirect_primitive_emission(KernelGlobals *kg, ShaderData *sd, float t, int path_flag, float bsdf_pdf)
__device_noinline float3 indirect_primitive_emission(KernelGlobals *kg, ShaderData *sd, float t, int path_flag, float bsdf_pdf)
{
/* evaluate emissive closure */
float3 L = shader_emissive_eval(kg, sd);
@@ -171,7 +171,7 @@ __device float3 indirect_primitive_emission(KernelGlobals *kg, ShaderData *sd, f
/* Indirect Lamp Emission */
__device bool indirect_lamp_emission(KernelGlobals *kg, Ray *ray, int path_flag, float bsdf_pdf, float randt, float3 *emission)
__device_noinline bool indirect_lamp_emission(KernelGlobals *kg, Ray *ray, int path_flag, float bsdf_pdf, float randt, float3 *emission)
{
LightSample ls;
int lamp = lamp_light_eval_sample(kg, randt);
@@ -200,7 +200,7 @@ __device bool indirect_lamp_emission(KernelGlobals *kg, Ray *ray, int path_flag,
/* Indirect Background */
__device float3 indirect_background(KernelGlobals *kg, Ray *ray, int path_flag, float bsdf_pdf)
__device_noinline float3 indirect_background(KernelGlobals *kg, Ray *ray, int path_flag, float bsdf_pdf)
{
#ifdef __BACKGROUND__
/* evaluate background closure */

View File

@@ -454,7 +454,7 @@ __device void triangle_light_sample(KernelGlobals *kg, int prim, int object,
ls->lamp = ~0;
ls->shader |= SHADER_USE_MIS;
ls->t = 0.0f;
ls->type = LIGHT_AREA;
ls->type = LIGHT_TRIANGLE;
ls->eval_fac = 1.0f;
object_transform_light_sample(kg, ls, object, time);

View File

@@ -53,7 +53,7 @@ __device_noinline void shader_setup_object_transforms(KernelGlobals *kg, ShaderD
}
#endif
__device void shader_setup_from_ray(KernelGlobals *kg, ShaderData *sd,
__device_noinline void shader_setup_from_ray(KernelGlobals *kg, ShaderData *sd,
const Intersection *isect, const Ray *ray)
{
#ifdef __INSTANCING__
@@ -160,7 +160,7 @@ __device void shader_setup_from_ray(KernelGlobals *kg, ShaderData *sd,
/* ShaderData setup from position sampled on mesh */
__device void shader_setup_from_sample(KernelGlobals *kg, ShaderData *sd,
__device_noinline void shader_setup_from_sample(KernelGlobals *kg, ShaderData *sd,
const float3 P, const float3 Ng, const float3 I,
int shader, int object, int prim, float u, float v, float t, float time, int segment = ~0)
{
@@ -283,7 +283,7 @@ __device void shader_setup_from_sample(KernelGlobals *kg, ShaderData *sd,
/* ShaderData setup for displacement */
__device void shader_setup_from_displace(KernelGlobals *kg, ShaderData *sd,
__device_noinline void shader_setup_from_displace(KernelGlobals *kg, ShaderData *sd,
int object, int prim, float u, float v)
{
/* Note: no OSLShader::init call here, this is done in shader_setup_from_sample! */

View File

@@ -299,6 +299,7 @@ typedef enum LightType {
LIGHT_AREA,
LIGHT_AO,
LIGHT_SPOT,
LIGHT_TRIANGLE,
LIGHT_STRAND
} LightType;

View File

@@ -36,7 +36,7 @@ class Device;
class DeviceScene;
class ImageManager;
class OSLRenderServices;
class OSLGlobals;
struct OSLGlobals;
class Scene;
class ShaderGraph;
class ShaderNode;