Fix #35665: more CUDA issues with recent kernel changes, tested on sm_20, sm_21

and sm_30 cards, so hopefully it should all work now.

Also includes some warnings fixes related to nvcc compiler arguments, should make
no difference otherwise.
This commit is contained in:
Brecht Van Lommel
2013-06-11 21:58:48 +00:00
parent 3d21bf9688
commit 37f92119e4
4 changed files with 61 additions and 77 deletions

View File

@@ -409,9 +409,8 @@ __device float4 kernel_path_progressive(KernelGlobals *kg, RNG *rng, int sample,
/* ambient occlusion */
if(kernel_data.integrator.use_ambient_occlusion || (sd.flag & SD_AO)) {
/* todo: solve correlation */
float2 bsdf_uv = path_rng_2D(kg, rng, sample, num_samples, rng_offset + PRNG_BSDF_U);
float bsdf_u = bsdf_uv.x;
float bsdf_v = bsdf_uv.y;
float bsdf_u, bsdf_v;
path_rng_2D(kg, rng, sample, num_samples, rng_offset + PRNG_BSDF_U, &bsdf_u, &bsdf_v);
float ao_factor = kernel_data.background.ao_factor;
float3 ao_N;
@@ -450,9 +449,8 @@ __device float4 kernel_path_progressive(KernelGlobals *kg, RNG *rng, int sample,
#else
float light_o = path_rng_1D(kg, rng, sample, num_samples, rng_offset + PRNG_LIGHT_F);
#endif
float2 light_uv = path_rng_2D(kg, rng, sample, num_samples, rng_offset + PRNG_LIGHT_U);
float light_u = light_uv.x;
float light_v = light_uv.y;
float light_u, light_v;
path_rng_2D(kg, rng, sample, num_samples, rng_offset + PRNG_LIGHT_U, &light_u, &light_v);
Ray light_ray;
BsdfEval L_light;
@@ -484,9 +482,8 @@ __device float4 kernel_path_progressive(KernelGlobals *kg, RNG *rng, int sample,
BsdfEval bsdf_eval;
float3 bsdf_omega_in;
differential3 bsdf_domega_in;
float2 bsdf_uv = path_rng_2D(kg, rng, sample, num_samples, rng_offset + PRNG_BSDF_U);
float bsdf_u = bsdf_uv.x;
float bsdf_v = bsdf_uv.y;
float bsdf_u, bsdf_v;
path_rng_2D(kg, rng, sample, num_samples, rng_offset + PRNG_BSDF_U, &bsdf_u, &bsdf_v);
int label;
label = shader_bsdf_sample(kg, &sd, bsdf_u, bsdf_v, &bsdf_eval,
@@ -653,10 +650,8 @@ __device void kernel_path_indirect(KernelGlobals *kg, RNG *rng, int sample, Ray
#ifdef __AO__
/* ambient occlusion */
if(kernel_data.integrator.use_ambient_occlusion || (sd.flag & SD_AO)) {
/* todo: solve correlation */
float2 bsdf_uv = path_rng_2D(kg, rng, sample, num_total_samples, rng_offset + PRNG_BSDF_U);
float bsdf_u = bsdf_uv.x;
float bsdf_v = bsdf_uv.y;
float bsdf_u, bsdf_v;
path_rng_2D(kg, rng, sample, num_total_samples, rng_offset + PRNG_BSDF_U, &bsdf_u, &bsdf_v);
float ao_factor = kernel_data.background.ao_factor;
float3 ao_N;
@@ -695,9 +690,8 @@ __device void kernel_path_indirect(KernelGlobals *kg, RNG *rng, int sample, Ray
#else
float light_o = path_rng_1D(kg, rng, sample, num_total_samples, rng_offset + PRNG_LIGHT_F);
#endif
float2 light_uv = path_rng_2D(kg, rng, sample, num_total_samples, rng_offset + PRNG_LIGHT_U);
float light_u = light_uv.x;
float light_v = light_uv.y;
float light_u, light_v;
path_rng_2D(kg, rng, sample, num_total_samples, rng_offset + PRNG_LIGHT_U, &light_u, &light_v);
Ray light_ray;
BsdfEval L_light;
@@ -730,9 +724,8 @@ __device void kernel_path_indirect(KernelGlobals *kg, RNG *rng, int sample, Ray
BsdfEval bsdf_eval;
float3 bsdf_omega_in;
differential3 bsdf_domega_in;
float2 bsdf_uv = path_rng_2D(kg, rng, sample, num_total_samples, rng_offset + PRNG_BSDF_U);
float bsdf_u = bsdf_uv.x;
float bsdf_v = bsdf_uv.y;
float bsdf_u, bsdf_v;
path_rng_2D(kg, rng, sample, num_total_samples, rng_offset + PRNG_BSDF_U, &bsdf_u, &bsdf_v);
int label;
label = shader_bsdf_sample(kg, &sd, bsdf_u, bsdf_v, &bsdf_eval,
@@ -784,10 +777,8 @@ __device_noinline void kernel_path_non_progressive_lighting(KernelGlobals *kg, R
float3 ao_bsdf = shader_bsdf_ao(kg, sd, ao_factor, &ao_N);
for(int j = 0; j < num_samples; j++) {
/* todo: solve correlation */
float2 bsdf_uv = path_rng_2D(kg, rng, sample*num_samples + j, aa_samples*num_samples, rng_offset + PRNG_BSDF_U);
float bsdf_u = bsdf_uv.x;
float bsdf_v = bsdf_uv.y;
float bsdf_u, bsdf_v;
path_rng_2D(kg, rng, sample*num_samples + j, aa_samples*num_samples, rng_offset + PRNG_BSDF_U, &bsdf_u, &bsdf_v);
float3 ao_D;
float ao_pdf;
@@ -836,9 +827,8 @@ __device_noinline void kernel_path_non_progressive_lighting(KernelGlobals *kg, R
num_samples_inv *= 0.5f;
for(int j = 0; j < num_samples; j++) {
float2 light_uv = path_rng_2D(kg, &lamp_rng, sample*num_samples + j, aa_samples*num_samples, rng_offset + PRNG_LIGHT_U);
float light_u = light_uv.x;
float light_v = light_uv.y;
float light_u, light_v;
path_rng_2D(kg, &lamp_rng, sample*num_samples + j, aa_samples*num_samples, rng_offset + PRNG_LIGHT_U, &light_u, &light_v);
if(direct_emission(kg, sd, i, 0.0f, 0.0f, light_u, light_v, &light_ray, &L_light, &is_lamp)) {
/* trace shadow ray */
@@ -862,9 +852,8 @@ __device_noinline void kernel_path_non_progressive_lighting(KernelGlobals *kg, R
for(int j = 0; j < num_samples; j++) {
float light_t = path_rng_1D(kg, rng, sample*num_samples + j, aa_samples*num_samples, rng_offset + PRNG_LIGHT);
float2 light_uv = path_rng_2D(kg, rng, sample*num_samples + j, aa_samples*num_samples, rng_offset + PRNG_LIGHT_U);
float light_u = light_uv.x;
float light_v = light_uv.y;
float light_u, light_v;
path_rng_2D(kg, rng, sample*num_samples + j, aa_samples*num_samples, rng_offset + PRNG_LIGHT_U, &light_u, &light_v);
/* only sample triangle lights */
if(kernel_data.integrator.num_all_lights)
@@ -913,9 +902,8 @@ __device_noinline void kernel_path_non_progressive_lighting(KernelGlobals *kg, R
BsdfEval bsdf_eval;
float3 bsdf_omega_in;
differential3 bsdf_domega_in;
float2 bsdf_uv = path_rng_2D(kg, &bsdf_rng, sample*num_samples + j, aa_samples*num_samples, rng_offset + PRNG_BSDF_U);
float bsdf_u = bsdf_uv.x;
float bsdf_v = bsdf_uv.y;
float bsdf_u, bsdf_v;
path_rng_2D(kg, &bsdf_rng, sample*num_samples + j, aa_samples*num_samples, rng_offset + PRNG_BSDF_U, &bsdf_u, &bsdf_v);
int label;
label = shader_bsdf_sample_closure(kg, sd, sc, bsdf_u, bsdf_v, &bsdf_eval,
@@ -1162,11 +1150,8 @@ __device void kernel_path_trace(KernelGlobals *kg,
float lens_u = 0.0f, lens_v = 0.0f;
if(kernel_data.cam.aperturesize > 0.0f) {
float2 lens_uv = path_rng_2D(kg, &rng, sample, num_samples, PRNG_LENS_U);
lens_u = lens_uv.x;
lens_v = lens_uv.y;
}
if(kernel_data.cam.aperturesize > 0.0f)
path_rng_2D(kg, &rng, sample, num_samples, PRNG_LENS_U, &lens_u, &lens_v);
float time = 0.0f;