Fix T40370: Cycles CUDA baking timeout with high number of AA samples.

Baking now does one AA sample at a time, just like final render. There is
also some code for shader antialiasing that would solve T40369, but it is
disabled for now because it may have unpredictable side effects.
Author: Brecht Van Lommel
Date: 2014-06-06 14:40:09 +02:00
parent 553264ff8e
commit e4e58d4612
17 changed files with 249 additions and 169 deletions


@@ -764,40 +764,45 @@ public:
 			int shader_w = min(shader_chunk_size, end - shader_x);
 
-			/* pass in parameters */
-			int offset = 0;
+			for(int sample = 0; sample < task.num_samples; sample++) {
+				/* pass in parameters */
+				int offset = 0;
 
-			cuda_assert(cuParamSetv(cuShader, offset, &d_input, sizeof(d_input)));
-			offset += sizeof(d_input);
+				cuda_assert(cuParamSetv(cuShader, offset, &d_input, sizeof(d_input)));
+				offset += sizeof(d_input);
 
-			cuda_assert(cuParamSetv(cuShader, offset, &d_output, sizeof(d_output)));
-			offset += sizeof(d_output);
+				cuda_assert(cuParamSetv(cuShader, offset, &d_output, sizeof(d_output)));
+				offset += sizeof(d_output);
 
-			int shader_eval_type = task.shader_eval_type;
-			offset = align_up(offset, __alignof(shader_eval_type));
+				int shader_eval_type = task.shader_eval_type;
+				offset = align_up(offset, __alignof(shader_eval_type));
 
-			cuda_assert(cuParamSeti(cuShader, offset, task.shader_eval_type));
-			offset += sizeof(task.shader_eval_type);
+				cuda_assert(cuParamSeti(cuShader, offset, task.shader_eval_type));
+				offset += sizeof(task.shader_eval_type);
 
-			cuda_assert(cuParamSeti(cuShader, offset, shader_x));
-			offset += sizeof(shader_x);
+				cuda_assert(cuParamSeti(cuShader, offset, shader_x));
+				offset += sizeof(shader_x);
 
-			cuda_assert(cuParamSeti(cuShader, offset, shader_w));
-			offset += sizeof(shader_w);
+				cuda_assert(cuParamSeti(cuShader, offset, shader_w));
+				offset += sizeof(shader_w);
 
-			cuda_assert(cuParamSetSize(cuShader, offset));
+				cuda_assert(cuParamSeti(cuShader, offset, sample));
+				offset += sizeof(sample);
 
-			/* launch kernel */
-			int threads_per_block;
-			cuda_assert(cuFuncGetAttribute(&threads_per_block, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, cuShader));
+				cuda_assert(cuParamSetSize(cuShader, offset));
 
-			int xblocks = (shader_w + threads_per_block - 1)/threads_per_block;
+				/* launch kernel */
+				int threads_per_block;
+				cuda_assert(cuFuncGetAttribute(&threads_per_block, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, cuShader));
 
-			cuda_assert(cuFuncSetCacheConfig(cuShader, CU_FUNC_CACHE_PREFER_L1));
-			cuda_assert(cuFuncSetBlockShape(cuShader, threads_per_block, 1, 1));
-			cuda_assert(cuLaunchGrid(cuShader, xblocks, 1));
+				int xblocks = (shader_w + threads_per_block - 1)/threads_per_block;
 
-			cuda_assert(cuCtxSynchronize());
+				cuda_assert(cuFuncSetCacheConfig(cuShader, CU_FUNC_CACHE_PREFER_L1));
+				cuda_assert(cuFuncSetBlockShape(cuShader, threads_per_block, 1, 1));
+				cuda_assert(cuLaunchGrid(cuShader, xblocks, 1));
+
+				cuda_assert(cuCtxSynchronize());
+			}
 		}
 
 		cuda_pop_context();
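
For reference, a minimal sketch of the same per-sample launch pattern, written against the current cuLaunchKernel driver API instead of the legacy cuParamSet*/cuLaunchGrid calls shown in the diff above. The helper function and its parameter layout are hypothetical, not the Cycles code, and error checking is omitted; the point is that each AA sample becomes its own short kernel launch followed by a synchronize, so no single launch runs long enough to trip the GPU driver's watchdog timer.

#include <cuda.h>

/* Hypothetical helper, not part of Cycles: evaluate a shader kernel one AA
 * sample per launch. Each launch is followed by cuCtxSynchronize(), so the
 * GPU never executes a single long-running kernel. */
static void launch_shader_per_sample(CUfunction cuShader,
                                     CUdeviceptr d_input,
                                     CUdeviceptr d_output,
                                     int shader_eval_type,
                                     int shader_x,
                                     int shader_w,
                                     int num_samples)
{
	int threads_per_block;
	cuFuncGetAttribute(&threads_per_block,
	                   CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, cuShader);

	int xblocks = (shader_w + threads_per_block - 1)/threads_per_block;

	for(int sample = 0; sample < num_samples; sample++) {
		/* kernel arguments are passed by address with cuLaunchKernel */
		void *args[] = {&d_input, &d_output, &shader_eval_type,
		                &shader_x, &shader_w, &sample};

		cuLaunchKernel(cuShader,
		               xblocks, 1, 1,           /* grid size */
		               threads_per_block, 1, 1, /* block size */
		               0, 0,                    /* shared memory, default stream */
		               args, 0);

		/* finish this sample before launching the next one */
		cuCtxSynchronize();
	}
}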