Fix T40370: Cycles CUDA baking timeout with high number of AA samples.

Baking now does one AA sample at a time, just like final render. There is
also some code for shader antialiasing that would solve T40369, but it is
disabled for now because it may have unpredictable side effects.
Author: Brecht Van Lommel
Date: 2014-06-06 14:40:09 +02:00
parent 553264ff8e
commit e4e58d4612
17 changed files with 249 additions and 169 deletions


@@ -764,40 +764,45 @@ public:
 			int shader_w = min(shader_chunk_size, end - shader_x);
 
-			/* pass in parameters */
-			int offset = 0;
+			for(int sample = 0; sample < task.num_samples; sample++) {
+				/* pass in parameters */
+				int offset = 0;
 
-			cuda_assert(cuParamSetv(cuShader, offset, &d_input, sizeof(d_input)));
-			offset += sizeof(d_input);
+				cuda_assert(cuParamSetv(cuShader, offset, &d_input, sizeof(d_input)));
+				offset += sizeof(d_input);
 
-			cuda_assert(cuParamSetv(cuShader, offset, &d_output, sizeof(d_output)));
-			offset += sizeof(d_output);
+				cuda_assert(cuParamSetv(cuShader, offset, &d_output, sizeof(d_output)));
+				offset += sizeof(d_output);
 
-			int shader_eval_type = task.shader_eval_type;
-			offset = align_up(offset, __alignof(shader_eval_type));
+				int shader_eval_type = task.shader_eval_type;
+				offset = align_up(offset, __alignof(shader_eval_type));
 
-			cuda_assert(cuParamSeti(cuShader, offset, task.shader_eval_type));
-			offset += sizeof(task.shader_eval_type);
+				cuda_assert(cuParamSeti(cuShader, offset, task.shader_eval_type));
+				offset += sizeof(task.shader_eval_type);
 
-			cuda_assert(cuParamSeti(cuShader, offset, shader_x));
-			offset += sizeof(shader_x);
+				cuda_assert(cuParamSeti(cuShader, offset, shader_x));
+				offset += sizeof(shader_x);
 
-			cuda_assert(cuParamSeti(cuShader, offset, shader_w));
-			offset += sizeof(shader_w);
+				cuda_assert(cuParamSeti(cuShader, offset, shader_w));
+				offset += sizeof(shader_w);
 
-			cuda_assert(cuParamSetSize(cuShader, offset));
+				cuda_assert(cuParamSeti(cuShader, offset, sample));
+				offset += sizeof(sample);
 
-			/* launch kernel */
-			int threads_per_block;
-			cuda_assert(cuFuncGetAttribute(&threads_per_block, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, cuShader));
+				cuda_assert(cuParamSetSize(cuShader, offset));
 
-			int xblocks = (shader_w + threads_per_block - 1)/threads_per_block;
+				/* launch kernel */
+				int threads_per_block;
+				cuda_assert(cuFuncGetAttribute(&threads_per_block, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, cuShader));
 
-			cuda_assert(cuFuncSetCacheConfig(cuShader, CU_FUNC_CACHE_PREFER_L1));
-			cuda_assert(cuFuncSetBlockShape(cuShader, threads_per_block, 1, 1));
-			cuda_assert(cuLaunchGrid(cuShader, xblocks, 1));
+				int xblocks = (shader_w + threads_per_block - 1)/threads_per_block;
 
-			cuda_assert(cuCtxSynchronize());
+				cuda_assert(cuFuncSetCacheConfig(cuShader, CU_FUNC_CACHE_PREFER_L1));
+				cuda_assert(cuFuncSetBlockShape(cuShader, threads_per_block, 1, 1));
+				cuda_assert(cuLaunchGrid(cuShader, xblocks, 1));
+
+				cuda_assert(cuCtxSynchronize());
+			}
 		}
 
 		cuda_pop_context();
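
For reference, a minimal sketch of the same per-sample launch pattern, written against the current cuLaunchKernel driver API instead of the legacy cuParamSet*/cuLaunchGrid calls shown in the diff above. The helper function and its parameter layout are hypothetical, not the Cycles code, and error checking is omitted; the point is that each AA sample becomes its own short kernel launch followed by a synchronize, so no single launch runs long enough to trip the GPU driver's watchdog timer.

#include <cuda.h>

/* Hypothetical helper, not part of Cycles: evaluate a shader kernel one AA
 * sample per launch. Each launch is followed by cuCtxSynchronize(), so the
 * GPU never executes a single long-running kernel. */
static void launch_shader_per_sample(CUfunction cuShader,
                                     CUdeviceptr d_input,
                                     CUdeviceptr d_output,
                                     int shader_eval_type,
                                     int shader_x,
                                     int shader_w,
                                     int num_samples)
{
	int threads_per_block;
	cuFuncGetAttribute(&threads_per_block,
	                   CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, cuShader);

	int xblocks = (shader_w + threads_per_block - 1)/threads_per_block;

	for(int sample = 0; sample < num_samples; sample++) {
		/* kernel arguments are passed by address with cuLaunchKernel */
		void *args[] = {&d_input, &d_output, &shader_eval_type,
		                &shader_x, &shader_w, &sample};

		cuLaunchKernel(cuShader,
		               xblocks, 1, 1,           /* grid size */
		               threads_per_block, 1, 1, /* block size */
		               0, 0,                    /* shared memory, default stream */
		               args, 0);

		/* finish this sample before launching the next one */
		cuCtxSynchronize();
	}
}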