Switch to the CUDA 4.0-style API for kernel invocation. This is a small clean-up that makes no functional changes but makes the code a bit more readable.

Differential revision: https://developer.blender.org/D659

Reviewed by: Sergey Sharybin, Thomas Dinges
This commit is contained in:
Martijn Berger
2014-07-25 13:33:19 +02:00
parent d0f0d82205
commit bae2b3a688
3 changed files with 45 additions and 94 deletions

View File

@@ -615,40 +615,17 @@ public:
if(have_error())
return;
/* pass in parameters */
int offset = 0;
cuda_assert(cuParamSetv(cuPathTrace, offset, &d_buffer, sizeof(d_buffer)));
offset += sizeof(d_buffer);
cuda_assert(cuParamSetv(cuPathTrace, offset, &d_rng_state, sizeof(d_rng_state)));
offset += sizeof(d_rng_state);
offset = align_up(offset, __alignof(sample));
cuda_assert(cuParamSeti(cuPathTrace, offset, sample));
offset += sizeof(sample);
cuda_assert(cuParamSeti(cuPathTrace, offset, rtile.x));
offset += sizeof(rtile.x);
cuda_assert(cuParamSeti(cuPathTrace, offset, rtile.y));
offset += sizeof(rtile.y);
cuda_assert(cuParamSeti(cuPathTrace, offset, rtile.w));
offset += sizeof(rtile.w);
cuda_assert(cuParamSeti(cuPathTrace, offset, rtile.h));
offset += sizeof(rtile.h);
cuda_assert(cuParamSeti(cuPathTrace, offset, rtile.offset));
offset += sizeof(rtile.offset);
cuda_assert(cuParamSeti(cuPathTrace, offset, rtile.stride));
offset += sizeof(rtile.stride);
cuda_assert(cuParamSetSize(cuPathTrace, offset));
void *args[] = {&d_buffer,
&d_rng_state,
&sample,
&rtile.x,
&rtile.y,
&rtile.w,
&rtile.h,
&rtile.offset,
&rtile.stride};
/* launch kernel */
int threads_per_block;
@@ -666,8 +643,11 @@ public:
int yblocks = (rtile.h + ythreads - 1)/ythreads;
cuda_assert(cuFuncSetCacheConfig(cuPathTrace, CU_FUNC_CACHE_PREFER_L1));
cuda_assert(cuFuncSetBlockShape(cuPathTrace, xthreads, ythreads, 1));
cuda_assert(cuLaunchGrid(cuPathTrace, xblocks, yblocks));
cuda_assert(cuLaunchKernel(cuPathTrace,
xblocks , yblocks, 1, /* blocks */
xthreads, ythreads, 1, /* threads */
0, 0, args, 0));
cuda_assert(cuCtxSynchronize());
@@ -693,40 +673,19 @@ public:
cuda_assert(cuModuleGetFunction(&cuFilmConvert, cuModule, "kernel_cuda_convert_to_byte"));
}
/* pass in parameters */
int offset = 0;
cuda_assert(cuParamSetv(cuFilmConvert, offset, &d_rgba, sizeof(d_rgba)));
offset += sizeof(d_rgba);
cuda_assert(cuParamSetv(cuFilmConvert, offset, &d_buffer, sizeof(d_buffer)));
offset += sizeof(d_buffer);
float sample_scale = 1.0f/(task.sample + 1);
offset = align_up(offset, __alignof(sample_scale));
cuda_assert(cuParamSetf(cuFilmConvert, offset, sample_scale));
offset += sizeof(sample_scale);
cuda_assert(cuParamSeti(cuFilmConvert, offset, task.x));
offset += sizeof(task.x);
cuda_assert(cuParamSeti(cuFilmConvert, offset, task.y));
offset += sizeof(task.y);
cuda_assert(cuParamSeti(cuFilmConvert, offset, task.w));
offset += sizeof(task.w);
cuda_assert(cuParamSeti(cuFilmConvert, offset, task.h));
offset += sizeof(task.h);
cuda_assert(cuParamSeti(cuFilmConvert, offset, task.offset));
offset += sizeof(task.offset);
cuda_assert(cuParamSeti(cuFilmConvert, offset, task.stride));
offset += sizeof(task.stride);
cuda_assert(cuParamSetSize(cuFilmConvert, offset));
/* pass in parameters */
void *args[] = {&d_rgba,
&d_buffer,
&sample_scale,
&task.x,
&task.y,
&task.w,
&task.h,
&task.offset,
&task.stride};
/* launch kernel */
int threads_per_block;
@@ -738,8 +697,11 @@ public:
int yblocks = (task.h + ythreads - 1)/ythreads;
cuda_assert(cuFuncSetCacheConfig(cuFilmConvert, CU_FUNC_CACHE_PREFER_L1));
cuda_assert(cuFuncSetBlockShape(cuFilmConvert, xthreads, ythreads, 1));
cuda_assert(cuLaunchGrid(cuFilmConvert, xblocks, yblocks));
cuda_assert(cuLaunchKernel(cuFilmConvert,
xblocks , yblocks, 1, /* blocks */
xthreads, ythreads, 1, /* threads */
0, 0, args, 0));
unmap_pixels((rgba_byte)? rgba_byte: rgba_half);
@@ -777,31 +739,14 @@ public:
int shader_w = min(shader_chunk_size, end - shader_x);
for(int sample = 0; sample < task.num_samples; sample++) {
/* pass in parameters */
int offset = 0;
cuda_assert(cuParamSetv(cuShader, offset, &d_input, sizeof(d_input)));
offset += sizeof(d_input);
cuda_assert(cuParamSetv(cuShader, offset, &d_output, sizeof(d_output)));
offset += sizeof(d_output);
int shader_eval_type = task.shader_eval_type;
offset = align_up(offset, __alignof(shader_eval_type));
cuda_assert(cuParamSeti(cuShader, offset, task.shader_eval_type));
offset += sizeof(task.shader_eval_type);
cuda_assert(cuParamSeti(cuShader, offset, shader_x));
offset += sizeof(shader_x);
cuda_assert(cuParamSeti(cuShader, offset, shader_w));
offset += sizeof(shader_w);
cuda_assert(cuParamSeti(cuShader, offset, sample));
offset += sizeof(sample);
cuda_assert(cuParamSetSize(cuShader, offset));
void *args[] = {&d_input,
&d_output,
&task.shader_eval_type,
&shader_x,
&shader_w,
&sample};
/* launch kernel */
int threads_per_block;
@@ -810,8 +755,10 @@ public:
int xblocks = (shader_w + threads_per_block - 1)/threads_per_block;
cuda_assert(cuFuncSetCacheConfig(cuShader, CU_FUNC_CACHE_PREFER_L1));
cuda_assert(cuFuncSetBlockShape(cuShader, threads_per_block, 1, 1));
cuda_assert(cuLaunchGrid(cuShader, xblocks, 1));
cuda_assert(cuLaunchKernel(cuShader,
xblocks , 1, 1, /* blocks */
threads_per_block, 1, 1, /* threads */
0, 0, args, 0));
cuda_assert(cuCtxSynchronize());
}

View File

@@ -149,6 +149,7 @@ tcuGLCtxCreate *cuGLCtxCreate;
tcuGraphicsGLRegisterBuffer *cuGraphicsGLRegisterBuffer;
tcuGraphicsGLRegisterImage *cuGraphicsGLRegisterImage;
tcuCtxSetCurrent *cuCtxSetCurrent;
tcuLaunchKernel *cuLaunchKernel;
CCL_NAMESPACE_BEGIN
@@ -386,6 +387,7 @@ bool cuLibraryInit()
/* cuda 4.0 */
CUDA_LIBRARY_FIND(cuCtxSetCurrent);
CUDA_LIBRARY_FIND(cuLaunchKernel);
if(cuHavePrecompiledKernels())
result = true;

View File

@@ -509,6 +509,7 @@ typedef CUresult CUDAAPI tcuGLCtxCreate(CUcontext *pCtx, unsigned int Flags, CUd
typedef CUresult CUDAAPI tcuGraphicsGLRegisterBuffer(CUgraphicsResource *pCudaResource, GLuint buffer, unsigned int Flags);
typedef CUresult CUDAAPI tcuGraphicsGLRegisterImage(CUgraphicsResource *pCudaResource, GLuint image, GLenum target, unsigned int Flags);
typedef CUresult CUDAAPI tcuCtxSetCurrent(CUcontext ctx);
/* cuda 4.0 kernel launch: kernelParams is an array of pointers, one per kernel
 * argument; extra is an optional array of additional launch options (may be NULL).
 * Both are declared as void** to match the driver API prototype. */
typedef CUresult CUDAAPI tcuLaunchKernel(CUfunction f, unsigned int gridDimX, unsigned int gridDimY, unsigned int gridDimZ, unsigned int blockDimX, unsigned int blockDimY, unsigned int blockDimZ, unsigned int sharedMemBytes, CUstream hStream, void **kernelParams, void **extra);
/* function declarations */
@@ -629,6 +630,7 @@ extern tcuGLCtxCreate *cuGLCtxCreate;
extern tcuGraphicsGLRegisterBuffer *cuGraphicsGLRegisterBuffer;
extern tcuGraphicsGLRegisterImage *cuGraphicsGLRegisterImage;
extern tcuCtxSetCurrent *cuCtxSetCurrent;
extern tcuLaunchKernel *cuLaunchKernel;
#endif /* __UTIL_CUDA_H__ */