Cycles: Tweaks to support CUDA 8 toolkit
The changes mainly give explicit hints on function inlining, so that inlining matches how it worked with the previous toolkit. This makes kernels compiled with CUDA 8 render, on average, at the same speed as the previous kernels. Some scenes are somewhat faster and some are somewhat slower, but the slowdown is within 1% so far. On the positive side, this allows us to enable newer-generation cards on the buildbots (so the GTX 10x0 series will be officially supported soon).
This commit is contained in:
@@ -99,12 +99,12 @@ ccl_device void svm_node_tex_coord(KernelGlobals *kg,
|
||||
stack_store_float3(stack, out_offset, data);
|
||||
}
|
||||
|
||||
ccl_device_inline void svm_node_tex_coord_bump_dx(KernelGlobals *kg,
|
||||
ShaderData *sd,
|
||||
int path_flag,
|
||||
float *stack,
|
||||
uint4 node,
|
||||
int *offset)
|
||||
ccl_device void svm_node_tex_coord_bump_dx(KernelGlobals *kg,
|
||||
ShaderData *sd,
|
||||
int path_flag,
|
||||
float *stack,
|
||||
uint4 node,
|
||||
int *offset)
|
||||
{
|
||||
#ifdef __RAY_DIFFERENTIALS__
|
||||
float3 data;
|
||||
@@ -184,12 +184,12 @@ ccl_device_inline void svm_node_tex_coord_bump_dx(KernelGlobals *kg,
|
||||
#endif
|
||||
}
|
||||
|
||||
ccl_device_inline void svm_node_tex_coord_bump_dy(KernelGlobals *kg,
|
||||
ShaderData *sd,
|
||||
int path_flag,
|
||||
float *stack,
|
||||
uint4 node,
|
||||
int *offset)
|
||||
ccl_device void svm_node_tex_coord_bump_dy(KernelGlobals *kg,
|
||||
ShaderData *sd,
|
||||
int path_flag,
|
||||
float *stack,
|
||||
uint4 node,
|
||||
int *offset)
|
||||
{
|
||||
#ifdef __RAY_DIFFERENTIALS__
|
||||
float3 data;
|
||||
|
Reference in New Issue
Block a user