Cycles: Tweaks to support CUDA 8 toolkit

Most of the changes give the compiler explicit hints about inlining functions,
so that inlining matches how it worked with the previous toolkit.

This makes kernels compiled with CUDA 8 render, on average, at the same speed
as the previous kernels. Some scenes are somewhat faster and some are
somewhat slower, but the slowdown is within 1% so far.

On the positive side, this allows us to enable newer-generation cards on the
buildbots (so GTX 10x0 will be officially supported soon).
This commit is contained in:
Sergey Sharybin
2016-08-01 15:40:46 +02:00
parent 7065022f7a
commit 6353ecb996
29 changed files with 250 additions and 126 deletions

View File

@@ -99,12 +99,12 @@ ccl_device void svm_node_tex_coord(KernelGlobals *kg,
stack_store_float3(stack, out_offset, data);
}
ccl_device_inline void svm_node_tex_coord_bump_dx(KernelGlobals *kg,
ShaderData *sd,
int path_flag,
float *stack,
uint4 node,
int *offset)
ccl_device void svm_node_tex_coord_bump_dx(KernelGlobals *kg,
ShaderData *sd,
int path_flag,
float *stack,
uint4 node,
int *offset)
{
#ifdef __RAY_DIFFERENTIALS__
float3 data;
@@ -184,12 +184,12 @@ ccl_device_inline void svm_node_tex_coord_bump_dx(KernelGlobals *kg,
#endif
}
ccl_device_inline void svm_node_tex_coord_bump_dy(KernelGlobals *kg,
ShaderData *sd,
int path_flag,
float *stack,
uint4 node,
int *offset)
ccl_device void svm_node_tex_coord_bump_dy(KernelGlobals *kg,
ShaderData *sd,
int path_flag,
float *stack,
uint4 node,
int *offset)
{
#ifdef __RAY_DIFFERENTIALS__
float3 data;