Cycles: Tweak inline policy for some functions
The goal is to bring the Experimental kernel closer in performance to the official kernel by avoiding spills and the like. There should not be a big impact on the official kernel; my own tests showed a performance drop of a few percent on a laptop's GPU. CPU speed was always the same on the AVX, AVX2 and SSE4.1 CPUs I've been testing here. This seems to be the last essential step before we can get rid of the Experimental kernel and officially enable SSS on GPU without causing major performance issues. Some more tweaks will surely be required, but we can keep doing those until the cows come home anyway.
This commit is contained in:
@@ -99,12 +99,12 @@ ccl_device void svm_node_tex_coord(KernelGlobals *kg,
|
||||
stack_store_float3(stack, out_offset, data);
|
||||
}
|
||||
|
||||
ccl_device void svm_node_tex_coord_bump_dx(KernelGlobals *kg,
|
||||
ShaderData *sd,
|
||||
int path_flag,
|
||||
float *stack,
|
||||
uint4 node,
|
||||
int *offset)
|
||||
ccl_device_inline void svm_node_tex_coord_bump_dx(KernelGlobals *kg,
|
||||
ShaderData *sd,
|
||||
int path_flag,
|
||||
float *stack,
|
||||
uint4 node,
|
||||
int *offset)
|
||||
{
|
||||
#ifdef __RAY_DIFFERENTIALS__
|
||||
float3 data;
|
||||
@@ -184,12 +184,12 @@ ccl_device void svm_node_tex_coord_bump_dx(KernelGlobals *kg,
|
||||
#endif
|
||||
}
|
||||
|
||||
ccl_device void svm_node_tex_coord_bump_dy(KernelGlobals *kg,
|
||||
ShaderData *sd,
|
||||
int path_flag,
|
||||
float *stack,
|
||||
uint4 node,
|
||||
int *offset)
|
||||
ccl_device_inline void svm_node_tex_coord_bump_dy(KernelGlobals *kg,
|
||||
ShaderData *sd,
|
||||
int path_flag,
|
||||
float *stack,
|
||||
uint4 node,
|
||||
int *offset)
|
||||
{
|
||||
#ifdef __RAY_DIFFERENTIALS__
|
||||
float3 data;
|
||||
|
Reference in New Issue
Block a user