Files
blender/intern/cycles/kernel/svm/svm_attribute.h

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

281 lines
8.6 KiB
C
Raw Normal View History

/*
* Copyright 2011-2013 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
CCL_NAMESPACE_BEGIN
/* Attribute Node */
/* Decode an attribute node and resolve the descriptor it refers to.
 *
 * Writes node.z to *out_offset and node.w (as NodeAttributeOutputType) to *type.
 * node.y is passed to find_attribute() as the lookup key. When there is no
 * object (background) or the lookup reports ATTR_STD_NOT_FOUND, a "not found"
 * descriptor is returned with offset 0 and its type taken from node.w. */
ccl_device AttributeDescriptor svm_node_attr_init(
    KernelGlobals *kg, ShaderData *sd, uint4 node, NodeAttributeOutputType *type, uint *out_offset)
{
  *out_offset = node.z;
  *type = (NodeAttributeOutputType)node.w;

  /* Only attempt a lookup when shading an actual object. */
  if (sd->object != OBJECT_NONE) {
    const AttributeDescriptor found = find_attribute(kg, sd, node.y);
    if (found.offset != ATTR_STD_NOT_FOUND) {
      return found;
    }
  }

  /* Background, or the attribute does not exist on this object. */
  AttributeDescriptor fallback = attribute_not_found();
  fallback.offset = 0;
  fallback.type = (NodeAttributeType)node.w;
  return fallback;
}
ccl_device void svm_node_attr(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node)
{
NodeAttributeOutputType type = NODE_ATTR_OUTPUT_FLOAT;
uint out_offset = 0;
AttributeDescriptor desc = svm_node_attr_init(kg, sd, node, &type, &out_offset);
#ifdef __VOLUME__
/* Volumes
* NOTE: moving this into its own node type might help improve performance. */
if (primitive_is_volume_attribute(sd, desc)) {
const float4 value = volume_attribute_float4(kg, sd, desc);
if (type == NODE_ATTR_OUTPUT_FLOAT) {
const float f = volume_attribute_value_to_float(value);
stack_store_float(stack, out_offset, f);
}
else if (type == NODE_ATTR_OUTPUT_FLOAT3) {
const float3 f = volume_attribute_value_to_float3(value);
stack_store_float3(stack, out_offset, f);
}
else {
const float f = volume_attribute_value_to_alpha(value);
stack_store_float(stack, out_offset, f);
}
return;
}
#endif
/* Surface. */
if (desc.type == NODE_ATTR_FLOAT) {
float f = primitive_surface_attribute_float(kg, sd, desc, NULL, NULL);
if (type == NODE_ATTR_OUTPUT_FLOAT) {
stack_store_float(stack, out_offset, f);
}
else if (type == NODE_ATTR_OUTPUT_FLOAT3) {
T61513: Refactored Cycles Attribute Retrieval There is a generic function to retrieve float and float3 attributes `primitive_attribute_float` and primitive_attribute_float3`. Inside these functions an prioritised if-else construction checked where the attribute is stored and then retrieved from that location. Actually the calling function most of the time already knows where the data is stored. So we could simplify this by splitting these functions and remove the check logic. This patch splits the `primitive_attribute_float?` functions into `primitive_surface_attribute_float?` and `primitive_volume_attribute_float?`. What leads to less branching and more optimum kernels. The original function is still being used by OSL and `svm_node_attr`. This will reduce the compilation time and render time for kernels. Especially in production scenes there is a lot of benefit. Impact in compilation times job | scene_name | previous | new | percentage -------+-----------------+----------+-------+------------ t61513 | empty | 10.63 | 10.66 | 0% t61513 | bmw | 17.91 | 17.65 | 1% t61513 | fishycat | 19.57 | 17.68 | 10% t61513 | barbershop | 54.10 | 24.41 | 55% t61513 | classroom | 17.55 | 16.29 | 7% t61513 | koro | 18.92 | 18.05 | 5% t61513 | pavillion | 17.43 | 16.52 | 5% t61513 | splash279 | 16.48 | 14.91 | 10% t61513 | volume_emission | 36.22 | 21.60 | 40% Impact in render times job | scene_name | previous | new | percentage -------+-----------------+----------+--------+------------ 61513 | empty | 21.06 | 20.35 | 3% 61513 | bmw | 198.44 | 190.05 | 4% 61513 | fishycat | 394.20 | 401.25 | -2% 61513 | barbershop | 1188.16 | 912.39 | 23% 61513 | classroom | 341.08 | 340.38 | 0% 61513 | koro | 472.43 | 471.80 | 0% 61513 | pavillion | 905.77 | 899.80 | 1% 61513 | splash279 | 55.26 | 54.86 | 1% 61513 | volume_emission | 62.59 | 61.70 | 1% There is also a possitive impact when using CPU and CUDA, but they are small. 
I didn't split the hair logic from the surface logic due to: * Hair and surface use same attribute types. It was not clear if it could be splitted when looking at the code only. * Hair and surface are quick to compile and to read. So the benefit is quite small. Differential Revision: https://developer.blender.org/D4375
2019-02-19 15:41:22 +01:00
stack_store_float3(stack, out_offset, make_float3(f, f, f));
}
else {
stack_store_float(stack, out_offset, 1.0f);
}
}
else if (desc.type == NODE_ATTR_FLOAT2) {
float2 f = primitive_surface_attribute_float2(kg, sd, desc, NULL, NULL);
if (type == NODE_ATTR_OUTPUT_FLOAT) {
stack_store_float(stack, out_offset, f.x);
}
else if (type == NODE_ATTR_OUTPUT_FLOAT3) {
stack_store_float3(stack, out_offset, make_float3(f.x, f.y, 0.0f));
}
else {
stack_store_float(stack, out_offset, 1.0f);
}
}
else if (desc.type == NODE_ATTR_FLOAT4 || desc.type == NODE_ATTR_RGBA) {
float4 f = primitive_surface_attribute_float4(kg, sd, desc, NULL, NULL);
if (type == NODE_ATTR_OUTPUT_FLOAT) {
stack_store_float(stack, out_offset, average(float4_to_float3(f)));
}
else if (type == NODE_ATTR_OUTPUT_FLOAT3) {
stack_store_float3(stack, out_offset, float4_to_float3(f));
}
else {
stack_store_float(stack, out_offset, f.w);
}
}
else {
float3 f = primitive_surface_attribute_float3(kg, sd, desc, NULL, NULL);
if (type == NODE_ATTR_OUTPUT_FLOAT) {
T61513: Refactored Cycles Attribute Retrieval There is a generic function to retrieve float and float3 attributes `primitive_attribute_float` and primitive_attribute_float3`. Inside these functions an prioritised if-else construction checked where the attribute is stored and then retrieved from that location. Actually the calling function most of the time already knows where the data is stored. So we could simplify this by splitting these functions and remove the check logic. This patch splits the `primitive_attribute_float?` functions into `primitive_surface_attribute_float?` and `primitive_volume_attribute_float?`. What leads to less branching and more optimum kernels. The original function is still being used by OSL and `svm_node_attr`. This will reduce the compilation time and render time for kernels. Especially in production scenes there is a lot of benefit. Impact in compilation times job | scene_name | previous | new | percentage -------+-----------------+----------+-------+------------ t61513 | empty | 10.63 | 10.66 | 0% t61513 | bmw | 17.91 | 17.65 | 1% t61513 | fishycat | 19.57 | 17.68 | 10% t61513 | barbershop | 54.10 | 24.41 | 55% t61513 | classroom | 17.55 | 16.29 | 7% t61513 | koro | 18.92 | 18.05 | 5% t61513 | pavillion | 17.43 | 16.52 | 5% t61513 | splash279 | 16.48 | 14.91 | 10% t61513 | volume_emission | 36.22 | 21.60 | 40% Impact in render times job | scene_name | previous | new | percentage -------+-----------------+----------+--------+------------ 61513 | empty | 21.06 | 20.35 | 3% 61513 | bmw | 198.44 | 190.05 | 4% 61513 | fishycat | 394.20 | 401.25 | -2% 61513 | barbershop | 1188.16 | 912.39 | 23% 61513 | classroom | 341.08 | 340.38 | 0% 61513 | koro | 472.43 | 471.80 | 0% 61513 | pavillion | 905.77 | 899.80 | 1% 61513 | splash279 | 55.26 | 54.86 | 1% 61513 | volume_emission | 62.59 | 61.70 | 1% There is also a possitive impact when using CPU and CUDA, but they are small. 
I didn't split the hair logic from the surface logic due to: * Hair and surface use same attribute types. It was not clear if it could be splitted when looking at the code only. * Hair and surface are quick to compile and to read. So the benefit is quite small. Differential Revision: https://developer.blender.org/D4375
2019-02-19 15:41:22 +01:00
stack_store_float(stack, out_offset, average(f));
}
else if (type == NODE_ATTR_OUTPUT_FLOAT3) {
T61513: Refactored Cycles Attribute Retrieval There is a generic function to retrieve float and float3 attributes `primitive_attribute_float` and primitive_attribute_float3`. Inside these functions an prioritised if-else construction checked where the attribute is stored and then retrieved from that location. Actually the calling function most of the time already knows where the data is stored. So we could simplify this by splitting these functions and remove the check logic. This patch splits the `primitive_attribute_float?` functions into `primitive_surface_attribute_float?` and `primitive_volume_attribute_float?`. What leads to less branching and more optimum kernels. The original function is still being used by OSL and `svm_node_attr`. This will reduce the compilation time and render time for kernels. Especially in production scenes there is a lot of benefit. Impact in compilation times job | scene_name | previous | new | percentage -------+-----------------+----------+-------+------------ t61513 | empty | 10.63 | 10.66 | 0% t61513 | bmw | 17.91 | 17.65 | 1% t61513 | fishycat | 19.57 | 17.68 | 10% t61513 | barbershop | 54.10 | 24.41 | 55% t61513 | classroom | 17.55 | 16.29 | 7% t61513 | koro | 18.92 | 18.05 | 5% t61513 | pavillion | 17.43 | 16.52 | 5% t61513 | splash279 | 16.48 | 14.91 | 10% t61513 | volume_emission | 36.22 | 21.60 | 40% Impact in render times job | scene_name | previous | new | percentage -------+-----------------+----------+--------+------------ 61513 | empty | 21.06 | 20.35 | 3% 61513 | bmw | 198.44 | 190.05 | 4% 61513 | fishycat | 394.20 | 401.25 | -2% 61513 | barbershop | 1188.16 | 912.39 | 23% 61513 | classroom | 341.08 | 340.38 | 0% 61513 | koro | 472.43 | 471.80 | 0% 61513 | pavillion | 905.77 | 899.80 | 1% 61513 | splash279 | 55.26 | 54.86 | 1% 61513 | volume_emission | 62.59 | 61.70 | 1% There is also a possitive impact when using CPU and CUDA, but they are small. 
I didn't split the hair logic from the surface logic due to: * Hair and surface use same attribute types. It was not clear if it could be splitted when looking at the code only. * Hair and surface are quick to compile and to read. So the benefit is quite small. Differential Revision: https://developer.blender.org/D4375
2019-02-19 15:41:22 +01:00
stack_store_float3(stack, out_offset, f);
}
else {
stack_store_float(stack, out_offset, 1.0f);
}
}
}
ccl_device void svm_node_attr_bump_dx(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node)
{
NodeAttributeOutputType type = NODE_ATTR_OUTPUT_FLOAT;
uint out_offset = 0;
AttributeDescriptor desc = svm_node_attr_init(kg, sd, node, &type, &out_offset);
#ifdef __VOLUME__
/* Volume */
if (primitive_is_volume_attribute(sd, desc)) {
if (type == NODE_ATTR_OUTPUT_FLOAT) {
stack_store_float(stack, out_offset, 0.0f);
}
else if (type == NODE_ATTR_OUTPUT_FLOAT3) {
stack_store_float3(stack, out_offset, make_float3(0.0f, 0.0f, 0.0f));
}
else {
stack_store_float(stack, out_offset, 1.0f);
}
return;
}
#endif
/* Surface */
if (desc.type == NODE_ATTR_FLOAT) {
T61513: Refactored Cycles Attribute Retrieval There is a generic function to retrieve float and float3 attributes `primitive_attribute_float` and primitive_attribute_float3`. Inside these functions an prioritised if-else construction checked where the attribute is stored and then retrieved from that location. Actually the calling function most of the time already knows where the data is stored. So we could simplify this by splitting these functions and remove the check logic. This patch splits the `primitive_attribute_float?` functions into `primitive_surface_attribute_float?` and `primitive_volume_attribute_float?`. What leads to less branching and more optimum kernels. The original function is still being used by OSL and `svm_node_attr`. This will reduce the compilation time and render time for kernels. Especially in production scenes there is a lot of benefit. Impact in compilation times job | scene_name | previous | new | percentage -------+-----------------+----------+-------+------------ t61513 | empty | 10.63 | 10.66 | 0% t61513 | bmw | 17.91 | 17.65 | 1% t61513 | fishycat | 19.57 | 17.68 | 10% t61513 | barbershop | 54.10 | 24.41 | 55% t61513 | classroom | 17.55 | 16.29 | 7% t61513 | koro | 18.92 | 18.05 | 5% t61513 | pavillion | 17.43 | 16.52 | 5% t61513 | splash279 | 16.48 | 14.91 | 10% t61513 | volume_emission | 36.22 | 21.60 | 40% Impact in render times job | scene_name | previous | new | percentage -------+-----------------+----------+--------+------------ 61513 | empty | 21.06 | 20.35 | 3% 61513 | bmw | 198.44 | 190.05 | 4% 61513 | fishycat | 394.20 | 401.25 | -2% 61513 | barbershop | 1188.16 | 912.39 | 23% 61513 | classroom | 341.08 | 340.38 | 0% 61513 | koro | 472.43 | 471.80 | 0% 61513 | pavillion | 905.77 | 899.80 | 1% 61513 | splash279 | 55.26 | 54.86 | 1% 61513 | volume_emission | 62.59 | 61.70 | 1% There is also a possitive impact when using CPU and CUDA, but they are small. 
I didn't split the hair logic from the surface logic due to: * Hair and surface use same attribute types. It was not clear if it could be splitted when looking at the code only. * Hair and surface are quick to compile and to read. So the benefit is quite small. Differential Revision: https://developer.blender.org/D4375
2019-02-19 15:41:22 +01:00
float dx;
float f = primitive_surface_attribute_float(kg, sd, desc, &dx, NULL);
if (type == NODE_ATTR_OUTPUT_FLOAT) {
stack_store_float(stack, out_offset, f + dx);
}
else if (type == NODE_ATTR_OUTPUT_FLOAT3) {
T61513: Refactored Cycles Attribute Retrieval There is a generic function to retrieve float and float3 attributes `primitive_attribute_float` and primitive_attribute_float3`. Inside these functions an prioritised if-else construction checked where the attribute is stored and then retrieved from that location. Actually the calling function most of the time already knows where the data is stored. So we could simplify this by splitting these functions and remove the check logic. This patch splits the `primitive_attribute_float?` functions into `primitive_surface_attribute_float?` and `primitive_volume_attribute_float?`. What leads to less branching and more optimum kernels. The original function is still being used by OSL and `svm_node_attr`. This will reduce the compilation time and render time for kernels. Especially in production scenes there is a lot of benefit. Impact in compilation times job | scene_name | previous | new | percentage -------+-----------------+----------+-------+------------ t61513 | empty | 10.63 | 10.66 | 0% t61513 | bmw | 17.91 | 17.65 | 1% t61513 | fishycat | 19.57 | 17.68 | 10% t61513 | barbershop | 54.10 | 24.41 | 55% t61513 | classroom | 17.55 | 16.29 | 7% t61513 | koro | 18.92 | 18.05 | 5% t61513 | pavillion | 17.43 | 16.52 | 5% t61513 | splash279 | 16.48 | 14.91 | 10% t61513 | volume_emission | 36.22 | 21.60 | 40% Impact in render times job | scene_name | previous | new | percentage -------+-----------------+----------+--------+------------ 61513 | empty | 21.06 | 20.35 | 3% 61513 | bmw | 198.44 | 190.05 | 4% 61513 | fishycat | 394.20 | 401.25 | -2% 61513 | barbershop | 1188.16 | 912.39 | 23% 61513 | classroom | 341.08 | 340.38 | 0% 61513 | koro | 472.43 | 471.80 | 0% 61513 | pavillion | 905.77 | 899.80 | 1% 61513 | splash279 | 55.26 | 54.86 | 1% 61513 | volume_emission | 62.59 | 61.70 | 1% There is also a possitive impact when using CPU and CUDA, but they are small. 
I didn't split the hair logic from the surface logic due to: * Hair and surface use same attribute types. It was not clear if it could be splitted when looking at the code only. * Hair and surface are quick to compile and to read. So the benefit is quite small. Differential Revision: https://developer.blender.org/D4375
2019-02-19 15:41:22 +01:00
stack_store_float3(stack, out_offset, make_float3(f + dx, f + dx, f + dx));
}
else {
stack_store_float(stack, out_offset, 1.0f);
}
}
else if (desc.type == NODE_ATTR_FLOAT2) {
float2 dx;
float2 f = primitive_surface_attribute_float2(kg, sd, desc, &dx, NULL);
if (type == NODE_ATTR_OUTPUT_FLOAT) {
stack_store_float(stack, out_offset, f.x + dx.x);
}
else if (type == NODE_ATTR_OUTPUT_FLOAT3) {
stack_store_float3(stack, out_offset, make_float3(f.x + dx.x, f.y + dx.y, 0.0f));
}
else {
stack_store_float(stack, out_offset, 1.0f);
}
}
else if (desc.type == NODE_ATTR_FLOAT4 || desc.type == NODE_ATTR_RGBA) {
float4 dx;
float4 f = primitive_surface_attribute_float4(kg, sd, desc, &dx, NULL);
if (type == NODE_ATTR_OUTPUT_FLOAT) {
stack_store_float(stack, out_offset, average(float4_to_float3(f + dx)));
}
else if (type == NODE_ATTR_OUTPUT_FLOAT3) {
stack_store_float3(stack, out_offset, float4_to_float3(f + dx));
}
else {
stack_store_float(stack, out_offset, f.w + dx.w);
}
}
else {
T61513: Refactored Cycles Attribute Retrieval There is a generic function to retrieve float and float3 attributes `primitive_attribute_float` and primitive_attribute_float3`. Inside these functions an prioritised if-else construction checked where the attribute is stored and then retrieved from that location. Actually the calling function most of the time already knows where the data is stored. So we could simplify this by splitting these functions and remove the check logic. This patch splits the `primitive_attribute_float?` functions into `primitive_surface_attribute_float?` and `primitive_volume_attribute_float?`. What leads to less branching and more optimum kernels. The original function is still being used by OSL and `svm_node_attr`. This will reduce the compilation time and render time for kernels. Especially in production scenes there is a lot of benefit. Impact in compilation times job | scene_name | previous | new | percentage -------+-----------------+----------+-------+------------ t61513 | empty | 10.63 | 10.66 | 0% t61513 | bmw | 17.91 | 17.65 | 1% t61513 | fishycat | 19.57 | 17.68 | 10% t61513 | barbershop | 54.10 | 24.41 | 55% t61513 | classroom | 17.55 | 16.29 | 7% t61513 | koro | 18.92 | 18.05 | 5% t61513 | pavillion | 17.43 | 16.52 | 5% t61513 | splash279 | 16.48 | 14.91 | 10% t61513 | volume_emission | 36.22 | 21.60 | 40% Impact in render times job | scene_name | previous | new | percentage -------+-----------------+----------+--------+------------ 61513 | empty | 21.06 | 20.35 | 3% 61513 | bmw | 198.44 | 190.05 | 4% 61513 | fishycat | 394.20 | 401.25 | -2% 61513 | barbershop | 1188.16 | 912.39 | 23% 61513 | classroom | 341.08 | 340.38 | 0% 61513 | koro | 472.43 | 471.80 | 0% 61513 | pavillion | 905.77 | 899.80 | 1% 61513 | splash279 | 55.26 | 54.86 | 1% 61513 | volume_emission | 62.59 | 61.70 | 1% There is also a possitive impact when using CPU and CUDA, but they are small. 
I didn't split the hair logic from the surface logic due to: * Hair and surface use same attribute types. It was not clear if it could be splitted when looking at the code only. * Hair and surface are quick to compile and to read. So the benefit is quite small. Differential Revision: https://developer.blender.org/D4375
2019-02-19 15:41:22 +01:00
float3 dx;
float3 f = primitive_surface_attribute_float3(kg, sd, desc, &dx, NULL);
if (type == NODE_ATTR_OUTPUT_FLOAT) {
T61513: Refactored Cycles Attribute Retrieval There is a generic function to retrieve float and float3 attributes `primitive_attribute_float` and primitive_attribute_float3`. Inside these functions an prioritised if-else construction checked where the attribute is stored and then retrieved from that location. Actually the calling function most of the time already knows where the data is stored. So we could simplify this by splitting these functions and remove the check logic. This patch splits the `primitive_attribute_float?` functions into `primitive_surface_attribute_float?` and `primitive_volume_attribute_float?`. What leads to less branching and more optimum kernels. The original function is still being used by OSL and `svm_node_attr`. This will reduce the compilation time and render time for kernels. Especially in production scenes there is a lot of benefit. Impact in compilation times job | scene_name | previous | new | percentage -------+-----------------+----------+-------+------------ t61513 | empty | 10.63 | 10.66 | 0% t61513 | bmw | 17.91 | 17.65 | 1% t61513 | fishycat | 19.57 | 17.68 | 10% t61513 | barbershop | 54.10 | 24.41 | 55% t61513 | classroom | 17.55 | 16.29 | 7% t61513 | koro | 18.92 | 18.05 | 5% t61513 | pavillion | 17.43 | 16.52 | 5% t61513 | splash279 | 16.48 | 14.91 | 10% t61513 | volume_emission | 36.22 | 21.60 | 40% Impact in render times job | scene_name | previous | new | percentage -------+-----------------+----------+--------+------------ 61513 | empty | 21.06 | 20.35 | 3% 61513 | bmw | 198.44 | 190.05 | 4% 61513 | fishycat | 394.20 | 401.25 | -2% 61513 | barbershop | 1188.16 | 912.39 | 23% 61513 | classroom | 341.08 | 340.38 | 0% 61513 | koro | 472.43 | 471.80 | 0% 61513 | pavillion | 905.77 | 899.80 | 1% 61513 | splash279 | 55.26 | 54.86 | 1% 61513 | volume_emission | 62.59 | 61.70 | 1% There is also a possitive impact when using CPU and CUDA, but they are small. 
I didn't split the hair logic from the surface logic due to: * Hair and surface use same attribute types. It was not clear if it could be splitted when looking at the code only. * Hair and surface are quick to compile and to read. So the benefit is quite small. Differential Revision: https://developer.blender.org/D4375
2019-02-19 15:41:22 +01:00
stack_store_float(stack, out_offset, average(f + dx));
}
else if (type == NODE_ATTR_OUTPUT_FLOAT3) {
T61513: Refactored Cycles Attribute Retrieval There is a generic function to retrieve float and float3 attributes `primitive_attribute_float` and primitive_attribute_float3`. Inside these functions an prioritised if-else construction checked where the attribute is stored and then retrieved from that location. Actually the calling function most of the time already knows where the data is stored. So we could simplify this by splitting these functions and remove the check logic. This patch splits the `primitive_attribute_float?` functions into `primitive_surface_attribute_float?` and `primitive_volume_attribute_float?`. What leads to less branching and more optimum kernels. The original function is still being used by OSL and `svm_node_attr`. This will reduce the compilation time and render time for kernels. Especially in production scenes there is a lot of benefit. Impact in compilation times job | scene_name | previous | new | percentage -------+-----------------+----------+-------+------------ t61513 | empty | 10.63 | 10.66 | 0% t61513 | bmw | 17.91 | 17.65 | 1% t61513 | fishycat | 19.57 | 17.68 | 10% t61513 | barbershop | 54.10 | 24.41 | 55% t61513 | classroom | 17.55 | 16.29 | 7% t61513 | koro | 18.92 | 18.05 | 5% t61513 | pavillion | 17.43 | 16.52 | 5% t61513 | splash279 | 16.48 | 14.91 | 10% t61513 | volume_emission | 36.22 | 21.60 | 40% Impact in render times job | scene_name | previous | new | percentage -------+-----------------+----------+--------+------------ 61513 | empty | 21.06 | 20.35 | 3% 61513 | bmw | 198.44 | 190.05 | 4% 61513 | fishycat | 394.20 | 401.25 | -2% 61513 | barbershop | 1188.16 | 912.39 | 23% 61513 | classroom | 341.08 | 340.38 | 0% 61513 | koro | 472.43 | 471.80 | 0% 61513 | pavillion | 905.77 | 899.80 | 1% 61513 | splash279 | 55.26 | 54.86 | 1% 61513 | volume_emission | 62.59 | 61.70 | 1% There is also a possitive impact when using CPU and CUDA, but they are small. 
I didn't split the hair logic from the surface logic due to: * Hair and surface use same attribute types. It was not clear if it could be splitted when looking at the code only. * Hair and surface are quick to compile and to read. So the benefit is quite small. Differential Revision: https://developer.blender.org/D4375
2019-02-19 15:41:22 +01:00
stack_store_float3(stack, out_offset, f + dx);
}
else {
stack_store_float(stack, out_offset, 1.0f);
}
}
}
ccl_device void svm_node_attr_bump_dy(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node)
{
NodeAttributeOutputType type = NODE_ATTR_OUTPUT_FLOAT;
uint out_offset = 0;
AttributeDescriptor desc = svm_node_attr_init(kg, sd, node, &type, &out_offset);
#ifdef __VOLUME__
/* Volume */
if (primitive_is_volume_attribute(sd, desc)) {
if (type == NODE_ATTR_OUTPUT_FLOAT) {
stack_store_float(stack, out_offset, 0.0f);
}
else if (type == NODE_ATTR_OUTPUT_FLOAT3) {
stack_store_float3(stack, out_offset, make_float3(0.0f, 0.0f, 0.0f));
}
else {
stack_store_float(stack, out_offset, 1.0f);
}
return;
}
#endif
/* Surface */
if (desc.type == NODE_ATTR_FLOAT) {
T61513: Refactored Cycles Attribute Retrieval There is a generic function to retrieve float and float3 attributes `primitive_attribute_float` and primitive_attribute_float3`. Inside these functions an prioritised if-else construction checked where the attribute is stored and then retrieved from that location. Actually the calling function most of the time already knows where the data is stored. So we could simplify this by splitting these functions and remove the check logic. This patch splits the `primitive_attribute_float?` functions into `primitive_surface_attribute_float?` and `primitive_volume_attribute_float?`. What leads to less branching and more optimum kernels. The original function is still being used by OSL and `svm_node_attr`. This will reduce the compilation time and render time for kernels. Especially in production scenes there is a lot of benefit. Impact in compilation times job | scene_name | previous | new | percentage -------+-----------------+----------+-------+------------ t61513 | empty | 10.63 | 10.66 | 0% t61513 | bmw | 17.91 | 17.65 | 1% t61513 | fishycat | 19.57 | 17.68 | 10% t61513 | barbershop | 54.10 | 24.41 | 55% t61513 | classroom | 17.55 | 16.29 | 7% t61513 | koro | 18.92 | 18.05 | 5% t61513 | pavillion | 17.43 | 16.52 | 5% t61513 | splash279 | 16.48 | 14.91 | 10% t61513 | volume_emission | 36.22 | 21.60 | 40% Impact in render times job | scene_name | previous | new | percentage -------+-----------------+----------+--------+------------ 61513 | empty | 21.06 | 20.35 | 3% 61513 | bmw | 198.44 | 190.05 | 4% 61513 | fishycat | 394.20 | 401.25 | -2% 61513 | barbershop | 1188.16 | 912.39 | 23% 61513 | classroom | 341.08 | 340.38 | 0% 61513 | koro | 472.43 | 471.80 | 0% 61513 | pavillion | 905.77 | 899.80 | 1% 61513 | splash279 | 55.26 | 54.86 | 1% 61513 | volume_emission | 62.59 | 61.70 | 1% There is also a possitive impact when using CPU and CUDA, but they are small. 
I didn't split the hair logic from the surface logic due to: * Hair and surface use same attribute types. It was not clear if it could be splitted when looking at the code only. * Hair and surface are quick to compile and to read. So the benefit is quite small. Differential Revision: https://developer.blender.org/D4375
2019-02-19 15:41:22 +01:00
float dy;
float f = primitive_surface_attribute_float(kg, sd, desc, NULL, &dy);
if (type == NODE_ATTR_OUTPUT_FLOAT) {
stack_store_float(stack, out_offset, f + dy);
}
else if (type == NODE_ATTR_OUTPUT_FLOAT3) {
T61513: Refactored Cycles Attribute Retrieval There are generic functions to retrieve float and float3 attributes: `primitive_attribute_float` and `primitive_attribute_float3`. Inside these functions a prioritised if-else construction checked where the attribute is stored and then retrieved it from that location. In practice, the calling function usually already knows where the data is stored, so we can simplify this by splitting these functions and removing the check logic. This patch splits the `primitive_attribute_float?` functions into `primitive_surface_attribute_float?` and `primitive_volume_attribute_float?`, which leads to less branching and more optimal kernels. The original function is still being used by OSL and `svm_node_attr`. This will reduce the compilation time and render time for kernels. Especially in production scenes there is a lot of benefit. Impact on compilation times job | scene_name | previous | new | percentage -------+-----------------+----------+-------+------------ t61513 | empty | 10.63 | 10.66 | 0% t61513 | bmw | 17.91 | 17.65 | 1% t61513 | fishycat | 19.57 | 17.68 | 10% t61513 | barbershop | 54.10 | 24.41 | 55% t61513 | classroom | 17.55 | 16.29 | 7% t61513 | koro | 18.92 | 18.05 | 5% t61513 | pavillion | 17.43 | 16.52 | 5% t61513 | splash279 | 16.48 | 14.91 | 10% t61513 | volume_emission | 36.22 | 21.60 | 40% Impact on render times job | scene_name | previous | new | percentage -------+-----------------+----------+--------+------------ 61513 | empty | 21.06 | 20.35 | 3% 61513 | bmw | 198.44 | 190.05 | 4% 61513 | fishycat | 394.20 | 401.25 | -2% 61513 | barbershop | 1188.16 | 912.39 | 23% 61513 | classroom | 341.08 | 340.38 | 0% 61513 | koro | 472.43 | 471.80 | 0% 61513 | pavillion | 905.77 | 899.80 | 1% 61513 | splash279 | 55.26 | 54.86 | 1% 61513 | volume_emission | 62.59 | 61.70 | 1% There is also a positive impact when using CPU and CUDA, but it is small.
I didn't split the hair logic from the surface logic due to: * Hair and surface use the same attribute types. It was not clear if it could be split when looking at the code only. * Hair and surface are quick to compile and to read. So the benefit is quite small. Differential Revision: https://developer.blender.org/D4375
2019-02-19 15:41:22 +01:00
stack_store_float3(stack, out_offset, make_float3(f + dy, f + dy, f + dy));
}
else {
stack_store_float(stack, out_offset, 1.0f);
}
}
else if (desc.type == NODE_ATTR_FLOAT2) {
float2 dy;
float2 f = primitive_surface_attribute_float2(kg, sd, desc, NULL, &dy);
if (type == NODE_ATTR_OUTPUT_FLOAT) {
stack_store_float(stack, out_offset, f.x + dy.x);
}
else if (type == NODE_ATTR_OUTPUT_FLOAT3) {
stack_store_float3(stack, out_offset, make_float3(f.x + dy.x, f.y + dy.y, 0.0f));
}
else {
stack_store_float(stack, out_offset, 1.0f);
}
}
else if (desc.type == NODE_ATTR_FLOAT4 || desc.type == NODE_ATTR_RGBA) {
float4 dy;
float4 f = primitive_surface_attribute_float4(kg, sd, desc, NULL, &dy);
if (type == NODE_ATTR_OUTPUT_FLOAT) {
stack_store_float(stack, out_offset, average(float4_to_float3(f + dy)));
}
else if (type == NODE_ATTR_OUTPUT_FLOAT3) {
stack_store_float3(stack, out_offset, float4_to_float3(f + dy));
}
else {
stack_store_float(stack, out_offset, f.w + dy.w);
}
}
else {
T61513: Refactored Cycles Attribute Retrieval There are generic functions to retrieve float and float3 attributes: `primitive_attribute_float` and `primitive_attribute_float3`. Inside these functions a prioritised if-else construction checked where the attribute is stored and then retrieved it from that location. In practice, the calling function usually already knows where the data is stored, so we can simplify this by splitting these functions and removing the check logic. This patch splits the `primitive_attribute_float?` functions into `primitive_surface_attribute_float?` and `primitive_volume_attribute_float?`, which leads to less branching and more optimal kernels. The original function is still being used by OSL and `svm_node_attr`. This will reduce the compilation time and render time for kernels. Especially in production scenes there is a lot of benefit. Impact on compilation times job | scene_name | previous | new | percentage -------+-----------------+----------+-------+------------ t61513 | empty | 10.63 | 10.66 | 0% t61513 | bmw | 17.91 | 17.65 | 1% t61513 | fishycat | 19.57 | 17.68 | 10% t61513 | barbershop | 54.10 | 24.41 | 55% t61513 | classroom | 17.55 | 16.29 | 7% t61513 | koro | 18.92 | 18.05 | 5% t61513 | pavillion | 17.43 | 16.52 | 5% t61513 | splash279 | 16.48 | 14.91 | 10% t61513 | volume_emission | 36.22 | 21.60 | 40% Impact on render times job | scene_name | previous | new | percentage -------+-----------------+----------+--------+------------ 61513 | empty | 21.06 | 20.35 | 3% 61513 | bmw | 198.44 | 190.05 | 4% 61513 | fishycat | 394.20 | 401.25 | -2% 61513 | barbershop | 1188.16 | 912.39 | 23% 61513 | classroom | 341.08 | 340.38 | 0% 61513 | koro | 472.43 | 471.80 | 0% 61513 | pavillion | 905.77 | 899.80 | 1% 61513 | splash279 | 55.26 | 54.86 | 1% 61513 | volume_emission | 62.59 | 61.70 | 1% There is also a positive impact when using CPU and CUDA, but it is small.
I didn't split the hair logic from the surface logic due to: * Hair and surface use the same attribute types. It was not clear if it could be split when looking at the code only. * Hair and surface are quick to compile and to read. So the benefit is quite small. Differential Revision: https://developer.blender.org/D4375
2019-02-19 15:41:22 +01:00
float3 dy;
float3 f = primitive_surface_attribute_float3(kg, sd, desc, NULL, &dy);
if (type == NODE_ATTR_OUTPUT_FLOAT) {
T61513: Refactored Cycles Attribute Retrieval There are generic functions to retrieve float and float3 attributes: `primitive_attribute_float` and `primitive_attribute_float3`. Inside these functions a prioritised if-else construction checked where the attribute is stored and then retrieved it from that location. In practice, the calling function usually already knows where the data is stored, so we can simplify this by splitting these functions and removing the check logic. This patch splits the `primitive_attribute_float?` functions into `primitive_surface_attribute_float?` and `primitive_volume_attribute_float?`, which leads to less branching and more optimal kernels. The original function is still being used by OSL and `svm_node_attr`. This will reduce the compilation time and render time for kernels. Especially in production scenes there is a lot of benefit. Impact on compilation times job | scene_name | previous | new | percentage -------+-----------------+----------+-------+------------ t61513 | empty | 10.63 | 10.66 | 0% t61513 | bmw | 17.91 | 17.65 | 1% t61513 | fishycat | 19.57 | 17.68 | 10% t61513 | barbershop | 54.10 | 24.41 | 55% t61513 | classroom | 17.55 | 16.29 | 7% t61513 | koro | 18.92 | 18.05 | 5% t61513 | pavillion | 17.43 | 16.52 | 5% t61513 | splash279 | 16.48 | 14.91 | 10% t61513 | volume_emission | 36.22 | 21.60 | 40% Impact on render times job | scene_name | previous | new | percentage -------+-----------------+----------+--------+------------ 61513 | empty | 21.06 | 20.35 | 3% 61513 | bmw | 198.44 | 190.05 | 4% 61513 | fishycat | 394.20 | 401.25 | -2% 61513 | barbershop | 1188.16 | 912.39 | 23% 61513 | classroom | 341.08 | 340.38 | 0% 61513 | koro | 472.43 | 471.80 | 0% 61513 | pavillion | 905.77 | 899.80 | 1% 61513 | splash279 | 55.26 | 54.86 | 1% 61513 | volume_emission | 62.59 | 61.70 | 1% There is also a positive impact when using CPU and CUDA, but it is small.
I didn't split the hair logic from the surface logic due to: * Hair and surface use the same attribute types. It was not clear if it could be split when looking at the code only. * Hair and surface are quick to compile and to read. So the benefit is quite small. Differential Revision: https://developer.blender.org/D4375
2019-02-19 15:41:22 +01:00
stack_store_float(stack, out_offset, average(f + dy));
}
else if (type == NODE_ATTR_OUTPUT_FLOAT3) {
T61513: Refactored Cycles Attribute Retrieval There are generic functions to retrieve float and float3 attributes: `primitive_attribute_float` and `primitive_attribute_float3`. Inside these functions a prioritised if-else construction checked where the attribute is stored and then retrieved it from that location. In practice, the calling function usually already knows where the data is stored, so we can simplify this by splitting these functions and removing the check logic. This patch splits the `primitive_attribute_float?` functions into `primitive_surface_attribute_float?` and `primitive_volume_attribute_float?`, which leads to less branching and more optimal kernels. The original function is still being used by OSL and `svm_node_attr`. This will reduce the compilation time and render time for kernels. Especially in production scenes there is a lot of benefit. Impact on compilation times job | scene_name | previous | new | percentage -------+-----------------+----------+-------+------------ t61513 | empty | 10.63 | 10.66 | 0% t61513 | bmw | 17.91 | 17.65 | 1% t61513 | fishycat | 19.57 | 17.68 | 10% t61513 | barbershop | 54.10 | 24.41 | 55% t61513 | classroom | 17.55 | 16.29 | 7% t61513 | koro | 18.92 | 18.05 | 5% t61513 | pavillion | 17.43 | 16.52 | 5% t61513 | splash279 | 16.48 | 14.91 | 10% t61513 | volume_emission | 36.22 | 21.60 | 40% Impact on render times job | scene_name | previous | new | percentage -------+-----------------+----------+--------+------------ 61513 | empty | 21.06 | 20.35 | 3% 61513 | bmw | 198.44 | 190.05 | 4% 61513 | fishycat | 394.20 | 401.25 | -2% 61513 | barbershop | 1188.16 | 912.39 | 23% 61513 | classroom | 341.08 | 340.38 | 0% 61513 | koro | 472.43 | 471.80 | 0% 61513 | pavillion | 905.77 | 899.80 | 1% 61513 | splash279 | 55.26 | 54.86 | 1% 61513 | volume_emission | 62.59 | 61.70 | 1% There is also a positive impact when using CPU and CUDA, but it is small.
I didn't split the hair logic from the surface logic due to: * Hair and surface use the same attribute types. It was not clear if it could be split when looking at the code only. * Hair and surface are quick to compile and to read. So the benefit is quite small. Differential Revision: https://developer.blender.org/D4375
2019-02-19 15:41:22 +01:00
stack_store_float3(stack, out_offset, f + dy);
}
else {
stack_store_float(stack, out_offset, 1.0f);
}
}
}
CCL_NAMESPACE_END