Cycles: use LIKELY/UNLIKELY macros
Gives overall ~3% speedup in own tests for BMW scene.
This commit is contained in:
@@ -50,7 +50,11 @@ CCL_NAMESPACE_BEGIN
|
|||||||
|
|
||||||
/* RenderServices implementation */
|
/* RenderServices implementation */
|
||||||
|
|
||||||
#define COPY_MATRIX44(m1, m2) memcpy(m1, m2, sizeof(*m2))
|
#define COPY_MATRIX44(m1, m2) { \
|
||||||
|
CHECK_TYPE(m1, OSL::Matrix44*); \
|
||||||
|
CHECK_TYPE(m2, Transform*); \
|
||||||
|
memcpy(m1, m2, sizeof(*m2)); \
|
||||||
|
} (void)0
|
||||||
|
|
||||||
/* static ustrings */
|
/* static ustrings */
|
||||||
ustring OSLRenderServices::u_distance("distance");
|
ustring OSLRenderServices::u_distance("distance");
|
||||||
|
@@ -61,22 +61,22 @@ ccl_device float3 rgb_to_hsv(float3 rgb)
|
|||||||
h = 0.0f;
|
h = 0.0f;
|
||||||
}
|
}
|
||||||
|
|
||||||
if(s == 0.0f) {
|
if(s != 0.0f) {
|
||||||
h = 0.0f;
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
float3 cmax3 = make_float3(cmax, cmax, cmax);
|
float3 cmax3 = make_float3(cmax, cmax, cmax);
|
||||||
c = (cmax3 - rgb)/cdelta;
|
c = (cmax3 - rgb)/cdelta;
|
||||||
|
|
||||||
if(rgb.x == cmax) h = c.z - c.y;
|
if (rgb.x == cmax) h = c.z - c.y;
|
||||||
else if(rgb.y == cmax) h = 2.0f + c.x - c.z;
|
else if(rgb.y == cmax) h = 2.0f + c.x - c.z;
|
||||||
else h = 4.0f + c.y - c.x;
|
else h = 4.0f + c.y - c.x;
|
||||||
|
|
||||||
h /= 6.0f;
|
h /= 6.0f;
|
||||||
|
|
||||||
if(h < 0.0f)
|
if(h < 0.0f)
|
||||||
h += 1.0f;
|
h += 1.0f;
|
||||||
}
|
}
|
||||||
|
else {
|
||||||
|
h = 0.0f;
|
||||||
|
}
|
||||||
|
|
||||||
return make_float3(h, s, v);
|
return make_float3(h, s, v);
|
||||||
}
|
}
|
||||||
@@ -90,13 +90,10 @@ ccl_device float3 hsv_to_rgb(float3 hsv)
|
|||||||
s = hsv.y;
|
s = hsv.y;
|
||||||
v = hsv.z;
|
v = hsv.z;
|
||||||
|
|
||||||
if(s == 0.0f) {
|
if(s != 0.0f) {
|
||||||
rgb = make_float3(v, v, v);
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
if(h == 1.0f)
|
if(h == 1.0f)
|
||||||
h = 0.0f;
|
h = 0.0f;
|
||||||
|
|
||||||
h *= 6.0f;
|
h *= 6.0f;
|
||||||
i = floorf(h);
|
i = floorf(h);
|
||||||
f = h - i;
|
f = h - i;
|
||||||
@@ -104,13 +101,16 @@ ccl_device float3 hsv_to_rgb(float3 hsv)
|
|||||||
p = v*(1.0f-s);
|
p = v*(1.0f-s);
|
||||||
q = v*(1.0f-(s*f));
|
q = v*(1.0f-(s*f));
|
||||||
t = v*(1.0f-(s*(1.0f-f)));
|
t = v*(1.0f-(s*(1.0f-f)));
|
||||||
|
|
||||||
if(i == 0.0f) rgb = make_float3(v, t, p);
|
if (i == 0.0f) rgb = make_float3(v, t, p);
|
||||||
else if(i == 1.0f) rgb = make_float3(q, v, p);
|
else if(i == 1.0f) rgb = make_float3(q, v, p);
|
||||||
else if(i == 2.0f) rgb = make_float3(p, v, t);
|
else if(i == 2.0f) rgb = make_float3(p, v, t);
|
||||||
else if(i == 3.0f) rgb = make_float3(p, q, v);
|
else if(i == 3.0f) rgb = make_float3(p, q, v);
|
||||||
else if(i == 4.0f) rgb = make_float3(t, p, v);
|
else if(i == 4.0f) rgb = make_float3(t, p, v);
|
||||||
else rgb = make_float3(v, p, q);
|
else rgb = make_float3(v, p, q);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
rgb = make_float3(v, v, v);
|
||||||
}
|
}
|
||||||
|
|
||||||
return rgb;
|
return rgb;
|
||||||
|
@@ -1237,7 +1237,7 @@ ccl_device float compatible_powf(float x, float y)
|
|||||||
|
|
||||||
ccl_device float safe_powf(float a, float b)
|
ccl_device float safe_powf(float a, float b)
|
||||||
{
|
{
|
||||||
if(a < 0.0f && b != float_to_int(b))
|
if(UNLIKELY(a < 0.0f && b != float_to_int(b)))
|
||||||
return 0.0f;
|
return 0.0f;
|
||||||
|
|
||||||
return compatible_powf(a, b);
|
return compatible_powf(a, b);
|
||||||
@@ -1245,7 +1245,7 @@ ccl_device float safe_powf(float a, float b)
|
|||||||
|
|
||||||
ccl_device float safe_logf(float a, float b)
|
ccl_device float safe_logf(float a, float b)
|
||||||
{
|
{
|
||||||
if(a < 0.0f || b < 0.0f)
|
if(UNLIKELY(a < 0.0f || b < 0.0f))
|
||||||
return 0.0f;
|
return 0.0f;
|
||||||
|
|
||||||
return logf(a)/logf(b);
|
return logf(a)/logf(b);
|
||||||
@@ -1305,7 +1305,7 @@ ccl_device bool ray_aligned_disk_intersect(
|
|||||||
float3 disk_N = normalize_len(ray_P - disk_P, &disk_t);
|
float3 disk_N = normalize_len(ray_P - disk_P, &disk_t);
|
||||||
float div = dot(ray_D, disk_N);
|
float div = dot(ray_D, disk_N);
|
||||||
|
|
||||||
if(div == 0.0f)
|
if(UNLIKELY(div == 0.0f))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
/* compute t to intersection point */
|
/* compute t to intersection point */
|
||||||
@@ -1335,7 +1335,7 @@ ccl_device bool ray_triangle_intersect(
|
|||||||
float3 s1 = cross(ray_D, e2);
|
float3 s1 = cross(ray_D, e2);
|
||||||
|
|
||||||
const float divisor = dot(s1, e1);
|
const float divisor = dot(s1, e1);
|
||||||
if(divisor == 0.0f)
|
if(UNLIKELY(divisor == 0.0f))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
const float invdivisor = 1.0f/divisor;
|
const float invdivisor = 1.0f/divisor;
|
||||||
@@ -1378,7 +1378,7 @@ ccl_device bool ray_triangle_intersect_uv(
|
|||||||
float3 s1 = cross(ray_D, e2);
|
float3 s1 = cross(ray_D, e2);
|
||||||
|
|
||||||
const float divisor = dot(s1, e1);
|
const float divisor = dot(s1, e1);
|
||||||
if(divisor == 0.0f)
|
if(UNLIKELY(divisor == 0.0f))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
const float invdivisor = 1.0f/divisor;
|
const float invdivisor = 1.0f/divisor;
|
||||||
|
@@ -75,7 +75,7 @@ static bool transform_matrix4_gj_inverse(float R[][4], float M[][4])
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if(pivotsize == 0)
|
if(UNLIKELY(pivotsize == 0.0f))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
if(pivot != i) {
|
if(pivot != i) {
|
||||||
@@ -106,7 +106,7 @@ static bool transform_matrix4_gj_inverse(float R[][4], float M[][4])
|
|||||||
for(int i = 3; i >= 0; --i) {
|
for(int i = 3; i >= 0; --i) {
|
||||||
float f;
|
float f;
|
||||||
|
|
||||||
if((f = M[i][i]) == 0)
|
if(UNLIKELY((f = M[i][i]) == 0.0f))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
for(int j = 0; j < 4; j++) {
|
for(int j = 0; j < 4; j++) {
|
||||||
@@ -135,15 +135,16 @@ Transform transform_inverse(const Transform& tfm)
|
|||||||
memcpy(R, &tfmR, sizeof(R));
|
memcpy(R, &tfmR, sizeof(R));
|
||||||
memcpy(M, &tfm, sizeof(M));
|
memcpy(M, &tfm, sizeof(M));
|
||||||
|
|
||||||
if(!transform_matrix4_gj_inverse(R, M)) {
|
if(UNLIKELY(!transform_matrix4_gj_inverse(R, M))) {
|
||||||
/* matrix is degenerate (e.g. 0 scale on some axis), ideally we should
|
/* matrix is degenerate (e.g. 0 scale on some axis), ideally we should
|
||||||
* never be in this situation, but try to invert it anyway with tweak */
|
* never be in this situation, but try to invert it anyway with tweak */
|
||||||
M[0][0] += 1e-8f;
|
M[0][0] += 1e-8f;
|
||||||
M[1][1] += 1e-8f;
|
M[1][1] += 1e-8f;
|
||||||
M[2][2] += 1e-8f;
|
M[2][2] += 1e-8f;
|
||||||
|
|
||||||
if(!transform_matrix4_gj_inverse(R, M))
|
if(UNLIKELY(!transform_matrix4_gj_inverse(R, M))) {
|
||||||
return transform_identity();
|
return transform_identity();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
memcpy(&tfmR, R, sizeof(R));
|
memcpy(&tfmR, R, sizeof(R));
|
||||||
|
@@ -456,6 +456,43 @@ enum InterpolationType {
|
|||||||
INTERPOLATION_SMART = 3,
|
INTERPOLATION_SMART = 3,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
/* macros */
|
||||||
|
|
||||||
|
/* hints for branch prediction, only use in code that runs a _lot_ */
|
||||||
|
#ifdef __GNUC__
|
||||||
|
# define LIKELY(x) __builtin_expect(!!(x), 1)
|
||||||
|
# define UNLIKELY(x) __builtin_expect(!!(x), 0)
|
||||||
|
#else
|
||||||
|
# define LIKELY(x) (x)
|
||||||
|
# define UNLIKELY(x) (x)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* Causes warning:
|
||||||
|
* incompatible types when assigning to type 'Foo' from type 'Bar'
|
||||||
|
* ... the compiler optimizes away the temp var */
|
||||||
|
#ifdef __GNUC__
|
||||||
|
#define CHECK_TYPE(var, type) { \
|
||||||
|
__typeof(var) *__tmp; \
|
||||||
|
__tmp = (type *)NULL; \
|
||||||
|
(void)__tmp; \
|
||||||
|
} (void)0
|
||||||
|
|
||||||
|
#define CHECK_TYPE_PAIR(var_a, var_b) { \
|
||||||
|
__typeof(var_a) *__tmp; \
|
||||||
|
__tmp = (__typeof(var_b) *)NULL; \
|
||||||
|
(void)__tmp; \
|
||||||
|
} (void)0
|
||||||
|
#else
|
||||||
|
# define CHECK_TYPE(var, type)
|
||||||
|
# define CHECK_TYPE_PAIR(var_a, var_b)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* can be used in simple macros */
|
||||||
|
#define CHECK_TYPE_INLINE(val, type) \
|
||||||
|
((void)(((type)0) != (val)))
|
||||||
|
|
||||||
|
|
||||||
CCL_NAMESPACE_END
|
CCL_NAMESPACE_END
|
||||||
|
|
||||||
#endif /* __UTIL_TYPES_H__ */
|
#endif /* __UTIL_TYPES_H__ */
|
||||||
|
Reference in New Issue
Block a user