Cycles: use LIKELY/UNLIKELY macros

Gives overall ~3% speedup in own tests for BMW scene.
This commit is contained in:
Campbell Barton
2014-05-05 03:49:22 +10:00
parent dc13969e48
commit d828d44d7a
5 changed files with 67 additions and 25 deletions

View File

@@ -50,7 +50,11 @@ CCL_NAMESPACE_BEGIN
/* RenderServices implementation */
#define COPY_MATRIX44(m1, m2) memcpy(m1, m2, sizeof(*m2))
/* Copy sizeof(*m2) bytes from m2 into m1 with memcpy.
 * m1 is checked against OSL::Matrix44* and m2 against Transform* through
 * CHECK_TYPE before the copy.
 * The body is wrapped in do { } while(0) so that `COPY_MATRIX44(a, b);` is
 * exactly one statement and stays valid as the body of an unbraced if/else. */
#define COPY_MATRIX44(m1, m2) do { \
	CHECK_TYPE(m1, OSL::Matrix44*); \
	CHECK_TYPE(m2, Transform*); \
	memcpy(m1, m2, sizeof(*m2)); \
} while(0)
/* static ustrings */
ustring OSLRenderServices::u_distance("distance");

View File

@@ -61,22 +61,22 @@ ccl_device float3 rgb_to_hsv(float3 rgb)
h = 0.0f;
}
if(s == 0.0f) {
h = 0.0f;
}
else {
if(s != 0.0f) {
float3 cmax3 = make_float3(cmax, cmax, cmax);
c = (cmax3 - rgb)/cdelta;
if(rgb.x == cmax) h = c.z - c.y;
else if(rgb.y == cmax) h = 2.0f + c.x - c.z;
else h = 4.0f + c.y - c.x;
if (rgb.x == cmax) h = c.z - c.y;
else if(rgb.y == cmax) h = 2.0f + c.x - c.z;
else h = 4.0f + c.y - c.x;
h /= 6.0f;
if(h < 0.0f)
h += 1.0f;
}
else {
h = 0.0f;
}
return make_float3(h, s, v);
}
@@ -90,13 +90,10 @@ ccl_device float3 hsv_to_rgb(float3 hsv)
s = hsv.y;
v = hsv.z;
if(s == 0.0f) {
rgb = make_float3(v, v, v);
}
else {
if(s != 0.0f) {
if(h == 1.0f)
h = 0.0f;
h *= 6.0f;
i = floorf(h);
f = h - i;
@@ -104,13 +101,16 @@ ccl_device float3 hsv_to_rgb(float3 hsv)
p = v*(1.0f-s);
q = v*(1.0f-(s*f));
t = v*(1.0f-(s*(1.0f-f)));
if(i == 0.0f) rgb = make_float3(v, t, p);
if (i == 0.0f) rgb = make_float3(v, t, p);
else if(i == 1.0f) rgb = make_float3(q, v, p);
else if(i == 2.0f) rgb = make_float3(p, v, t);
else if(i == 3.0f) rgb = make_float3(p, q, v);
else if(i == 4.0f) rgb = make_float3(t, p, v);
else rgb = make_float3(v, p, q);
else rgb = make_float3(v, p, q);
}
else {
rgb = make_float3(v, v, v);
}
return rgb;

View File

@@ -1237,7 +1237,7 @@ ccl_device float compatible_powf(float x, float y)
ccl_device float safe_powf(float a, float b)
{
if(a < 0.0f && b != float_to_int(b))
if(UNLIKELY(a < 0.0f && b != float_to_int(b)))
return 0.0f;
return compatible_powf(a, b);
@@ -1245,7 +1245,7 @@ ccl_device float safe_powf(float a, float b)
ccl_device float safe_logf(float a, float b)
{
if(a < 0.0f || b < 0.0f)
if(UNLIKELY(a < 0.0f || b < 0.0f))
return 0.0f;
return logf(a)/logf(b);
@@ -1305,7 +1305,7 @@ ccl_device bool ray_aligned_disk_intersect(
float3 disk_N = normalize_len(ray_P - disk_P, &disk_t);
float div = dot(ray_D, disk_N);
if(div == 0.0f)
if(UNLIKELY(div == 0.0f))
return false;
/* compute t to intersection point */
@@ -1335,7 +1335,7 @@ ccl_device bool ray_triangle_intersect(
float3 s1 = cross(ray_D, e2);
const float divisor = dot(s1, e1);
if(divisor == 0.0f)
if(UNLIKELY(divisor == 0.0f))
return false;
const float invdivisor = 1.0f/divisor;
@@ -1378,7 +1378,7 @@ ccl_device bool ray_triangle_intersect_uv(
float3 s1 = cross(ray_D, e2);
const float divisor = dot(s1, e1);
if(divisor == 0.0f)
if(UNLIKELY(divisor == 0.0f))
return false;
const float invdivisor = 1.0f/divisor;

View File

@@ -75,7 +75,7 @@ static bool transform_matrix4_gj_inverse(float R[][4], float M[][4])
}
}
if(pivotsize == 0)
if(UNLIKELY(pivotsize == 0.0f))
return false;
if(pivot != i) {
@@ -106,7 +106,7 @@ static bool transform_matrix4_gj_inverse(float R[][4], float M[][4])
for(int i = 3; i >= 0; --i) {
float f;
if((f = M[i][i]) == 0)
if(UNLIKELY((f = M[i][i]) == 0.0f))
return false;
for(int j = 0; j < 4; j++) {
@@ -135,15 +135,16 @@ Transform transform_inverse(const Transform& tfm)
memcpy(R, &tfmR, sizeof(R));
memcpy(M, &tfm, sizeof(M));
if(!transform_matrix4_gj_inverse(R, M)) {
if(UNLIKELY(!transform_matrix4_gj_inverse(R, M))) {
/* matrix is degenerate (e.g. 0 scale on some axis), ideally we should
* never be in this situation, but try to invert it anyway with tweak */
M[0][0] += 1e-8f;
M[1][1] += 1e-8f;
M[2][2] += 1e-8f;
if(!transform_matrix4_gj_inverse(R, M))
if(UNLIKELY(!transform_matrix4_gj_inverse(R, M))) {
return transform_identity();
}
}
memcpy(&tfmR, R, sizeof(R));

View File

@@ -456,6 +456,43 @@ enum InterpolationType {
INTERPOLATION_SMART = 3,
};
/* macros */
/* hints for branch prediction, only use in code that runs a _lot_ */
/* LIKELY(x) / UNLIKELY(x): wrap a branch condition to state which outcome is
 * expected. When __GNUC__ is defined the condition is normalized to 0/1 with
 * `!!` and handed to __builtin_expect(); otherwise the macros expand to the
 * bare condition. */
#ifndef __GNUC__
#  define LIKELY(x)    (x)
#  define UNLIKELY(x)  (x)
#else
#  define LIKELY(x)    __builtin_expect(!!(x), 1)
#  define UNLIKELY(x)  __builtin_expect(!!(x), 0)
#endif
/* Compile-time type checks, active only when __GNUC__ is defined.
 *
 * CHECK_TYPE(var, type) declares a scratch pointer to the type of `var` and
 * assigns a null `type *` to it. CHECK_TYPE_PAIR(var_a, var_b) does the same
 * with a null pointer to the type of `var_b`. When the types differ the
 * assignment is rejected with a diagnostic such as:
 *   incompatible types when assigning to type 'Foo' from type 'Bar'
 * ... the compiler optimizes away the temp var.
 *
 * The bodies are wrapped in do { } while(0) so that each use is exactly one
 * statement and remains valid as the body of an unbraced if/else.
 *
 * Without __GNUC__ both macros expand to nothing. */
#ifdef __GNUC__
#define CHECK_TYPE(var, type) do { \
	__typeof(var) *__tmp; \
	__tmp = (type *)NULL; \
	(void)__tmp; \
} while(0)
#define CHECK_TYPE_PAIR(var_a, var_b) do { \
	__typeof(var_a) *__tmp; \
	__tmp = (__typeof(var_b) *)NULL; \
	(void)__tmp; \
} while(0)
#else
#  define CHECK_TYPE(var, type)
#  define CHECK_TYPE_PAIR(var_a, var_b)
#endif
/* Expression form of the type check, can be used in simple macros:
 * compares a zero value cast to `type` against `val` and casts the result
 * to void, so the whole macro is a single parenthesized expression.
 * Note that `val` is part of the evaluated comparison.
 * NOTE(review): presumably meant to provoke a compiler diagnostic when `val`
 * is not comparable with `type` - confirm on the compilers in use. */
#define CHECK_TYPE_INLINE(val, type) \
((void)(((type)0) != (val)))
CCL_NAMESPACE_END
#endif /* __UTIL_TYPES_H__ */