Cycles: use LIKELY/UNLIKELY macros
Gives overall ~3% speedup in own tests for BMW scene.
This commit is contained in:
@@ -50,7 +50,11 @@ CCL_NAMESPACE_BEGIN
|
||||
|
||||
/* RenderServices implementation */
|
||||
|
||||
#define COPY_MATRIX44(m1, m2) memcpy(m1, m2, sizeof(*m2))
|
||||
#define COPY_MATRIX44(m1, m2) { \
|
||||
CHECK_TYPE(m1, OSL::Matrix44*); \
|
||||
CHECK_TYPE(m2, Transform*); \
|
||||
memcpy(m1, m2, sizeof(*m2)); \
|
||||
} (void)0
|
||||
|
||||
/* static ustrings */
|
||||
ustring OSLRenderServices::u_distance("distance");
|
||||
|
@@ -61,22 +61,22 @@ ccl_device float3 rgb_to_hsv(float3 rgb)
|
||||
h = 0.0f;
|
||||
}
|
||||
|
||||
if(s == 0.0f) {
|
||||
h = 0.0f;
|
||||
}
|
||||
else {
|
||||
if(s != 0.0f) {
|
||||
float3 cmax3 = make_float3(cmax, cmax, cmax);
|
||||
c = (cmax3 - rgb)/cdelta;
|
||||
|
||||
if(rgb.x == cmax) h = c.z - c.y;
|
||||
else if(rgb.y == cmax) h = 2.0f + c.x - c.z;
|
||||
else h = 4.0f + c.y - c.x;
|
||||
if (rgb.x == cmax) h = c.z - c.y;
|
||||
else if(rgb.y == cmax) h = 2.0f + c.x - c.z;
|
||||
else h = 4.0f + c.y - c.x;
|
||||
|
||||
h /= 6.0f;
|
||||
|
||||
if(h < 0.0f)
|
||||
h += 1.0f;
|
||||
}
|
||||
else {
|
||||
h = 0.0f;
|
||||
}
|
||||
|
||||
return make_float3(h, s, v);
|
||||
}
|
||||
@@ -90,10 +90,7 @@ ccl_device float3 hsv_to_rgb(float3 hsv)
|
||||
s = hsv.y;
|
||||
v = hsv.z;
|
||||
|
||||
if(s == 0.0f) {
|
||||
rgb = make_float3(v, v, v);
|
||||
}
|
||||
else {
|
||||
if(s != 0.0f) {
|
||||
if(h == 1.0f)
|
||||
h = 0.0f;
|
||||
|
||||
@@ -105,12 +102,15 @@ ccl_device float3 hsv_to_rgb(float3 hsv)
|
||||
q = v*(1.0f-(s*f));
|
||||
t = v*(1.0f-(s*(1.0f-f)));
|
||||
|
||||
if(i == 0.0f) rgb = make_float3(v, t, p);
|
||||
if (i == 0.0f) rgb = make_float3(v, t, p);
|
||||
else if(i == 1.0f) rgb = make_float3(q, v, p);
|
||||
else if(i == 2.0f) rgb = make_float3(p, v, t);
|
||||
else if(i == 3.0f) rgb = make_float3(p, q, v);
|
||||
else if(i == 4.0f) rgb = make_float3(t, p, v);
|
||||
else rgb = make_float3(v, p, q);
|
||||
else rgb = make_float3(v, p, q);
|
||||
}
|
||||
else {
|
||||
rgb = make_float3(v, v, v);
|
||||
}
|
||||
|
||||
return rgb;
|
||||
|
@@ -1237,7 +1237,7 @@ ccl_device float compatible_powf(float x, float y)
|
||||
|
||||
ccl_device float safe_powf(float a, float b)
|
||||
{
|
||||
if(a < 0.0f && b != float_to_int(b))
|
||||
if(UNLIKELY(a < 0.0f && b != float_to_int(b)))
|
||||
return 0.0f;
|
||||
|
||||
return compatible_powf(a, b);
|
||||
@@ -1245,7 +1245,7 @@ ccl_device float safe_powf(float a, float b)
|
||||
|
||||
ccl_device float safe_logf(float a, float b)
|
||||
{
|
||||
if(a < 0.0f || b < 0.0f)
|
||||
if(UNLIKELY(a < 0.0f || b < 0.0f))
|
||||
return 0.0f;
|
||||
|
||||
return logf(a)/logf(b);
|
||||
@@ -1305,7 +1305,7 @@ ccl_device bool ray_aligned_disk_intersect(
|
||||
float3 disk_N = normalize_len(ray_P - disk_P, &disk_t);
|
||||
float div = dot(ray_D, disk_N);
|
||||
|
||||
if(div == 0.0f)
|
||||
if(UNLIKELY(div == 0.0f))
|
||||
return false;
|
||||
|
||||
/* compute t to intersection point */
|
||||
@@ -1335,7 +1335,7 @@ ccl_device bool ray_triangle_intersect(
|
||||
float3 s1 = cross(ray_D, e2);
|
||||
|
||||
const float divisor = dot(s1, e1);
|
||||
if(divisor == 0.0f)
|
||||
if(UNLIKELY(divisor == 0.0f))
|
||||
return false;
|
||||
|
||||
const float invdivisor = 1.0f/divisor;
|
||||
@@ -1378,7 +1378,7 @@ ccl_device bool ray_triangle_intersect_uv(
|
||||
float3 s1 = cross(ray_D, e2);
|
||||
|
||||
const float divisor = dot(s1, e1);
|
||||
if(divisor == 0.0f)
|
||||
if(UNLIKELY(divisor == 0.0f))
|
||||
return false;
|
||||
|
||||
const float invdivisor = 1.0f/divisor;
|
||||
|
@@ -75,7 +75,7 @@ static bool transform_matrix4_gj_inverse(float R[][4], float M[][4])
|
||||
}
|
||||
}
|
||||
|
||||
if(pivotsize == 0)
|
||||
if(UNLIKELY(pivotsize == 0.0f))
|
||||
return false;
|
||||
|
||||
if(pivot != i) {
|
||||
@@ -106,7 +106,7 @@ static bool transform_matrix4_gj_inverse(float R[][4], float M[][4])
|
||||
for(int i = 3; i >= 0; --i) {
|
||||
float f;
|
||||
|
||||
if((f = M[i][i]) == 0)
|
||||
if(UNLIKELY((f = M[i][i]) == 0.0f))
|
||||
return false;
|
||||
|
||||
for(int j = 0; j < 4; j++) {
|
||||
@@ -135,15 +135,16 @@ Transform transform_inverse(const Transform& tfm)
|
||||
memcpy(R, &tfmR, sizeof(R));
|
||||
memcpy(M, &tfm, sizeof(M));
|
||||
|
||||
if(!transform_matrix4_gj_inverse(R, M)) {
|
||||
if(UNLIKELY(!transform_matrix4_gj_inverse(R, M))) {
|
||||
/* matrix is degenerate (e.g. 0 scale on some axis), ideally we should
|
||||
* never be in this situation, but try to invert it anyway with tweak */
|
||||
M[0][0] += 1e-8f;
|
||||
M[1][1] += 1e-8f;
|
||||
M[2][2] += 1e-8f;
|
||||
|
||||
if(!transform_matrix4_gj_inverse(R, M))
|
||||
if(UNLIKELY(!transform_matrix4_gj_inverse(R, M))) {
|
||||
return transform_identity();
|
||||
}
|
||||
}
|
||||
|
||||
memcpy(&tfmR, R, sizeof(R));
|
||||
|
@@ -456,6 +456,43 @@ enum InterpolationType {
|
||||
INTERPOLATION_SMART = 3,
|
||||
};
|
||||
|
||||
|
||||
/* macros */
|
||||
|
||||
/* hints for branch prediction, only use in code that runs a _lot_ */
|
||||
#ifdef __GNUC__
|
||||
# define LIKELY(x) __builtin_expect(!!(x), 1)
|
||||
# define UNLIKELY(x) __builtin_expect(!!(x), 0)
|
||||
#else
|
||||
# define LIKELY(x) (x)
|
||||
# define UNLIKELY(x) (x)
|
||||
#endif
|
||||
|
||||
/* Causes warning:
|
||||
* incompatible types when assigning to type 'Foo' from type 'Bar'
|
||||
* ... the compiler optimizes away the temp var */
|
||||
#ifdef __GNUC__
|
||||
#define CHECK_TYPE(var, type) { \
|
||||
__typeof(var) *__tmp; \
|
||||
__tmp = (type *)NULL; \
|
||||
(void)__tmp; \
|
||||
} (void)0
|
||||
|
||||
#define CHECK_TYPE_PAIR(var_a, var_b) { \
|
||||
__typeof(var_a) *__tmp; \
|
||||
__tmp = (__typeof(var_b) *)NULL; \
|
||||
(void)__tmp; \
|
||||
} (void)0
|
||||
#else
|
||||
# define CHECK_TYPE(var, type)
|
||||
# define CHECK_TYPE_PAIR(var_a, var_b)
|
||||
#endif
|
||||
|
||||
/* can be used in simple macros */
|
||||
#define CHECK_TYPE_INLINE(val, type) \
|
||||
((void)(((type)0) != (val)))
|
||||
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
||||
#endif /* __UTIL_TYPES_H__ */
|
||||
|
Reference in New Issue
Block a user