Cycles: Cleanup, indent nested preprocessor directives

Quite straightforward, main trick is happening in path_source_replace_includes().

Reviewers: brecht, dingto, lukasstockner97, juicyfruit

Differential Revision: https://developer.blender.org/D1794
This commit is contained in:
Sergey Sharybin
2016-02-12 18:33:43 +01:00
parent 0e47e0cc9e
commit 700722f686
43 changed files with 649 additions and 639 deletions

View File

@@ -27,10 +27,10 @@
#include "../closure/bsdf_toon.h"
#include "../closure/bsdf_hair.h"
#ifdef __SUBSURFACE__
#include "../closure/bssrdf.h"
# include "../closure/bssrdf.h"
#endif
#ifdef __VOLUME__
#include "../closure/volume.h"
# include "../closure/volume.h"
#endif
CCL_NAMESPACE_BEGIN

View File

@@ -31,9 +31,9 @@
* without sse support on x86, it results in different results for float ops
* that you would otherwise expect to compare correctly */
#if !defined(__i386__) || defined(__SSE__)
#define NO_EXTENDED_PRECISION
# define NO_EXTENDED_PRECISION
#else
#define NO_EXTENDED_PRECISION volatile
# define NO_EXTENDED_PRECISION volatile
#endif
#include "geom_attribute.h"

View File

@@ -30,9 +30,9 @@ CCL_NAMESPACE_BEGIN
/* Don't inline intersect functions on GPU, this is faster */
#ifdef __KERNEL_GPU__
#define ccl_device_intersect ccl_device_noinline
# define ccl_device_intersect ccl_device_noinline
#else
#define ccl_device_intersect ccl_device_inline
# define ccl_device_intersect ccl_device_inline
#endif
/* BVH intersection function variations */
@@ -50,7 +50,7 @@ CCL_NAMESPACE_BEGIN
/* Common QBVH functions. */
#ifdef __QBVH__
#include "geom_qbvh.h"
# include "geom_qbvh.h"
#endif
/* Regular BVH traversal */
@@ -60,137 +60,137 @@ CCL_NAMESPACE_BEGIN
#include "geom_bvh_traversal.h"
#if defined(__INSTANCING__)
#define BVH_FUNCTION_NAME bvh_intersect_instancing
#define BVH_FUNCTION_FEATURES BVH_INSTANCING
#include "geom_bvh_traversal.h"
# define BVH_FUNCTION_NAME bvh_intersect_instancing
# define BVH_FUNCTION_FEATURES BVH_INSTANCING
# include "geom_bvh_traversal.h"
#endif
#if defined(__HAIR__)
#define BVH_FUNCTION_NAME bvh_intersect_hair
#define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR|BVH_HAIR_MINIMUM_WIDTH
#include "geom_bvh_traversal.h"
# define BVH_FUNCTION_NAME bvh_intersect_hair
# define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR|BVH_HAIR_MINIMUM_WIDTH
# include "geom_bvh_traversal.h"
#endif
#if defined(__OBJECT_MOTION__)
#define BVH_FUNCTION_NAME bvh_intersect_motion
#define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_MOTION
#include "geom_bvh_traversal.h"
# define BVH_FUNCTION_NAME bvh_intersect_motion
# define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_MOTION
# include "geom_bvh_traversal.h"
#endif
#if defined(__HAIR__) && defined(__OBJECT_MOTION__)
#define BVH_FUNCTION_NAME bvh_intersect_hair_motion
#define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR|BVH_HAIR_MINIMUM_WIDTH|BVH_MOTION
#include "geom_bvh_traversal.h"
# define BVH_FUNCTION_NAME bvh_intersect_hair_motion
# define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR|BVH_HAIR_MINIMUM_WIDTH|BVH_MOTION
# include "geom_bvh_traversal.h"
#endif
/* Subsurface scattering BVH traversal */
#if defined(__SUBSURFACE__)
#define BVH_FUNCTION_NAME bvh_intersect_subsurface
#define BVH_FUNCTION_FEATURES 0
#include "geom_bvh_subsurface.h"
# define BVH_FUNCTION_NAME bvh_intersect_subsurface
# define BVH_FUNCTION_FEATURES 0
# include "geom_bvh_subsurface.h"
#endif
#if defined(__SUBSURFACE__) && defined(__OBJECT_MOTION__)
#define BVH_FUNCTION_NAME bvh_intersect_subsurface_motion
#define BVH_FUNCTION_FEATURES BVH_MOTION
#include "geom_bvh_subsurface.h"
# define BVH_FUNCTION_NAME bvh_intersect_subsurface_motion
# define BVH_FUNCTION_FEATURES BVH_MOTION
# include "geom_bvh_subsurface.h"
#endif
/* Volume BVH traversal */
#if defined(__VOLUME__)
#define BVH_FUNCTION_NAME bvh_intersect_volume
#define BVH_FUNCTION_FEATURES 0
#include "geom_bvh_volume.h"
# define BVH_FUNCTION_NAME bvh_intersect_volume
# define BVH_FUNCTION_FEATURES 0
# include "geom_bvh_volume.h"
#endif
#if defined(__VOLUME__) && defined(__INSTANCING__)
#define BVH_FUNCTION_NAME bvh_intersect_volume_instancing
#define BVH_FUNCTION_FEATURES BVH_INSTANCING
#include "geom_bvh_volume.h"
# define BVH_FUNCTION_NAME bvh_intersect_volume_instancing
# define BVH_FUNCTION_FEATURES BVH_INSTANCING
# include "geom_bvh_volume.h"
#endif
#if defined(__VOLUME__) && defined(__HAIR__)
#define BVH_FUNCTION_NAME bvh_intersect_volume_hair
#define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR|BVH_HAIR_MINIMUM_WIDTH
#include "geom_bvh_volume.h"
# define BVH_FUNCTION_NAME bvh_intersect_volume_hair
# define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR|BVH_HAIR_MINIMUM_WIDTH
# include "geom_bvh_volume.h"
#endif
#if defined(__VOLUME__) && defined(__OBJECT_MOTION__)
#define BVH_FUNCTION_NAME bvh_intersect_volume_motion
#define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_MOTION
#include "geom_bvh_volume.h"
# define BVH_FUNCTION_NAME bvh_intersect_volume_motion
# define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_MOTION
# include "geom_bvh_volume.h"
#endif
#if defined(__VOLUME__) && defined(__HAIR__) && defined(__OBJECT_MOTION__)
#define BVH_FUNCTION_NAME bvh_intersect_volume_hair_motion
#define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR|BVH_HAIR_MINIMUM_WIDTH|BVH_MOTION
#include "geom_bvh_volume.h"
# define BVH_FUNCTION_NAME bvh_intersect_volume_hair_motion
# define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR|BVH_HAIR_MINIMUM_WIDTH|BVH_MOTION
# include "geom_bvh_volume.h"
#endif
/* Record all intersections - Shadow BVH traversal */
#if defined(__SHADOW_RECORD_ALL__)
#define BVH_FUNCTION_NAME bvh_intersect_shadow_all
#define BVH_FUNCTION_FEATURES 0
#include "geom_bvh_shadow.h"
# define BVH_FUNCTION_NAME bvh_intersect_shadow_all
# define BVH_FUNCTION_FEATURES 0
# include "geom_bvh_shadow.h"
#endif
#if defined(__SHADOW_RECORD_ALL__) && defined(__INSTANCING__)
#define BVH_FUNCTION_NAME bvh_intersect_shadow_all_instancing
#define BVH_FUNCTION_FEATURES BVH_INSTANCING
#include "geom_bvh_shadow.h"
# define BVH_FUNCTION_NAME bvh_intersect_shadow_all_instancing
# define BVH_FUNCTION_FEATURES BVH_INSTANCING
# include "geom_bvh_shadow.h"
#endif
#if defined(__SHADOW_RECORD_ALL__) && defined(__HAIR__)
#define BVH_FUNCTION_NAME bvh_intersect_shadow_all_hair
#define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR
#include "geom_bvh_shadow.h"
# define BVH_FUNCTION_NAME bvh_intersect_shadow_all_hair
# define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR
# include "geom_bvh_shadow.h"
#endif
#if defined(__SHADOW_RECORD_ALL__) && defined(__OBJECT_MOTION__)
#define BVH_FUNCTION_NAME bvh_intersect_shadow_all_motion
#define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_MOTION
#include "geom_bvh_shadow.h"
# define BVH_FUNCTION_NAME bvh_intersect_shadow_all_motion
# define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_MOTION
# include "geom_bvh_shadow.h"
#endif
#if defined(__SHADOW_RECORD_ALL__) && defined(__HAIR__) && defined(__OBJECT_MOTION__)
#define BVH_FUNCTION_NAME bvh_intersect_shadow_all_hair_motion
#define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR|BVH_MOTION
#include "geom_bvh_shadow.h"
# define BVH_FUNCTION_NAME bvh_intersect_shadow_all_hair_motion
# define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR|BVH_MOTION
# include "geom_bvh_shadow.h"
#endif
/* Record all intersections - Volume BVH traversal */
#if defined(__VOLUME_RECORD_ALL__)
#define BVH_FUNCTION_NAME bvh_intersect_volume_all
#define BVH_FUNCTION_FEATURES 0
#include "geom_bvh_volume_all.h"
# define BVH_FUNCTION_NAME bvh_intersect_volume_all
# define BVH_FUNCTION_FEATURES 0
# include "geom_bvh_volume_all.h"
#endif
#if defined(__VOLUME_RECORD_ALL__) && defined(__INSTANCING__)
#define BVH_FUNCTION_NAME bvh_intersect_volume_all_instancing
#define BVH_FUNCTION_FEATURES BVH_INSTANCING
#include "geom_bvh_volume_all.h"
# define BVH_FUNCTION_NAME bvh_intersect_volume_all_instancing
# define BVH_FUNCTION_FEATURES BVH_INSTANCING
# include "geom_bvh_volume_all.h"
#endif
#if defined(__VOLUME_RECORD_ALL__) && defined(__HAIR__)
#define BVH_FUNCTION_NAME bvh_intersect_volume_all_hair
#define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR|BVH_HAIR_MINIMUM_WIDTH
#include "geom_bvh_volume_all.h"
# define BVH_FUNCTION_NAME bvh_intersect_volume_all_hair
# define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR|BVH_HAIR_MINIMUM_WIDTH
# include "geom_bvh_volume_all.h"
#endif
#if defined(__VOLUME_RECORD_ALL__) && defined(__OBJECT_MOTION__)
#define BVH_FUNCTION_NAME bvh_intersect_volume_all_motion
#define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_MOTION
#include "geom_bvh_volume_all.h"
# define BVH_FUNCTION_NAME bvh_intersect_volume_all_motion
# define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_MOTION
# include "geom_bvh_volume_all.h"
#endif
#if defined(__VOLUME_RECORD_ALL__) && defined(__HAIR__) && defined(__OBJECT_MOTION__)
#define BVH_FUNCTION_NAME bvh_intersect_volume_all_hair_motion
#define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR|BVH_HAIR_MINIMUM_WIDTH|BVH_MOTION
#include "geom_bvh_volume_all.h"
# define BVH_FUNCTION_NAME bvh_intersect_volume_all_hair_motion
# define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR|BVH_HAIR_MINIMUM_WIDTH|BVH_MOTION
# include "geom_bvh_volume_all.h"
#endif
#undef BVH_FEATURE
@@ -208,10 +208,10 @@ ccl_device_intersect bool scene_intersect(KernelGlobals *kg,
{
#ifdef __OBJECT_MOTION__
if(kernel_data.bvh.have_motion) {
#ifdef __HAIR__
# ifdef __HAIR__
if(kernel_data.bvh.have_curves)
return bvh_intersect_hair_motion(kg, ray, isect, visibility, lcg_state, difl, extmax);
#endif /* __HAIR__ */
# endif /* __HAIR__ */
return bvh_intersect_motion(kg, ray, isect, visibility);
}
@@ -224,19 +224,19 @@ ccl_device_intersect bool scene_intersect(KernelGlobals *kg,
#ifdef __KERNEL_CPU__
#ifdef __INSTANCING__
# ifdef __INSTANCING__
if(kernel_data.bvh.have_instancing)
return bvh_intersect_instancing(kg, ray, isect, visibility);
#endif /* __INSTANCING__ */
# endif /* __INSTANCING__ */
return bvh_intersect(kg, ray, isect, visibility);
#else /* __KERNEL_CPU__ */
#ifdef __INSTANCING__
# ifdef __INSTANCING__
return bvh_intersect_instancing(kg, ray, isect, visibility);
#else
# else
return bvh_intersect(kg, ray, isect, visibility);
#endif /* __INSTANCING__ */
# endif /* __INSTANCING__ */
#endif /* __KERNEL_CPU__ */
}
@@ -271,71 +271,71 @@ ccl_device_intersect void scene_intersect_subsurface(KernelGlobals *kg,
#ifdef __SHADOW_RECORD_ALL__
ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals *kg, const Ray *ray, Intersection *isect, uint max_hits, uint *num_hits)
{
#ifdef __OBJECT_MOTION__
# ifdef __OBJECT_MOTION__
if(kernel_data.bvh.have_motion) {
#ifdef __HAIR__
# ifdef __HAIR__
if(kernel_data.bvh.have_curves)
return bvh_intersect_shadow_all_hair_motion(kg, ray, isect, max_hits, num_hits);
#endif /* __HAIR__ */
# endif /* __HAIR__ */
return bvh_intersect_shadow_all_motion(kg, ray, isect, max_hits, num_hits);
}
#endif /* __OBJECT_MOTION__ */
# endif /* __OBJECT_MOTION__ */
#ifdef __HAIR__
# ifdef __HAIR__
if(kernel_data.bvh.have_curves)
return bvh_intersect_shadow_all_hair(kg, ray, isect, max_hits, num_hits);
#endif /* __HAIR__ */
# endif /* __HAIR__ */
#ifdef __INSTANCING__
# ifdef __INSTANCING__
if(kernel_data.bvh.have_instancing)
return bvh_intersect_shadow_all_instancing(kg, ray, isect, max_hits, num_hits);
#endif /* __INSTANCING__ */
# endif /* __INSTANCING__ */
return bvh_intersect_shadow_all(kg, ray, isect, max_hits, num_hits);
}
#endif
#endif /* __SHADOW_RECORD_ALL__ */
#ifdef __VOLUME__
ccl_device_intersect bool scene_intersect_volume(KernelGlobals *kg,
const Ray *ray,
Intersection *isect)
{
#ifdef __OBJECT_MOTION__
# ifdef __OBJECT_MOTION__
if(kernel_data.bvh.have_motion) {
#ifdef __HAIR__
# ifdef __HAIR__
if(kernel_data.bvh.have_curves)
return bvh_intersect_volume_hair_motion(kg, ray, isect);
#endif /* __HAIR__ */
# endif /* __HAIR__ */
return bvh_intersect_volume_motion(kg, ray, isect);
}
#endif /* __OBJECT_MOTION__ */
# endif /* __OBJECT_MOTION__ */
#ifdef __HAIR__
# ifdef __HAIR__
if(kernel_data.bvh.have_curves)
return bvh_intersect_volume_hair(kg, ray, isect);
#endif /* __HAIR__ */
# endif /* __HAIR__ */
#ifdef __KERNEL_CPU__
# ifdef __KERNEL_CPU__
#ifdef __INSTANCING__
# ifdef __INSTANCING__
if(kernel_data.bvh.have_instancing)
return bvh_intersect_volume_instancing(kg, ray, isect);
#endif /* __INSTANCING__ */
# endif /* __INSTANCING__ */
return bvh_intersect_volume(kg, ray, isect);
#else /* __KERNEL_CPU__ */
# else /* __KERNEL_CPU__ */
#ifdef __INSTANCING__
# ifdef __INSTANCING__
return bvh_intersect_volume_instancing(kg, ray, isect);
#else
# else
return bvh_intersect_volume(kg, ray, isect);
#endif /* __INSTANCING__ */
# endif /* __INSTANCING__ */
#endif /* __KERNEL_CPU__ */
# endif /* __KERNEL_CPU__ */
}
#endif
#endif /* __VOLUME__ */
#ifdef __VOLUME_RECORD_ALL__
ccl_device_intersect uint scene_intersect_volume_all(KernelGlobals *kg,
@@ -343,30 +343,30 @@ ccl_device_intersect uint scene_intersect_volume_all(KernelGlobals *kg,
Intersection *isect,
const uint max_hits)
{
#ifdef __OBJECT_MOTION__
# ifdef __OBJECT_MOTION__
if(kernel_data.bvh.have_motion) {
#ifdef __HAIR__
# ifdef __HAIR__
if(kernel_data.bvh.have_curves)
return bvh_intersect_volume_all_hair_motion(kg, ray, isect, max_hits);
#endif /* __HAIR__ */
# endif /* __HAIR__ */
return bvh_intersect_volume_all_motion(kg, ray, isect, max_hits);
}
#endif /* __OBJECT_MOTION__ */
# endif /* __OBJECT_MOTION__ */
#ifdef __HAIR__
# ifdef __HAIR__
if(kernel_data.bvh.have_curves)
return bvh_intersect_volume_all_hair(kg, ray, isect, max_hits);
#endif /* __HAIR__ */
# endif /* __HAIR__ */
#ifdef __INSTANCING__
# ifdef __INSTANCING__
if(kernel_data.bvh.have_instancing)
return bvh_intersect_volume_all_instancing(kg, ray, isect, max_hits);
#endif /* __INSTANCING__ */
# endif /* __INSTANCING__ */
return bvh_intersect_volume_all(kg, ray, isect, max_hits);
}
#endif
#endif /* __VOLUME_RECORD_ALL__ */
/* Ray offset to avoid self intersection.

View File

@@ -18,7 +18,7 @@
*/
#ifdef __QBVH__
#include "geom_qbvh_shadow.h"
# include "geom_qbvh_shadow.h"
#endif
/* This is a template BVH traversal function, where various features can be
@@ -84,7 +84,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
ssef tsplat(0.0f, 0.0f, -isect_t, -isect_t);
gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
#endif
#endif /* __KERNEL_SSE2__ */
IsectPrecalc isect_precalc;
triangle_intersect_precalc(dir, &isect_precalc);
@@ -127,14 +127,14 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
NO_EXTENDED_PRECISION float c1max = min4(max(c1lox, c1hix), max(c1loy, c1hiy), max(c1loz, c1hiz), t);
/* decide which nodes to traverse next */
#ifdef __VISIBILITY_FLAG__
# ifdef __VISIBILITY_FLAG__
/* this visibility test gives a 5% performance hit, how to solve? */
traverseChild0 = (c0max >= c0min) && (__float_as_uint(cnodes.z) & PATH_RAY_SHADOW);
traverseChild1 = (c1max >= c1min) && (__float_as_uint(cnodes.w) & PATH_RAY_SHADOW);
#else
# else
traverseChild0 = (c0max >= c0min);
traverseChild1 = (c1max >= c1min);
#endif
# endif
#else // __KERNEL_SSE2__
/* Intersect two child bounding boxes, SSE3 version adapted from Embree */
@@ -154,14 +154,14 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
const sseb lrhit = tminmax <= shuffle<2, 3, 0, 1>(tminmax);
/* decide which nodes to traverse next */
#ifdef __VISIBILITY_FLAG__
# ifdef __VISIBILITY_FLAG__
/* this visibility test gives a 5% performance hit, how to solve? */
traverseChild0 = (movemask(lrhit) & 1) && (__float_as_uint(cnodes.z) & PATH_RAY_SHADOW);
traverseChild1 = (movemask(lrhit) & 2) && (__float_as_uint(cnodes.w) & PATH_RAY_SHADOW);
#else
# else
traverseChild0 = (movemask(lrhit) & 1);
traverseChild1 = (movemask(lrhit) & 2);
#endif
# endif
#endif // __KERNEL_SSE2__
nodeAddr = __float_as_int(cnodes.x);
@@ -301,24 +301,24 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
/* instance push */
object = kernel_tex_fetch(__prim_object, -primAddr-1);
#if BVH_FEATURE(BVH_MOTION)
# if BVH_FEATURE(BVH_MOTION)
bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect_t, &ob_itfm);
#else
# else
bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect_t);
#endif
# endif
triangle_intersect_precalc(dir, &isect_precalc);
num_hits_in_instance = 0;
isect_array->t = isect_t;
#if defined(__KERNEL_SSE2__)
# if defined(__KERNEL_SSE2__)
Psplat[0] = ssef(P.x);
Psplat[1] = ssef(P.y);
Psplat[2] = ssef(P.z);
tsplat = ssef(0.0f, 0.0f, -isect_t, -isect_t);
gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
#endif
# endif
++stackPtr;
kernel_assert(stackPtr < BVH_STACK_SIZE);
@@ -337,11 +337,11 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
if(num_hits_in_instance) {
float t_fac;
#if BVH_FEATURE(BVH_MOTION)
# if BVH_FEATURE(BVH_MOTION)
bvh_instance_motion_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac, &ob_itfm);
#else
# else
bvh_instance_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac);
#endif
# endif
triangle_intersect_precalc(dir, &isect_precalc);
@@ -352,25 +352,25 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
else {
float ignore_t = FLT_MAX;
#if BVH_FEATURE(BVH_MOTION)
# if BVH_FEATURE(BVH_MOTION)
bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &ignore_t, &ob_itfm);
#else
# else
bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &ignore_t);
#endif
# endif
triangle_intersect_precalc(dir, &isect_precalc);
}
isect_t = tmax;
isect_array->t = isect_t;
#if defined(__KERNEL_SSE2__)
# if defined(__KERNEL_SSE2__)
Psplat[0] = ssef(P.x);
Psplat[1] = ssef(P.y);
Psplat[2] = ssef(P.z);
tsplat = ssef(0.0f, 0.0f, -isect_t, -isect_t);
gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
#endif
# endif
object = OBJECT_NONE;
nodeAddr = traversalStack[stackPtr];

View File

@@ -18,7 +18,7 @@
*/
#ifdef __QBVH__
#include "geom_qbvh_subsurface.h"
# include "geom_qbvh_subsurface.h"
#endif
/* This is a template BVH traversal function for subsurface scattering, where

View File

@@ -18,7 +18,7 @@
*/
#ifdef __QBVH__
#include "geom_qbvh_traversal.h"
# include "geom_qbvh_traversal.h"
#endif
/* This is a template BVH traversal function, where various features can be
@@ -136,7 +136,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
NO_EXTENDED_PRECISION float c1min = max4(min(c1lox, c1hix), min(c1loy, c1hiy), min(c1loz, c1hiz), 0.0f);
NO_EXTENDED_PRECISION float c1max = min4(max(c1lox, c1hix), max(c1loy, c1hiy), max(c1loz, c1hiz), t);
#if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
# if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
if(difl != 0.0f) {
float hdiff = 1.0f + difl;
float ldiff = 1.0f - difl;
@@ -149,17 +149,17 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
c1max = min(hdiff * c1max, c1max + extmax);
}
}
#endif
# endif
/* decide which nodes to traverse next */
#ifdef __VISIBILITY_FLAG__
# ifdef __VISIBILITY_FLAG__
/* this visibility test gives a 5% performance hit, how to solve? */
traverseChild0 = (c0max >= c0min) && (__float_as_uint(cnodes.z) & visibility);
traverseChild1 = (c1max >= c1min) && (__float_as_uint(cnodes.w) & visibility);
#else
# else
traverseChild0 = (c0max >= c0min);
traverseChild1 = (c1max >= c1min);
#endif
# endif
#else // __KERNEL_SSE2__
/* Intersect two child bounding boxes, SSE3 version adapted from Embree */
@@ -177,7 +177,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
ssef minmax = max(max(tminmaxx, tminmaxy), max(tminmaxz, tsplat));
const ssef tminmax = minmax ^ pn;
#if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
# if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
if(difl != 0.0f) {
float4 *tminmaxview = (float4*)&tminmax;
float &c0min = tminmaxview->x, &c1min = tminmaxview->y;
@@ -194,19 +194,19 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
c1max = min(hdiff * c1max, c1max + extmax);
}
}
#endif
# endif
const sseb lrhit = tminmax <= shuffle<2, 3, 0, 1>(tminmax);
/* decide which nodes to traverse next */
#ifdef __VISIBILITY_FLAG__
# ifdef __VISIBILITY_FLAG__
/* this visibility test gives a 5% performance hit, how to solve? */
traverseChild0 = (movemask(lrhit) & 1) && (__float_as_uint(cnodes.z) & visibility);
traverseChild1 = (movemask(lrhit) & 2) && (__float_as_uint(cnodes.w) & visibility);
#else
# else
traverseChild0 = (movemask(lrhit) & 1);
traverseChild1 = (movemask(lrhit) & 2);
#endif
# endif
#endif // __KERNEL_SSE2__
nodeAddr = __float_as_int(cnodes.x);
@@ -287,20 +287,20 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
#if BVH_FEATURE(BVH_MOTION)
case PRIMITIVE_MOTION_TRIANGLE: {
for(; primAddr < primAddr2; primAddr++) {
#if defined(__KERNEL_DEBUG__)
# if defined(__KERNEL_DEBUG__)
isect->num_traversal_steps++;
#endif
# endif
kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type);
if(motion_triangle_intersect(kg, isect, P, dir, ray->time, visibility, object, primAddr)) {
/* shadow ray early termination */
#if defined(__KERNEL_SSE2__)
# if defined(__KERNEL_SSE2__)
if(visibility == PATH_RAY_SHADOW_OPAQUE)
return true;
tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t);
#else
# else
if(visibility == PATH_RAY_SHADOW_OPAQUE)
return true;
#endif
# endif
}
}
break;
@@ -310,9 +310,9 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
case PRIMITIVE_CURVE:
case PRIMITIVE_MOTION_CURVE: {
for(; primAddr < primAddr2; primAddr++) {
#if defined(__KERNEL_DEBUG__)
# if defined(__KERNEL_DEBUG__)
isect->num_traversal_steps++;
#endif
# endif
kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type);
bool hit;
if(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE)
@@ -321,14 +321,14 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
hit = bvh_curve_intersect(kg, isect, P, dir, visibility, object, primAddr, ray->time, type, lcg_state, difl, extmax);
if(hit) {
/* shadow ray early termination */
#if defined(__KERNEL_SSE2__)
# if defined(__KERNEL_SSE2__)
if(visibility == PATH_RAY_SHADOW_OPAQUE)
return true;
tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t);
#else
# else
if(visibility == PATH_RAY_SHADOW_OPAQUE)
return true;
#endif
# endif
}
}
break;
@@ -341,14 +341,14 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
/* instance push */
object = kernel_tex_fetch(__prim_object, -primAddr-1);
#if BVH_FEATURE(BVH_MOTION)
# if BVH_FEATURE(BVH_MOTION)
bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect->t, &ob_itfm);
#else
# else
bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect->t);
#endif
# endif
triangle_intersect_precalc(dir, &isect_precalc);
#if defined(__KERNEL_SSE2__)
# if defined(__KERNEL_SSE2__)
Psplat[0] = ssef(P.x);
Psplat[1] = ssef(P.y);
Psplat[2] = ssef(P.z);
@@ -356,7 +356,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t);
gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
#endif
# endif
++stackPtr;
kernel_assert(stackPtr < BVH_STACK_SIZE);
@@ -364,9 +364,9 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
nodeAddr = kernel_tex_fetch(__object_node, object);
#if defined(__KERNEL_DEBUG__)
# if defined(__KERNEL_DEBUG__)
isect->num_traversed_instances++;
#endif
# endif
}
}
#endif /* FEATURE(BVH_INSTANCING) */
@@ -377,14 +377,14 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
kernel_assert(object != OBJECT_NONE);
/* instance pop */
#if BVH_FEATURE(BVH_MOTION)
# if BVH_FEATURE(BVH_MOTION)
bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &isect->t, &ob_itfm);
#else
# else
bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &isect->t);
#endif
# endif
triangle_intersect_precalc(dir, &isect_precalc);
#if defined(__KERNEL_SSE2__)
# if defined(__KERNEL_SSE2__)
Psplat[0] = ssef(P.x);
Psplat[1] = ssef(P.y);
Psplat[2] = ssef(P.z);
@@ -392,7 +392,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t);
gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
#endif
# endif
object = OBJECT_NONE;
nodeAddr = traversalStack[stackPtr];

View File

@@ -266,15 +266,15 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
if(object_flag & SD_OBJECT_HAS_VOLUME) {
#if BVH_FEATURE(BVH_MOTION)
# if BVH_FEATURE(BVH_MOTION)
bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect->t, &ob_itfm);
#else
# else
bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect->t);
#endif
# endif
triangle_intersect_precalc(dir, &isect_precalc);
#if defined(__KERNEL_SSE2__)
# if defined(__KERNEL_SSE2__)
Psplat[0] = ssef(P.x);
Psplat[1] = ssef(P.y);
Psplat[2] = ssef(P.z);
@@ -282,7 +282,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t);
gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
#endif
# endif
++stackPtr;
kernel_assert(stackPtr < BVH_STACK_SIZE);
@@ -306,15 +306,15 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
kernel_assert(object != OBJECT_NONE);
/* instance pop */
#if BVH_FEATURE(BVH_MOTION)
# if BVH_FEATURE(BVH_MOTION)
bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &isect->t, &ob_itfm);
#else
# else
bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &isect->t);
#endif
# endif
triangle_intersect_precalc(dir, &isect_precalc);
#if defined(__KERNEL_SSE2__)
# if defined(__KERNEL_SSE2__)
Psplat[0] = ssef(P.x);
Psplat[1] = ssef(P.y);
Psplat[2] = ssef(P.z);
@@ -322,7 +322,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t);
gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
#endif
# endif
object = OBJECT_NONE;
nodeAddr = traversalStack[stackPtr];

View File

@@ -229,12 +229,12 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
isect_array->t = isect_t;
if(num_hits == max_hits) {
#if BVH_FEATURE(BVH_INSTANCING)
#if BVH_FEATURE(BVH_MOTION)
# if BVH_FEATURE(BVH_MOTION)
float t_fac = 1.0f / len(transform_direction(&ob_itfm, dir));
#else
# else
Transform itfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);
float t_fac = 1.0f / len(transform_direction(&itfm, dir));
#endif
# endif
for(int i = 0; i < num_hits_in_instance; i++) {
(isect_array-i-1)->t *= t_fac;
}
@@ -261,29 +261,29 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
/* Move on to next entry in intersections array. */
isect_array++;
num_hits++;
#if BVH_FEATURE(BVH_INSTANCING)
# if BVH_FEATURE(BVH_INSTANCING)
num_hits_in_instance++;
#endif
# endif
isect_array->t = isect_t;
if(num_hits == max_hits) {
#if BVH_FEATURE(BVH_INSTANCING)
# if BVH_FEATURE(BVH_INSTANCING)
# if BVH_FEATURE(BVH_MOTION)
float t_fac = 1.0f / len(transform_direction(&ob_itfm, dir));
# else
Transform itfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);
float t_fac = 1.0f / len(transform_direction(&itfm, dir));
#endif
# endif
for(int i = 0; i < num_hits_in_instance; i++) {
(isect_array-i-1)->t *= t_fac;
}
#endif /* BVH_FEATURE(BVH_INSTANCING) */
# endif /* BVH_FEATURE(BVH_INSTANCING) */
return num_hits;
}
}
}
break;
}
#endif
#endif /* BVH_MOTION */
#if BVH_FEATURE(BVH_HAIR)
case PRIMITIVE_CURVE:
case PRIMITIVE_MOTION_CURVE: {
@@ -304,29 +304,29 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
/* Move on to next entry in intersections array. */
isect_array++;
num_hits++;
#if BVH_FEATURE(BVH_INSTANCING)
# if BVH_FEATURE(BVH_INSTANCING)
num_hits_in_instance++;
#endif
# endif
isect_array->t = isect_t;
if(num_hits == max_hits) {
#if BVH_FEATURE(BVH_INSTANCING)
# if BVH_FEATURE(BVH_INSTANCING)
# if BVH_FEATURE(BVH_MOTION)
float t_fac = 1.0f / len(transform_direction(&ob_itfm, dir));
# else
Transform itfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);
float t_fac = 1.0f / len(transform_direction(&itfm, dir));
#endif
# endif
for(int i = 0; i < num_hits_in_instance; i++) {
(isect_array-i-1)->t *= t_fac;
}
#endif /* BVH_FEATURE(BVH_INSTANCING) */
# endif /* BVH_FEATURE(BVH_INSTANCING) */
return num_hits;
}
}
}
break;
}
#endif
#endif /* BVH_HAIR */
default: {
break;
}
@@ -340,17 +340,17 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
if(object_flag & SD_OBJECT_HAS_VOLUME) {
#if BVH_FEATURE(BVH_MOTION)
# if BVH_FEATURE(BVH_MOTION)
bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect_t, &ob_itfm);
#else
# else
bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect_t);
#endif
# endif
triangle_intersect_precalc(dir, &isect_precalc);
num_hits_in_instance = 0;
isect_array->t = isect_t;
#if defined(__KERNEL_SSE2__)
# if defined(__KERNEL_SSE2__)
Psplat[0] = ssef(P.x);
Psplat[1] = ssef(P.y);
Psplat[2] = ssef(P.z);
@@ -358,7 +358,7 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
tsplat = ssef(0.0f, 0.0f, -isect_t, -isect_t);
gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
#endif
# endif
++stackPtr;
kernel_assert(stackPtr < BVH_STACK_SIZE);
@@ -383,11 +383,11 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
if(num_hits_in_instance) {
float t_fac;
#if BVH_FEATURE(BVH_MOTION)
# if BVH_FEATURE(BVH_MOTION)
bvh_instance_motion_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac, &ob_itfm);
#else
# else
bvh_instance_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac);
#endif
# endif
triangle_intersect_precalc(dir, &isect_precalc);
/* Scale isect->t to adjust for instancing. */
for(int i = 0; i < num_hits_in_instance; i++) {
@@ -396,18 +396,18 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
}
else {
float ignore_t = FLT_MAX;
#if BVH_FEATURE(BVH_MOTION)
# if BVH_FEATURE(BVH_MOTION)
bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &ignore_t, &ob_itfm);
#else
# else
bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &ignore_t);
#endif
# endif
triangle_intersect_precalc(dir, &isect_precalc);
}
isect_t = tmax;
isect_array->t = isect_t;
#if defined(__KERNEL_SSE2__)
# if defined(__KERNEL_SSE2__)
Psplat[0] = ssef(P.x);
Psplat[1] = ssef(P.y);
Psplat[2] = ssef(P.z);
@@ -415,7 +415,7 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
tsplat = ssef(0.0f, 0.0f, -isect_t, -isect_t);
gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
#endif
# endif
object = OBJECT_NONE;
nodeAddr = traversalStack[stackPtr];

View File

@@ -626,9 +626,9 @@ ccl_device_inline bool bvh_curve_intersect(KernelGlobals *kg, Intersection *isec
{
/* define few macros to minimize code duplication for SSE */
#ifndef __KERNEL_SSE2__
#define len3_squared(x) len_squared(x)
#define len3(x) len(x)
#define dot3(x, y) dot(x, y)
# define len3_squared(x) len_squared(x)
# define len3(x) len(x)
# define dot3(x, y) dot(x, y)
#endif
int segment = PRIMITIVE_UNPACK_SEGMENT(type);
@@ -850,10 +850,10 @@ ccl_device_inline bool bvh_curve_intersect(KernelGlobals *kg, Intersection *isec
return false;
#ifndef __KERNEL_SSE2__
#undef len3_squared
#undef len3
#undef dot3
#endif
# undef len3_squared
# undef len3
# undef dot3
# endif
}
ccl_device_inline float3 curvetangent(float t, float3 p0, float3 p1, float3 p2, float3 p3)

View File

@@ -133,11 +133,11 @@ ccl_device_inline float3 motion_triangle_refine(KernelGlobals *kg, ShaderData *s
if(UNLIKELY(t == 0.0f)) {
return P;
}
#ifdef __OBJECT_MOTION__
# ifdef __OBJECT_MOTION__
Transform tfm = ccl_fetch(sd, ob_itfm);
#else
# else
Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_INVERSE_TRANSFORM);
#endif
# endif
P = transform_point(&tfm, P);
D = transform_direction(&tfm, D*t);
@@ -160,11 +160,11 @@ ccl_device_inline float3 motion_triangle_refine(KernelGlobals *kg, ShaderData *s
P = P + D*rt;
if(isect->object != OBJECT_NONE) {
#ifdef __OBJECT_MOTION__
# ifdef __OBJECT_MOTION__
Transform tfm = ccl_fetch(sd, ob_tfm);
#else
# else
Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_TRANSFORM);
#endif
# endif
P = transform_point(&tfm, P);
}
@@ -189,13 +189,13 @@ float3 motion_triangle_refine_subsurface(KernelGlobals *kg, ShaderData *sd, cons
float3 D = ray->D;
float t = isect->t;
#ifdef __INTERSECTION_REFINE__
# ifdef __INTERSECTION_REFINE__
if(isect->object != OBJECT_NONE) {
#ifdef __OBJECT_MOTION__
# ifdef __OBJECT_MOTION__
Transform tfm = ccl_fetch(sd, ob_itfm);
#else
# else
Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_INVERSE_TRANSFORM);
#endif
# endif
P = transform_point(&tfm, P);
D = transform_direction(&tfm, D);
@@ -217,19 +217,19 @@ float3 motion_triangle_refine_subsurface(KernelGlobals *kg, ShaderData *sd, cons
P = P + D*rt;
if(isect->object != OBJECT_NONE) {
#ifdef __OBJECT_MOTION__
# ifdef __OBJECT_MOTION__
Transform tfm = ccl_fetch(sd, ob_tfm);
#else
# else
Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_TRANSFORM);
#endif
# endif
P = transform_point(&tfm, P);
}
return P;
#else
# else
return P + D*t;
#endif
# endif
}
#endif

View File

@@ -109,11 +109,11 @@ ccl_device float3 primitive_tangent(KernelGlobals *kg, ShaderData *sd)
{
#ifdef __HAIR__
if(ccl_fetch(sd, type) & PRIMITIVE_ALL_CURVE)
#ifdef __DPDU__
# ifdef __DPDU__
return normalize(ccl_fetch(sd, dPdu));
#else
# else
return make_float3(0.0f, 0.0f, 0.0f);
#endif
# endif
#endif
/* try to create spherical tangent from generated coordinates */

View File

@@ -316,11 +316,11 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
/* Instance push. */
object = kernel_tex_fetch(__prim_object, -primAddr-1);
#if BVH_FEATURE(BVH_MOTION)
# if BVH_FEATURE(BVH_MOTION)
bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect_t, &ob_itfm);
#else
# else
bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect_t);
#endif
# endif
num_hits_in_instance = 0;
isect_array->t = isect_t;
@@ -330,12 +330,12 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; }
tfar = ssef(isect_t);
idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
#ifdef __KERNEL_AVX2__
# ifdef __KERNEL_AVX2__
P_idir = P*idir;
P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
#else
# else
org = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
#endif
# endif
triangle_intersect_precalc(dir, &isect_precalc);
++stackPtr;
@@ -356,11 +356,11 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
if(num_hits_in_instance) {
float t_fac;
#if BVH_FEATURE(BVH_MOTION)
# if BVH_FEATURE(BVH_MOTION)
bvh_instance_motion_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac, &ob_itfm);
#else
# else
bvh_instance_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac);
#endif
# endif
/* scale isect->t to adjust for instancing */
for(int i = 0; i < num_hits_in_instance; i++)
@@ -369,11 +369,11 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
else {
float ignore_t = FLT_MAX;
#if BVH_FEATURE(BVH_MOTION)
# if BVH_FEATURE(BVH_MOTION)
bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &ignore_t, &ob_itfm);
#else
# else
bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &ignore_t);
#endif
# endif
}
isect_t = tmax;
@@ -384,12 +384,12 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; }
tfar = ssef(tmax);
idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
#ifdef __KERNEL_AVX2__
# ifdef __KERNEL_AVX2__
P_idir = P*idir;
P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
#else
# else
org = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
#endif
# endif
triangle_intersect_precalc(dir, &isect_precalc);
object = OBJECT_NONE;

View File

@@ -134,11 +134,11 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
traverseChild = qbvh_node_intersect_robust(kg,
tnear,
tfar,
#ifdef __KERNEL_AVX2__
# ifdef __KERNEL_AVX2__
P_idir4,
#else
# else
org,
#endif
# endif
idir4,
near_x, near_y, near_z,
far_x, far_y, far_z,
@@ -147,7 +147,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
&dist);
}
else
#endif
#endif /* BVH_HAIR_MINIMUM_WIDTH */
{
traverseChild = qbvh_node_intersect(kg,
tnear,
@@ -311,9 +311,9 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
#if BVH_FEATURE(BVH_MOTION)
case PRIMITIVE_MOTION_TRIANGLE: {
for(; primAddr < primAddr2; primAddr++) {
#if defined(__KERNEL_DEBUG__)
# if defined(__KERNEL_DEBUG__)
isect->num_traversal_steps++;
#endif
# endif
kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type);
if(motion_triangle_intersect(kg, isect, P, dir, ray->time, visibility, object, primAddr)) {
tfar = ssef(isect->t);
@@ -329,9 +329,9 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
case PRIMITIVE_CURVE:
case PRIMITIVE_MOTION_CURVE: {
for(; primAddr < primAddr2; primAddr++) {
#if defined(__KERNEL_DEBUG__)
# if defined(__KERNEL_DEBUG__)
isect->num_traversal_steps++;
#endif
# endif
kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type);
bool hit;
if(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE)
@@ -355,23 +355,23 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
/* Instance push. */
object = kernel_tex_fetch(__prim_object, -primAddr-1);
#if BVH_FEATURE(BVH_MOTION)
# if BVH_FEATURE(BVH_MOTION)
qbvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect->t, &nodeDist, &ob_itfm);
#else
# else
qbvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect->t, &nodeDist);
#endif
# endif
if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; }
if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; }
if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; }
tfar = ssef(isect->t);
idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
#ifdef __KERNEL_AVX2__
# ifdef __KERNEL_AVX2__
P_idir = P*idir;
P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
#else
# else
org = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
#endif
# endif
triangle_intersect_precalc(dir, &isect_precalc);
++stackPtr;
@@ -381,9 +381,9 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
nodeAddr = kernel_tex_fetch(__object_node, object);
#if defined(__KERNEL_DEBUG__)
# if defined(__KERNEL_DEBUG__)
isect->num_traversed_instances++;
#endif
# endif
}
}
#endif /* FEATURE(BVH_INSTANCING) */
@@ -394,23 +394,23 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
kernel_assert(object != OBJECT_NONE);
/* Instance pop. */
#if BVH_FEATURE(BVH_MOTION)
# if BVH_FEATURE(BVH_MOTION)
bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &isect->t, &ob_itfm);
#else
# else
bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &isect->t);
#endif
# endif
if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; }
if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; }
if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; }
tfar = ssef(isect->t);
idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
#ifdef __KERNEL_AVX2__
# ifdef __KERNEL_AVX2__
P_idir = P*idir;
P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
#else
# else
org = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
#endif
# endif
triangle_intersect_precalc(dir, &isect_precalc);
object = OBJECT_NONE;

View File

@@ -280,23 +280,23 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
if(object_flag & SD_OBJECT_HAS_VOLUME) {
#if BVH_FEATURE(BVH_MOTION)
# if BVH_FEATURE(BVH_MOTION)
bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect->t, &ob_itfm);
#else
# else
bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect->t);
#endif
# endif
if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; }
if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; }
if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; }
tfar = ssef(isect->t);
idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
#ifdef __KERNEL_AVX2__
# ifdef __KERNEL_AVX2__
P_idir = P*idir;
P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
#else
# else
org = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
#endif
# endif
triangle_intersect_precalc(dir, &isect_precalc);
++stackPtr;
@@ -321,23 +321,23 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
kernel_assert(object != OBJECT_NONE);
/* Instance pop. */
#if BVH_FEATURE(BVH_MOTION)
# if BVH_FEATURE(BVH_MOTION)
bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &isect->t, &ob_itfm);
#else
# else
bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &isect->t);
#endif
# endif
if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; }
if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; }
if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; }
tfar = ssef(isect->t);
idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
#ifdef __KERNEL_AVX2__
# ifdef __KERNEL_AVX2__
P_idir = P*idir;
P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
#else
# else
org = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
#endif
# endif
triangle_intersect_precalc(dir, &isect_precalc);
object = OBJECT_NONE;

View File

@@ -246,12 +246,12 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
isect_array->t = isect_t;
if(num_hits == max_hits) {
#if BVH_FEATURE(BVH_INSTANCING)
#if BVH_FEATURE(BVH_MOTION)
# if BVH_FEATURE(BVH_MOTION)
float t_fac = 1.0f / len(transform_direction(&ob_itfm, dir));
#else
# else
Transform itfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);
float t_fac = 1.0f / len(transform_direction(&itfm, dir));
#endif
# endif
for(int i = 0; i < num_hits_in_instance; i++) {
(isect_array-i-1)->t *= t_fac;
}
@@ -278,22 +278,22 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
/* Move on to next entry in intersections array. */
isect_array++;
num_hits++;
#if BVH_FEATURE(BVH_INSTANCING)
# if BVH_FEATURE(BVH_INSTANCING)
num_hits_in_instance++;
#endif
# endif
isect_array->t = isect_t;
if(num_hits == max_hits) {
#if BVH_FEATURE(BVH_INSTANCING)
# if BVH_FEATURE(BVH_INSTANCING)
# if BVH_FEATURE(BVH_MOTION)
float t_fac = 1.0f / len(transform_direction(&ob_itfm, dir));
# else
Transform itfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);
float t_fac = 1.0f / len(transform_direction(&itfm, dir));
#endif
# endif
for(int i = 0; i < num_hits_in_instance; i++) {
(isect_array-i-1)->t *= t_fac;
}
#endif /* BVH_FEATURE(BVH_INSTANCING) */
# endif /* BVH_FEATURE(BVH_INSTANCING) */
return num_hits;
}
}
@@ -321,29 +321,29 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
/* Move on to next entry in intersections array. */
isect_array++;
num_hits++;
#if BVH_FEATURE(BVH_INSTANCING)
# if BVH_FEATURE(BVH_INSTANCING)
num_hits_in_instance++;
#endif
# endif
isect_array->t = isect_t;
if(num_hits == max_hits) {
#if BVH_FEATURE(BVH_INSTANCING)
# if BVH_FEATURE(BVH_INSTANCING)
# if BVH_FEATURE(BVH_MOTION)
float t_fac = 1.0f / len(transform_direction(&ob_itfm, dir));
# else
Transform itfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);
float t_fac = 1.0f / len(transform_direction(&itfm, dir));
#endif
# endif
for(int i = 0; i < num_hits_in_instance; i++) {
(isect_array-i-1)->t *= t_fac;
}
#endif /* BVH_FEATURE(BVH_INSTANCING) */
# endif /* BVH_FEATURE(BVH_INSTANCING) */
return num_hits;
}
}
}
break;
}
#endif
#endif /* BVH_HAIR */
}
}
#if BVH_FEATURE(BVH_INSTANCING)
@@ -354,23 +354,23 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
if(object_flag & SD_OBJECT_HAS_VOLUME) {
#if BVH_FEATURE(BVH_MOTION)
# if BVH_FEATURE(BVH_MOTION)
bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect_t, &ob_itfm);
#else
# else
bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect_t);
#endif
# endif
if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; }
if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; }
if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; }
tfar = ssef(isect_t);
idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
#ifdef __KERNEL_AVX2__
# ifdef __KERNEL_AVX2__
P_idir = P*idir;
P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
#else
# else
org = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
#endif
# endif
triangle_intersect_precalc(dir, &isect_precalc);
num_hits_in_instance = 0;
isect_array->t = isect_t;
@@ -399,11 +399,11 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
/* Instance pop. */
if(num_hits_in_instance) {
float t_fac;
#if BVH_FEATURE(BVH_MOTION)
# if BVH_FEATURE(BVH_MOTION)
bvh_instance_motion_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac, &ob_itfm);
#else
# else
bvh_instance_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac);
#endif
# endif
triangle_intersect_precalc(dir, &isect_precalc);
/* Scale isect->t to adjust for instancing. */
for(int i = 0; i < num_hits_in_instance; i++) {
@@ -412,11 +412,11 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
}
else {
float ignore_t = FLT_MAX;
#if BVH_FEATURE(BVH_MOTION)
# if BVH_FEATURE(BVH_MOTION)
bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &ignore_t, &ob_itfm);
#else
# else
bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &ignore_t);
#endif
# endif
triangle_intersect_precalc(dir, &isect_precalc);
}
@@ -425,12 +425,12 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; }
tfar = ssef(isect_t);
idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
#ifdef __KERNEL_AVX2__
# ifdef __KERNEL_AVX2__
P_idir = P*idir;
P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
#else
# else
org = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
#endif
# endif
triangle_intersect_precalc(dir, &isect_precalc);
isect_t = tmax;
isect_array->t = isect_t;

View File

@@ -316,11 +316,11 @@ ccl_device_inline float3 triangle_refine(KernelGlobals *kg,
if(UNLIKELY(t == 0.0f)) {
return P;
}
#ifdef __OBJECT_MOTION__
# ifdef __OBJECT_MOTION__
Transform tfm = ccl_fetch(sd, ob_itfm);
#else
# else
Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_INVERSE_TRANSFORM);
#endif
# endif
P = transform_point(&tfm, P);
D = transform_direction(&tfm, D*t);
@@ -342,11 +342,11 @@ ccl_device_inline float3 triangle_refine(KernelGlobals *kg,
P = P + D*rt;
if(isect->object != OBJECT_NONE) {
#ifdef __OBJECT_MOTION__
# ifdef __OBJECT_MOTION__
Transform tfm = ccl_fetch(sd, ob_tfm);
#else
# else
Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_TRANSFORM);
#endif
# endif
P = transform_point(&tfm, P);
}

View File

@@ -92,16 +92,16 @@ ccl_device void camera_sample_perspective(KernelGlobals *kg, float raster_x, flo
#ifdef __CAMERA_MOTION__
if(kernel_data.cam.have_motion) {
#ifdef __KERNEL_OPENCL__
# ifdef __KERNEL_OPENCL__
const MotionTransform tfm = kernel_data.cam.motion;
transform_motion_interpolate(&cameratoworld,
((const DecompMotionTransform*)&tfm),
ray->time);
#else
# else
transform_motion_interpolate(&cameratoworld,
((const DecompMotionTransform*)&kernel_data.cam.motion),
ray->time);
#endif
# endif
}
#endif
@@ -176,16 +176,16 @@ ccl_device void camera_sample_orthographic(KernelGlobals *kg, float raster_x, fl
#ifdef __CAMERA_MOTION__
if(kernel_data.cam.have_motion) {
#ifdef __KERNEL_OPENCL__
# ifdef __KERNEL_OPENCL__
const MotionTransform tfm = kernel_data.cam.motion;
transform_motion_interpolate(&cameratoworld,
(const DecompMotionTransform*)&tfm,
ray->time);
#else
# else
transform_motion_interpolate(&cameratoworld,
(const DecompMotionTransform*)&kernel_data.cam.motion,
ray->time);
#endif
# endif
}
#endif
@@ -260,16 +260,16 @@ ccl_device void camera_sample_panorama(KernelGlobals *kg, float raster_x, float
#ifdef __CAMERA_MOTION__
if(kernel_data.cam.have_motion) {
#ifdef __KERNEL_OPENCL__
# ifdef __KERNEL_OPENCL__
const MotionTransform tfm = kernel_data.cam.motion;
transform_motion_interpolate(&cameratoworld,
(const DecompMotionTransform*)&tfm,
ray->time);
#else
# else
transform_motion_interpolate(&cameratoworld,
(const DecompMotionTransform*)&kernel_data.cam.motion,
ray->time);
#endif
# endif
}
#endif

View File

@@ -71,13 +71,13 @@ typedef texture<uchar4, 2, cudaReadModeNormalizedFloat> texture_image_uchar4;
* of textures. On earlier cards this seems slower, but on Titan it is
* actually slightly faster in tests. */
#if __CUDA_ARCH__ < 300
#define __KERNEL_CUDA_TEX_STORAGE__
# define __KERNEL_CUDA_TEX_STORAGE__
#endif
#ifdef __KERNEL_CUDA_TEX_STORAGE__
#define kernel_tex_fetch(t, index) tex1Dfetch(t, index)
# define kernel_tex_fetch(t, index) tex1Dfetch(t, index)
#else
#define kernel_tex_fetch(t, index) t[(index)]
# define kernel_tex_fetch(t, index) t[(index)]
#endif
#define kernel_tex_image_interp(t, x, y) tex2D(t, x, y)
#define kernel_tex_image_interp_3d(t, x, y, z) tex3D(t, x, y, z)

View File

@@ -25,9 +25,9 @@
#define CCL_NAMESPACE_END
#ifdef __CL_NOINLINE__
#define ccl_noinline __attribute__((noinline))
# define ccl_noinline __attribute__((noinline))
#else
#define ccl_noinline
# define ccl_noinline
#endif
/* in opencl all functions are device functions, so leave this empty */
@@ -41,9 +41,9 @@
#define ccl_private __private
#ifdef __SPLIT_KERNEL__
#define ccl_addr_space __global
# define ccl_addr_space __global
#else
#define ccl_addr_space
# define ccl_addr_space
#endif
/* Selective nodes compilation. */
@@ -59,25 +59,25 @@
/* make_type definitions with opencl style element initializers */
#ifdef make_float2
#undef make_float2
# undef make_float2
#endif
#ifdef make_float3
#undef make_float3
# undef make_float3
#endif
#ifdef make_float4
#undef make_float4
# undef make_float4
#endif
#ifdef make_int2
#undef make_int2
# undef make_int2
#endif
#ifdef make_int3
#undef make_int3
# undef make_int3
#endif
#ifdef make_int4
#undef make_int4
# undef make_int4
#endif
#ifdef make_uchar4
#undef make_uchar4
# undef make_uchar4
#endif
#define make_float2(x, y) ((float2)(x, y))
@@ -108,19 +108,19 @@
#define fmodf(x, y) fmod((float)(x), (float)(y))
#ifndef __CL_USE_NATIVE__
#define sinf(x) native_sin(((float)(x)))
#define cosf(x) native_cos(((float)(x)))
#define tanf(x) native_tan(((float)(x)))
#define expf(x) native_exp(((float)(x)))
#define sqrtf(x) native_sqrt(((float)(x)))
#define logf(x) native_log(((float)(x)))
# define sinf(x) native_sin(((float)(x)))
# define cosf(x) native_cos(((float)(x)))
# define tanf(x) native_tan(((float)(x)))
# define expf(x) native_exp(((float)(x)))
# define sqrtf(x) native_sqrt(((float)(x)))
# define logf(x) native_log(((float)(x)))
#else
#define sinf(x) sin(((float)(x)))
#define cosf(x) cos(((float)(x)))
#define tanf(x) tan(((float)(x)))
#define expf(x) exp(((float)(x)))
#define sqrtf(x) sqrt(((float)(x)))
#define logf(x) log(((float)(x)))
# define sinf(x) sin(((float)(x)))
# define cosf(x) cos(((float)(x)))
# define tanf(x) tan(((float)(x)))
# define expf(x) exp(((float)(x)))
# define sqrtf(x) sqrt(((float)(x)))
# define logf(x) log(((float)(x)))
#endif
/* data lookup defines */

View File

@@ -40,9 +40,9 @@ ccl_device_noinline float3 direct_emissive_eval(KernelGlobals *kg,
ray.D = ls->D;
ray.P = ls->P;
ray.t = 1.0f;
#ifdef __OBJECT_MOTION__
# ifdef __OBJECT_MOTION__
ray.time = time;
#endif
# endif
ray.dP = differential3_zero();
ray.dD = dI;
@@ -278,21 +278,21 @@ ccl_device_noinline float3 indirect_background(KernelGlobals *kg,
}
/* evaluate background closure */
#ifdef __SPLIT_KERNEL__
# ifdef __SPLIT_KERNEL__
Ray priv_ray = *ray;
shader_setup_from_background(kg, kg->sd_input, &priv_ray);
path_state_modify_bounce(state, true);
float3 L = shader_eval_background(kg, kg->sd_input, state, state->flag, SHADER_CONTEXT_EMISSION);
path_state_modify_bounce(state, false);
#else
# else
ShaderData sd;
shader_setup_from_background(kg, &sd, ray);
path_state_modify_bounce(state, true);
float3 L = shader_eval_background(kg, &sd, state, state->flag, SHADER_CONTEXT_EMISSION);
path_state_modify_bounce(state, false);
#endif
# endif
#ifdef __BACKGROUND_MIS__
/* check if background light exists or if we should skip pdf */

View File

@@ -25,36 +25,36 @@ CCL_NAMESPACE_BEGIN
#ifdef __KERNEL_CPU__
#ifdef __OSL__
# ifdef __OSL__
struct OSLGlobals;
struct OSLThreadData;
struct OSLShadingSystem;
#endif
# endif
#define MAX_BYTE_IMAGES 1024
#define MAX_FLOAT_IMAGES 1024
# define MAX_BYTE_IMAGES 1024
# define MAX_FLOAT_IMAGES 1024
typedef struct KernelGlobals {
texture_image_uchar4 texture_byte_images[MAX_BYTE_IMAGES];
texture_image_float4 texture_float_images[MAX_FLOAT_IMAGES];
#define KERNEL_TEX(type, ttype, name) ttype name;
#define KERNEL_IMAGE_TEX(type, ttype, name)
#include "kernel_textures.h"
# define KERNEL_TEX(type, ttype, name) ttype name;
# define KERNEL_IMAGE_TEX(type, ttype, name)
# include "kernel_textures.h"
KernelData __data;
#ifdef __OSL__
# ifdef __OSL__
/* On the CPU, we also have the OSL globals here. Most data structures are shared
* with SVM, the difference is in the shaders and object/mesh attributes. */
OSLGlobals *osl;
OSLShadingSystem *osl_ss;
OSLThreadData *osl_tdata;
#endif
# endif
} KernelGlobals;
#endif
#endif /* __KERNEL_CPU__ */
/* For CUDA, constant memory textures must be globals, so we can't put them
* into a struct. As a result we don't actually use this struct and use actual
@@ -66,15 +66,15 @@ typedef struct KernelGlobals {
__constant__ KernelData __data;
typedef struct KernelGlobals {} KernelGlobals;
#ifdef __KERNEL_CUDA_TEX_STORAGE__
#define KERNEL_TEX(type, ttype, name) ttype name;
#else
#define KERNEL_TEX(type, ttype, name) const __constant__ __device__ type *name;
#endif
#define KERNEL_IMAGE_TEX(type, ttype, name) ttype name;
#include "kernel_textures.h"
# ifdef __KERNEL_CUDA_TEX_STORAGE__
# define KERNEL_TEX(type, ttype, name) ttype name;
# else
# define KERNEL_TEX(type, ttype, name) const __constant__ __device__ type *name;
# endif
# define KERNEL_IMAGE_TEX(type, ttype, name) ttype name;
# include "kernel_textures.h"
#endif
#endif /* __KERNEL_CUDA__ */
/* OpenCL */
@@ -83,17 +83,17 @@ typedef struct KernelGlobals {} KernelGlobals;
typedef ccl_addr_space struct KernelGlobals {
ccl_constant KernelData *data;
#define KERNEL_TEX(type, ttype, name) \
# define KERNEL_TEX(type, ttype, name) \
ccl_global type *name;
#include "kernel_textures.h"
# include "kernel_textures.h"
#ifdef __SPLIT_KERNEL__
# ifdef __SPLIT_KERNEL__
ShaderData *sd_input;
Intersection *isect_shadow;
#endif
# endif
} KernelGlobals;
#endif
#endif /* __KERNEL_OPENCL__ */
/* Interpolated lookup table access */

View File

@@ -753,12 +753,12 @@ ccl_device void object_transform_light_sample(KernelGlobals *kg, LightSample *ls
#ifdef __INSTANCING__
/* instance transform */
if(object >= 0) {
#ifdef __OBJECT_MOTION__
# ifdef __OBJECT_MOTION__
Transform itfm;
Transform tfm = object_fetch_transform_motion_test(kg, object, time, &itfm);
#else
# else
Transform tfm = object_fetch_transform(kg, object, OBJECT_TRANSFORM);
#endif
# endif
ls->P = transform_point(&tfm, ls->P);
ls->Ng = normalize(transform_direction(&tfm, ls->Ng));

View File

@@ -15,7 +15,7 @@
*/
#ifdef __OSL__
#include "osl_shader.h"
# include "osl_shader.h"
#endif
#include "kernel_random.h"
@@ -32,11 +32,11 @@
#include "kernel_passes.h"
#ifdef __SUBSURFACE__
#include "kernel_subsurface.h"
# include "kernel_subsurface.h"
#endif
#ifdef __VOLUME__
#include "kernel_volume.h"
# include "kernel_volume.h"
#endif
#include "kernel_path_state.h"
@@ -47,7 +47,7 @@
#include "kernel_path_volume.h"
#ifdef __KERNEL_DEBUG__
#include "kernel_debug.h"
# include "kernel_debug.h"
#endif
CCL_NAMESPACE_BEGIN
@@ -106,7 +106,7 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
volume_stack_is_heterogeneous(kg,
state->volume_stack);
#ifdef __VOLUME_DECOUPLED__
# ifdef __VOLUME_DECOUPLED__
int sampling_method =
volume_stack_sampling_method(kg,
state->volume_stack);
@@ -195,14 +195,14 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
}
}
else
#endif
# endif
{
/* integrate along volume segment with distance sampling */
ShaderData volume_sd;
VolumeIntegrateResult result = kernel_volume_integrate(
kg, state, &volume_sd, &volume_ray, L, &throughput, rng, heterogeneous);
#ifdef __VOLUME_SCATTER__
# ifdef __VOLUME_SCATTER__
if(result == VOLUME_PATH_SCATTERED) {
/* direct lighting */
kernel_path_volume_connect_light(kg,
@@ -227,7 +227,7 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
break;
}
}
#endif
# endif
}
}
#endif
@@ -322,9 +322,9 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
light_ray.P = ray_offset(sd.P, sd.Ng);
light_ray.D = ao_D;
light_ray.t = kernel_data.background.ao_distance;
#ifdef __OBJECT_MOTION__
# ifdef __OBJECT_MOTION__
light_ray.time = sd.time;
#endif
# endif
light_ray.dP = sd.dP;
light_ray.dD = differential3_zero();
@@ -468,11 +468,11 @@ ccl_device bool kernel_path_subsurface_scatter(
&lcg_state,
bssrdf_u, bssrdf_v,
false);
#ifdef __VOLUME__
# ifdef __VOLUME__
ss_indirect->need_update_volume_stack =
kernel_data.integrator.use_volumes &&
ccl_fetch(sd, flag) & SD_OBJECT_INTERSECTS_VOLUME;
#endif
# endif
/* compute lighting with the BSDF closure */
for(int hit = 0; hit < num_hits; hit++) {
@@ -513,11 +513,11 @@ ccl_device bool kernel_path_subsurface_scatter(
hit_L,
hit_ray))
{
#ifdef __LAMP_MIS__
# ifdef __LAMP_MIS__
hit_state->ray_t = 0.0f;
#endif
# endif
#ifdef __VOLUME__
# ifdef __VOLUME__
if(ss_indirect->need_update_volume_stack) {
Ray volume_ray = *ray;
/* Setup ray from previous surface point to the new one. */
@@ -529,7 +529,7 @@ ccl_device bool kernel_path_subsurface_scatter(
&volume_ray,
hit_state->volume_stack);
}
#endif
# endif
path_radiance_reset_indirect(L);
ss_indirect->num_rays++;
}
@@ -682,7 +682,7 @@ ccl_device_inline float4 kernel_path_integrate(KernelGlobals *kg,
bool heterogeneous = volume_stack_is_heterogeneous(kg, state.volume_stack);
#ifdef __VOLUME_DECOUPLED__
# ifdef __VOLUME_DECOUPLED__
int sampling_method = volume_stack_sampling_method(kg, state.volume_stack);
bool decoupled = kernel_volume_use_decoupled(kg, heterogeneous, true, sampling_method);
@@ -736,14 +736,14 @@ ccl_device_inline float4 kernel_path_integrate(KernelGlobals *kg,
}
}
else
#endif
# endif
{
/* integrate along volume segment with distance sampling */
ShaderData volume_sd;
VolumeIntegrateResult result = kernel_volume_integrate(
kg, &state, &volume_sd, &volume_ray, &L, &throughput, rng, heterogeneous);
#ifdef __VOLUME_SCATTER__
# ifdef __VOLUME_SCATTER__
if(result == VOLUME_PATH_SCATTERED) {
/* direct lighting */
kernel_path_volume_connect_light(kg, rng, &volume_sd, throughput, &state, &L);
@@ -754,7 +754,7 @@ ccl_device_inline float4 kernel_path_integrate(KernelGlobals *kg,
else
break;
}
#endif
# endif
}
}
#endif

View File

@@ -31,9 +31,9 @@ ccl_device_noinline void kernel_branched_path_surface_connect_light(KernelGlobal
BsdfEval L_light;
bool is_lamp;
#ifdef __OBJECT_MOTION__
# ifdef __OBJECT_MOTION__
light_ray.time = ccl_fetch(sd, time);
#endif
# endif
if(sample_all_lights) {
/* lamp sampling */

View File

@@ -36,9 +36,9 @@ ccl_device void kernel_path_volume_connect_light(KernelGlobals *kg, RNG *rng,
bool is_lamp;
/* connect to light from given point where shader has been evaluated */
#ifdef __OBJECT_MOTION__
# ifdef __OBJECT_MOTION__
light_ray.time = sd->time;
#endif
# endif
light_sample(kg, light_t, light_u, light_v, sd->time, sd->P, state->bounce, &ls);
if(ls.pdf == 0.0f)
@@ -117,9 +117,9 @@ ccl_device void kernel_branched_path_volume_connect_light(KernelGlobals *kg, RNG
BsdfEval L_light;
bool is_lamp;
#ifdef __OBJECT_MOTION__
# ifdef __OBJECT_MOTION__
light_ray.time = sd->time;
#endif
# endif
if(sample_all_lights) {
/* lamp sampling */

View File

@@ -117,10 +117,10 @@ ccl_device_noinline void shader_setup_from_ray(KernelGlobals *kg,
/* instance transform */
object_normal_transform_auto(kg, sd, &ccl_fetch(sd, N));
object_normal_transform_auto(kg, sd, &ccl_fetch(sd, Ng));
#ifdef __DPDU__
# ifdef __DPDU__
object_dir_transform_auto(kg, sd, &ccl_fetch(sd, dPdu));
object_dir_transform_auto(kg, sd, &ccl_fetch(sd, dPdv));
#endif
# endif
}
#endif
@@ -158,10 +158,10 @@ ccl_device_inline void shader_setup_from_subsurface(KernelGlobals *kg, ShaderDat
sd->prim = kernel_tex_fetch(__prim_index, isect->prim);
sd->type = isect->type;
#ifdef __UV__
# ifdef __UV__
sd->u = isect->u;
sd->v = isect->v;
#endif
# endif
/* fetch triangle data */
if(sd->type == PRIMITIVE_TRIANGLE) {
@@ -176,10 +176,10 @@ ccl_device_inline void shader_setup_from_subsurface(KernelGlobals *kg, ShaderDat
if(sd->shader & SHADER_SMOOTH_NORMAL)
sd->N = triangle_smooth_normal(kg, sd->prim, sd->u, sd->v);
#ifdef __DPDU__
# ifdef __DPDU__
/* dPdu/dPdv */
triangle_dPdudv(kg, sd->prim, &sd->dPdu, &sd->dPdv);
#endif
# endif
}
else {
/* motion triangle */
@@ -188,38 +188,38 @@ ccl_device_inline void shader_setup_from_subsurface(KernelGlobals *kg, ShaderDat
sd->flag |= kernel_tex_fetch(__shader_flag, (sd->shader & SHADER_MASK)*2);
#ifdef __INSTANCING__
# ifdef __INSTANCING__
if(isect->object != OBJECT_NONE) {
/* instance transform */
object_normal_transform(kg, sd, &sd->N);
object_normal_transform(kg, sd, &sd->Ng);
#ifdef __DPDU__
# ifdef __DPDU__
object_dir_transform(kg, sd, &sd->dPdu);
object_dir_transform(kg, sd, &sd->dPdv);
#endif
# endif
}
#endif
# endif
/* backfacing test */
if(backfacing) {
sd->flag |= SD_BACKFACING;
sd->Ng = -sd->Ng;
sd->N = -sd->N;
#ifdef __DPDU__
# ifdef __DPDU__
sd->dPdu = -sd->dPdu;
sd->dPdv = -sd->dPdv;
#endif
# endif
}
/* should not get used in principle as the shading will only use a diffuse
* BSDF, but the shader might still access it */
sd->I = sd->N;
#ifdef __RAY_DIFFERENTIALS__
# ifdef __RAY_DIFFERENTIALS__
/* differentials */
differential_dudv(&sd->du, &sd->dv, sd->dPdu, sd->dPdv, sd->dP, sd->Ng);
/* don't modify dP and dI */
#endif
# endif
}
#endif
@@ -296,12 +296,12 @@ ccl_device void shader_setup_from_sample(KernelGlobals *kg,
#ifdef __DPDU__
triangle_dPdudv(kg, ccl_fetch(sd, prim), &ccl_fetch(sd, dPdu), &ccl_fetch(sd, dPdv));
#ifdef __INSTANCING__
# ifdef __INSTANCING__
if(instanced) {
object_dir_transform_auto(kg, sd, &ccl_fetch(sd, dPdu));
object_dir_transform_auto(kg, sd, &ccl_fetch(sd, dPdv));
}
#endif
# endif
#endif
}
else {
@@ -1020,12 +1020,12 @@ ccl_device void shader_eval_volume(KernelGlobals *kg, ShaderData *sd,
/* evaluate shader */
#ifdef __SVM__
#ifdef __OSL__
# ifdef __OSL__
if(kg->osl) {
OSLShader::eval_volume(kg, sd, state, path_flag, ctx);
}
else
#endif
# endif
{
svm_eval_nodes(kg, sd, state, SHADER_TYPE_VOLUME, path_flag);
}
@@ -1048,11 +1048,11 @@ ccl_device void shader_eval_displacement(KernelGlobals *kg, ShaderData *sd, ccl_
/* this will modify sd->P */
#ifdef __SVM__
#ifdef __OSL__
# ifdef __OSL__
if(kg->osl)
OSLShader::eval_displacement(kg, sd, ctx);
else
#endif
# endif
{
svm_eval_nodes(kg, sd, state, SHADER_TYPE_DISPLACEMENT, 0);
}

View File

@@ -15,11 +15,11 @@
*/
#ifndef KERNEL_TEX
#define KERNEL_TEX(type, ttype, name)
# define KERNEL_TEX(type, ttype, name)
#endif
#ifndef KERNEL_IMAGE_TEX
#define KERNEL_IMAGE_TEX(type, ttype, name)
# define KERNEL_IMAGE_TEX(type, ttype, name)
#endif
/* bvh */

View File

@@ -21,14 +21,14 @@
#include "svm/svm_types.h"
#ifndef __KERNEL_GPU__
#define __KERNEL_CPU__
# define __KERNEL_CPU__
#endif
/* TODO(sergey): This is only to make it possible to include this header
* from outside of the kernel. but this could be done somewhat cleaner?
*/
#ifndef ccl_addr_space
#define ccl_addr_space
# define ccl_addr_space
#endif
CCL_NAMESPACE_BEGIN
@@ -59,47 +59,47 @@ CCL_NAMESPACE_BEGIN
/* device capabilities */
#ifdef __KERNEL_CPU__
#ifdef __KERNEL_SSE2__
# ifdef __KERNEL_SSE2__
# define __QBVH__
#endif
#define __KERNEL_SHADING__
#define __KERNEL_ADV_SHADING__
#define __BRANCHED_PATH__
#ifdef WITH_OSL
#define __OSL__
#endif
#define __SUBSURFACE__
#define __CMJ__
#define __VOLUME__
#define __VOLUME_DECOUPLED__
#define __VOLUME_SCATTER__
#define __SHADOW_RECORD_ALL__
#define __VOLUME_RECORD_ALL__
#endif
# endif
# define __KERNEL_SHADING__
# define __KERNEL_ADV_SHADING__
# define __BRANCHED_PATH__
# ifdef WITH_OSL
# define __OSL__
# endif
# define __SUBSURFACE__
# define __CMJ__
# define __VOLUME__
# define __VOLUME_DECOUPLED__
# define __VOLUME_SCATTER__
# define __SHADOW_RECORD_ALL__
# define __VOLUME_RECORD_ALL__
#endif /* __KERNEL_CPU__ */
#ifdef __KERNEL_CUDA__
#define __KERNEL_SHADING__
#define __KERNEL_ADV_SHADING__
#define __BRANCHED_PATH__
#define __VOLUME__
#define __VOLUME_SCATTER__
#define __SUBSURFACE__
#define __CMJ__
#endif
# define __KERNEL_SHADING__
# define __KERNEL_ADV_SHADING__
# define __BRANCHED_PATH__
# define __VOLUME__
# define __VOLUME_SCATTER__
# define __SUBSURFACE__
# define __CMJ__
#endif /* __KERNEL_CUDA__ */
#ifdef __KERNEL_OPENCL__
/* keep __KERNEL_ADV_SHADING__ in sync with opencl_kernel_use_advanced_shading! */
#ifdef __KERNEL_OPENCL_NVIDIA__
# ifdef __KERNEL_OPENCL_NVIDIA__
# define __KERNEL_SHADING__
# define __KERNEL_ADV_SHADING__
# ifdef __KERNEL_EXPERIMENTAL__
# define __CMJ__
# endif
#endif
# endif /* __KERNEL_OPENCL_NVIDIA__ */
#ifdef __KERNEL_OPENCL_APPLE__
# ifdef __KERNEL_OPENCL_APPLE__
# define __KERNEL_SHADING__
# define __KERNEL_ADV_SHADING__
/* TODO(sergey): Currently experimental section is ignored here,
@@ -109,9 +109,9 @@ CCL_NAMESPACE_BEGIN
# ifdef __KERNEL_EXPERIMENTAL__
# define __CMJ__
# endif
#endif
# endif /* __KERNEL_OPENCL_NVIDIA__ */
#ifdef __KERNEL_OPENCL_AMD__
# ifdef __KERNEL_OPENCL_AMD__
# define __CL_USE_NATIVE__
# define __KERNEL_SHADING__
# define __MULTI_CLOSURE__
@@ -125,18 +125,18 @@ CCL_NAMESPACE_BEGIN
# ifdef __KERNEL_EXPERIMENTAL__
# define __TRANSPARENT_SHADOWS__
# endif
#endif
# endif /* __KERNEL_OPENCL_AMD__ */
#ifdef __KERNEL_OPENCL_INTEL_CPU__
# ifdef __KERNEL_OPENCL_INTEL_CPU__
# define __CL_USE_NATIVE__
# define __KERNEL_SHADING__
# define __KERNEL_ADV_SHADING__
# ifdef __KERNEL_EXPERIMENTAL__
# define __CMJ__
# endif
#endif
# endif /* __KERNEL_OPENCL_INTEL_CPU__ */
#endif // __KERNEL_OPENCL__
#endif /* __KERNEL_OPENCL__ */
/* kernel features */
#define __SOBOL__
@@ -152,23 +152,23 @@ CCL_NAMESPACE_BEGIN
#define __CLAMP_SAMPLE__
#ifdef __KERNEL_SHADING__
#define __SVM__
#define __EMISSION__
#define __TEXTURES__
#define __EXTRA_NODES__
#define __HOLDOUT__
# define __SVM__
# define __EMISSION__
# define __TEXTURES__
# define __EXTRA_NODES__
# define __HOLDOUT__
#endif
#ifdef __KERNEL_ADV_SHADING__
#define __MULTI_CLOSURE__
#define __TRANSPARENT_SHADOWS__
#define __PASSES__
#define __BACKGROUND_MIS__
#define __LAMP_MIS__
#define __AO__
#define __CAMERA_MOTION__
#define __OBJECT_MOTION__
#define __HAIR__
# define __MULTI_CLOSURE__
# define __TRANSPARENT_SHADOWS__
# define __PASSES__
# define __BACKGROUND_MIS__
# define __LAMP_MIS__
# define __AO__
# define __CAMERA_MOTION__
# define __OBJECT_MOTION__
# define __HAIR__
#endif
#ifdef WITH_CYCLES_DEBUG
@@ -628,7 +628,7 @@ typedef enum AttributeStandard {
# define MAX_CLOSURE __MAX_CLOSURE__
# endif
#else
#define MAX_CLOSURE 1
# define MAX_CLOSURE 1
#endif
/* This struct is to be 16 bytes aligned, we also keep some extra precautions:

View File

@@ -1149,7 +1149,7 @@ ccl_device void kernel_volume_stack_update_for_subsurface(KernelGlobals *kg,
Ray volume_ray = *ray;
#ifdef __VOLUME_RECORD_ALL__
# ifdef __VOLUME_RECORD_ALL__
Intersection hits[2*VOLUME_STACK_SIZE];
uint num_hits = scene_intersect_volume_all(kg,
&volume_ray,
@@ -1166,7 +1166,7 @@ ccl_device void kernel_volume_stack_update_for_subsurface(KernelGlobals *kg,
kernel_volume_stack_enter_exit(kg, &sd, stack);
}
}
#else
# else
Intersection isect;
int step = 0;
while(step < 2 * VOLUME_STACK_SIZE &&
@@ -1181,7 +1181,7 @@ ccl_device void kernel_volume_stack_update_for_subsurface(KernelGlobals *kg,
volume_ray.t -= sd.ray_length;
++step;
}
#endif
# endif
}
#endif

View File

@@ -24,7 +24,7 @@
#ifdef __WORK_STEALING__
#ifdef __KERNEL_OPENCL__
#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable
# pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable
#endif
uint get_group_id_with_ray_index(uint ray_index,

View File

@@ -18,7 +18,7 @@
/* On x86-64, we can assume SSE2, so avoid the extra kernel and compile this one with SSE2 intrinsics */
#if defined(__x86_64__) || defined(_M_X64)
#define __KERNEL_SSE2__
# define __KERNEL_SSE2__
#endif
/* quiet unused define warnings */

View File

@@ -20,11 +20,11 @@
/* SSE optimization disabled for now on 32 bit, see bug #36316 */
#if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86)))
#define __KERNEL_SSE2__
#define __KERNEL_SSE3__
#define __KERNEL_SSSE3__
#define __KERNEL_SSE41__
#define __KERNEL_AVX__
# define __KERNEL_SSE2__
# define __KERNEL_SSE3__
# define __KERNEL_SSSE3__
# define __KERNEL_SSE41__
# define __KERNEL_AVX__
#endif
#include "util_optimization.h"
@@ -33,5 +33,4 @@
# include "kernel.h"
# define KERNEL_ARCH cpu_avx
# include "kernel_cpu_impl.h"
#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_AVX */

View File

@@ -20,12 +20,12 @@
/* SSE optimization disabled for now on 32 bit, see bug #36316 */
#if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86)))
#define __KERNEL_SSE2__
#define __KERNEL_SSE3__
#define __KERNEL_SSSE3__
#define __KERNEL_SSE41__
#define __KERNEL_AVX__
#define __KERNEL_AVX2__
# define __KERNEL_SSE2__
# define __KERNEL_SSE3__
# define __KERNEL_SSSE3__
# define __KERNEL_SSE41__
# define __KERNEL_AVX__
# define __KERNEL_AVX2__
#endif
#include "util_optimization.h"
@@ -34,5 +34,4 @@
# include "kernel.h"
# define KERNEL_ARCH cpu_avx2
# include "kernel_cpu_impl.h"
#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_AVX2 */

View File

@@ -20,7 +20,7 @@
/* SSE optimization disabled for now on 32 bit, see bug #36316 */
#if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86)))
#define __KERNEL_SSE2__
# define __KERNEL_SSE2__
#endif
#include "util_optimization.h"
@@ -29,5 +29,4 @@
# include "kernel.h"
# define KERNEL_ARCH cpu_sse2
# include "kernel_cpu_impl.h"
#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE2 */

View File

@@ -20,9 +20,9 @@
/* SSE optimization disabled for now on 32 bit, see bug #36316 */
#if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86)))
#define __KERNEL_SSE2__
#define __KERNEL_SSE3__
#define __KERNEL_SSSE3__
# define __KERNEL_SSE2__
# define __KERNEL_SSE3__
# define __KERNEL_SSSE3__
#endif
#include "util_optimization.h"
@@ -31,5 +31,4 @@
# include "kernel.h"
# define KERNEL_ARCH cpu_sse3
# include "kernel_cpu_impl.h"
#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE3 */

View File

@@ -20,10 +20,10 @@
/* SSE optimization disabled for now on 32 bit, see bug #36316 */
#if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86)))
#define __KERNEL_SSE2__
#define __KERNEL_SSE3__
#define __KERNEL_SSSE3__
#define __KERNEL_SSE41__
# define __KERNEL_SSE2__
# define __KERNEL_SSE3__
# define __KERNEL_SSSE3__
# define __KERNEL_SSE41__
#endif
#include "util_optimization.h"
@@ -32,5 +32,4 @@
# include "kernel.h"
# define KERNEL_ARCH cpu_sse41
# include "kernel_cpu_impl.h"
#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE41 */

View File

@@ -31,67 +31,67 @@
/* 2.0 and 2.1 */
#if __CUDA_ARCH__ == 200 || __CUDA_ARCH__ == 210
#define CUDA_MULTIPRESSOR_MAX_REGISTERS 32768
#define CUDA_MULTIPROCESSOR_MAX_BLOCKS 8
#define CUDA_BLOCK_MAX_THREADS 1024
#define CUDA_THREAD_MAX_REGISTERS 63
# define CUDA_MULTIPRESSOR_MAX_REGISTERS 32768
# define CUDA_MULTIPROCESSOR_MAX_BLOCKS 8
# define CUDA_BLOCK_MAX_THREADS 1024
# define CUDA_THREAD_MAX_REGISTERS 63
/* tunable parameters */
#define CUDA_THREADS_BLOCK_WIDTH 16
#define CUDA_KERNEL_MAX_REGISTERS 32
#define CUDA_KERNEL_BRANCHED_MAX_REGISTERS 40
# define CUDA_THREADS_BLOCK_WIDTH 16
# define CUDA_KERNEL_MAX_REGISTERS 32
# define CUDA_KERNEL_BRANCHED_MAX_REGISTERS 40
/* 3.0 and 3.5 */
#elif __CUDA_ARCH__ == 300 || __CUDA_ARCH__ == 350
#define CUDA_MULTIPRESSOR_MAX_REGISTERS 65536
#define CUDA_MULTIPROCESSOR_MAX_BLOCKS 16
#define CUDA_BLOCK_MAX_THREADS 1024
#define CUDA_THREAD_MAX_REGISTERS 63
# define CUDA_MULTIPRESSOR_MAX_REGISTERS 65536
# define CUDA_MULTIPROCESSOR_MAX_BLOCKS 16
# define CUDA_BLOCK_MAX_THREADS 1024
# define CUDA_THREAD_MAX_REGISTERS 63
/* tunable parameters */
#define CUDA_THREADS_BLOCK_WIDTH 16
#define CUDA_KERNEL_MAX_REGISTERS 63
#define CUDA_KERNEL_BRANCHED_MAX_REGISTERS 63
# define CUDA_THREADS_BLOCK_WIDTH 16
# define CUDA_KERNEL_MAX_REGISTERS 63
# define CUDA_KERNEL_BRANCHED_MAX_REGISTERS 63
/* 3.2 */
#elif __CUDA_ARCH__ == 320
#define CUDA_MULTIPRESSOR_MAX_REGISTERS 32768
#define CUDA_MULTIPROCESSOR_MAX_BLOCKS 16
#define CUDA_BLOCK_MAX_THREADS 1024
#define CUDA_THREAD_MAX_REGISTERS 63
# define CUDA_MULTIPRESSOR_MAX_REGISTERS 32768
# define CUDA_MULTIPROCESSOR_MAX_BLOCKS 16
# define CUDA_BLOCK_MAX_THREADS 1024
# define CUDA_THREAD_MAX_REGISTERS 63
/* tunable parameters */
#define CUDA_THREADS_BLOCK_WIDTH 16
#define CUDA_KERNEL_MAX_REGISTERS 63
#define CUDA_KERNEL_BRANCHED_MAX_REGISTERS 63
# define CUDA_THREADS_BLOCK_WIDTH 16
# define CUDA_KERNEL_MAX_REGISTERS 63
# define CUDA_KERNEL_BRANCHED_MAX_REGISTERS 63
/* 3.7 */
#elif __CUDA_ARCH__ == 370
#define CUDA_MULTIPRESSOR_MAX_REGISTERS 65536
#define CUDA_MULTIPROCESSOR_MAX_BLOCKS 16
#define CUDA_BLOCK_MAX_THREADS 1024
#define CUDA_THREAD_MAX_REGISTERS 255
# define CUDA_MULTIPRESSOR_MAX_REGISTERS 65536
# define CUDA_MULTIPROCESSOR_MAX_BLOCKS 16
# define CUDA_BLOCK_MAX_THREADS 1024
# define CUDA_THREAD_MAX_REGISTERS 255
/* tunable parameters */
#define CUDA_THREADS_BLOCK_WIDTH 16
#define CUDA_KERNEL_MAX_REGISTERS 63
#define CUDA_KERNEL_BRANCHED_MAX_REGISTERS 63
# define CUDA_THREADS_BLOCK_WIDTH 16
# define CUDA_KERNEL_MAX_REGISTERS 63
# define CUDA_KERNEL_BRANCHED_MAX_REGISTERS 63
/* 5.0, 5.2 and 5.3 */
#elif __CUDA_ARCH__ == 500 || __CUDA_ARCH__ == 520 || __CUDA_ARCH__ == 530
#define CUDA_MULTIPRESSOR_MAX_REGISTERS 65536
#define CUDA_MULTIPROCESSOR_MAX_BLOCKS 32
#define CUDA_BLOCK_MAX_THREADS 1024
#define CUDA_THREAD_MAX_REGISTERS 255
# define CUDA_MULTIPRESSOR_MAX_REGISTERS 65536
# define CUDA_MULTIPROCESSOR_MAX_BLOCKS 32
# define CUDA_BLOCK_MAX_THREADS 1024
# define CUDA_THREAD_MAX_REGISTERS 255
/* tunable parameters */
#define CUDA_THREADS_BLOCK_WIDTH 16
#define CUDA_KERNEL_MAX_REGISTERS 40
#define CUDA_KERNEL_BRANCHED_MAX_REGISTERS 63
# define CUDA_THREADS_BLOCK_WIDTH 16
# define CUDA_KERNEL_MAX_REGISTERS 40
# define CUDA_KERNEL_BRANCHED_MAX_REGISTERS 63
/* unknown architecture */
#else
#error "Unknown or unsupported CUDA architecture, can't determine launch bounds"
# error "Unknown or unsupported CUDA architecture, can't determine launch bounds"
#endif
/* compute number of threads per block and minimum blocks per multiprocessor
@@ -106,19 +106,19 @@
/* sanity checks */
#if CUDA_THREADS_BLOCK_WIDTH*CUDA_THREADS_BLOCK_WIDTH > CUDA_BLOCK_MAX_THREADS
#error "Maximum number of threads per block exceeded"
# error "Maximum number of threads per block exceeded"
#endif
#if CUDA_MULTIPRESSOR_MAX_REGISTERS/(CUDA_THREADS_BLOCK_WIDTH*CUDA_THREADS_BLOCK_WIDTH*CUDA_KERNEL_MAX_REGISTERS) > CUDA_MULTIPROCESSOR_MAX_BLOCKS
#error "Maximum number of blocks per multiprocessor exceeded"
# error "Maximum number of blocks per multiprocessor exceeded"
#endif
#if CUDA_KERNEL_MAX_REGISTERS > CUDA_THREAD_MAX_REGISTERS
#error "Maximum number of registers per thread exceeded"
# error "Maximum number of registers per thread exceeded"
#endif
#if CUDA_KERNEL_BRANCHED_MAX_REGISTERS > CUDA_THREAD_MAX_REGISTERS
#error "Maximum number of registers per thread exceeded"
# error "Maximum number of registers per thread exceeded"
#endif
/* kernels */

View File

@@ -24,21 +24,21 @@
#include "../../kernel_film.h"
#if defined(__COMPILE_ONLY_MEGAKERNEL__) || !defined(__NO_BAKING__)
#include "../../kernel_path.h"
#include "../../kernel_path_branched.h"
# include "../../kernel_path.h"
# include "../../kernel_path_branched.h"
#else /* __COMPILE_ONLY_MEGAKERNEL__ */
/* Include only actually used headers for the case
* when path tracing kernels are not needed.
*/
#include "../../kernel_random.h"
#include "../../kernel_differential.h"
#include "../../kernel_montecarlo.h"
#include "../../kernel_projection.h"
#include "../../geom/geom.h"
# include "../../kernel_random.h"
# include "../../kernel_differential.h"
# include "../../kernel_montecarlo.h"
# include "../../kernel_projection.h"
# include "../../geom/geom.h"
#include "../../kernel_accumulate.h"
#include "../../kernel_camera.h"
#include "../../kernel_shader.h"
# include "../../kernel_accumulate.h"
# include "../../kernel_camera.h"
# include "../../kernel_shader.h"
#endif /* defined(__COMPILE_ONLY_MEGAKERNEL__) || !defined(__NO_BAKING__) */
#include "../../kernel_bake.h"

View File

@@ -53,7 +53,7 @@
#include "kernel_shader.h"
#ifdef WITH_PTEX
#include <Ptexture.h>
# include <Ptexture.h>
#endif
CCL_NAMESPACE_BEGIN

View File

@@ -436,11 +436,11 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *
#endif
#ifdef __SUBSURFACE__
#ifndef __SPLIT_KERNEL__
# ifndef __SPLIT_KERNEL__
# define sc_next(sc) sc++
#else
# else
# define sc_next(sc) sc = ccl_fetch_array(sd, closure, ccl_fetch(sd, num_closure))
#endif
# endif
case CLOSURE_BSSRDF_CUBIC_ID:
case CLOSURE_BSSRDF_GAUSSIAN_ID:
case CLOSURE_BSSRDF_BURLEY_ID: {
@@ -471,9 +471,9 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *
sc->data1 = texture_blur;
sc->data2 = albedo.x;
sc->T.x = sharpness;
#ifdef __OSL__
# ifdef __OSL__
sc->prim = NULL;
#endif
# endif
sc->N = N;
ccl_fetch(sd, flag) |= bssrdf_setup(sc, (ClosureType)type);
@@ -488,9 +488,9 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *
sc->data1 = texture_blur;
sc->data2 = albedo.y;
sc->T.x = sharpness;
#ifdef __OSL__
# ifdef __OSL__
sc->prim = NULL;
#endif
# endif
sc->N = N;
ccl_fetch(sd, flag) |= bssrdf_setup(sc, (ClosureType)type);
@@ -505,9 +505,9 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *
sc->data1 = texture_blur;
sc->data2 = albedo.z;
sc->T.x = sharpness;
#ifdef __OSL__
# ifdef __OSL__
sc->prim = NULL;
#endif
# endif
sc->N = N;
ccl_fetch(sd, flag) |= bssrdf_setup(sc, (ClosureType)type);

View File

@@ -133,13 +133,13 @@ ccl_device float4 svm_image_texture(KernelGlobals *kg, int id, float x, float y,
ccl_device float4 svm_image_texture(KernelGlobals *kg, int id, float x, float y, uint srgb, uint use_alpha)
{
#ifdef __KERNEL_CPU__
#ifdef __KERNEL_SSE2__
# ifdef __KERNEL_SSE2__
ssef r_ssef;
float4 &r = (float4 &)r_ssef;
r = kernel_tex_image_interp(id, x, y);
#else
# else
float4 r = kernel_tex_image_interp(id, x, y);
#endif
# endif
#else
float4 r;
@@ -247,7 +247,7 @@ ccl_device float4 svm_image_texture(KernelGlobals *kg, int id, float x, float y,
case 91: r = kernel_tex_image_interp(__tex_image_091, x, y); break;
case 92: r = kernel_tex_image_interp(__tex_image_092, x, y); break;
#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 300)
# if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 300)
case 93: r = kernel_tex_image_interp(__tex_image_093, x, y); break;
case 94: r = kernel_tex_image_interp(__tex_image_094, x, y); break;
case 95: r = kernel_tex_image_interp(__tex_image_095, x, y); break;
@@ -306,7 +306,7 @@ ccl_device float4 svm_image_texture(KernelGlobals *kg, int id, float x, float y,
case 148: r = kernel_tex_image_interp(__tex_image_148, x, y); break;
case 149: r = kernel_tex_image_interp(__tex_image_149, x, y); break;
case 150: r = kernel_tex_image_interp(__tex_image_150, x, y); break;
#endif
# endif
default:
kernel_assert(0);

View File

@@ -90,8 +90,8 @@ ccl_device uint hash(uint kx, uint ky, uint kz)
#ifdef __KERNEL_SSE2__
ccl_device_inline ssei hash_sse(const ssei& kx, const ssei& ky, const ssei& kz)
{
#define rot(x,k) (((x)<<(k)) | (srl(x, 32-(k))))
#define xor_rot(a, b, c) do {a = a^b; a = a - rot(b, c);} while(0)
# define rot(x,k) (((x)<<(k)) | (srl(x, 32-(k))))
# define xor_rot(a, b, c) do {a = a^b; a = a - rot(b, c);} while(0)
uint len = 3;
ssei magic = ssei(0xdeadbeef + (len << 2) + 13);
@@ -108,8 +108,8 @@ ccl_device_inline ssei hash_sse(const ssei& kx, const ssei& ky, const ssei& kz)
xor_rot(c, b, 24);
return c;
#undef rot
#undef xor_rot
# undef rot
# undef xor_rot
}
#endif

View File

@@ -720,31 +720,46 @@ bool path_remove(const string& path)
return remove(path.c_str()) == 0;
}
string path_source_replace_includes(const string& source_, const string& path)
string path_source_replace_includes(const string& source, const string& path)
{
/* our own little c preprocessor that replaces #includes with the file
/* Our own little c preprocessor that replaces #includes with the file
* contents, to work around issue of opencl drivers not supporting
* include paths with spaces in them */
string source = source_;
const string include = "#include \"";
size_t n, pos = 0;
* include paths with spaces in them.
*/
while((n = source.find(include, pos)) != string::npos) {
size_t n_start = n + include.size();
size_t n_end = source.find("\"", n_start);
string filename = source.substr(n_start, n_end - n_start);
string result = "";
vector<string> lines;
string_split(lines, source, "\n");
for(size_t i = 0; i < lines.size(); ++i) {
string line = lines[i];
if(line[0] == '#') {
string token = string_strip(line.substr(1, line.size() - 1));
if(string_startswith(token, "include")) {
token = string_strip(token.substr(7, token.size() - 7));
if(token[0] == '"') {
size_t n_start = 1;
size_t n_end = token.find("\"", n_start);
string filename = token.substr(n_start, n_end - n_start);
string text, filepath = path_join(path, filename);
if(path_read_text(filepath, text)) {
text = path_source_replace_includes(text, path_dirname(filepath));
source.replace(n, n_end + 1 - n, "\n" + text + "\n");
/* Replace include directories with both current path
* and path extracted from the include file.
* Not totally robust, but works fine for Cycles kernel
* and avoids having list of include directories.x
*/
text = path_source_replace_includes(
text, path_dirname(filepath));
text = path_source_replace_includes(text, path);
line = token.replace(0, n_end + 1, "\n" + text + "\n");
}
else
pos = n_end;
}
}
}
result += line + "\n";
}
return source;
return result;
}
FILE *path_fopen(const string& path, const string& mode)