Fix Cycles CUDA sm_13 build with a 32-bit compiler: tweaked the voronoi and brick code so that it can be uninlined.
This commit is contained in:
Brecht Van Lommel
2012-11-30 07:27:17 +00:00
parent ad2b41bc4b
commit ceedd5bd35
7 changed files with 38 additions and 46 deletions

View File

@@ -110,8 +110,7 @@ BF_JACK_LIB_STATIC = '${BF_ZLIB}/lib/libjack.a'
# Cycles # Cycles
WITH_BF_CYCLES = True WITH_BF_CYCLES = True
WITH_BF_CYCLES_CUDA_BINARIES = True WITH_BF_CYCLES_CUDA_BINARIES = True
#BF_CYCLES_CUDA_BINARIES_ARCH = ['sm_13', 'sm_20', 'sm_21', 'sm_30'] BF_CYCLES_CUDA_BINARIES_ARCH = ['sm_13', 'sm_20', 'sm_21', 'sm_30']
BF_CYCLES_CUDA_BINARIES_ARCH = ['sm_20', 'sm_21', 'sm_30']
WITH_BF_OIIO = True WITH_BF_OIIO = True
WITH_BF_STATICOIIO = True WITH_BF_STATICOIIO = True

View File

@@ -97,8 +97,7 @@ WITH_BF_JACK = True
# Cycles # Cycles
WITH_BF_CYCLES = True WITH_BF_CYCLES = True
WITH_BF_CYCLES_CUDA_BINARIES = True WITH_BF_CYCLES_CUDA_BINARIES = True
#BF_CYCLES_CUDA_BINARIES_ARCH = ['sm_13', 'sm_20', 'sm_21', 'sm_30'] BF_CYCLES_CUDA_BINARIES_ARCH = ['sm_13', 'sm_20', 'sm_21', 'sm_30']
BF_CYCLES_CUDA_BINARIES_ARCH = ['sm_20', 'sm_21', 'sm_30']
WITH_BF_OIIO = True WITH_BF_OIIO = True
WITH_BF_STATICOIIO = True WITH_BF_STATICOIIO = True

View File

@@ -102,6 +102,7 @@ CCL_NAMESPACE_BEGIN
#define __IMAGE_TEXTURES__ #define __IMAGE_TEXTURES__
#define __EXTRA_NODES__ #define __EXTRA_NODES__
#define __HOLDOUT__ #define __HOLDOUT__
#define __NORMAL_MAP__
#endif #endif
#ifdef __KERNEL_ADV_SHADING__ #ifdef __KERNEL_ADV_SHADING__

View File

@@ -401,9 +401,13 @@ __device_noinline void svm_eval_nodes(KernelGlobals *kg, ShaderData *sd, ShaderT
case NODE_LIGHT_FALLOFF: case NODE_LIGHT_FALLOFF:
svm_node_light_falloff(sd, stack, node); svm_node_light_falloff(sd, stack, node);
break; break;
#endif
#ifdef __ANISOTROPIC__
case NODE_TANGENT: case NODE_TANGENT:
svm_node_tangent(kg, sd, stack, node); svm_node_tangent(kg, sd, stack, node);
break; break;
#endif
#ifdef __NORMAL_MAP__
case NODE_NORMAL_MAP: case NODE_NORMAL_MAP:
svm_node_normal_map(kg, sd, stack, node); svm_node_normal_map(kg, sd, stack, node);
break; break;

View File

@@ -28,9 +28,9 @@ __device_noinline float brick_noise(int n) /* fast integer noise */
return 0.5f * ((float)nn / 1073741824.0f); return 0.5f * ((float)nn / 1073741824.0f);
} }
__device_noinline float svm_brick(float3 p, float scale, float mortar_size, float bias, __device_noinline float2 svm_brick(float3 p, float scale, float mortar_size, float bias,
float brick_width, float row_height, float offset_amount, int offset_frequency, float brick_width, float row_height, float offset_amount, int offset_frequency,
float squash_amount, int squash_frequency, float *tint) float squash_amount, int squash_frequency)
{ {
p *= scale; p *= scale;
@@ -50,11 +50,12 @@ __device_noinline float svm_brick(float3 p, float scale, float mortar_size, floa
x = (p.x+offset) - brick_width*bricknum; x = (p.x+offset) - brick_width*bricknum;
y = p.y - row_height*rownum; y = p.y - row_height*rownum;
*tint = clamp((brick_noise((rownum << 16) + (bricknum & 0xFFFF)) + bias), 0.0f, 1.0f); return make_float2(
clamp((brick_noise((rownum << 16) + (bricknum & 0xFFFF)) + bias), 0.0f, 1.0f),
return (x < mortar_size || y < mortar_size || (x < mortar_size || y < mortar_size ||
x > (brick_width - mortar_size) || x > (brick_width - mortar_size) ||
y > (row_height - mortar_size)) ? 1.0f : 0.0f; y > (row_height - mortar_size)) ? 1.0f : 0.0f);
} }
__device void svm_node_tex_brick(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset) __device void svm_node_tex_brick(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset)
@@ -70,8 +71,6 @@ __device void svm_node_tex_brick(KernelGlobals *kg, ShaderData *sd, float *stack
/* RNA properties */ /* RNA properties */
uint offset_frequency, squash_frequency; uint offset_frequency, squash_frequency;
float tint = 0.0f;
decode_node_uchar4(node.y, &co_offset, &color1_offset, &color2_offset, &mortar_offset); decode_node_uchar4(node.y, &co_offset, &color1_offset, &color2_offset, &mortar_offset);
decode_node_uchar4(node.z, &scale_offset, &mortar_size_offset, &bias_offset, &brick_width_offset); decode_node_uchar4(node.z, &scale_offset, &mortar_size_offset, &bias_offset, &brick_width_offset);
decode_node_uchar4(node.w, &row_height_offset, &color_offset, &fac_offset, NULL); decode_node_uchar4(node.w, &row_height_offset, &color_offset, &fac_offset, NULL);
@@ -92,9 +91,11 @@ __device void svm_node_tex_brick(KernelGlobals *kg, ShaderData *sd, float *stack
float offset_amount = __int_as_float(node3.z); float offset_amount = __int_as_float(node3.z);
float squash_amount = __int_as_float(node3.w); float squash_amount = __int_as_float(node3.w);
float f = svm_brick(co, scale, mortar_size, bias, brick_width, row_height, float2 f2 = svm_brick(co, scale, mortar_size, bias, brick_width, row_height,
offset_amount, offset_frequency, squash_amount, squash_frequency, offset_amount, offset_frequency, squash_amount, squash_frequency);
&tint);
float tint = f2.x;
float f = f2.y;
if(f != 1.0f) { if(f != 1.0f) {
float facm = 1.0f - tint; float facm = 1.0f - tint;

View File

@@ -42,8 +42,12 @@ __device float voronoi_distance(NodeDistanceMetric distance_metric, float3 d, fl
/* Voronoi / Worley like */ /* Voronoi / Worley like */
__device_noinline void voronoi(float3 p, NodeDistanceMetric distance_metric, float e, float da[4], float3 pa[4]) __device_noinline float4 voronoi_Fn(float3 p, float e, int n1, int n2)
{ {
float da[4];
float3 pa[4];
NodeDistanceMetric distance_metric = NODE_VORONOI_DISTANCE_SQUARED;
/* returns distances in da and point coords in pa */ /* returns distances in da and point coords in pa */
int xx, yy, zz, xi, yi, zi; int xx, yy, zz, xi, yi, zi;
@@ -105,33 +109,20 @@ __device_noinline void voronoi(float3 p, NodeDistanceMetric distance_metric, flo
} }
} }
} }
float4 result = make_float4(pa[n1].x, pa[n1].y, pa[n1].z, da[n1]);
if(n2 != -1)
result = make_float4(pa[n2].x, pa[n2].y, pa[n2].z, da[n2]) - result;
return result;
} }
__device float voronoi_Fn(float3 p, int n) __device float voronoi_F1(float3 p) { return voronoi_Fn(p, 0.0f, 0, -1).w; }
{ __device float voronoi_F2(float3 p) { return voronoi_Fn(p, 0.0f, 1, -1).w; }
float da[4]; __device float voronoi_F3(float3 p) { return voronoi_Fn(p, 0.0f, 2, -1).w; }
float3 pa[4]; __device float voronoi_F4(float3 p) { return voronoi_Fn(p, 0.0f, 3, -1).w; }
__device float voronoi_F1F2(float3 p) { return voronoi_Fn(p, 0.0f, 0, 1).w; }
voronoi(p, NODE_VORONOI_DISTANCE_SQUARED, 0, da, pa);
return da[n];
}
__device float voronoi_FnFn(float3 p, int n1, int n2)
{
float da[4];
float3 pa[4];
voronoi(p, NODE_VORONOI_DISTANCE_SQUARED, 0, da, pa);
return da[n2] - da[n1];
}
__device float voronoi_F1(float3 p) { return voronoi_Fn(p, 0); }
__device float voronoi_F2(float3 p) { return voronoi_Fn(p, 1); }
__device float voronoi_F3(float3 p) { return voronoi_Fn(p, 2); }
__device float voronoi_F4(float3 p) { return voronoi_Fn(p, 3); }
__device float voronoi_F1F2(float3 p) { return voronoi_FnFn(p, 0, 1); }
__device float voronoi_Cr(float3 p) __device float voronoi_Cr(float3 p)
{ {

View File

@@ -23,21 +23,18 @@ CCL_NAMESPACE_BEGIN
__device_noinline float4 svm_voronoi(NodeVoronoiColoring coloring, float scale, float3 p) __device_noinline float4 svm_voronoi(NodeVoronoiColoring coloring, float scale, float3 p)
{ {
/* compute distance and point coordinate of 4 nearest neighbours */ /* compute distance and point coordinate of 4 nearest neighbours */
float da[4]; float4 dpa0 = voronoi_Fn(p*scale, 1.0f, 0, -1);
float3 pa[4];
voronoi(p*scale, NODE_VORONOI_DISTANCE_SQUARED, 1.0f, da, pa);
/* output */ /* output */
float fac; float fac;
float3 color; float3 color;
if(coloring == NODE_VORONOI_INTENSITY) { if(coloring == NODE_VORONOI_INTENSITY) {
fac = fabsf(da[0]); fac = fabsf(dpa0.w);
color = make_float3(fac, fac, fac); color = make_float3(fac, fac, fac);
} }
else { else {
color = cellnoise_color(pa[0]); color = cellnoise_color(float4_to_float3(dpa0));
fac = average(color); fac = average(color);
} }