Cycles: some tweaks to try to get sm_13 shader compiling.

This commit is contained in:
Brecht Van Lommel
2011-08-29 17:17:40 +00:00
parent be0aef2ef2
commit eac2674f1d
11 changed files with 180 additions and 139 deletions

View File

@@ -151,15 +151,8 @@ __device float4 kernel_path_integrate(KernelGlobals *kg, RNG *rng, int pass, Ray
if((sd.flag & SD_HOLDOUT) && (path_flag & PATH_RAY_CAMERA)) { if((sd.flag & SD_HOLDOUT) && (path_flag & PATH_RAY_CAMERA)) {
float3 holdout_weight = shader_holdout_eval(kg, &sd); float3 holdout_weight = shader_holdout_eval(kg, &sd);
if(kernel_data.background.transparent) { if(kernel_data.background.transparent)
Ltransparent += average(holdout_weight*throughput); Ltransparent += average(holdout_weight*throughput);
}
else {
ShaderData sd;
shader_setup_from_background(kg, &sd, &ray);
L += holdout_weight*throughput*shader_eval_background(kg, &sd, path_flag);
shader_release(kg, &sd);
}
} }
#endif #endif

View File

@@ -160,111 +160,157 @@ __device_noinline void svm_eval_nodes(KernelGlobals *kg, ShaderData *sd, ShaderT
while(1) { while(1) {
uint4 node = read_node(kg, &offset); uint4 node = read_node(kg, &offset);
if(node.x == NODE_SHADER_JUMP) { switch(node.x) {
if(type == SHADER_TYPE_SURFACE) offset = node.y; case NODE_SHADER_JUMP: {
else if(type == SHADER_TYPE_VOLUME) offset = node.z; if(type == SHADER_TYPE_SURFACE) offset = node.y;
else if(type == SHADER_TYPE_DISPLACEMENT) offset = node.w; else if(type == SHADER_TYPE_VOLUME) offset = node.z;
else return; else if(type == SHADER_TYPE_DISPLACEMENT) offset = node.w;
} else return;
else if(node.x == NODE_CLOSURE_BSDF) break;
svm_node_closure_bsdf(sd, stack, node, randb); }
else if(node.x == NODE_CLOSURE_EMISSION) case NODE_CLOSURE_BSDF:
svm_node_closure_emission(sd); svm_node_closure_bsdf(sd, stack, node, randb);
else if(node.x == NODE_CLOSURE_BACKGROUND) break;
svm_node_closure_background(sd); case NODE_CLOSURE_EMISSION:
else if(node.x == NODE_CLOSURE_HOLDOUT) svm_node_closure_emission(sd);
svm_node_closure_holdout(sd); break;
else if(node.x == NODE_CLOSURE_SET_WEIGHT) case NODE_CLOSURE_BACKGROUND:
svm_node_closure_set_weight(sd, node.y, node.z, node.w); svm_node_closure_background(sd);
else if(node.x == NODE_CLOSURE_WEIGHT) break;
svm_node_closure_weight(sd, stack, node.y); case NODE_CLOSURE_HOLDOUT:
else if(node.x == NODE_EMISSION_WEIGHT) svm_node_closure_holdout(sd);
svm_node_emission_weight(kg, sd, stack, node); break;
else if(node.x == NODE_MIX_CLOSURE) case NODE_CLOSURE_SET_WEIGHT:
svm_node_mix_closure(sd, stack, node.y, node.z, &offset, &randb); svm_node_closure_set_weight(sd, node.y, node.z, node.w);
else if(node.x == NODE_ADD_CLOSURE) break;
svm_node_add_closure(sd, stack, node.y, node.z, &offset, &randb, &closure_weight); case NODE_CLOSURE_WEIGHT:
else if(node.x == NODE_JUMP) svm_node_closure_weight(sd, stack, node.y);
offset = node.y; break;
case NODE_EMISSION_WEIGHT:
svm_node_emission_weight(kg, sd, stack, node);
break;
case NODE_MIX_CLOSURE:
svm_node_mix_closure(sd, stack, node.y, node.z, &offset, &randb);
break;
case NODE_ADD_CLOSURE:
svm_node_add_closure(sd, stack, node.y, node.z, &offset, &randb, &closure_weight);
break;
case NODE_JUMP:
offset = node.y;
break;
#ifdef __TEXTURES__ #ifdef __TEXTURES__
else if(node.x == NODE_TEX_NOISE_F) case NODE_TEX_NOISE_F:
svm_node_tex_noise_f(sd, stack, node.y, node.z); svm_node_tex_noise_f(sd, stack, node.y, node.z);
else if(node.x == NODE_TEX_NOISE_V) break;
svm_node_tex_noise_v(sd, stack, node.y, node.z); case NODE_TEX_NOISE_V:
else if(node.x == NODE_TEX_IMAGE) svm_node_tex_noise_v(sd, stack, node.y, node.z);
svm_node_tex_image(kg, sd, stack, node); break;
else if(node.x == NODE_TEX_ENVIRONMENT) case NODE_TEX_IMAGE:
svm_node_tex_environment(kg, sd, stack, node); svm_node_tex_image(kg, sd, stack, node);
else if(node.x == NODE_TEX_SKY) break;
svm_node_tex_sky(kg, sd, stack, node.y, node.z); case NODE_TEX_ENVIRONMENT:
else if(node.x == NODE_TEX_BLEND) svm_node_tex_environment(kg, sd, stack, node);
svm_node_tex_blend(sd, stack, node); break;
else if(node.x == NODE_TEX_CLOUDS) case NODE_TEX_SKY:
svm_node_tex_clouds(sd, stack, node); svm_node_tex_sky(kg, sd, stack, node.y, node.z);
else if(node.x == NODE_TEX_VORONOI) break;
svm_node_tex_voronoi(kg, sd, stack, node, &offset); case NODE_TEX_BLEND:
else if(node.x == NODE_TEX_MUSGRAVE) svm_node_tex_blend(sd, stack, node);
svm_node_tex_musgrave(kg, sd, stack, node, &offset); break;
else if(node.x == NODE_TEX_MARBLE) case NODE_TEX_CLOUDS:
svm_node_tex_marble(kg, sd, stack, node, &offset); svm_node_tex_clouds(sd, stack, node);
else if(node.x == NODE_TEX_MAGIC) break;
svm_node_tex_magic(sd, stack, node); case NODE_TEX_VORONOI:
else if(node.x == NODE_TEX_STUCCI) svm_node_tex_voronoi(kg, sd, stack, node, &offset);
svm_node_tex_stucci(kg, sd, stack, node, &offset); break;
else if(node.x == NODE_TEX_DISTORTED_NOISE) case NODE_TEX_MUSGRAVE:
svm_node_tex_distorted_noise(kg, sd, stack, node, &offset); svm_node_tex_musgrave(kg, sd, stack, node, &offset);
else if(node.x == NODE_TEX_WOOD) break;
svm_node_tex_wood(kg, sd, stack, node, &offset); case NODE_TEX_MARBLE:
svm_node_tex_marble(kg, sd, stack, node, &offset);
break;
case NODE_TEX_MAGIC:
svm_node_tex_magic(sd, stack, node);
break;
case NODE_TEX_STUCCI:
svm_node_tex_stucci(kg, sd, stack, node, &offset);
break;
case NODE_TEX_DISTORTED_NOISE:
svm_node_tex_distorted_noise(kg, sd, stack, node, &offset);
break;
case NODE_TEX_WOOD:
svm_node_tex_wood(kg, sd, stack, node, &offset);
break;
#endif #endif
else if(node.x == NODE_GEOMETRY) case NODE_GEOMETRY:
svm_node_geometry(sd, stack, node.y, node.z); svm_node_geometry(sd, stack, node.y, node.z);
else if(node.x == NODE_GEOMETRY_BUMP_DX) break;
svm_node_geometry_bump_dx(sd, stack, node.y, node.z); case NODE_GEOMETRY_BUMP_DX:
else if(node.x == NODE_GEOMETRY_BUMP_DY) svm_node_geometry_bump_dx(sd, stack, node.y, node.z);
svm_node_geometry_bump_dy(sd, stack, node.y, node.z); break;
else if(node.x == NODE_LIGHT_PATH) case NODE_GEOMETRY_BUMP_DY:
svm_node_light_path(sd, stack, node.y, node.z, path_flag); svm_node_geometry_bump_dy(sd, stack, node.y, node.z);
else if(node.x == NODE_CONVERT) break;
svm_node_convert(sd, stack, node.y, node.z, node.w); case NODE_LIGHT_PATH:
else if(node.x == NODE_VALUE_F) svm_node_light_path(sd, stack, node.y, node.z, path_flag);
svm_node_value_f(kg, sd, stack, node.y, node.z); break;
else if(node.x == NODE_VALUE_V) case NODE_CONVERT:
svm_node_value_v(kg, sd, stack, node.y, &offset); svm_node_convert(sd, stack, node.y, node.z, node.w);
else if(node.x == NODE_MIX) break;
svm_node_mix(kg, sd, stack, node.y, node.z, node.w, &offset); case NODE_VALUE_F:
else if(node.x == NODE_ATTR) svm_node_value_f(kg, sd, stack, node.y, node.z);
svm_node_attr(kg, sd, stack, node); break;
else if(node.x == NODE_ATTR_BUMP_DX) case NODE_VALUE_V:
svm_node_attr_bump_dx(kg, sd, stack, node); svm_node_value_v(kg, sd, stack, node.y, &offset);
else if(node.x == NODE_ATTR_BUMP_DY) break;
svm_node_attr_bump_dy(kg, sd, stack, node); case NODE_MIX:
else if(node.x == NODE_FRESNEL) svm_node_mix(kg, sd, stack, node.y, node.z, node.w, &offset);
svm_node_fresnel(sd, stack, node.y, node.z, node.w); break;
else if(node.x == NODE_SET_DISPLACEMENT) case NODE_ATTR:
svm_node_set_displacement(sd, stack, node.y); svm_node_attr(kg, sd, stack, node);
else if(node.x == NODE_SET_BUMP) break;
svm_node_set_bump(sd, stack, node.y, node.z, node.w); case NODE_ATTR_BUMP_DX:
else if(node.x == NODE_MATH) svm_node_attr_bump_dx(kg, sd, stack, node);
svm_node_math(kg, sd, stack, node.y, node.z, node.w, &offset); break;
else if(node.x == NODE_VECTOR_MATH) case NODE_ATTR_BUMP_DY:
svm_node_vector_math(kg, sd, stack, node.y, node.z, node.w, &offset); svm_node_attr_bump_dy(kg, sd, stack, node);
else if(node.x == NODE_MAPPING) break;
svm_node_mapping(kg, sd, stack, node.y, node.z, &offset); case NODE_FRESNEL:
else if(node.x == NODE_TEX_COORD) svm_node_fresnel(sd, stack, node.y, node.z, node.w);
svm_node_tex_coord(kg, sd, stack, node.y, node.z); break;
else if(node.x == NODE_TEX_COORD_BUMP_DX) case NODE_SET_DISPLACEMENT:
svm_node_tex_coord_bump_dx(kg, sd, stack, node.y, node.z); svm_node_set_displacement(sd, stack, node.y);
else if(node.x == NODE_TEX_COORD_BUMP_DY) break;
svm_node_tex_coord_bump_dy(kg, sd, stack, node.y, node.z); case NODE_SET_BUMP:
else if(node.x == NODE_EMISSION_SET_WEIGHT_TOTAL) svm_node_set_bump(sd, stack, node.y, node.z, node.w);
svm_node_emission_set_weight_total(kg, sd, node.y, node.z, node.w); break;
else if(node.x == NODE_END) case NODE_MATH:
break; svm_node_math(kg, sd, stack, node.y, node.z, node.w, &offset);
else break;
return; case NODE_VECTOR_MATH:
svm_node_vector_math(kg, sd, stack, node.y, node.z, node.w, &offset);
break;
case NODE_MAPPING:
svm_node_mapping(kg, sd, stack, node.y, node.z, &offset);
break;
case NODE_TEX_COORD:
svm_node_tex_coord(kg, sd, stack, node.y, node.z);
break;
case NODE_TEX_COORD_BUMP_DX:
svm_node_tex_coord_bump_dx(kg, sd, stack, node.y, node.z);
break;
case NODE_TEX_COORD_BUMP_DY:
svm_node_tex_coord_bump_dy(kg, sd, stack, node.y, node.z);
break;
case NODE_EMISSION_SET_WEIGHT_TOTAL:
svm_node_emission_set_weight_total(kg, sd, node.y, node.z, node.w);
break;
case NODE_END:
default:
sd->svm_closure_weight *= closure_weight;
return;
}
} }
sd->svm_closure_weight *= closure_weight;
} }
CCL_NAMESPACE_END CCL_NAMESPACE_END

View File

@@ -20,7 +20,7 @@ CCL_NAMESPACE_BEGIN
/* Clouds */ /* Clouds */
__device void svm_clouds(NodeNoiseBasis basis, int hard, int depth, float size, float3 p, float *fac, float3 *color) __device_inline void svm_clouds(NodeNoiseBasis basis, int hard, int depth, float size, float3 p, float *fac, float3 *color)
{ {
p /= size; p /= size;

View File

@@ -20,7 +20,7 @@ CCL_NAMESPACE_BEGIN
/* Distorted Noise (variable lacunarity noise) */ /* Distorted Noise (variable lacunarity noise) */
__device float svm_distorted_noise(float3 p, float size, NodeNoiseBasis basis, NodeNoiseBasis distortion_basis, float distortion) __device_noinline float svm_distorted_noise(float3 p, float size, NodeNoiseBasis basis, NodeNoiseBasis distortion_basis, float distortion)
{ {
float3 r; float3 r;
float3 offset = make_float3(13.5f, 13.5f, 13.5f); float3 offset = make_float3(13.5f, 13.5f, 13.5f);

View File

@@ -20,7 +20,7 @@ CCL_NAMESPACE_BEGIN
/* Magic */ /* Magic */
__device float3 svm_magic(float3 p, int n, float turbulence) __device_noinline float3 svm_magic(float3 p, int n, float turbulence)
{ {
float turb = turbulence/5.0f; float turb = turbulence/5.0f;

View File

@@ -20,7 +20,7 @@ CCL_NAMESPACE_BEGIN
/* Marble */ /* Marble */
__device float svm_marble(float3 p, float size, NodeMarbleType type, NodeWaveType wave, NodeNoiseBasis basis, int hard, float turb, int depth) __device_noinline float svm_marble(float3 p, float size, NodeMarbleType type, NodeWaveType wave, NodeNoiseBasis basis, int hard, float turb, int depth)
{ {
float x = p.x; float x = p.x;
float y = p.y; float y = p.y;

View File

@@ -27,7 +27,7 @@ CCL_NAMESPACE_BEGIN
* from "Texturing and Modelling: A procedural approach" * from "Texturing and Modelling: A procedural approach"
*/ */
__device float noise_musgrave_fBm(float3 p, NodeNoiseBasis basis, float H, float lacunarity, float octaves) __device_noinline float noise_musgrave_fBm(float3 p, NodeNoiseBasis basis, float H, float lacunarity, float octaves)
{ {
float rmd; float rmd;
float value = 0.0f; float value = 0.0f;
@@ -55,7 +55,7 @@ __device float noise_musgrave_fBm(float3 p, NodeNoiseBasis basis, float H, float
* octaves: number of frequencies in the fBm * octaves: number of frequencies in the fBm
*/ */
__device float noise_musgrave_multi_fractal(float3 p, NodeNoiseBasis basis, float H, float lacunarity, float octaves) __device_noinline float noise_musgrave_multi_fractal(float3 p, NodeNoiseBasis basis, float H, float lacunarity, float octaves)
{ {
float rmd; float rmd;
float value = 1.0f; float value = 1.0f;
@@ -84,7 +84,7 @@ __device float noise_musgrave_multi_fractal(float3 p, NodeNoiseBasis basis, floa
* offset: raises the terrain from `sea level' * offset: raises the terrain from `sea level'
*/ */
__device float noise_musgrave_hetero_terrain(float3 p, NodeNoiseBasis basis, float H, float lacunarity, float octaves, float offset) __device_noinline float noise_musgrave_hetero_terrain(float3 p, NodeNoiseBasis basis, float H, float lacunarity, float octaves, float offset)
{ {
float value, increment, rmd; float value, increment, rmd;
float pwHL = pow(lacunarity, -H); float pwHL = pow(lacunarity, -H);
@@ -119,7 +119,7 @@ __device float noise_musgrave_hetero_terrain(float3 p, NodeNoiseBasis basis, flo
* offset: raises the terrain from `sea level' * offset: raises the terrain from `sea level'
*/ */
__device float noise_musgrave_hybrid_multi_fractal(float3 p, NodeNoiseBasis basis, float H, float lacunarity, float octaves, float offset, float gain) __device_noinline float noise_musgrave_hybrid_multi_fractal(float3 p, NodeNoiseBasis basis, float H, float lacunarity, float octaves, float offset, float gain)
{ {
float result, signal, weight, rmd; float result, signal, weight, rmd;
float pwHL = pow(lacunarity, -H); float pwHL = pow(lacunarity, -H);
@@ -156,7 +156,7 @@ __device float noise_musgrave_hybrid_multi_fractal(float3 p, NodeNoiseBasis basi
* offset: raises the terrain from `sea level' * offset: raises the terrain from `sea level'
*/ */
__device float noise_musgrave_ridged_multi_fractal(float3 p, NodeNoiseBasis basis, float H, float lacunarity, float octaves, float offset, float gain) __device_noinline float noise_musgrave_ridged_multi_fractal(float3 p, NodeNoiseBasis basis, float H, float lacunarity, float octaves, float offset, float gain)
{ {
float result, signal, weight; float result, signal, weight;
float pwHL = pow(lacunarity, -H); float pwHL = pow(lacunarity, -H);

View File

@@ -112,7 +112,7 @@ __device float scale3(float result)
return 0.9820f * result; return 0.9820f * result;
} }
__device float perlin(float x, float y, float z) __device_noinline float perlin(float x, float y, float z)
{ {
int X; float fx = floorfrac(x, &X); int X; float fx = floorfrac(x, &X);
int Y; float fy = floorfrac(y, &Y); int Y; float fy = floorfrac(y, &Y);
@@ -135,7 +135,7 @@ __device float perlin(float x, float y, float z)
return scale3(result); return scale3(result);
} }
__device float perlin_periodic(float x, float y, float z, float3 pperiod) __device_noinline float perlin_periodic(float x, float y, float z, float3 pperiod)
{ {
int X; float fx = floorfrac(x, &X); int X; float fx = floorfrac(x, &X);
int Y; float fy = floorfrac(y, &Y); int Y; float fy = floorfrac(y, &Y);
@@ -178,7 +178,7 @@ __device float snoise(float3 p)
} }
/* cell noise */ /* cell noise */
__device float cellnoise(float3 p) __device_noinline float cellnoise(float3 p)
{ {
uint ix = quick_floor(p.x); uint ix = quick_floor(p.x);
uint iy = quick_floor(p.y); uint iy = quick_floor(p.y);
@@ -210,7 +210,7 @@ __device float psnoise(float3 p, float3 pperiod)
} }
/* turbulence */ /* turbulence */
__device float turbulence(float3 P, int oct, bool hard) __device_noinline float turbulence(float3 P, int oct, bool hard)
{ {
float amp = 1.0f, fscale = 1.0f, sum = 0.0f; float amp = 1.0f, fscale = 1.0f, sum = 0.0f;
int i; int i;

View File

@@ -20,7 +20,7 @@ CCL_NAMESPACE_BEGIN
/* Stucci */ /* Stucci */
__device float svm_stucci(NodeStucciType type, NodeNoiseBasis basis, int hard, float turbulence, float size, float3 p) __device_noinline float svm_stucci(NodeStucciType type, NodeNoiseBasis basis, int hard, float turbulence, float size, float3 p)
{ {
p /= size; p /= size;

View File

@@ -20,9 +20,9 @@ CCL_NAMESPACE_BEGIN
/* Voronoi */ /* Voronoi */
__device float svm_voronoi(NodeDistanceMetric distance_metric, NodeVoronoiColoring coloring, __device_noinline float4 svm_voronoi(NodeDistanceMetric distance_metric, NodeVoronoiColoring coloring,
float weight1, float weight2, float weight3, float weight4, float weight1, float weight2, float weight3, float weight4,
float exponent, float intensity, float size, float3 p, float3 *color) float exponent, float intensity, float size, float3 p)
{ {
float aw1 = fabsf(weight1); float aw1 = fabsf(weight1);
float aw2 = fabsf(weight2); float aw2 = fabsf(weight2);
@@ -41,31 +41,32 @@ __device float svm_voronoi(NodeDistanceMetric distance_metric, NodeVoronoiColori
/* Scalar output */ /* Scalar output */
float fac = sc * fabsf(weight1*da[0] + weight2*da[1] + weight3*da[2] + weight4*da[3]); float fac = sc * fabsf(weight1*da[0] + weight2*da[1] + weight3*da[2] + weight4*da[3]);
float3 color;
/* colored output */ /* colored output */
if(coloring == NODE_VORONOI_INTENSITY) { if(coloring == NODE_VORONOI_INTENSITY) {
*color = make_float3(fac, fac, fac); color = make_float3(fac, fac, fac);
} }
else { else {
*color = aw1*cellnoise_color(pa[0]); color = aw1*cellnoise_color(pa[0]);
*color += aw2*cellnoise_color(pa[1]); color += aw2*cellnoise_color(pa[1]);
*color += aw3*cellnoise_color(pa[2]); color += aw3*cellnoise_color(pa[2]);
*color += aw4*cellnoise_color(pa[3]); color += aw4*cellnoise_color(pa[3]);
if(coloring != NODE_VORONOI_POSITION) { if(coloring != NODE_VORONOI_POSITION) {
float t1 = min((da[1] - da[0])*10.0f, 1.0f); float t1 = min((da[1] - da[0])*10.0f, 1.0f);
if(coloring == NODE_VORONOI_POSITION_OUTLINE_INTENSITY) if(coloring == NODE_VORONOI_POSITION_OUTLINE_INTENSITY)
*color *= t1*fac; color *= t1*fac;
else if(coloring == NODE_VORONOI_POSITION_OUTLINE) else if(coloring == NODE_VORONOI_POSITION_OUTLINE)
*color *= t1*sc; color *= t1*sc;
} }
else { else {
*color *= sc; color *= sc;
} }
} }
return fac; return make_float4(color.x, color.y, color.z, fac);
} }
__device void svm_node_tex_voronoi(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset) __device void svm_node_tex_voronoi(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset)
@@ -92,10 +93,11 @@ __device void svm_node_tex_voronoi(KernelGlobals *kg, ShaderData *sd, float *sta
exponent = fmaxf(exponent, 1e-5f); exponent = fmaxf(exponent, 1e-5f);
size = nonzerof(size, 1e-5f); size = nonzerof(size, 1e-5f);
float3 color; float4 result = svm_voronoi((NodeDistanceMetric)distance_metric,
float f = svm_voronoi((NodeDistanceMetric)distance_metric,
(NodeVoronoiColoring)coloring, (NodeVoronoiColoring)coloring,
weight1, weight2, weight3, weight4, exponent, 1.0f, size, co, &color); weight1, weight2, weight3, weight4, exponent, 1.0f, size, co);
float3 color = make_float3(result.x, result.y, result.z);
float f = result.w;
if(stack_valid(fac_offset)) stack_store_float(stack, fac_offset, f); if(stack_valid(fac_offset)) stack_store_float(stack, fac_offset, f);
if(stack_valid(color_offset)) stack_store_float3(stack, color_offset, color); if(stack_valid(color_offset)) stack_store_float3(stack, color_offset, color);

View File

@@ -20,7 +20,7 @@ CCL_NAMESPACE_BEGIN
/* Wood */ /* Wood */
__device float svm_wood(float3 p, float size, NodeWoodType type, NodeWaveType wave, NodeNoiseBasis basis, uint hard, float turb) __device_noinline float svm_wood(float3 p, float size, NodeWoodType type, NodeWaveType wave, NodeNoiseBasis basis, uint hard, float turb)
{ {
float x = p.x; float x = p.x;
float y = p.y; float y = p.y;