Cycles: some tweaks to try to get sm_13 shader compiling.

This commit is contained in:
Brecht Van Lommel
2011-08-29 17:17:40 +00:00
parent be0aef2ef2
commit eac2674f1d
11 changed files with 180 additions and 139 deletions

View File

@@ -151,16 +151,9 @@ __device float4 kernel_path_integrate(KernelGlobals *kg, RNG *rng, int pass, Ray
if((sd.flag & SD_HOLDOUT) && (path_flag & PATH_RAY_CAMERA)) {
float3 holdout_weight = shader_holdout_eval(kg, &sd);
if(kernel_data.background.transparent) {
if(kernel_data.background.transparent)
Ltransparent += average(holdout_weight*throughput);
}
else {
ShaderData sd;
shader_setup_from_background(kg, &sd, &ray);
L += holdout_weight*throughput*shader_eval_background(kg, &sd, path_flag);
shader_release(kg, &sd);
}
}
#endif
#ifdef __EMISSION__

View File

@@ -160,111 +160,157 @@ __device_noinline void svm_eval_nodes(KernelGlobals *kg, ShaderData *sd, ShaderT
while(1) {
uint4 node = read_node(kg, &offset);
if(node.x == NODE_SHADER_JUMP) {
switch(node.x) {
case NODE_SHADER_JUMP: {
if(type == SHADER_TYPE_SURFACE) offset = node.y;
else if(type == SHADER_TYPE_VOLUME) offset = node.z;
else if(type == SHADER_TYPE_DISPLACEMENT) offset = node.w;
else return;
}
else if(node.x == NODE_CLOSURE_BSDF)
svm_node_closure_bsdf(sd, stack, node, randb);
else if(node.x == NODE_CLOSURE_EMISSION)
svm_node_closure_emission(sd);
else if(node.x == NODE_CLOSURE_BACKGROUND)
svm_node_closure_background(sd);
else if(node.x == NODE_CLOSURE_HOLDOUT)
svm_node_closure_holdout(sd);
else if(node.x == NODE_CLOSURE_SET_WEIGHT)
svm_node_closure_set_weight(sd, node.y, node.z, node.w);
else if(node.x == NODE_CLOSURE_WEIGHT)
svm_node_closure_weight(sd, stack, node.y);
else if(node.x == NODE_EMISSION_WEIGHT)
svm_node_emission_weight(kg, sd, stack, node);
else if(node.x == NODE_MIX_CLOSURE)
svm_node_mix_closure(sd, stack, node.y, node.z, &offset, &randb);
else if(node.x == NODE_ADD_CLOSURE)
svm_node_add_closure(sd, stack, node.y, node.z, &offset, &randb, &closure_weight);
else if(node.x == NODE_JUMP)
offset = node.y;
#ifdef __TEXTURES__
else if(node.x == NODE_TEX_NOISE_F)
svm_node_tex_noise_f(sd, stack, node.y, node.z);
else if(node.x == NODE_TEX_NOISE_V)
svm_node_tex_noise_v(sd, stack, node.y, node.z);
else if(node.x == NODE_TEX_IMAGE)
svm_node_tex_image(kg, sd, stack, node);
else if(node.x == NODE_TEX_ENVIRONMENT)
svm_node_tex_environment(kg, sd, stack, node);
else if(node.x == NODE_TEX_SKY)
svm_node_tex_sky(kg, sd, stack, node.y, node.z);
else if(node.x == NODE_TEX_BLEND)
svm_node_tex_blend(sd, stack, node);
else if(node.x == NODE_TEX_CLOUDS)
svm_node_tex_clouds(sd, stack, node);
else if(node.x == NODE_TEX_VORONOI)
svm_node_tex_voronoi(kg, sd, stack, node, &offset);
else if(node.x == NODE_TEX_MUSGRAVE)
svm_node_tex_musgrave(kg, sd, stack, node, &offset);
else if(node.x == NODE_TEX_MARBLE)
svm_node_tex_marble(kg, sd, stack, node, &offset);
else if(node.x == NODE_TEX_MAGIC)
svm_node_tex_magic(sd, stack, node);
else if(node.x == NODE_TEX_STUCCI)
svm_node_tex_stucci(kg, sd, stack, node, &offset);
else if(node.x == NODE_TEX_DISTORTED_NOISE)
svm_node_tex_distorted_noise(kg, sd, stack, node, &offset);
else if(node.x == NODE_TEX_WOOD)
svm_node_tex_wood(kg, sd, stack, node, &offset);
#endif
else if(node.x == NODE_GEOMETRY)
svm_node_geometry(sd, stack, node.y, node.z);
else if(node.x == NODE_GEOMETRY_BUMP_DX)
svm_node_geometry_bump_dx(sd, stack, node.y, node.z);
else if(node.x == NODE_GEOMETRY_BUMP_DY)
svm_node_geometry_bump_dy(sd, stack, node.y, node.z);
else if(node.x == NODE_LIGHT_PATH)
svm_node_light_path(sd, stack, node.y, node.z, path_flag);
else if(node.x == NODE_CONVERT)
svm_node_convert(sd, stack, node.y, node.z, node.w);
else if(node.x == NODE_VALUE_F)
svm_node_value_f(kg, sd, stack, node.y, node.z);
else if(node.x == NODE_VALUE_V)
svm_node_value_v(kg, sd, stack, node.y, &offset);
else if(node.x == NODE_MIX)
svm_node_mix(kg, sd, stack, node.y, node.z, node.w, &offset);
else if(node.x == NODE_ATTR)
svm_node_attr(kg, sd, stack, node);
else if(node.x == NODE_ATTR_BUMP_DX)
svm_node_attr_bump_dx(kg, sd, stack, node);
else if(node.x == NODE_ATTR_BUMP_DY)
svm_node_attr_bump_dy(kg, sd, stack, node);
else if(node.x == NODE_FRESNEL)
svm_node_fresnel(sd, stack, node.y, node.z, node.w);
else if(node.x == NODE_SET_DISPLACEMENT)
svm_node_set_displacement(sd, stack, node.y);
else if(node.x == NODE_SET_BUMP)
svm_node_set_bump(sd, stack, node.y, node.z, node.w);
else if(node.x == NODE_MATH)
svm_node_math(kg, sd, stack, node.y, node.z, node.w, &offset);
else if(node.x == NODE_VECTOR_MATH)
svm_node_vector_math(kg, sd, stack, node.y, node.z, node.w, &offset);
else if(node.x == NODE_MAPPING)
svm_node_mapping(kg, sd, stack, node.y, node.z, &offset);
else if(node.x == NODE_TEX_COORD)
svm_node_tex_coord(kg, sd, stack, node.y, node.z);
else if(node.x == NODE_TEX_COORD_BUMP_DX)
svm_node_tex_coord_bump_dx(kg, sd, stack, node.y, node.z);
else if(node.x == NODE_TEX_COORD_BUMP_DY)
svm_node_tex_coord_bump_dy(kg, sd, stack, node.y, node.z);
else if(node.x == NODE_EMISSION_SET_WEIGHT_TOTAL)
svm_node_emission_set_weight_total(kg, sd, node.y, node.z, node.w);
else if(node.x == NODE_END)
break;
else
}
case NODE_CLOSURE_BSDF:
svm_node_closure_bsdf(sd, stack, node, randb);
break;
case NODE_CLOSURE_EMISSION:
svm_node_closure_emission(sd);
break;
case NODE_CLOSURE_BACKGROUND:
svm_node_closure_background(sd);
break;
case NODE_CLOSURE_HOLDOUT:
svm_node_closure_holdout(sd);
break;
case NODE_CLOSURE_SET_WEIGHT:
svm_node_closure_set_weight(sd, node.y, node.z, node.w);
break;
case NODE_CLOSURE_WEIGHT:
svm_node_closure_weight(sd, stack, node.y);
break;
case NODE_EMISSION_WEIGHT:
svm_node_emission_weight(kg, sd, stack, node);
break;
case NODE_MIX_CLOSURE:
svm_node_mix_closure(sd, stack, node.y, node.z, &offset, &randb);
break;
case NODE_ADD_CLOSURE:
svm_node_add_closure(sd, stack, node.y, node.z, &offset, &randb, &closure_weight);
break;
case NODE_JUMP:
offset = node.y;
break;
#ifdef __TEXTURES__
case NODE_TEX_NOISE_F:
svm_node_tex_noise_f(sd, stack, node.y, node.z);
break;
case NODE_TEX_NOISE_V:
svm_node_tex_noise_v(sd, stack, node.y, node.z);
break;
case NODE_TEX_IMAGE:
svm_node_tex_image(kg, sd, stack, node);
break;
case NODE_TEX_ENVIRONMENT:
svm_node_tex_environment(kg, sd, stack, node);
break;
case NODE_TEX_SKY:
svm_node_tex_sky(kg, sd, stack, node.y, node.z);
break;
case NODE_TEX_BLEND:
svm_node_tex_blend(sd, stack, node);
break;
case NODE_TEX_CLOUDS:
svm_node_tex_clouds(sd, stack, node);
break;
case NODE_TEX_VORONOI:
svm_node_tex_voronoi(kg, sd, stack, node, &offset);
break;
case NODE_TEX_MUSGRAVE:
svm_node_tex_musgrave(kg, sd, stack, node, &offset);
break;
case NODE_TEX_MARBLE:
svm_node_tex_marble(kg, sd, stack, node, &offset);
break;
case NODE_TEX_MAGIC:
svm_node_tex_magic(sd, stack, node);
break;
case NODE_TEX_STUCCI:
svm_node_tex_stucci(kg, sd, stack, node, &offset);
break;
case NODE_TEX_DISTORTED_NOISE:
svm_node_tex_distorted_noise(kg, sd, stack, node, &offset);
break;
case NODE_TEX_WOOD:
svm_node_tex_wood(kg, sd, stack, node, &offset);
break;
#endif
case NODE_GEOMETRY:
svm_node_geometry(sd, stack, node.y, node.z);
break;
case NODE_GEOMETRY_BUMP_DX:
svm_node_geometry_bump_dx(sd, stack, node.y, node.z);
break;
case NODE_GEOMETRY_BUMP_DY:
svm_node_geometry_bump_dy(sd, stack, node.y, node.z);
break;
case NODE_LIGHT_PATH:
svm_node_light_path(sd, stack, node.y, node.z, path_flag);
break;
case NODE_CONVERT:
svm_node_convert(sd, stack, node.y, node.z, node.w);
break;
case NODE_VALUE_F:
svm_node_value_f(kg, sd, stack, node.y, node.z);
break;
case NODE_VALUE_V:
svm_node_value_v(kg, sd, stack, node.y, &offset);
break;
case NODE_MIX:
svm_node_mix(kg, sd, stack, node.y, node.z, node.w, &offset);
break;
case NODE_ATTR:
svm_node_attr(kg, sd, stack, node);
break;
case NODE_ATTR_BUMP_DX:
svm_node_attr_bump_dx(kg, sd, stack, node);
break;
case NODE_ATTR_BUMP_DY:
svm_node_attr_bump_dy(kg, sd, stack, node);
break;
case NODE_FRESNEL:
svm_node_fresnel(sd, stack, node.y, node.z, node.w);
break;
case NODE_SET_DISPLACEMENT:
svm_node_set_displacement(sd, stack, node.y);
break;
case NODE_SET_BUMP:
svm_node_set_bump(sd, stack, node.y, node.z, node.w);
break;
case NODE_MATH:
svm_node_math(kg, sd, stack, node.y, node.z, node.w, &offset);
break;
case NODE_VECTOR_MATH:
svm_node_vector_math(kg, sd, stack, node.y, node.z, node.w, &offset);
break;
case NODE_MAPPING:
svm_node_mapping(kg, sd, stack, node.y, node.z, &offset);
break;
case NODE_TEX_COORD:
svm_node_tex_coord(kg, sd, stack, node.y, node.z);
break;
case NODE_TEX_COORD_BUMP_DX:
svm_node_tex_coord_bump_dx(kg, sd, stack, node.y, node.z);
break;
case NODE_TEX_COORD_BUMP_DY:
svm_node_tex_coord_bump_dy(kg, sd, stack, node.y, node.z);
break;
case NODE_EMISSION_SET_WEIGHT_TOTAL:
svm_node_emission_set_weight_total(kg, sd, node.y, node.z, node.w);
break;
case NODE_END:
default:
sd->svm_closure_weight *= closure_weight;
return;
}
sd->svm_closure_weight *= closure_weight;
}
}
CCL_NAMESPACE_END

View File

@@ -20,7 +20,7 @@ CCL_NAMESPACE_BEGIN
/* Clouds */
__device void svm_clouds(NodeNoiseBasis basis, int hard, int depth, float size, float3 p, float *fac, float3 *color)
__device_inline void svm_clouds(NodeNoiseBasis basis, int hard, int depth, float size, float3 p, float *fac, float3 *color)
{
p /= size;

View File

@@ -20,7 +20,7 @@ CCL_NAMESPACE_BEGIN
/* Distorted Noise (variable lacunarity noise) */
__device float svm_distorted_noise(float3 p, float size, NodeNoiseBasis basis, NodeNoiseBasis distortion_basis, float distortion)
__device_noinline float svm_distorted_noise(float3 p, float size, NodeNoiseBasis basis, NodeNoiseBasis distortion_basis, float distortion)
{
float3 r;
float3 offset = make_float3(13.5f, 13.5f, 13.5f);

View File

@@ -20,7 +20,7 @@ CCL_NAMESPACE_BEGIN
/* Magic */
__device float3 svm_magic(float3 p, int n, float turbulence)
__device_noinline float3 svm_magic(float3 p, int n, float turbulence)
{
float turb = turbulence/5.0f;

View File

@@ -20,7 +20,7 @@ CCL_NAMESPACE_BEGIN
/* Marble */
__device float svm_marble(float3 p, float size, NodeMarbleType type, NodeWaveType wave, NodeNoiseBasis basis, int hard, float turb, int depth)
__device_noinline float svm_marble(float3 p, float size, NodeMarbleType type, NodeWaveType wave, NodeNoiseBasis basis, int hard, float turb, int depth)
{
float x = p.x;
float y = p.y;

View File

@@ -27,7 +27,7 @@ CCL_NAMESPACE_BEGIN
* from "Texturing and Modelling: A procedural approach"
*/
__device float noise_musgrave_fBm(float3 p, NodeNoiseBasis basis, float H, float lacunarity, float octaves)
__device_noinline float noise_musgrave_fBm(float3 p, NodeNoiseBasis basis, float H, float lacunarity, float octaves)
{
float rmd;
float value = 0.0f;
@@ -55,7 +55,7 @@ __device float noise_musgrave_fBm(float3 p, NodeNoiseBasis basis, float H, float
* octaves: number of frequencies in the fBm
*/
__device float noise_musgrave_multi_fractal(float3 p, NodeNoiseBasis basis, float H, float lacunarity, float octaves)
__device_noinline float noise_musgrave_multi_fractal(float3 p, NodeNoiseBasis basis, float H, float lacunarity, float octaves)
{
float rmd;
float value = 1.0f;
@@ -84,7 +84,7 @@ __device float noise_musgrave_multi_fractal(float3 p, NodeNoiseBasis basis, floa
* offset: raises the terrain from `sea level'
*/
__device float noise_musgrave_hetero_terrain(float3 p, NodeNoiseBasis basis, float H, float lacunarity, float octaves, float offset)
__device_noinline float noise_musgrave_hetero_terrain(float3 p, NodeNoiseBasis basis, float H, float lacunarity, float octaves, float offset)
{
float value, increment, rmd;
float pwHL = pow(lacunarity, -H);
@@ -119,7 +119,7 @@ __device float noise_musgrave_hetero_terrain(float3 p, NodeNoiseBasis basis, flo
* offset: raises the terrain from `sea level'
*/
__device float noise_musgrave_hybrid_multi_fractal(float3 p, NodeNoiseBasis basis, float H, float lacunarity, float octaves, float offset, float gain)
__device_noinline float noise_musgrave_hybrid_multi_fractal(float3 p, NodeNoiseBasis basis, float H, float lacunarity, float octaves, float offset, float gain)
{
float result, signal, weight, rmd;
float pwHL = pow(lacunarity, -H);
@@ -156,7 +156,7 @@ __device float noise_musgrave_hybrid_multi_fractal(float3 p, NodeNoiseBasis basi
* offset: raises the terrain from `sea level'
*/
__device float noise_musgrave_ridged_multi_fractal(float3 p, NodeNoiseBasis basis, float H, float lacunarity, float octaves, float offset, float gain)
__device_noinline float noise_musgrave_ridged_multi_fractal(float3 p, NodeNoiseBasis basis, float H, float lacunarity, float octaves, float offset, float gain)
{
float result, signal, weight;
float pwHL = pow(lacunarity, -H);

View File

@@ -112,7 +112,7 @@ __device float scale3(float result)
return 0.9820f * result;
}
__device float perlin(float x, float y, float z)
__device_noinline float perlin(float x, float y, float z)
{
int X; float fx = floorfrac(x, &X);
int Y; float fy = floorfrac(y, &Y);
@@ -135,7 +135,7 @@ __device float perlin(float x, float y, float z)
return scale3(result);
}
__device float perlin_periodic(float x, float y, float z, float3 pperiod)
__device_noinline float perlin_periodic(float x, float y, float z, float3 pperiod)
{
int X; float fx = floorfrac(x, &X);
int Y; float fy = floorfrac(y, &Y);
@@ -178,7 +178,7 @@ __device float snoise(float3 p)
}
/* cell noise */
__device float cellnoise(float3 p)
__device_noinline float cellnoise(float3 p)
{
uint ix = quick_floor(p.x);
uint iy = quick_floor(p.y);
@@ -210,7 +210,7 @@ __device float psnoise(float3 p, float3 pperiod)
}
/* turbulence */
__device float turbulence(float3 P, int oct, bool hard)
__device_noinline float turbulence(float3 P, int oct, bool hard)
{
float amp = 1.0f, fscale = 1.0f, sum = 0.0f;
int i;

View File

@@ -20,7 +20,7 @@ CCL_NAMESPACE_BEGIN
/* Stucci */
__device float svm_stucci(NodeStucciType type, NodeNoiseBasis basis, int hard, float turbulence, float size, float3 p)
__device_noinline float svm_stucci(NodeStucciType type, NodeNoiseBasis basis, int hard, float turbulence, float size, float3 p)
{
p /= size;

View File

@@ -20,9 +20,9 @@ CCL_NAMESPACE_BEGIN
/* Voronoi */
__device float svm_voronoi(NodeDistanceMetric distance_metric, NodeVoronoiColoring coloring,
__device_noinline float4 svm_voronoi(NodeDistanceMetric distance_metric, NodeVoronoiColoring coloring,
float weight1, float weight2, float weight3, float weight4,
float exponent, float intensity, float size, float3 p, float3 *color)
float exponent, float intensity, float size, float3 p)
{
float aw1 = fabsf(weight1);
float aw2 = fabsf(weight2);
@@ -41,31 +41,32 @@ __device float svm_voronoi(NodeDistanceMetric distance_metric, NodeVoronoiColori
/* Scalar output */
float fac = sc * fabsf(weight1*da[0] + weight2*da[1] + weight3*da[2] + weight4*da[3]);
float3 color;
/* colored output */
if(coloring == NODE_VORONOI_INTENSITY) {
*color = make_float3(fac, fac, fac);
color = make_float3(fac, fac, fac);
}
else {
*color = aw1*cellnoise_color(pa[0]);
*color += aw2*cellnoise_color(pa[1]);
*color += aw3*cellnoise_color(pa[2]);
*color += aw4*cellnoise_color(pa[3]);
color = aw1*cellnoise_color(pa[0]);
color += aw2*cellnoise_color(pa[1]);
color += aw3*cellnoise_color(pa[2]);
color += aw4*cellnoise_color(pa[3]);
if(coloring != NODE_VORONOI_POSITION) {
float t1 = min((da[1] - da[0])*10.0f, 1.0f);
if(coloring == NODE_VORONOI_POSITION_OUTLINE_INTENSITY)
*color *= t1*fac;
color *= t1*fac;
else if(coloring == NODE_VORONOI_POSITION_OUTLINE)
*color *= t1*sc;
color *= t1*sc;
}
else {
*color *= sc;
color *= sc;
}
}
return fac;
return make_float4(color.x, color.y, color.z, fac);
}
__device void svm_node_tex_voronoi(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset)
@@ -92,10 +93,11 @@ __device void svm_node_tex_voronoi(KernelGlobals *kg, ShaderData *sd, float *sta
exponent = fmaxf(exponent, 1e-5f);
size = nonzerof(size, 1e-5f);
float3 color;
float f = svm_voronoi((NodeDistanceMetric)distance_metric,
float4 result = svm_voronoi((NodeDistanceMetric)distance_metric,
(NodeVoronoiColoring)coloring,
weight1, weight2, weight3, weight4, exponent, 1.0f, size, co, &color);
weight1, weight2, weight3, weight4, exponent, 1.0f, size, co);
float3 color = make_float3(result.x, result.y, result.z);
float f = result.w;
if(stack_valid(fac_offset)) stack_store_float(stack, fac_offset, f);
if(stack_valid(color_offset)) stack_store_float3(stack, color_offset, color);

View File

@@ -20,7 +20,7 @@ CCL_NAMESPACE_BEGIN
/* Wood */
__device float svm_wood(float3 p, float size, NodeWoodType type, NodeWaveType wave, NodeNoiseBasis basis, uint hard, float turb)
__device_noinline float svm_wood(float3 p, float size, NodeWoodType type, NodeWaveType wave, NodeNoiseBasis basis, uint hard, float turb)
{
float x = p.x;
float y = p.y;