Merge branch 'master' into blender2.8

Bastien Montagne
2017-08-07 16:16:43 +02:00
44 changed files with 601 additions and 499 deletions

View File

@@ -37,7 +37,6 @@ class AddPresetIntegrator(AddPresetBase, Operator):
"cycles.transmission_bounces",
"cycles.volume_bounces",
"cycles.transparent_max_bounces",
"cycles.use_transparent_shadows",
"cycles.caustics_reflective",
"cycles.caustics_refractive",
"cycles.blur_glossy"

View File

@@ -343,11 +343,6 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
min=0, max=1024,
default=8,
)
cls.use_transparent_shadows = BoolProperty(
name="Transparent Shadows",
description="Use transparency of surfaces for rendering shadows",
default=True,
)
cls.volume_step_size = FloatProperty(
name="Step Size",

View File

@@ -293,7 +293,6 @@ class CyclesRender_PT_light_paths(CyclesButtonsPanel, Panel):
sub = col.column(align=True)
sub.label("Transparency:")
sub.prop(cscene, "transparent_max_bounces", text="Max")
sub.prop(cscene, "use_transparent_shadows", text="Shadows")
col.separator()

View File

@@ -245,7 +245,6 @@ void BlenderSync::sync_integrator()
integrator->max_volume_bounce = get_int(cscene, "volume_bounces");
integrator->transparent_max_bounce = get_int(cscene, "transparent_max_bounces");
integrator->transparent_shadows = get_boolean(cscene, "use_transparent_shadows");
integrator->volume_max_steps = get_int(cscene, "volume_max_steps");
integrator->volume_step_size = get_float(cscene, "volume_step_size");

View File

@@ -48,6 +48,7 @@
#include "util/util_logging.h"
#include "util/util_map.h"
#include "util/util_opengl.h"
#include "util/util_optimization.h"
#include "util/util_progress.h"
#include "util/util_system.h"
#include "util/util_thread.h"

View File

@@ -1919,17 +1919,13 @@ public:
int threads_per_block;
cuda_assert(cuFuncGetAttribute(&threads_per_block, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, func));
int xthreads = (int)sqrt(threads_per_block);
int ythreads = (int)sqrt(threads_per_block);
int xblocks = (dim.global_size[0] + xthreads - 1)/xthreads;
int yblocks = (dim.global_size[1] + ythreads - 1)/ythreads;
int xblocks = (dim.global_size[0]*dim.global_size[1] + threads_per_block - 1)/threads_per_block;
cuda_assert(cuFuncSetCacheConfig(func, CU_FUNC_CACHE_PREFER_L1));
cuda_assert(cuLaunchKernel(func,
xblocks , yblocks, 1, /* blocks */
xthreads, ythreads, 1, /* threads */
xblocks, 1, 1, /* blocks */
threads_per_block, 1, 1, /* threads */
0, 0, args, 0));
device->cuda_pop_context();
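The hunk above flattens the old two-dimensional block grid into a single 1D launch. A minimal standalone sketch of the same ceiling-division pattern (blocks_for is a hypothetical helper, not part of the Cycles device code):
/* Split total_items work items into blocks of threads_per_block threads,
 * rounding up so the last partial block is still launched. */
static inline int blocks_for(int total_items, int threads_per_block)
{
	return (total_items + threads_per_block - 1) / threads_per_block;
}
/* e.g. blocks_for(1000, 256) == 4: 1024 threads cover 1000 items, which is
 * why kernels guard the tail with an index < total_items check. */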

View File

@@ -635,7 +635,7 @@ bool OpenCLInfo::device_supported(const string& platform_name,
"Tahiti", "Pitcairn", "Capeverde", "Oland",
NULL
};
for (int i = 0; blacklist[i] != NULL; i++) {
for(int i = 0; blacklist[i] != NULL; i++) {
if(device_name == blacklist[i]) {
VLOG(1) << "AMD device " << device_name << " not supported";
return false;

View File

@@ -233,6 +233,7 @@ set(SRC_FILTER_HEADERS
set(SRC_UTIL_HEADERS
../util/util_atomic.h
../util/util_color.h
../util/util_defines.h
../util/util_half.h
../util/util_hash.h
../util/util_math.h

View File

@@ -16,7 +16,7 @@
CCL_NAMESPACE_BEGIN
#define ccl_get_feature_sse(pass) _mm_loadu_ps(buffer + (pass)*pass_stride)
#define ccl_get_feature_sse(pass) load_float4(buffer + (pass)*pass_stride)
/* Loop over the pixels in the range [low.x, high.x) x [low.y, high.y), 4 at a time.
* pixel_buffer always points to the first of the 4 current pixels in the first pass.
@@ -24,25 +24,25 @@ CCL_NAMESPACE_BEGIN
#define FOR_PIXEL_WINDOW_SSE pixel_buffer = buffer + (low.y - rect.y)*buffer_w + (low.x - rect.x); \
for(pixel.y = low.y; pixel.y < high.y; pixel.y++) { \
__m128 y4 = _mm_set1_ps(pixel.y); \
float4 y4 = make_float4(pixel.y); \
for(pixel.x = low.x; pixel.x < high.x; pixel.x += 4, pixel_buffer += 4) { \
__m128 x4 = _mm_add_ps(_mm_set1_ps(pixel.x), _mm_set_ps(3.0f, 2.0f, 1.0f, 0.0f)); \
__m128 active_pixels = _mm_cmplt_ps(x4, _mm_set1_ps(high.x));
float4 x4 = make_float4(pixel.x) + make_float4(0.0f, 1.0f, 2.0f, 3.0f); \
int4 active_pixels = x4 < make_float4(high.x);
#define END_FOR_PIXEL_WINDOW_SSE } \
pixel_buffer += buffer_w - (pixel.x - low.x); \
}
ccl_device_inline void filter_get_features_sse(__m128 x, __m128 y,
__m128 active_pixels,
ccl_device_inline void filter_get_features_sse(float4 x, float4 y,
int4 active_pixels,
const float *ccl_restrict buffer,
__m128 *features,
const __m128 *ccl_restrict mean,
float4 *features,
const float4 *ccl_restrict mean,
int pass_stride)
{
features[0] = x;
features[1] = y;
features[2] = _mm_fabs_ps(ccl_get_feature_sse(0));
features[2] = fabs(ccl_get_feature_sse(0));
features[3] = ccl_get_feature_sse(1);
features[4] = ccl_get_feature_sse(2);
features[5] = ccl_get_feature_sse(3);
@@ -52,53 +52,41 @@ ccl_device_inline void filter_get_features_sse(__m128 x, __m128 y,
features[9] = ccl_get_feature_sse(7);
if(mean) {
for(int i = 0; i < DENOISE_FEATURES; i++)
features[i] = _mm_sub_ps(features[i], mean[i]);
features[i] = features[i] - mean[i];
}
for(int i = 0; i < DENOISE_FEATURES; i++)
features[i] = _mm_mask_ps(features[i], active_pixels);
features[i] = mask(active_pixels, features[i]);
}
ccl_device_inline void filter_get_feature_scales_sse(__m128 x, __m128 y,
__m128 active_pixels,
ccl_device_inline void filter_get_feature_scales_sse(float4 x, float4 y,
int4 active_pixels,
const float *ccl_restrict buffer,
__m128 *scales,
const __m128 *ccl_restrict mean,
float4 *scales,
const float4 *ccl_restrict mean,
int pass_stride)
{
scales[0] = _mm_mask_ps(_mm_fabs_ps(_mm_sub_ps(x, mean[0])), active_pixels);
scales[1] = _mm_mask_ps(_mm_fabs_ps(_mm_sub_ps(y, mean[1])), active_pixels);
scales[2] = _mm_mask_ps(_mm_fabs_ps(_mm_sub_ps(_mm_fabs_ps(ccl_get_feature_sse(0)), mean[2])), active_pixels);
__m128 diff, scale;
diff = _mm_sub_ps(ccl_get_feature_sse(1), mean[3]);
scale = _mm_mul_ps(diff, diff);
diff = _mm_sub_ps(ccl_get_feature_sse(2), mean[4]);
scale = _mm_add_ps(scale, _mm_mul_ps(diff, diff));
diff = _mm_sub_ps(ccl_get_feature_sse(3), mean[5]);
scale = _mm_add_ps(scale, _mm_mul_ps(diff, diff));
scales[3] = _mm_mask_ps(scale, active_pixels);
scales[4] = _mm_mask_ps(_mm_fabs_ps(_mm_sub_ps(ccl_get_feature_sse(4), mean[6])), active_pixels);
diff = _mm_sub_ps(ccl_get_feature_sse(5), mean[7]);
scale = _mm_mul_ps(diff, diff);
diff = _mm_sub_ps(ccl_get_feature_sse(6), mean[8]);
scale = _mm_add_ps(scale, _mm_mul_ps(diff, diff));
diff = _mm_sub_ps(ccl_get_feature_sse(7), mean[9]);
scale = _mm_add_ps(scale, _mm_mul_ps(diff, diff));
scales[5] = _mm_mask_ps(scale, active_pixels);
scales[0] = fabs(x - mean[0]);
scales[1] = fabs(y - mean[1]);
scales[2] = fabs(fabs(ccl_get_feature_sse(0)) - mean[2]);
scales[3] = sqr(ccl_get_feature_sse(1) - mean[3]) +
sqr(ccl_get_feature_sse(2) - mean[4]) +
sqr(ccl_get_feature_sse(3) - mean[5]);
scales[4] = fabs(ccl_get_feature_sse(4) - mean[6]);
scales[5] = sqr(ccl_get_feature_sse(5) - mean[7]) +
sqr(ccl_get_feature_sse(6) - mean[8]) +
sqr(ccl_get_feature_sse(7) - mean[9]);
for(int i = 0; i < 6; i++)
scales[i] = mask(active_pixels, scales[i]);
}
ccl_device_inline void filter_calculate_scale_sse(__m128 *scale)
ccl_device_inline void filter_calculate_scale_sse(float4 *scale)
{
scale[0] = _mm_rcp_ps(_mm_max_ps(_mm_hmax_ps(scale[0]), _mm_set1_ps(0.01f)));
scale[1] = _mm_rcp_ps(_mm_max_ps(_mm_hmax_ps(scale[1]), _mm_set1_ps(0.01f)));
scale[2] = _mm_rcp_ps(_mm_max_ps(_mm_hmax_ps(scale[2]), _mm_set1_ps(0.01f)));
scale[6] = _mm_rcp_ps(_mm_max_ps(_mm_hmax_ps(scale[4]), _mm_set1_ps(0.01f)));
scale[7] = scale[8] = scale[9] = _mm_rcp_ps(_mm_max_ps(_mm_hmax_ps(_mm_sqrt_ps(scale[5])), _mm_set1_ps(0.01f)));
scale[3] = scale[4] = scale[5] = _mm_rcp_ps(_mm_max_ps(_mm_hmax_ps(_mm_sqrt_ps(scale[3])), _mm_set1_ps(0.01f)));
scale[0] = rcp(max(reduce_max(scale[0]), make_float4(0.01f)));
scale[1] = rcp(max(reduce_max(scale[1]), make_float4(0.01f)));
scale[2] = rcp(max(reduce_max(scale[2]), make_float4(0.01f)));
scale[6] = rcp(max(reduce_max(scale[4]), make_float4(0.01f)));
scale[7] = scale[8] = scale[9] = rcp(max(reduce_max(sqrt(scale[5])), make_float4(0.01f)));
scale[3] = scale[4] = scale[5] = rcp(max(reduce_max(sqrt(scale[3])), make_float4(0.01f)));
}
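The rewrite above swaps raw SSE intrinsics for float4 helpers (reduce_max, rcp, sqrt, max). Assuming reduce_max broadcasts the horizontal maximum to all lanes, one of these lines reduces to the following scalar model (illustrative only):
/* Scalar model of: scale[0] = rcp(max(reduce_max(scale[0]), make_float4(0.01f))) */
static inline float scale_model(float x, float y, float z, float w)
{
	float m = fmaxf(fmaxf(x, y), fmaxf(z, w)); /* reduce_max: horizontal maximum */
	return 1.0f / fmaxf(m, 0.01f);             /* clamp away from zero, then reciprocal */
}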

View File

@@ -50,10 +50,8 @@ ccl_device_inline void kernel_filter_nlm_blur(const float *ccl_restrict differen
int w,
int f)
{
#ifdef __KERNEL_SSE3__
int aligned_lowx = (rect.x & ~(3));
int aligned_highx = ((rect.z + 3) & ~(3));
#endif
int aligned_lowx = rect.x / 4;
int aligned_highx = (rect.z + 3) / 4;
for(int y = rect.y; y < rect.w; y++) {
const int low = max(rect.y, y-f);
const int high = min(rect.w, y+f+1);
@@ -61,15 +59,11 @@ ccl_device_inline void kernel_filter_nlm_blur(const float *ccl_restrict differen
out_image[y*w+x] = 0.0f;
}
for(int y1 = low; y1 < high; y1++) {
#ifdef __KERNEL_SSE3__
for(int x = aligned_lowx; x < aligned_highx; x+=4) {
_mm_store_ps(out_image + y*w+x, _mm_add_ps(_mm_load_ps(out_image + y*w+x), _mm_load_ps(difference_image + y1*w+x)));
float4* out_image4 = (float4*)(out_image + y*w);
float4* difference_image4 = (float4*)(difference_image + y1*w);
for(int x = aligned_lowx; x < aligned_highx; x++) {
out_image4[x] += difference_image4[x];
}
#else
for(int x = rect.x; x < rect.z; x++) {
out_image[y*w+x] += difference_image[y1*w+x];
}
#endif
}
for(int x = rect.x; x < rect.z; x++) {
out_image[y*w+x] *= 1.0f/(high - low);
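In the float4 version above, x indexes 4-wide groups rather than individual floats, which assumes each row base pointer is 16-byte aligned. A hypothetical standalone equivalent of the inner accumulation:
static void add_row_groups(float *out_row, const float *diff_row,
                           int lowx, int highx)
{
	/* Same index math as above: aligned_lowx = lowx/4, aligned_highx = (highx+3)/4. */
	for(int x = lowx/4; x < (highx + 3)/4; x++) {
		for(int k = 0; k < 4; k++)  /* one float4 addition, written out per lane */
			out_row[4*x + k] += diff_row[4*x + k];
	}
}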

View File

@@ -96,7 +96,7 @@ ccl_device void kernel_filter_get_feature(int sample,
int idx = (y-rect.y)*buffer_w + (x - rect.x);
mean[idx] = center_buffer[m_offset] / sample;
if (sample > 1) {
if(sample > 1) {
if(use_split_variance) {
variance[idx] = max(0.0f, (center_buffer[v_offset] - mean[idx]*mean[idx]*sample) / (sample * (sample-1)));
}

View File

@@ -24,7 +24,7 @@ ccl_device void kernel_filter_construct_transform(const float *ccl_restrict buff
{
int buffer_w = align_up(rect.z - rect.x, 4);
__m128 features[DENOISE_FEATURES];
float4 features[DENOISE_FEATURES];
const float *ccl_restrict pixel_buffer;
int2 pixel;
@@ -34,19 +34,19 @@ ccl_device void kernel_filter_construct_transform(const float *ccl_restrict buff
min(rect.w, y + radius + 1));
int num_pixels = (high.y - low.y) * (high.x - low.x);
__m128 feature_means[DENOISE_FEATURES];
float4 feature_means[DENOISE_FEATURES];
math_vector_zero_sse(feature_means, DENOISE_FEATURES);
FOR_PIXEL_WINDOW_SSE {
filter_get_features_sse(x4, y4, active_pixels, pixel_buffer, features, NULL, pass_stride);
math_vector_add_sse(feature_means, DENOISE_FEATURES, features);
} END_FOR_PIXEL_WINDOW_SSE
__m128 pixel_scale = _mm_set1_ps(1.0f / num_pixels);
float4 pixel_scale = make_float4(1.0f / num_pixels);
for(int i = 0; i < DENOISE_FEATURES; i++) {
feature_means[i] = _mm_mul_ps(_mm_hsum_ps(feature_means[i]), pixel_scale);
feature_means[i] = reduce_add(feature_means[i]) * pixel_scale;
}
__m128 feature_scale[DENOISE_FEATURES];
float4 feature_scale[DENOISE_FEATURES];
math_vector_zero_sse(feature_scale, DENOISE_FEATURES);
FOR_PIXEL_WINDOW_SSE {
filter_get_feature_scales_sse(x4, y4, active_pixels, pixel_buffer, features, feature_means, pass_stride);
@@ -55,12 +55,12 @@ ccl_device void kernel_filter_construct_transform(const float *ccl_restrict buff
filter_calculate_scale_sse(feature_scale);
__m128 feature_matrix_sse[DENOISE_FEATURES*DENOISE_FEATURES];
float4 feature_matrix_sse[DENOISE_FEATURES*DENOISE_FEATURES];
math_matrix_zero_sse(feature_matrix_sse, DENOISE_FEATURES);
FOR_PIXEL_WINDOW_SSE {
filter_get_features_sse(x4, y4, active_pixels, pixel_buffer, features, feature_means, pass_stride);
math_vector_mul_sse(features, DENOISE_FEATURES, feature_scale);
math_matrix_add_gramian_sse(feature_matrix_sse, DENOISE_FEATURES, features, _mm_set1_ps(1.0f));
math_matrix_add_gramian_sse(feature_matrix_sse, DENOISE_FEATURES, features, make_float4(1.0f));
} END_FOR_PIXEL_WINDOW_SSE
float feature_matrix[DENOISE_FEATURES*DENOISE_FEATURES];
@@ -98,7 +98,7 @@ ccl_device void kernel_filter_construct_transform(const float *ccl_restrict buff
/* Bake the feature scaling into the transformation matrix. */
for(int i = 0; i < DENOISE_FEATURES; i++) {
math_vector_scale(transform + i*DENOISE_FEATURES, _mm_cvtss_f32(feature_scale[i]), *rank);
math_vector_scale(transform + i*DENOISE_FEATURES, feature_scale[i][0], *rank);
}
}

View File

@@ -415,12 +415,7 @@ ccl_device_inline float3 bvh_clamp_direction(float3 dir)
ccl_device_inline float3 bvh_inverse_direction(float3 dir)
{
/* TODO(sergey): Currently disabled, gives speedup but causes precision issues. */
#if defined(__KERNEL_SSE__) && 0
return rcp(dir);
#else
return 1.0f / dir;
#endif
}
/* Transform ray into object space to enter static object in BVH */

View File

@@ -100,6 +100,8 @@ ccl_device_noinline void kernel_path_ao(KernelGlobals *kg,
#ifndef __SPLIT_KERNEL__
#if defined(__BRANCHED_PATH__) || defined(__BAKING__)
ccl_device void kernel_path_indirect(KernelGlobals *kg,
ShaderData *sd,
ShaderData *emission_sd,
@@ -428,6 +430,7 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
}
}
#endif /* defined(__BRANCHED_PATH__) || defined(__BAKING__) */
ccl_device_inline float kernel_path_integrate(KernelGlobals *kg,
RNG *rng,

View File

@@ -173,7 +173,7 @@ ccl_device_inline float path_state_terminate_probability(KernelGlobals *kg, ccl_
}
#ifdef __SHADOW_TRICKS__
/* Exception for shadow catcher not working correctly with RR. */
else if ((state->flag & PATH_RAY_SHADOW_CATCHER) && (state->transparent_bounce <= 8)) {
else if((state->flag & PATH_RAY_SHADOW_CATCHER) && (state->transparent_bounce <= 8)) {
return 1.0f;
}
#endif
@@ -196,7 +196,7 @@ ccl_device_inline float path_state_terminate_probability(KernelGlobals *kg, ccl_
}
#ifdef __SHADOW_TRICKS__
/* Exception for shadow catcher not working correctly with RR. */
else if ((state->flag & PATH_RAY_SHADOW_CATCHER) && (state->bounce <= 3)) {
else if((state->flag & PATH_RAY_SHADOW_CATCHER) && (state->bounce <= 3)) {
return 1.0f;
}
#endif

View File

@@ -25,6 +25,7 @@
#else
/* SSE optimization disabled for now on 32 bit, see bug #36316 */
# if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86)))
# define __KERNEL_SSE__
# define __KERNEL_SSE2__
# endif
#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE2 */

View File

@@ -25,6 +25,7 @@
#else
/* SSE optimization disabled for now on 32 bit, see bug #36316 */
# if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86)))
# define __KERNEL_SSE__
# define __KERNEL_SSE2__
# define __KERNEL_SSE3__
# define __KERNEL_SSSE3__

View File

@@ -25,6 +25,7 @@
#else
/* SSE optimization disabled for now on 32 bit, see bug #36316 */
# if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86)))
# define __KERNEL_SSE__
# define __KERNEL_SSE2__
# define __KERNEL_SSE3__
# define __KERNEL_SSSE3__

View File

@@ -81,8 +81,13 @@
# error "Unknown or unsupported CUDA architecture, can't determine launch bounds"
#endif
/* compute number of threads per block and minimum blocks per multiprocessor
* given the maximum number of registers per thread */
/* For split kernel using all registers seems fastest for now, but this
* is unlikely to be optimal once we resolve other bottlenecks. */
#define CUDA_KERNEL_SPLIT_MAX_REGISTERS CUDA_THREAD_MAX_REGISTERS
/* Compute number of threads per block and minimum blocks per multiprocessor
* given the maximum number of registers per thread. */
#define CUDA_LAUNCH_BOUNDS(threads_block_width, thread_num_registers) \
__launch_bounds__( \

View File

@@ -90,7 +90,7 @@ kernel_cuda_path_trace_data_init(
#define DEFINE_SPLIT_KERNEL_FUNCTION(name) \
extern "C" __global__ void \
CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS) \
CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_SPLIT_MAX_REGISTERS) \
kernel_cuda_##name() \
{ \
kernel_##name(NULL); \
@@ -98,7 +98,7 @@ kernel_cuda_path_trace_data_init(
#define DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(name, type) \
extern "C" __global__ void \
CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS) \
CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_SPLIT_MAX_REGISTERS) \
kernel_cuda_##name() \
{ \
ccl_local type locals; \

View File

@@ -39,7 +39,7 @@ ccl_device void kernel_shader_sort(KernelGlobals *kg,
ccl_local ushort *local_index = &locals->local_index[0];
/* copy to local memory */
for (uint i = 0; i < SHADER_SORT_BLOCK_SIZE; i += SHADER_SORT_LOCAL_SIZE) {
for(uint i = 0; i < SHADER_SORT_BLOCK_SIZE; i += SHADER_SORT_LOCAL_SIZE) {
uint idx = offset + i + lid;
uint add = input + idx;
uint value = (~0);
@@ -59,9 +59,9 @@ ccl_device void kernel_shader_sort(KernelGlobals *kg,
# ifdef __KERNEL_OPENCL__
/* bitonic sort */
for (uint length = 1; length < SHADER_SORT_BLOCK_SIZE; length <<= 1) {
for (uint inc = length; inc > 0; inc >>= 1) {
for (uint ii = 0; ii < SHADER_SORT_BLOCK_SIZE; ii += SHADER_SORT_LOCAL_SIZE) {
for(uint length = 1; length < SHADER_SORT_BLOCK_SIZE; length <<= 1) {
for(uint inc = length; inc > 0; inc >>= 1) {
for(uint ii = 0; ii < SHADER_SORT_BLOCK_SIZE; ii += SHADER_SORT_LOCAL_SIZE) {
uint i = lid + ii;
bool direction = ((i & (length << 1)) != 0);
uint j = i ^ inc;
@@ -81,7 +81,7 @@ ccl_device void kernel_shader_sort(KernelGlobals *kg,
# endif /* __KERNEL_OPENCL__ */
/* copy to destination */
for (uint i = 0; i < SHADER_SORT_BLOCK_SIZE; i += SHADER_SORT_LOCAL_SIZE) {
for(uint i = 0; i < SHADER_SORT_BLOCK_SIZE; i += SHADER_SORT_LOCAL_SIZE) {
uint idx = offset + i + lid;
uint lidx = local_index[i + lid];
uint outi = output + idx;

View File

@@ -344,7 +344,7 @@ int ImageManager::add_image(const string& filename,
else {
/* Very unlikely, since max_num_images is insanely big. But better safe than sorry. */
int tex_count = 0;
for (int type = 0; type < IMAGE_DATA_NUM_TYPES; type++) {
for(int type = 0; type < IMAGE_DATA_NUM_TYPES; type++) {
tex_count += tex_num_images[type];
}
if(tex_count > max_num_images) {

View File

@@ -39,7 +39,6 @@ NODE_DEFINE(Integrator)
SOCKET_INT(max_volume_bounce, "Max Volume Bounce", 7);
SOCKET_INT(transparent_max_bounce, "Transparent Max Bounce", 7);
SOCKET_BOOLEAN(transparent_shadows, "Transparent Shadows", false);
SOCKET_INT(ao_bounces, "AO Bounces", 0);
@@ -121,19 +120,14 @@ void Integrator::device_update(Device *device, DeviceScene *dscene, Scene *scene
* We only need to enable transparent shadows, if we actually have
* transparent shaders in the scene. Otherwise we can disable it
* to improve performance a bit. */
if(transparent_shadows) {
kintegrator->transparent_shadows = false;
foreach(Shader *shader, scene->shaders) {
/* keep this in sync with SD_HAS_TRANSPARENT_SHADOW in shader.cpp */
if((shader->has_surface_transparent && shader->use_transparent_shadow) || shader->has_volume) {
kintegrator->transparent_shadows = true;
break;
}
kintegrator->transparent_shadows = false;
foreach(Shader *shader, scene->shaders) {
/* keep this in sync with SD_HAS_TRANSPARENT_SHADOW in shader.cpp */
if((shader->has_surface_transparent && shader->use_transparent_shadow) || shader->has_volume) {
kintegrator->transparent_shadows = true;
break;
}
}
else {
kintegrator->transparent_shadows = false;
}
kintegrator->volume_max_steps = volume_max_steps;
kintegrator->volume_step_size = volume_step_size;

View File

@@ -39,7 +39,6 @@ public:
int max_volume_bounce;
int transparent_max_bounce;
bool transparent_shadows;
int ao_bounces;

View File

@@ -225,7 +225,7 @@ void LightManager::disable_ineffective_light(Device *device, Scene *scene)
bool LightManager::object_usable_as_light(Object *object) {
Mesh *mesh = object->mesh;
/* Skip objects with NaNs */
if (!object->bounds.valid()) {
if(!object->bounds.valid()) {
return false;
}
/* Skip if we are not visible for BSDFs. */

View File

@@ -721,7 +721,6 @@ DeviceRequestedFeatures Session::get_requested_device_features()
BakeManager *bake_manager = scene->bake_manager;
requested_features.use_baking = bake_manager->get_baking();
requested_features.use_integrator_branched = (scene->integrator->method == Integrator::BRANCHED_PATH);
requested_features.use_transparent &= scene->integrator->transparent_shadows;
requested_features.use_denoising = params.use_denoising;
return requested_features;

View File

@@ -503,9 +503,7 @@ void ShaderManager::device_update_common(Device *device,
KernelIntegrator *kintegrator = &dscene->data.integrator;
kintegrator->use_volumes = has_volumes;
/* TODO(sergey): De-duplicate with flags set in integrator.cpp. */
if(scene->integrator->transparent_shadows) {
kintegrator->transparent_shadows = has_transparent_shadow;
}
kintegrator->transparent_shadows = has_transparent_shadow;
}
void ShaderManager::device_free_common(Device *device, DeviceScene *dscene, Scene *scene)

View File

@@ -38,6 +38,7 @@ set(SRC_HEADERS
util_atomic.h
util_boundbox.h
util_debug.h
util_defines.h
util_guarded_allocator.cpp
util_foreach.h
util_function.h

View File

@@ -0,0 +1,134 @@
/*
* Copyright 2011-2017 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef __UTIL_DEFINES_H__
#define __UTIL_DEFINES_H__
/* Bitness */
#if defined(__ppc64__) || defined(__PPC64__) || defined(__x86_64__) || defined(__ia64__) || defined(_M_X64)
# define __KERNEL_64_BIT__
#endif
/* Qualifiers for kernel code shared by CPU and GPU */
#ifndef __KERNEL_GPU__
# define ccl_device static inline
# define ccl_device_noinline static
# define ccl_global
# define ccl_constant
# define ccl_local
# define ccl_local_param
# define ccl_private
# define ccl_restrict __restrict
# define __KERNEL_WITH_SSE_ALIGN__
# if defined(_WIN32) && !defined(FREE_WINDOWS)
# define ccl_device_inline static __forceinline
# define ccl_device_forceinline static __forceinline
# define ccl_align(...) __declspec(align(__VA_ARGS__))
# ifdef __KERNEL_64_BIT__
# define ccl_try_align(...) __declspec(align(__VA_ARGS__))
# else /* __KERNEL_64_BIT__ */
# undef __KERNEL_WITH_SSE_ALIGN__
/* No support for function arguments (error C2719). */
# define ccl_try_align(...)
# endif /* __KERNEL_64_BIT__ */
# define ccl_may_alias
# define ccl_always_inline __forceinline
# define ccl_never_inline __declspec(noinline)
# define ccl_maybe_unused
# else /* _WIN32 && !FREE_WINDOWS */
# define ccl_device_inline static inline __attribute__((always_inline))
# define ccl_device_forceinline static inline __attribute__((always_inline))
# define ccl_align(...) __attribute__((aligned(__VA_ARGS__)))
# ifndef FREE_WINDOWS64
# define __forceinline inline __attribute__((always_inline))
# endif
# define ccl_try_align(...) __attribute__((aligned(__VA_ARGS__)))
# define ccl_may_alias __attribute__((__may_alias__))
# define ccl_always_inline __attribute__((always_inline))
# define ccl_never_inline __attribute__((noinline))
# define ccl_maybe_unused __attribute__((used))
# endif /* _WIN32 && !FREE_WINDOWS */
/* Use to suppress '-Wimplicit-fallthrough' (in place of 'break'). */
# if defined(__GNUC__) && (__GNUC__ >= 7) /* gcc7.0+ only */
# define ATTR_FALLTHROUGH __attribute__((fallthrough))
# else
# define ATTR_FALLTHROUGH ((void)0)
# endif
#endif /* __KERNEL_GPU__ */
/* macros */
/* hints for branch prediction, only use in code that runs a _lot_ */
#if defined(__GNUC__) && defined(__KERNEL_CPU__)
# define LIKELY(x) __builtin_expect(!!(x), 1)
# define UNLIKELY(x) __builtin_expect(!!(x), 0)
#else
# define LIKELY(x) (x)
# define UNLIKELY(x) (x)
#endif
#if defined(__cplusplus) && ((__cplusplus >= 201103L) || (defined(_MSC_VER) && _MSC_VER >= 1800))
# define HAS_CPP11_FEATURES
#endif
#if defined(__GNUC__) || defined(__clang__)
# if defined(HAS_CPP11_FEATURES)
/* Some magic to be sure we don't have reference in the type. */
template<typename T> static inline T decltype_helper(T x) { return x; }
# define TYPEOF(x) decltype(decltype_helper(x))
# else
# define TYPEOF(x) typeof(x)
# endif
#endif
/* Causes warning:
* incompatible types when assigning to type 'Foo' from type 'Bar'
* ... the compiler optimizes away the temp var */
#ifdef __GNUC__
#define CHECK_TYPE(var, type) { \
TYPEOF(var) *__tmp; \
__tmp = (type *)NULL; \
(void)__tmp; \
} (void)0
#define CHECK_TYPE_PAIR(var_a, var_b) { \
TYPEOF(var_a) *__tmp; \
__tmp = (typeof(var_b) *)NULL; \
(void)__tmp; \
} (void)0
#else
# define CHECK_TYPE(var, type)
# define CHECK_TYPE_PAIR(var_a, var_b)
#endif
/* can be used in simple macros */
#define CHECK_TYPE_INLINE(val, type) \
((void)(((type)0) != (val)))
#ifndef __KERNEL_GPU__
# include <cassert>
# define util_assert(statement) assert(statement)
#else
# define util_assert(statement)
#endif
#endif /* __UTIL_DEFINES_H__ */
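A short usage sketch for the type-check and assert macros defined above (hypothetical variable names):
static void check_type_example(void)
{
	int count = 0;
	CHECK_TYPE(count, int);        /* compiles silently when the types match (GCC/Clang) */
	CHECK_TYPE_INLINE(count, int); /* expression form, safe inside other macros */
	util_assert(count == 0);       /* assert() on CPU builds, compiled out on GPU */
}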

View File

@@ -94,6 +94,7 @@ ccl_device_inline float fminf(float a, float b)
#ifndef __KERNEL_GPU__
using std::isfinite;
using std::isnan;
using std::sqrt;
ccl_device_inline int abs(int x)
{

View File

@@ -108,8 +108,7 @@ ccl_device_inline float3 operator*(const float3& a, const float f)
ccl_device_inline float3 operator*(const float f, const float3& a)
{
/* TODO(sergey): Currently disabled, gives speedup but causes precision issues. */
#if defined(__KERNEL_SSE__) && 0
#if defined(__KERNEL_SSE__)
return float3(_mm_mul_ps(_mm_set1_ps(f), a.m128));
#else
return make_float3(a.x*f, a.y*f, a.z*f);
@@ -118,10 +117,8 @@ ccl_device_inline float3 operator*(const float f, const float3& a)
ccl_device_inline float3 operator/(const float f, const float3& a)
{
/* TODO(sergey): Currently disabled, gives speedup but causes precision issues. */
#if defined(__KERNEL_SSE__) && 0
__m128 rc = _mm_rcp_ps(a.m128);
return float3(_mm_mul_ps(_mm_set1_ps(f),rc));
#if defined(__KERNEL_SSE__)
return float3(_mm_div_ps(_mm_set1_ps(f), a.m128));
#else
return make_float3(f / a.x, f / a.y, f / a.z);
#endif
@@ -135,10 +132,8 @@ ccl_device_inline float3 operator/(const float3& a, const float f)
ccl_device_inline float3 operator/(const float3& a, const float3& b)
{
/* TODO(sergey): Currently disabled, gives speedup but causes precision issues. */
#if defined(__KERNEL_SSE__) && 0
__m128 rc = _mm_rcp_ps(b.m128);
return float3(_mm_mul_ps(a, rc));
#if defined(__KERNEL_SSE__)
return float3(_mm_div_ps(a.m128, b.m128));
#else
return make_float3(a.x / b.x, a.y / b.y, a.z / b.z);
#endif
@@ -282,9 +277,8 @@ ccl_device_inline float3 mix(const float3& a, const float3& b, float t)
ccl_device_inline float3 rcp(const float3& a)
{
#ifdef __KERNEL_SSE__
const float4 r(_mm_rcp_ps(a.m128));
return float3(_mm_sub_ps(_mm_add_ps(r, r),
_mm_mul_ps(_mm_mul_ps(r, r), a)));
/* Don't use _mm_rcp_ps due to poor precision. */
return float3(_mm_div_ps(_mm_set_ps1(1.0f), a.m128));
#else
return make_float3(1.0f/a.x, 1.0f/a.y, 1.0f/a.z);
#endif
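The removed fast path refined the ~12-bit _mm_rcp_ps estimate with one Newton-Raphson step, which is what the (r + r) - (r*r)*a expression in the old code computed. A scalar model for reference (illustrative):
static inline float rcp_refine(float a, float r)
{
	/* r approximates 1/a; one Newton-Raphson step r' = r*(2 - a*r)
	 * roughly doubles the precision but still falls short of a full
	 * divide, hence the switch to _mm_div_ps above. */
	return (r + r) - (r * r) * a;
}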

View File

@@ -48,23 +48,30 @@ ccl_device_inline bool operator==(const float4& a, const float4& b);
ccl_device_inline float dot(const float4& a, const float4& b);
ccl_device_inline float len_squared(const float4& a);
ccl_device_inline float4 rcp(const float4& a);
ccl_device_inline float4 sqrt(const float4& a);
ccl_device_inline float4 sqr(const float4& a);
ccl_device_inline float4 cross(const float4& a, const float4& b);
ccl_device_inline bool is_zero(const float4& a);
ccl_device_inline float reduce_add(const float4& a);
ccl_device_inline float average(const float4& a);
ccl_device_inline float len(const float4& a);
ccl_device_inline float4 normalize(const float4& a);
ccl_device_inline float4 safe_normalize(const float4& a);
ccl_device_inline float4 min(const float4& a, const float4& b);
ccl_device_inline float4 max(const float4& a, const float4& b);
ccl_device_inline float4 fabs(const float4& a);
#endif /* !__KERNEL_OPENCL__*/
#ifdef __KERNEL_SSE__
template<size_t index_0, size_t index_1, size_t index_2, size_t index_3>
__forceinline const float4 shuffle(const float4& b);
template<size_t index_0, size_t index_1, size_t index_2, size_t index_3>
__forceinline const float4 shuffle(const float4& a, const float4& b);
template<> __forceinline const float4 shuffle<0, 1, 0, 1>(const float4& b);
template<> __forceinline const float4 shuffle<0, 1, 0, 1>(const float4& a, const float4& b);
template<> __forceinline const float4 shuffle<2, 3, 2, 3>(const float4& a, const float4& b);
# ifdef __KERNEL_SSE3__
template<> __forceinline const float4 shuffle<0, 0, 2, 2>(const float4& b);
template<> __forceinline const float4 shuffle<1, 1, 3, 3>(const float4& b);
@@ -77,9 +84,7 @@ ccl_device_inline float4 select(const int4& mask,
const float4& b);
ccl_device_inline float4 reduce_min(const float4& a);
ccl_device_inline float4 reduce_max(const float4& a);
# if 0
ccl_device_inline float4 reduce_add(const float4& a);
# endif
#endif /* !__KERNEL_GPU__ */
/*******************************************************************************
@@ -128,7 +133,7 @@ ccl_device_inline float4 operator/(const float4& a, float f)
ccl_device_inline float4 operator/(const float4& a, const float4& b)
{
#ifdef __KERNEL_SSE__
return a * rcp(b);
return float4(_mm_div_ps(a.m128, b.m128));
#else
return make_float4(a.x/b.x, a.y/b.y, a.z/b.z, a.w/b.w);
#endif
@@ -224,14 +229,30 @@ ccl_device_inline float len_squared(const float4& a)
ccl_device_inline float4 rcp(const float4& a)
{
#ifdef __KERNEL_SSE__
float4 r(_mm_rcp_ps(a.m128));
return float4(_mm_sub_ps(_mm_add_ps(r, r),
_mm_mul_ps(_mm_mul_ps(r, r), a)));
/* Don't use _mm_rcp_ps due to poor precision. */
return float4(_mm_div_ps(_mm_set_ps1(1.0f), a.m128));
#else
return make_float4(1.0f/a.x, 1.0f/a.y, 1.0f/a.z, 1.0f/a.w);
#endif
}
ccl_device_inline float4 sqrt(const float4& a)
{
#ifdef __KERNEL_SSE__
return float4(_mm_sqrt_ps(a.m128));
#else
return make_float4(sqrtf(a.x),
sqrtf(a.y),
sqrtf(a.z),
sqrtf(a.w));
#endif
}
ccl_device_inline float4 sqr(const float4& a)
{
return a * a;
}
ccl_device_inline float4 cross(const float4& a, const float4& b)
{
#ifdef __KERNEL_SSE__
@@ -254,20 +275,24 @@ ccl_device_inline bool is_zero(const float4& a)
#endif
}
ccl_device_inline float reduce_add(const float4& a)
ccl_device_inline float4 reduce_add(const float4& a)
{
#ifdef __KERNEL_SSE__
# ifdef __KERNEL_SSE3__
float4 h(_mm_hadd_ps(a.m128, a.m128));
return float4( _mm_hadd_ps(h.m128, h.m128));
# else
float4 h(shuffle<1,0,3,2>(a) + a);
/* TODO(sergey): Investigate efficiency. */
return _mm_cvtss_f32(shuffle<2,3,0,1>(h) + h);
return shuffle<2,3,0,1>(h) + h;
# endif
#else
return ((a.x + a.y) + (a.z + a.w));
return make_float4(((a.x + a.y) + (a.z + a.w)));
#endif
}
ccl_device_inline float average(const float4& a)
{
return reduce_add(a) * 0.25f;
return reduce_add(a)[0] * 0.25f;
}
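Note the signature change: reduce_add() now returns a float4 with the horizontal sum broadcast to every lane, so scalar consumers such as average() read lane 0. A scalar model of the new behaviour (illustrative):
static inline void reduce_add_model(const float a[4], float out[4])
{
	float s = (a[0] + a[1]) + (a[2] + a[3]);
	out[0] = out[1] = out[2] = out[3] = s; /* broadcast to all four lanes */
}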
ccl_device_inline float len(const float4& a)
@@ -309,6 +334,18 @@ ccl_device_inline float4 max(const float4& a, const float4& b)
max(a.w, b.w));
#endif
}
ccl_device_inline float4 fabs(const float4& a)
{
#ifdef __KERNEL_SSE__
return float4(_mm_and_ps(a.m128, _mm_castsi128_ps(_mm_set1_epi32(0x7fffffff))));
#else
return make_float4(fabsf(a.x),
fabsf(a.y),
fabsf(a.z),
fabsf(a.w));
#endif
}
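The SSE branch of fabs() clears the IEEE-754 sign bit with a 0x7fffffff mask. A self-contained scalar model of the same trick (illustrative):
#include <cstdint>
#include <cstring>
static inline float fabs_by_mask(float f)
{
	uint32_t bits;
	memcpy(&bits, &f, sizeof(bits)); /* reinterpret the float's bits safely */
	bits &= 0x7fffffffu;             /* clear the sign bit, keep exponent and mantissa */
	memcpy(&f, &bits, sizeof(f));
	return f;
}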
#endif /* !__KERNEL_OPENCL__*/
#ifdef __KERNEL_SSE__
@@ -320,11 +357,28 @@ __forceinline const float4 shuffle(const float4& b)
_MM_SHUFFLE(index_3, index_2, index_1, index_0))));
}
template<size_t index_0, size_t index_1, size_t index_2, size_t index_3>
__forceinline const float4 shuffle(const float4& a, const float4& b)
{
return float4(_mm_shuffle_ps(a.m128, b.m128,
_MM_SHUFFLE(index_3, index_2, index_1, index_0)));
}
template<> __forceinline const float4 shuffle<0, 1, 0, 1>(const float4& b)
{
return float4(_mm_castpd_ps(_mm_movedup_pd(_mm_castps_pd(b))));
}
template<> __forceinline const float4 shuffle<0, 1, 0, 1>(const float4& a, const float4& b)
{
return float4(_mm_movelh_ps(a.m128, b.m128));
}
template<> __forceinline const float4 shuffle<2, 3, 2, 3>(const float4& a, const float4& b)
{
return float4(_mm_movehl_ps(b.m128, a.m128));
}
# ifdef __KERNEL_SSE3__
template<> __forceinline const float4 shuffle<0, 0, 2, 2>(const float4& b)
{
@@ -344,9 +398,7 @@ ccl_device_inline float4 select(const int4& mask,
const float4& b)
{
#ifdef __KERNEL_SSE__
/* TODO(sergey): avoid cvt. */
return float4(_mm_or_ps(_mm_and_ps(_mm_cvtepi32_ps(mask), a),
_mm_andnot_ps(_mm_cvtepi32_ps(mask), b)));
return float4(_mm_blendv_ps(b.m128, a.m128, _mm_castsi128_ps(mask.m128)));
#else
return make_float4((mask.x)? a.x: b.x,
(mask.y)? a.y: b.y,
@@ -355,6 +407,13 @@ ccl_device_inline float4 select(const int4& mask,
#endif
}
ccl_device_inline float4 mask(const int4& mask,
const float4& a)
{
/* Replace elements of x with zero where mask isn't set. */
return select(mask, a, make_float4(0.0f));
}
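Per lane, mask() keeps a where the mask is set and yields zero otherwise; in the denoiser loops earlier in this commit it zeroes lanes belonging to pixels past the row end. Scalar model (illustrative):
static inline float mask_lane(int active, float a)
{
	return active ? a : 0.0f;
}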
ccl_device_inline float4 reduce_min(const float4& a)
{
#ifdef __KERNEL_SSE__
@@ -375,17 +434,15 @@ ccl_device_inline float4 reduce_max(const float4& a)
#endif
}
#if 0
ccl_device_inline float4 reduce_add(const float4& a)
ccl_device_inline float4 load_float4(const float *v)
{
#ifdef __KERNEL_SSE__
float4 h = shuffle<1,0,3,2>(a) + a;
return shuffle<2,3,0,1>(h) + h;
return float4(_mm_loadu_ps(v));
#else
return make_float4((a.x + a.y) + (a.z + a.w));
return make_float4(v[0], v[1], v[2], v[3]);
#endif
}
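load_float4() is the unaligned-load helper that the denoiser's redefined ccl_get_feature_sse macro builds on. A hedged usage sketch (gather_feature is a hypothetical wrapper):
static float4 gather_feature(const float *buffer, int pass, int pass_stride)
{
	/* 4-wide load from an arbitrary (not necessarily 16-byte aligned) offset,
	 * mirroring ccl_get_feature_sse(pass) earlier in this commit. */
	return load_float4(buffer + pass * pass_stride);
}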
#endif
#endif /* !__KERNEL_GPU__ */
CCL_NAMESPACE_END

View File

@@ -223,20 +223,20 @@ ccl_device void math_matrix_jacobi_eigendecomposition(float *A, ccl_global float
{
const float singular_epsilon = 1e-9f;
for (int row = 0; row < n; row++) {
for (int col = 0; col < n; col++) {
for(int row = 0; row < n; row++) {
for(int col = 0; col < n; col++) {
MATS(V, n, row, col, v_stride) = (col == row) ? 1.0f : 0.0f;
}
}
for (int sweep = 0; sweep < 8; sweep++) {
for(int sweep = 0; sweep < 8; sweep++) {
float off_diagonal = 0.0f;
for (int row = 1; row < n; row++) {
for (int col = 0; col < row; col++) {
for(int row = 1; row < n; row++) {
for(int col = 0; col < row; col++) {
off_diagonal += fabsf(MAT(A, n, row, col));
}
}
if (off_diagonal < 1e-7f) {
if(off_diagonal < 1e-7f) {
/* The matrix has nearly reached diagonal form.
* Since the eigenvalues are only used to determine truncation, their exact values aren't required - a relative error of a few ULPs won't matter at all. */
break;
@@ -253,7 +253,7 @@ ccl_device void math_matrix_jacobi_eigendecomposition(float *A, ccl_global float
float abs_element = fabsf(element);
/* If we're in a later sweep and the element already is very small, just set it to zero and skip the rotation. */
if (sweep > 3 && abs_element <= singular_epsilon*fabsf(MAT(A, n, row, row)) && abs_element <= singular_epsilon*fabsf(MAT(A, n, col, col))) {
if(sweep > 3 && abs_element <= singular_epsilon*fabsf(MAT(A, n, row, row)) && abs_element <= singular_epsilon*fabsf(MAT(A, n, col, col))) {
MAT(A, n, row, col) = 0.0f;
continue;
}
@@ -272,10 +272,10 @@ ccl_device void math_matrix_jacobi_eigendecomposition(float *A, ccl_global float
* Then, we compute sin(phi) and cos(phi) themselves. */
float singular_diff = MAT(A, n, row, row) - MAT(A, n, col, col);
float ratio;
if (abs_element > singular_epsilon*fabsf(singular_diff)) {
if(abs_element > singular_epsilon*fabsf(singular_diff)) {
float cot_2phi = 0.5f*singular_diff / element;
ratio = 1.0f / (fabsf(cot_2phi) + sqrtf(1.0f + cot_2phi*cot_2phi));
if (cot_2phi < 0.0f) ratio = -ratio; /* Copy sign. */
if(cot_2phi < 0.0f) ratio = -ratio; /* Copy sign. */
}
else {
ratio = element / singular_diff;
@@ -315,21 +315,21 @@ ccl_device void math_matrix_jacobi_eigendecomposition(float *A, ccl_global float
}
/* Sort eigenvalues and the associated eigenvectors. */
for (int i = 0; i < n - 1; i++) {
for(int i = 0; i < n - 1; i++) {
float v = MAT(A, n, i, i);
int k = i;
for (int j = i; j < n; j++) {
if (MAT(A, n, j, j) >= v) {
for(int j = i; j < n; j++) {
if(MAT(A, n, j, j) >= v) {
v = MAT(A, n, j, j);
k = j;
}
}
if (k != i) {
if(k != i) {
/* Swap eigenvalues. */
MAT(A, n, k, k) = MAT(A, n, i, i);
MAT(A, n, i, i) = v;
/* Swap eigenvectors. */
for (int j = 0; j < n; j++) {
for(int j = 0; j < n; j++) {
float v = MATS(V, n, i, j, v_stride);
MATS(V, n, i, j, v_stride) = MATS(V, n, k, j, v_stride);
MATS(V, n, k, j, v_stride) = v;
@@ -339,59 +339,59 @@ ccl_device void math_matrix_jacobi_eigendecomposition(float *A, ccl_global float
}
#ifdef __KERNEL_SSE3__
ccl_device_inline void math_vector_zero_sse(__m128 *A, int n)
ccl_device_inline void math_vector_zero_sse(float4 *A, int n)
{
for(int i = 0; i < n; i++) {
A[i] = _mm_setzero_ps();
A[i] = make_float4(0.0f);
}
}
ccl_device_inline void math_matrix_zero_sse(__m128 *A, int n)
ccl_device_inline void math_matrix_zero_sse(float4 *A, int n)
{
for(int row = 0; row < n; row++) {
for(int col = 0; col <= row; col++) {
MAT(A, n, row, col) = _mm_setzero_ps();
MAT(A, n, row, col) = make_float4(0.0f);
}
}
}
/* Add Gramian matrix of v to A.
* The Gramian matrix of v is v^T*v, so element (i,j) is v[i]*v[j]. */
ccl_device_inline void math_matrix_add_gramian_sse(__m128 *A, int n, const __m128 *ccl_restrict v, __m128 weight)
ccl_device_inline void math_matrix_add_gramian_sse(float4 *A, int n, const float4 *ccl_restrict v, float4 weight)
{
for(int row = 0; row < n; row++) {
for(int col = 0; col <= row; col++) {
MAT(A, n, row, col) = _mm_add_ps(MAT(A, n, row, col), _mm_mul_ps(_mm_mul_ps(v[row], v[col]), weight));
MAT(A, n, row, col) = MAT(A, n, row, col) + v[row] * v[col] * weight;
}
}
}
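A worked 2x2 instance of the Gramian update above (scalar, illustrative): with v = (a, b) and weight w, A gains w * [aa ab; ba bb], and only the lower triangle (col <= row) is stored, matching the loop bounds:
static void add_gramian_2x2(float A[3] /* packed lower triangle */,
                            const float v[2], float w)
{
	A[0] += v[0]*v[0]*w; /* element (0,0) */
	A[1] += v[1]*v[0]*w; /* element (1,0) */
	A[2] += v[1]*v[1]*w; /* element (1,1) */
}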
ccl_device_inline void math_vector_add_sse(__m128 *V, int n, const __m128 *ccl_restrict a)
ccl_device_inline void math_vector_add_sse(float4 *V, int n, const float4 *ccl_restrict a)
{
for(int i = 0; i < n; i++) {
V[i] = _mm_add_ps(V[i], a[i]);
V[i] += a[i];
}
}
ccl_device_inline void math_vector_mul_sse(__m128 *V, int n, const __m128 *ccl_restrict a)
ccl_device_inline void math_vector_mul_sse(float4 *V, int n, const float4 *ccl_restrict a)
{
for(int i = 0; i < n; i++) {
V[i] = _mm_mul_ps(V[i], a[i]);
V[i] *= a[i];
}
}
ccl_device_inline void math_vector_max_sse(__m128 *a, const __m128 *ccl_restrict b, int n)
ccl_device_inline void math_vector_max_sse(float4 *a, const float4 *ccl_restrict b, int n)
{
for(int i = 0; i < n; i++) {
a[i] = _mm_max_ps(a[i], b[i]);
a[i] = max(a[i], b[i]);
}
}
ccl_device_inline void math_matrix_hsum(float *A, int n, const __m128 *ccl_restrict B)
ccl_device_inline void math_matrix_hsum(float *A, int n, const float4 *ccl_restrict B)
{
for(int row = 0; row < n; row++) {
for(int col = 0; col <= row; col++) {
MAT(A, n, row, col) = _mm_hsum_ss(MAT(B, n, row, col));
MAT(A, n, row, col) = reduce_add(MAT(B, n, row, col))[0];
}
}
}

View File

@@ -19,16 +19,6 @@
#ifndef __KERNEL_GPU__
/* quiet unused define warnings */
#if defined(__KERNEL_SSE2__) || \
defined(__KERNEL_SSE3__) || \
defined(__KERNEL_SSSE3__) || \
defined(__KERNEL_SSE41__) || \
defined(__KERNEL_AVX__) || \
defined(__KERNEL_AVX2__)
/* do nothing */
#endif
/* x86
*
* Compile a regular, SSE2 and SSE3 kernel. */
@@ -73,48 +63,6 @@
#endif /* defined(__x86_64__) || defined(_M_X64) */
/* SSE Experiment
*
* This is disabled code for an experiment to use SSE types globally for types
* such as float3 and float4. Currently this gives an overall slowdown. */
#if 0
# define __KERNEL_SSE__
# ifndef __KERNEL_SSE2__
# define __KERNEL_SSE2__
# endif
# ifndef __KERNEL_SSE3__
# define __KERNEL_SSE3__
# endif
# ifndef __KERNEL_SSSE3__
# define __KERNEL_SSSE3__
# endif
# ifndef __KERNEL_SSE4__
# define __KERNEL_SSE4__
# endif
#endif
/* SSE Intrinsics includes
*
* We assume __KERNEL_SSEX__ flags to have been defined at this point */
/* SSE intrinsics headers */
#ifndef FREE_WINDOWS64
#ifdef _MSC_VER
# include <intrin.h>
#elif (defined(__x86_64__) || defined(__i386__))
# include <x86intrin.h>
#endif
#else
/* MinGW64 has conflicting declarations for these SSE headers in <windows.h>.
* Since we can't avoid including <windows.h>, better only include that */
#include "util/util_windows.h"
#endif
#endif
#endif /* __UTIL_OPTIMIZATION_H__ */

View File

@@ -18,19 +18,38 @@
#ifndef __UTIL_SIMD_TYPES_H__
#define __UTIL_SIMD_TYPES_H__
#ifndef __KERNEL_GPU__
#include <limits>
#include "util/util_debug.h"
#include "util/util_types.h"
#include "util/util_defines.h"
/* SSE Intrinsics includes
*
* We assume __KERNEL_SSEX__ flags to have been defined at this point */
/* SSE intrinsics headers */
#ifndef FREE_WINDOWS64
#ifdef _MSC_VER
# include <intrin.h>
#elif (defined(__x86_64__) || defined(__i386__))
# include <x86intrin.h>
#endif
#else
/* MinGW64 has conflicting declarations for these SSE headers in <windows.h>.
* Since we can't avoid including <windows.h>, better only include that */
#include "util/util_windows.h"
#endif
CCL_NAMESPACE_BEGIN
#ifdef __KERNEL_SSE2__
struct sseb;
struct ssei;
struct ssef;
extern const __m128 _mm_lookupmask_ps[16];
/* Special Types */
@@ -328,12 +347,9 @@ __forceinline size_t __bscf(size_t& v)
#endif /* _WIN32 */
static const unsigned int BITSCAN_NO_BIT_SET_32 = 32;
static const size_t BITSCAN_NO_BIT_SET_64 = 64;
#if !(defined(__SSE4_1__) || defined(__SSE4_2__))
#ifdef __KERNEL_SSE3__
/* Emulation of SSE4 functions with SSE3 */
# ifndef __KERNEL_SSE41__
/* Emulation of SSE4 functions with SSE2 */
#define _MM_FROUND_TO_NEAREST_INT 0x00
#define _MM_FROUND_TO_NEG_INF 0x01
@@ -342,48 +358,31 @@ static const size_t BITSCAN_NO_BIT_SET_64 = 64;
#define _MM_FROUND_CUR_DIRECTION 0x04
#undef _mm_blendv_ps
#define _mm_blendv_ps __emu_mm_blendv_ps
__forceinline __m128 _mm_blendv_ps( __m128 value, __m128 input, __m128 mask ) {
return _mm_or_ps(_mm_and_ps(mask, input), _mm_andnot_ps(mask, value));
}
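The blendv emulation above is plain bit selection. The same operation on one 32-bit lane, as a scalar model (illustrative):
static inline uint32_t blend_bits(uint32_t value, uint32_t input, uint32_t mask)
{
	return (mask & input) | (~mask & value); /* take input where mask bits are set */
}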
#undef _mm_blend_ps
#define _mm_blend_ps __emu_mm_blend_ps
__forceinline __m128 _mm_blend_ps( __m128 value, __m128 input, const int mask ) {
assert(mask < 0x10); return _mm_blendv_ps(value, input, _mm_lookupmask_ps[mask]);
}
#undef _mm_blendv_epi8
#define _mm_blendv_epi8 __emu_mm_blendv_epi8
__forceinline __m128i _mm_blendv_epi8( __m128i value, __m128i input, __m128i mask ) {
return _mm_or_si128(_mm_and_si128(mask, input), _mm_andnot_si128(mask, value));
}
#undef _mm_mullo_epi32
#define _mm_mullo_epi32 __emu_mm_mullo_epi32
__forceinline __m128i _mm_mullo_epi32( __m128i value, __m128i input ) {
__m128i rvalue;
char* _r = (char*)(&rvalue + 1);
char* _v = (char*)(& value + 1);
char* _i = (char*)(& input + 1);
for( ssize_t i = -16 ; i != 0 ; i += 4 ) *((int32_t*)(_r + i)) = *((int32_t*)(_v + i))* *((int32_t*)(_i + i));
return rvalue;
}
#undef _mm_min_epi32
#define _mm_min_epi32 __emu_mm_min_epi32
__forceinline __m128i _mm_min_epi32( __m128i value, __m128i input ) {
return _mm_blendv_epi8(input, value, _mm_cmplt_epi32(value, input));
}
#undef _mm_max_epi32
#define _mm_max_epi32 __emu_mm_max_epi32
__forceinline __m128i _mm_max_epi32( __m128i value, __m128i input ) {
return _mm_blendv_epi8(value, input, _mm_cmplt_epi32(value, input));
}
#undef _mm_extract_epi32
#define _mm_extract_epi32 __emu_mm_extract_epi32
__forceinline int _mm_extract_epi32( __m128i input, const int index ) {
switch ( index ) {
case 0: return _mm_cvtsi128_si32(input);
@@ -395,24 +394,15 @@ __forceinline int _mm_extract_epi32( __m128i input, const int index ) {
}
#undef _mm_insert_epi32
#define _mm_insert_epi32 __emu_mm_insert_epi32
__forceinline __m128i _mm_insert_epi32( __m128i value, int input, const int index ) {
assert(index >= 0 && index < 4); ((int*)&value)[index] = input; return value;
}
#undef _mm_extract_ps
#define _mm_extract_ps __emu_mm_extract_ps
__forceinline int _mm_extract_ps( __m128 input, const int index ) {
int32_t* ptr = (int32_t*)&input; return ptr[index];
}
#undef _mm_insert_ps
#define _mm_insert_ps __emu_mm_insert_ps
__forceinline __m128 _mm_insert_ps( __m128 value, __m128 input, const int index )
{ assert(index < 0x100); ((float*)&value)[(index >> 4)&0x3] = ((float*)&input)[index >> 6]; return _mm_andnot_ps(_mm_lookupmask_ps[index&0xf], value); }
#undef _mm_round_ps
#define _mm_round_ps __emu_mm_round_ps
__forceinline __m128 _mm_round_ps( __m128 value, const int flags )
{
switch ( flags )
@@ -425,57 +415,7 @@ __forceinline __m128 _mm_round_ps( __m128 value, const int flags )
return value;
}
# ifdef _M_X64
#undef _mm_insert_epi64
#define _mm_insert_epi64 __emu_mm_insert_epi64
__forceinline __m128i _mm_insert_epi64( __m128i value, __int64 input, const int index ) {
assert(size_t(index) < 4); ((__int64*)&value)[index] = input; return value;
}
#undef _mm_extract_epi64
#define _mm_extract_epi64 __emu_mm_extract_epi64
__forceinline __int64 _mm_extract_epi64( __m128i input, const int index ) {
assert(size_t(index) < 2);
return index == 0 ? _mm_cvtsi128_si64x(input) : _mm_cvtsi128_si64x(_mm_unpackhi_epi64(input, input));
}
# endif
# endif
#undef _mm_fabs_ps
#define _mm_fabs_ps(x) _mm_and_ps(x, _mm_castsi128_ps(_mm_set1_epi32(0x7fffffff)))
/* Return a __m128 with every element set to the largest element of v. */
ccl_device_inline __m128 _mm_hmax_ps(__m128 v)
{
/* v[0, 1, 2, 3] => [0, 1, 0, 1] and [2, 3, 2, 3] => v[max(0, 2), max(1, 3), max(0, 2), max(1, 3)] */
v = _mm_max_ps(_mm_movehl_ps(v, v), _mm_movelh_ps(v, v));
/* v[max(0, 2), max(1, 3), max(0, 2), max(1, 3)] => [4 times max(1, 3)] and [4 times max(0, 2)] => v[4 times max(0, 1, 2, 3)] */
v = _mm_max_ps(_mm_movehdup_ps(v), _mm_moveldup_ps(v));
return v;
}
/* Return the sum of the four elements of x. */
ccl_device_inline float _mm_hsum_ss(__m128 x)
{
__m128 a = _mm_movehdup_ps(x);
__m128 b = _mm_add_ps(x, a);
return _mm_cvtss_f32(_mm_add_ss(_mm_movehl_ps(a, b), b));
}
/* Return a __m128 with every element set to the sum of the four elements of x. */
ccl_device_inline __m128 _mm_hsum_ps(__m128 x)
{
x = _mm_hadd_ps(x, x);
x = _mm_hadd_ps(x, x);
return x;
}
/* Replace elements of x with zero where mask isn't set. */
#undef _mm_mask_ps
#define _mm_mask_ps(x, mask) _mm_blendv_ps(_mm_setzero_ps(), x, mask)
#endif
#endif /* !(defined(__SSE4_1__) || defined(__SSE4_2__)) */
#else /* __KERNEL_SSE2__ */
@@ -496,13 +436,19 @@ ccl_device_inline int bitscan(int value)
#endif /* __KERNEL_SSE2__ */
/* quiet unused define warnings */
#if defined(__KERNEL_SSE2__) || \
defined(__KERNEL_SSE3__) || \
defined(__KERNEL_SSSE3__) || \
defined(__KERNEL_SSE41__) || \
defined(__KERNEL_AVX__) || \
defined(__KERNEL_AVX2__)
/* do nothing */
#endif
CCL_NAMESPACE_END
#include "util/util_math.h"
#include "util/util_sseb.h"
#include "util/util_ssei.h"
#include "util/util_ssef.h"
#include "util/util_avxf.h"
#endif /* __KERNEL_GPU__ */
#endif /* __UTIL_SIMD_TYPES_H__ */

View File

@@ -22,6 +22,9 @@ CCL_NAMESPACE_BEGIN
#ifdef __KERNEL_SSE2__
struct ssei;
struct ssef;
/*! 4-wide SSE bool type. */
struct sseb
{

View File

@@ -22,6 +22,9 @@ CCL_NAMESPACE_BEGIN
#ifdef __KERNEL_SSE2__
struct sseb;
struct ssef;
/*! 4-wide SSE float type. */
struct ssef
{

View File

@@ -22,6 +22,9 @@ CCL_NAMESPACE_BEGIN
#ifdef __KERNEL_SSE2__
struct sseb;
struct ssef;
/*! 4-wide SSE integer type. */
struct ssei
{
@@ -234,8 +237,10 @@ __forceinline size_t select_max(const sseb& valid, const ssei& v) { const ssei a
#else
__forceinline int reduce_min(const ssei& v) { return min(min(v[0],v[1]),min(v[2],v[3])); }
__forceinline int reduce_max(const ssei& v) { return max(max(v[0],v[1]),max(v[2],v[3])); }
__forceinline int ssei_min(int a, int b) { return (a < b)? a: b; }
__forceinline int ssei_max(int a, int b) { return (a > b)? a: b; }
__forceinline int reduce_min(const ssei& v) { return ssei_min(ssei_min(v[0],v[1]),ssei_min(v[2],v[3])); }
__forceinline int reduce_max(const ssei& v) { return ssei_max(ssei_max(v[0],v[1]),ssei_max(v[2],v[3])); }
__forceinline int reduce_add(const ssei& v) { return v[0]+v[1]+v[2]+v[3]; }
#endif

View File

@@ -21,72 +21,17 @@
# include <stdlib.h>
#endif
/* Bitness */
#if defined(__ppc64__) || defined(__PPC64__) || defined(__x86_64__) || defined(__ia64__) || defined(_M_X64)
# define __KERNEL_64_BIT__
#endif
/* Qualifiers for kernel code shared by CPU and GPU */
#ifndef __KERNEL_GPU__
# define ccl_device static inline
# define ccl_device_noinline static
# define ccl_global
# define ccl_constant
# define ccl_local
# define ccl_local_param
# define ccl_private
# define ccl_restrict __restrict
# define __KERNEL_WITH_SSE_ALIGN__
# if defined(_WIN32) && !defined(FREE_WINDOWS)
# define ccl_device_inline static __forceinline
# define ccl_device_forceinline static __forceinline
# define ccl_align(...) __declspec(align(__VA_ARGS__))
# ifdef __KERNEL_64_BIT__
# define ccl_try_align(...) __declspec(align(__VA_ARGS__))
# else /* __KERNEL_64_BIT__ */
# undef __KERNEL_WITH_SSE_ALIGN__
/* No support for function arguments (error C2719). */
# define ccl_try_align(...)
# endif /* __KERNEL_64_BIT__ */
# define ccl_may_alias
# define ccl_always_inline __forceinline
# define ccl_never_inline __declspec(noinline)
# define ccl_maybe_unused
# else /* _WIN32 && !FREE_WINDOWS */
# define ccl_device_inline static inline __attribute__((always_inline))
# define ccl_device_forceinline static inline __attribute__((always_inline))
# define ccl_align(...) __attribute__((aligned(__VA_ARGS__)))
# ifndef FREE_WINDOWS64
# define __forceinline inline __attribute__((always_inline))
# endif
# define ccl_try_align(...) __attribute__((aligned(__VA_ARGS__)))
# define ccl_may_alias __attribute__((__may_alias__))
# define ccl_always_inline __attribute__((always_inline))
# define ccl_never_inline __attribute__((noinline))
# define ccl_maybe_unused __attribute__((used))
# endif /* _WIN32 && !FREE_WINDOWS */
/* Use to suppress '-Wimplicit-fallthrough' (in place of 'break'). */
# if defined(__GNUC__) && (__GNUC__ >= 7) /* gcc7.0+ only */
# define ATTR_FALLTHROUGH __attribute__((fallthrough))
# else
# define ATTR_FALLTHROUGH ((void)0)
# endif
#endif /* __KERNEL_GPU__ */
/* Standard Integer Types */
#if !defined(__KERNEL_GPU__) && !defined(_WIN32)
# include <stdint.h>
#endif
#include "util/util_defines.h"
#ifndef __KERNEL_GPU__
/* int8_t, uint16_t, and friends */
# ifndef _WIN32
# include <stdint.h>
# endif
/* SIMD Types */
# include "util/util_optimization.h"
#endif /* __KERNEL_GPU__ */
# include "util/util_simd.h"
#endif
CCL_NAMESPACE_BEGIN
@@ -201,65 +146,8 @@ enum ExtensionType {
EXTENSION_NUM_TYPES,
};
/* macros */
/* hints for branch prediction, only use in code that runs a _lot_ */
#if defined(__GNUC__) && defined(__KERNEL_CPU__)
# define LIKELY(x) __builtin_expect(!!(x), 1)
# define UNLIKELY(x) __builtin_expect(!!(x), 0)
#else
# define LIKELY(x) (x)
# define UNLIKELY(x) (x)
#endif
#if defined(__cplusplus) && ((__cplusplus >= 201103L) || (defined(_MSC_VER) && _MSC_VER >= 1800))
# define HAS_CPP11_FEATURES
#endif
#if defined(__GNUC__) || defined(__clang__)
# if defined(HAS_CPP11_FEATURES)
/* Some magic to be sure we don't have reference in the type. */
template<typename T> static inline T decltype_helper(T x) { return x; }
# define TYPEOF(x) decltype(decltype_helper(x))
# else
# define TYPEOF(x) typeof(x)
# endif
#endif
/* Causes warning:
* incompatible types when assigning to type 'Foo' from type 'Bar'
* ... the compiler optimizes away the temp var */
#ifdef __GNUC__
#define CHECK_TYPE(var, type) { \
TYPEOF(var) *__tmp; \
__tmp = (type *)NULL; \
(void)__tmp; \
} (void)0
#define CHECK_TYPE_PAIR(var_a, var_b) { \
TYPEOF(var_a) *__tmp; \
__tmp = (typeof(var_b) *)NULL; \
(void)__tmp; \
} (void)0
#else
# define CHECK_TYPE(var, type)
# define CHECK_TYPE_PAIR(var_a, var_b)
#endif
/* can be used in simple macros */
#define CHECK_TYPE_INLINE(val, type) \
((void)(((type)0) != (val)))
CCL_NAMESPACE_END
#ifndef __KERNEL_GPU__
# include <cassert>
# define util_assert(statement) assert(statement)
#else
# define util_assert(statement)
#endif
/* Vectorized types declaration. */
#include "util/util_types_uchar2.h"
#include "util/util_types_uchar3.h"
@@ -298,5 +186,13 @@ CCL_NAMESPACE_END
#include "util/util_types_vector3_impl.h"
/* SSE types. */
#ifndef __KERNEL_GPU__
# include "util/util_sseb.h"
# include "util/util_ssei.h"
# include "util/util_ssef.h"
# include "util/util_avxf.h"
#endif
#endif /* __UTIL_TYPES_H__ */

View File

@@ -821,7 +821,8 @@ static void clip_keymap(struct wmKeyConfig *keyconf)
#endif
}
static const char *clip_context_dir[] = {"edit_movieclip", "edit_mask", NULL};
/* DO NOT make this static, this hides the symbol and breaks API generation script. */
const char *clip_context_dir[] = {"edit_movieclip", "edit_mask", NULL};
static int clip_context(const bContext *C, const char *member, bContextDataResult *result)
{

View File

@@ -436,7 +436,8 @@ static void sequencer_dropboxes(void)
/* ************* end drop *********** */
static const char *sequencer_context_dir[] = {"edit_mask", NULL};
/* DO NOT make this static, this hides the symbol and breaks API generation script. */
const char *sequencer_context_dir[] = {"edit_mask", NULL};
static int sequencer_context(const bContext *C, const char *member, bContextDataResult *result)
{

View File

@@ -729,20 +729,20 @@ int wm_homefile_read(
if (filepath_startup_override != NULL) {
/* pass */
}
else if (app_template_override && app_template_override[0]) {
else if (app_template_override) {
/* This may be clearing the current template by setting to an empty string. */
app_template = app_template_override;
}
else if (!use_factory_settings && U.app_template[0]) {
app_template = U.app_template;
}
if (app_template != NULL) {
if ((app_template != NULL) && (app_template[0] != '\0')) {
BKE_appdir_app_template_id_search(app_template, app_template_system, sizeof(app_template_system));
BLI_path_join(app_template_config, sizeof(app_template_config), cfgdir, app_template, NULL);
}
/* insert template name into startup file */
if (app_template != NULL) {
/* Insert template name into startup file. */
/* note that the path is being set even when 'use_factory_settings == true'
* this is done so we can load a templates factory-settings */
if (!use_factory_settings) {

View File

@@ -518,6 +518,7 @@ if(WITH_CYCLES)
-blender "$<TARGET_FILE:blender>"
-testdir "${TEST_SRC_DIR}/cycles/ctests/${subject}"
-idiff "${OPENIMAGEIO_IDIFF}"
-outdir "${TEST_OUT_DIR}/cycles"
)
else()
add_test(
@@ -526,17 +527,23 @@ if(WITH_CYCLES)
-blender "$<TARGET_FILE:blender>"
-testdir "${TEST_SRC_DIR}/cycles/ctests/${subject}"
-idiff "${OPENIMAGEIO_IDIFF}"
-outdir "${TEST_OUT_DIR}/cycles"
)
endif()
endmacro()
if(WITH_OPENGL_TESTS)
add_cycles_render_test(opengl)
endif()
add_cycles_render_test(image)
add_cycles_render_test(displacement)
add_cycles_render_test(image_data_types)
add_cycles_render_test(image_mapping)
add_cycles_render_test(image_texture_limit)
add_cycles_render_test(mblur)
add_cycles_render_test(reports)
add_cycles_render_test(render)
add_cycles_render_test(shader)
add_cycles_render_test(shadow_catcher)
add_cycles_render_test(volume)
else()
MESSAGE(STATUS "Disabling Cycles tests because tests folder does not exist")
endif()

View File

@@ -2,7 +2,9 @@
# Apache License, Version 2.0
import argparse
import glob
import os
import pathlib
import shutil
import subprocess
import sys
@@ -24,7 +26,7 @@ class COLORS_DUMMY:
COLORS = COLORS_DUMMY
def printMessage(type, status, message):
def print_message(message, type=None, status=''):
if type == 'SUCCESS':
print(COLORS.GREEN, end="")
elif type == 'FAILURE':
@@ -109,20 +111,126 @@ def test_get_name(filepath):
filename = os.path.basename(filepath)
return os.path.splitext(filename)[0]
def verify_output(filepath):
def test_get_images(filepath):
testname = test_get_name(filepath)
dirpath = os.path.dirname(filepath)
reference_dirpath = os.path.join(dirpath, "reference_renders")
reference_image = os.path.join(reference_dirpath, testname + ".png")
failed_image = os.path.join(reference_dirpath, testname + ".fail.png")
if not os.path.exists(reference_image):
ref_dirpath = os.path.join(dirpath, "reference_renders")
ref_img = os.path.join(ref_dirpath, testname + ".png")
new_dirpath = os.path.join(OUTDIR, os.path.basename(dirpath))
if not os.path.exists(new_dirpath):
os.makedirs(new_dirpath)
new_img = os.path.join(new_dirpath, testname + ".png")
diff_dirpath = os.path.join(OUTDIR, os.path.basename(dirpath), "diff")
if not os.path.exists(diff_dirpath):
os.makedirs(diff_dirpath)
diff_img = os.path.join(diff_dirpath, testname + ".diff.png")
return ref_img, new_img, diff_img
class Report:
def __init__(self, testname):
self.failed_tests = ""
self.passed_tests = ""
self.testname = testname
def output(self):
# write intermediate data for single test
outdir = os.path.join(OUTDIR, self.testname)
f = open(os.path.join(outdir, "failed.data"), "w")
f.write(self.failed_tests)
f.close()
f = open(os.path.join(outdir, "passed.data"), "w")
f.write(self.passed_tests)
f.close()
# gather intermediate data for all tests
failed_data = sorted(glob.glob(os.path.join(OUTDIR, "*/failed.data")))
passed_data = sorted(glob.glob(os.path.join(OUTDIR, "*/passed.data")))
failed_tests = ""
passed_tests = ""
for filename in failed_data:
failed_tests += open(os.path.join(OUTDIR, filename), "r").read()
for filename in passed_data:
passed_tests += open(os.path.join(OUTDIR, filename), "r").read()
# write html for all tests
self.html = """
<html>
<head>
<title>Cycles Test Report</title>
<style>
img {{ image-rendering: pixelated; width: 256; background-color: #000; }}
table td:first-child {{ width: 100%; }}
</style>
<link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/4.0.0-alpha.6/css/bootstrap.min.css">
</head>
<body>
<div class="container">
<br/>
<h1>Cycles Test Report</h1>
<br/>
<table class="table table-striped">
<thead class="thead-default">
<tr><th>Name</th><th>New</th><th>Reference</th><th>Diff</th>
</thead>
{}{}
</table>
<br/>
</div>
</body>
</html>
""" . format(failed_tests, passed_tests)
filepath = os.path.join(OUTDIR, "report.html")
f = open(filepath, "w")
f.write(self.html)
f.close()
print_message("Report saved to: " + pathlib.Path(filepath).as_uri())
def add_test(self, filepath, error):
name = test_get_name(filepath)
ref_img, new_img, diff_img = test_get_images(filepath)
status = error if error else ""
style = """ style="background-color: #f99;" """ if error else ""
new_url = pathlib.Path(new_img).as_uri()
ref_url = pathlib.Path(ref_img).as_uri()
diff_url = pathlib.Path(diff_img).as_uri()
test_html = """
<tr{}>
<td><b>{}</b><br/>{}<br/>{}</td>
<td><img src="{}" onmouseover="this.src='{}';" onmouseout="this.src='{}';"></td>
<td><img src="{}" onmouseover="this.src='{}';" onmouseout="this.src='{}';"></td>
<td><img src="{}"></td>
</tr>""" . format(style, name, self.testname, status,
new_url, ref_url, new_url,
ref_url, new_url, ref_url,
diff_url)
if error:
self.failed_tests += test_html
else:
self.passed_tests += test_html
def verify_output(report, filepath):
ref_img, new_img, diff_img = test_get_images(filepath)
if not os.path.exists(ref_img):
return False
# diff test with threshold
command = (
IDIFF,
"-fail", "0.015",
"-fail", "0.016",
"-failpercent", "1",
reference_image,
ref_img,
TEMP_FILE,
)
try:
@@ -130,47 +238,66 @@ def verify_output(filepath):
failed = False
except subprocess.CalledProcessError as e:
if VERBOSE:
print(e.output.decode("utf-8"))
print_message(e.output.decode("utf-8"))
failed = e.returncode != 1
if failed:
shutil.copy(TEMP_FILE, failed_image)
elif os.path.exists(failed_image):
os.remove(failed_image)
# generate diff image
command = (
IDIFF,
"-o", diff_img,
"-abs", "-scale", "16",
ref_img,
TEMP_FILE
)
try:
subprocess.check_output(command)
except subprocess.CalledProcessError as e:
if VERBOSE:
print_message(e.output.decode("utf-8"))
# copy new image
if os.path.exists(new_img):
os.remove(new_img)
if os.path.exists(TEMP_FILE):
shutil.copy(TEMP_FILE, new_img)
return not failed
def run_test(filepath):
def run_test(report, filepath):
testname = test_get_name(filepath)
spacer = "." * (32 - len(testname))
printMessage('SUCCESS', 'RUN', testname)
print_message(testname, 'SUCCESS', 'RUN')
time_start = time.time()
error = render_file(filepath)
status = "FAIL"
if not error:
if not verify_output(filepath):
if not verify_output(report, filepath):
error = "VERIFY"
time_end = time.time()
elapsed_ms = int((time_end - time_start) * 1000)
if not error:
printMessage('SUCCESS', 'OK', "{} ({} ms)" .
format(testname, elapsed_ms))
print_message("{} ({} ms)" . format(testname, elapsed_ms),
'SUCCESS', 'OK')
else:
if error == "NO_CYCLES":
print("Can't perform tests because Cycles failed to load!")
return False
print_message("Can't perform tests because Cycles failed to load!")
return error
elif error == "NO_START":
print('Can not perform tests because blender fails to start.',
print_message('Can not perform tests because blender fails to start.',
'Make sure INSTALL target was run.')
return False
return error
elif error == 'VERIFY':
print("Rendered result is different from reference image")
print_message("Rendered result is different from reference image")
else:
print("Unknown error %r" % error)
printMessage('FAILURE', 'FAILED', "{} ({} ms)" .
format(testname, elapsed_ms))
print_message("Unknown error %r" % error)
print_message("{} ({} ms)" . format(testname, elapsed_ms),
'FAILURE', 'FAILED')
return error
def blend_list(path):
for dirpath, dirnames, filenames in os.walk(path):
for filename in filenames:
@@ -178,17 +305,18 @@ def blend_list(path):
filepath = os.path.join(dirpath, filename)
yield filepath
def run_all_tests(dirpath):
passed_tests = []
failed_tests = []
all_files = list(blend_list(dirpath))
all_files.sort()
printMessage('SUCCESS', "==========",
"Running {} tests from 1 test case." . format(len(all_files)))
report = Report(os.path.basename(dirpath))
print_message("Running {} tests from 1 test case." .
format(len(all_files)),
'SUCCESS', "==========")
time_start = time.time()
for filepath in all_files:
error = run_test(filepath)
error = run_test(report, filepath)
testname = test_get_name(filepath)
if error:
if error == "NO_CYCLES":
@@ -198,28 +326,33 @@ def run_all_tests(dirpath):
failed_tests.append(testname)
else:
passed_tests.append(testname)
report.add_test(filepath, error)
time_end = time.time()
elapsed_ms = int((time_end - time_start) * 1000)
print("")
printMessage('SUCCESS', "==========",
"{} tests from 1 test case ran. ({} ms total)" .
format(len(all_files), elapsed_ms))
printMessage('SUCCESS', 'PASSED', "{} tests." .
format(len(passed_tests)))
print_message("")
print_message("{} tests from 1 test case ran. ({} ms total)" .
format(len(all_files), elapsed_ms),
'SUCCESS', "==========")
print_message("{} tests." .
format(len(passed_tests)),
'SUCCESS', 'PASSED')
if failed_tests:
printMessage('FAILURE', 'FAILED', "{} tests, listed below:" .
format(len(failed_tests)))
print_message("{} tests, listed below:" .
format(len(failed_tests)),
'FAILURE', 'FAILED')
failed_tests.sort()
for test in failed_tests:
printMessage('FAILURE', "FAILED", "{}" . format(test))
return False
return True
print_message("{}" . format(test), 'FAILURE', "FAILED")
report.output()
return not bool(failed_tests)
def create_argparse():
parser = argparse.ArgumentParser()
parser.add_argument("-blender", nargs="+")
parser.add_argument("-testdir", nargs=1)
parser.add_argument("-outdir", nargs=1)
parser.add_argument("-idiff", nargs=1)
return parser
@@ -229,7 +362,7 @@ def main():
args = parser.parse_args()
global COLORS
global BLENDER, ROOT, IDIFF
global BLENDER, TESTDIR, IDIFF, OUTDIR
global TEMP_FILE, TEMP_FILE_MASK, TEST_SCRIPT
global VERBOSE
@@ -237,8 +370,12 @@ def main():
COLORS = COLORS_ANSI
BLENDER = args.blender[0]
ROOT = args.testdir[0]
TESTDIR = args.testdir[0]
IDIFF = args.idiff[0]
OUTDIR = args.outdir[0]
if not os.path.exists(OUTDIR):
os.makedirs(OUTDIR)
TEMP = tempfile.mkdtemp()
TEMP_FILE_MASK = os.path.join(TEMP, "test")
@@ -248,7 +385,7 @@ def main():
VERBOSE = os.environ.get("BLENDER_VERBOSE") is not None
ok = run_all_tests(ROOT)
ok = run_all_tests(TESTDIR)
# Cleanup temp files and folders
if os.path.exists(TEMP_FILE):