Merge branch 'master' into blender2.8

This commit is contained in:
Campbell Barton
2017-03-12 03:00:06 +11:00
60 changed files with 873 additions and 336 deletions

View File

@@ -34,7 +34,7 @@ add_subdirectory(mikktspace)
add_subdirectory(glew-mx)
add_subdirectory(eigen)
if (WITH_GAMEENGINE_DECKLINK)
if(WITH_GAMEENGINE_DECKLINK)
add_subdirectory(decklink)
endif()

View File

@@ -588,8 +588,8 @@ static void attr_create_pointiness(Scene *scene,
sorted_vert_indeices[other_sorted_vert_index];
const float3 &other_vert_co = mesh->verts[other_vert_index];
/* We are too far away now, we wouldn't have duplicate. */
if ((other_vert_co.x + other_vert_co.y + other_vert_co.z) -
(vert_co.x + vert_co.y + vert_co.z) > 3 * FLT_EPSILON)
if((other_vert_co.x + other_vert_co.y + other_vert_co.z) -
(vert_co.x + vert_co.y + vert_co.z) > 3 * FLT_EPSILON)
{
break;
}

View File

@@ -72,7 +72,7 @@ public:
virtual SplitKernelFunction* get_split_kernel_function(string kernel_name, const DeviceRequestedFeatures&);
virtual int2 split_kernel_local_size();
virtual int2 split_kernel_global_size(device_memory& kg, device_memory& data, DeviceTask *task);
virtual size_t state_buffer_size(device_memory& kg, device_memory& data, size_t num_threads);
virtual uint64_t state_buffer_size(device_memory& kg, device_memory& data, size_t num_threads);
};
class CPUDevice : public Device
@@ -860,7 +860,7 @@ int2 CPUSplitKernel::split_kernel_global_size(device_memory& /*kg*/, device_memo
return task->requested_tile_size;
}
size_t CPUSplitKernel::state_buffer_size(device_memory& kernel_globals, device_memory& /*data*/, size_t num_threads) {
uint64_t CPUSplitKernel::state_buffer_size(device_memory& kernel_globals, device_memory& /*data*/, size_t num_threads) {
KernelGlobals *kg = (KernelGlobals*)kernel_globals.device_pointer;
return split_data_buffer_size(kg, num_threads);

View File

@@ -89,7 +89,7 @@ class CUDASplitKernel : public DeviceSplitKernel {
public:
explicit CUDASplitKernel(CUDADevice *device);
virtual size_t state_buffer_size(device_memory& kg, device_memory& data, size_t num_threads);
virtual uint64_t state_buffer_size(device_memory& kg, device_memory& data, size_t num_threads);
virtual bool enqueue_split_kernel_data_init(const KernelDimensions& dim,
RenderTile& rtile,
@@ -1473,9 +1473,9 @@ CUDASplitKernel::CUDASplitKernel(CUDADevice *device) : DeviceSplitKernel(device)
{
}
size_t CUDASplitKernel::state_buffer_size(device_memory& /*kg*/, device_memory& /*data*/, size_t num_threads)
uint64_t CUDASplitKernel::state_buffer_size(device_memory& /*kg*/, device_memory& /*data*/, size_t num_threads)
{
device_vector<uint> size_buffer;
device_vector<uint64_t> size_buffer;
size_buffer.resize(1);
device->mem_alloc(NULL, size_buffer, MEM_READ_WRITE);
@@ -1504,7 +1504,7 @@ size_t CUDASplitKernel::state_buffer_size(device_memory& /*kg*/, device_memory&
device->cuda_pop_context();
device->mem_copy_from(size_buffer, 0, 1, 1, sizeof(uint));
device->mem_copy_from(size_buffer, 0, 1, 1, sizeof(uint64_t));
device->mem_free(size_buffer);
return *size_buffer.get_data();

View File

@@ -48,7 +48,8 @@ enum DataType {
TYPE_UINT,
TYPE_INT,
TYPE_FLOAT,
TYPE_HALF
TYPE_HALF,
TYPE_UINT64,
};
static inline size_t datatype_size(DataType datatype)
@@ -59,6 +60,7 @@ static inline size_t datatype_size(DataType datatype)
case TYPE_UINT: return sizeof(uint);
case TYPE_INT: return sizeof(int);
case TYPE_HALF: return sizeof(half);
case TYPE_UINT64: return sizeof(uint64_t);
default: return 0;
}
}
@@ -160,6 +162,11 @@ template<> struct device_type_traits<half4> {
static const int num_elements = 4;
};
template<> struct device_type_traits<uint64_t> {
static const DataType data_type = TYPE_UINT64;
static const int num_elements = 1;
};
/* Device Memory */
class device_memory

View File

@@ -105,9 +105,9 @@ bool DeviceSplitKernel::load_kernels(const DeviceRequestedFeatures& requested_fe
return true;
}
size_t DeviceSplitKernel::max_elements_for_max_buffer_size(device_memory& kg, device_memory& data, size_t max_buffer_size)
size_t DeviceSplitKernel::max_elements_for_max_buffer_size(device_memory& kg, device_memory& data, uint64_t max_buffer_size)
{
size_t size_per_element = state_buffer_size(kg, data, 1024) / 1024;
uint64_t size_per_element = state_buffer_size(kg, data, 1024) / 1024;
return max_buffer_size / size_per_element;
}

View File

@@ -105,8 +105,8 @@ public:
device_memory& kgbuffer,
device_memory& kernel_data);
virtual size_t state_buffer_size(device_memory& kg, device_memory& data, size_t num_threads) = 0;
size_t max_elements_for_max_buffer_size(device_memory& kg, device_memory& data, size_t max_buffer_size);
virtual uint64_t state_buffer_size(device_memory& kg, device_memory& data, size_t num_threads) = 0;
size_t max_elements_for_max_buffer_size(device_memory& kg, device_memory& data, uint64_t max_buffer_size);
virtual bool enqueue_split_kernel_data_init(const KernelDimensions& dim,
RenderTile& rtile,

View File

@@ -334,11 +334,11 @@ void OpenCLDeviceBase::mem_zero(device_memory& mem)
size_t num_threads = global_size[0] * global_size[1];
cl_mem d_buffer = CL_MEM_PTR(mem.device_pointer);
unsigned long long d_offset = 0;
unsigned long long d_size = 0;
cl_ulong d_offset = 0;
cl_ulong d_size = 0;
while(d_offset < mem.memory_size()) {
d_size = std::min<unsigned long long>(num_threads*sizeof(float4), mem.memory_size() - d_offset);
d_size = std::min<cl_ulong>(num_threads*sizeof(float4), mem.memory_size() - d_offset);
kernel_set_args(ckZeroBuffer, 0, d_buffer, d_size, d_offset);

View File

@@ -227,9 +227,9 @@ public:
return kernel;
}
virtual size_t state_buffer_size(device_memory& kg, device_memory& data, size_t num_threads)
virtual uint64_t state_buffer_size(device_memory& kg, device_memory& data, size_t num_threads)
{
device_vector<uint> size_buffer;
device_vector<uint64_t> size_buffer;
size_buffer.resize(1);
device->mem_alloc(NULL, size_buffer, MEM_READ_WRITE);
@@ -249,7 +249,7 @@ public:
device->opencl_assert_err(device->ciErr, "clEnqueueNDRangeKernel");
device->mem_copy_from(size_buffer, 0, 1, 1, sizeof(uint));
device->mem_copy_from(size_buffer, 0, 1, 1, sizeof(uint64_t));
device->mem_free(size_buffer);
if(device->ciErr != CL_SUCCESS) {
@@ -346,8 +346,8 @@ public:
virtual int2 split_kernel_global_size(device_memory& kg, device_memory& data, DeviceTask */*task*/)
{
size_t max_buffer_size;
clGetDeviceInfo(device->cdDevice, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(size_t), &max_buffer_size, NULL);
cl_ulong max_buffer_size;
clGetDeviceInfo(device->cdDevice, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(cl_ulong), &max_buffer_size, NULL);
VLOG(1) << "Maximum device allocation side: "
<< string_human_readable_number(max_buffer_size) << " bytes. ("
<< string_human_readable_size(max_buffer_size) << ").";

View File

@@ -454,7 +454,7 @@ ccl_device_forceinline int bvh_unaligned_node_intersect(KernelGlobals *kg,
Transform space1 = bvh_unaligned_node_fetch_space(kg, node_addr, 1);
float3 aligned_dir0 = transform_direction(&space0, dir),
aligned_dir1 = transform_direction(&space1, dir);;
aligned_dir1 = transform_direction(&space1, dir);
float3 aligned_P0 = transform_point(&space0, P),
aligned_P1 = transform_point(&space1, P);
float3 nrdir0 = -bvh_inverse_direction(aligned_dir0),
@@ -516,7 +516,7 @@ ccl_device_forceinline int bvh_unaligned_node_intersect_robust(KernelGlobals *kg
Transform space1 = bvh_unaligned_node_fetch_space(kg, node_addr, 1);
float3 aligned_dir0 = transform_direction(&space0, dir),
aligned_dir1 = transform_direction(&space1, dir);;
aligned_dir1 = transform_direction(&space1, dir);
float3 aligned_P0 = transform_point(&space0, P),
aligned_P1 = transform_point(&space1, P);
float3 nrdir0 = -bvh_inverse_direction(aligned_dir0),

View File

@@ -90,13 +90,13 @@ CCL_NAMESPACE_BEGIN
#ifdef __KERNEL_CUDA__
# define __KERNEL_SHADING__
# define __KERNEL_ADV_SHADING__
# define __VOLUME__
# define __VOLUME_SCATTER__
# define __SUBSURFACE__
# define __SHADOW_RECORD_ALL__
# ifndef __SPLIT_KERNEL__
# define __BRANCHED_PATH__
# define __VOLUME__
# define __VOLUME_SCATTER__
# define __SUBSURFACE__
# define __CMJ__
# define __SHADOW_RECORD_ALL__
# endif
#endif /* __KERNEL_CUDA__ */
@@ -107,6 +107,10 @@ CCL_NAMESPACE_BEGIN
# ifdef __KERNEL_OPENCL_NVIDIA__
# define __KERNEL_SHADING__
# define __KERNEL_ADV_SHADING__
# define __SUBSURFACE__
# define __VOLUME__
# define __VOLUME_SCATTER__
# define __SHADOW_RECORD_ALL__
# ifdef __KERNEL_EXPERIMENTAL__
# define __CMJ__
# endif

View File

@@ -28,20 +28,25 @@
#include "../../split/kernel_path_init.h"
#include "../../split/kernel_scene_intersect.h"
#include "../../split/kernel_lamp_emission.h"
#include "../../split/kernel_do_volume.h"
#include "../../split/kernel_queue_enqueue.h"
#include "../../split/kernel_background_buffer_update.h"
#include "../../split/kernel_indirect_background.h"
#include "../../split/kernel_shader_eval.h"
#include "../../split/kernel_holdout_emission_blurring_pathtermination_ao.h"
#include "../../split/kernel_subsurface_scatter.h"
#include "../../split/kernel_direct_lighting.h"
#include "../../split/kernel_shadow_blocked.h"
#include "../../split/kernel_shadow_blocked_ao.h"
#include "../../split/kernel_shadow_blocked_dl.h"
#include "../../split/kernel_next_iteration_setup.h"
#include "../../split/kernel_indirect_subsurface.h"
#include "../../split/kernel_buffer_update.h"
#include "../../kernel_film.h"
/* kernels */
extern "C" __global__ void
CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
kernel_cuda_state_buffer_size(uint num_threads, uint *size)
kernel_cuda_state_buffer_size(uint num_threads, uint64_t *size)
{
*size = split_data_buffer_size(NULL, num_threads);
}
@@ -91,13 +96,18 @@ kernel_cuda_path_trace_data_init(
DEFINE_SPLIT_KERNEL_FUNCTION(path_init)
DEFINE_SPLIT_KERNEL_FUNCTION(scene_intersect)
DEFINE_SPLIT_KERNEL_FUNCTION(lamp_emission)
DEFINE_SPLIT_KERNEL_FUNCTION(do_volume)
DEFINE_SPLIT_KERNEL_FUNCTION(queue_enqueue)
DEFINE_SPLIT_KERNEL_FUNCTION(background_buffer_update)
DEFINE_SPLIT_KERNEL_FUNCTION(indirect_background)
DEFINE_SPLIT_KERNEL_FUNCTION(shader_eval)
DEFINE_SPLIT_KERNEL_FUNCTION(holdout_emission_blurring_pathtermination_ao)
DEFINE_SPLIT_KERNEL_FUNCTION(subsurface_scatter)
DEFINE_SPLIT_KERNEL_FUNCTION(direct_lighting)
DEFINE_SPLIT_KERNEL_FUNCTION(shadow_blocked)
DEFINE_SPLIT_KERNEL_FUNCTION(shadow_blocked_ao)
DEFINE_SPLIT_KERNEL_FUNCTION(shadow_blocked_dl)
DEFINE_SPLIT_KERNEL_FUNCTION(next_iteration_setup)
DEFINE_SPLIT_KERNEL_FUNCTION(indirect_subsurface)
DEFINE_SPLIT_KERNEL_FUNCTION(buffer_update)
extern "C" __global__ void
CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)

View File

@@ -21,7 +21,7 @@ __kernel void kernel_ocl_path_trace_state_buffer_size(
KernelGlobals *kg,
ccl_constant KernelData *data,
uint num_threads,
ccl_global uint *size)
ccl_global uint64_t *size)
{
kg->data = data;
*size = split_data_buffer_size(kg, num_threads);

View File

@@ -22,11 +22,11 @@
CCL_NAMESPACE_BEGIN
ccl_device_inline size_t split_data_buffer_size(KernelGlobals *kg, size_t num_elements)
ccl_device_inline uint64_t split_data_buffer_size(KernelGlobals *kg, size_t num_elements)
{
(void)kg; /* Unused on CPU. */
size_t size = 0;
uint64_t size = 0;
#define SPLIT_DATA_ENTRY(type, name, num) + align_up(num_elements * num * sizeof(type), 16)
size = size SPLIT_DATA_ENTRIES;
#undef SPLIT_DATA_ENTRY

View File

@@ -43,41 +43,41 @@ CCL_NAMESPACE_BEGIN
/* Division */
#ifndef M_PI_F
#define M_PI_F ((float)3.14159265358979323846264338327950288) /* pi */
#define M_PI_F (3.1415926535897932f) /* pi */
#endif
#ifndef M_PI_2_F
#define M_PI_2_F ((float)1.57079632679489661923132169163975144) /* pi/2 */
#define M_PI_2_F (1.5707963267948966f) /* pi/2 */
#endif
#ifndef M_PI_4_F
#define M_PI_4_F ((float)0.785398163397448309615660845819875721) /* pi/4 */
#define M_PI_4_F (0.7853981633974830f) /* pi/4 */
#endif
#ifndef M_1_PI_F
#define M_1_PI_F ((float)0.318309886183790671537767526745028724) /* 1/pi */
#define M_1_PI_F (0.3183098861837067f) /* 1/pi */
#endif
#ifndef M_2_PI_F
#define M_2_PI_F ((float)0.636619772367581343075535053490057448) /* 2/pi */
#define M_2_PI_F (0.6366197723675813f) /* 2/pi */
#endif
/* Multiplication */
#ifndef M_2PI_F
#define M_2PI_F ((float)6.283185307179586476925286766559005768) /* 2*pi */
#define M_2PI_F (6.2831853071795864f) /* 2*pi */
#endif
#ifndef M_4PI_F
#define M_4PI_F ((float)12.56637061435917295385057353311801153) /* 4*pi */
#define M_4PI_F (12.566370614359172f) /* 4*pi */
#endif
/* Float sqrt variations */
#ifndef M_SQRT2_F
#define M_SQRT2_F ((float)1.41421356237309504880) /* sqrt(2) */
#define M_SQRT2_F (1.4142135623730950f) /* sqrt(2) */
#endif
#ifndef M_LN2_F
#define M_LN2_F ((float)0.6931471805599453) /* ln(2) */
#define M_LN2_F (0.6931471805599453f) /* ln(2) */
#endif
#ifndef M_LN10_F
#define M_LN10_F ((float)2.3025850929940457) /* ln(10) */
#define M_LN10_F (2.3025850929940457f) /* ln(10) */
#endif
/* Scalar */

View File

@@ -106,10 +106,16 @@ typedef unsigned int uint;
#endif
#ifndef __KERNEL_GPU__
/* Fixed Bits Types */
#ifdef __KERNEL_OPENCL__
typedef ulong uint64_t;
#endif
#ifndef __KERNEL_GPU__
#ifdef _WIN32
typedef signed char int8_t;
@@ -474,17 +480,17 @@ ccl_device_inline int4 make_int4(const float3& f)
#endif
ccl_device_inline int align_up(int offset, int alignment)
ccl_device_inline size_t align_up(size_t offset, size_t alignment)
{
return (offset + alignment - 1) & ~(alignment - 1);
}
ccl_device_inline int round_up(int x, int multiple)
ccl_device_inline size_t round_up(size_t x, size_t multiple)
{
return ((x + multiple - 1) / multiple) * multiple;
}
ccl_device_inline int round_down(int x, int multiple)
ccl_device_inline size_t round_down(size_t x, size_t multiple)
{
return (x / multiple) * multiple;
}

View File

@@ -34,14 +34,14 @@ set(SRC
DeckLinkAPI.h
)
if (WIN32)
if(WIN32)
list(APPEND SRC
win/DeckLinkAPI_h.h
win/DeckLinkAPI_i.c
)
endif()
if (UNIX AND NOT APPLE)
if(UNIX AND NOT APPLE)
list(APPEND SRC
linux/DeckLinkAPI.h
linux/DeckLinkAPIConfiguration.h