Merge branch 'master' into blender2.8
This commit is contained in:
@@ -34,7 +34,7 @@ add_subdirectory(mikktspace)
|
||||
add_subdirectory(glew-mx)
|
||||
add_subdirectory(eigen)
|
||||
|
||||
if (WITH_GAMEENGINE_DECKLINK)
|
||||
if(WITH_GAMEENGINE_DECKLINK)
|
||||
add_subdirectory(decklink)
|
||||
endif()
|
||||
|
||||
|
@@ -588,8 +588,8 @@ static void attr_create_pointiness(Scene *scene,
|
||||
sorted_vert_indeices[other_sorted_vert_index];
|
||||
const float3 &other_vert_co = mesh->verts[other_vert_index];
|
||||
/* We are too far away now, we wouldn't have duplicate. */
|
||||
if ((other_vert_co.x + other_vert_co.y + other_vert_co.z) -
|
||||
(vert_co.x + vert_co.y + vert_co.z) > 3 * FLT_EPSILON)
|
||||
if((other_vert_co.x + other_vert_co.y + other_vert_co.z) -
|
||||
(vert_co.x + vert_co.y + vert_co.z) > 3 * FLT_EPSILON)
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
@@ -72,7 +72,7 @@ public:
|
||||
virtual SplitKernelFunction* get_split_kernel_function(string kernel_name, const DeviceRequestedFeatures&);
|
||||
virtual int2 split_kernel_local_size();
|
||||
virtual int2 split_kernel_global_size(device_memory& kg, device_memory& data, DeviceTask *task);
|
||||
virtual size_t state_buffer_size(device_memory& kg, device_memory& data, size_t num_threads);
|
||||
virtual uint64_t state_buffer_size(device_memory& kg, device_memory& data, size_t num_threads);
|
||||
};
|
||||
|
||||
class CPUDevice : public Device
|
||||
@@ -860,7 +860,7 @@ int2 CPUSplitKernel::split_kernel_global_size(device_memory& /*kg*/, device_memo
|
||||
return task->requested_tile_size;
|
||||
}
|
||||
|
||||
size_t CPUSplitKernel::state_buffer_size(device_memory& kernel_globals, device_memory& /*data*/, size_t num_threads) {
|
||||
uint64_t CPUSplitKernel::state_buffer_size(device_memory& kernel_globals, device_memory& /*data*/, size_t num_threads) {
|
||||
KernelGlobals *kg = (KernelGlobals*)kernel_globals.device_pointer;
|
||||
|
||||
return split_data_buffer_size(kg, num_threads);
|
||||
|
@@ -89,7 +89,7 @@ class CUDASplitKernel : public DeviceSplitKernel {
|
||||
public:
|
||||
explicit CUDASplitKernel(CUDADevice *device);
|
||||
|
||||
virtual size_t state_buffer_size(device_memory& kg, device_memory& data, size_t num_threads);
|
||||
virtual uint64_t state_buffer_size(device_memory& kg, device_memory& data, size_t num_threads);
|
||||
|
||||
virtual bool enqueue_split_kernel_data_init(const KernelDimensions& dim,
|
||||
RenderTile& rtile,
|
||||
@@ -1473,9 +1473,9 @@ CUDASplitKernel::CUDASplitKernel(CUDADevice *device) : DeviceSplitKernel(device)
|
||||
{
|
||||
}
|
||||
|
||||
size_t CUDASplitKernel::state_buffer_size(device_memory& /*kg*/, device_memory& /*data*/, size_t num_threads)
|
||||
uint64_t CUDASplitKernel::state_buffer_size(device_memory& /*kg*/, device_memory& /*data*/, size_t num_threads)
|
||||
{
|
||||
device_vector<uint> size_buffer;
|
||||
device_vector<uint64_t> size_buffer;
|
||||
size_buffer.resize(1);
|
||||
device->mem_alloc(NULL, size_buffer, MEM_READ_WRITE);
|
||||
|
||||
@@ -1504,7 +1504,7 @@ size_t CUDASplitKernel::state_buffer_size(device_memory& /*kg*/, device_memory&
|
||||
|
||||
device->cuda_pop_context();
|
||||
|
||||
device->mem_copy_from(size_buffer, 0, 1, 1, sizeof(uint));
|
||||
device->mem_copy_from(size_buffer, 0, 1, 1, sizeof(uint64_t));
|
||||
device->mem_free(size_buffer);
|
||||
|
||||
return *size_buffer.get_data();
|
||||
|
@@ -48,7 +48,8 @@ enum DataType {
|
||||
TYPE_UINT,
|
||||
TYPE_INT,
|
||||
TYPE_FLOAT,
|
||||
TYPE_HALF
|
||||
TYPE_HALF,
|
||||
TYPE_UINT64,
|
||||
};
|
||||
|
||||
static inline size_t datatype_size(DataType datatype)
|
||||
@@ -59,6 +60,7 @@ static inline size_t datatype_size(DataType datatype)
|
||||
case TYPE_UINT: return sizeof(uint);
|
||||
case TYPE_INT: return sizeof(int);
|
||||
case TYPE_HALF: return sizeof(half);
|
||||
case TYPE_UINT64: return sizeof(uint64_t);
|
||||
default: return 0;
|
||||
}
|
||||
}
|
||||
@@ -160,6 +162,11 @@ template<> struct device_type_traits<half4> {
|
||||
static const int num_elements = 4;
|
||||
};
|
||||
|
||||
template<> struct device_type_traits<uint64_t> {
|
||||
static const DataType data_type = TYPE_UINT64;
|
||||
static const int num_elements = 1;
|
||||
};
|
||||
|
||||
/* Device Memory */
|
||||
|
||||
class device_memory
|
||||
|
@@ -105,9 +105,9 @@ bool DeviceSplitKernel::load_kernels(const DeviceRequestedFeatures& requested_fe
|
||||
return true;
|
||||
}
|
||||
|
||||
size_t DeviceSplitKernel::max_elements_for_max_buffer_size(device_memory& kg, device_memory& data, size_t max_buffer_size)
|
||||
size_t DeviceSplitKernel::max_elements_for_max_buffer_size(device_memory& kg, device_memory& data, uint64_t max_buffer_size)
|
||||
{
|
||||
size_t size_per_element = state_buffer_size(kg, data, 1024) / 1024;
|
||||
uint64_t size_per_element = state_buffer_size(kg, data, 1024) / 1024;
|
||||
return max_buffer_size / size_per_element;
|
||||
}
|
||||
|
||||
|
@@ -105,8 +105,8 @@ public:
|
||||
device_memory& kgbuffer,
|
||||
device_memory& kernel_data);
|
||||
|
||||
virtual size_t state_buffer_size(device_memory& kg, device_memory& data, size_t num_threads) = 0;
|
||||
size_t max_elements_for_max_buffer_size(device_memory& kg, device_memory& data, size_t max_buffer_size);
|
||||
virtual uint64_t state_buffer_size(device_memory& kg, device_memory& data, size_t num_threads) = 0;
|
||||
size_t max_elements_for_max_buffer_size(device_memory& kg, device_memory& data, uint64_t max_buffer_size);
|
||||
|
||||
virtual bool enqueue_split_kernel_data_init(const KernelDimensions& dim,
|
||||
RenderTile& rtile,
|
||||
|
@@ -334,11 +334,11 @@ void OpenCLDeviceBase::mem_zero(device_memory& mem)
|
||||
size_t num_threads = global_size[0] * global_size[1];
|
||||
|
||||
cl_mem d_buffer = CL_MEM_PTR(mem.device_pointer);
|
||||
unsigned long long d_offset = 0;
|
||||
unsigned long long d_size = 0;
|
||||
cl_ulong d_offset = 0;
|
||||
cl_ulong d_size = 0;
|
||||
|
||||
while(d_offset < mem.memory_size()) {
|
||||
d_size = std::min<unsigned long long>(num_threads*sizeof(float4), mem.memory_size() - d_offset);
|
||||
d_size = std::min<cl_ulong>(num_threads*sizeof(float4), mem.memory_size() - d_offset);
|
||||
|
||||
kernel_set_args(ckZeroBuffer, 0, d_buffer, d_size, d_offset);
|
||||
|
||||
|
@@ -227,9 +227,9 @@ public:
|
||||
return kernel;
|
||||
}
|
||||
|
||||
virtual size_t state_buffer_size(device_memory& kg, device_memory& data, size_t num_threads)
|
||||
virtual uint64_t state_buffer_size(device_memory& kg, device_memory& data, size_t num_threads)
|
||||
{
|
||||
device_vector<uint> size_buffer;
|
||||
device_vector<uint64_t> size_buffer;
|
||||
size_buffer.resize(1);
|
||||
device->mem_alloc(NULL, size_buffer, MEM_READ_WRITE);
|
||||
|
||||
@@ -249,7 +249,7 @@ public:
|
||||
|
||||
device->opencl_assert_err(device->ciErr, "clEnqueueNDRangeKernel");
|
||||
|
||||
device->mem_copy_from(size_buffer, 0, 1, 1, sizeof(uint));
|
||||
device->mem_copy_from(size_buffer, 0, 1, 1, sizeof(uint64_t));
|
||||
device->mem_free(size_buffer);
|
||||
|
||||
if(device->ciErr != CL_SUCCESS) {
|
||||
@@ -346,8 +346,8 @@ public:
|
||||
|
||||
virtual int2 split_kernel_global_size(device_memory& kg, device_memory& data, DeviceTask */*task*/)
|
||||
{
|
||||
size_t max_buffer_size;
|
||||
clGetDeviceInfo(device->cdDevice, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(size_t), &max_buffer_size, NULL);
|
||||
cl_ulong max_buffer_size;
|
||||
clGetDeviceInfo(device->cdDevice, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(cl_ulong), &max_buffer_size, NULL);
|
||||
VLOG(1) << "Maximum device allocation side: "
|
||||
<< string_human_readable_number(max_buffer_size) << " bytes. ("
|
||||
<< string_human_readable_size(max_buffer_size) << ").";
|
||||
|
@@ -454,7 +454,7 @@ ccl_device_forceinline int bvh_unaligned_node_intersect(KernelGlobals *kg,
|
||||
Transform space1 = bvh_unaligned_node_fetch_space(kg, node_addr, 1);
|
||||
|
||||
float3 aligned_dir0 = transform_direction(&space0, dir),
|
||||
aligned_dir1 = transform_direction(&space1, dir);;
|
||||
aligned_dir1 = transform_direction(&space1, dir);
|
||||
float3 aligned_P0 = transform_point(&space0, P),
|
||||
aligned_P1 = transform_point(&space1, P);
|
||||
float3 nrdir0 = -bvh_inverse_direction(aligned_dir0),
|
||||
@@ -516,7 +516,7 @@ ccl_device_forceinline int bvh_unaligned_node_intersect_robust(KernelGlobals *kg
|
||||
Transform space1 = bvh_unaligned_node_fetch_space(kg, node_addr, 1);
|
||||
|
||||
float3 aligned_dir0 = transform_direction(&space0, dir),
|
||||
aligned_dir1 = transform_direction(&space1, dir);;
|
||||
aligned_dir1 = transform_direction(&space1, dir);
|
||||
float3 aligned_P0 = transform_point(&space0, P),
|
||||
aligned_P1 = transform_point(&space1, P);
|
||||
float3 nrdir0 = -bvh_inverse_direction(aligned_dir0),
|
||||
|
@@ -90,13 +90,13 @@ CCL_NAMESPACE_BEGIN
|
||||
#ifdef __KERNEL_CUDA__
|
||||
# define __KERNEL_SHADING__
|
||||
# define __KERNEL_ADV_SHADING__
|
||||
# define __VOLUME__
|
||||
# define __VOLUME_SCATTER__
|
||||
# define __SUBSURFACE__
|
||||
# define __SHADOW_RECORD_ALL__
|
||||
# ifndef __SPLIT_KERNEL__
|
||||
# define __BRANCHED_PATH__
|
||||
# define __VOLUME__
|
||||
# define __VOLUME_SCATTER__
|
||||
# define __SUBSURFACE__
|
||||
# define __CMJ__
|
||||
# define __SHADOW_RECORD_ALL__
|
||||
# endif
|
||||
#endif /* __KERNEL_CUDA__ */
|
||||
|
||||
@@ -107,6 +107,10 @@ CCL_NAMESPACE_BEGIN
|
||||
# ifdef __KERNEL_OPENCL_NVIDIA__
|
||||
# define __KERNEL_SHADING__
|
||||
# define __KERNEL_ADV_SHADING__
|
||||
# define __SUBSURFACE__
|
||||
# define __VOLUME__
|
||||
# define __VOLUME_SCATTER__
|
||||
# define __SHADOW_RECORD_ALL__
|
||||
# ifdef __KERNEL_EXPERIMENTAL__
|
||||
# define __CMJ__
|
||||
# endif
|
||||
|
@@ -28,20 +28,25 @@
|
||||
#include "../../split/kernel_path_init.h"
|
||||
#include "../../split/kernel_scene_intersect.h"
|
||||
#include "../../split/kernel_lamp_emission.h"
|
||||
#include "../../split/kernel_do_volume.h"
|
||||
#include "../../split/kernel_queue_enqueue.h"
|
||||
#include "../../split/kernel_background_buffer_update.h"
|
||||
#include "../../split/kernel_indirect_background.h"
|
||||
#include "../../split/kernel_shader_eval.h"
|
||||
#include "../../split/kernel_holdout_emission_blurring_pathtermination_ao.h"
|
||||
#include "../../split/kernel_subsurface_scatter.h"
|
||||
#include "../../split/kernel_direct_lighting.h"
|
||||
#include "../../split/kernel_shadow_blocked.h"
|
||||
#include "../../split/kernel_shadow_blocked_ao.h"
|
||||
#include "../../split/kernel_shadow_blocked_dl.h"
|
||||
#include "../../split/kernel_next_iteration_setup.h"
|
||||
#include "../../split/kernel_indirect_subsurface.h"
|
||||
#include "../../split/kernel_buffer_update.h"
|
||||
|
||||
#include "../../kernel_film.h"
|
||||
|
||||
/* kernels */
|
||||
extern "C" __global__ void
|
||||
CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
|
||||
kernel_cuda_state_buffer_size(uint num_threads, uint *size)
|
||||
kernel_cuda_state_buffer_size(uint num_threads, uint64_t *size)
|
||||
{
|
||||
*size = split_data_buffer_size(NULL, num_threads);
|
||||
}
|
||||
@@ -91,13 +96,18 @@ kernel_cuda_path_trace_data_init(
|
||||
DEFINE_SPLIT_KERNEL_FUNCTION(path_init)
|
||||
DEFINE_SPLIT_KERNEL_FUNCTION(scene_intersect)
|
||||
DEFINE_SPLIT_KERNEL_FUNCTION(lamp_emission)
|
||||
DEFINE_SPLIT_KERNEL_FUNCTION(do_volume)
|
||||
DEFINE_SPLIT_KERNEL_FUNCTION(queue_enqueue)
|
||||
DEFINE_SPLIT_KERNEL_FUNCTION(background_buffer_update)
|
||||
DEFINE_SPLIT_KERNEL_FUNCTION(indirect_background)
|
||||
DEFINE_SPLIT_KERNEL_FUNCTION(shader_eval)
|
||||
DEFINE_SPLIT_KERNEL_FUNCTION(holdout_emission_blurring_pathtermination_ao)
|
||||
DEFINE_SPLIT_KERNEL_FUNCTION(subsurface_scatter)
|
||||
DEFINE_SPLIT_KERNEL_FUNCTION(direct_lighting)
|
||||
DEFINE_SPLIT_KERNEL_FUNCTION(shadow_blocked)
|
||||
DEFINE_SPLIT_KERNEL_FUNCTION(shadow_blocked_ao)
|
||||
DEFINE_SPLIT_KERNEL_FUNCTION(shadow_blocked_dl)
|
||||
DEFINE_SPLIT_KERNEL_FUNCTION(next_iteration_setup)
|
||||
DEFINE_SPLIT_KERNEL_FUNCTION(indirect_subsurface)
|
||||
DEFINE_SPLIT_KERNEL_FUNCTION(buffer_update)
|
||||
|
||||
extern "C" __global__ void
|
||||
CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
|
||||
|
@@ -21,7 +21,7 @@ __kernel void kernel_ocl_path_trace_state_buffer_size(
|
||||
KernelGlobals *kg,
|
||||
ccl_constant KernelData *data,
|
||||
uint num_threads,
|
||||
ccl_global uint *size)
|
||||
ccl_global uint64_t *size)
|
||||
{
|
||||
kg->data = data;
|
||||
*size = split_data_buffer_size(kg, num_threads);
|
||||
|
@@ -22,11 +22,11 @@
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
ccl_device_inline size_t split_data_buffer_size(KernelGlobals *kg, size_t num_elements)
|
||||
ccl_device_inline uint64_t split_data_buffer_size(KernelGlobals *kg, size_t num_elements)
|
||||
{
|
||||
(void)kg; /* Unused on CPU. */
|
||||
|
||||
size_t size = 0;
|
||||
uint64_t size = 0;
|
||||
#define SPLIT_DATA_ENTRY(type, name, num) + align_up(num_elements * num * sizeof(type), 16)
|
||||
size = size SPLIT_DATA_ENTRIES;
|
||||
#undef SPLIT_DATA_ENTRY
|
||||
|
@@ -43,41 +43,41 @@ CCL_NAMESPACE_BEGIN
|
||||
|
||||
/* Division */
|
||||
#ifndef M_PI_F
|
||||
#define M_PI_F ((float)3.14159265358979323846264338327950288) /* pi */
|
||||
#define M_PI_F (3.1415926535897932f) /* pi */
|
||||
#endif
|
||||
#ifndef M_PI_2_F
|
||||
#define M_PI_2_F ((float)1.57079632679489661923132169163975144) /* pi/2 */
|
||||
#define M_PI_2_F (1.5707963267948966f) /* pi/2 */
|
||||
#endif
|
||||
#ifndef M_PI_4_F
|
||||
#define M_PI_4_F ((float)0.785398163397448309615660845819875721) /* pi/4 */
|
||||
#define M_PI_4_F (0.7853981633974830f) /* pi/4 */
|
||||
#endif
|
||||
#ifndef M_1_PI_F
|
||||
#define M_1_PI_F ((float)0.318309886183790671537767526745028724) /* 1/pi */
|
||||
#define M_1_PI_F (0.3183098861837067f) /* 1/pi */
|
||||
#endif
|
||||
#ifndef M_2_PI_F
|
||||
#define M_2_PI_F ((float)0.636619772367581343075535053490057448) /* 2/pi */
|
||||
#define M_2_PI_F (0.6366197723675813f) /* 2/pi */
|
||||
#endif
|
||||
|
||||
/* Multiplication */
|
||||
#ifndef M_2PI_F
|
||||
#define M_2PI_F ((float)6.283185307179586476925286766559005768) /* 2*pi */
|
||||
#define M_2PI_F (6.2831853071795864f) /* 2*pi */
|
||||
#endif
|
||||
#ifndef M_4PI_F
|
||||
#define M_4PI_F ((float)12.56637061435917295385057353311801153) /* 4*pi */
|
||||
#define M_4PI_F (12.566370614359172f) /* 4*pi */
|
||||
#endif
|
||||
|
||||
/* Float sqrt variations */
|
||||
|
||||
#ifndef M_SQRT2_F
|
||||
#define M_SQRT2_F ((float)1.41421356237309504880) /* sqrt(2) */
|
||||
#define M_SQRT2_F (1.4142135623730950f) /* sqrt(2) */
|
||||
#endif
|
||||
|
||||
#ifndef M_LN2_F
|
||||
#define M_LN2_F ((float)0.6931471805599453) /* ln(2) */
|
||||
#define M_LN2_F (0.6931471805599453f) /* ln(2) */
|
||||
#endif
|
||||
|
||||
#ifndef M_LN10_F
|
||||
#define M_LN10_F ((float)2.3025850929940457) /* ln(10) */
|
||||
#define M_LN10_F (2.3025850929940457f) /* ln(10) */
|
||||
#endif
|
||||
|
||||
/* Scalar */
|
||||
|
@@ -106,10 +106,16 @@ typedef unsigned int uint;
|
||||
|
||||
#endif
|
||||
|
||||
#ifndef __KERNEL_GPU__
|
||||
|
||||
/* Fixed Bits Types */
|
||||
|
||||
#ifdef __KERNEL_OPENCL__
|
||||
|
||||
typedef ulong uint64_t;
|
||||
|
||||
#endif
|
||||
|
||||
#ifndef __KERNEL_GPU__
|
||||
|
||||
#ifdef _WIN32
|
||||
|
||||
typedef signed char int8_t;
|
||||
@@ -474,17 +480,17 @@ ccl_device_inline int4 make_int4(const float3& f)
|
||||
|
||||
#endif
|
||||
|
||||
ccl_device_inline int align_up(int offset, int alignment)
|
||||
ccl_device_inline size_t align_up(size_t offset, size_t alignment)
|
||||
{
|
||||
return (offset + alignment - 1) & ~(alignment - 1);
|
||||
}
|
||||
|
||||
ccl_device_inline int round_up(int x, int multiple)
|
||||
ccl_device_inline size_t round_up(size_t x, size_t multiple)
|
||||
{
|
||||
return ((x + multiple - 1) / multiple) * multiple;
|
||||
}
|
||||
|
||||
ccl_device_inline int round_down(int x, int multiple)
|
||||
ccl_device_inline size_t round_down(size_t x, size_t multiple)
|
||||
{
|
||||
return (x / multiple) * multiple;
|
||||
}
|
||||
|
@@ -34,14 +34,14 @@ set(SRC
|
||||
DeckLinkAPI.h
|
||||
)
|
||||
|
||||
if (WIN32)
|
||||
if(WIN32)
|
||||
list(APPEND SRC
|
||||
win/DeckLinkAPI_h.h
|
||||
win/DeckLinkAPI_i.c
|
||||
)
|
||||
endif()
|
||||
|
||||
if (UNIX AND NOT APPLE)
|
||||
if(UNIX AND NOT APPLE)
|
||||
list(APPEND SRC
|
||||
linux/DeckLinkAPI.h
|
||||
linux/DeckLinkAPIConfiguration.h
|
||||
|
Reference in New Issue
Block a user