Cycles:
* Fix crash in light path node
* Fix struct alignment issue for CUDA
* Fix issue with instances taking up too much memory
* Fix issue with ray visibility working incorrectly on some objects
* Enable OpenCL always and remove the option; it has no dependencies, so it may as well be on
* Refuse to load kernel if OpenCL version < 1.1, recent drivers are needed
* Better error handling for OpenCL device
* 3D views with rendered draw mode will now revert to wireframe on file load
This commit is contained in:
@@ -208,7 +208,6 @@ option(WITH_API_INSTALL "Copy API header files into the blender install fold
|
||||
option(WITH_CYCLES "Enable Cycles Render Engine" ON)
|
||||
OPTION(WITH_CYCLES_OSL "Build with Open Shading Language support" OFF)
|
||||
OPTION(WITH_CYCLES_CUDA "Build with CUDA support" OFF)
|
||||
OPTION(WITH_CYCLES_OPENCL "Build with OpenCL support" OFF)
|
||||
OPTION(WITH_CYCLES_BLENDER "Build Blender Python extension" ON)
|
||||
OPTION(WITH_CYCLES_PARTIO "Build with Partio point cloud support (unfinished)" OFF)
|
||||
OPTION(WITH_CYCLES_NETWORK "Build with network rendering support (unfinished)" OFF)
|
||||
|
@@ -52,9 +52,7 @@ if(WITH_CYCLES_PARTIO)
|
||||
add_definitions(-DWITH_PARTIO)
|
||||
endif()
|
||||
|
||||
if(WITH_CYCLES_OPENCL)
|
||||
add_definitions(-DWITH_OPENCL)
|
||||
endif()
|
||||
add_definitions(-DWITH_OPENCL)
|
||||
|
||||
include_directories(
|
||||
${BOOST_INCLUDE_DIR}
|
||||
|
@@ -27,6 +27,7 @@
|
||||
#include "util_cache.h"
|
||||
#include "util_debug.h"
|
||||
#include "util_foreach.h"
|
||||
#include "util_map.h"
|
||||
#include "util_progress.h"
|
||||
#include "util_types.h"
|
||||
|
||||
@@ -287,17 +288,25 @@ void BVH::pack_instances(size_t nodes_size)
|
||||
size_t pack_nodes_offset = nodes_size;
|
||||
size_t object_offset = 0;
|
||||
|
||||
map<Mesh*, int> mesh_map;
|
||||
|
||||
foreach(Object *ob, objects) {
|
||||
Mesh *mesh = ob->mesh;
|
||||
BVH *bvh = mesh->bvh;
|
||||
|
||||
if(!mesh->transform_applied) {
|
||||
prim_index_size += bvh->pack.prim_index.size();
|
||||
tri_woop_size += bvh->pack.tri_woop.size();
|
||||
nodes_size += bvh->pack.nodes.size()*nsize;
|
||||
if(mesh_map.find(mesh) == mesh_map.end()) {
|
||||
prim_index_size += bvh->pack.prim_index.size();
|
||||
tri_woop_size += bvh->pack.tri_woop.size();
|
||||
nodes_size += bvh->pack.nodes.size()*nsize;
|
||||
|
||||
mesh_map[mesh] = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
mesh_map.clear();
|
||||
|
||||
pack.prim_index.resize(prim_index_size);
|
||||
pack.prim_object.resize(prim_index_size);
|
||||
pack.prim_visibility.resize(prim_index_size);
|
||||
@@ -322,6 +331,16 @@ void BVH::pack_instances(size_t nodes_size)
|
||||
continue;
|
||||
}
|
||||
|
||||
/* if mesh already added once, don't add it again, but reuse the
|
||||
node offset for this object */
|
||||
map<Mesh*, int>::iterator it = mesh_map.find(mesh);
|
||||
|
||||
if(mesh_map.find(mesh) != mesh_map.end()) {
|
||||
int noffset = it->second;
|
||||
pack.object_node[object_offset++] = noffset;
|
||||
continue;
|
||||
}
|
||||
|
||||
BVH *bvh = mesh->bvh;
|
||||
|
||||
int noffset = nodes_offset/nsize;
|
||||
@@ -333,6 +352,8 @@ void BVH::pack_instances(size_t nodes_size)
|
||||
else
|
||||
pack.object_node[object_offset++] = noffset;
|
||||
|
||||
mesh_map[mesh] = pack.object_node[object_offset-1];
|
||||
|
||||
/* merge primitive and object indexes */
|
||||
{
|
||||
size_t bvh_prim_index_size = bvh->pack.prim_index.size();
|
||||
@@ -341,7 +362,7 @@ void BVH::pack_instances(size_t nodes_size)
|
||||
|
||||
for(size_t i = 0; i < bvh_prim_index_size; i++) {
|
||||
pack_prim_index[pack_prim_index_offset] = bvh_prim_index[i] + mesh_tri_offset;
|
||||
pack_prim_visibility[pack_prim_index_offset] = bvh_prim_visibility[i] + mesh_tri_offset;
|
||||
pack_prim_visibility[pack_prim_index_offset] = bvh_prim_visibility[i];
|
||||
pack_prim_object[pack_prim_index_offset] = 0; // unused for instances
|
||||
pack_prim_index_offset++;
|
||||
}
|
||||
|
@@ -49,6 +49,7 @@ public:
|
||||
map<string, device_vector<uchar>*> const_mem_map;
|
||||
map<string, device_memory*> mem_map;
|
||||
device_ptr null_mem;
|
||||
bool device_initialized;
|
||||
|
||||
const char *opencl_error_string(cl_int err)
|
||||
{
|
||||
@@ -103,50 +104,120 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
/* Report a failed OpenCL call on stderr.
 *
 * Returns true when err is anything other than CL_SUCCESS (after printing
 * the numeric code and its opencl_error_string() text), false otherwise,
 * so callers can write: if(opencl_error(ciErr)) return; */
bool opencl_error(cl_int err)
{
	if(err == CL_SUCCESS)
		return false;

	fprintf(stderr, "OpenCL error (%d): %s\n", err, opencl_error_string(err));
	return true;
}
|
||||
|
||||
/* Check the result of an OpenCL call.
 *
 * Does nothing on CL_SUCCESS. Otherwise prints the error code and its
 * opencl_error_string() text to stdout, and additionally aborts the
 * process when NDEBUG is not defined. */
void opencl_assert(cl_int err)
{
	if(err == CL_SUCCESS)
		return;

	printf("error (%d): %s\n", err, opencl_error_string(err));
#ifndef NDEBUG
	abort();
#endif
}
|
||||
|
||||
OpenCLDevice(bool background_)
|
||||
{
|
||||
background = background_;
|
||||
cpPlatform = NULL;
|
||||
cxContext = NULL;
|
||||
cqCommandQueue = NULL;
|
||||
cpProgram = NULL;
|
||||
ckPathTraceKernel = NULL;
|
||||
ckFilmConvertKernel = NULL;
|
||||
null_mem = 0;
|
||||
device_initialized = false;
|
||||
|
||||
vector<cl_platform_id> platform_ids;
|
||||
cl_uint num_platforms;
|
||||
|
||||
/* setup device */
|
||||
ciErr = clGetPlatformIDs(0, NULL, &num_platforms);
|
||||
opencl_assert(ciErr);
|
||||
assert(num_platforms != 0);
|
||||
if(opencl_error(ciErr))
|
||||
return;
|
||||
|
||||
if(num_platforms == 0) {
|
||||
fprintf(stderr, "OpenCL: no platforms found.\n");
|
||||
return;
|
||||
}
|
||||
|
||||
platform_ids.resize(num_platforms);
|
||||
ciErr = clGetPlatformIDs(num_platforms, &platform_ids[0], NULL);
|
||||
opencl_assert(ciErr);
|
||||
if(opencl_error(ciErr))
|
||||
return;
|
||||
|
||||
cpPlatform = platform_ids[0]; /* todo: pick specified platform && device */
|
||||
|
||||
ciErr = clGetDeviceIDs(cpPlatform, CL_DEVICE_TYPE_ALL, 1, &cdDevice, NULL);
|
||||
opencl_assert(ciErr);
|
||||
if(opencl_error(ciErr))
|
||||
return;
|
||||
|
||||
cxContext = clCreateContext(0, 1, &cdDevice, NULL, NULL, &ciErr);
|
||||
opencl_assert(ciErr);
|
||||
if(opencl_error(ciErr))
|
||||
return;
|
||||
|
||||
cqCommandQueue = clCreateCommandQueue(cxContext, cdDevice, 0, &ciErr);
|
||||
opencl_assert(ciErr);
|
||||
if(opencl_error(ciErr))
|
||||
return;
|
||||
|
||||
null_mem = (device_ptr)clCreateBuffer(cxContext, CL_MEM_READ_ONLY, 1, NULL, &ciErr);
|
||||
device_initialized = true;
|
||||
}
|
||||
|
||||
cpProgram = NULL;
|
||||
ckPathTraceKernel = NULL;
|
||||
ckFilmConvertKernel = NULL;
|
||||
bool opencl_version_check()
|
||||
{
|
||||
char version[256];
|
||||
int major, minor, req_major = 1, req_minor = 1;
|
||||
|
||||
clGetPlatformInfo(cpPlatform, CL_PLATFORM_VERSION, sizeof(version), &version, NULL);
|
||||
|
||||
if(sscanf(version, "OpenCL %d.%d", &major, &minor) < 2) {
|
||||
fprintf(stderr, "OpenCL: failed to parse platform version string (%s).", version);
|
||||
return false;
|
||||
}
|
||||
|
||||
if(!((major == req_major && minor >= req_minor) || (major > req_major))) {
|
||||
fprintf(stderr, "OpenCL: platform version 1.1 or later required, found %d.%d\n", major, minor);
|
||||
return false;
|
||||
}
|
||||
|
||||
clGetDeviceInfo(cdDevice, CL_DEVICE_OPENCL_C_VERSION, sizeof(version), &version, NULL);
|
||||
|
||||
if(sscanf(version, "OpenCL C %d.%d", &major, &minor) < 2) {
|
||||
fprintf(stderr, "OpenCL: failed to parse OpenCL C version string (%s).", version);
|
||||
return false;
|
||||
}
|
||||
|
||||
if(!((major == req_major && minor >= req_minor) || (major > req_major))) {
|
||||
fprintf(stderr, "OpenCL: C version 1.1 or later required, found %d.%d\n", major, minor);
|
||||
return false;
|
||||
}
|
||||
|
||||
/* we don't check CL_DEVICE_VERSION since for e.g. nvidia sm 1.3 cards this is
|
||||
1.0 even if the language features are there, just limited shared memory */
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool load_kernels()
|
||||
{
|
||||
/* compile kernel */
|
||||
/* verify if device was initialized */
|
||||
if(!device_initialized)
|
||||
return false;
|
||||
|
||||
/* verify we have right opencl version */
|
||||
if(!opencl_version_check())
|
||||
return false;
|
||||
|
||||
/* compile source */
|
||||
string source = string_printf("#include \"kernel.cl\" // %lf\n", time_dt());
|
||||
size_t source_len = source.size();
|
||||
const char *source_str = source.c_str();
|
||||
@@ -156,9 +227,9 @@ public:
|
||||
build_options += "-I " + path_get("kernel") + " -I " + path_get("util"); /* todo: escape path */
|
||||
build_options += " -Werror -cl-fast-relaxed-math -cl-strict-aliasing";
|
||||
|
||||
cpProgram = clCreateProgramWithSource(cxContext, 1, (const char **)&source_str, &source_len, &ciErr);
|
||||
|
||||
opencl_assert(ciErr);
|
||||
cpProgram = clCreateProgramWithSource(cxContext, 1, &source_str, &source_len, &ciErr);
|
||||
if(opencl_error(ciErr))
|
||||
return false;
|
||||
|
||||
ciErr = clBuildProgram(cpProgram, 0, NULL, build_options.c_str(), NULL, NULL);
|
||||
|
||||
@@ -179,10 +250,14 @@ public:
|
||||
return false;
|
||||
}
|
||||
|
||||
/* find kernels */
|
||||
ckPathTraceKernel = clCreateKernel(cpProgram, "kernel_ocl_path_trace", &ciErr);
|
||||
opencl_assert(ciErr);
|
||||
if(opencl_error(ciErr))
|
||||
return false;
|
||||
|
||||
ckFilmConvertKernel = clCreateKernel(cpProgram, "kernel_ocl_tonemap", &ciErr);
|
||||
opencl_assert(ciErr);
|
||||
if(opencl_error(ciErr))
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
@@ -123,18 +123,16 @@ endif()
|
||||
|
||||
# OPENCL kernel
|
||||
|
||||
if(WITH_CYCLES_OPENCL)
|
||||
#set(kernel_preprocessed ${CMAKE_CURRENT_BINARY_DIR}/kernel_preprocessed.cl)
|
||||
#add_custom_command(
|
||||
# OUTPUT ${kernel_preprocessed}
|
||||
# COMMAND gcc -x c++ -E ${CMAKE_CURRENT_SOURCE_DIR}/kernel.cl -I ${CMAKE_CURRENT_SOURCE_DIR}/../util/ -DCCL_NAMESPACE_BEGIN= -DCCL_NAMESPACE_END= -DWITH_OPENCL -o ${kernel_preprocessed}
|
||||
# DEPENDS ${kernel_sources} ${util_headers})
|
||||
#add_custom_target(cycles_kernel_preprocess ALL DEPENDS ${kernel_preprocessed})
|
||||
#delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${kernel_preprocessed}" ${CYCLES_INSTALL_PATH}/kernel)
|
||||
#set(kernel_preprocessed ${CMAKE_CURRENT_BINARY_DIR}/kernel_preprocessed.cl)
|
||||
#add_custom_command(
|
||||
# OUTPUT ${kernel_preprocessed}
|
||||
# COMMAND gcc -x c++ -E ${CMAKE_CURRENT_SOURCE_DIR}/kernel.cl -I ${CMAKE_CURRENT_SOURCE_DIR}/../util/ -DCCL_NAMESPACE_BEGIN= -DCCL_NAMESPACE_END= -DWITH_OPENCL -o ${kernel_preprocessed}
|
||||
# DEPENDS ${kernel_sources} ${util_headers})
|
||||
#add_custom_target(cycles_kernel_preprocess ALL DEPENDS ${kernel_preprocessed})
|
||||
#delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${kernel_preprocessed}" ${CYCLES_INSTALL_PATH}/kernel)
|
||||
|
||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernel.cl" ${CYCLES_INSTALL_PATH}/kernel)
|
||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${headers}" ${CYCLES_INSTALL_PATH}/kernel)
|
||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${svm_headers}" ${CYCLES_INSTALL_PATH}/kernel/svm)
|
||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${util_headers}" ${CYCLES_INSTALL_PATH}/kernel)
|
||||
endif()
|
||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernel.cl" ${CYCLES_INSTALL_PATH}/kernel)
|
||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${headers}" ${CYCLES_INSTALL_PATH}/kernel)
|
||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${svm_headers}" ${CYCLES_INSTALL_PATH}/kernel/svm)
|
||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${util_headers}" ${CYCLES_INSTALL_PATH}/kernel)
|
||||
|
||||
|
@@ -371,9 +371,6 @@ typedef struct KernelIntegrator {
|
||||
/* caustics */
|
||||
int no_caustics;
|
||||
float blur_caustics;
|
||||
|
||||
/* padding */
|
||||
int pad[2];
|
||||
} KernelIntegrator;
|
||||
|
||||
typedef struct KernelBVH {
|
||||
|
@@ -1490,8 +1490,9 @@ LightPathNode::LightPathNode()
|
||||
add_output("Is Shadow Ray", SHADER_SOCKET_FLOAT);
|
||||
add_output("Is Diffuse Ray", SHADER_SOCKET_FLOAT);
|
||||
add_output("Is Glossy Ray", SHADER_SOCKET_FLOAT);
|
||||
add_output("Is Transmission Ray", SHADER_SOCKET_FLOAT);
|
||||
add_output("Is Singular Ray", SHADER_SOCKET_FLOAT);
|
||||
add_output("Is Reflection Ray", SHADER_SOCKET_FLOAT);
|
||||
add_output("Is Transmission Ray", SHADER_SOCKET_FLOAT);
|
||||
}
|
||||
|
||||
void LightPathNode::compile(SVMCompiler& compiler)
|
||||
|
@@ -482,6 +482,18 @@ typedef struct _cl_image_format {
|
||||
#define CL_DEVICE_VERSION 0x102F
|
||||
#define CL_DEVICE_EXTENSIONS 0x1030
|
||||
#define CL_DEVICE_PLATFORM 0x1031
|
||||
/* 0x1032 reserved for CL_DEVICE_DOUBLE_FP_CONFIG */
|
||||
/* 0x1033 reserved for CL_DEVICE_HALF_FP_CONFIG */
|
||||
#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF 0x1034
|
||||
#define CL_DEVICE_HOST_UNIFIED_MEMORY 0x1035
|
||||
#define CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR 0x1036
|
||||
#define CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT 0x1037
|
||||
#define CL_DEVICE_NATIVE_VECTOR_WIDTH_INT 0x1038
|
||||
#define CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG 0x1039
|
||||
#define CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT 0x103A
|
||||
#define CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE 0x103B
|
||||
#define CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF 0x103C
|
||||
#define CL_DEVICE_OPENCL_C_VERSION 0x103D
|
||||
|
||||
// cl_device_fp_config - bitfield
|
||||
#define CL_FP_DENORM (1 << 0)
|
||||
|
@@ -5414,6 +5414,10 @@ static void direct_link_screen(FileData *fd, bScreen *sc)
|
||||
v3d->afterdraw_xray.first= v3d->afterdraw_xray.last= NULL;
|
||||
v3d->afterdraw_xraytransp.first= v3d->afterdraw_xraytransp.last= NULL;
|
||||
v3d->properties_storage= NULL;
|
||||
|
||||
/* render can be quite heavy, set to wire on load */
|
||||
if(v3d->drawtype == OB_RENDER)
|
||||
v3d->drawtype = OB_WIRE;
|
||||
|
||||
view3d_split_250(v3d, &sl->regionbase);
|
||||
}
|
||||
|
Reference in New Issue
Block a user