2016-09-14 23:47:54 +02:00
|
|
|
/*
|
|
|
|
* Copyright 2011-2013 Blender Foundation
|
|
|
|
*
|
|
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
* you may not use this file except in compliance with the License.
|
|
|
|
* You may obtain a copy of the License at
|
|
|
|
*
|
|
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
*
|
|
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
* See the License for the specific language governing permissions and
|
|
|
|
* limitations under the License.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#ifdef WITH_OPENCL
|
|
|
|
|
Cycles: Make all #include statements relative to cycles source directory
The idea is to make include statements more explicit and obvious where the
file is coming from, additionally reducing chance of wrong header being
picked up.
For example, it was not obvious whether bvh.h was referring to builder
or traversal, whether node.h is a generic graph node or a shader node
and cases like that.
Surely this might look obvious for the active developers, but after some
time of not touching the code it becomes less obvious where file is coming
from.
This was briefly mentioned in T50824 and seems @brecht is fine with such
explicitness, but need to agree with all active developers before committing
this.
Please note that this patch is lacking changes related on GPU/OpenCL
support. This will be solved if/when we all agree this is a good idea to move
forward.
Reviewers: brecht, lukasstockner97, maiself, nirved, dingto, juicyfruit, swerner
Reviewed By: lukasstockner97, maiself, nirved, dingto
Subscribers: brecht
Differential Revision: https://developer.blender.org/D2586
2017-03-28 20:39:14 +02:00
|
|
|
#include "device/opencl/opencl.h"
|
Cycles: Support multithreaded compilation of kernels
This patch implements a workaround to get the multithreaded compilation from D2231 working.
So far, it only works for Blender, not for Cycles Standalone. Also, I have only tested the Linux codepath in the helper function.
Depends on D2231.
Patch by lukasstockner97, jbakker, brecht
job | scene_name | compilation_time
----------+-----------------+------------------
Baseline | empty | 22.73
D2264 | empty | 13.94
Baseline | bmw | 56.44
D2264 | bmw | 41.32
Baseline | fishycat | 59.50
D2264 | fishycat | 45.19
Baseline | barbershop | 212.28
D2264 | barbershop | 169.81
Baseline | victor | 67.51
D2264 | victor | 53.60
Baseline | classroom | 51.46
D2264 | classroom | 39.02
Baseline | koro | 62.48
D2264 | koro | 49.03
Baseline | pavillion | 54.37
D2264 | pavillion | 38.82
Baseline | splash279 | 47.43
D2264 | splash279 | 37.94
Baseline | volume_emission | 145.22
D2264 | volume_emission | 121.10
This patch reduced compilation time as the split kernels and base
kernels are compiled in parallel. In cycles debug mode (256) you can set
unmark the opencl single program file, what reduces the compilation time
even further (bmw 17 seconds, barbershop 53 seconds).
Reviewers: brecht, dingto, sergey, juicyfruit, lukasstockner97
Reviewed By: brecht
Subscribers: Loner, jbakker, candreacchio, 3dLuver, LazyDodo, bliblubli
Differential Revision: https://developer.blender.org/D2264
2019-02-15 08:18:38 +01:00
|
|
|
#include "device/device_intern.h"
|
2016-09-14 23:47:54 +02:00
|
|
|
|
2018-01-19 14:55:53 +01:00
|
|
|
#include "util/util_debug.h"
|
Cycles: Make all #include statements relative to cycles source directory
The idea is to make include statements more explicit and obvious where the
file is coming from, additionally reducing chance of wrong header being
picked up.
For example, it was not obvious whether bvh.h was referring to builder
or traversal, whether node.h is a generic graph node or a shader node
and cases like that.
Surely this might look obvious for the active developers, but after some
time of not touching the code it becomes less obvious where file is coming
from.
This was briefly mentioned in T50824 and seems @brecht is fine with such
explicitness, but need to agree with all active developers before committing
this.
Please note that this patch is lacking changes related on GPU/OpenCL
support. This will be solved if/when we all agree this is a good idea to move
forward.
Reviewers: brecht, lukasstockner97, maiself, nirved, dingto, juicyfruit, swerner
Reviewed By: lukasstockner97, maiself, nirved, dingto
Subscribers: brecht
Differential Revision: https://developer.blender.org/D2586
2017-03-28 20:39:14 +02:00
|
|
|
#include "util/util_logging.h"
|
|
|
|
#include "util/util_md5.h"
|
|
|
|
#include "util/util_path.h"
|
|
|
|
#include "util/util_time.h"
|
Cycles: Support multithreaded compilation of kernels
This patch implements a workaround to get the multithreaded compilation from D2231 working.
So far, it only works for Blender, not for Cycles Standalone. Also, I have only tested the Linux codepath in the helper function.
Depends on D2231.
Patch by lukasstockner97, jbakker, brecht
job | scene_name | compilation_time
----------+-----------------+------------------
Baseline | empty | 22.73
D2264 | empty | 13.94
Baseline | bmw | 56.44
D2264 | bmw | 41.32
Baseline | fishycat | 59.50
D2264 | fishycat | 45.19
Baseline | barbershop | 212.28
D2264 | barbershop | 169.81
Baseline | victor | 67.51
D2264 | victor | 53.60
Baseline | classroom | 51.46
D2264 | classroom | 39.02
Baseline | koro | 62.48
D2264 | koro | 49.03
Baseline | pavillion | 54.37
D2264 | pavillion | 38.82
Baseline | splash279 | 47.43
D2264 | splash279 | 37.94
Baseline | volume_emission | 145.22
D2264 | volume_emission | 121.10
This patch reduced compilation time as the split kernels and base
kernels are compiled in parallel. In cycles debug mode (256) you can set
unmark the opencl single program file, what reduces the compilation time
even further (bmw 17 seconds, barbershop 53 seconds).
Reviewers: brecht, dingto, sergey, juicyfruit, lukasstockner97
Reviewed By: brecht
Subscribers: Loner, jbakker, candreacchio, 3dLuver, LazyDodo, bliblubli
Differential Revision: https://developer.blender.org/D2264
2019-02-15 08:18:38 +01:00
|
|
|
#include "util/util_system.h"
|
2016-09-14 23:47:54 +02:00
|
|
|
|
|
|
|
using std::cerr;
|
|
|
|
using std::endl;
|
|
|
|
|
|
|
|
CCL_NAMESPACE_BEGIN
|
|
|
|
|
|
|
|
/* Create an empty entry: no program is stored and no mutex is allocated yet
 * (get_program() allocates the mutex the first time the entry is used). */
OpenCLCache::Slot::ProgramEntry::ProgramEntry()
    : program(NULL), mutex(NULL)
{
}
|
|
|
|
|
|
|
|
/* Copy the program handle only. The mutex pointer is not copied; the new
 * entry starts without a mutex of its own. */
OpenCLCache::Slot::ProgramEntry::ProgramEntry(const ProgramEntry& rhs)
    : program(rhs.program), mutex(NULL)
{
}
|
|
|
|
|
|
|
|
/* Free the entry's mutex (deleting a NULL pointer is a no-op). The program
 * handle itself is not released here. */
OpenCLCache::Slot::ProgramEntry::~ProgramEntry()
{
	delete mutex;
}
|
|
|
|
|
|
|
|
/* Create an empty slot: no context cached and no context mutex allocated
 * (get_context() allocates the mutex the first time the slot is used). */
OpenCLCache::Slot::Slot()
    : context_mutex(NULL), context(NULL)
{
}
|
|
|
|
|
|
|
|
/* Copy the program entries only. Neither the context nor the context mutex
 * of the source slot is carried over; both start out as NULL in the copy. */
OpenCLCache::Slot::Slot(const Slot& rhs)
    : context_mutex(NULL), context(NULL), programs(rhs.programs)
{
}
|
|
|
|
|
|
|
|
/* Free the slot's context mutex (deleting a NULL pointer is a no-op). The
 * cached context is not released here. */
OpenCLCache::Slot::~Slot()
{
	delete context_mutex;
}
|
|
|
|
|
|
|
|
/* Access the single process-wide cache, constructed on first call. */
OpenCLCache& OpenCLCache::global_instance()
{
	static OpenCLCache cache_singleton;
	return cache_singleton;
}
|
|
|
|
|
|
|
|
/* Look up the cached OpenCL context for a platform/device pair.
 *
 * When a context is cached, it is returned with its OpenCL reference count
 * incremented and slot_locker released. When nothing is cached yet, NULL is
 * returned and slot_locker is left holding the slot's mutex. */
cl_context OpenCLCache::get_context(cl_platform_id platform,
                                    cl_device_id device,
                                    thread_scoped_lock& slot_locker)
{
	assert(platform != NULL);

	OpenCLCache& self = global_instance();

	thread_scoped_lock cache_lock(self.cache_lock);

	/* Fetch the slot for this platform/device, inserting an empty one when
	 * it does not exist yet. */
	const PlatformDevicePair slot_key(platform, device);
	Slot &slot = self.cache.insert(CacheMap::value_type(slot_key, Slot())).first->second;

	/* The slot mutex may only be created while the cache lock is held. */
	if(slot.context_mutex == NULL) {
		slot.context_mutex = new thread_mutex;
	}

	/* Drop the cache lock before taking the slot lock, so a store can
	 * complete in the meantime. */
	cache_lock.unlock();

	/* Take the slot lock through the caller's lock holder. */
	slot_locker = thread_scoped_lock(*slot.context_mutex);

	if(slot.context == NULL) {
		/* Nothing cached: return with slot_locker still holding the
		 * slot lock. */
		return NULL;
	}

	/* Context already cached: the slot lock is no longer needed. */
	slot_locker.unlock();

	const cl_int ciErr = clRetainContext(slot.context);
	assert(ciErr == CL_SUCCESS);
	(void) ciErr;

	return slot.context;
}
|
|
|
|
|
|
|
|
/* Look up the cached OpenCL program stored under `key` for a platform/device
 * pair.
 *
 * When a program is cached, it is returned with its OpenCL reference count
 * incremented and slot_locker released. When nothing is cached yet, NULL is
 * returned and slot_locker is left holding the entry's mutex. */
cl_program OpenCLCache::get_program(cl_platform_id platform,
                                    cl_device_id device,
                                    ustring key,
                                    thread_scoped_lock& slot_locker)
{
	assert(platform != NULL);

	OpenCLCache& self = global_instance();

	thread_scoped_lock cache_lock(self.cache_lock);

	/* Fetch the slot for this platform/device, inserting an empty one when
	 * it does not exist yet. */
	const PlatformDevicePair slot_key(platform, device);
	Slot &slot = self.cache.insert(CacheMap::value_type(slot_key, Slot())).first->second;

	/* Same for the program entry inside that slot. */
	Slot::ProgramEntry &entry = slot.programs.insert(
	        Slot::EntryMap::value_type(key, Slot::ProgramEntry())).first->second;

	/* The entry mutex may only be created while the cache lock is held. */
	if(entry.mutex == NULL) {
		entry.mutex = new thread_mutex;
	}

	/* Drop the cache lock before taking the entry lock, so a store can
	 * complete in the meantime. */
	cache_lock.unlock();

	/* Take the entry lock through the caller's lock holder. */
	slot_locker = thread_scoped_lock(*entry.mutex);

	if(entry.program == NULL) {
		/* Nothing cached: return with slot_locker still holding the
		 * entry lock. */
		return NULL;
	}

	/* Program already cached: the entry lock is no longer needed. */
	slot_locker.unlock();

	const cl_int ciErr = clRetainProgram(entry.program);
	assert(ciErr == CL_SUCCESS);
	(void) ciErr;

	return entry.program;
}
|
|
|
|
|
|
|
|
/* Store a newly created context in the slot for a platform/device pair.
 *
 * The slot must already exist and must not hold a context yet (both are
 * checked with asserts). slot_locker is released once the context has been
 * stored, and the context's OpenCL reference count is incremented. */
void OpenCLCache::store_context(cl_platform_id platform,
                                cl_device_id device,
                                cl_context context,
                                thread_scoped_lock& slot_locker)
{
	assert(platform != NULL);
	assert(device != NULL);
	assert(context != NULL);

	OpenCLCache &self = global_instance();

	thread_scoped_lock cache_lock(self.cache_lock);
	CacheMap::iterator i = self.cache.find(PlatformDevicePair(platform, device));

	/* Sanity check: validate the iterator before dereferencing it, and
	 * while the cache lock still protects the map. */
	assert(i != self.cache.end());
	Slot &slot = i->second;

	cache_lock.unlock();

	/* Sanity check: a context must only be stored once per slot. */
	assert(slot.context == NULL);

	slot.context = context;

	/* unlock the slot */
	slot_locker.unlock();

	/* increment reference count in OpenCL.
	 * The caller is going to release the object when done with it. */
	cl_int ciErr = clRetainContext(context);
	assert(ciErr == CL_SUCCESS);
	(void) ciErr;
}
|
|
|
|
|
|
|
|
/* Store a newly built program under `key` in the slot for a platform/device
 * pair.
 *
 * Both the slot and the program entry must already exist, and the entry must
 * not hold a program yet (checked with asserts). slot_locker is released once
 * the program has been stored, and the program's OpenCL reference count is
 * incremented. */
void OpenCLCache::store_program(cl_platform_id platform,
                                cl_device_id device,
                                cl_program program,
                                ustring key,
                                thread_scoped_lock& slot_locker)
{
	assert(platform != NULL);
	assert(device != NULL);
	assert(program != NULL);

	OpenCLCache &self = global_instance();

	thread_scoped_lock cache_lock(self.cache_lock);

	/* Locate the slot for this platform/device. */
	CacheMap::iterator slot_it = self.cache.find(PlatformDevicePair(platform, device));
	assert(slot_it != self.cache.end());
	Slot::EntryMap &programs = slot_it->second.programs;

	/* Locate the entry for this key inside the slot. */
	Slot::EntryMap::iterator entry_it = programs.find(key);
	assert(entry_it != programs.end());
	Slot::ProgramEntry &entry = entry_it->second;

	assert(entry.program == NULL);

	cache_lock.unlock();

	entry.program = program;

	/* Release the entry lock held by the caller's lock holder. */
	slot_locker.unlock();

	/* Take an extra OpenCL reference for the cache; the caller releases its
	 * own reference when done with the program. */
	const cl_int ciErr = clRetainProgram(program);
	assert(ciErr == CL_SUCCESS);
	(void) ciErr;
}
|
|
|
|
|
|
|
|
/* Return the MD5 hash of the files under the "source" path. The hash is
 * computed on the first call and then reused; access is guarded by
 * kernel_md5_lock. */
string OpenCLCache::get_kernel_md5()
{
	OpenCLCache &self = global_instance();
	thread_scoped_lock lock(self.kernel_md5_lock);

	if(!self.kernel_md5.empty()) {
		return self.kernel_md5;
	}

	self.kernel_md5 = path_files_md5_hash(path_get("source"));
	return self.kernel_md5;
}
|
|
|
|
|
2019-02-20 14:41:56 +01:00
|
|
|
/* Describe a program to be built for `device`.
 *
 * Only records the name, kernel file, extra build options and logging mode;
 * the program starts out not loaded and without an OpenCL program handle. */
OpenCLDevice::OpenCLProgram::OpenCLProgram(OpenCLDevice *device,
                                           const string& program_name,
                                           const string& kernel_file,
                                           const string& kernel_build_options,
                                           bool use_stdout)
    : device(device),
      program_name(program_name),
      kernel_file(kernel_file),
      kernel_build_options(kernel_build_options),
      use_stdout(use_stdout)
{
	program = NULL;
	loaded = false;
}
|
|
|
|
|
2019-02-20 14:41:56 +01:00
|
|
|
/* Release all kernels and the program handle held by this object. */
OpenCLDevice::OpenCLProgram::~OpenCLProgram()
{
	release();
}
|
|
|
|
|
2019-02-20 14:41:56 +01:00
|
|
|
void OpenCLDevice::OpenCLProgram::release()
|
2016-09-14 23:47:54 +02:00
|
|
|
{
|
|
|
|
for(map<ustring, cl_kernel>::iterator kernel = kernels.begin(); kernel != kernels.end(); ++kernel) {
|
|
|
|
if(kernel->second) {
|
|
|
|
clReleaseKernel(kernel->second);
|
|
|
|
kernel->second = NULL;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if(program) {
|
|
|
|
clReleaseProgram(program);
|
|
|
|
program = NULL;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-02-20 14:41:56 +01:00
|
|
|
void OpenCLDevice::OpenCLProgram::add_log(const string& msg, bool debug)
|
2016-10-17 11:48:24 +02:00
|
|
|
{
|
|
|
|
if(!use_stdout) {
|
|
|
|
log += msg + "\n";
|
|
|
|
}
|
|
|
|
else if(!debug) {
|
|
|
|
printf("%s\n", msg.c_str());
|
2017-03-31 02:00:27 -04:00
|
|
|
fflush(stdout);
|
2016-10-17 11:48:24 +02:00
|
|
|
}
|
|
|
|
else {
|
|
|
|
VLOG(2) << msg;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-02-20 14:41:56 +01:00
|
|
|
void OpenCLDevice::OpenCLProgram::add_error(const string& msg)
|
2016-10-17 11:48:24 +02:00
|
|
|
{
|
|
|
|
if(use_stdout) {
|
|
|
|
fprintf(stderr, "%s\n", msg.c_str());
|
|
|
|
}
|
|
|
|
if(error_msg == "") {
|
|
|
|
error_msg += "\n";
|
|
|
|
}
|
|
|
|
error_msg += msg;
|
|
|
|
}
|
|
|
|
|
2019-02-20 14:41:56 +01:00
|
|
|
/* Register a kernel name with a NULL handle. A name that is already
 * registered keeps its current handle. */
void OpenCLDevice::OpenCLProgram::add_kernel(ustring name)
{
	if(kernels.find(name) != kernels.end()) {
		return;
	}

	kernels[name] = NULL;
}
|
|
|
|
|
2019-02-20 14:41:56 +01:00
|
|
|
bool OpenCLDevice::OpenCLProgram::build_kernel(const string *debug_src)
|
2016-09-14 23:47:54 +02:00
|
|
|
{
|
|
|
|
string build_options;
|
|
|
|
build_options = device->kernel_build_options(debug_src) + kernel_build_options;
|
|
|
|
|
2017-02-22 10:04:13 +01:00
|
|
|
VLOG(1) << "Build options passed to clBuildProgram: '"
|
|
|
|
<< build_options << "'.";
|
2016-09-14 23:47:54 +02:00
|
|
|
cl_int ciErr = clBuildProgram(program, 0, NULL, build_options.c_str(), NULL, NULL);
|
|
|
|
|
|
|
|
/* show warnings even if build is successful */
|
|
|
|
size_t ret_val_size = 0;
|
|
|
|
|
|
|
|
clGetProgramBuildInfo(program, device->cdDevice, CL_PROGRAM_BUILD_LOG, 0, NULL, &ret_val_size);
|
|
|
|
|
2016-10-21 02:49:00 +02:00
|
|
|
if(ciErr != CL_SUCCESS) {
|
|
|
|
add_error(string("OpenCL build failed with error ") + clewErrorString(ciErr) + ", errors in console.");
|
|
|
|
}
|
|
|
|
|
2016-09-14 23:47:54 +02:00
|
|
|
if(ret_val_size > 1) {
|
|
|
|
vector<char> build_log(ret_val_size + 1);
|
|
|
|
clGetProgramBuildInfo(program, device->cdDevice, CL_PROGRAM_BUILD_LOG, ret_val_size, &build_log[0], NULL);
|
|
|
|
|
|
|
|
build_log[ret_val_size] = '\0';
|
|
|
|
/* Skip meaningless empty output from the NVidia compiler. */
|
|
|
|
if(!(ret_val_size == 2 && build_log[0] == '\n')) {
|
2016-10-21 02:49:00 +02:00
|
|
|
add_log(string("OpenCL program ") + program_name + " build output: " + string(&build_log[0]), ciErr == CL_SUCCESS);
|
2016-09-14 23:47:54 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-10-21 02:49:00 +02:00
|
|
|
return (ciErr == CL_SUCCESS);
|
2016-09-14 23:47:54 +02:00
|
|
|
}
|
|
|
|
|
2019-02-20 14:41:56 +01:00
|
|
|
bool OpenCLDevice::OpenCLProgram::compile_kernel(const string *debug_src)
|
2016-09-14 23:47:54 +02:00
|
|
|
{
|
Cycles: Make all #include statements relative to cycles source directory
The idea is to make include statements more explicit and obvious where the
file is coming from, additionally reducing chance of wrong header being
picked up.
For example, it was not obvious whether bvh.h was refferring to builder
or traversal, whenter node.h is a generic graph node or a shader node
and cases like that.
Surely this might look obvious for the active developers, but after some
time of not touching the code it becomes less obvious where file is coming
from.
This was briefly mentioned in T50824 and seems @brecht is fine with such
explicitness, but need to agree with all active developers before committing
this.
Please note that this patch is lacking changes related on GPU/OpenCL
support. This will be solved if/when we all agree this is a good idea to move
forward.
Reviewers: brecht, lukasstockner97, maiself, nirved, dingto, juicyfruit, swerner
Reviewed By: lukasstockner97, maiself, nirved, dingto
Subscribers: brecht
Differential Revision: https://developer.blender.org/D2586
2017-03-28 20:39:14 +02:00
|
|
|
string source = "#include \"kernel/kernels/opencl/" + kernel_file + "\"\n";
|
2016-09-14 23:47:54 +02:00
|
|
|
/* We compile kernels consisting of many files. unfortunately OpenCL
|
|
|
|
* kernel caches do not seem to recognize changes in included files.
|
|
|
|
* so we force recompile on changes by adding the md5 hash of all files.
|
|
|
|
*/
|
Cycles: Make all #include statements relative to cycles source directory
The idea is to make include statements more explicit and obvious where the
file is coming from, additionally reducing chance of wrong header being
picked up.
For example, it was not obvious whether bvh.h was refferring to builder
or traversal, whenter node.h is a generic graph node or a shader node
and cases like that.
Surely this might look obvious for the active developers, but after some
time of not touching the code it becomes less obvious where file is coming
from.
This was briefly mentioned in T50824 and seems @brecht is fine with such
explicitness, but need to agree with all active developers before committing
this.
Please note that this patch is lacking changes related on GPU/OpenCL
support. This will be solved if/when we all agree this is a good idea to move
forward.
Reviewers: brecht, lukasstockner97, maiself, nirved, dingto, juicyfruit, swerner
Reviewed By: lukasstockner97, maiself, nirved, dingto
Subscribers: brecht
Differential Revision: https://developer.blender.org/D2586
2017-03-28 20:39:14 +02:00
|
|
|
source = path_source_replace_includes(source, path_get("source"));
|
2017-03-03 04:10:17 -05:00
|
|
|
source += "\n// " + util_md5_string(source) + "\n";
|
2016-09-14 23:47:54 +02:00
|
|
|
|
|
|
|
if(debug_src) {
|
|
|
|
path_write_text(*debug_src, source);
|
|
|
|
}
|
|
|
|
|
|
|
|
size_t source_len = source.size();
|
|
|
|
const char *source_str = source.c_str();
|
|
|
|
cl_int ciErr;
|
|
|
|
|
|
|
|
program = clCreateProgramWithSource(device->cxContext,
|
2017-03-14 16:54:16 +01:00
|
|
|
1,
|
|
|
|
&source_str,
|
|
|
|
&source_len,
|
|
|
|
&ciErr);
|
2016-09-14 23:47:54 +02:00
|
|
|
|
|
|
|
if(ciErr != CL_SUCCESS) {
|
2016-10-17 11:48:24 +02:00
|
|
|
add_error(string("OpenCL program creation failed: ") + clewErrorString(ciErr));
|
2016-09-14 23:47:54 +02:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
double starttime = time_dt();
|
Cycles: Support multithreaded compilation of kernels
This patch implements a workaround to get the multithreaded compilation from D2231 working.
So far, it only works for Blender, not for Cycles Standalone. Also, I have only tested the Linux codepath in the helper function.
Depends on D2231.
Patch by lukasstockner97, jbakker, brecht
job | scene_name | compilation_time
----------+-----------------+------------------
Baseline | empty | 22.73
D2264 | empty | 13.94
Baseline | bmw | 56.44
D2264 | bmw | 41.32
Baseline | fishycat | 59.50
D2264 | fishycat | 45.19
Baseline | barbershop | 212.28
D2264 | barbershop | 169.81
Baseline | victor | 67.51
D2264 | victor | 53.60
Baseline | classroom | 51.46
D2264 | classroom | 39.02
Baseline | koro | 62.48
D2264 | koro | 49.03
Baseline | pavillion | 54.37
D2264 | pavillion | 38.82
Baseline | splash279 | 47.43
D2264 | splash279 | 37.94
Baseline | volume_emission | 145.22
D2264 | volume_emission | 121.10
This patch reduced compilation time as the split kernels and base
kernels are compiled in parallel. In cycles debug mode (256) you can set
unmark the opencl single program file, what reduces the compilation time
even further (bmw 17 seconds, barbershop 53 seconds).
Reviewers: brecht, dingto, sergey, juicyfruit, lukasstockner97
Reviewed By: brecht
Subscribers: Loner, jbakker, candreacchio, 3dLuver, LazyDodo, bliblubli
Differential Revision: https://developer.blender.org/D2264
2019-02-15 08:18:38 +01:00
|
|
|
add_log(string("Cycles: compiling OpenCL program ") + program_name + "...", false);
|
2016-10-17 11:48:24 +02:00
|
|
|
add_log(string("Build flags: ") + kernel_build_options, true);
|
2016-09-14 23:47:54 +02:00
|
|
|
|
|
|
|
if(!build_kernel(debug_src))
|
|
|
|
return false;
|
|
|
|
|
Cycles: Support multithreaded compilation of kernels
This patch implements a workaround to get the multithreaded compilation from D2231 working.
So far, it only works for Blender, not for Cycles Standalone. Also, I have only tested the Linux codepath in the helper function.
Depends on D2231.
Patch by lukasstockner97, jbakker, brecht
job | scene_name | compilation_time
----------+-----------------+------------------
Baseline | empty | 22.73
D2264 | empty | 13.94
Baseline | bmw | 56.44
D2264 | bmw | 41.32
Baseline | fishycat | 59.50
D2264 | fishycat | 45.19
Baseline | barbershop | 212.28
D2264 | barbershop | 169.81
Baseline | victor | 67.51
D2264 | victor | 53.60
Baseline | classroom | 51.46
D2264 | classroom | 39.02
Baseline | koro | 62.48
D2264 | koro | 49.03
Baseline | pavillion | 54.37
D2264 | pavillion | 38.82
Baseline | splash279 | 47.43
D2264 | splash279 | 37.94
Baseline | volume_emission | 145.22
D2264 | volume_emission | 121.10
This patch reduced compilation time as the split kernels and base
kernels are compiled in parallel. In cycles debug mode (256) you can set
unmark the opencl single program file, what reduces the compilation time
even further (bmw 17 seconds, barbershop 53 seconds).
Reviewers: brecht, dingto, sergey, juicyfruit, lukasstockner97
Reviewed By: brecht
Subscribers: Loner, jbakker, candreacchio, 3dLuver, LazyDodo, bliblubli
Differential Revision: https://developer.blender.org/D2264
2019-02-15 08:18:38 +01:00
|
|
|
double elapsed = time_dt() - starttime;
|
|
|
|
add_log(string_printf("Kernel compilation of %s finished in %.2lfs.", program_name.c_str(), elapsed), false);
|
2016-09-14 23:47:54 +02:00
|
|
|
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2019-02-19 13:48:29 +01:00
|
|
|
/* Make `str` safe to embed between the quotes of a Python raw string
 * literal written as r'...'.
 *
 * A raw literal cannot contain its own quote character: a backslash before
 * the quote keeps the literal open, but the backslash stays in the value.
 * So each single quote is replaced by
 *
 *     ' "'" r'
 *
 * which closes the raw literal, adds the quote as a separate double-quoted
 * literal and reopens a raw literal. Python joins adjacent string literals,
 * so the evaluated value is the original string.
 *
 * (The previous implementation replaced "'" with "\'", which in C++ is the
 * same one-character string, so nothing was escaped at all.)
 *
 * NOTE: a backslash directly before a quote, or at the very end of the
 * string, still cannot be represented in a raw literal and is not handled. */
static void escape_python_string(string& str)
{
	static const char quote_splice[] = "' \"'\" r'";
	const size_t splice_len = sizeof(quote_splice) - 1;

	size_t pos = 0;
	while((pos = str.find('\'', pos)) != string::npos) {
		str.replace(pos, 1, quote_splice);
		/* The inserted text contains quotes itself; continue after it. */
		pos += splice_len;
	}
}
|
|
|
|
|
2019-02-20 14:41:56 +01:00
|
|
|
bool OpenCLDevice::OpenCLProgram::compile_separate(const string& clbin)
|
Cycles: Support multithreaded compilation of kernels
This patch implements a workaround to get the multithreaded compilation from D2231 working.
So far, it only works for Blender, not for Cycles Standalone. Also, I have only tested the Linux codepath in the helper function.
Depends on D2231.
Patch by lukasstockner97, jbakker, brecht
job | scene_name | compilation_time
----------+-----------------+------------------
Baseline | empty | 22.73
D2264 | empty | 13.94
Baseline | bmw | 56.44
D2264 | bmw | 41.32
Baseline | fishycat | 59.50
D2264 | fishycat | 45.19
Baseline | barbershop | 212.28
D2264 | barbershop | 169.81
Baseline | victor | 67.51
D2264 | victor | 53.60
Baseline | classroom | 51.46
D2264 | classroom | 39.02
Baseline | koro | 62.48
D2264 | koro | 49.03
Baseline | pavillion | 54.37
D2264 | pavillion | 38.82
Baseline | splash279 | 47.43
D2264 | splash279 | 37.94
Baseline | volume_emission | 145.22
D2264 | volume_emission | 121.10
This patch reduced compilation time as the split kernels and base
kernels are compiled in parallel. In cycles debug mode (256) you can set
unmark the opencl single program file, what reduces the compilation time
even further (bmw 17 seconds, barbershop 53 seconds).
Reviewers: brecht, dingto, sergey, juicyfruit, lukasstockner97
Reviewed By: brecht
Subscribers: Loner, jbakker, candreacchio, 3dLuver, LazyDodo, bliblubli
Differential Revision: https://developer.blender.org/D2264
2019-02-15 08:18:38 +01:00
|
|
|
{
|
|
|
|
vector<string> args;
|
|
|
|
args.push_back("--background");
|
|
|
|
args.push_back("--factory-startup");
|
|
|
|
args.push_back("--python-expr");
|
|
|
|
|
2019-02-19 13:48:29 +01:00
|
|
|
int device_platform_id = device->device_num;
|
|
|
|
string device_name = device->device_name;
|
|
|
|
string platform_name = device->platform_name;
|
|
|
|
string build_options = device->kernel_build_options(NULL) + kernel_build_options;
|
|
|
|
string kernel_file_escaped = kernel_file;
|
|
|
|
string clbin_escaped = clbin;
|
|
|
|
|
|
|
|
escape_python_string(device_name);
|
|
|
|
escape_python_string(platform_name);
|
|
|
|
escape_python_string(build_options);
|
|
|
|
escape_python_string(kernel_file_escaped);
|
|
|
|
escape_python_string(clbin_escaped);
|
|
|
|
|
Cycles: Support multithreaded compilation of kernels
This patch implements a workaround to get the multithreaded compilation from D2231 working.
So far, it only works for Blender, not for Cycles Standalone. Also, I have only tested the Linux codepath in the helper function.
Depends on D2231.
Patch by lukasstockner97, jbakker, brecht
job | scene_name | compilation_time
----------+-----------------+------------------
Baseline | empty | 22.73
D2264 | empty | 13.94
Baseline | bmw | 56.44
D2264 | bmw | 41.32
Baseline | fishycat | 59.50
D2264 | fishycat | 45.19
Baseline | barbershop | 212.28
D2264 | barbershop | 169.81
Baseline | victor | 67.51
D2264 | victor | 53.60
Baseline | classroom | 51.46
D2264 | classroom | 39.02
Baseline | koro | 62.48
D2264 | koro | 49.03
Baseline | pavillion | 54.37
D2264 | pavillion | 38.82
Baseline | splash279 | 47.43
D2264 | splash279 | 37.94
Baseline | volume_emission | 145.22
D2264 | volume_emission | 121.10
This patch reduced compilation time as the split kernels and base
kernels are compiled in parallel. In cycles debug mode (256) you can set
unmark the opencl single program file, what reduces the compilation time
even further (bmw 17 seconds, barbershop 53 seconds).
Reviewers: brecht, dingto, sergey, juicyfruit, lukasstockner97
Reviewed By: brecht
Subscribers: Loner, jbakker, candreacchio, 3dLuver, LazyDodo, bliblubli
Differential Revision: https://developer.blender.org/D2264
2019-02-15 08:18:38 +01:00
|
|
|
args.push_back(
|
|
|
|
string_printf(
|
2019-02-20 14:41:56 +01:00
|
|
|
"import _cycles; _cycles.opencl_compile(r'%d', r'%s', r'%s', r'%s', r'%s', r'%s')",
|
2019-02-19 13:48:29 +01:00
|
|
|
device_platform_id,
|
|
|
|
device_name.c_str(),
|
|
|
|
platform_name.c_str(),
|
|
|
|
build_options.c_str(),
|
|
|
|
kernel_file_escaped.c_str(),
|
|
|
|
clbin_escaped.c_str()));
|
Cycles: Support multithreaded compilation of kernels
This patch implements a workaround to get the multithreaded compilation from D2231 working.
So far, it only works for Blender, not for Cycles Standalone. Also, I have only tested the Linux codepath in the helper function.
Depends on D2231.
Patch by lukasstockner97, jbakker, brecht
job | scene_name | compilation_time
----------+-----------------+------------------
Baseline | empty | 22.73
D2264 | empty | 13.94
Baseline | bmw | 56.44
D2264 | bmw | 41.32
Baseline | fishycat | 59.50
D2264 | fishycat | 45.19
Baseline | barbershop | 212.28
D2264 | barbershop | 169.81
Baseline | victor | 67.51
D2264 | victor | 53.60
Baseline | classroom | 51.46
D2264 | classroom | 39.02
Baseline | koro | 62.48
D2264 | koro | 49.03
Baseline | pavillion | 54.37
D2264 | pavillion | 38.82
Baseline | splash279 | 47.43
D2264 | splash279 | 37.94
Baseline | volume_emission | 145.22
D2264 | volume_emission | 121.10
This patch reduced compilation time as the split kernels and base
kernels are compiled in parallel. In cycles debug mode (256) you can set
unmark the opencl single program file, what reduces the compilation time
even further (bmw 17 seconds, barbershop 53 seconds).
Reviewers: brecht, dingto, sergey, juicyfruit, lukasstockner97
Reviewed By: brecht
Subscribers: Loner, jbakker, candreacchio, 3dLuver, LazyDodo, bliblubli
Differential Revision: https://developer.blender.org/D2264
2019-02-15 08:18:38 +01:00
|
|
|
|
|
|
|
double starttime = time_dt();
|
|
|
|
add_log(string("Cycles: compiling OpenCL program ") + program_name + "...", false);
|
|
|
|
add_log(string("Build flags: ") + kernel_build_options, true);
|
|
|
|
if(!system_call_self(args) || !path_exists(clbin)) {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
double elapsed = time_dt() - starttime;
|
|
|
|
add_log(string_printf("Kernel compilation of %s finished in %.2lfs.", program_name.c_str(), elapsed), false);
|
|
|
|
|
|
|
|
return load_binary(clbin);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Compile an OpenCL kernel. This function is called from the _cycles Python
 * module to compile kernels. Parameters must match the function above. */
|
|
|
|
bool device_opencl_compile_kernel(const vector<string>& parameters)
|
|
|
|
{
|
2019-02-20 14:41:56 +01:00
|
|
|
int device_platform_id = std::stoi(parameters[0]);
|
|
|
|
const string& device_name = parameters[1];
|
|
|
|
const string& platform_name = parameters[2];
|
|
|
|
const string& build_options = parameters[3];
|
|
|
|
const string& kernel_file = parameters[4];
|
|
|
|
const string& binary_path = parameters[5];
|
Cycles: Support multithreaded compilation of kernels
This patch implements a workaround to get the multithreaded compilation from D2231 working.
So far, it only works for Blender, not for Cycles Standalone. Also, I have only tested the Linux codepath in the helper function.
Depends on D2231.
Patch by lukasstockner97, jbakker, brecht
job | scene_name | compilation_time
----------+-----------------+------------------
Baseline | empty | 22.73
D2264 | empty | 13.94
Baseline | bmw | 56.44
D2264 | bmw | 41.32
Baseline | fishycat | 59.50
D2264 | fishycat | 45.19
Baseline | barbershop | 212.28
D2264 | barbershop | 169.81
Baseline | victor | 67.51
D2264 | victor | 53.60
Baseline | classroom | 51.46
D2264 | classroom | 39.02
Baseline | koro | 62.48
D2264 | koro | 49.03
Baseline | pavillion | 54.37
D2264 | pavillion | 38.82
Baseline | splash279 | 47.43
D2264 | splash279 | 37.94
Baseline | volume_emission | 145.22
D2264 | volume_emission | 121.10
This patch reduced compilation time as the split kernels and base
kernels are compiled in parallel. In cycles debug mode (256) you can set
unmark the opencl single program file, what reduces the compilation time
even further (bmw 17 seconds, barbershop 53 seconds).
Reviewers: brecht, dingto, sergey, juicyfruit, lukasstockner97
Reviewed By: brecht
Subscribers: Loner, jbakker, candreacchio, 3dLuver, LazyDodo, bliblubli
Differential Revision: https://developer.blender.org/D2264
2019-02-15 08:18:38 +01:00
|
|
|
|
|
|
|
if(clewInit() != CLEW_SUCCESS) {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
vector<OpenCLPlatformDevice> usable_devices;
|
2019-02-20 14:41:56 +01:00
|
|
|
OpenCLInfo::get_usable_devices(&usable_devices);
|
Cycles: Support multithreaded compilation of kernels
This patch implements a workaround to get the multithreaded compilation from D2231 working.
So far, it only works for Blender, not for Cycles Standalone. Also, I have only tested the Linux codepath in the helper function.
Depends on D2231.
Patch by lukasstockner97, jbakker, brecht
job | scene_name | compilation_time
----------+-----------------+------------------
Baseline | empty | 22.73
D2264 | empty | 13.94
Baseline | bmw | 56.44
D2264 | bmw | 41.32
Baseline | fishycat | 59.50
D2264 | fishycat | 45.19
Baseline | barbershop | 212.28
D2264 | barbershop | 169.81
Baseline | victor | 67.51
D2264 | victor | 53.60
Baseline | classroom | 51.46
D2264 | classroom | 39.02
Baseline | koro | 62.48
D2264 | koro | 49.03
Baseline | pavillion | 54.37
D2264 | pavillion | 38.82
Baseline | splash279 | 47.43
D2264 | splash279 | 37.94
Baseline | volume_emission | 145.22
D2264 | volume_emission | 121.10
This patch reduced compilation time as the split kernels and base
kernels are compiled in parallel. In cycles debug mode (256) you can set
unmark the opencl single program file, what reduces the compilation time
even further (bmw 17 seconds, barbershop 53 seconds).
Reviewers: brecht, dingto, sergey, juicyfruit, lukasstockner97
Reviewed By: brecht
Subscribers: Loner, jbakker, candreacchio, 3dLuver, LazyDodo, bliblubli
Differential Revision: https://developer.blender.org/D2264
2019-02-15 08:18:38 +01:00
|
|
|
if(device_platform_id >= usable_devices.size()) {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
OpenCLPlatformDevice& platform_device = usable_devices[device_platform_id];
|
|
|
|
if(platform_device.platform_name != platform_name ||
|
|
|
|
platform_device.device_name != device_name)
|
|
|
|
{
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
cl_platform_id platform = platform_device.platform_id;
|
|
|
|
cl_device_id device = platform_device.device_id;
|
|
|
|
const cl_context_properties context_props[] = {
|
|
|
|
CL_CONTEXT_PLATFORM, (cl_context_properties) platform,
|
|
|
|
0, 0
|
|
|
|
};
|
|
|
|
|
|
|
|
cl_int err;
|
|
|
|
cl_context context = clCreateContext(context_props, 1, &device, NULL, NULL, &err);
|
|
|
|
if(err != CL_SUCCESS) {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
string source = "#include \"kernel/kernels/opencl/" + kernel_file + "\" // " + path_files_md5_hash(path_get("kernel")) + "\n";
|
|
|
|
source = path_source_replace_includes(source, path_get("source"));
|
|
|
|
size_t source_len = source.size();
|
|
|
|
const char *source_str = source.c_str();
|
|
|
|
cl_program program = clCreateProgramWithSource(context, 1, &source_str, &source_len, &err);
|
|
|
|
bool result = false;
|
|
|
|
|
|
|
|
if(err == CL_SUCCESS) {
|
|
|
|
err = clBuildProgram(program, 0, NULL, build_options.c_str(), NULL, NULL);
|
|
|
|
|
|
|
|
if(err == CL_SUCCESS) {
|
|
|
|
size_t size = 0;
|
|
|
|
clGetProgramInfo(program, CL_PROGRAM_BINARY_SIZES, sizeof(size_t), &size, NULL);
|
|
|
|
if(size > 0) {
|
|
|
|
vector<uint8_t> binary(size);
|
|
|
|
uint8_t *bytes = &binary[0];
|
|
|
|
clGetProgramInfo(program, CL_PROGRAM_BINARIES, sizeof(uint8_t*), &bytes, NULL);
|
|
|
|
result = path_write_binary(binary_path, binary);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
clReleaseProgram(program);
|
|
|
|
}
|
|
|
|
|
|
|
|
clReleaseContext(context);
|
|
|
|
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
2019-02-20 14:41:56 +01:00
|
|
|
bool OpenCLDevice::OpenCLProgram::load_binary(const string& clbin,
|
2016-09-14 23:47:54 +02:00
|
|
|
const string *debug_src)
|
|
|
|
{
|
|
|
|
/* read binary into memory */
|
|
|
|
vector<uint8_t> binary;
|
|
|
|
|
|
|
|
if(!path_read_binary(clbin, binary)) {
|
2016-10-17 11:48:24 +02:00
|
|
|
add_error(string_printf("OpenCL failed to read cached binary %s.", clbin.c_str()));
|
2016-09-14 23:47:54 +02:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* create program */
|
|
|
|
cl_int status, ciErr;
|
|
|
|
size_t size = binary.size();
|
|
|
|
const uint8_t *bytes = &binary[0];
|
|
|
|
|
|
|
|
program = clCreateProgramWithBinary(device->cxContext, 1, &device->cdDevice,
|
|
|
|
&size, &bytes, &status, &ciErr);
|
|
|
|
|
|
|
|
if(status != CL_SUCCESS || ciErr != CL_SUCCESS) {
|
2016-10-17 11:48:24 +02:00
|
|
|
add_error(string("OpenCL failed create program from cached binary ") + clbin + ": "
|
|
|
|
+ clewErrorString(status) + " " + clewErrorString(ciErr));
|
2016-09-14 23:47:54 +02:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
if(!build_kernel(debug_src))
|
|
|
|
return false;
|
|
|
|
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2019-02-20 14:41:56 +01:00
|
|
|
bool OpenCLDevice::OpenCLProgram::save_binary(const string& clbin)
|
2016-09-14 23:47:54 +02:00
|
|
|
{
|
|
|
|
size_t size = 0;
|
|
|
|
clGetProgramInfo(program, CL_PROGRAM_BINARY_SIZES, sizeof(size_t), &size, NULL);
|
|
|
|
|
|
|
|
if(!size)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
vector<uint8_t> binary(size);
|
|
|
|
uint8_t *bytes = &binary[0];
|
|
|
|
|
|
|
|
clGetProgramInfo(program, CL_PROGRAM_BINARIES, sizeof(uint8_t*), &bytes, NULL);
|
|
|
|
|
|
|
|
return path_write_binary(clbin, binary);
|
|
|
|
}
|
|
|
|
|
2019-02-20 14:41:56 +01:00
|
|
|
void OpenCLDevice::OpenCLProgram::load()
|
2016-09-14 23:47:54 +02:00
|
|
|
{
|
|
|
|
assert(device);
|
|
|
|
|
|
|
|
loaded = false;
|
|
|
|
|
|
|
|
string device_md5 = device->device_md5_hash(kernel_build_options);
|
|
|
|
|
|
|
|
/* Try to use cached kernel. */
|
|
|
|
thread_scoped_lock cache_locker;
|
|
|
|
ustring cache_key(program_name + device_md5);
|
|
|
|
program = device->load_cached_kernel(cache_key,
|
|
|
|
cache_locker);
|
|
|
|
|
|
|
|
if(!program) {
|
2016-10-17 11:48:24 +02:00
|
|
|
add_log(string("OpenCL program ") + program_name + " not found in cache.", true);
|
2016-09-14 23:47:54 +02:00
|
|
|
|
2017-03-03 04:10:17 -05:00
|
|
|
/* need to create source to get md5 */
|
2017-03-30 11:42:23 +02:00
|
|
|
string source = "#include \"kernel/kernels/opencl/" + kernel_file + "\"\n";
|
|
|
|
source = path_source_replace_includes(source, path_get("source"));
|
2017-03-03 04:10:17 -05:00
|
|
|
|
|
|
|
string basename = "cycles_kernel_" + program_name + "_" + device_md5 + "_" + util_md5_string(source);
|
2016-09-14 23:47:54 +02:00
|
|
|
basename = path_cache_get(path_join("kernels", basename));
|
|
|
|
string clbin = basename + ".clbin";
|
|
|
|
|
|
|
|
/* path to preprocessed source for debugging */
|
|
|
|
string clsrc, *debug_src = NULL;
|
|
|
|
|
|
|
|
if(OpenCLInfo::use_debug()) {
|
|
|
|
clsrc = basename + ".cl";
|
|
|
|
debug_src = &clsrc;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* If binary kernel exists already, try use it. */
|
|
|
|
if(path_exists(clbin) && load_binary(clbin)) {
|
|
|
|
/* Kernel loaded from binary, nothing to do. */
|
2016-10-17 11:48:24 +02:00
|
|
|
add_log(string("Loaded program from ") + clbin + ".", true);
|
2016-09-14 23:47:54 +02:00
|
|
|
}
|
|
|
|
else {
|
2016-10-17 11:48:24 +02:00
|
|
|
add_log(string("Kernel file ") + clbin + " either doesn't exist or failed to be loaded by driver.", true);
|
Cycles: Support multithreaded compilation of kernels
This patch implements a workaround to get the multithreaded compilation from D2231 working.
So far, it only works for Blender, not for Cycles Standalone. Also, I have only tested the Linux codepath in the helper function.
Depends on D2231.
Patch by lukasstockner97, jbakker, brecht
job | scene_name | compilation_time
----------+-----------------+------------------
Baseline | empty | 22.73
D2264 | empty | 13.94
Baseline | bmw | 56.44
D2264 | bmw | 41.32
Baseline | fishycat | 59.50
D2264 | fishycat | 45.19
Baseline | barbershop | 212.28
D2264 | barbershop | 169.81
Baseline | victor | 67.51
D2264 | victor | 53.60
Baseline | classroom | 51.46
D2264 | classroom | 39.02
Baseline | koro | 62.48
D2264 | koro | 49.03
Baseline | pavillion | 54.37
D2264 | pavillion | 38.82
Baseline | splash279 | 47.43
D2264 | splash279 | 37.94
Baseline | volume_emission | 145.22
D2264 | volume_emission | 121.10
This patch reduced compilation time as the split kernels and base
kernels are compiled in parallel. In cycles debug mode (256) you can set
unmark the opencl single program file, what reduces the compilation time
even further (bmw 17 seconds, barbershop 53 seconds).
Reviewers: brecht, dingto, sergey, juicyfruit, lukasstockner97
Reviewed By: brecht
Subscribers: Loner, jbakker, candreacchio, 3dLuver, LazyDodo, bliblubli
Differential Revision: https://developer.blender.org/D2264
2019-02-15 08:18:38 +01:00
|
|
|
if(!path_exists(clbin)) {
|
|
|
|
if(compile_separate(clbin)) {
|
|
|
|
add_log(string("Built and loaded program from ") + clbin + ".", true);
|
|
|
|
loaded = true;
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
add_log(string("Separate-process building of ") + clbin + " failed, will fall back to regular building.", true);
|
|
|
|
|
|
|
|
/* If does not exist or loading binary failed, compile kernel. */
|
|
|
|
if(!compile_kernel(debug_src)) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Save binary for reuse. */
|
|
|
|
if(!save_binary(clbin)) {
|
|
|
|
add_log(string("Saving compiled OpenCL kernel to ") + clbin + " failed!", true);
|
|
|
|
}
|
|
|
|
}
|
2016-09-14 23:47:54 +02:00
|
|
|
}
|
Cycles: Support multithreaded compilation of kernels
This patch implements a workaround to get the multithreaded compilation from D2231 working.
So far, it only works for Blender, not for Cycles Standalone. Also, I have only tested the Linux codepath in the helper function.
Depends on D2231.
Patch by lukasstockner97, jbakker, brecht
job | scene_name | compilation_time
----------+-----------------+------------------
Baseline | empty | 22.73
D2264 | empty | 13.94
Baseline | bmw | 56.44
D2264 | bmw | 41.32
Baseline | fishycat | 59.50
D2264 | fishycat | 45.19
Baseline | barbershop | 212.28
D2264 | barbershop | 169.81
Baseline | victor | 67.51
D2264 | victor | 53.60
Baseline | classroom | 51.46
D2264 | classroom | 39.02
Baseline | koro | 62.48
D2264 | koro | 49.03
Baseline | pavillion | 54.37
D2264 | pavillion | 38.82
Baseline | splash279 | 47.43
D2264 | splash279 | 37.94
Baseline | volume_emission | 145.22
D2264 | volume_emission | 121.10
This patch reduced compilation time as the split kernels and base
kernels are compiled in parallel. In cycles debug mode (256) you can set
unmark the opencl single program file, what reduces the compilation time
even further (bmw 17 seconds, barbershop 53 seconds).
Reviewers: brecht, dingto, sergey, juicyfruit, lukasstockner97
Reviewed By: brecht
Subscribers: Loner, jbakker, candreacchio, 3dLuver, LazyDodo, bliblubli
Differential Revision: https://developer.blender.org/D2264
2019-02-15 08:18:38 +01:00
|
|
|
else {
|
|
|
|
add_log(string("Kernel file ") + clbin + "exists, but failed to be loaded by driver.", true);
|
|
|
|
/* Fall back to compiling. */
|
|
|
|
if(!compile_kernel(debug_src)) {
|
|
|
|
return;
|
|
|
|
}
|
2016-09-14 23:47:54 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Cache the program. */
|
|
|
|
device->store_cached_kernel(program,
|
|
|
|
cache_key,
|
|
|
|
cache_locker);
|
|
|
|
}
|
|
|
|
else {
|
2016-10-17 11:48:24 +02:00
|
|
|
add_log(string("Found cached OpenCL program ") + program_name + ".", true);
|
2016-09-14 23:47:54 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
for(map<ustring, cl_kernel>::iterator kernel = kernels.begin(); kernel != kernels.end(); ++kernel) {
|
|
|
|
assert(kernel->second == NULL);
|
|
|
|
cl_int ciErr;
|
|
|
|
string name = "kernel_ocl_" + kernel->first.string();
|
|
|
|
kernel->second = clCreateKernel(program, name.c_str(), &ciErr);
|
|
|
|
if(device->opencl_error(ciErr)) {
|
2016-10-17 11:48:24 +02:00
|
|
|
add_error(string("Error getting kernel ") + name + " from program " + program_name + ": " + clewErrorString(ciErr));
|
2016-09-14 23:47:54 +02:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
loaded = true;
|
|
|
|
}
|
|
|
|
|
2019-02-20 14:41:56 +01:00
|
|
|
void OpenCLDevice::OpenCLProgram::report_error()
|
2016-09-14 23:47:54 +02:00
|
|
|
{
|
2016-10-17 11:48:24 +02:00
|
|
|
/* If loaded is true, there was no error. */
|
2016-09-14 23:47:54 +02:00
|
|
|
if(loaded) return;
|
2016-10-17 11:48:24 +02:00
|
|
|
/* if use_stdout is true, the error was already reported. */
|
|
|
|
if(use_stdout) return;
|
2016-09-14 23:47:54 +02:00
|
|
|
|
|
|
|
cerr << error_msg << endl;
|
2016-10-17 11:48:24 +02:00
|
|
|
if(!compile_output.empty()) {
|
2016-09-14 23:47:54 +02:00
|
|
|
cerr << "OpenCL kernel build output for " << program_name << ":" << endl;
|
2016-10-17 11:48:24 +02:00
|
|
|
cerr << compile_output << endl;
|
2016-09-14 23:47:54 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-02-20 14:41:56 +01:00
|
|
|
/* Return the kernel of a program that holds exactly one kernel. */
cl_kernel OpenCLDevice::OpenCLProgram::operator()()
{
	assert(kernels.size() == 1);
	map<ustring, cl_kernel>::iterator only_kernel = kernels.begin();
	return only_kernel->second;
}
|
|
|
|
|
2019-02-20 14:41:56 +01:00
|
|
|
/* Return the kernel registered under the given name. The name is expected to
 * be registered already; this is only verified by an assert. */
cl_kernel OpenCLDevice::OpenCLProgram::operator()(ustring name)
{
	assert(kernels.count(name));
	cl_kernel& requested = kernels[name];
	return requested;
}
|
|
|
|
|
|
|
|
/* Translate the OpenCL device type from the debug flags into the matching
 * CL_DEVICE_TYPE_* value. DEVICE_NONE maps to 0, any value without an
 * explicit mapping maps to CL_DEVICE_TYPE_ALL. */
cl_device_type OpenCLInfo::device_type()
{
	cl_device_type selected = CL_DEVICE_TYPE_ALL;
	switch(DebugFlags().opencl.device_type)
	{
		case DebugFlags::OpenCL::DEVICE_NONE:
			selected = 0;
			break;
		case DebugFlags::OpenCL::DEVICE_DEFAULT:
			selected = CL_DEVICE_TYPE_DEFAULT;
			break;
		case DebugFlags::OpenCL::DEVICE_CPU:
			selected = CL_DEVICE_TYPE_CPU;
			break;
		case DebugFlags::OpenCL::DEVICE_GPU:
			selected = CL_DEVICE_TYPE_GPU;
			break;
		case DebugFlags::OpenCL::DEVICE_ACCELERATOR:
			selected = CL_DEVICE_TYPE_ACCELERATOR;
			break;
		case DebugFlags::OpenCL::DEVICE_ALL:
		default:
			/* Keep CL_DEVICE_TYPE_ALL. */
			break;
	}
	return selected;
}
|
|
|
|
|
|
|
|
bool OpenCLInfo::use_debug()
|
|
|
|
{
|
|
|
|
return DebugFlags().opencl.debug;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool OpenCLInfo::kernel_use_advanced_shading(const string& platform)
|
|
|
|
{
|
|
|
|
/* keep this in sync with kernel_types.h! */
|
|
|
|
if(platform == "NVIDIA CUDA")
|
|
|
|
return true;
|
|
|
|
else if(platform == "Apple")
|
|
|
|
return true;
|
|
|
|
else if(platform == "AMD Accelerated Parallel Processing")
|
|
|
|
return true;
|
|
|
|
else if(platform == "Intel(R) OpenCL")
|
|
|
|
return true;
|
|
|
|
/* Make sure officially unsupported OpenCL platforms
|
|
|
|
* does not set up to use advanced shading.
|
|
|
|
*/
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool OpenCLInfo::device_supported(const string& platform_name,
|
|
|
|
const cl_device_id device_id)
|
|
|
|
{
|
|
|
|
cl_device_type device_type;
|
2017-03-20 17:57:37 +01:00
|
|
|
if(!get_device_type(device_id, &device_type)) {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
string device_name;
|
|
|
|
if(!get_device_name(device_id, &device_name)) {
|
|
|
|
return false;
|
|
|
|
}
|
2017-06-08 05:08:52 -04:00
|
|
|
|
|
|
|
int driver_major = 0;
|
|
|
|
int driver_minor = 0;
|
|
|
|
if(!get_driver_version(device_id, &driver_major, &driver_minor)) {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
VLOG(3) << "OpenCL driver version " << driver_major << "." << driver_minor;
|
|
|
|
|
2017-03-20 15:37:05 +01:00
|
|
|
/* It is possible tyo have Iris GPU on AMD/Apple OpenCL framework
|
|
|
|
* (aka, it will not be on Intel framework). This isn't supported
|
|
|
|
* and needs an explicit blacklist.
|
|
|
|
*/
|
2017-03-20 17:57:37 +01:00
|
|
|
if(strstr(device_name.c_str(), "Iris")) {
|
2017-03-20 15:37:05 +01:00
|
|
|
return false;
|
|
|
|
}
|
2016-09-14 23:47:54 +02:00
|
|
|
if(platform_name == "AMD Accelerated Parallel Processing" &&
|
|
|
|
device_type == CL_DEVICE_TYPE_GPU)
|
|
|
|
{
|
2017-06-11 23:04:27 +02:00
|
|
|
if(driver_major < 2236) {
|
|
|
|
VLOG(1) << "AMD driver version " << driver_major << "." << driver_minor << " not supported.";
|
2017-06-08 05:08:52 -04:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
const char *blacklist[] = {
|
|
|
|
/* GCN 1 */
|
2018-04-03 23:09:38 -04:00
|
|
|
"Tahiti", "Pitcairn", "Capeverde", "Oland", "Hainan",
|
2017-06-08 05:08:52 -04:00
|
|
|
NULL
|
|
|
|
};
|
2017-08-07 14:47:51 +02:00
|
|
|
for(int i = 0; blacklist[i] != NULL; i++) {
|
2017-06-08 05:08:52 -04:00
|
|
|
if(device_name == blacklist[i]) {
|
|
|
|
VLOG(1) << "AMD device " << device_name << " not supported";
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
2016-09-14 23:47:54 +02:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
if(platform_name == "Apple" && device_type == CL_DEVICE_TYPE_GPU) {
|
2018-12-07 14:37:47 +01:00
|
|
|
return false;
|
2016-09-14 23:47:54 +02:00
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Check that the platform reports OpenCL version 1.1 or later.
 *
 * Returns true when the version is recent enough. When error is not NULL it
 * receives a description of the failure, or an empty string on success. A
 * platform whose version string can not be queried or parsed fails the
 * check. */
bool OpenCLInfo::platform_version_check(cl_platform_id platform,
                                        string *error)
{
	const int req_major = 1, req_minor = 1;
	int major, minor;
	/* Start from an empty string, and reset it when the query fails, so that
	 * a failed query is reported through the parse failure below instead of
	 * scanning uninitialized memory. */
	char version[256] = "";
	if(clGetPlatformInfo(platform,
	                     CL_PLATFORM_VERSION,
	                     sizeof(version),
	                     version,
	                     NULL) != CL_SUCCESS)
	{
		version[0] = '\0';
	}
	if(sscanf(version, "OpenCL %d.%d", &major, &minor) < 2) {
		if(error != NULL) {
			*error = string_printf("OpenCL: failed to parse platform version string (%s).", version);
		}
		return false;
	}
	if(!((major == req_major && minor >= req_minor) || (major > req_major))) {
		if(error != NULL) {
			*error = string_printf("OpenCL: platform version 1.1 or later required, found %d.%d", major, minor);
		}
		return false;
	}
	if(error != NULL) {
		*error = "";
	}
	return true;
}
|
|
|
|
|
|
|
|
/* Check that the device reports OpenCL C version 1.1 or later.
 *
 * Returns true when the version is recent enough. When error is not NULL it
 * receives a description of the failure, or an empty string on success. A
 * device whose version string can not be queried or parsed fails the
 * check. */
bool OpenCLInfo::device_version_check(cl_device_id device,
                                      string *error)
{
	const int req_major = 1, req_minor = 1;
	int major, minor;
	/* Start from an empty string, and reset it when the query fails, so that
	 * a failed query is reported through the parse failure below instead of
	 * scanning uninitialized memory. */
	char version[256] = "";
	if(clGetDeviceInfo(device,
	                   CL_DEVICE_OPENCL_C_VERSION,
	                   sizeof(version),
	                   version,
	                   NULL) != CL_SUCCESS)
	{
		version[0] = '\0';
	}
	if(sscanf(version, "OpenCL C %d.%d", &major, &minor) < 2) {
		if(error != NULL) {
			*error = string_printf("OpenCL: failed to parse OpenCL C version string (%s).", version);
		}
		return false;
	}
	if(!((major == req_major && minor >= req_minor) || (major > req_major))) {
		if(error != NULL) {
			*error = string_printf("OpenCL: C version 1.1 or later required, found %d.%d", major, minor);
		}
		return false;
	}
	if(error != NULL) {
		*error = "";
	}
	return true;
}
|
|
|
|
|
2017-07-05 12:27:41 +02:00
|
|
|
/* Build a hardware identifier of the form "bus:device.function" (printed in
 * hexadecimal) for the given device.
 *
 * Only the "AMD Accelerated Parallel Processing", "Apple" and "NVIDIA CUDA"
 * platforms are handled, through vendor specific device queries. An empty
 * string is returned for every other platform and when the query fails. */
string OpenCLInfo::get_hardware_id(const string& platform_name, cl_device_id device_id)
{
	if(platform_name == "AMD Accelerated Parallel Processing" || platform_name == "Apple") {
		/* Use cl_amd_device_topology extension. */
		cl_char topology[24];
		if(clGetDeviceInfo(device_id, 0x4037, sizeof(topology), topology, NULL) == CL_SUCCESS && topology[0] == 1) {
			/* cl_char is signed: go through unsigned char first, otherwise
			 * values of 0x80 and above are sign extended and printed as
			 * eight hexadecimal digits. */
			return string_printf("%02x:%02x.%01x",
			                     (unsigned int)(unsigned char)topology[21],
			                     (unsigned int)(unsigned char)topology[22],
			                     (unsigned int)(unsigned char)topology[23]);
		}
	}
	else if(platform_name == "NVIDIA CUDA") {
		/* Use two undocumented options of the cl_nv_device_attribute_query extension. */
		cl_int bus_id, slot_id;
		if(clGetDeviceInfo(device_id, 0x4008, sizeof(cl_int), &bus_id, NULL) == CL_SUCCESS &&
		   clGetDeviceInfo(device_id, 0x4009, sizeof(cl_int), &slot_id, NULL) == CL_SUCCESS) {
			/* The slot id is split into its upper bits and its lowest three
			 * bits for the last two fields. */
			return string_printf("%02x:%02x.%01x",
			                     (unsigned int)(bus_id),
			                     (unsigned int)(slot_id >> 3),
			                     (unsigned int)(slot_id & 0x7));
		}
	}
	/* No general way to get a hardware ID from OpenCL => give up. */
	return "";
}
|
|
|
|
|
2016-09-14 23:47:54 +02:00
|
|
|
/* Fill usable_devices with the devices that can be used.
 *
 * The list is cleared first. Platforms and devices are visited in the order
 * the OpenCL runtime reports them. A device is added when its platform and
 * the device itself pass the version checks and device_supported() accepts
 * it; force_all skips the device_supported() check. The verbose log messages
 * are only emitted during the first call. */
void OpenCLInfo::get_usable_devices(vector<OpenCLPlatformDevice> *usable_devices,
                                    bool force_all)
{
	const cl_device_type device_type = OpenCLInfo::device_type();
	static bool first_time = true;
#define FIRST_VLOG(severity) if(first_time) VLOG(severity)

	usable_devices->clear();

	if(device_type == 0) {
		FIRST_VLOG(2) << "OpenCL devices are forced to be disabled.";
		first_time = false;
		return;
	}

	cl_int error;
	vector<cl_device_id> device_ids;
	vector<cl_platform_id> platform_ids;

	/* Get platforms. */
	const bool have_platforms = get_platforms(&platform_ids, &error);
	if(!have_platforms) {
		FIRST_VLOG(2) << "Error fetching platforms:"
		              << string(clewErrorString(error));
		first_time = false;
		return;
	}
	if(platform_ids.empty()) {
		FIRST_VLOG(2) << "No OpenCL platforms were found.";
		first_time = false;
		return;
	}

	/* Devices are numbered consecutively across platforms. */
	for(size_t platform_index = 0; platform_index < platform_ids.size(); ++platform_index) {
		cl_platform_id platform_id = platform_ids[platform_index];

		string platform_name;
		if(!get_platform_name(platform_id, &platform_name)) {
			FIRST_VLOG(2) << "Failed to get platform name, ignoring.";
			continue;
		}
		FIRST_VLOG(2) << "Enumerating devices for platform "
		              << platform_name << ".";

		if(!platform_version_check(platform_id)) {
			FIRST_VLOG(2) << "Ignoring platform " << platform_name
			              << " due to too old compiler version.";
			continue;
		}

		const bool have_devices = get_platform_devices(platform_id,
		                                               device_type,
		                                               &device_ids,
		                                               &error);
		if(!have_devices) {
			FIRST_VLOG(2) << "Ignoring platform " << platform_name
			              << ", failed to fetch of devices: "
			              << string(clewErrorString(error));
			continue;
		}
		if(device_ids.empty()) {
			FIRST_VLOG(2) << "Ignoring platform " << platform_name
			              << ", it has no devices.";
			continue;
		}

		for(size_t device_index = 0; device_index < device_ids.size(); ++device_index) {
			const cl_device_id device_id = device_ids[device_index];

			string device_name;
			if(!get_device_name(device_id, &device_name, &error)) {
				FIRST_VLOG(2) << "Failed to fetch device name: "
				              << string(clewErrorString(error))
				              << ", ignoring.";
				continue;
			}
			if(!device_version_check(device_id)) {
				FIRST_VLOG(2) << "Ignoring device " << device_name
				              << " due to old compiler version.";
				continue;
			}
			/* force_all bypasses the support check entirely. */
			if(!force_all && !device_supported(platform_name, device_id)) {
				FIRST_VLOG(2) << "Ignoring device " << device_name
				              << ", not officially supported yet.";
				continue;
			}

			/* Type of this particular device, as opposed to the requested
			 * device type used for the enumeration above. */
			cl_device_type actual_device_type;
			if(!get_device_type(device_id, &actual_device_type, &error)) {
				FIRST_VLOG(2) << "Ignoring device " << device_name
				              << ", failed to fetch device type:"
				              << string(clewErrorString(error));
				continue;
			}

			string readable_device_name = get_readable_device_name(device_id);
			if(readable_device_name != device_name) {
				FIRST_VLOG(2) << "Using more readable device name: "
				              << readable_device_name;
			}
			FIRST_VLOG(2) << "Adding new device "
			              << readable_device_name << ".";

			string hardware_id = get_hardware_id(platform_name, device_id);
			string device_extensions = get_device_extensions(device_id);
			usable_devices->push_back(OpenCLPlatformDevice(platform_id,
			                                               platform_name,
			                                               device_id,
			                                               actual_device_type,
			                                               readable_device_name,
			                                               hardware_id,
			                                               device_extensions));
		}
	}
	first_time = false;
}
|
|
|
|
|
2017-03-20 17:57:37 +01:00
|
|
|
/* Fetch the ids of all available platforms into platform_ids.
 *
 * Returns false when one of the OpenCL queries fails; platform_ids is empty
 * when counting the platforms fails. When error is not NULL it receives the
 * OpenCL status code, which is CL_SUCCESS on success. Finding zero platforms
 * is a success with an empty list. */
bool OpenCLInfo::get_platforms(vector<cl_platform_id> *platform_ids,
                               cl_int *error)
{
	/* Reset from possible previous state. */
	platform_ids->resize(0);
	cl_uint num_platforms;
	if(!get_num_platforms(&num_platforms, error)) {
		return false;
	}
	/* Nothing to fetch. Return before taking the address of the first
	 * element, at(0) would throw on the empty vector. */
	if(num_platforms == 0) {
		if(error != NULL) {
			*error = CL_SUCCESS;
		}
		return true;
	}
	/* Get actual platforms. */
	cl_int err;
	platform_ids->resize(num_platforms);
	if((err = clGetPlatformIDs(num_platforms,
	                           &platform_ids->at(0),
	                           NULL)) != CL_SUCCESS) {
		if(error != NULL) {
			*error = err;
		}
		return false;
	}
	if(error != NULL) {
		*error = CL_SUCCESS;
	}
	return true;
}
|
|
|
|
|
|
|
|
/* Convenience overload: return the platform ids by value and ignore the
 * success state of the query. */
vector<cl_platform_id> OpenCLInfo::get_platforms()
{
	vector<cl_platform_id> found_platforms;
	/* The result is deliberately not inspected, whatever the query left in
	 * the vector is returned. */
	get_platforms(&found_platforms);
	return found_platforms;
}
|
|
|
|
|
|
|
|
/* Query the number of available platforms.
 *
 * Returns true on success. On failure *num_platforms is set to 0. When error
 * is not NULL it receives the OpenCL status code of the query. */
bool OpenCLInfo::get_num_platforms(cl_uint *num_platforms, cl_int *error)
{
	const cl_int status = clGetPlatformIDs(0, NULL, num_platforms);
	const bool succeeded = (status == CL_SUCCESS);

	/* The status equals CL_SUCCESS when the query succeeded, so it can be
	 * passed on unconditionally. */
	if(error != NULL) {
		*error = status;
	}
	if(!succeeded) {
		*num_platforms = 0;
	}
	return succeeded;
}
|
|
|
|
|
|
|
|
/* Convenience overload: return the number of platforms, or 0 when the query
 * fails. */
cl_uint OpenCLInfo::get_num_platforms()
{
	cl_uint platform_count;
	const bool succeeded = get_num_platforms(&platform_count);
	return succeeded ? platform_count : 0;
}
|
|
|
|
|
|
|
|
/* Query the name of the given platform (CL_PLATFORM_NAME).
 *
 * platform_name receives the name, or an empty string when the query fails.
 *
 * Returns true when the query succeeded.
 */
bool OpenCLInfo::get_platform_name(cl_platform_id platform_id,
                                   string *platform_name)
{
	char name_buffer[256];
	const cl_int status = clGetPlatformInfo(platform_id,
	                                        CL_PLATFORM_NAME,
	                                        sizeof(name_buffer),
	                                        &name_buffer,
	                                        NULL);
	if(status == CL_SUCCESS) {
		*platform_name = name_buffer;
		return true;
	}
	*platform_name = "";
	return false;
}
|
|
|
|
|
|
|
|
/* Convenience overload: platform name by value, empty string on failure. */
string OpenCLInfo::get_platform_name(cl_platform_id platform_id)
{
	string name;
	const bool found = get_platform_name(platform_id, &name);
	if(found) {
		return name;
	}
	return "";
}
|
|
|
|
|
|
|
|
/* Query how many devices of the given type a platform exposes.
 *
 * num_devices receives the count, and is forced to 0 on failure.
 * error, when not NULL, receives the status returned by clGetDeviceIDs().
 *
 * Returns true when the query succeeded.
 */
bool OpenCLInfo::get_num_platform_devices(cl_platform_id platform_id,
                                          cl_device_type device_type,
                                          cl_uint *num_devices,
                                          cl_int *error)
{
	/* Passing zero entries and a NULL list only asks for the count. */
	const cl_int status = clGetDeviceIDs(platform_id,
	                                     device_type,
	                                     0,
	                                     NULL,
	                                     num_devices);
	if(error != NULL) {
		*error = status;
	}
	if(status == CL_SUCCESS) {
		return true;
	}
	*num_devices = 0;
	return false;
}
|
|
|
|
|
|
|
|
/* Convenience overload: number of devices of the given type on a platform,
 * 0 when the query fails.
 */
cl_uint OpenCLInfo::get_num_platform_devices(cl_platform_id platform_id,
                                             cl_device_type device_type)
{
	cl_uint count;
	const bool ok = get_num_platform_devices(platform_id, device_type, &count);
	return ok ? count : 0;
}
|
|
|
|
|
|
|
|
/* Query the IDs of all devices of the given type on a platform.
 *
 * device_ids is cleared first and filled with the device IDs on success;
 * it is left empty whenever false is returned.
 * error, when not NULL, receives the status of the failing OpenCL call, or
 * CL_SUCCESS when everything went fine.
 *
 * Returns true on success. A successful count query which reports zero
 * devices counts as success and yields an empty list.
 */
bool OpenCLInfo::get_platform_devices(cl_platform_id platform_id,
                                      cl_device_type device_type,
                                      vector<cl_device_id> *device_ids,
                                      cl_int* error)
{
	/* Reset from possible previous state. */
	device_ids->clear();
	/* Get number of devices to pre-allocate memory. */
	cl_uint num_devices;
	if(!get_num_platform_devices(platform_id,
	                             device_type,
	                             &num_devices,
	                             error))
	{
		return false;
	}
	/* Nothing to fetch. Bail out before touching element 0 of an empty
	 * vector, which would throw. The error output was already set to
	 * CL_SUCCESS by get_num_platform_devices().
	 */
	if(num_devices == 0) {
		return true;
	}
	/* Get actual device list. */
	device_ids->resize(num_devices);
	const cl_int err = clGetDeviceIDs(platform_id,
	                                  device_type,
	                                  num_devices,
	                                  &device_ids->at(0),
	                                  NULL);
	if(err != CL_SUCCESS) {
		/* Do not hand meaningless entries back to the caller. */
		device_ids->clear();
		if(error != NULL) {
			*error = err;
		}
		return false;
	}
	if(error != NULL) {
		*error = CL_SUCCESS;
	}
	return true;
}
|
|
|
|
|
|
|
|
/* Convenience overload: return the device list by value.
 *
 * The status of the underlying query is not inspected here; whatever the
 * pointer-based overload left in the vector is returned as is.
 */
vector<cl_device_id> OpenCLInfo::get_platform_devices(cl_platform_id platform_id,
                                                      cl_device_type device_type)
{
	vector<cl_device_id> result;
	get_platform_devices(platform_id, device_type, &result);
	return result;
}
|
|
|
|
|
|
|
|
/* Query the name of the given device (CL_DEVICE_NAME).
 *
 * device_name receives the name, or an empty string when the query fails.
 * error, when not NULL, receives the status returned by clGetDeviceInfo().
 *
 * Returns true when the query succeeded.
 */
bool OpenCLInfo::get_device_name(cl_device_id device_id,
                                 string *device_name,
                                 cl_int* error)
{
	char name_buffer[1024];
	const cl_int status = clGetDeviceInfo(device_id,
	                                      CL_DEVICE_NAME,
	                                      sizeof(name_buffer),
	                                      &name_buffer,
	                                      NULL);
	/* The status is reported to the caller on both success and failure. */
	if(error != NULL) {
		*error = status;
	}
	if(status != CL_SUCCESS) {
		*device_name = "";
		return false;
	}
	*device_name = name_buffer;
	return true;
}
|
|
|
|
|
|
|
|
/* Convenience overload: device name by value, empty string on failure. */
string OpenCLInfo::get_device_name(cl_device_id device_id)
{
	string name;
	const bool found = get_device_name(device_id, &name);
	if(found) {
		return name;
	}
	return "";
}
|
|
|
|
|
2018-07-06 11:42:34 +02:00
|
|
|
/* Query the extension string of the given device (CL_DEVICE_EXTENSIONS).
 *
 * device_extensions receives the extension string, or an empty string when
 * the query fails.
 * error, when not NULL, receives the status of the failing OpenCL call, or
 * CL_SUCCESS when everything went fine.
 *
 * Returns true when the query succeeded.
 */
bool OpenCLInfo::get_device_extensions(cl_device_id device_id,
                                       string *device_extensions,
                                       cl_int* error)
{
	/* Ask for the required size first. The extension list has no fixed upper
	 * bound, and clGetDeviceInfo() fails outright when the supplied buffer is
	 * too small, so a fixed-size buffer would make the whole query fail on
	 * devices with many extensions.
	 */
	size_t extensions_size = 0;
	cl_int err = clGetDeviceInfo(device_id,
	                             CL_DEVICE_EXTENSIONS,
	                             0,
	                             NULL,
	                             &extensions_size);
	/* One extra zero byte guarantees termination regardless of what the
	 * driver writes.
	 */
	vector<char> buffer(extensions_size + 1, '\0');
	if(err == CL_SUCCESS && extensions_size != 0) {
		err = clGetDeviceInfo(device_id,
		                      CL_DEVICE_EXTENSIONS,
		                      extensions_size,
		                      &buffer[0],
		                      NULL);
	}
	if(err != CL_SUCCESS) {
		if(error != NULL) {
			*error = err;
		}
		*device_extensions = "";
		return false;
	}
	if(error != NULL) {
		*error = CL_SUCCESS;
	}
	*device_extensions = &buffer[0];
	return true;
}
|
|
|
|
|
|
|
|
/* Convenience overload: extension string by value, empty string on failure. */
string OpenCLInfo::get_device_extensions(cl_device_id device_id)
{
	string extensions;
	const bool found = get_device_extensions(device_id, &extensions);
	if(found) {
		return extensions;
	}
	return "";
}
|
|
|
|
|
2017-03-20 17:57:37 +01:00
|
|
|
/* Query the type bitfield of the given device (CL_DEVICE_TYPE).
 *
 * device_type receives the type, and is forced to 0 on failure.
 * error, when not NULL, receives the status returned by clGetDeviceInfo().
 *
 * Returns true when the query succeeded.
 */
bool OpenCLInfo::get_device_type(cl_device_id device_id,
                                 cl_device_type *device_type,
                                 cl_int* error)
{
	const cl_int status = clGetDeviceInfo(device_id,
	                                      CL_DEVICE_TYPE,
	                                      sizeof(cl_device_type),
	                                      device_type,
	                                      NULL);
	/* The status is reported to the caller on both success and failure. */
	if(error != NULL) {
		*error = status;
	}
	if(status == CL_SUCCESS) {
		return true;
	}
	*device_type = 0;
	return false;
}
|
|
|
|
|
|
|
|
/* Convenience overload: device type by value, 0 when the query fails. */
cl_device_type OpenCLInfo::get_device_type(cl_device_id device_id)
{
	cl_device_type type;
	const bool ok = get_device_type(device_id, &type);
	if(ok) {
		return type;
	}
	return 0;
}
|
|
|
|
|
2017-03-20 19:05:04 +01:00
|
|
|
/* Build a device name suitable for presenting to the user.
 *
 * The AMD board name is preferred when the device reports a non-empty one,
 * otherwise the regular device name is used. Devices named exactly
 * "Radeon RX Vega" get their compute unit count appended, and CPU devices
 * get an " (OpenCL)" suffix.
 */
string OpenCLInfo::get_readable_device_name(cl_device_id device_id)
{
	string readable_name;

	/* Try the AMD specific board name query first. */
	char amd_board_name[1024];
	size_t board_name_size = 0;
	const cl_int board_status = clGetDeviceInfo(device_id,
	                                            CL_DEVICE_BOARD_NAME_AMD,
	                                            sizeof(amd_board_name),
	                                            &amd_board_name,
	                                            &board_name_size);
	if(board_status == CL_SUCCESS &&
	   board_name_size != 0 &&
	   amd_board_name[0] != '\0')
	{
		readable_name = amd_board_name;
	}

	/* Fallback to standard device name API. */
	if(readable_name.empty()) {
		readable_name = get_device_name(device_id);
	}

	/* Special exception for AMD Vega, need to be able to tell
	 * Vega 56 from 64 apart.
	 */
	if(readable_name == "Radeon RX Vega") {
		cl_int compute_units = 0;
		const cl_int units_status = clGetDeviceInfo(device_id,
		                                            CL_DEVICE_MAX_COMPUTE_UNITS,
		                                            sizeof(compute_units),
		                                            &compute_units,
		                                            NULL);
		if(units_status == CL_SUCCESS) {
			readable_name += " " + to_string(compute_units);
		}
	}

	/* Distinguish from our native CPU device. */
	const cl_device_type type = get_device_type(device_id);
	if(type & CL_DEVICE_TYPE_CPU) {
		readable_name += " (OpenCL)";
	}

	return readable_name;
}
|
|
|
|
|
2017-06-08 05:08:52 -04:00
|
|
|
/* Query and parse the driver version of the given device (CL_DRIVER_VERSION).
 *
 * major and minor receive the two leading numbers of the version string,
 * which is expected to start with "<major>.<minor>".
 * error, when not NULL, receives the status returned by clGetDeviceInfo().
 *
 * Returns false when the query fails or when the version string does not
 * contain both numbers; the latter case is also logged.
 */
bool OpenCLInfo::get_driver_version(cl_device_id device_id,
                                    int *major,
                                    int *minor,
                                    cl_int* error)
{
	char version_string[1024];
	const cl_int status = clGetDeviceInfo(device_id,
	                                      CL_DRIVER_VERSION,
	                                      sizeof(version_string),
	                                      &version_string,
	                                      NULL);
	/* The status is reported to the caller on both success and failure. */
	if(error != NULL) {
		*error = status;
	}
	if(status != CL_SUCCESS) {
		return false;
	}
	const int num_parsed = sscanf(version_string, "%d.%d", major, minor);
	if(num_parsed >= 2) {
		return true;
	}
	VLOG(1) << string_printf("OpenCL: failed to parse driver version string (%s).", version_string);
	return false;
}
|
|
|
|
|
2018-02-05 22:13:08 +01:00
|
|
|
/* Alignment derived from the device's CL_DEVICE_MEM_BASE_ADDR_ALIGN value.
 *
 * The queried value is treated as a number of bits and divided by 8.
 * Returns 1 when the query fails.
 */
int OpenCLInfo::mem_sub_ptr_alignment(cl_device_id device_id)
{
	int align_bits;
	const cl_int status = clGetDeviceInfo(device_id,
	                                      CL_DEVICE_MEM_BASE_ADDR_ALIGN,
	                                      sizeof(int),
	                                      &align_bits,
	                                      NULL);
	if(status != CL_SUCCESS) {
		return 1;
	}
	return align_bits/8;
}
|
|
|
|
|
2016-09-14 23:47:54 +02:00
|
|
|
CCL_NAMESPACE_END
|
|
|
|
|
|
|
|
#endif
|