Cycles: threading optimizations

* Multithreaded image loading, each thread can load a separate image.
* Better multithreading for multiple instanced meshes, different threads can now
  build BVH's for different meshes, rather than all cooperating on the same mesh.
  Especially noticeable for dynamic BVH building for the viewport, gave about
  2x faster build on 8 core in fairly complex scene with many objects.
* The main thread waiting for worker threads can now also work itself, so
  (num_cores + 1) threads will be working, this supposedly gives better
  performance on some operating systems, but did not measure performance for
  this very detailed yet.
This commit is contained in:
Brecht Van Lommel
2012-05-05 19:44:33 +00:00
parent c53fe94bb4
commit 8103381ded
9 changed files with 182 additions and 109 deletions

View File

@@ -36,12 +36,12 @@ CCL_NAMESPACE_BEGIN
class BVHBuildTask : public Task { class BVHBuildTask : public Task {
public: public:
BVHBuildTask(InnerNode *node_, int child_, BVHObjectBinning& range_, int level_) BVHBuildTask(BVHBuild *build, InnerNode *node, int child, BVHObjectBinning& range_, int level)
: node(node_), child(child_), level(level_), range(range_) {} : range(range_)
{
run = function_bind(&BVHBuild::thread_build_node, build, node, child, &range, level);
}
InnerNode *node;
int child;
int level;
BVHObjectBinning range; BVHObjectBinning range;
}; };
@@ -55,8 +55,7 @@ BVHBuild::BVHBuild(const vector<Object*>& objects_,
prim_object(prim_object_), prim_object(prim_object_),
params(params_), params(params_),
progress(progress_), progress(progress_),
progress_start_time(0.0), progress_start_time(0.0)
task_pool(function_bind(&BVHBuild::thread_build_node, this, _1, _2))
{ {
spatial_min_overlap = 0.0f; spatial_min_overlap = 0.0f;
} }
@@ -177,7 +176,7 @@ BVHNode* BVHBuild::run()
/* multithreaded binning build */ /* multithreaded binning build */
BVHObjectBinning rootbin(root, (references.size())? &references[0]: NULL); BVHObjectBinning rootbin(root, (references.size())? &references[0]: NULL);
rootnode = build_node(rootbin, 0); rootnode = build_node(rootbin, 0);
task_pool.wait(); task_pool.wait_work();
} }
/* delete if we cancelled */ /* delete if we cancelled */
@@ -210,25 +209,24 @@ void BVHBuild::progress_update()
progress_start_time = time_dt(); progress_start_time = time_dt();
} }
void BVHBuild::thread_build_node(Task *task_, int thread_id) void BVHBuild::thread_build_node(InnerNode *inner, int child, BVHObjectBinning *range, int level)
{ {
if(progress.get_cancel()) if(progress.get_cancel())
return; return;
/* build nodes */ /* build nodes */
BVHBuildTask *task = (BVHBuildTask*)task_; BVHNode *node = build_node(*range, level);
BVHNode *node = build_node(task->range, task->level);
/* set child in inner node */ /* set child in inner node */
task->node->children[task->child] = node; inner->children[child] = node;
/* update progress */ /* update progress */
if(task->range.size() < THREAD_TASK_SIZE) { if(range->size() < THREAD_TASK_SIZE) {
/*rotate(node, INT_MAX, 5);*/ /*rotate(node, INT_MAX, 5);*/
thread_scoped_lock lock(build_mutex); thread_scoped_lock lock(build_mutex);
progress_count += task->range.size(); progress_count += range->size();
progress_update(); progress_update();
} }
} }
@@ -262,8 +260,8 @@ BVHNode* BVHBuild::build_node(const BVHObjectBinning& range, int level)
/* threaded build */ /* threaded build */
inner = new InnerNode(range.bounds()); inner = new InnerNode(range.bounds());
task_pool.push(new BVHBuildTask(inner, 0, left, level + 1), true); task_pool.push(new BVHBuildTask(this, inner, 0, left, level + 1), true);
task_pool.push(new BVHBuildTask(inner, 1, right, level + 1), true); task_pool.push(new BVHBuildTask(this, inner, 1, right, level + 1), true);
} }
return inner; return inner;

View File

@@ -29,7 +29,9 @@
CCL_NAMESPACE_BEGIN CCL_NAMESPACE_BEGIN
class BVHBuildTask;
class BVHParams; class BVHParams;
class InnerNode;
class Mesh; class Mesh;
class Object; class Object;
class Progress; class Progress;
@@ -54,6 +56,7 @@ protected:
friend class BVHMixedSplit; friend class BVHMixedSplit;
friend class BVHObjectSplit; friend class BVHObjectSplit;
friend class BVHSpatialSplit; friend class BVHSpatialSplit;
friend class BVHBuildTask;
/* adding references */ /* adding references */
void add_reference_mesh(BoundBox& root, BoundBox& center, Mesh *mesh, int i); void add_reference_mesh(BoundBox& root, BoundBox& center, Mesh *mesh, int i);
@@ -68,7 +71,7 @@ protected:
/* threads */ /* threads */
enum { THREAD_TASK_SIZE = 4096 }; enum { THREAD_TASK_SIZE = 4096 };
void thread_build_node(Task *task_, int thread_id); void thread_build_node(InnerNode *node, int child, BVHObjectBinning *range, int level);
thread_mutex build_mutex; thread_mutex build_mutex;
/* progress */ /* progress */

View File

@@ -44,7 +44,6 @@ public:
KernelGlobals *kg; KernelGlobals *kg;
CPUDevice(int threads_num) CPUDevice(int threads_num)
: task_pool(function_bind(&CPUDevice::thread_run, this, _1, _2))
{ {
kg = kernel_globals_create(); kg = kernel_globals_create();
@@ -113,10 +112,8 @@ public:
#endif #endif
} }
void thread_run(Task *task_, int thread_id) void thread_run(DeviceTask *task)
{ {
DeviceTask *task = (DeviceTask*)task_;
if(task->type == DeviceTask::PATH_TRACE) if(task->type == DeviceTask::PATH_TRACE)
thread_path_trace(*task); thread_path_trace(*task);
else if(task->type == DeviceTask::TONEMAP) else if(task->type == DeviceTask::TONEMAP)
@@ -125,6 +122,15 @@ public:
thread_shader(*task); thread_shader(*task);
} }
class CPUDeviceTask : public DeviceTask {
public:
CPUDeviceTask(CPUDevice *device, DeviceTask& task)
: DeviceTask(task)
{
run = function_bind(&CPUDevice::thread_run, device, this);
}
};
void thread_path_trace(DeviceTask& task) void thread_path_trace(DeviceTask& task)
{ {
if(task_pool.cancelled()) if(task_pool.cancelled())
@@ -226,12 +232,12 @@ public:
task.split(tasks, TaskScheduler::num_threads()*10); task.split(tasks, TaskScheduler::num_threads()*10);
foreach(DeviceTask& task, tasks) foreach(DeviceTask& task, tasks)
task_pool.push(new DeviceTask(task)); task_pool.push(new CPUDeviceTask(this, task));
} }
void task_wait() void task_wait()
{ {
task_pool.wait(); task_pool.wait_work();
} }
void task_cancel() void task_cancel()

View File

@@ -324,8 +324,10 @@ bool ImageManager::file_load_float_image(Image *img, device_vector<float4>& tex_
return true; return true;
} }
void ImageManager::device_load_image(Device *device, DeviceScene *dscene, int slot) void ImageManager::device_load_image(Device *device, DeviceScene *dscene, int slot, Progress *progress)
{ {
if(progress->get_cancel())
return;
if(osl_texture_system) if(osl_texture_system)
return; return;
@@ -342,6 +344,9 @@ void ImageManager::device_load_image(Device *device, DeviceScene *dscene, int sl
} }
if(is_float) { if(is_float) {
string filename = path_filename(float_images[slot]->filename);
progress->set_status("Updating Images", "Loading " + filename);
device_vector<float4>& tex_img = dscene->tex_float_image[slot - TEX_IMAGE_FLOAT_START]; device_vector<float4>& tex_img = dscene->tex_float_image[slot - TEX_IMAGE_FLOAT_START];
if(tex_img.device_pointer) if(tex_img.device_pointer)
@@ -365,6 +370,9 @@ void ImageManager::device_load_image(Device *device, DeviceScene *dscene, int sl
device->tex_alloc(name.c_str(), tex_img, true, true); device->tex_alloc(name.c_str(), tex_img, true, true);
} }
else { else {
string filename = path_filename(images[slot]->filename);
progress->set_status("Updating Images", "Loading " + filename);
device_vector<uchar4>& tex_img = dscene->tex_image[slot]; device_vector<uchar4>& tex_img = dscene->tex_image[slot];
if(tex_img.device_pointer) if(tex_img.device_pointer)
@@ -387,6 +395,8 @@ void ImageManager::device_load_image(Device *device, DeviceScene *dscene, int sl
device->tex_alloc(name.c_str(), tex_img, true, true); device->tex_alloc(name.c_str(), tex_img, true, true);
} }
img->need_load = false;
} }
void ImageManager::device_free_image(Device *device, DeviceScene *dscene, int slot) void ImageManager::device_free_image(Device *device, DeviceScene *dscene, int slot)
@@ -431,39 +441,37 @@ void ImageManager::device_update(Device *device, DeviceScene *dscene, Progress&
{ {
if(!need_update) if(!need_update)
return; return;
TaskPool pool;
for(size_t slot = 0; slot < images.size(); slot++) { for(size_t slot = 0; slot < images.size(); slot++) {
if(images[slot]) { if(!images[slot])
if(images[slot]->users == 0) { continue;
device_free_image(device, dscene, slot);
}
else if(images[slot]->need_load) {
string name = path_filename(images[slot]->filename);
progress.set_status("Updating Images", "Loading " + name);
device_load_image(device, dscene, slot);
images[slot]->need_load = false;
}
if(progress.get_cancel()) return; if(images[slot]->users == 0) {
device_free_image(device, dscene, slot);
}
else if(images[slot]->need_load) {
if(!osl_texture_system)
pool.push(function_bind(&ImageManager::device_load_image, this, device, dscene, slot, &progress));
} }
} }
for(size_t slot = 0; slot < float_images.size(); slot++) { for(size_t slot = 0; slot < float_images.size(); slot++) {
if(float_images[slot]) { if(!float_images[slot])
if(float_images[slot]->users == 0) { continue;
device_free_image(device, dscene, slot + TEX_IMAGE_FLOAT_START);
}
else if(float_images[slot]->need_load) {
string name = path_filename(float_images[slot]->filename);
progress.set_status("Updating Images", "Loading " + name);
device_load_image(device, dscene, slot + TEX_IMAGE_FLOAT_START);
float_images[slot]->need_load = false;
}
if(progress.get_cancel()) return; if(float_images[slot]->users == 0) {
device_free_image(device, dscene, slot + TEX_IMAGE_FLOAT_START);
}
else if(float_images[slot]->need_load) {
if(!osl_texture_system)
pool.push(function_bind(&ImageManager::device_load_image, this, device, dscene, slot + TEX_IMAGE_FLOAT_START, &progress));
} }
} }
pool.wait_work();
need_update = false; need_update = false;
} }

View File

@@ -65,7 +65,7 @@ private:
bool file_load_image(Image *img, device_vector<uchar4>& tex_img); bool file_load_image(Image *img, device_vector<uchar4>& tex_img);
bool file_load_float_image(Image *img, device_vector<float4>& tex_img); bool file_load_float_image(Image *img, device_vector<float4>& tex_img);
void device_load_image(Device *device, DeviceScene *dscene, int slot); void device_load_image(Device *device, DeviceScene *dscene, int slot, Progress *progess);
void device_free_image(Device *device, DeviceScene *dscene, int slot); void device_free_image(Device *device, DeviceScene *dscene, int slot);
}; };

View File

@@ -242,31 +242,47 @@ void Mesh::pack_verts(float4 *tri_verts, float4 *tri_vindex, size_t vert_offset)
} }
} }
void Mesh::compute_bvh(SceneParams *params, Progress& progress) void Mesh::compute_bvh(SceneParams *params, Progress *progress, int n, int total)
{ {
Object object; if(progress->get_cancel())
object.mesh = this; return;
vector<Object*> objects; compute_bounds();
objects.push_back(&object);
if(bvh && !need_update_rebuild) { if(!transform_applied) {
progress.set_substatus("Refitting BVH"); string msg = "Updating Mesh BVH ";
bvh->objects = objects; if(name == "")
bvh->refit(progress); msg += string_printf("%u/%u", (uint)(n+1), (uint)total);
else
msg += string_printf("%s %u/%u", name.c_str(), (uint)(n+1), (uint)total);
Object object;
object.mesh = this;
vector<Object*> objects;
objects.push_back(&object);
if(bvh && !need_update_rebuild) {
progress->set_status(msg, "Refitting BVH");
bvh->objects = objects;
bvh->refit(*progress);
}
else {
progress->set_status(msg, "Building BVH");
BVHParams bparams;
bparams.use_cache = params->use_bvh_cache;
bparams.use_spatial_split = params->use_bvh_spatial_split;
bparams.use_qbvh = params->use_qbvh;
delete bvh;
bvh = BVH::create(bparams, objects);
bvh->build(*progress);
}
} }
else {
progress.set_substatus("Building BVH");
BVHParams bparams; need_update = false;
bparams.use_cache = params->use_bvh_cache; need_update_rebuild = false;
bparams.use_spatial_split = params->use_bvh_spatial_split;
bparams.use_qbvh = params->use_qbvh;
delete bvh;
bvh = BVH::create(bparams, objects);
bvh->build(progress);
}
} }
void Mesh::tag_update(Scene *scene, bool rebuild) void Mesh::tag_update(Scene *scene, bool rebuild)
@@ -686,35 +702,22 @@ void MeshManager::device_update(Device *device, DeviceScene *dscene, Scene *scen
} }
/* update bvh */ /* update bvh */
size_t i = 0, num_instance_bvh = 0; size_t i = 0, num_bvh = 0;
foreach(Mesh *mesh, scene->meshes) foreach(Mesh *mesh, scene->meshes)
if(mesh->need_update && !mesh->transform_applied) if(mesh->need_update && !mesh->transform_applied)
num_instance_bvh++; num_bvh++;
TaskPool pool;
foreach(Mesh *mesh, scene->meshes) { foreach(Mesh *mesh, scene->meshes) {
if(mesh->need_update) { if(mesh->need_update) {
mesh->compute_bounds(); pool.push(function_bind(&Mesh::compute_bvh, mesh, &scene->params, &progress, i, num_bvh));
i++;
if(!mesh->transform_applied) {
string msg = "Updating Mesh BVH ";
if(mesh->name == "")
msg += string_printf("%u/%u", (uint)(i+1), (uint)num_instance_bvh);
else
msg += string_printf("%s %u/%u", mesh->name.c_str(), (uint)(i+1), (uint)num_instance_bvh);
progress.set_status(msg, "Building BVH");
mesh->compute_bvh(&scene->params, progress);
i++;
}
if(progress.get_cancel()) return;
mesh->need_update = false;
mesh->need_update_rebuild = false;
} }
} }
pool.wait_work();
foreach(Shader *shader, scene->shaders) foreach(Shader *shader, scene->shaders)
shader->need_update_attributes = false; shader->need_update_attributes = false;

View File

@@ -96,7 +96,7 @@ public:
void pack_normals(Scene *scene, float4 *normal, float4 *vnormal); void pack_normals(Scene *scene, float4 *normal, float4 *vnormal);
void pack_verts(float4 *tri_verts, float4 *tri_vindex, size_t vert_offset); void pack_verts(float4 *tri_verts, float4 *tri_vindex, size_t vert_offset);
void compute_bvh(SceneParams *params, Progress& progress); void compute_bvh(SceneParams *params, Progress *progress, int n, int total);
bool need_attribute(Scene *scene, AttributeStandard std); bool need_attribute(Scene *scene, AttributeStandard std);
bool need_attribute(Scene *scene, ustring name); bool need_attribute(Scene *scene, ustring name);

View File

@@ -25,14 +25,12 @@ CCL_NAMESPACE_BEGIN
/* Task Pool */ /* Task Pool */
TaskPool::TaskPool(const TaskRunFunction& run_) TaskPool::TaskPool()
{ {
num = 0; num = 0;
num_done = 0; num_done = 0;
do_cancel = false; do_cancel = false;
run = run_;
} }
TaskPool::~TaskPool() TaskPool::~TaskPool()
@@ -50,12 +48,55 @@ void TaskPool::push(Task *task, bool front)
TaskScheduler::push(entry, front); TaskScheduler::push(entry, front);
} }
void TaskPool::wait() void TaskPool::push(const TaskRunFunction& run, bool front)
{ {
thread_scoped_lock lock(done_mutex); push(new Task(run), front);
}
while(num_done != num) void TaskPool::wait_work()
done_cond.wait(lock); {
thread_scoped_lock done_lock(done_mutex);
while(num_done != num) {
thread_scoped_lock queue_lock(TaskScheduler::queue_mutex);
/* find task from this pool. if we get a task from another pool,
* we can get into deadlock */
TaskScheduler::Entry work_entry;
bool found_entry = false;
list<TaskScheduler::Entry>::iterator it;
for(it = TaskScheduler::queue.begin(); it != TaskScheduler::queue.end(); it++) {
TaskScheduler::Entry& entry = *it;
if(entry.pool == this) {
work_entry = entry;
found_entry = true;
TaskScheduler::queue.erase(it);
break;
}
}
queue_lock.unlock();
/* if found task, do it, otherwise wait until other tasks are done */
if(found_entry) {
done_lock.unlock();
/* run task */
work_entry.task->run();
/* delete task */
delete work_entry.task;
/* notify pool task was done */
done_increase(1);
done_lock.lock();
}
else
done_cond.wait(done_lock);
}
} }
void TaskPool::cancel() void TaskPool::cancel()
@@ -63,7 +104,12 @@ void TaskPool::cancel()
TaskScheduler::clear(this); TaskScheduler::clear(this);
do_cancel = true; do_cancel = true;
wait(); {
thread_scoped_lock lock(done_mutex);
while(num_done != num)
done_cond.wait(lock);
}
do_cancel = false; do_cancel = false;
} }
@@ -94,6 +140,7 @@ void TaskPool::done_increase(int done)
thread_mutex TaskScheduler::mutex; thread_mutex TaskScheduler::mutex;
int TaskScheduler::users = 0; int TaskScheduler::users = 0;
vector<thread*> TaskScheduler::threads; vector<thread*> TaskScheduler::threads;
vector<int> TaskScheduler::thread_level;
volatile bool TaskScheduler::do_exit = false; volatile bool TaskScheduler::do_exit = false;
list<TaskScheduler::Entry> TaskScheduler::queue; list<TaskScheduler::Entry> TaskScheduler::queue;
@@ -114,9 +161,12 @@ void TaskScheduler::init(int num_threads)
num_threads = system_cpu_thread_count(); num_threads = system_cpu_thread_count();
threads.resize(num_threads); threads.resize(num_threads);
thread_level.resize(num_threads);
for(size_t i = 0; i < threads.size(); i++) for(size_t i = 0; i < threads.size(); i++) {
threads[i] = new thread(function_bind(&TaskScheduler::thread_run, i)); threads[i] = new thread(function_bind(&TaskScheduler::thread_run, i));
thread_level[i] = 0;
}
} }
users++; users++;
@@ -140,6 +190,7 @@ void TaskScheduler::exit()
} }
threads.clear(); threads.clear();
thread_level.clear();
} }
} }
@@ -170,7 +221,7 @@ void TaskScheduler::thread_run(int thread_id)
/* keep popping off tasks */ /* keep popping off tasks */
while(thread_wait_pop(entry)) { while(thread_wait_pop(entry)) {
/* run task */ /* run task */
entry.pool->run(entry.task, thread_id); entry.task->run();
/* delete task */ /* delete task */
delete entry.task; delete entry.task;
@@ -196,20 +247,20 @@ void TaskScheduler::push(Entry& entry, bool front)
void TaskScheduler::clear(TaskPool *pool) void TaskScheduler::clear(TaskPool *pool)
{ {
thread_scoped_lock lock(TaskScheduler::queue_mutex); thread_scoped_lock lock(queue_mutex);
/* erase all tasks from this pool from the queue */ /* erase all tasks from this pool from the queue */
list<TaskScheduler::Entry>::iterator it = TaskScheduler::queue.begin(); list<Entry>::iterator it = queue.begin();
int done = 0; int done = 0;
while(it != TaskScheduler::queue.end()) { while(it != queue.end()) {
TaskScheduler::Entry& entry = *it; Entry& entry = *it;
if(entry.pool == pool) { if(entry.pool == pool) {
done++; done++;
delete entry.task; delete entry.task;
it = TaskScheduler::queue.erase(it); it = queue.erase(it);
} }
else else
it++; it++;

View File

@@ -29,7 +29,7 @@ class Task;
class TaskPool; class TaskPool;
class TaskScheduler; class TaskScheduler;
typedef boost::function<void(Task*,int)> TaskRunFunction; typedef boost::function<void(void)> TaskRunFunction;
/* Task /* Task
* *
@@ -39,7 +39,11 @@ class Task
{ {
public: public:
Task() {}; Task() {};
Task(const TaskRunFunction& run_) : run(run_) {}
virtual ~Task() {} virtual ~Task() {}
TaskRunFunction run;
}; };
/* Task Pool /* Task Pool
@@ -54,12 +58,13 @@ public:
class TaskPool class TaskPool
{ {
public: public:
TaskPool(const TaskRunFunction& run); TaskPool();
~TaskPool(); ~TaskPool();
void push(Task *task, bool front = false); void push(Task *task, bool front = false);
void push(const TaskRunFunction& run, bool front = false);
void wait(); /* wait until all tasks are done */ void wait_work(); /* work and wait until all tasks are done */
void cancel(); /* cancel all tasks, keep worker threads running */ void cancel(); /* cancel all tasks, keep worker threads running */
void stop(); /* stop all worker threads */ void stop(); /* stop all worker threads */
@@ -70,8 +75,6 @@ protected:
void done_increase(int done); void done_increase(int done);
TaskRunFunction run;
thread_mutex done_mutex; thread_mutex done_mutex;
thread_condition_variable done_cond; thread_condition_variable done_cond;
@@ -103,6 +106,7 @@ protected:
static thread_mutex mutex; static thread_mutex mutex;
static int users; static int users;
static vector<thread*> threads; static vector<thread*> threads;
static vector<int> thread_level;
static volatile bool do_exit; static volatile bool do_exit;
static list<Entry> queue; static list<Entry> queue;