Merge per-shader SVM nodes in the main update thread to avoid locking and resizing overhead
In a test file containing 1300 copies of the same shader, this reduces shader update time from 3.1 seconds to 0.05 seconds. Thanks to @swerner for noticing this issue. Reviewers: brecht, sergey, swerner. Subscribers: swerner. Differential Revision: https://developer.blender.org/D5376
This commit is contained in:
@@ -47,46 +47,23 @@ void SVMShaderManager::reset(Scene * /*scene*/)
|
|||||||
void SVMShaderManager::device_update_shader(Scene *scene,
|
void SVMShaderManager::device_update_shader(Scene *scene,
|
||||||
Shader *shader,
|
Shader *shader,
|
||||||
Progress *progress,
|
Progress *progress,
|
||||||
array<int4> *global_svm_nodes)
|
array<int4> *svm_nodes)
|
||||||
{
|
{
|
||||||
if (progress->get_cancel()) {
|
if (progress->get_cancel()) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
assert(shader->graph);
|
assert(shader->graph);
|
||||||
|
|
||||||
array<int4> svm_nodes;
|
svm_nodes->push_back_slow(make_int4(NODE_SHADER_JUMP, 0, 0, 0));
|
||||||
svm_nodes.push_back_slow(make_int4(NODE_SHADER_JUMP, 0, 0, 0));
|
|
||||||
|
|
||||||
SVMCompiler::Summary summary;
|
SVMCompiler::Summary summary;
|
||||||
SVMCompiler compiler(scene->shader_manager, scene->image_manager, scene->light_manager);
|
SVMCompiler compiler(scene->shader_manager, scene->image_manager, scene->light_manager);
|
||||||
compiler.background = (shader == scene->default_background);
|
compiler.background = (shader == scene->default_background);
|
||||||
compiler.compile(scene, shader, svm_nodes, 0, &summary);
|
compiler.compile(scene, shader, *svm_nodes, 0, &summary);
|
||||||
|
|
||||||
VLOG(2) << "Compilation summary:\n"
|
VLOG(2) << "Compilation summary:\n"
|
||||||
<< "Shader name: " << shader->name << "\n"
|
<< "Shader name: " << shader->name << "\n"
|
||||||
<< summary.full_report();
|
<< summary.full_report();
|
||||||
|
|
||||||
nodes_lock_.lock();
|
|
||||||
if (shader->use_mis && shader->has_surface_emission) {
|
|
||||||
scene->light_manager->need_update = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* The copy needs to be done inside the lock, if another thread resizes the array
|
|
||||||
* while memcpy is running, it'll be copying into possibly invalid/freed ram.
|
|
||||||
*/
|
|
||||||
size_t global_nodes_size = global_svm_nodes->size();
|
|
||||||
global_svm_nodes->resize(global_nodes_size + svm_nodes.size());
|
|
||||||
|
|
||||||
/* Offset local SVM nodes to a global address space. */
|
|
||||||
int4 &jump_node = (*global_svm_nodes)[shader->id];
|
|
||||||
jump_node.y = svm_nodes[0].y + global_nodes_size - 1;
|
|
||||||
jump_node.z = svm_nodes[0].z + global_nodes_size - 1;
|
|
||||||
jump_node.w = svm_nodes[0].w + global_nodes_size - 1;
|
|
||||||
/* Copy new nodes to global storage. */
|
|
||||||
memcpy(&(*global_svm_nodes)[global_nodes_size],
|
|
||||||
&svm_nodes[1],
|
|
||||||
sizeof(int4) * (svm_nodes.size() - 1));
|
|
||||||
nodes_lock_.unlock();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void SVMShaderManager::device_update(Device *device,
|
void SVMShaderManager::device_update(Device *device,
|
||||||
@@ -97,7 +74,9 @@ void SVMShaderManager::device_update(Device *device,
|
|||||||
if (!need_update)
|
if (!need_update)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
VLOG(1) << "Total " << scene->shaders.size() << " shaders.";
|
const int num_shaders = scene->shaders.size();
|
||||||
|
|
||||||
|
VLOG(1) << "Total " << num_shaders << " shaders.";
|
||||||
|
|
||||||
double start_time = time_dt();
|
double start_time = time_dt();
|
||||||
|
|
||||||
@@ -107,20 +86,17 @@ void SVMShaderManager::device_update(Device *device,
|
|||||||
/* determine which shaders are in use */
|
/* determine which shaders are in use */
|
||||||
device_update_shaders_used(scene);
|
device_update_shaders_used(scene);
|
||||||
|
|
||||||
/* svm_nodes */
|
/* Build all shaders. */
|
||||||
array<int4> svm_nodes;
|
|
||||||
size_t i;
|
|
||||||
|
|
||||||
for (i = 0; i < scene->shaders.size(); i++) {
|
|
||||||
svm_nodes.push_back_slow(make_int4(NODE_SHADER_JUMP, 0, 0, 0));
|
|
||||||
}
|
|
||||||
|
|
||||||
TaskPool task_pool;
|
TaskPool task_pool;
|
||||||
foreach (Shader *shader, scene->shaders) {
|
vector<array<int4>> shader_svm_nodes(num_shaders);
|
||||||
task_pool.push(
|
for (int i = 0; i < num_shaders; i++) {
|
||||||
function_bind(
|
task_pool.push(function_bind(&SVMShaderManager::device_update_shader,
|
||||||
&SVMShaderManager::device_update_shader, this, scene, shader, &progress, &svm_nodes),
|
this,
|
||||||
false);
|
scene,
|
||||||
|
scene->shaders[i],
|
||||||
|
&progress,
|
||||||
|
&shader_svm_nodes[i]),
|
||||||
|
false);
|
||||||
}
|
}
|
||||||
task_pool.wait_work();
|
task_pool.wait_work();
|
||||||
|
|
||||||
@@ -128,20 +104,60 @@ void SVMShaderManager::device_update(Device *device,
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
dscene->svm_nodes.steal_data(svm_nodes);
|
/* The global node list contains a jump table (one node per shader)
|
||||||
dscene->svm_nodes.copy_to_device();
|
* followed by the nodes of all shaders. */
|
||||||
|
int svm_nodes_size = num_shaders;
|
||||||
for (i = 0; i < scene->shaders.size(); i++) {
|
for (int i = 0; i < num_shaders; i++) {
|
||||||
Shader *shader = scene->shaders[i];
|
/* Since we're not copying the local jump node, the size ends up being one node lower. */
|
||||||
shader->need_update = false;
|
svm_nodes_size += shader_svm_nodes[i].size() - 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int4 *svm_nodes = dscene->svm_nodes.alloc(svm_nodes_size);
|
||||||
|
|
||||||
|
int node_offset = num_shaders;
|
||||||
|
for (int i = 0; i < num_shaders; i++) {
|
||||||
|
Shader *shader = scene->shaders[i];
|
||||||
|
|
||||||
|
shader->need_update = false;
|
||||||
|
if (shader->use_mis && shader->has_surface_emission) {
|
||||||
|
scene->light_manager->need_update = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Update the global jump table.
|
||||||
|
* Each compiled shader starts with a jump node that has offsets local
|
||||||
|
* to the shader, so copy those and add the offset into the global node list. */
|
||||||
|
int4 &global_jump_node = svm_nodes[shader->id];
|
||||||
|
int4 &local_jump_node = shader_svm_nodes[i][0];
|
||||||
|
|
||||||
|
global_jump_node.x = NODE_SHADER_JUMP;
|
||||||
|
global_jump_node.y = local_jump_node.y - 1 + node_offset;
|
||||||
|
global_jump_node.z = local_jump_node.z - 1 + node_offset;
|
||||||
|
global_jump_node.w = local_jump_node.w - 1 + node_offset;
|
||||||
|
|
||||||
|
node_offset += shader_svm_nodes[i].size() - 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Copy the nodes of each shader into the correct location. */
|
||||||
|
svm_nodes += num_shaders;
|
||||||
|
for (int i = 0; i < num_shaders; i++) {
|
||||||
|
int shader_size = shader_svm_nodes[i].size() - 1;
|
||||||
|
|
||||||
|
memcpy(svm_nodes, &shader_svm_nodes[i][1], sizeof(int4) * shader_size);
|
||||||
|
svm_nodes += shader_size;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (progress.get_cancel()) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
dscene->svm_nodes.copy_to_device();
|
||||||
|
|
||||||
device_update_common(device, dscene, scene, progress);
|
device_update_common(device, dscene, scene, progress);
|
||||||
|
|
||||||
need_update = false;
|
need_update = false;
|
||||||
|
|
||||||
VLOG(1) << "Shader manager updated " << scene->shaders.size() << " shaders in "
|
VLOG(1) << "Shader manager updated " << num_shaders << " shaders in " << time_dt() - start_time
|
||||||
<< time_dt() - start_time << " seconds.";
|
<< " seconds.";
|
||||||
}
|
}
|
||||||
|
|
||||||
void SVMShaderManager::device_free(Device *device, DeviceScene *dscene, Scene *scene)
|
void SVMShaderManager::device_free(Device *device, DeviceScene *dscene, Scene *scene)
|
||||||
|
@@ -50,13 +50,10 @@ class SVMShaderManager : public ShaderManager {
|
|||||||
void device_free(Device *device, DeviceScene *dscene, Scene *scene);
|
void device_free(Device *device, DeviceScene *dscene, Scene *scene);
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
/* Lock used to synchronize threaded nodes compilation. */
|
|
||||||
thread_spin_lock nodes_lock_;
|
|
||||||
|
|
||||||
void device_update_shader(Scene *scene,
|
void device_update_shader(Scene *scene,
|
||||||
Shader *shader,
|
Shader *shader,
|
||||||
Progress *progress,
|
Progress *progress,
|
||||||
array<int4> *global_svm_nodes);
|
array<int4> *svm_nodes);
|
||||||
};
|
};
|
||||||
|
|
||||||
/* Graph Compiler */
|
/* Graph Compiler */
|
||||||
|
Reference in New Issue
Block a user