Cycles: merge of changes from tomato branch.

Regular rendering is now tiled, and supports save buffers to reduce memory use
during rendering and to cache render results.

Brick texture node by Thomas.
http://wiki.blender.org/index.php/Doc:2.6/Manual/Render/Cycles/Nodes/Textures#Brick_Texture

Image texture Blended Box Mapping.
http://wiki.blender.org/index.php/Doc:2.6/Manual/Render/Cycles/Nodes/Textures#Image_Texture
http://mango.blender.org/production/blended_box/

Various bug fixes by Sergey and Campbell.
* Fix for reading freed memory in some node setups.
* Fix incorrect memory read when synchronizing mesh motion.
* Fix crash appearing when direct light usage is different on different layers.
* Fix for vector pass giving wrong results in some circumstances.
* Fix for wrong resolution used for rendering Render Layer node.
* Option to cancel rendering when doing initial synchronization.
* No more texture limit when using CPU render.
* Many fixes for new tiled rendering.
This commit is contained in:
Brecht Van Lommel
2012-09-04 13:29:07 +00:00
parent 68563134d4
commit adea12cb01
69 changed files with 1983 additions and 708 deletions

View File

@@ -17,6 +17,7 @@ set(SRC
device_multi.cpp
device_network.cpp
device_opencl.cpp
device_task.cpp
)
set(SRC_HEADERS
@@ -24,6 +25,7 @@ set(SRC_HEADERS
device_memory.h
device_intern.h
device_network.h
device_task.h
)
add_definitions(-DGLEW_STATIC)

View File

@@ -33,65 +33,6 @@
CCL_NAMESPACE_BEGIN
/* Device Task */
/* Construct a task of the given type with every integer field and device
 * pointer set to zero.
 *
 * offset and stride are zeroed as well: split() copies whole DeviceTask
 * objects, so leaving them unset would mean copying indeterminate values
 * for any task whose creator never assigned them. */
DeviceTask::DeviceTask(Type type_)
: type(type_), x(0), y(0), w(0), h(0), rng_state(0), rgba(0), buffer(0),
  sample(0), resolution(0), offset(0), stride(0),
  shader_input(0), shader_output(0),
  shader_eval_type(0), shader_x(0), shader_w(0)
{
}
/* Split this task into pieces that each cover at most max_size elements and
 * append them to tasks.
 *
 * For SHADER tasks the budget counts shader_w entries; for every other type
 * it is first converted into a number of rows of width w. The actual
 * splitting is delegated to split(). */
void DeviceTask::split_max_size(list<DeviceTask>& tasks, int max_size)
{
	/* a budget of zero would divide by zero below and a negative one would
	 * produce a negative piece count (silently dropping the work), so fall
	 * back to the smallest usable budget */
	max_size = max(1, max_size);

	int num;

	if(type == SHADER) {
		num = (shader_w + max_size - 1)/max_size;
	}
	else {
		/* w is 0 for a default constructed task; rows of zero width cost
		 * nothing, so the whole budget can be spent on rows instead of
		 * dividing by zero */
		int rows = (w != 0)? max_size/w: max_size;

		max_size = max(1, rows);
		num = (h + max_size - 1)/max_size;
	}

	split(tasks, num);
}
/* Append up to num copies of this task to tasks, each covering a consecutive
 * slice of the original range.
 *
 * SHADER tasks are sliced along shader_x/shader_w, all other types along
 * y/h. The piece count is capped by the size of the range, every piece gets
 * the same integer share and the last one additionally takes the remainder. */
void DeviceTask::split(list<DeviceTask>& tasks, int num)
{
	const bool shader_task = (type == SHADER);
	const int extent = shader_task? shader_w: h;
	const int start = shader_task? shader_x: y;
	const int pieces = min(extent, num);

	for(int piece = 0; piece < pieces; piece++) {
		const int step = extent/pieces;
		const int piece_start = start + step*piece;
		const int piece_size = (piece == pieces - 1)? extent - piece*step: step;

		DeviceTask sub = *this;

		if(shader_task) {
			sub.shader_x = piece_start;
			sub.shader_w = piece_size;
		}
		else {
			sub.y = piece_start;
			sub.h = piece_size;
		}

		tasks.push_back(sub);
	}
}
/* Device */
void Device::pixels_alloc(device_memory& mem)

View File

@@ -22,10 +22,10 @@
#include <stdlib.h>
#include "device_memory.h"
#include "device_task.h"
#include "util_list.h"
#include "util_string.h"
#include "util_task.h"
#include "util_thread.h"
#include "util_types.h"
#include "util_vector.h"
@@ -33,6 +33,7 @@
CCL_NAMESPACE_BEGIN
class Progress;
class RenderTile;
/* Device Types */
@@ -67,32 +68,6 @@ public:
}
};
/* Device Task */
/* One unit of work handed to a device. Derives from Task and adds the task
 * kind together with the ranges and device pointers that kind works on. */
class DeviceTask : public Task {
public:
/* kind of work; split() and split_max_size() handle SHADER tasks differently
 * from the other types */
typedef enum { PATH_TRACE, TONEMAP, SHADER } Type;
Type type;
/* rectangle the task covers; non-SHADER tasks are split along y/h */
int x, y, w, h;
/* device pointers, all set to 0 by the constructor */
device_ptr rng_state;
device_ptr rgba;
device_ptr buffer;
/* presumably the sample index being rendered - TODO confirm */
int sample;
/* NOTE(review): meaning not visible in this file - confirm against callers */
int resolution;
/* presumably addressing parameters for buffer (passed through to the path
 * trace kernels) - TODO confirm */
int offset, stride;
/* SHADER task data; shader_x/shader_w is the range split() divides */
device_ptr shader_input;
device_ptr shader_output;
int shader_eval_type;
int shader_x, shader_w;
/* creates a task of the given type, PATH_TRACE when omitted */
DeviceTask(Type type = PATH_TRACE);
/* append up to num pieces of this task to tasks */
void split(list<DeviceTask>& tasks, int num);
/* append pieces of at most max_size elements each to tasks */
void split_max_size(list<DeviceTask>& tasks, int max_size);
};
/* Device */
class Device {
@@ -150,6 +125,10 @@ public:
void server_run();
#endif
/* multi device */
/* Hook for devices that wrap sub devices; this default implementation leaves
 * the tile untouched. */
virtual void map_tile(Device *sub_device, RenderTile& tile) {}
/* Number of sub_device within this device; this default implementation
 * always returns 0. */
virtual int device_number(Device *sub_device) { return 0; }
/* static */
static Device *create(DeviceInfo& info, bool background = true, int threads = 0);

View File

@@ -27,6 +27,8 @@
#include "osl_shader.h"
#include "buffers.h"
#include "util_debug.h"
#include "util_foreach.h"
#include "util_function.h"
@@ -141,28 +143,56 @@ public:
OSLShader::thread_init(kg);
#endif
RenderTile tile;
while(task.acquire_tile(this, tile)) {
float *render_buffer = (float*)tile.buffer;
uint *rng_state = (uint*)tile.rng_state;
int start_sample = tile.start_sample;
int end_sample = tile.start_sample + tile.num_samples;
#ifdef WITH_OPTIMIZED_KERNEL
if(system_cpu_support_optimized()) {
for(int y = task.y; y < task.y + task.h; y++) {
for(int x = task.x; x < task.x + task.w; x++)
kernel_cpu_optimized_path_trace(kg, (float*)task.buffer, (unsigned int*)task.rng_state,
task.sample, x, y, task.offset, task.stride);
if(system_cpu_support_optimized()) {
for(int sample = start_sample; sample < end_sample; sample++) {
if (task.get_cancel() || task_pool.cancelled())
break;
if(task_pool.cancelled())
break;
for(int y = tile.y; y < tile.y + tile.h; y++) {
for(int x = tile.x; x < tile.x + tile.w; x++) {
kernel_cpu_optimized_path_trace(kg, render_buffer, rng_state,
sample, x, y, tile.offset, tile.stride);
}
}
tile.sample = sample + 1;
task.update_progress(tile);
}
}
}
else
else
#endif
{
for(int y = task.y; y < task.y + task.h; y++) {
for(int x = task.x; x < task.x + task.w; x++)
kernel_cpu_path_trace(kg, (float*)task.buffer, (unsigned int*)task.rng_state,
task.sample, x, y, task.offset, task.stride);
{
for(int sample = start_sample; sample < end_sample; sample++) {
if (task.get_cancel() || task_pool.cancelled())
break;
if(task_pool.cancelled())
break;
for(int y = tile.y; y < tile.y + tile.h; y++) {
for(int x = tile.x; x < tile.x + tile.w; x++) {
kernel_cpu_path_trace(kg, render_buffer, rng_state,
sample, x, y, tile.offset, tile.stride);
}
}
tile.sample = sample + 1;
task.update_progress(tile);
}
}
task.release_tile(tile);
if(task_pool.cancelled())
break;
}
#ifdef WITH_OSL
@@ -228,8 +258,7 @@ public:
/* split task into smaller ones, more than number of threads for uneven
* workloads where some parts of the image render slower than others */
list<DeviceTask> tasks;
task.split(tasks, TaskScheduler::num_threads()*10);
task.split(tasks, TaskScheduler::num_threads()+1);
foreach(DeviceTask& task, tasks)
task_pool.push(new CPUDeviceTask(this, task));

View File

@@ -23,6 +23,8 @@
#include "device.h"
#include "device_intern.h"
#include "buffers.h"
#include "util_cuda.h"
#include "util_debug.h"
#include "util_map.h"
@@ -37,6 +39,7 @@ CCL_NAMESPACE_BEGIN
class CUDADevice : public Device
{
public:
TaskPool task_pool;
CUdevice cuDevice;
CUcontext cuContext;
CUmodule cuModule;
@@ -192,6 +195,8 @@ public:
/* Stops the task pool first and only then pushes and detaches the CUDA
 * context - presumably so that no queued task still runs against the context
 * while it is being torn down (TODO confirm). */
~CUDADevice()
{
task_pool.stop();
cuda_push_context();
cuda_assert(cuCtxDetach(cuContext))
}
@@ -466,13 +471,13 @@ public:
}
}
void path_trace(DeviceTask& task)
void path_trace(RenderTile& rtile, int sample)
{
cuda_push_context();
CUfunction cuPathTrace;
CUdeviceptr d_buffer = cuda_device_ptr(task.buffer);
CUdeviceptr d_rng_state = cuda_device_ptr(task.rng_state);
CUdeviceptr d_buffer = cuda_device_ptr(rtile.buffer);
CUdeviceptr d_rng_state = cuda_device_ptr(rtile.rng_state);
/* get kernel function */
cuda_assert(cuModuleGetFunction(&cuPathTrace, cuModule, "kernel_cuda_path_trace"))
@@ -486,29 +491,28 @@ public:
cuda_assert(cuParamSetv(cuPathTrace, offset, &d_rng_state, sizeof(d_rng_state)))
offset += sizeof(d_rng_state);
int sample = task.sample;
offset = align_up(offset, __alignof(sample));
cuda_assert(cuParamSeti(cuPathTrace, offset, task.sample))
offset += sizeof(task.sample);
cuda_assert(cuParamSeti(cuPathTrace, offset, sample))
offset += sizeof(sample);
cuda_assert(cuParamSeti(cuPathTrace, offset, task.x))
offset += sizeof(task.x);
cuda_assert(cuParamSeti(cuPathTrace, offset, rtile.x))
offset += sizeof(rtile.x);
cuda_assert(cuParamSeti(cuPathTrace, offset, task.y))
offset += sizeof(task.y);
cuda_assert(cuParamSeti(cuPathTrace, offset, rtile.y))
offset += sizeof(rtile.y);
cuda_assert(cuParamSeti(cuPathTrace, offset, task.w))
offset += sizeof(task.w);
cuda_assert(cuParamSeti(cuPathTrace, offset, rtile.w))
offset += sizeof(rtile.w);
cuda_assert(cuParamSeti(cuPathTrace, offset, task.h))
offset += sizeof(task.h);
cuda_assert(cuParamSeti(cuPathTrace, offset, rtile.h))
offset += sizeof(rtile.h);
cuda_assert(cuParamSeti(cuPathTrace, offset, task.offset))
offset += sizeof(task.offset);
cuda_assert(cuParamSeti(cuPathTrace, offset, rtile.offset))
offset += sizeof(rtile.offset);
cuda_assert(cuParamSeti(cuPathTrace, offset, task.stride))
offset += sizeof(task.stride);
cuda_assert(cuParamSeti(cuPathTrace, offset, rtile.stride))
offset += sizeof(rtile.stride);
cuda_assert(cuParamSetSize(cuPathTrace, offset))
@@ -520,23 +524,25 @@ public:
int xthreads = 8;
int ythreads = 8;
#endif
int xblocks = (task.w + xthreads - 1)/xthreads;
int yblocks = (task.h + ythreads - 1)/ythreads;
int xblocks = (rtile.w + xthreads - 1)/xthreads;
int yblocks = (rtile.h + ythreads - 1)/ythreads;
cuda_assert(cuFuncSetCacheConfig(cuPathTrace, CU_FUNC_CACHE_PREFER_L1))
cuda_assert(cuFuncSetBlockShape(cuPathTrace, xthreads, ythreads, 1))
cuda_assert(cuLaunchGrid(cuPathTrace, xblocks, yblocks))
cuda_assert(cuCtxSynchronize())
cuda_pop_context();
}
void tonemap(DeviceTask& task)
void tonemap(DeviceTask& task, device_ptr buffer, device_ptr rgba)
{
cuda_push_context();
CUfunction cuFilmConvert;
CUdeviceptr d_rgba = map_pixels(task.rgba);
CUdeviceptr d_buffer = cuda_device_ptr(task.buffer);
CUdeviceptr d_rgba = map_pixels(rgba);
CUdeviceptr d_buffer = cuda_device_ptr(buffer);
/* get kernel function */
cuda_assert(cuModuleGetFunction(&cuFilmConvert, cuModule, "kernel_cuda_tonemap"))
@@ -820,27 +826,71 @@ public:
Device::draw_pixels(mem, y, w, h, dy, width, height, transparent);
}
/* Task pool entry point: executes one queued DeviceTask.
 *
 * PATH_TRACE tasks keep acquiring tiles until acquire_tile() reports there
 * are none left, rendering each tile one sample at a time so cancellation
 * can be checked and progress reported in between. SHADER tasks run the
 * shader kernel and then wait for the context to finish. Other task types
 * are ignored here. */
void thread_run(DeviceTask *task)
{
	switch(task->type) {
		case DeviceTask::PATH_TRACE: {
			RenderTile tile;

			while(task->acquire_tile(this, tile)) {
				const int first_sample = tile.start_sample;
				const int last_sample = tile.start_sample + tile.num_samples;
				int sample = first_sample;

				/* stop early on cancel, the tile is still released below */
				while(sample < last_sample && !task->get_cancel()) {
					path_trace(tile, sample);

					sample++;
					tile.sample = sample;

					task->update_progress(tile);
				}

				task->release_tile(tile);
			}
			break;
		}
		case DeviceTask::SHADER: {
			shader(*task);

			cuda_push_context();
			cuda_assert(cuCtxSynchronize())
			cuda_pop_context();
			break;
		}
		default:
			break;
	}
}
/* Copy of a DeviceTask whose run callback calls CUDADevice::thread_run() on
 * the given device with this task object as argument. */
class CUDADeviceTask : public DeviceTask {
public:
CUDADeviceTask(CUDADevice *device, DeviceTask& task)
: DeviceTask(task)
{
/* run is not declared by DeviceTask itself; presumably it is the callback
 * inherited from Task that the task pool invokes - TODO confirm */
run = function_bind(&CUDADevice::thread_run, device, this);
}
};
void task_add(DeviceTask& task)
{
if(task.type == DeviceTask::TONEMAP)
tonemap(task);
else if(task.type == DeviceTask::PATH_TRACE)
path_trace(task);
else if(task.type == DeviceTask::SHADER)
shader(task);
if(task.type == DeviceTask::TONEMAP) {
/* must be done in main thread due to opengl access */
tonemap(task, task.buffer, task.rgba);
cuda_push_context();
cuda_assert(cuCtxSynchronize())
cuda_pop_context();
}
else {
task_pool.push(new CUDADeviceTask(this, task));
}
}
void task_wait()
{
cuda_push_context();
cuda_assert(cuCtxSynchronize())
cuda_pop_context();
task_pool.wait_work();
}
/* Forwards the cancel request to this device's task pool. */
void task_cancel()
{
task_pool.cancel();
}
};

View File

@@ -23,6 +23,8 @@
#include "device_intern.h"
#include "device_network.h"
#include "buffers.h"
#include "util_foreach.h"
#include "util_list.h"
#include "util_map.h"
@@ -255,6 +257,30 @@ public:
rgba.device_pointer = tmp;
}
/* Rewrite the device pointers stored in tile through the ptr_map of every
 * sub device entry that matches sub_device. Pointers that are 0 are left
 * untouched; entries for other sub devices are skipped. */
void map_tile(Device *sub_device, RenderTile& tile)
{
	foreach(SubDevice& sub, devices) {
		if(sub.device != sub_device)
			continue;

		if(tile.buffer)
			tile.buffer = sub.ptr_map[tile.buffer];

		if(tile.rng_state)
			tile.rng_state = sub.ptr_map[tile.rng_state];

		if(tile.rgba)
			tile.rgba = sub.ptr_map[tile.rgba];
	}
}
/* Position of sub_device within the devices list, counting from 0, or -1
 * when no entry refers to it. */
int device_number(Device *sub_device)
{
	int index = 0;

	foreach(SubDevice& sub, devices) {
		if(sub.device != sub_device) {
			index++;
			continue;
		}

		return index;
	}

	return -1;
}
void task_add(DeviceTask& task)
{
list<DeviceTask> tasks;
@@ -266,7 +292,6 @@ public:
tasks.pop_front();
if(task.buffer) subtask.buffer = sub.ptr_map[task.buffer];
if(task.rng_state) subtask.rng_state = sub.ptr_map[task.rng_state];
if(task.rgba) subtask.rgba = sub.ptr_map[task.rgba];
if(task.shader_input) subtask.shader_input = sub.ptr_map[task.shader_input];
if(task.shader_output) subtask.shader_output = sub.ptr_map[task.shader_output];

View File

@@ -25,6 +25,8 @@
#include "device.h"
#include "device_intern.h"
#include "buffers.h"
#include "util_foreach.h"
#include "util_map.h"
#include "util_math.h"
@@ -41,6 +43,7 @@ CCL_NAMESPACE_BEGIN
class OpenCLDevice : public Device
{
public:
TaskPool task_pool;
cl_context cxContext;
cl_command_queue cqCommandQueue;
cl_platform_id cpPlatform;
@@ -435,6 +438,8 @@ public:
~OpenCLDevice()
{
task_pool.stop();
if(null_mem)
clReleaseMemObject(CL_MEM_PTR(null_mem));
@@ -540,19 +545,19 @@ public:
return global_size + ((r == 0)? 0: group_size - r);
}
void path_trace(DeviceTask& task)
void path_trace(RenderTile& rtile, int sample)
{
/* cast arguments to cl types */
cl_mem d_data = CL_MEM_PTR(const_mem_map["__data"]->device_pointer);
cl_mem d_buffer = CL_MEM_PTR(task.buffer);
cl_mem d_rng_state = CL_MEM_PTR(task.rng_state);
cl_int d_x = task.x;
cl_int d_y = task.y;
cl_int d_w = task.w;
cl_int d_h = task.h;
cl_int d_sample = task.sample;
cl_int d_offset = task.offset;
cl_int d_stride = task.stride;
cl_mem d_buffer = CL_MEM_PTR(rtile.buffer);
cl_mem d_rng_state = CL_MEM_PTR(rtile.rng_state);
cl_int d_x = rtile.x;
cl_int d_y = rtile.y;
cl_int d_w = rtile.w;
cl_int d_h = rtile.h;
cl_int d_sample = sample;
cl_int d_offset = rtile.offset;
cl_int d_stride = rtile.stride;
/* sample arguments */
int narg = 0;
@@ -613,12 +618,12 @@ public:
return err;
}
void tonemap(DeviceTask& task)
void tonemap(DeviceTask& task, device_ptr buffer, device_ptr rgba)
{
/* cast arguments to cl types */
cl_mem d_data = CL_MEM_PTR(const_mem_map["__data"]->device_pointer);
cl_mem d_rgba = CL_MEM_PTR(task.rgba);
cl_mem d_buffer = CL_MEM_PTR(task.buffer);
cl_mem d_rgba = CL_MEM_PTR(rgba);
cl_mem d_buffer = CL_MEM_PTR(buffer);
cl_int d_x = task.x;
cl_int d_y = task.y;
cl_int d_w = task.w;
@@ -667,30 +672,57 @@ public:
opencl_assert(clFinish(cqCommandQueue));
}
void task_add(DeviceTask& maintask)
void thread_run(DeviceTask *task)
{
list<DeviceTask> tasks;
/* arbitrary limit to work around apple ATI opencl issue */
if(platform_name == "Apple")
maintask.split_max_size(tasks, 76800);
else
tasks.push_back(maintask);
foreach(DeviceTask& task, tasks) {
if(task.type == DeviceTask::TONEMAP)
tonemap(task);
else if(task.type == DeviceTask::PATH_TRACE)
path_trace(task);
if(task->type == DeviceTask::TONEMAP) {
tonemap(*task, task->buffer, task->rgba);
}
else if(task->type == DeviceTask::PATH_TRACE) {
RenderTile tile;
/* keep rendering tiles until done */
while(task->acquire_tile(this, tile)) {
int start_sample = tile.start_sample;
int end_sample = tile.start_sample + tile.num_samples;
for(int sample = start_sample; sample < end_sample; sample++) {
if (task->get_cancel())
break;
path_trace(tile, sample);
tile.sample = sample + 1;
task->update_progress(tile);
}
task->release_tile(tile);
}
}
}
/* Copy of a DeviceTask whose run callback calls OpenCLDevice::thread_run() on
 * the given device with this task object as argument. */
class OpenCLDeviceTask : public DeviceTask {
public:
OpenCLDeviceTask(OpenCLDevice *device, DeviceTask& task)
: DeviceTask(task)
{
/* run is not declared by DeviceTask itself; presumably it is the callback
 * inherited from Task that the task pool invokes - TODO confirm */
run = function_bind(&OpenCLDevice::thread_run, device, this);
}
};
/* Wraps a copy of task in an OpenCLDeviceTask and pushes it onto the task
 * pool; the work itself is done by thread_run(). */
void task_add(DeviceTask& task)
{
/* NOTE(review): presumably the pool takes ownership of the allocation - confirm */
task_pool.push(new OpenCLDeviceTask(this, task));
}
/* Delegates to the task pool's wait_work(), presumably blocking until the
 * queued tasks are done - TODO confirm. */
void task_wait()
{
task_pool.wait_work();
}
/* Forwards the cancel request to this device's task pool. */
void task_cancel()
{
task_pool.cancel();
}
};

View File

@@ -0,0 +1,113 @@
/*
* Copyright 2011, Blender Foundation.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software Foundation,
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
#include <stdlib.h>
#include <string.h>
#include "device_task.h"
#include "util_algorithm.h"
#include "util_time.h"
CCL_NAMESPACE_BEGIN
/* Device Task */
/* Construct a task of the given type: integer fields and device pointers
 * start at zero, num_samples at 1, and the progress timer at the current
 * time_dt() value.
 *
 * offset and stride are zeroed as well: split() copies whole DeviceTask
 * objects, so leaving them unset would mean copying indeterminate values
 * for any task whose creator never assigned them. */
DeviceTask::DeviceTask(Type type_)
: type(type_), x(0), y(0), w(0), h(0), rgba(0), buffer(0),
  sample(0), num_samples(1), resolution(0), offset(0), stride(0),
  shader_input(0), shader_output(0),
  shader_eval_type(0), shader_x(0), shader_w(0)
{
	/* reference point for the throttling done in update_progress() */
	last_update_time = time_dt();
}
/* Split this task into pieces that each cover at most max_size elements and
 * append them to tasks.
 *
 * For SHADER tasks the budget counts shader_w entries; for every other type
 * it is first converted into a number of rows of width w. The actual
 * splitting is delegated to split(). */
void DeviceTask::split_max_size(list<DeviceTask>& tasks, int max_size)
{
	/* a budget of zero would divide by zero below and a negative one would
	 * produce a negative piece count (silently dropping the work), so fall
	 * back to the smallest usable budget */
	max_size = max(1, max_size);

	int num;

	if(type == SHADER) {
		num = (shader_w + max_size - 1)/max_size;
	}
	else {
		/* w is 0 for a default constructed task; rows of zero width cost
		 * nothing, so the whole budget can be spent on rows instead of
		 * dividing by zero */
		int rows = (w != 0)? max_size/w: max_size;

		max_size = max(1, rows);
		num = (h + max_size - 1)/max_size;
	}

	split(tasks, num);
}
/* Append pieces of this task to tasks.
 *
 * SHADER tasks are cut into at most num consecutive slices of the
 * shader_x/shader_w range. PATH_TRACE tasks are not cut at all: num
 * identical copies are appended. Every other type is cut into at most num
 * consecutive slices of the y/h range. When slicing, all pieces get the same
 * integer share and the last one additionally takes the remainder. */
void DeviceTask::split(list<DeviceTask>& tasks, int num)
{
	switch(type) {
		case SHADER: {
			const int pieces = min(shader_w, num);

			for(int piece = 0; piece < pieces; piece++) {
				const int step = shader_w/pieces;

				DeviceTask sub = *this;
				sub.shader_x = shader_x + step*piece;
				sub.shader_w = (piece == pieces - 1)? shader_w - piece*step: step;

				tasks.push_back(sub);
			}
			break;
		}
		case PATH_TRACE: {
			for(int copies = num; copies > 0; copies--)
				tasks.push_back(*this);
			break;
		}
		default: {
			const int pieces = min(h, num);

			for(int piece = 0; piece < pieces; piece++) {
				const int step = h/pieces;

				DeviceTask sub = *this;
				sub.y = y + step*piece;
				sub.h = (piece == pieces - 1)? h - piece*step: step;

				tasks.push_back(sub);
			}
			break;
		}
	}
}
/* Report progress for rtile; does nothing unless this is a PATH_TRACE task.
 *
 * update_progress_sample, when set, is invoked on every call.
 * update_tile_sample, when set, is throttled: it only fires once at least
 * 1.0 (in time_dt() units) has passed since the last time it fired, or since
 * construction. */
void DeviceTask::update_progress(RenderTile &rtile)
{
	if(type == PATH_TRACE) {
		if(update_progress_sample)
			update_progress_sample();

		if(!update_tile_sample)
			return;

		const double now = time_dt();
		const double elapsed = now - last_update_time;

		if(elapsed >= 1.0f) {
			update_tile_sample(rtile);
			last_update_time = now;
		}
	}
}
CCL_NAMESPACE_END

View File

@@ -0,0 +1,75 @@
/*
* Copyright 2011, Blender Foundation.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software Foundation,
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
#ifndef __DEVICE_TASK_H__
#define __DEVICE_TASK_H__
#include "device_memory.h"
#include "util_function.h"
#include "util_list.h"
#include "util_task.h"
CCL_NAMESPACE_BEGIN
/* Device Task */
class Device;
class RenderBuffers;
class RenderTile;
class Tile;
/* One unit of work handed to a device. Derives from Task and adds the task
 * kind, the ranges and device pointers that kind works on, and a set of
 * callbacks through which the device fetches tiles and reports back. */
class DeviceTask : public Task {
public:
/* kind of work; split(), split_max_size() and update_progress() all branch
 * on it */
typedef enum { PATH_TRACE, TONEMAP, SHADER } Type;
Type type;
/* rectangle the task covers; tasks that are neither SHADER nor PATH_TRACE
 * are split along y/h */
int x, y, w, h;
/* device pointers, set to 0 by the constructor */
device_ptr rgba;
device_ptr buffer;
/* constructor defaults are sample 0 and num_samples 1; presumably the first
 * sample and the number of samples to process - TODO confirm */
int sample;
int num_samples;
/* NOTE(review): meaning not visible in this file - confirm against callers */
int resolution;
/* presumably addressing parameters for buffer - TODO confirm */
int offset, stride;
/* SHADER task data; shader_x/shader_w is the range split() divides */
device_ptr shader_input;
device_ptr shader_output;
int shader_eval_type;
int shader_x, shader_w;
/* creates a task of the given type, PATH_TRACE when omitted */
DeviceTask(Type type = PATH_TRACE);
/* append pieces of this task to tasks; PATH_TRACE tasks are appended as num
 * unmodified copies instead of being cut up */
void split(list<DeviceTask>& tasks, int num);
/* append pieces of at most max_size elements each to tasks */
void split_max_size(list<DeviceTask>& tasks, int max_size);
/* invoke the progress callbacks below for rtile; no-op unless the type is
 * PATH_TRACE */
void update_progress(RenderTile &rtile);
/* fills in the next tile to render for the given device; the device render
 * loops keep calling it until it returns false */
boost::function<bool(Device *device, RenderTile&)> acquire_tile;
/* called by update_progress() on every call, when set */
boost::function<void(void)> update_progress_sample;
/* called by update_progress() when set, but only once at least 1.0 in
 * time_dt() units has passed since the previous call */
boost::function<void(RenderTile&)> update_tile_sample;
/* called by the device render loops once they are finished with a tile */
boost::function<void(RenderTile&)> release_tile;
/* polled by the device render loops before each sample; a true result makes
 * them stop sampling the current tile */
boost::function<bool(void)> get_cancel;
protected:
/* time_dt() value at construction or at the last update_tile_sample call */
double last_update_time;
};
CCL_NAMESPACE_END
#endif /* __DEVICE_TASK_H__ */