Cycles: merge of changes from tomato branch.

Regular rendering is now tiled, and supports save buffers to reduce memory
usage during rendering and to cache render results.

Brick texture node by Thomas.
http://wiki.blender.org/index.php/Doc:2.6/Manual/Render/Cycles/Nodes/Textures#Brick_Texture

Image texture Blended Box Mapping.
http://wiki.blender.org/index.php/Doc:2.6/Manual/Render/Cycles/Nodes/Textures#Image_Texture
http://mango.blender.org/production/blended_box/

Various bug fixes by Sergey and Campbell.
* Fix for reading freed memory in some node setups.
* Fix incorrect memory read when synchronizing mesh motion.
* Fix crash when direct light usage differs between layers.
* Fix vector pass giving wrong results in some circumstances.
* Fix wrong resolution being used when rendering the Render Layer node.
* Option to cancel rendering when doing initial synchronization.
* No more texture limit when using CPU render.
* Many fixes for new tiled rendering.
This commit is contained in:
Brecht Van Lommel
2012-09-04 13:29:07 +00:00
parent 68563134d4
commit adea12cb01
69 changed files with 1983 additions and 708 deletions

View File

@@ -23,6 +23,8 @@
#include "device.h"
#include "device_intern.h"
#include "buffers.h"
#include "util_cuda.h"
#include "util_debug.h"
#include "util_map.h"
@@ -37,6 +39,7 @@ CCL_NAMESPACE_BEGIN
class CUDADevice : public Device
{
public:
TaskPool task_pool;
CUdevice cuDevice;
CUcontext cuContext;
CUmodule cuModule;
@@ -192,6 +195,8 @@ public:
/* Destructor: stops the task pool first, then pushes the CUDA context and
 * detaches it. */
~CUDADevice()
{
/* stop the pool before the context is detached below */
task_pool.stop();
cuda_push_context();
/* no matching cuda_pop_context() follows; the context itself is detached here */
cuda_assert(cuCtxDetach(cuContext))
}
@@ -466,13 +471,13 @@ public:
}
}
void path_trace(DeviceTask& task)
void path_trace(RenderTile& rtile, int sample)
{
cuda_push_context();
CUfunction cuPathTrace;
CUdeviceptr d_buffer = cuda_device_ptr(task.buffer);
CUdeviceptr d_rng_state = cuda_device_ptr(task.rng_state);
CUdeviceptr d_buffer = cuda_device_ptr(rtile.buffer);
CUdeviceptr d_rng_state = cuda_device_ptr(rtile.rng_state);
/* get kernel function */
cuda_assert(cuModuleGetFunction(&cuPathTrace, cuModule, "kernel_cuda_path_trace"))
@@ -486,29 +491,28 @@ public:
cuda_assert(cuParamSetv(cuPathTrace, offset, &d_rng_state, sizeof(d_rng_state)))
offset += sizeof(d_rng_state);
int sample = task.sample;
offset = align_up(offset, __alignof(sample));
cuda_assert(cuParamSeti(cuPathTrace, offset, task.sample))
offset += sizeof(task.sample);
cuda_assert(cuParamSeti(cuPathTrace, offset, sample))
offset += sizeof(sample);
cuda_assert(cuParamSeti(cuPathTrace, offset, task.x))
offset += sizeof(task.x);
cuda_assert(cuParamSeti(cuPathTrace, offset, rtile.x))
offset += sizeof(rtile.x);
cuda_assert(cuParamSeti(cuPathTrace, offset, task.y))
offset += sizeof(task.y);
cuda_assert(cuParamSeti(cuPathTrace, offset, rtile.y))
offset += sizeof(rtile.y);
cuda_assert(cuParamSeti(cuPathTrace, offset, task.w))
offset += sizeof(task.w);
cuda_assert(cuParamSeti(cuPathTrace, offset, rtile.w))
offset += sizeof(rtile.w);
cuda_assert(cuParamSeti(cuPathTrace, offset, task.h))
offset += sizeof(task.h);
cuda_assert(cuParamSeti(cuPathTrace, offset, rtile.h))
offset += sizeof(rtile.h);
cuda_assert(cuParamSeti(cuPathTrace, offset, task.offset))
offset += sizeof(task.offset);
cuda_assert(cuParamSeti(cuPathTrace, offset, rtile.offset))
offset += sizeof(rtile.offset);
cuda_assert(cuParamSeti(cuPathTrace, offset, task.stride))
offset += sizeof(task.stride);
cuda_assert(cuParamSeti(cuPathTrace, offset, rtile.stride))
offset += sizeof(rtile.stride);
cuda_assert(cuParamSetSize(cuPathTrace, offset))
@@ -520,23 +524,25 @@ public:
int xthreads = 8;
int ythreads = 8;
#endif
int xblocks = (task.w + xthreads - 1)/xthreads;
int yblocks = (task.h + ythreads - 1)/ythreads;
int xblocks = (rtile.w + xthreads - 1)/xthreads;
int yblocks = (rtile.h + ythreads - 1)/ythreads;
cuda_assert(cuFuncSetCacheConfig(cuPathTrace, CU_FUNC_CACHE_PREFER_L1))
cuda_assert(cuFuncSetBlockShape(cuPathTrace, xthreads, ythreads, 1))
cuda_assert(cuLaunchGrid(cuPathTrace, xblocks, yblocks))
cuda_assert(cuCtxSynchronize())
cuda_pop_context();
}
void tonemap(DeviceTask& task)
void tonemap(DeviceTask& task, device_ptr buffer, device_ptr rgba)
{
cuda_push_context();
CUfunction cuFilmConvert;
CUdeviceptr d_rgba = map_pixels(task.rgba);
CUdeviceptr d_buffer = cuda_device_ptr(task.buffer);
CUdeviceptr d_rgba = map_pixels(rgba);
CUdeviceptr d_buffer = cuda_device_ptr(buffer);
/* get kernel function */
cuda_assert(cuModuleGetFunction(&cuFilmConvert, cuModule, "kernel_cuda_tonemap"))
@@ -820,27 +826,71 @@ public:
Device::draw_pixels(mem, y, w, h, dy, width, height, transparent);
}
/* Task body that CUDADeviceTask binds to its run callback.
 *
 * Dispatches on task->type:
 * - PATH_TRACE: repeatedly acquires a tile and renders it one sample at a
 *   time with path_trace(), reporting progress after every sample.
 * - SHADER: calls shader() and then synchronizes the CUDA context.
 * Any other task type falls through without doing anything. */
void thread_run(DeviceTask *task)
{
if(task->type == DeviceTask::PATH_TRACE) {
RenderTile tile;
/* keep rendering tiles until done */
while(task->acquire_tile(this, tile)) {
/* samples to render for this tile: [start_sample, end_sample) */
int start_sample = tile.start_sample;
int end_sample = tile.start_sample + tile.num_samples;
for(int sample = start_sample; sample < end_sample; sample++) {
/* cancel is checked before each sample; the break only leaves the
 * sample loop, so release_tile() below still runs for this tile */
if (task->get_cancel())
break;
path_trace(tile, sample);
/* tile.sample is set to sample + 1 before progress is reported */
tile.sample = sample + 1;
task->update_progress(tile);
}
task->release_tile(tile);
}
}
else if(task->type == DeviceTask::SHADER) {
shader(*task);
/* explicit context synchronize after shader() */
cuda_push_context();
cuda_assert(cuCtxSynchronize())
cuda_pop_context();
}
}
/* DeviceTask copy whose run callback invokes CUDADevice::thread_run on the
 * given device, passing this task object as the argument. */
class CUDADeviceTask : public DeviceTask {
public:
/* Copy-constructs the DeviceTask base from task, then binds run. */
CUDADeviceTask(CUDADevice *device, DeviceTask& task)
: DeviceTask(task)
{
/* bound to this copy, not to the caller's original task reference */
run = function_bind(&CUDADevice::thread_run, device, this);
}
};
void task_add(DeviceTask& task)
{
if(task.type == DeviceTask::TONEMAP)
tonemap(task);
else if(task.type == DeviceTask::PATH_TRACE)
path_trace(task);
else if(task.type == DeviceTask::SHADER)
shader(task);
if(task.type == DeviceTask::TONEMAP) {
/* must be done in main thread due to opengl access */
tonemap(task, task.buffer, task.rgba);
cuda_push_context();
cuda_assert(cuCtxSynchronize())
cuda_pop_context();
}
else {
task_pool.push(new CUDADeviceTask(this, task));
}
}
void task_wait()
{
cuda_push_context();
cuda_assert(cuCtxSynchronize())
cuda_pop_context();
task_pool.wait_work();
}
/* Forwards the cancel request to the device's task pool. */
void task_cancel()
{
task_pool.cancel();
}
};