Cycles / Non-Progressive integrator:
* Non-Progressive integrator is now available on the GPU (CUDA, sm_20 and above). Implementation details: * kernel_path_trace() has been split up into two functions: kernel_path_trace_non_progressive() and kernel_path_trace_progressive(). * We compile two CUDA kernel entry functions (in kernel.cu) for the two integrators, they are still inside one .cubin file but due to the kernel separation there should be no performance problem. I tested with the BMW file on my Geforce 540M and the render times were the same for 100 samples (1.57 min in my case). This is part of my GSoC project, SVN merge of r59032 + manual merge of UI changes for this from my branch.
This commit is contained in:
@@ -67,9 +67,7 @@ class CyclesRender_PT_sampling(CyclesButtonsPanel, Panel):
|
|||||||
row.operator("render.cycles_sampling_preset_add", text="", icon="ZOOMOUT").remove_active = True
|
row.operator("render.cycles_sampling_preset_add", text="", icon="ZOOMOUT").remove_active = True
|
||||||
|
|
||||||
row = layout.row()
|
row = layout.row()
|
||||||
sub = row.row()
|
row.prop(cscene, "progressive")
|
||||||
sub.active = (device_type == 'NONE' or cscene.device == 'CPU')
|
|
||||||
sub.prop(cscene, "progressive")
|
|
||||||
|
|
||||||
if not cscene.progressive:
|
if not cscene.progressive:
|
||||||
row.prop(cscene, "squared_samples")
|
row.prop(cscene, "squared_samples")
|
||||||
@@ -82,7 +80,7 @@ class CyclesRender_PT_sampling(CyclesButtonsPanel, Panel):
|
|||||||
sub.prop(cscene, "seed")
|
sub.prop(cscene, "seed")
|
||||||
sub.prop(cscene, "sample_clamp")
|
sub.prop(cscene, "sample_clamp")
|
||||||
|
|
||||||
if cscene.progressive or (device_type != 'NONE' and cscene.device == 'GPU'):
|
if cscene.progressive:
|
||||||
col = split.column()
|
col = split.column()
|
||||||
sub = col.column(align=True)
|
sub = col.column(align=True)
|
||||||
sub.label(text="Samples:")
|
sub.label(text="Samples:")
|
||||||
@@ -656,7 +654,6 @@ class CyclesLamp_PT_lamp(CyclesButtonsPanel, Panel):
|
|||||||
lamp = context.lamp
|
lamp = context.lamp
|
||||||
clamp = lamp.cycles
|
clamp = lamp.cycles
|
||||||
cscene = context.scene.cycles
|
cscene = context.scene.cycles
|
||||||
device_type = context.user_preferences.system.compute_device_type
|
|
||||||
|
|
||||||
layout.prop(lamp, "type", expand=True)
|
layout.prop(lamp, "type", expand=True)
|
||||||
|
|
||||||
@@ -675,7 +672,7 @@ class CyclesLamp_PT_lamp(CyclesButtonsPanel, Panel):
|
|||||||
sub.prop(lamp, "size", text="Size X")
|
sub.prop(lamp, "size", text="Size X")
|
||||||
sub.prop(lamp, "size_y", text="Size Y")
|
sub.prop(lamp, "size_y", text="Size Y")
|
||||||
|
|
||||||
if not cscene.progressive and (device_type == 'NONE' or cscene.device == 'CPU'):
|
if not cscene.progressive:
|
||||||
col.prop(clamp, "samples")
|
col.prop(clamp, "samples")
|
||||||
|
|
||||||
col = split.column()
|
col = split.column()
|
||||||
@@ -864,7 +861,6 @@ class CyclesWorld_PT_settings(CyclesButtonsPanel, Panel):
|
|||||||
world = context.world
|
world = context.world
|
||||||
cworld = world.cycles
|
cworld = world.cycles
|
||||||
cscene = context.scene.cycles
|
cscene = context.scene.cycles
|
||||||
device_type = context.user_preferences.system.compute_device_type
|
|
||||||
|
|
||||||
col = layout.column()
|
col = layout.column()
|
||||||
|
|
||||||
@@ -872,7 +868,7 @@ class CyclesWorld_PT_settings(CyclesButtonsPanel, Panel):
|
|||||||
sub = col.row(align=True)
|
sub = col.row(align=True)
|
||||||
sub.active = cworld.sample_as_light
|
sub.active = cworld.sample_as_light
|
||||||
sub.prop(cworld, "sample_map_resolution")
|
sub.prop(cworld, "sample_map_resolution")
|
||||||
if not cscene.progressive and (device_type == 'NONE' or cscene.device == 'CPU'):
|
if not cscene.progressive:
|
||||||
sub.prop(cworld, "samples")
|
sub.prop(cworld, "samples")
|
||||||
|
|
||||||
|
|
||||||
|
@@ -420,7 +420,7 @@ SessionParams BlenderSync::get_session_params(BL::RenderEngine b_engine, BL::Use
|
|||||||
preview_aa_samples = preview_aa_samples * preview_aa_samples;
|
preview_aa_samples = preview_aa_samples * preview_aa_samples;
|
||||||
}
|
}
|
||||||
|
|
||||||
if(get_boolean(cscene, "progressive") == 0 && params.device.type == DEVICE_CPU) {
|
if(get_boolean(cscene, "progressive") == 0) {
|
||||||
if(background) {
|
if(background) {
|
||||||
params.samples = aa_samples;
|
params.samples = aa_samples;
|
||||||
}
|
}
|
||||||
|
@@ -558,7 +558,7 @@ public:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void path_trace(RenderTile& rtile, int sample)
|
void path_trace(RenderTile& rtile, int sample, bool progressive)
|
||||||
{
|
{
|
||||||
if(have_error())
|
if(have_error())
|
||||||
return;
|
return;
|
||||||
@@ -570,7 +570,10 @@ public:
|
|||||||
CUdeviceptr d_rng_state = cuda_device_ptr(rtile.rng_state);
|
CUdeviceptr d_rng_state = cuda_device_ptr(rtile.rng_state);
|
||||||
|
|
||||||
/* get kernel function */
|
/* get kernel function */
|
||||||
cuda_assert(cuModuleGetFunction(&cuPathTrace, cuModule, "kernel_cuda_path_trace"))
|
if(progressive)
|
||||||
|
cuda_assert(cuModuleGetFunction(&cuPathTrace, cuModule, "kernel_cuda_path_trace_progressive"))
|
||||||
|
else
|
||||||
|
cuda_assert(cuModuleGetFunction(&cuPathTrace, cuModule, "kernel_cuda_path_trace_non_progressive"))
|
||||||
|
|
||||||
/* pass in parameters */
|
/* pass in parameters */
|
||||||
int offset = 0;
|
int offset = 0;
|
||||||
@@ -914,6 +917,8 @@ public:
|
|||||||
if(task->type == DeviceTask::PATH_TRACE) {
|
if(task->type == DeviceTask::PATH_TRACE) {
|
||||||
RenderTile tile;
|
RenderTile tile;
|
||||||
|
|
||||||
|
bool progressive = task->integrator_progressive;
|
||||||
|
|
||||||
/* keep rendering tiles until done */
|
/* keep rendering tiles until done */
|
||||||
while(task->acquire_tile(this, tile)) {
|
while(task->acquire_tile(this, tile)) {
|
||||||
int start_sample = tile.start_sample;
|
int start_sample = tile.start_sample;
|
||||||
@@ -925,7 +930,7 @@ public:
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
path_trace(tile, sample);
|
path_trace(tile, sample, progressive);
|
||||||
|
|
||||||
tile.sample = sample + 1;
|
tile.sample = sample + 1;
|
||||||
|
|
||||||
|
@@ -65,6 +65,7 @@ public:
|
|||||||
boost::function<bool(void)> get_cancel;
|
boost::function<bool(void)> get_cancel;
|
||||||
|
|
||||||
bool need_finish_queue;
|
bool need_finish_queue;
|
||||||
|
bool integrator_progressive;
|
||||||
protected:
|
protected:
|
||||||
double last_update_time;
|
double last_update_time;
|
||||||
};
|
};
|
||||||
|
@@ -51,7 +51,7 @@ __kernel void kernel_ocl_path_trace(
|
|||||||
int y = sy + get_global_id(1);
|
int y = sy + get_global_id(1);
|
||||||
|
|
||||||
if(x < sx + sw && y < sy + sh)
|
if(x < sx + sw && y < sy + sh)
|
||||||
kernel_path_trace(kg, buffer, rng_state, sample, x, y, offset, stride);
|
kernel_path_trace_progressive(kg, buffer, rng_state, sample, x, y, offset, stride);
|
||||||
}
|
}
|
||||||
|
|
||||||
__kernel void kernel_ocl_tonemap(
|
__kernel void kernel_ocl_tonemap(
|
||||||
|
@@ -90,7 +90,10 @@ void kernel_tex_copy(KernelGlobals *kg, const char *name, device_ptr mem, size_t
|
|||||||
|
|
||||||
void kernel_cpu_path_trace(KernelGlobals *kg, float *buffer, unsigned int *rng_state, int sample, int x, int y, int offset, int stride)
|
void kernel_cpu_path_trace(KernelGlobals *kg, float *buffer, unsigned int *rng_state, int sample, int x, int y, int offset, int stride)
|
||||||
{
|
{
|
||||||
kernel_path_trace(kg, buffer, rng_state, sample, x, y, offset, stride);
|
if(kernel_data.integrator.progressive)
|
||||||
|
kernel_path_trace_progressive(kg, buffer, rng_state, sample, x, y, offset, stride);
|
||||||
|
else
|
||||||
|
kernel_path_trace_non_progressive(kg, buffer, rng_state, sample, x, y, offset, stride);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Tonemapping */
|
/* Tonemapping */
|
||||||
|
@@ -26,13 +26,22 @@
|
|||||||
#include "kernel_path.h"
|
#include "kernel_path.h"
|
||||||
#include "kernel_displace.h"
|
#include "kernel_displace.h"
|
||||||
|
|
||||||
extern "C" __global__ void kernel_cuda_path_trace(float *buffer, uint *rng_state, int sample, int sx, int sy, int sw, int sh, int offset, int stride)
|
extern "C" __global__ void kernel_cuda_path_trace_progressive(float *buffer, uint *rng_state, int sample, int sx, int sy, int sw, int sh, int offset, int stride)
|
||||||
{
|
{
|
||||||
int x = sx + blockDim.x*blockIdx.x + threadIdx.x;
|
int x = sx + blockDim.x*blockIdx.x + threadIdx.x;
|
||||||
int y = sy + blockDim.y*blockIdx.y + threadIdx.y;
|
int y = sy + blockDim.y*blockIdx.y + threadIdx.y;
|
||||||
|
|
||||||
if(x < sx + sw && y < sy + sh)
|
if(x < sx + sw && y < sy + sh)
|
||||||
kernel_path_trace(NULL, buffer, rng_state, sample, x, y, offset, stride);
|
kernel_path_trace_progressive(NULL, buffer, rng_state, sample, x, y, offset, stride);
|
||||||
|
}
|
||||||
|
|
||||||
|
extern "C" __global__ void kernel_cuda_path_trace_non_progressive(float *buffer, uint *rng_state, int sample, int sx, int sy, int sw, int sh, int offset, int stride)
|
||||||
|
{
|
||||||
|
int x = sx + blockDim.x*blockIdx.x + threadIdx.x;
|
||||||
|
int y = sy + blockDim.y*blockIdx.y + threadIdx.y;
|
||||||
|
|
||||||
|
if(x < sx + sw && y < sy + sh)
|
||||||
|
kernel_path_trace_non_progressive(NULL, buffer, rng_state, sample, x, y, offset, stride);
|
||||||
}
|
}
|
||||||
|
|
||||||
extern "C" __global__ void kernel_cuda_tonemap(uchar4 *rgba, float *buffer, int sample, int sx, int sy, int sw, int sh, int offset, int stride)
|
extern "C" __global__ void kernel_cuda_tonemap(uchar4 *rgba, float *buffer, int sample, int sx, int sy, int sw, int sh, int offset, int stride)
|
||||||
|
@@ -1134,7 +1134,36 @@ __device float4 kernel_path_non_progressive(KernelGlobals *kg, RNG *rng, int sam
|
|||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
__device void kernel_path_trace(KernelGlobals *kg,
|
__device_inline void kernel_path_trace_setup(KernelGlobals *kg, __global uint *rng_state, int sample, int x, int y, RNG *rng, Ray *ray)
|
||||||
|
{
|
||||||
|
float filter_u;
|
||||||
|
float filter_v;
|
||||||
|
#ifdef __CMJ__
|
||||||
|
int num_samples = kernel_data.integrator.aa_samples;
|
||||||
|
#else
|
||||||
|
int num_samples = 0;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
path_rng_init(kg, rng_state, sample, num_samples, rng, x, y, &filter_u, &filter_v);
|
||||||
|
|
||||||
|
/* sample camera ray */
|
||||||
|
|
||||||
|
float lens_u = 0.0f, lens_v = 0.0f;
|
||||||
|
|
||||||
|
if(kernel_data.cam.aperturesize > 0.0f)
|
||||||
|
path_rng_2D(kg, rng, sample, num_samples, PRNG_LENS_U, &lens_u, &lens_v);
|
||||||
|
|
||||||
|
float time = 0.0f;
|
||||||
|
|
||||||
|
#ifdef __CAMERA_MOTION__
|
||||||
|
if(kernel_data.cam.shuttertime != -1.0f)
|
||||||
|
time = path_rng_1D(kg, rng, sample, num_samples, PRNG_TIME);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
camera_sample(kg, x, y, filter_u, filter_v, lens_u, lens_v, time, ray);
|
||||||
|
}
|
||||||
|
|
||||||
|
__device void kernel_path_trace_progressive(KernelGlobals *kg,
|
||||||
__global float *buffer, __global uint *rng_state,
|
__global float *buffer, __global uint *rng_state,
|
||||||
int sample, int x, int y, int offset, int stride)
|
int sample, int x, int y, int offset, int stride)
|
||||||
{
|
{
|
||||||
@@ -1145,49 +1174,52 @@ __device void kernel_path_trace(KernelGlobals *kg,
|
|||||||
rng_state += index;
|
rng_state += index;
|
||||||
buffer += index*pass_stride;
|
buffer += index*pass_stride;
|
||||||
|
|
||||||
/* initialize random numbers */
|
/* initialize random numbers and ray */
|
||||||
RNG rng;
|
RNG rng;
|
||||||
|
|
||||||
float filter_u;
|
|
||||||
float filter_v;
|
|
||||||
#ifdef __CMJ__
|
|
||||||
int num_samples = kernel_data.integrator.aa_samples;
|
|
||||||
#else
|
|
||||||
int num_samples = 0;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
path_rng_init(kg, rng_state, sample, num_samples, &rng, x, y, &filter_u, &filter_v);
|
|
||||||
|
|
||||||
/* sample camera ray */
|
|
||||||
Ray ray;
|
Ray ray;
|
||||||
|
|
||||||
float lens_u = 0.0f, lens_v = 0.0f;
|
kernel_path_trace_setup(kg, rng_state, sample, x, y, &rng, &ray);
|
||||||
|
|
||||||
if(kernel_data.cam.aperturesize > 0.0f)
|
|
||||||
path_rng_2D(kg, &rng, sample, num_samples, PRNG_LENS_U, &lens_u, &lens_v);
|
|
||||||
|
|
||||||
float time = 0.0f;
|
|
||||||
|
|
||||||
#ifdef __CAMERA_MOTION__
|
|
||||||
if(kernel_data.cam.shuttertime != -1.0f)
|
|
||||||
time = path_rng_1D(kg, &rng, sample, num_samples, PRNG_TIME);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
camera_sample(kg, x, y, filter_u, filter_v, lens_u, lens_v, time, &ray);
|
|
||||||
|
|
||||||
/* integrate */
|
/* integrate */
|
||||||
float4 L;
|
float4 L;
|
||||||
|
|
||||||
if (ray.t != 0.0f) {
|
if (ray.t != 0.0f)
|
||||||
|
L = kernel_path_progressive(kg, &rng, sample, ray, buffer);
|
||||||
|
else
|
||||||
|
L = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
|
||||||
|
|
||||||
|
/* accumulate result in output buffer */
|
||||||
|
kernel_write_pass_float4(buffer, sample, L);
|
||||||
|
|
||||||
|
path_rng_end(kg, rng_state, rng);
|
||||||
|
}
|
||||||
|
|
||||||
|
__device void kernel_path_trace_non_progressive(KernelGlobals *kg,
|
||||||
|
__global float *buffer, __global uint *rng_state,
|
||||||
|
int sample, int x, int y, int offset, int stride)
|
||||||
|
{
|
||||||
|
/* buffer offset */
|
||||||
|
int index = offset + x + y*stride;
|
||||||
|
int pass_stride = kernel_data.film.pass_stride;
|
||||||
|
|
||||||
|
rng_state += index;
|
||||||
|
buffer += index*pass_stride;
|
||||||
|
|
||||||
|
/* initialize random numbers and ray */
|
||||||
|
RNG rng;
|
||||||
|
Ray ray;
|
||||||
|
|
||||||
|
kernel_path_trace_setup(kg, rng_state, sample, x, y, &rng, &ray);
|
||||||
|
|
||||||
|
/* integrate */
|
||||||
|
float4 L;
|
||||||
|
|
||||||
|
if (ray.t != 0.0f)
|
||||||
#ifdef __NON_PROGRESSIVE__
|
#ifdef __NON_PROGRESSIVE__
|
||||||
if(kernel_data.integrator.progressive)
|
L = kernel_path_non_progressive(kg, &rng, sample, ray, buffer);
|
||||||
|
#else
|
||||||
|
L = kernel_path_progressive(kg, &rng, sample, ray, buffer);
|
||||||
#endif
|
#endif
|
||||||
L = kernel_path_progressive(kg, &rng, sample, ray, buffer);
|
|
||||||
#ifdef __NON_PROGRESSIVE__
|
|
||||||
else
|
|
||||||
L = kernel_path_non_progressive(kg, &rng, sample, ray, buffer);
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
else
|
else
|
||||||
L = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
|
L = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
|
||||||
|
|
||||||
|
@@ -955,8 +955,11 @@ __device void shader_merge_closures(KernelGlobals *kg, ShaderData *sd)
|
|||||||
sci->sample_weight += scj->sample_weight;
|
sci->sample_weight += scj->sample_weight;
|
||||||
|
|
||||||
int size = sd->num_closure - (j+1);
|
int size = sd->num_closure - (j+1);
|
||||||
if(size > 0)
|
if(size > 0) {
|
||||||
memmove(scj, scj+1, size*sizeof(ShaderClosure));
|
for(int k = 0; k < size; k++) {
|
||||||
|
scj[k] = scj[k+1];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
sd->num_closure--;
|
sd->num_closure--;
|
||||||
j--;
|
j--;
|
||||||
|
@@ -39,7 +39,10 @@ CCL_NAMESPACE_BEGIN
|
|||||||
|
|
||||||
void kernel_cpu_sse2_path_trace(KernelGlobals *kg, float *buffer, unsigned int *rng_state, int sample, int x, int y, int offset, int stride)
|
void kernel_cpu_sse2_path_trace(KernelGlobals *kg, float *buffer, unsigned int *rng_state, int sample, int x, int y, int offset, int stride)
|
||||||
{
|
{
|
||||||
kernel_path_trace(kg, buffer, rng_state, sample, x, y, offset, stride);
|
if(kernel_data.integrator.progressive)
|
||||||
|
kernel_path_trace_progressive(kg, buffer, rng_state, sample, x, y, offset, stride);
|
||||||
|
else
|
||||||
|
kernel_path_trace_non_progressive(kg, buffer, rng_state, sample, x, y, offset, stride);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Tonemapping */
|
/* Tonemapping */
|
||||||
|
@@ -41,7 +41,10 @@ CCL_NAMESPACE_BEGIN
|
|||||||
|
|
||||||
void kernel_cpu_sse3_path_trace(KernelGlobals *kg, float *buffer, unsigned int *rng_state, int sample, int x, int y, int offset, int stride)
|
void kernel_cpu_sse3_path_trace(KernelGlobals *kg, float *buffer, unsigned int *rng_state, int sample, int x, int y, int offset, int stride)
|
||||||
{
|
{
|
||||||
kernel_path_trace(kg, buffer, rng_state, sample, x, y, offset, stride);
|
if(kernel_data.integrator.progressive)
|
||||||
|
kernel_path_trace_progressive(kg, buffer, rng_state, sample, x, y, offset, stride);
|
||||||
|
else
|
||||||
|
kernel_path_trace_non_progressive(kg, buffer, rng_state, sample, x, y, offset, stride);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Tonemapping */
|
/* Tonemapping */
|
||||||
|
@@ -68,6 +68,7 @@ CCL_NAMESPACE_BEGIN
|
|||||||
#define __KERNEL_SHADING__
|
#define __KERNEL_SHADING__
|
||||||
#if __CUDA_ARCH__ >= 200
|
#if __CUDA_ARCH__ >= 200
|
||||||
#define __KERNEL_ADV_SHADING__
|
#define __KERNEL_ADV_SHADING__
|
||||||
|
#define __NON_PROGRESSIVE__
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@@ -832,6 +832,7 @@ void Session::path_trace()
|
|||||||
task.update_tile_sample = function_bind(&Session::update_tile_sample, this, _1);
|
task.update_tile_sample = function_bind(&Session::update_tile_sample, this, _1);
|
||||||
task.update_progress_sample = function_bind(&Session::update_progress_sample, this);
|
task.update_progress_sample = function_bind(&Session::update_progress_sample, this);
|
||||||
task.need_finish_queue = params.progressive_refine;
|
task.need_finish_queue = params.progressive_refine;
|
||||||
|
task.integrator_progressive = scene->integrator->progressive;
|
||||||
|
|
||||||
device->task_add(task);
|
device->task_add(task);
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user