Files
blender/intern/cycles/kernel/split/kernel_do_volume.h
Brecht Van Lommel cfa8b762e2 Code cleanup: move rng into path state.
Also pass by value and don't write back now that it is just a hash for seeding
and no longer an LCG state. Together this makes CUDA a tiny bit faster in my
tests, but mainly simplifies code.
2017-08-19 18:14:16 +02:00

222 lines
7.7 KiB
C

/*
* Copyright 2011-2017 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
CCL_NAMESPACE_BEGIN
#if defined(__BRANCHED_PATH__) && defined(__VOLUME__)
/* Prepare a ray for the branched-path volume indirect light loop.
 *
 * Runs the generic indirect loop initialisation for `ray_index` and then
 * sets the RAY_BRANCHED_VOLUME_INDIRECT flag on that ray's state entry.
 */
ccl_device_inline void kernel_split_branched_path_volume_indirect_light_init(KernelGlobals *kg, int ray_index)
{
	kernel_split_branched_path_indirect_loop_init(kg, ray_index);

	/* Tag the ray only after the loop state has been initialised. */
	ccl_global char *ray_state = kernel_split_state.ray_state;
	ADD_RAY_FLAG(ray_state, ray_index, RAY_BRANCHED_VOLUME_INDIRECT);
}
/* Advance the branched-path volume sample loop for one ray.
 *
 * Starting at branched_state->next_sample, every iteration restores the path
 * state, ray and throughput saved in the branched state (throughput scaled by
 * 1 / volume_samples), branches the RNG state for that sample and integrates
 * the volume segment.
 *
 * Returns true when
 *  - a scattered sample bounced and an indirect path was started; the loop
 *    position is stored in the branched state so a later call resumes with
 *    the following sample, or
 *  - all samples have been issued but shared_sample_count is still positive.
 *
 * Returns false once kernel_split_branched_path_indirect_loop_end() has been
 * called and kernel_volume_shadow() has been applied to the ray's throughput.
 */
ccl_device_noinline bool kernel_split_branched_path_volume_indirect_light_iter(KernelGlobals *kg, int ray_index)
{
	SplitBranchedState *branched_state = &kernel_split_state.branched_state[ray_index];
	PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
	ShaderData *sd = &kernel_split_state.sd[ray_index];
	ShaderData *emission_sd = &kernel_split_state.sd_DL_shadow[ray_index];

	/* Per-ray working slots in the split state; rewritten for every sample. */
	ccl_global PathState *state_slot = &kernel_split_state.path_state[ray_index];
	ccl_global Ray *ray_slot = &kernel_split_state.ray[ray_index];
	ccl_global float3 *throughput_slot = &kernel_split_state.throughput[ray_index];

	/* GPU: no decoupled ray marching, scatter probabilistically. */
	const int num_samples = kernel_data.integrator.volume_samples;
	const float num_samples_inv = 1.0f/num_samples;

	/* The volume segment ends at the saved intersection distance, or is
	 * unbounded when the saved ray state is RAY_HIT_BACKGROUND. */
	Ray volume_ray = branched_state->ray;
	if(!IS_STATE(&branched_state->ray_state, 0, RAY_HIT_BACKGROUND)) {
		volume_ray.t = branched_state->isect.t;
	}
	else {
		volume_ray.t = FLT_MAX;
	}

	const bool heterogeneous = volume_stack_is_heterogeneous(kg, branched_state->path_state.volume_stack);

	for(int sample_idx = branched_state->next_sample; sample_idx < num_samples; sample_idx++) {
		/* Start every sample from the state saved in the branched state. */
		*state_slot = branched_state->path_state;
		*ray_slot = branched_state->ray;
		*throughput_slot = branched_state->throughput * num_samples_inv;

		/* branch RNG state */
		path_state_branch(state_slot, sample_idx, num_samples);

		/* integrate along volume segment with distance sampling */
		VolumeIntegrateResult result = kernel_volume_integrate(
		        kg, state_slot, sd, &volume_ray, L, throughput_slot, heterogeneous);

#  ifdef __VOLUME_SCATTER__
		/* Only scattered samples produce direct/indirect light. */
		if(result != VOLUME_PATH_SCATTERED) {
			continue;
		}

		/* direct lighting */
		kernel_path_volume_connect_light(kg, sd, emission_sd, *throughput_slot, &branched_state->path_state, L);

		/* indirect light bounce */
		if(!kernel_path_volume_bounce(kg, sd, throughput_slot, state_slot, L, ray_slot)) {
			continue;
		}

		/* start the indirect path; remember where to resume the loop */
		branched_state->next_closure = 0;
		branched_state->next_sample = sample_idx + 1;
		branched_state->num_samples = num_samples;

		/* Sharing too many samples is slow for volumes: it keeps this loop
		 * running longer, with more calls to kernel_volume_integrate and
		 * therefore more expensive shader evaluations. That unbalances the
		 * work load and leaves many threads idle in this kernel, so the
		 * number of shared samples is capped here.
		 */
		if(branched_state->shared_sample_count < 2 &&
		   kernel_split_branched_indirect_start_shared(kg, ray_index))
		{
			continue;
		}

		return true;
#  endif
	}

	/* Every sample has been issued. */
	branched_state->next_sample = num_samples;

	const bool waiting = (branched_state->shared_sample_count > 0);
	branched_state->waiting_on_shared_samples = waiting;
	if(waiting) {
		return true;
	}

	kernel_split_branched_path_indirect_loop_end(kg, ray_index);

	/* todo: avoid this calculation using decoupled ray marching */
	float3 attenuated = *throughput_slot;
	kernel_volume_shadow(kg, emission_sd, state_slot, &volume_ray, &attenuated);
	*throughput_slot = attenuated;

	return false;
}
#endif /* __BRANCHED_PATH__ && __VOLUME__ */
/* Split-kernel stage handling volumes for the ray mapped to this work item.
 *
 * - The work item with global id (0, 0) resets the queue counters of
 *   QUEUE_ACTIVE_AND_REGENERATED_RAYS and, with __BRANCHED_PATH__,
 *   QUEUE_VOLUME_INDIRECT_ITER.
 * - Rays in state RAY_ACTIVE or RAY_HIT_BACKGROUND whose first volume stack
 *   entry has a shader are integrated along the segment up to the hit
 *   distance (FLT_MAX for background hits).
 * - With __BRANCHED_PATH__, rays taken from QUEUE_VOLUME_INDIRECT_ITER that
 *   are in state RAY_VOLUME_INDIRECT_NEXT_ITER continue their volume sample
 *   loop.
 *
 * The body is empty when __VOLUME__ is not defined.
 */
ccl_device void kernel_do_volume(KernelGlobals *kg)
{
#ifdef __VOLUME__
/* We will empty this queue in this kernel. */
if(ccl_global_id(0) == 0 && ccl_global_id(1) == 0) {
kernel_split_params.queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS] = 0;
# ifdef __BRANCHED_PATH__
kernel_split_params.queue_index[QUEUE_VOLUME_INDIRECT_ITER] = 0;
# endif /* __BRANCHED_PATH__ */
}
/* Linear work item index built from the 2D global id. */
int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
/* When queues are in use, replace the work item index by the ray index
 * looked up through QUEUE_ACTIVE_AND_REGENERATED_RAYS.
 * NOTE(review): the trailing 1 is presumably the "empty the queue slot"
 * flag -- confirm against get_ray_index(). */
if(*kernel_split_params.use_queues_flag) {
ray_index = get_ray_index(kg, ray_index,
QUEUE_ACTIVE_AND_REGENERATED_RAYS,
kernel_split_state.queue_data,
kernel_split_params.queue_size,
1);
}
ccl_global char *ray_state = kernel_split_state.ray_state;
/* These addresses are formed before the ray state is checked; they are only
 * dereferenced inside the IS_STATE branch below. */
PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
if(IS_STATE(ray_state, ray_index, RAY_ACTIVE) ||
IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND)) {
ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index];
ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
ccl_global Intersection *isect = &kernel_split_state.isect[ray_index];
ShaderData *sd = &kernel_split_state.sd[ray_index];
ShaderData *emission_sd = &kernel_split_state.sd_DL_shadow[ray_index];
/* True unless the ray is in state RAY_HIT_BACKGROUND. */
bool hit = ! IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND);
/* Sanitize volume stack. */
if(!hit) {
kernel_volume_clean_stack(kg, state->volume_stack);
}
/* volume attenuation, emission, scatter */
if(state->volume_stack[0].shader != SHADER_NONE) {
/* Work on a copy of the ray whose length is the hit distance, or
 * unbounded when nothing was hit. */
Ray volume_ray = *ray;
volume_ray.t = (hit)? isect->t: FLT_MAX;
# ifdef __BRANCHED_PATH__
/* Single-sample path: taken when the integrator is not branched, or when
 * the ray already carries the RAY_BRANCHED_INDIRECT flag. */
if(!kernel_data.integrator.branched || IS_FLAG(ray_state, ray_index, RAY_BRANCHED_INDIRECT)) {
# endif /* __BRANCHED_PATH__ */
bool heterogeneous = volume_stack_is_heterogeneous(kg, state->volume_stack);
{
/* integrate along volume segment with distance sampling */
VolumeIntegrateResult result = kernel_volume_integrate(
kg, state, sd, &volume_ray, L, throughput, heterogeneous);
# ifdef __VOLUME_SCATTER__
if(result == VOLUME_PATH_SCATTERED) {
/* direct lighting */
kernel_path_volume_connect_light(kg, sd, emission_sd, *throughput, state, L);
/* indirect light bounce */
/* A successful bounce marks the ray RAY_REGENERATED; otherwise the
 * path is ended. */
if(kernel_path_volume_bounce(kg, sd, throughput, state, L, ray)) {
ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
}
else {
kernel_split_path_end(kg, ray_index);
}
}
# endif /* __VOLUME_SCATTER__ */
}
# ifdef __BRANCHED_PATH__
}
else {
/* Branched integrator on a ray without the RAY_BRANCHED_INDIRECT flag:
 * set up the volume sample loop and run it; a true result from the
 * iteration marks the ray RAY_REGENERATED. */
kernel_split_branched_path_volume_indirect_light_init(kg, ray_index);
if(kernel_split_branched_path_volume_indirect_light_iter(kg, ray_index)) {
ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
}
}
# endif /* __BRANCHED_PATH__ */
}
}
# ifdef __BRANCHED_PATH__
/* iter loop */
/* Second lookup for the same work item, this time through
 * QUEUE_VOLUME_INDIRECT_ITER. Unlike the first lookup, this call is not
 * guarded by use_queues_flag. */
ray_index = get_ray_index(kg, ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0),
QUEUE_VOLUME_INDIRECT_ITER,
kernel_split_state.queue_data,
kernel_split_params.queue_size,
1);
if(IS_STATE(ray_state, ray_index, RAY_VOLUME_INDIRECT_NEXT_ITER)) {
/* for render passes, sum and reset indirect light pass variables
* for the next samples */
path_radiance_sum_indirect(&kernel_split_state.path_radiance[ray_index]);
path_radiance_reset_indirect(&kernel_split_state.path_radiance[ray_index]);
/* Resume the volume sample loop; a true result marks the ray
 * RAY_REGENERATED. */
if(kernel_split_branched_path_volume_indirect_light_iter(kg, ray_index)) {
ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
}
}
# endif /* __BRANCHED_PATH__ */
#endif /* __VOLUME__ */
}
CCL_NAMESPACE_END