Cleanup: comments (long lines) in cycles

Campbell Barton
2019-05-01 21:14:11 +10:00
parent 177a0ca131
commit c47d669f24
72 changed files with 504 additions and 402 deletions

View File

@@ -155,8 +155,8 @@ void BlenderSession::create_session()
   /* There is no single depsgraph to use for the entire render.
    * So we need to handle this differently.
    *
-   * We could loop over the final render result render layers in pipeline and keep Cycles unaware of multiple layers,
-   * or perhaps move syncing further down in the pipeline.
+   * We could loop over the final render result render layers in pipeline and keep Cycles unaware
+   * of multiple layers, or perhaps move syncing further down in the pipeline.
    */
   /* create sync */
   sync = new BlenderSync(b_engine, b_data, b_scene, scene, !background, session->progress);
@@ -528,14 +528,15 @@ void BlenderSession::render(BL::Depsgraph &b_depsgraph_)
   /* Attempt to free all data which is held by Blender side, since at this
    * point we knwo that we've got everything to render current view layer.
    */
-  /* At the moment we only free if we are not doing multi-view (or if we are rendering the last view).
-   * See T58142/D4239 for discussion.
+  /* At the moment we only free if we are not doing multi-view
+   * (or if we are rendering the last view). See T58142/D4239 for discussion.
    */
   if (view_index == num_views - 1) {
     free_blender_memory_if_possible();
   }
-  /* Make sure all views have different noise patterns. - hardcoded value just to make it random */
+  /* Make sure all views have different noise patterns. - hardcoded value just to make it random
+   */
   if (view_index != 0) {
     scene->integrator->seed += hash_int_2d(scene->integrator->seed,
                                            hash_int(view_index * 0xdeadbeef));
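For illustration, the per-view decorrelation above amounts to folding a hash of the view index into the integrator seed. A minimal standalone sketch, using a generic Wang-style integer hash as a stand-in for Cycles' hash_int()/hash_int_2d() utilities (hash_u32/hash_u32_2d are hypothetical names):

    #include <cstdint>
    #include <cstdio>

    // Stand-in for Cycles' hash_int(): a Wang-style integer hash.
    static uint32_t hash_u32(uint32_t k)
    {
      k = (k ^ 61u) ^ (k >> 16);
      k *= 9u;
      k = k ^ (k >> 4);
      k *= 0x27d4eb2du;
      k = k ^ (k >> 15);
      return k;
    }

    // Stand-in for hash_int_2d(): combine two values into one hash.
    static uint32_t hash_u32_2d(uint32_t a, uint32_t b)
    {
      return hash_u32(a ^ hash_u32(b));
    }

    int main()
    {
      uint32_t seed = 42;
      for (uint32_t view_index = 0; view_index < 3; view_index++) {
        uint32_t view_seed = seed;
        if (view_index != 0) {
          // Same idea as in the diff above: offset the seed by a hash of the
          // view index so each multi-view pass gets an independent noise pattern.
          view_seed += hash_u32_2d(seed, hash_u32(view_index * 0xdeadbeef));
        }
        printf("view %u -> seed %u\n", view_index, view_seed);
      }
      return 0;
    }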
@@ -1057,8 +1058,9 @@ void BlenderSession::update_status_progress()
   }
   double current_time = time_dt();
-  /* When rendering in a window, redraw the status at least once per second to keep the elapsed and remaining time up-to-date.
-   * For headless rendering, only report when something significant changes to keep the console output readable. */
+  /* When rendering in a window, redraw the status at least once per second to keep the elapsed and
+   * remaining time up-to-date. For headless rendering, only report when something significant
+   * changes to keep the console output readable. */
   if (status != last_status || (!headless && (current_time - last_status_time) > 1.0)) {
     b_engine.update_stats("", (timestatus + scene_status + status).c_str());
     b_engine.update_memory_stats(mem_used, mem_peak);
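The throttling logic the comment describes is a common pattern and can be sketched in isolation; std::chrono stands in for Cycles' time_dt(), and StatusReporter is a hypothetical name:

    #include <chrono>
    #include <string>

    // Seconds since an arbitrary epoch, standing in for Cycles' time_dt().
    static double time_now()
    {
      using namespace std::chrono;
      return duration<double>(steady_clock::now().time_since_epoch()).count();
    }

    struct StatusReporter {
      std::string last_status;
      double last_status_time = 0.0;
      bool headless = false;

      // Report when the status text changed, or (in windowed mode) at least
      // once per second so elapsed/remaining times keep ticking.
      bool should_report(const std::string &status)
      {
        const double now = time_now();
        if (status != last_status || (!headless && (now - last_status_time) > 1.0)) {
          last_status = status;
          last_status_time = now;
          return true;
        }
        return false;
      }
    };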

View File

@@ -1,30 +1,30 @@
 /*
  * Original code Copyright 2017, Intel Corporation
  * Modifications Copyright 2018, Blender Foundation.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
  *
  *  * Redistributions of source code must retain the above copyright notice,
  *    this list of conditions and the following disclaimer.
  *  * Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *  * Neither the name of Intel Corporation nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 #include "bvh/bvh8.h"

View File

@@ -1,30 +1,30 @@
 /*
  * Original code Copyright 2017, Intel Corporation
  * Modifications Copyright 2018, Blender Foundation.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
  *
  *  * Redistributions of source code must retain the above copyright notice,
  *    this list of conditions and the following disclaimer.
  *  * Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *  * Neither the name of Intel Corporation nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 #ifndef __BVH8_H__
 #define __BVH8_H__
@@ -50,9 +50,9 @@ class Progress;
 #define BVH_UNALIGNED_ONODE_SIZE 28
 /* BVH8
  *
  * Octo BVH, with each node having eight children, to use with SIMD instructions.
  */
 class BVH8 : public BVH {
  protected:
   /* constructor */

View File

@@ -18,18 +18,19 @@
  * It supports triangles, curves, object and deformation blur and instancing.
  * Not supported are thick line segments, those have no native equivalent in Embree.
  * They could be implemented using Embree's thick curves, at the expense of wasted memory.
- * User defined intersections for Embree could also be an option, but since Embree only uses aligned BVHs
- * for user geometry, this would come with reduced performance and/or higher memory usage.
+ * User defined intersections for Embree could also be an option, but since Embree only uses
+ * aligned BVHs for user geometry, this would come with reduced performance and/or higher memory
+ * usage.
  *
- * Since Embree allows object to be either curves or triangles but not both, Cycles object IDs are maapped
- * to Embree IDs by multiplying by two and adding one for curves.
+ * Since Embree allows object to be either curves or triangles but not both, Cycles object IDs are
+ * maapped to Embree IDs by multiplying by two and adding one for curves.
  *
- * This implementation shares RTCDevices between Cycles instances. Eventually each instance should get
- * a separate RTCDevice to correctly keep track of memory usage.
+ * This implementation shares RTCDevices between Cycles instances. Eventually each instance should
+ * get a separate RTCDevice to correctly keep track of memory usage.
  *
- * Vertex and index buffers are duplicated between Cycles device arrays and Embree. These could be merged,
- * which would requrie changes to intersection refinement, shader setup, mesh light sampling and a few
- * other places in Cycles where direct access to vertex data is required.
+ * Vertex and index buffers are duplicated between Cycles device arrays and Embree. These could be
+ * merged, which would requrie changes to intersection refinement, shader setup, mesh light
+ * sampling and a few other places in Cycles where direct access to vertex data is required.
  */
 #ifdef WITH_EMBREE
@@ -40,7 +41,8 @@
 #  include "bvh/bvh_embree.h"
-/* Kernel includes are necessary so that the filter function for Embree can access the packed BVH. */
+/* Kernel includes are necessary so that the filter function for Embree can access the packed BVH.
+ */
 #  include "kernel/bvh/bvh_embree.h"
 #  include "kernel/kernel_compat_cpu.h"
 #  include "kernel/split/kernel_split_data_types.h"
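The ID mapping described above (multiply by two, add one for curves) is plain arithmetic; a small sketch for illustration, where the helper names are hypothetical, not actual bvh_embree functions:

    #include <cassert>

    // One Cycles object maps to two possible Embree IDs: 2*id for its
    // triangle geometry and 2*id + 1 for its curve geometry, so a single
    // integer encodes both the object and the geometry type.
    static int to_embree_id(int cycles_object_id, bool is_curve)
    {
      return cycles_object_id * 2 + (is_curve ? 1 : 0);
    }

    static int to_cycles_id(int embree_id)
    {
      return embree_id / 2;
    }

    static bool embree_id_is_curve(int embree_id)
    {
      return (embree_id & 1) != 0;
    }

    int main()
    {
      assert(to_embree_id(7, false) == 14);
      assert(to_embree_id(7, true) == 15);
      assert(to_cycles_id(15) == 7 && embree_id_is_curve(15));
      return 0;
    }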

View File

@@ -287,7 +287,8 @@ void Device::draw_pixels(device_memory &rgba,
   }
   glBindBuffer(GL_ARRAY_BUFFER, vertex_buffer);
-  /* invalidate old contents - avoids stalling if buffer is still waiting in queue to be rendered */
+  /* invalidate old contents - avoids stalling if buffer is still waiting in queue to be rendered
+   */
   glBufferData(GL_ARRAY_BUFFER, 16 * sizeof(float), NULL, GL_STREAM_DRAW);
   float *vpointer = (float *)glMapBuffer(GL_ARRAY_BUFFER, GL_WRITE_ONLY);
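The comment refers to the classic GL "buffer orphaning" idiom: re-specifying the store with a NULL data pointer detaches the old storage, so the driver need not wait for in-flight draws that still read it. A minimal sketch of the pattern, assuming a current GL context and a loaded function loader such as GLEW:

    #include <GL/glew.h>

    // Sketch of buffer orphaning as used in draw_pixels() above.
    void upload_quad_verts(GLuint vbo, const float verts[16])
    {
      glBindBuffer(GL_ARRAY_BUFFER, vbo);
      // Orphan the old storage: the NULL pointer tells the driver it may hand
      // us fresh memory instead of stalling on pending draws that still read
      // the previous contents.
      glBufferData(GL_ARRAY_BUFFER, 16 * sizeof(float), NULL, GL_STREAM_DRAW);
      if (float *p = (float *)glMapBuffer(GL_ARRAY_BUFFER, GL_WRITE_ONLY)) {
        for (int i = 0; i < 16; i++) {
          p[i] = verts[i];
        }
        glUnmapBuffer(GL_ARRAY_BUFFER);
      }
      glBindBuffer(GL_ARRAY_BUFFER, 0);
    }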

View File

@@ -1019,7 +1019,7 @@ class CUDADevice : public Device {
     size_t bytes;
     cuda_assert(cuModuleGetGlobal(&mem, &bytes, cuModule, name));
-    //assert(bytes == size);
+    // assert(bytes == size);
     cuda_assert(cuMemcpyHtoD(mem, host, size));
   }
@@ -2127,7 +2127,8 @@ class CUDADevice : public Device {
   }
   glBindBuffer(GL_ARRAY_BUFFER, vertex_buffer);
-  /* invalidate old contents - avoids stalling if buffer is still waiting in queue to be rendered */
+  /* invalidate old contents -
+   * avoids stalling if buffer is still waiting in queue to be rendered */
   glBufferData(GL_ARRAY_BUFFER, 16 * sizeof(float), NULL, GL_STREAM_DRAW);
   vpointer = (float *)glMapBuffer(GL_ARRAY_BUFFER, GL_WRITE_ONLY);

View File

@@ -104,7 +104,8 @@ void DenoisingTask::set_render_buffer(RenderTile *rtiles)
 void DenoisingTask::setup_denoising_buffer()
 {
-  /* Expand filter_area by radius pixels and clamp the result to the extent of the neighboring tiles */
+  /* Expand filter_area by radius pixels and clamp the result to the extent of the neighboring
+   * tiles */
   rect = rect_from_shape(filter_area.x, filter_area.y, filter_area.z, filter_area.w);
   rect = rect_expand(rect, radius);
   rect = rect_clip(rect,
@@ -149,16 +150,19 @@ void DenoisingTask::prefilter_shadowing()
   device_sub_ptr buffer_var(buffer.mem, 5 * buffer.pass_stride, buffer.pass_stride);
   device_sub_ptr filtered_var(buffer.mem, 6 * buffer.pass_stride, buffer.pass_stride);
-  /* Get the A/B unfiltered passes, the combined sample variance, the estimated variance of the sample variance and the buffer variance. */
+  /* Get the A/B unfiltered passes, the combined sample variance, the estimated variance of the
+   * sample variance and the buffer variance. */
   functions.divide_shadow(*unfiltered_a, *unfiltered_b, *sample_var, *sample_var_var, *buffer_var);
-  /* Smooth the (generally pretty noisy) buffer variance using the spatial information from the sample variance. */
+  /* Smooth the (generally pretty noisy) buffer variance using the spatial information from the
+   * sample variance. */
   nlm_state.set_parameters(6, 3, 4.0f, 1.0f, false);
   functions.non_local_means(*buffer_var, *sample_var, *sample_var_var, *filtered_var);
   /* Reuse memory, the previous data isn't needed anymore. */
   device_ptr filtered_a = *buffer_var, filtered_b = *sample_var;
-  /* Use the smoothed variance to filter the two shadow half images using each other for weight calculation. */
+  /* Use the smoothed variance to filter the two shadow half images using each other for weight
+   * calculation. */
   nlm_state.set_parameters(5, 3, 1.0f, 0.25f, false);
   functions.non_local_means(*unfiltered_a, *unfiltered_b, *filtered_var, filtered_a);
   functions.non_local_means(*unfiltered_b, *unfiltered_a, *filtered_var, filtered_b);
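functions.non_local_means() above applies NL-means filtering; the core weighting idea can be sketched in a generic scalar form. This is the textbook variance-offset weight, shown only for orientation, not the exact Cycles kernel:

    #include <algorithm>
    #include <cmath>

    // Generic NL-means weight between two patches: a squared patch difference
    // that is explained by the expected noise variance gets weight ~1, while
    // differences well above the noise level are suppressed exponentially.
    float nlm_weight(float patch_dist_sq, float variance, float h)
    {
      const float d = std::max(0.0f, patch_dist_sq - 2.0f * variance);
      return std::exp(-d / (h * h));
    }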

View File

@@ -136,8 +136,8 @@ string device_opencl_capabilities()
   }
   string result = "";
   string error_msg = ""; /* Only used by opencl_assert(), but in the future
                           * it could also be nicely reported to the console.
                           */
   cl_uint num_platforms = 0;
   opencl_assert(device_opencl_get_num_platforms_safe(&num_platforms));
   if (num_platforms == 0) {

View File

@@ -27,7 +27,7 @@ CCL_NAMESPACE_BEGIN
  * Since some bytes may be needed for aligning chunks of memory;
  * This is the amount of memory that we dedicate for that purpose.
  */
-#define DATA_ALLOCATION_MEM_FACTOR 5000000 //5MB
+#define DATA_ALLOCATION_MEM_FACTOR 5000000 // 5MB
 /* Types used for split kernel */
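The alignment padding that this headroom is reserved for boils down to rounding sizes up to an alignment boundary; a minimal sketch (align_up is a hypothetical helper, not Cycles code):

    #include <cassert>
    #include <cstddef>

    // Round a size up to the next multiple of a power-of-two alignment.
    // This is the kind of per-chunk padding the DATA_ALLOCATION_MEM_FACTOR
    // headroom above is set aside for.
    static size_t align_up(size_t size, size_t alignment)
    {
      assert((alignment & (alignment - 1)) == 0); // power of two
      return (size + alignment - 1) & ~(alignment - 1);
    }

    int main()
    {
      assert(align_up(13, 16) == 16);
      assert(align_up(32, 16) == 32);
      // Worst case each chunk wastes (alignment - 1) bytes, hence a fixed
      // memory factor is dedicated to alignment overhead.
      return 0;
    }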

View File

@@ -40,7 +40,8 @@ class DenoiseParams {
   float strength;
   /* Preserve more or less detail based on feature passes. */
   float feature_strength;
-  /* When removing pixels that don't carry information, use a relative threshold instead of an absolute one. */
+  /* When removing pixels that don't carry information,
+   * use a relative threshold instead of an absolute one. */
   bool relative_pca;
   /* How many frames before and after the current center frame are included. */
   int neighbor_frames;

View File

@@ -358,8 +358,8 @@ class OpenCLDevice : public Device {
   OpenCLSplitPrograms(OpenCLDevice *device);
   ~OpenCLSplitPrograms();
-  /* Load the kernels and put the created kernels in the given `programs`
-   * paramter. */
+  /* Load the kernels and put the created kernels in the given
+   * `programs` paramter. */
   void load_kernels(vector<OpenCLProgram *> &programs,
                     const DeviceRequestedFeatures &requested_features,
                     bool is_preview = false);

View File

@@ -265,7 +265,7 @@ void OpenCLDevice::OpenCLSplitPrograms::load_kernels(
     ADD_SPLIT_KERNEL_PROGRAM(shader_eval);
     /* Quick kernels bundled in a single program to reduce overhead of starting
      * Blender processes. */
     program_split = OpenCLDevice::OpenCLProgram(
         device,
         "split_bundle",
@@ -668,7 +668,8 @@ OpenCLDevice::OpenCLDevice(DeviceInfo &info, Stats &stats, Profiler &profiler, b
     return;
   }
-  /* Allocate this right away so that texture_info is placed at offset 0 in the device memory buffers */
+  /* Allocate this right away so that texture_info
+   * is placed at offset 0 in the device memory buffers. */
   texture_info.resize(1);
   memory_manager.alloc("texture_info", texture_info);
@@ -1149,7 +1150,8 @@ void OpenCLDevice::tex_alloc(device_memory &mem)
           << string_human_readable_size(mem.memory_size()) << ")";
   memory_manager.alloc(mem.name, mem);
-  /* Set the pointer to non-null to keep code that inspects its value from thinking its unallocated. */
+  /* Set the pointer to non-null to keep code that inspects its value from thinking its
+   * unallocated. */
   mem.device_pointer = 1;
   textures[mem.name] = &mem;
   textures_need_update = true;

View File

@@ -431,7 +431,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
         }
         prim_addr++;
-      } //while
+      } // while
     }
     else {
       kernel_assert((kernel_tex_fetch(__prim_type, (prim_addr)) & PRIMITIVE_ALL) ==
@@ -568,7 +568,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
         }
         prim_addr++;
-      } //while prim
+      } // while prim
     }
   }
 #if BVH_FEATURE(BVH_INSTANCING)

View File

@@ -333,8 +333,8 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
         }
         /* Eight children are hit, push all onto stack and sort 8
          * stack items, continue with closest child.
          */
         r = __bscf(child_mask);
         int c7 = __float_as_int(cnodes[r]);
         float d7 = ((float *)&dist)[r];
@@ -409,7 +409,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
               return true;
             }
           }
-        } //for
+        } // for
       }
       else {
         kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
@@ -430,7 +430,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
               return true;
             }
           }
-        } //prim count
+        } // prim count
         break;
       }
 #if BVH_FEATURE(BVH_MOTION)

View File

@@ -127,7 +127,7 @@ ccl_device_inline void qbvh_stack_sort(QBVHStackItem *ccl_restrict s1,
 /* Axis-aligned nodes intersection */
-//ccl_device_inline int qbvh_aligned_node_intersect(KernelGlobals *ccl_restrict kg,
+// ccl_device_inline int qbvh_aligned_node_intersect(KernelGlobals *ccl_restrict kg,
 static int qbvh_aligned_node_intersect(KernelGlobals *ccl_restrict kg,
                                        const ssef &isect_near,
                                        const ssef &isect_far,
View File

@@ -37,7 +37,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
                                              uint *num_hits)
 {
   /* TODO(sergey):
    * - Test if pushing distance on the stack helps.
    * - Likely and unlikely for if() statements.
    * - Test restrict attribute for pointers.
    */

View File

@@ -85,15 +85,11 @@ ccl_device_forceinline float3 bsdf_ashikhmin_shirley_eval_reflect(const ShaderCl
     float HdotI = fmaxf(fabsf(dot(H, I)), 1e-6f);
     float HdotN = fmaxf(dot(H, N), 1e-6f);
-    float pump =
-        1.0f /
-        fmaxf(
-            1e-6f,
-            (HdotI *
-             fmaxf(
-                 NdotO,
-                 NdotI))); /* pump from original paper (first derivative disc., but cancels the HdotI in the pdf nicely) */
-    /*float pump = 1.0f / fmaxf(1e-4f, ((NdotO + NdotI) * (NdotO*NdotI))); */ /* pump from d-brdf paper */
+    /* pump from original paper
+     * (first derivative disc., but cancels the HdotI in the pdf nicely) */
+    float pump = 1.0f / fmaxf(1e-6f, (HdotI * fmaxf(NdotO, NdotI)));
+    /* pump from d-brdf paper */
+    /*float pump = 1.0f / fmaxf(1e-4f, ((NdotO + NdotI) * (NdotO*NdotI))); */
     float n_x = bsdf_ashikhmin_shirley_roughness_to_exponent(bsdf->alpha_x);
     float n_y = bsdf_ashikhmin_shirley_roughness_to_exponent(bsdf->alpha_y);
@@ -105,9 +101,8 @@ ccl_device_forceinline float3 bsdf_ashikhmin_shirley_eval_reflect(const ShaderCl
       float norm = (n_x + 1.0f) / (8.0f * M_PI_F);
       out = NdotO * norm * lobe * pump;
-      *pdf =
-          norm * lobe /
-          HdotI; /* this is p_h / 4(H.I) (conversion from 'wh measure' to 'wi measure', eq. 8 in paper) */
+      /* this is p_h / 4(H.I) (conversion from 'wh measure' to 'wi measure', eq. 8 in paper). */
+      *pdf = norm * lobe / HdotI;
     }
     else {
       /* anisotropic */

View File

@@ -224,7 +224,7 @@ ccl_device int bsdf_hair_reflection_sample(const ShaderClosure *sc,
   fast_sincosf(phi, &sinphi, &cosphi);
   *omega_in = (cosphi * costheta_i) * locy - (sinphi * costheta_i) * locx + (sintheta_i)*Tg;
-  //differentials - TODO: find a better approximation for the reflective bounce
+  // differentials - TODO: find a better approximation for the reflective bounce
 #ifdef __RAY_DIFFERENTIALS__
   *domega_in_dx = 2 * dot(locy, dIdx) * locy - dIdx;
   *domega_in_dy = 2 * dot(locy, dIdy) * locy - dIdy;
@@ -285,7 +285,7 @@ ccl_device int bsdf_hair_transmission_sample(const ShaderClosure *sc,
   fast_sincosf(phi, &sinphi, &cosphi);
   *omega_in = (cosphi * costheta_i) * locy - (sinphi * costheta_i) * locx + (sintheta_i)*Tg;
-  //differentials - TODO: find a better approximation for the transmission bounce
+  // differentials - TODO: find a better approximation for the transmission bounce
 #ifdef __RAY_DIFFERENTIALS__
   *domega_in_dx = 2 * dot(locy, dIdx) * locy - dIdx;
   *domega_in_dy = 2 * dot(locy, dIdy) * locy - dIdy;

View File

@@ -60,7 +60,8 @@ ccl_device_inline float cos_from_sin(const float s)
   return safe_sqrtf(1.0f - s * s);
 }
-/* Gives the change in direction in the normal plane for the given angles and p-th-order scattering. */
+/* Gives the change in direction in the normal plane for the given angles and p-th-order
+ * scattering. */
 ccl_device_inline float delta_phi(int p, float gamma_o, float gamma_t)
 {
   return 2.0f * p * gamma_t - 2.0f * gamma_o + p * M_PI_F;

View File

@@ -16,7 +16,8 @@
 CCL_NAMESPACE_BEGIN
-/* Most of the code is based on the supplemental implementations from https://eheitzresearch.wordpress.com/240-2/. */
+/* Most of the code is based on the supplemental implementations from
+ * https://eheitzresearch.wordpress.com/240-2/. */
 /* === GGX Microfacet distribution functions === */
@@ -80,7 +81,8 @@ ccl_device_forceinline float2 mf_sampleP22_11(const float cosI,
   return make_float2(slopeX, -slopeY);
 }
-/* Visible normal sampling for the GGX distribution (based on page 7 of the supplemental implementation). */
+/* Visible normal sampling for the GGX distribution
+ * (based on page 7 of the supplemental implementation). */
 ccl_device_forceinline float3 mf_sample_vndf(const float3 wi,
                                              const float2 alpha,
                                              const float randx,
@@ -134,7 +136,8 @@ ccl_device_forceinline float3 mf_eval_phase_glossy(const float3 w,
   return make_float3(phase, phase, phase);
 }
-/* Phase function for dielectric transmissive materials, including both reflection and refraction according to the dielectric fresnel term. */
+/* Phase function for dielectric transmissive materials, including both reflection and refraction
+ * according to the dielectric fresnel term. */
 ccl_device_forceinline float3 mf_sample_phase_glass(
     const float3 wi, const float eta, const float3 wm, const float randV, bool *outside)
 {
@@ -227,7 +230,8 @@ ccl_device_forceinline float mf_G1(const float3 w, const float C1, const float l
   return powf(C1, lambda);
 }
-/* Sampling from the visible height distribution (based on page 17 of the supplemental implementation). */
+/* Sampling from the visible height distribution (based on page 17 of the supplemental
+ * implementation). */
 ccl_device_forceinline bool mf_sample_height(
     const float3 w, float *h, float *C1, float *G1, float *lambda, const float U)
 {
@@ -254,7 +258,8 @@ ccl_device_forceinline bool mf_sample_height(
 }
 /* === PDF approximations for the different phase functions. ===
- * As explained in bsdf_microfacet_multi_impl.h, using approximations with MIS still produces an unbiased result. */
+ * As explained in bsdf_microfacet_multi_impl.h, using approximations with MIS still produces an
+ * unbiased result. */
 /* Approximation for the albedo of the single-scattering GGX distribution,
  * the missing energy is then approximated as a diffuse reflection for the PDF. */
@@ -342,7 +347,8 @@ ccl_device_forceinline float mf_glass_pdf(const float3 wi,
   }
 }
-/* === Actual random walk implementations, one version of mf_eval and mf_sample per phase function. === */
+/* === Actual random walk implementations === */
+/* One version of mf_eval and mf_sample per phase function. */
 #define MF_NAME_JOIN(x, y) x##_##y
 #define MF_NAME_EVAL(x, y) MF_NAME_JOIN(x, y)
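The MF_NAME_JOIN/MF_NAME_EVAL pair is standard two-level token pasting: the extra indirection forces the arguments to be macro-expanded before `##` glues them together. A tiny standalone sketch of how this stamps out one function per phase function (the names here are illustrative):

    #include <cstdio>

    #define MF_NAME_JOIN(x, y) x##_##y
    #define MF_NAME_EVAL(x, y) MF_NAME_JOIN(x, y)

    // With MF_PHASE_FUNCTION defined before inclusion, the declaration below
    // expands to mf_eval_glossy(), mf_eval_glass(), ... one per phase function.
    #define MF_PHASE_FUNCTION glossy
    static float MF_NAME_EVAL(mf_eval, MF_PHASE_FUNCTION)(float x)
    {
      return x; /* placeholder body */
    }

    int main()
    {
      printf("%f\n", mf_eval_glossy(1.0f));
      return 0;
    }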

View File

@@ -16,14 +16,14 @@
 /* Evaluate the BSDF from wi to wo.
  * Evaluation is split into the analytical single-scattering BSDF and the multi-scattering BSDF,
- * which is evaluated stochastically through a random walk. At each bounce (except for the first one),
- * the amount of reflection from here towards wo is evaluated before bouncing again.
+ * which is evaluated stochastically through a random walk. At each bounce (except for the first
+ * one), the amount of reflection from here towards wo is evaluated before bouncing again.
  *
- * Because of the random walk, the evaluation is not deterministic, but its expected value is equal to
- * the correct BSDF, which is enough for Monte-Carlo rendering. The PDF also can't be determined
- * analytically, so the single-scattering PDF plus a diffuse term to account for the multi-scattered
- * energy is used. In combination with MIS, that is enough to produce an unbiased result, although
- * the balance heuristic isn't necessarily optimal anymore.
+ * Because of the random walk, the evaluation is not deterministic, but its expected value is equal
+ * to the correct BSDF, which is enough for Monte-Carlo rendering. The PDF also can't be determined
+ * analytically, so the single-scattering PDF plus a diffuse term to account for the
+ * multi-scattered energy is used. In combination with MIS, that is enough to produce an unbiased
+ * result, although the balance heuristic isn't necessarily optimal anymore.
  */
 ccl_device_forceinline float3 MF_FUNCTION_FULL_NAME(mf_eval)(float3 wi,
                                                              float3 wo,
@@ -36,7 +36,8 @@ ccl_device_forceinline float3 MF_FUNCTION_FULL_NAME(mf_eval)(float3 wi,
                                                              bool use_fresnel,
                                                              const float3 cspec0)
 {
-  /* Evaluating for a shallower incoming direction produces less noise, and the properties of the BSDF guarantee reciprocity. */
+  /* Evaluating for a shallower incoming direction produces less noise, and the properties of the
+   * BSDF guarantee reciprocity. */
   bool swapped = false;
 #ifdef MF_MULTI_GLASS
   if (wi.z * wo.z < 0.0f) {
@@ -180,9 +181,9 @@ ccl_device_forceinline float3 MF_FUNCTION_FULL_NAME(mf_eval)(float3 wi,
   return eval;
 }
-/* Perform a random walk on the microsurface starting from wi, returning the direction in which the walk
- * escaped the surface in wo. The function returns the throughput between wi and wo.
- * Without reflection losses due to coloring or fresnel absorption in conductors, the sampling is optimal.
+/* Perform a random walk on the microsurface starting from wi, returning the direction in which the
+ * walk escaped the surface in wo. The function returns the throughput between wi and wo. Without
+ * reflection losses due to coloring or fresnel absorption in conductors, the sampling is optimal.
  */
 ccl_device_forceinline float3 MF_FUNCTION_FULL_NAME(mf_sample)(float3 wi,
                                                                float3 *wo,

View File

@@ -155,7 +155,7 @@ interpolate_fresnel_color(float3 L, float3 H, float ior, float F0, float3 cspec0
   /* Calculate the fresnel interpolation factor
    * The value from fresnel_dielectric_cos(...) has to be normalized because
    * the cspec0 keeps the F0 color
    */
   float F0_norm = 1.0f / (1.0f - F0);
   float FH = (fresnel_dielectric_cos(dot(L, H), ior) - F0) * F0_norm;

View File

@@ -450,7 +450,8 @@ ccl_device void bssrdf_sample(const ShaderClosure *sc, float xi, float *r, float
   else if (bssrdf->type == CLOSURE_BSSRDF_GAUSSIAN_ID) {
     bssrdf_gaussian_sample(radius, xi, r, h);
   }
-  else { /*if(bssrdf->type == CLOSURE_BSSRDF_BURLEY_ID || bssrdf->type == CLOSURE_BSSRDF_PRINCIPLED_ID)*/
+  else { /* if (bssrdf->type == CLOSURE_BSSRDF_BURLEY_ID ||
+          * bssrdf->type == CLOSURE_BSSRDF_PRINCIPLED_ID) */
     bssrdf_burley_sample(radius, xi, r, h);
   }
 }
@@ -466,7 +467,8 @@ ccl_device float bssrdf_channel_pdf(const Bssrdf *bssrdf, float radius, float r)
   else if (bssrdf->type == CLOSURE_BSSRDF_GAUSSIAN_ID) {
     return bssrdf_gaussian_pdf(radius, r);
   }
-  else { /*if(bssrdf->type == CLOSURE_BSSRDF_BURLEY_ID || bssrdf->type == CLOSURE_BSSRDF_PRINCIPLED_ID)*/
+  else { /* if (bssrdf->type == CLOSURE_BSSRDF_BURLEY_ID ||
+          * bssrdf->type == CLOSURE_BSSRDF_PRINCIPLED_ID)*/
     return bssrdf_burley_pdf(radius, r);
   }
 }

View File

@@ -18,8 +18,9 @@ CCL_NAMESPACE_BEGIN
 #define ccl_get_feature(buffer, pass) (buffer)[(pass)*pass_stride]
-/* Loop over the pixels in the range [low.x, high.x) x [low.y, high.y).+ * pixel_buffer always points to the current pixel in the first pass.
- * Repeat the loop for every secondary frame if there are any. */
+/* Loop over the pixels in the range [low.x, high.x) x [low.y, high.y).+ * pixel_buffer always
+ * points to the current pixel in the first pass. Repeat the loop for every secondary frame if
+ * there are any. */
 #define FOR_PIXEL_WINDOW \
   for (int frame = 0; frame < tile_info->num_frames; frame++) { \
     pixel.z = tile_info->frames[frame]; \
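What a FOR_PIXEL_WINDOW-style macro expands to is essentially a triple loop; a hypothetical plain-function version for illustration (the real macro also advances pixel_buffer by the pass stride, which this sketch omits):

    #include <cstdio>

    // Visit every pixel in [low_x, high_x) x [low_y, high_y), once per frame.
    void for_pixel_window(int low_x, int low_y, int high_x, int high_y, int num_frames)
    {
      for (int frame = 0; frame < num_frames; frame++) {
        for (int y = low_y; y < high_y; y++) {
          for (int x = low_x; x < high_x; x++) {
            printf("frame %d, pixel (%d, %d)\n", frame, x, y);
          }
        }
      }
    }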

View File

@@ -20,8 +20,8 @@ CCL_NAMESPACE_BEGIN
 /* Loop over the pixels in the range [low.x, high.x) x [low.y, high.y), 4 at a time.
  * pixel_buffer always points to the first of the 4 current pixel in the first pass.
- * x4 and y4 contain the coordinates of the four pixels, active_pixels contains a mask that's set for all pixels within the window.
- * Repeat the loop for every secondary frame if there are any. */
+ * x4 and y4 contain the coordinates of the four pixels, active_pixels contains a mask that's set
+ * for all pixels within the window. Repeat the loop for every secondary frame if there are any. */
 #define FOR_PIXEL_WINDOW_SSE \
   for (int frame = 0; frame < tile_info->num_frames; frame++) { \
     pixel.z = tile_info->frames[frame]; \

View File

@@ -197,7 +197,8 @@ ccl_device_inline void kernel_filter_nlm_construct_gramian(int dx,
                                                             bool use_time)
 {
   int4 clip_area = rect_clip(rect, filter_window);
-  /* fy and fy are in filter-window-relative coordinates, while x and y are in feature-window-relative coordinates. */
+  /* fy and fy are in filter-window-relative coordinates,
+   * while x and y are in feature-window-relative coordinates. */
   for (int y = clip_area.y; y < clip_area.w; y++) {
     for (int x = clip_area.x; x < clip_area.z; x++) {
       const int low = max(rect.x, x - f);

View File

@@ -16,14 +16,19 @@
 CCL_NAMESPACE_BEGIN
-/* First step of the shadow prefiltering, performs the shadow division and stores all data
+/**
+ * First step of the shadow prefiltering, performs the shadow division and stores all data
  * in a nice and easy rectangular array that can be passed to the NLM filter.
  *
  * Calculates:
- * unfiltered: Contains the two half images of the shadow feature pass
- * sampleVariance: The sample-based variance calculated in the kernel. Note: This calculation is biased in general, and especially here since the variance of the ratio can only be approximated.
- * sampleVarianceV: Variance of the sample variance estimation, quite noisy (since it's essentially the buffer variance of the two variance halves)
- * bufferVariance: The buffer-based variance of the shadow feature. Unbiased, but quite noisy.
+ * \param unfiltered: Contains the two half images of the shadow feature pass
+ * \param sampleVariance: The sample-based variance calculated in the kernel.
+ * Note: This calculation is biased in general,
+ * and especially here since the variance of the ratio can only be approximated.
+ * \param sampleVarianceV: Variance of the sample variance estimation, quite noisy
+ * (since it's essentially the buffer variance of the two variance halves)
+ * \param bufferVariance: The buffer-based variance of the shadow feature.
+ * Unbiased, but quite noisy.
  */
 ccl_device void kernel_filter_divide_shadow(int sample,
                                             CCL_FILTER_TILE_INFO,
@@ -204,10 +209,10 @@ ccl_device void kernel_filter_detect_outliers(int x,
     if (L > ref) {
       /* The pixel appears to be an outlier.
-       * However, it may just be a legitimate highlight. Therefore, it is checked how likely it is that the pixel
-       * should actually be at the reference value:
-       * If the reference is within the 3-sigma interval, the pixel is assumed to be a statistical outlier.
-       * Otherwise, it is very unlikely that the pixel should be darker, which indicates a legitimate highlight.
+       * However, it may just be a legitimate highlight. Therefore, it is checked how likely it is
+       * that the pixel should actually be at the reference value: If the reference is within the
+       * 3-sigma interval, the pixel is assumed to be a statistical outlier. Otherwise, it is very
+       * unlikely that the pixel should be darker, which indicates a legitimate highlight.
        */
       if (pixel_variance < 0.0f || pixel_variance > 9.0f * max_variance) {
@@ -219,7 +224,8 @@ ccl_device void kernel_filter_detect_outliers(int x,
         float stddev = sqrtf(pixel_variance);
         if (L - 3 * stddev < ref) {
           /* The pixel is an outlier, so negate the depth value to mark it as one.
-           * Also, scale its brightness down to the outlier threshold to avoid trouble with the NLM weights. */
+           * Also, scale its brightness down to the outlier threshold to avoid trouble with the NLM
+           * weights. */
           depth[idx] = -depth[idx];
           float fac = ref / L;
           color *= fac;
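The 3-sigma test above is ordinary statistics; a minimal standalone sketch of the decision (function and variable names hypothetical):

    #include <cmath>

    // Decide whether a bright pixel is a statistical outlier or a real
    // highlight: if the reference value lies within three standard deviations
    // of the pixel's luminance, the pixel is plausibly just noise.
    bool is_statistical_outlier(float L, float ref, float pixel_variance)
    {
      const float stddev = std::sqrt(pixel_variance);
      return (L - 3.0f * stddev) < ref;
    }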

View File

@@ -55,7 +55,8 @@ ccl_device void kernel_filter_construct_transform(const float *ccl_restrict buff
   math_vector_scale(feature_means, 1.0f / num_pixels, num_features);
-  /* === Scale the shifted feature passes to a range of [-1; 1], will be baked into the transform later. === */
+  /* === Scale the shifted feature passes to a range of [-1; 1] ===
+   * Will be baked into the transform later. */
   float feature_scale[DENOISE_FEATURES];
   math_vector_zero(feature_scale, num_features);
@@ -69,8 +70,9 @@ ccl_device void kernel_filter_construct_transform(const float *ccl_restrict buff
   filter_calculate_scale(feature_scale, use_time);
   /* === Generate the feature transformation. ===
-   * This transformation maps the num_features-dimentional feature space to a reduced feature (r-feature) space
-   * which generally has fewer dimensions. This mainly helps to prevent overfitting. */
+   * This transformation maps the num_features-dimentional feature space to a reduced feature
+   * (r-feature) space which generally has fewer dimensions. This mainly helps to prevent
+   * overfitting. */
   float feature_matrix[DENOISE_FEATURES * DENOISE_FEATURES];
   math_matrix_zero(feature_matrix, num_features);
   FOR_PIXEL_WINDOW

View File

@@ -61,7 +61,8 @@ ccl_device void kernel_filter_construct_transform(const ccl_global float *ccl_re
   math_vector_scale(feature_means, 1.0f / num_pixels, num_features);
-  /* === Scale the shifted feature passes to a range of [-1; 1], will be baked into the transform later. === */
+  /* === Scale the shifted feature passes to a range of [-1; 1] ===
+   * Will be baked into the transform later. */
   float feature_scale[DENOISE_FEATURES];
   math_vector_zero(feature_scale, num_features);
@@ -75,8 +76,9 @@ ccl_device void kernel_filter_construct_transform(const ccl_global float *ccl_re
   filter_calculate_scale(feature_scale, use_time);
   /* === Generate the feature transformation. ===
-   * This transformation maps the num_features-dimentional feature space to a reduced feature (r-feature) space
-   * which generally has fewer dimensions. This mainly helps to prevent overfitting. */
+   * This transformation maps the num_features-dimentional feature space to a reduced feature
+   * (r-feature) space which generally has fewer dimensions. This mainly helps to prevent
+   * overfitting. */
   float feature_matrix[DENOISE_FEATURES * DENOISE_FEATURES];
   math_matrix_zero(feature_matrix, num_features);
   FOR_PIXEL_WINDOW

View File

@@ -58,7 +58,8 @@ ccl_device void kernel_filter_construct_transform(const float *ccl_restrict buff
     feature_means[i] = reduce_add(feature_means[i]) * pixel_scale;
   }
-  /* === Scale the shifted feature passes to a range of [-1; 1], will be baked into the transform later. === */
+  /* === Scale the shifted feature passes to a range of [-1; 1] ===
+   * Will be baked into the transform later. */
   float4 feature_scale[DENOISE_FEATURES];
   math_vector_zero_sse(feature_scale, num_features);
   FOR_PIXEL_WINDOW_SSE
@@ -72,8 +73,9 @@ ccl_device void kernel_filter_construct_transform(const float *ccl_restrict buff
   filter_calculate_scale_sse(feature_scale, use_time);
   /* === Generate the feature transformation. ===
-   * This transformation maps the num_features-dimentional feature space to a reduced feature (r-feature) space
-   * which generally has fewer dimensions. This mainly helps to prevent overfitting. */
+   * This transformation maps the num_features-dimentional feature space to a reduced feature
+   * (r-feature) space which generally has fewer dimensions. This mainly helps to prevent
+   * overfitting. */
   float4 feature_matrix_sse[DENOISE_FEATURES * DENOISE_FEATURES];
   math_matrix_zero_sse(feature_matrix_sse, num_features);
   FOR_PIXEL_WINDOW_SSE

View File

@@ -386,7 +386,8 @@ ccl_device float3 particle_angular_velocity(KernelGlobals *kg, int particle)
 ccl_device_inline float3 bvh_clamp_direction(float3 dir)
 {
-  /* clamp absolute values by exp2f(-80.0f) to avoid division by zero when calculating inverse direction */
+  /* clamp absolute values by exp2f(-80.0f) to avoid division by zero when calculating inverse
+   * direction */
 #if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE2__)
   const ssef oopes(8.271806E-25f, 8.271806E-25f, 8.271806E-25f, 0.0f);
   const ssef mask = _mm_cmpgt_ps(fabs(dir), oopes);
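The scalar equivalent of the SSE clamp is straightforward; a hedged sketch of one component (2^-80 ≈ 8.271806e-25, matching the constant above):

    #include <cmath>

    // Scalar version of the direction clamp: components whose magnitude falls
    // below 2^-80 are snapped to +/- 2^-80 so that 1.0f / dir never divides by
    // zero when building the inverse ray direction for AABB slab tests.
    float clamp_direction_component(float d)
    {
      const float eps = std::exp2(-80.0f); /* ~8.271806e-25 */
      return (std::fabs(d) < eps) ? std::copysign(eps, d) : d;
    }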

View File

@@ -178,7 +178,7 @@ ccl_device_inline int ray_triangle_intersect8(KernelGlobals *kg,
                                             _mm256_cmpeq_epi32(two256, UVW_256_1));
   unsigned char mask_minmaxUVW_pos = _mm256_movemask_ps(_mm256_castsi256_ps(mask_minmaxUVW_256));
-  if ((mask_minmaxUVW_pos & prim_num_mask) == prim_num_mask) { //all bits set
+  if ((mask_minmaxUVW_pos & prim_num_mask) == prim_num_mask) { // all bits set
     return false;
   }
@@ -375,7 +375,7 @@ ccl_device_inline int triangle_intersect8(KernelGlobals *kg,
     tri_b[i] = *(__m128 *)&kg->__prim_tri_verts.data[tri_vindex++];
     tri_c[i] = *(__m128 *)&kg->__prim_tri_verts.data[tri_vindex++];
   }
-  //create 9 or 12 placeholders
+  // create 9 or 12 placeholders
   tri[0] = _mm256_castps128_ps256(tri_a[0]); //_mm256_zextps128_ps256
   tri[1] = _mm256_castps128_ps256(tri_b[0]); //_mm256_zextps128_ps256
   tri[2] = _mm256_castps128_ps256(tri_c[0]); //_mm256_zextps128_ps256
@@ -401,40 +401,40 @@ ccl_device_inline int triangle_intersect8(KernelGlobals *kg,
   }
   //------------------------------------------------
-  //0! Xa0 Ya0 Za0 1 Xa4 Ya4 Za4 1
-  //1! Xb0 Yb0 Zb0 1 Xb4 Yb4 Zb4 1
-  //2! Xc0 Yc0 Zc0 1 Xc4 Yc4 Zc4 1
-  //3! Xa1 Ya1 Za1 1 Xa5 Ya5 Za5 1
-  //4! Xb1 Yb1 Zb1 1 Xb5 Yb5 Zb5 1
-  //5! Xc1 Yc1 Zc1 1 Xc5 Yc5 Zc5 1
-  //6! Xa2 Ya2 Za2 1 Xa6 Ya6 Za6 1
-  //7! Xb2 Yb2 Zb2 1 Xb6 Yb6 Zb6 1
-  //8! Xc2 Yc2 Zc2 1 Xc6 Yc6 Zc6 1
-  //9! Xa3 Ya3 Za3 1 Xa7 Ya7 Za7 1
-  //10! Xb3 Yb3 Zb3 1 Xb7 Yb7 Zb7 1
-  //11! Xc3 Yc3 Zc3 1 Xc7 Yc7 Zc7 1
+  // 0! Xa0 Ya0 Za0 1 Xa4 Ya4 Za4 1
+  // 1! Xb0 Yb0 Zb0 1 Xb4 Yb4 Zb4 1
+  // 2! Xc0 Yc0 Zc0 1 Xc4 Yc4 Zc4 1
+  // 3! Xa1 Ya1 Za1 1 Xa5 Ya5 Za5 1
+  // 4! Xb1 Yb1 Zb1 1 Xb5 Yb5 Zb5 1
+  // 5! Xc1 Yc1 Zc1 1 Xc5 Yc5 Zc5 1
+  // 6! Xa2 Ya2 Za2 1 Xa6 Ya6 Za6 1
+  // 7! Xb2 Yb2 Zb2 1 Xb6 Yb6 Zb6 1
+  // 8! Xc2 Yc2 Zc2 1 Xc6 Yc6 Zc6 1
+  // 9! Xa3 Ya3 Za3 1 Xa7 Ya7 Za7 1
+  // 10! Xb3 Yb3 Zb3 1 Xb7 Yb7 Zb7 1
+  // 11! Xc3 Yc3 Zc3 1 Xc7 Yc7 Zc7 1
   //"transpose"
-  tritmp[0] = _mm256_unpacklo_ps(tri[0], tri[3]); //0! Xa0 Xa1 Ya0 Ya1 Xa4 Xa5 Ya4 Ya5
-  tritmp[1] = _mm256_unpackhi_ps(tri[0], tri[3]); //1! Za0 Za1 1 1 Za4 Za5 1 1
-  tritmp[2] = _mm256_unpacklo_ps(tri[6], tri[9]); //2! Xa2 Xa3 Ya2 Ya3 Xa6 Xa7 Ya6 Ya7
-  tritmp[3] = _mm256_unpackhi_ps(tri[6], tri[9]); //3! Za2 Za3 1 1 Za6 Za7 1 1
-  tritmp[4] = _mm256_unpacklo_ps(tri[1], tri[4]); //4! Xb0 Xb1 Yb0 Yb1 Xb4 Xb5 Yb4 Yb5
-  tritmp[5] = _mm256_unpackhi_ps(tri[1], tri[4]); //5! Zb0 Zb1 1 1 Zb4 Zb5 1 1
-  tritmp[6] = _mm256_unpacklo_ps(tri[7], tri[10]); //6! Xb2 Xb3 Yb2 Yb3 Xb6 Xb7 Yb6 Yb7
-  tritmp[7] = _mm256_unpackhi_ps(tri[7], tri[10]); //7! Zb2 Zb3 1 1 Zb6 Zb7 1 1
-  tritmp[8] = _mm256_unpacklo_ps(tri[2], tri[5]); //8! Xc0 Xc1 Yc0 Yc1 Xc4 Xc5 Yc4 Yc5
-  tritmp[9] = _mm256_unpackhi_ps(tri[2], tri[5]); //9! Zc0 Zc1 1 1 Zc4 Zc5 1 1
-  tritmp[10] = _mm256_unpacklo_ps(tri[8], tri[11]); //10! Xc2 Xc3 Yc2 Yc3 Xc6 Xc7 Yc6 Yc7
-  tritmp[11] = _mm256_unpackhi_ps(tri[8], tri[11]); //11! Zc2 Zc3 1 1 Zc6 Zc7 1 1
+  tritmp[0] = _mm256_unpacklo_ps(tri[0], tri[3]);   // 0! Xa0 Xa1 Ya0 Ya1 Xa4 Xa5 Ya4 Ya5
+  tritmp[1] = _mm256_unpackhi_ps(tri[0], tri[3]);   // 1! Za0 Za1 1 1 Za4 Za5 1 1
+  tritmp[2] = _mm256_unpacklo_ps(tri[6], tri[9]);   // 2! Xa2 Xa3 Ya2 Ya3 Xa6 Xa7 Ya6 Ya7
+  tritmp[3] = _mm256_unpackhi_ps(tri[6], tri[9]);   // 3! Za2 Za3 1 1 Za6 Za7 1 1
+  tritmp[4] = _mm256_unpacklo_ps(tri[1], tri[4]);   // 4! Xb0 Xb1 Yb0 Yb1 Xb4 Xb5 Yb4 Yb5
+  tritmp[5] = _mm256_unpackhi_ps(tri[1], tri[4]);   // 5! Zb0 Zb1 1 1 Zb4 Zb5 1 1
+  tritmp[6] = _mm256_unpacklo_ps(tri[7], tri[10]);  // 6! Xb2 Xb3 Yb2 Yb3 Xb6 Xb7 Yb6 Yb7
+  tritmp[7] = _mm256_unpackhi_ps(tri[7], tri[10]);  // 7! Zb2 Zb3 1 1 Zb6 Zb7 1 1
+  tritmp[8] = _mm256_unpacklo_ps(tri[2], tri[5]);   // 8! Xc0 Xc1 Yc0 Yc1 Xc4 Xc5 Yc4 Yc5
+  tritmp[9] = _mm256_unpackhi_ps(tri[2], tri[5]);   // 9! Zc0 Zc1 1 1 Zc4 Zc5 1 1
+  tritmp[10] = _mm256_unpacklo_ps(tri[8], tri[11]); // 10! Xc2 Xc3 Yc2 Yc3 Xc6 Xc7 Yc6 Yc7
+  tritmp[11] = _mm256_unpackhi_ps(tri[8], tri[11]); // 11! Zc2 Zc3 1 1 Zc6 Zc7 1 1
   /*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/
   triA[0] = _mm256_castpd_ps(
@@ -459,13 +459,13 @@ ccl_device_inline int triangle_intersect8(KernelGlobals *kg,
   triC[0] = _mm256_castpd_ps(
       _mm256_unpacklo_pd(_mm256_castps_pd(tritmp[8]),
-                         _mm256_castps_pd(tritmp[10]))); //Xc0 Xc1 Xc2 Xc3 Xc4 Xc5 Xc6 Xc7
+                         _mm256_castps_pd(tritmp[10]))); // Xc0 Xc1 Xc2 Xc3 Xc4 Xc5 Xc6 Xc7
   triC[1] = _mm256_castpd_ps(
       _mm256_unpackhi_pd(_mm256_castps_pd(tritmp[8]),
-                         _mm256_castps_pd(tritmp[10]))); //Yc0 Yc1 Yc2 Yc3 Yc4 Yc5 Yc6 Yc7
+                         _mm256_castps_pd(tritmp[10]))); // Yc0 Yc1 Yc2 Yc3 Yc4 Yc5 Yc6 Yc7
   triC[2] = _mm256_castpd_ps(
       _mm256_unpacklo_pd(_mm256_castps_pd(tritmp[9]),
-                         _mm256_castps_pd(tritmp[11]))); //Zc0 Zc1 Zc2 Zc3 Zc4 Zc5 Zc6 Zc7
+                         _mm256_castps_pd(tritmp[11]))); // Zc0 Zc1 Zc2 Zc3 Zc4 Zc5 Zc6 Zc7
   /*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/

View File

@@ -72,7 +72,8 @@ ccl_device_inline void compute_light_pass(
 #  ifdef __SUBSURFACE__
     /* sample subsurface scattering */
     if ((pass_filter & BAKE_FILTER_SUBSURFACE) && (sd->flag & SD_BSSRDF)) {
-      /* when mixing BSSRDF and BSDF closures we should skip BSDF lighting if scattering was successful */
+      /* When mixing BSSRDF and BSDF closures we should skip BSDF lighting
+       * if scattering was successful. */
       SubsurfaceIndirectRays ss_indirect;
       kernel_path_subsurface_init_indirect(&ss_indirect);
       if (kernel_path_subsurface_scatter(
@@ -123,7 +124,8 @@ ccl_device_inline void compute_light_pass(
 #  ifdef __SUBSURFACE__
     /* sample subsurface scattering */
     if ((pass_filter & BAKE_FILTER_SUBSURFACE) && (sd->flag & SD_BSSRDF)) {
-      /* when mixing BSSRDF and BSDF closures we should skip BSDF lighting if scattering was successful */
+      /* When mixing BSSRDF and BSDF closures we should skip BSDF lighting
+       * if scattering was successful. */
       kernel_branched_path_subsurface_scatter(
           kg, sd, &indirect_sd, &emission_sd, &L_sample, &state, &ray, throughput);
     }

View File

@@ -1,18 +1,18 @@
 /*
  * Copyright 2018 Blender Foundation
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
  * http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
 CCL_NAMESPACE_BEGIN
@@ -32,7 +32,7 @@ ccl_device_inline void kernel_write_id_slots(ccl_global float *buffer,
     /* If the loop reaches an empty slot, the ID isn't in any slot yet - so add it! */
     if (id_buffer[slot].x == ID_NONE) {
       /* Use an atomic to claim this slot.
        * If a different thread got here first, try again from this slot on. */
       float old_id = atomic_compare_and_swap_float(buffer + slot * 2, ID_NONE, id);
       if (old_id != ID_NONE && old_id != id) {
         continue;
@@ -54,7 +54,7 @@ ccl_device_inline void kernel_write_id_slots(ccl_global float *buffer,
break; break;
} }
/* If there already is a slot for that ID, add the weight. /* If there already is a slot for that ID, add the weight.
* If no slot was found, add it to the last. */ * If no slot was found, add it to the last. */
else if (id_buffer[slot].x == id || slot == num_slots - 1) { else if (id_buffer[slot].x == id || slot == num_slots - 1) {
id_buffer[slot].y += weight; id_buffer[slot].y += weight;
break; break;
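The claim-or-accumulate scheme these comments describe can be sketched on the host with std::atomic (hypothetical types and names; the kernel uses a float compare-and-swap on the pass buffer instead):

    #include <atomic>

    static const int ID_NONE = -1;

    struct IdSlot {
      std::atomic<int> id; /* ID_NONE while the slot is empty. */
      float weight;
    };

    /* Claim the first free slot for `id`, or accumulate into the slot that
     * already holds it; fall back to the last slot when the table is full. */
    static void write_id_slot(IdSlot *slots, int num_slots, int id, float weight)
    {
      for (int slot = 0; slot < num_slots; slot++) {
        int expected = ID_NONE;
        if (slots[slot].id.compare_exchange_strong(expected, id) ||
            expected == id || slot == num_slots - 1) {
          slots[slot].weight += weight; /* Not atomic in this sketch. */
          break;
        }
        /* A different thread owns this slot; try the next one. */
      }
    }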

View File

@@ -524,7 +524,8 @@ ccl_device float background_light_pdf(KernelGlobals *kg, float3 P, float3 direct
portal_pdf = background_portal_pdf(kg, P, direction, -1, &is_possible) * portal_sampling_pdf; portal_pdf = background_portal_pdf(kg, P, direction, -1, &is_possible) * portal_sampling_pdf;
if (!is_possible) { if (!is_possible) {
/* Portal sampling is not possible here because all portals point to the wrong side. /* Portal sampling is not possible here because all portals point to the wrong side.
* If map sampling is possible, it would be used instead, otherwise fallback sampling is used. */ * If map sampling is possible, it would be used instead,
* otherwise fallback sampling is used. */
if (portal_sampling_pdf == 1.0f) { if (portal_sampling_pdf == 1.0f) {
return kernel_data.integrator.pdf_lights / M_4PI_F; return kernel_data.integrator.pdf_lights / M_4PI_F;
} }

View File

@@ -199,21 +199,27 @@ ccl_device float3 ensure_valid_reflection(float3 Ng, float3 I, float3 N)
float NdotNg = dot(N, Ng); float NdotNg = dot(N, Ng);
float3 X = normalize(N - NdotNg * Ng); float3 X = normalize(N - NdotNg * Ng);
/* Keep math expressions. */
/* clang-format off */
/* Calculate N.z and N.x in the local coordinate system. /* Calculate N.z and N.x in the local coordinate system.
* *
* The goal of this computation is to find a N' that is rotated towards Ng just enough * The goal of this computation is to find a N' that is rotated towards Ng just enough
* to lift R' above the threshold (here called t), therefore dot(R', Ng) = t. * to lift R' above the threshold (here called t), therefore dot(R', Ng) = t.
* *
* According to the standard reflection equation, this means that we want dot(2*dot(N', I)*N' - I, Ng) = t. * According to the standard reflection equation,
* this means that we want dot(2*dot(N', I)*N' - I, Ng) = t.
* *
* Since the Z axis of our local coordinate system is Ng, dot(x, Ng) is just x.z, so we get 2*dot(N', I)*N'.z - I.z = t. * Since the Z axis of our local coordinate system is Ng, dot(x, Ng) is just x.z, so we get
* 2*dot(N', I)*N'.z - I.z = t.
* *
* The rotation is simple to express in the coordinate system we formed - since N lies in the X-Z-plane, we know that * The rotation is simple to express in the coordinate system we formed -
* N' will also lie in the X-Z-plane, so N'.y = 0 and therefore dot(N', I) = N'.x*I.x + N'.z*I.z . * since N lies in the X-Z-plane, we know that N' will also lie in the X-Z-plane,
* so N'.y = 0 and therefore dot(N', I) = N'.x*I.x + N'.z*I.z .
* *
* Furthermore, we want N' to be normalized, so N'.x = sqrt(1 - N'.z^2). * Furthermore, we want N' to be normalized, so N'.x = sqrt(1 - N'.z^2).
* *
* With these simplifications, we get the final equation 2*(sqrt(1 - N'.z^2)*I.x + N'.z*I.z)*N'.z - I.z = t. * With these simplifications,
* we get the final equation 2*(sqrt(1 - N'.z^2)*I.x + N'.z*I.z)*N'.z - I.z = t.
* *
* The only unknown here is N'.z, so we can solve for that. * The only unknown here is N'.z, so we can solve for that.
* *
@@ -227,8 +233,11 @@ ccl_device float3 ensure_valid_reflection(float3 Ng, float3 I, float3 N)
* c = I.z*t + a * c = I.z*t + a
* N'.z = +-sqrt(0.5*(+-b + c)/a) * N'.z = +-sqrt(0.5*(+-b + c)/a)
* *
* Two solutions can immediately be discarded because they're negative so N' would lie in the lower hemisphere. * Two solutions can immediately be discarded because they're negative so N' would lie in the
* lower hemisphere.
*/ */
/* clang-format on */
float Ix = dot(I, X), Iz = dot(I, Ng); float Ix = dot(I, X), Iz = dot(I, Ng);
float Ix2 = sqr(Ix), Iz2 = sqr(Iz); float Ix2 = sqr(Ix), Iz2 = sqr(Iz);
float a = Ix2 + Iz2; float a = Ix2 + Iz2;
@@ -237,8 +246,9 @@ ccl_device float3 ensure_valid_reflection(float3 Ng, float3 I, float3 N)
float c = Iz * threshold + a; float c = Iz * threshold + a;
/* Evaluate both solutions. /* Evaluate both solutions.
* In many cases one can be immediately discarded (if N'.z would be imaginary or larger than one), so check for that first. * In many cases one can be immediately discarded (if N'.z would be imaginary or larger than
* If no option is viable (might happen in extreme cases like N being in the wrong hemisphere), give up and return Ng. */ * one), so check for that first. If no option is viable (might happen in extreme cases like N
* being in the wrong hemisphere), give up and return Ng. */
float fac = 0.5f / a; float fac = 0.5f / a;
float N1_z2 = fac * (b + c), N2_z2 = fac * (-b + c); float N1_z2 = fac * (b + c), N2_z2 = fac * (-b + c);
bool valid1 = (N1_z2 > 1e-5f) && (N1_z2 <= (1.0f + 1e-5f)); bool valid1 = (N1_z2 > 1e-5f) && (N1_z2 <= (1.0f + 1e-5f));
@@ -256,8 +266,9 @@ ccl_device float3 ensure_valid_reflection(float3 Ng, float3 I, float3 N)
valid1 = (R1 >= 1e-5f); valid1 = (R1 >= 1e-5f);
valid2 = (R2 >= 1e-5f); valid2 = (R2 >= 1e-5f);
if (valid1 && valid2) { if (valid1 && valid2) {
/* If both solutions are valid, return the one with the shallower reflection since it will be closer to the input /* If both solutions are valid, return the one with the shallower reflection since it will be
* (if the original reflection wasn't shallow, we would not be in this part of the function). */ * closer to the input (if the original reflection wasn't shallow, we would not be in this
* part of the function). */
N_new = (R1 < R2) ? N1 : N2; N_new = (R1 < R2) ? N1 : N2;
} }
else { else {
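Spelling out the algebra behind the quoted solution (writing z = N'.z and t for the threshold): isolating the square root in 2*(sqrt(1 - z^2)*I.x + z*I.z)*z - I.z = t and squaring both sides gives

    4*a*z^4 - 4*c*z^2 + (t + I.z)^2 = 0,  with  a = I.x^2 + I.z^2  and  c = I.z*t + a,

so the quadratic formula in z^2 gives z^2 = 0.5*(c +- b)/a with b = sqrt(c^2 - a*(t + I.z)^2). Since c^2 - a*(t + I.z)^2 = (a - I.z^2)*(a - t^2) = I.x^2*(a - t^2), this matches the N'.z = +-sqrt(0.5*(+-b + c)/a) form stated in the comment, with four sign combinations of which the two negative roots are immediately discarded.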

View File

@@ -437,8 +437,8 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
} }
/* path termination. this is a strange place to put the termination, it's /* path termination. this is a strange place to put the termination, it's
* mainly due to the mixed in MIS that we use. gives too many unneeded * mainly due to the mixed in MIS that we use. gives too many unneeded
* shader evaluations, only need emission if we are going to terminate */ * shader evaluations, only need emission if we are going to terminate */
float probability = path_state_continuation_probability(kg, state, throughput); float probability = path_state_continuation_probability(kg, state, throughput);
if (probability == 0.0f) { if (probability == 0.0f) {
@@ -464,7 +464,7 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
# ifdef __SUBSURFACE__ # ifdef __SUBSURFACE__
/* bssrdf scatter to a different location on the same object, replacing /* bssrdf scatter to a different location on the same object, replacing
* the closures with a diffuse BSDF */ * the closures with a diffuse BSDF */
if (sd->flag & SD_BSSRDF) { if (sd->flag & SD_BSSRDF) {
if (kernel_path_subsurface_scatter( if (kernel_path_subsurface_scatter(
kg, sd, emission_sd, L, state, ray, &throughput, &ss_indirect)) { kg, sd, emission_sd, L, state, ray, &throughput, &ss_indirect)) {
@@ -575,8 +575,8 @@ ccl_device_forceinline void kernel_path_integrate(KernelGlobals *kg,
} }
/* path termination. this is a strange place to put the termination, it's /* path termination. this is a strange place to put the termination, it's
* mainly due to the mixed in MIS that we use. gives too many unneeded * mainly due to the mixed in MIS that we use. gives too many unneeded
* shader evaluations, only need emission if we are going to terminate */ * shader evaluations, only need emission if we are going to terminate */
float probability = path_state_continuation_probability(kg, state, throughput); float probability = path_state_continuation_probability(kg, state, throughput);
if (probability == 0.0f) { if (probability == 0.0f) {
@@ -601,7 +601,7 @@ ccl_device_forceinline void kernel_path_integrate(KernelGlobals *kg,
# ifdef __SUBSURFACE__ # ifdef __SUBSURFACE__
/* bssrdf scatter to a different location on the same object, replacing /* bssrdf scatter to a different location on the same object, replacing
* the closures with a diffuse BSDF */ * the closures with a diffuse BSDF */
if (sd.flag & SD_BSSRDF) { if (sd.flag & SD_BSSRDF) {
if (kernel_path_subsurface_scatter( if (kernel_path_subsurface_scatter(
kg, &sd, emission_sd, L, state, ray, &throughput, &ss_indirect)) { kg, &sd, emission_sd, L, state, ray, &throughput, &ss_indirect)) {
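The continuation probability computed in these hunks feeds a standard Russian-roulette step; a minimal self-contained sketch (scalar throughput and a hypothetical helper name, not the actual kernel code):

    /* Terminate the path with probability (1 - p); survivors divide their
     * throughput by p so the estimator stays unbiased. */
    static bool roulette_terminate(float p, float rand01, float *throughput)
    {
      if (p == 0.0f)
        return true; /* Certain termination. */
      if (p < 1.0f) {
        if (rand01 >= p)
          return true; /* Lost the roulette. */
        *throughput /= p; /* Compensate on the surviving paths. */
      }
      return false;
    }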

View File

@@ -428,8 +428,8 @@ ccl_device void kernel_branched_path_integrate(KernelGlobals *kg,
/* transparency termination */ /* transparency termination */
if (state.flag & PATH_RAY_TRANSPARENT) { if (state.flag & PATH_RAY_TRANSPARENT) {
/* path termination. this is a strange place to put the termination, it's /* path termination. this is a strange place to put the termination, it's
* mainly due to the mixed in MIS that we use. gives too many unneeded * mainly due to the mixed in MIS that we use. gives too many unneeded
* shader evaluations, only need emission if we are going to terminate */ * shader evaluations, only need emission if we are going to terminate */
float probability = path_state_continuation_probability(kg, &state, throughput); float probability = path_state_continuation_probability(kg, &state, throughput);
if (probability == 0.0f) { if (probability == 0.0f) {

View File

@@ -18,7 +18,8 @@ CCL_NAMESPACE_BEGIN
#if defined(__BRANCHED_PATH__) || defined(__SUBSURFACE__) || defined(__SHADOW_TRICKS__) || \ #if defined(__BRANCHED_PATH__) || defined(__SUBSURFACE__) || defined(__SHADOW_TRICKS__) || \
defined(__BAKING__) defined(__BAKING__)
/* branched path tracing: connect path directly to position on one or more lights and add it to L */ /* branched path tracing: connect path directly to position on one or more lights and add it to L
*/
ccl_device_noinline void kernel_branched_path_surface_connect_light( ccl_device_noinline void kernel_branched_path_surface_connect_light(
KernelGlobals *kg, KernelGlobals *kg,
ShaderData *sd, ShaderData *sd,
@@ -62,8 +63,10 @@ ccl_device_noinline void kernel_branched_path_surface_connect_light(
LightSample ls; LightSample ls;
if (lamp_light_sample(kg, i, light_u, light_v, sd->P, &ls)) { if (lamp_light_sample(kg, i, light_u, light_v, sd->P, &ls)) {
/* The sampling probability returned by lamp_light_sample assumes that all lights were sampled. /* The sampling probability returned by lamp_light_sample assumes that all lights were
* However, this code only samples lamps, so if the scene also had mesh lights, the real probability is twice as high. */ * sampled.
* However, this code only samples lamps, so if the scene also had mesh lights, the real
* probability is twice as high. */
if (kernel_data.integrator.pdf_triangles != 0.0f) if (kernel_data.integrator.pdf_triangles != 0.0f)
ls.pdf *= 2.0f; ls.pdf *= 2.0f;
@@ -109,7 +112,8 @@ ccl_device_noinline void kernel_branched_path_surface_connect_light(
LightSample ls; LightSample ls;
if (light_sample(kg, light_u, light_v, sd->time, sd->P, state->bounce, &ls)) { if (light_sample(kg, light_u, light_v, sd->time, sd->P, state->bounce, &ls)) {
/* Same as above, probability needs to be corrected since the sampling was forced to select a mesh light. */ /* Same as above, probability needs to be corrected since the sampling was forced to
* select a mesh light. */
if (kernel_data.integrator.num_all_lights) if (kernel_data.integrator.num_all_lights)
ls.pdf *= 2.0f; ls.pdf *= 2.0f;

View File

@@ -559,7 +559,7 @@ kernel_volume_integrate_heterogeneous_distance(KernelGlobals *kg,
float dt = new_t - t; float dt = new_t - t;
/* use random position inside this segment to sample shader, /* use random position inside this segment to sample shader,
* for last shorter step we remap it to fit within the segment. */ * for last shorter step we remap it to fit within the segment. */
if (new_t == ray->t) { if (new_t == ray->t) {
step_offset *= (new_t - t) / step_size; step_offset *= (new_t - t) / step_size;
} }
@@ -794,7 +794,7 @@ ccl_device void kernel_volume_decoupled_record(KernelGlobals *kg,
float dt = new_t - t; float dt = new_t - t;
/* use random position inside this segment to sample shader, /* use random position inside this segment to sample shader,
* for last shorter step we remap it to fit within the segment. */ * for last shorter step we remap it to fit within the segment. */
if (new_t == ray->t) { if (new_t == ray->t) {
step_offset *= (new_t - t) / step_size; step_offset *= (new_t - t) / step_size;
} }

View File

@@ -61,7 +61,8 @@
/* tunable parameters */ /* tunable parameters */
# define CUDA_THREADS_BLOCK_WIDTH 16 # define CUDA_THREADS_BLOCK_WIDTH 16
/* CUDA 9.0 seems to cause slowdowns on high-end Pascal cards unless we increase the number of registers */ /* CUDA 9.0 seems to cause slowdowns on high-end Pascal cards unless we increase the number of
* registers */
# if __CUDACC_VER_MAJOR__ >= 9 && __CUDA_ARCH__ >= 600 # if __CUDACC_VER_MAJOR__ >= 9 && __CUDA_ARCH__ >= 600
# define CUDA_KERNEL_MAX_REGISTERS 64 # define CUDA_KERNEL_MAX_REGISTERS 64
# else # else

View File

@@ -497,8 +497,8 @@ class MicrofacetFresnelClosure : public CBSDFClosure {
MicrofacetBsdf *alloc(ShaderData *sd, int path_flag, float3 weight) MicrofacetBsdf *alloc(ShaderData *sd, int path_flag, float3 weight)
{ {
/* Technically, the MultiGGX Glass closure may also transmit. However, /* Technically, the MultiGGX Glass closure may also transmit. However,
* since this is set statically and only used for caustic flags, this * since this is set statically and only used for caustic flags, this
* is probably as good as it gets. */ * is probably as good as it gets. */
if (skip(sd, path_flag, LABEL_GLOSSY | LABEL_REFLECT)) { if (skip(sd, path_flag, LABEL_GLOSSY | LABEL_REFLECT)) {
return NULL; return NULL;
} }
@@ -715,8 +715,8 @@ class MicrofacetMultiFresnelClosure : public CBSDFClosure {
MicrofacetBsdf *alloc(ShaderData *sd, int path_flag, float3 weight) MicrofacetBsdf *alloc(ShaderData *sd, int path_flag, float3 weight)
{ {
/* Technically, the MultiGGX closure may also transmit. However, /* Technically, the MultiGGX closure may also transmit. However,
* since this is set statically and only used for caustic flags, this * since this is set statically and only used for caustic flags, this
* is probably as good as it gets. */ * is probably as good as it gets. */
if (skip(sd, path_flag, LABEL_GLOSSY | LABEL_REFLECT)) { if (skip(sd, path_flag, LABEL_GLOSSY | LABEL_REFLECT)) {
return NULL; return NULL;
} }

View File

@@ -1017,7 +1017,7 @@ bool OSLRenderServices::texture(ustring filename,
PtexPtr<PtexTexture> r(ptex_cache->get(filename.c_str(), error)); PtexPtr<PtexTexture> r(ptex_cache->get(filename.c_str(), error));
if (!r) { if (!r) {
//std::cerr << error.c_str() << std::endl; // std::cerr << error.c_str() << std::endl;
return false; return false;
} }

View File

@@ -387,14 +387,14 @@ point rotate(point p, float angle, point a, point b)
vector axis = normalize(b - a); vector axis = normalize(b - a);
float cosang, sinang; float cosang, sinang;
/* Older OSX has major issues with sincos() function, /* Older OSX has major issues with sincos() function,
* it's likely a bug in OSL or LLVM. Until we've * it's likely a bug in OSL or LLVM. Until we've
* updated to newer versions of these libraries, we'll * updated to newer versions of these libraries, we'll
* use a workaround to prevent possible crashes on all * use a workaround to prevent possible crashes on all
* the platforms. * the platforms.
* *
* Shouldn't be that bad because it's mainly used for * Shouldn't be that bad because it's mainly used for
* anisotropic shader where angle is usually constant. * anisotropic shader where angle is usually constant.
*/ */
#if 0 #if 0
sincos(angle, sinang, cosang); sincos(angle, sinang, cosang);
#else #else
@@ -425,7 +425,7 @@ point rotate(point p, float angle, point a, point b)
normal ensure_valid_reflection(normal Ng, vector I, normal N) normal ensure_valid_reflection(normal Ng, vector I, normal N)
{ {
/* The implementation here mirrors the one in kernel_montecarlo.h, /* The implementation here mirrors the one in kernel_montecarlo.h,
* check there for an explanation of the algorithm. */ * check there for an explanation of the algorithm. */
float sqr(float x) float sqr(float x)
{ {

View File

@@ -132,8 +132,8 @@ ccl_device void kernel_buffer_update(KernelGlobals *kg,
if (ray->t != 0.0f) { if (ray->t != 0.0f) {
/* Initialize throughput, path radiance, Ray, PathState; /* Initialize throughput, path radiance, Ray, PathState;
* These rays proceed with path-iteration. * These rays proceed with path-iteration.
*/ */
*throughput = make_float3(1.0f, 1.0f, 1.0f); *throughput = make_float3(1.0f, 1.0f, 1.0f);
path_radiance_init(L, kernel_data.film.use_light_pass); path_radiance_init(L, kernel_data.film.use_light_pass);
path_state_init(kg, path_state_init(kg,

View File

@@ -46,10 +46,10 @@ void KERNEL_FUNCTION_FULL_NAME(data_init)(
int sh, int sh,
int offset, int offset,
int stride, int stride,
ccl_global int *Queue_index, /* Tracks the number of elements in queues */ ccl_global int *Queue_index, /* Tracks the number of elements in queues */
int queuesize, /* size (capacity) of the queue */ int queuesize, /* size (capacity) of the queue */
ccl_global char * ccl_global char *use_queues_flag, /* flag to decide if scene-intersect kernel should use queues
use_queues_flag, /* flag to decide if scene-intersect kernel should use queues to fetch ray index */ to fetch ray index */
ccl_global unsigned int *work_pools, /* Work pool for each work group */ ccl_global unsigned int *work_pools, /* Work pool for each work group */
unsigned int num_samples, unsigned int num_samples,
ccl_global float *buffer) ccl_global float *buffer)

View File

@@ -114,9 +114,9 @@ ccl_device void kernel_holdout_emission_blurring_pathtermination_ao(
if (IS_STATE(ray_state, ray_index, RAY_ACTIVE)) { if (IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
/* Path termination. this is a strange place to put the termination, it's /* Path termination. this is a strange place to put the termination, it's
* mainly due to the mixed in MIS that we use. gives too many unneeded * mainly due to the mixed in MIS that we use. gives too many unneeded
* shader evaluations, only need emission if we are going to terminate. * shader evaluations, only need emission if we are going to terminate.
*/ */
float probability = path_state_continuation_probability(kg, state, throughput); float probability = path_state_continuation_probability(kg, state, throughput);
if (probability == 0.0f) { if (probability == 0.0f) {

View File

@@ -109,9 +109,9 @@ ccl_device void kernel_next_iteration_setup(KernelGlobals *kg,
if (ccl_global_id(0) == 0 && ccl_global_id(1) == 0) { if (ccl_global_id(0) == 0 && ccl_global_id(1) == 0) {
/* If we are here, then it means that scene-intersect kernel /* If we are here, then it means that scene-intersect kernel
* has already been executed at least once. From then on, * has already been executed at least once. From then on,
* scene-intersect kernel may operate on queues to fetch ray index * scene-intersect kernel may operate on queues to fetch ray index
*/ */
*kernel_split_params.use_queues_flag = 1; *kernel_split_params.use_queues_flag = 1;
/* Mark queue indices of QUEUE_SHADOW_RAY_CAST_AO_RAYS and /* Mark queue indices of QUEUE_SHADOW_RAY_CAST_AO_RAYS and

View File

@@ -19,7 +19,8 @@
CCL_NAMESPACE_BEGIN CCL_NAMESPACE_BEGIN
/* parameters used by the split kernels; we use a single struct to avoid passing these to each kernel */ /* parameters used by the split kernels; we use a single struct to avoid passing these to each
* kernel */
typedef struct SplitParams { typedef struct SplitParams {
WorkTile tile; WorkTile tile;
@@ -112,7 +113,8 @@ typedef ccl_global struct SplitBranchedState {
SPLIT_DATA_BRANCHED_ENTRIES \ SPLIT_DATA_BRANCHED_ENTRIES \
SPLIT_DATA_ENTRY(ShaderData, _sd, 0) SPLIT_DATA_ENTRY(ShaderData, _sd, 0)
/* entries to be copied to inactive rays when sharing branched samples (TODO: which are actually needed?) */ /* Entries to be copied to inactive rays when sharing branched samples
* (TODO: which are actually needed?) */
#define SPLIT_DATA_ENTRIES_BRANCHED_SHARED \ #define SPLIT_DATA_ENTRIES_BRANCHED_SHARED \
SPLIT_DATA_ENTRY(ccl_global float3, throughput, 1) \ SPLIT_DATA_ENTRY(ccl_global float3, throughput, 1) \
SPLIT_DATA_ENTRY(PathRadiance, path_radiance, 1) \ SPLIT_DATA_ENTRY(PathRadiance, path_radiance, 1) \
@@ -134,8 +136,9 @@ typedef struct SplitData {
SPLIT_DATA_ENTRIES SPLIT_DATA_ENTRIES
#undef SPLIT_DATA_ENTRY #undef SPLIT_DATA_ENTRY
/* this is actually in a separate buffer from the rest of the split state data (so it can be read back from /* this is actually in a separate buffer from the rest of the split state data (so it can be read
* the host easily) but is still used the same as the other data so we have it here in this struct as well * back from the host easily) but is still used the same as the other data so we have it here in
* this struct as well
*/ */
ccl_global char *ray_state; ccl_global char *ray_state;
} SplitData; } SplitData;

View File

@@ -1,18 +1,18 @@
/* /*
* Copyright 2011-2018 Blender Foundation * Copyright 2011-2018 Blender Foundation
* *
* Licensed under the Apache License, Version 2.0 (the "License"); * Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License. * you may not use this file except in compliance with the License.
* You may obtain a copy of the License at * You may obtain a copy of the License at
* *
* http://www.apache.org/licenses/LICENSE-2.0 * http://www.apache.org/licenses/LICENSE-2.0
* *
* Unless required by applicable law or agreed to in writing, software * Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, * distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and * See the License for the specific language governing permissions and
* limitations under the License. * limitations under the License.
*/ */
CCL_NAMESPACE_BEGIN CCL_NAMESPACE_BEGIN

View File

@@ -21,12 +21,12 @@ CCL_NAMESPACE_BEGIN
ccl_device_inline float interpolate_ies_vertical( ccl_device_inline float interpolate_ies_vertical(
KernelGlobals *kg, int ofs, int v, int v_num, float v_frac, int h) KernelGlobals *kg, int ofs, int v, int v_num, float v_frac, int h)
{ {
/* Since lookups are performed in spherical coordinates, clamping the coordinates at the low end of v /* Since lookups are performed in spherical coordinates, clamping the coordinates at the low end
* (corresponding to the north pole) would result in artifacts. * of v (corresponding to the north pole) would result in artifacts. The proper way of dealing
* The proper way of dealing with this would be to look up the corresponding value on the other side of the pole, * with this would be to look up the corresponding value on the other side of the pole, but since
* but since the horizontal coordinates might be nonuniform, this would require yet another interpolation. * the horizontal coordinates might be nonuniform, this would require yet another interpolation.
* Therefore, the assumption is made that the light is going to be symmetrical, which means that we can just take * Therefore, the assumption is made that the light is going to be symmetrical, which means that
* the corresponding value at the current horizontal coordinate. */ * we can just take the corresponding value at the current horizontal coordinate. */
#define IES_LOOKUP(v) kernel_tex_fetch(__ies, ofs + h * v_num + (v)) #define IES_LOOKUP(v) kernel_tex_fetch(__ies, ofs + h * v_num + (v))
/* If v is zero, assume symmetry and read at v=1 instead of v=-1. */ /* If v is zero, assume symmetry and read at v=1 instead of v=-1. */
@@ -66,7 +66,8 @@ ccl_device_inline float kernel_ies_interp(KernelGlobals *kg,
/* Lookup the angles to find the table position. */ /* Lookup the angles to find the table position. */
int h_i, v_i; int h_i, v_i;
/* TODO(lukas): Consider using bisection. Probably not worth it for the vast majority of IES files. */ /* TODO(lukas): Consider using bisection.
* Probably not worth it for the vast majority of IES files. */
for (h_i = 0; IES_LOOKUP_ANGLE_H(h_i + 1) < h_angle; h_i++) for (h_i = 0; IES_LOOKUP_ANGLE_H(h_i + 1) < h_angle; h_i++)
; ;
for (v_i = 0; IES_LOOKUP_ANGLE_V(v_i + 1) < v_angle; v_i++) for (v_i = 0; IES_LOOKUP_ANGLE_V(v_i + 1) < v_angle; v_i++)
@@ -83,7 +84,8 @@ ccl_device_inline float kernel_ies_interp(KernelGlobals *kg,
/* Perform cubic interpolation along the horizontal coordinate to get the intensity value. /* Perform cubic interpolation along the horizontal coordinate to get the intensity value.
* If h_i is zero, just wrap around since the horizontal angles always go over the full circle. * If h_i is zero, just wrap around since the horizontal angles always go over the full circle.
* However, the last entry (360°) equals the first one, so we need to wrap around to the one before that. */ * However, the last entry (360°) equals the first one, so we need to wrap around to the one
* before that. */
float a = interpolate_ies_vertical( float a = interpolate_ies_vertical(
kg, ofs, v_i, v_num, v_frac, (h_i == 0) ? h_num - 2 : h_i - 1); kg, ofs, v_i, v_num, v_frac, (h_i == 0) ? h_num - 2 : h_i - 1);
float b = interpolate_ies_vertical(kg, ofs, v_i, v_num, v_frac, h_i); float b = interpolate_ies_vertical(kg, ofs, v_i, v_num, v_frac, h_i);
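The cubic step consuming these four samples is typically a Catmull-Rom interpolation; a generic sketch (assuming samples a, b, c, d at consecutive horizontal angles and a fraction t in [0, 1] between b and c):

    /* Catmull-Rom spline through four uniformly spaced samples. */
    static float catmull_rom(float a, float b, float c, float d, float t)
    {
      return b + 0.5f * t * (c - a +
                        t * (2.0f * a - 5.0f * b + 4.0f * c - d +
                        t * (3.0f * (b - c) + d - a)));
    }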

View File

@@ -70,7 +70,8 @@ ccl_device void voronoi_neighbors(
} }
} }
/* To keep the shortest four distances and associated points we have to keep them in sorted order. */ /* To keep the shortest four distances and associated points we have to keep them in sorted
* order. */
if (d < da[0]) { if (d < da[0]) {
da[3] = da[2]; da[3] = da[2];
da[2] = da[1]; da[2] = da[1];
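The shifting above is one branch of a four-element insertion sort; the general step can be sketched as follows (distances only; the kernel shifts the associated points the same way):

    /* Insert d into the ascending array da[0..3] of the four smallest
     * distances seen so far, dropping the current largest. */
    static void insert_distance(float da[4], float d)
    {
      if (d >= da[3])
        return; /* Not among the four smallest. */
      int i = 3;
      while (i > 0 && d < da[i - 1]) {
        da[i] = da[i - 1]; /* Shift larger entries up. */
        i--;
      }
      da[i] = d;
    }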

View File

@@ -642,7 +642,8 @@ float Camera::world_to_raster_size(float3 P)
float3 D = normalize(Ddiff); float3 D = normalize(Ddiff);
res = len(dist * dDdx - dot(dist * dDdx, D) * D); res = len(dist * dDdx - dot(dist * dDdx, D) * D);
/* Decent approx distance to frustum (doesn't handle corners correctly, but not that big of a deal) */ /* Decent approx distance to frustum
* (doesn't handle corners correctly, but not that big of a deal) */
float f_dist = 0.0f; float f_dist = 0.0f;
if (offscreen_dicing_scale > 1.0f) { if (offscreen_dicing_scale > 1.0f) {
@@ -686,7 +687,8 @@ float Camera::world_to_raster_size(float3 P)
f_dist = max(f_dist, *d); f_dist = max(f_dist, *d);
} }
else { else {
/* Possibly far enough behind the frustum to use distance to origin instead of edge */ /* Possibly far enough behind the frustum to use distance to origin instead of edge
*/
test_o = true; test_o = true;
} }
} }

View File

@@ -69,8 +69,8 @@ static void print_progress(int num, int total, int frame, int num_frames)
fflush(stdout); fflush(stdout);
} }
/* Splits in at its last dot, setting suffix to the part after the dot and in to the part before it. /* Splits in at its last dot, setting suffix to the part after the dot and in to the part before
* Returns whether a dot was found. */ * it. Returns whether a dot was found. */
static bool split_last_dot(string &in, string &suffix) static bool split_last_dot(string &in, string &suffix)
{ {
size_t pos = in.rfind("."); size_t pos = in.rfind(".");
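A plausible completion of this helper, matching the behavior the comment describes (a sketch simplified to std::string):

    #include <string>

    /* Split "Layer.Pass.R" into in = "Layer.Pass" and suffix = "R". */
    static bool split_last_dot(std::string &in, std::string &suffix)
    {
      size_t pos = in.rfind('.');
      if (pos == std::string::npos)
        return false; /* No dot found. */
      suffix = in.substr(pos + 1);
      in = in.substr(0, pos);
      return true;
    }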
@@ -84,9 +84,8 @@ static bool split_last_dot(string &in, string &suffix)
/* Separate channel names as generated by Blender. /* Separate channel names as generated by Blender.
* If views is true: * If views is true:
* Inputs are expected in the form RenderLayer.Pass.View.Channel, sets renderlayer to "RenderLayer.View" * Inputs are expected in the form RenderLayer.Pass.View.Channel, sets renderlayer to
* Otherwise: * "RenderLayer.View" Otherwise: Inputs are expected in the form RenderLayer.Pass.Channel */
* Inputs are expected in the form RenderLayer.Pass.Channel */
static bool parse_channel_name( static bool parse_channel_name(
string name, string &renderlayer, string &pass, string &channel, bool multiview_channels) string name, string &renderlayer, string &pass, string &channel, bool multiview_channels)
{ {
@@ -631,7 +630,8 @@ bool DenoiseImage::parse_channels(const ImageSpec &in_spec, string &error)
layer.name = name; layer.name = name;
layer.samples = samples; layer.samples = samples;
/* If the sample value isn't set yet, check if there is a layer-specific one in the input file. */ /* If the sample value isn't set yet, check if there is a layer-specific one in the input file.
*/
if (layer.samples < 1) { if (layer.samples < 1) {
string sample_string = in_spec.get_string_attribute("cycles." + name + ".samples", ""); string sample_string = in_spec.get_string_attribute("cycles." + name + ".samples", "");
if (sample_string != "") { if (sample_string != "") {

View File

@@ -87,14 +87,17 @@ struct DenoiseImageLayer {
/* input_to_image_channel of the secondary frames, if any are used. */ /* input_to_image_channel of the secondary frames, if any are used. */
vector<vector<int>> neighbor_input_to_image_channel; vector<vector<int>> neighbor_input_to_image_channel;
/* Write i-th channel of the processing output to output_to_image_channel[i]-th channel of the file. */ /* Write i-th channel of the processing output to output_to_image_channel[i]-th channel of the
* file. */
vector<int> output_to_image_channel; vector<int> output_to_image_channel;
/* Detect whether this layer contains a full set of channels and set up the offsets accordingly. */ /* Detect whether this layer contains a full set of channels and set up the offsets accordingly.
*/
bool detect_denoising_channels(); bool detect_denoising_channels();
/* Map the channels of a secondary frame to the channels that are required for processing, /* Map the channels of a secondary frame to the channels that are required for processing,
* fill neighbor_input_to_image_channel if all are present or return false if a channel is missing. */ * fill neighbor_input_to_image_channel if all are present or return false if a channel is
* missing. */
bool match_channels(int neighbor, bool match_channels(int neighbor,
const std::vector<string> &channelnames, const std::vector<string> &channelnames,
const std::vector<string> &neighbor_channelnames); const std::vector<string> &neighbor_channelnames);
@@ -125,7 +128,8 @@ class DenoiseImage {
void free(); void free();
/* Open the input image, parse its channels, open the output image and allocate the output buffer. */ /* Open the input image, parse its channels, open the output image and allocate the output
* buffer. */
bool load(const string &in_filepath, string &error); bool load(const string &in_filepath, string &error);
/* Load neighboring frames. */ /* Load neighboring frames. */
@@ -139,7 +143,8 @@ class DenoiseImage {
bool save_output(const string &out_filepath, string &error); bool save_output(const string &out_filepath, string &error);
protected: protected:
/* Parse input file channels, separate them into DenoiseImageLayers, detect DenoiseImageLayers with full channel sets, /* Parse input file channels, separate them into DenoiseImageLayers,
* detect DenoiseImageLayers with full channel sets,
* fill layers and set up the output channels and passthrough map. */ * fill layers and set up the output channels and passthrough map. */
bool parse_channels(const ImageSpec &in_spec, string &error); bool parse_channels(const ImageSpec &in_spec, string &error);

View File

@@ -839,8 +839,9 @@ void Mesh::add_undisplaced()
size_t size = attr->buffer_size( size_t size = attr->buffer_size(
this, (subdivision_type == SUBDIVISION_NONE) ? ATTR_PRIM_TRIANGLE : ATTR_PRIM_SUBD); this, (subdivision_type == SUBDIVISION_NONE) ? ATTR_PRIM_TRIANGLE : ATTR_PRIM_SUBD);
/* Center points for ngons aren't stored in Mesh::verts but are included in size since they will be /* Center points for ngons aren't stored in Mesh::verts but are included in size since they will
* calculated later, we subtract them from size here so we don't have an overflow while copying. * be calculated later, we subtract them from size here so we don't have an overflow while
* copying.
*/ */
size -= num_ngons * attr->data_sizeof(); size -= num_ngons * attr->data_sizeof();

View File

@@ -143,8 +143,10 @@ class Shader : public Node {
Shader(); Shader();
~Shader(); ~Shader();
/* Checks whether the shader consists of just an emission node with fixed inputs that's connected directly to the output. /* Checks whether the shader consists of just an emission node with fixed inputs that's connected
* If yes, it sets the content of emission to the constant value (color * strength), which is then used for speeding up light evaluation. */ * directly to the output.
* If yes, it sets the content of emission to the constant value (color * strength), which is
* then used for speeding up light evaluation. */
bool is_constant_emission(float3 *emission); bool is_constant_emission(float3 *emission);
void set_graph(ShaderGraph *graph); void set_graph(ShaderGraph *graph);

View File

@@ -773,7 +773,8 @@ void SVMCompiler::compile_type(Shader *shader, ShaderGraph *graph, ShaderType ty
compile_failed = false; compile_failed = false;
} }
/* for bump shaders we fall through to the surface shader, but if this is any other kind of shader it ends here */ /* for bump shaders we fall through to the surface shader, but if this is any other kind of shader
* it ends here */
if (type != SHADER_TYPE_BUMP) { if (type != SHADER_TYPE_BUMP) {
add_node(NODE_END, 0, 0, 0); add_node(NODE_END, 0, 0, 0);
} }
@@ -828,7 +829,8 @@ void SVMCompiler::compile(
{ {
scoped_timer timer((summary != NULL) ? &summary->time_generate_surface : NULL); scoped_timer timer((summary != NULL) ? &summary->time_generate_surface : NULL);
compile_type(shader, shader->graph, SHADER_TYPE_SURFACE); compile_type(shader, shader->graph, SHADER_TYPE_SURFACE);
/* only set jump offset if there's no bump shader, as the bump shader will fall through to this one if it exists */ /* only set jump offset if there's no bump shader, as the bump shader will fall through to this
* one if it exists */
if (!has_bump) { if (!has_bump) {
svm_nodes[index].y = svm_nodes.size(); svm_nodes[index].y = svm_nodes.size();
} }

View File

@@ -170,8 +170,9 @@ void TileManager::set_samples(int num_samples_)
} }
else { else {
uint64_t pixel_samples = 0; uint64_t pixel_samples = 0;
/* While rendering in the viewport, the initial preview resolution is increased to the native resolution /* While rendering in the viewport, the initial preview resolution is increased to the native
* before the actual rendering begins. Therefore, additional pixel samples will be rendered. */ * resolution before the actual rendering begins. Therefore, additional pixel samples will be
* rendered. */
int divider = max(get_divider(params.width, params.height, start_resolution) / 2, pixel_size); int divider = max(get_divider(params.width, params.height, start_resolution) / 2, pixel_size);
while (divider > pixel_size) { while (divider > pixel_size) {
int image_w = max(1, params.width / divider); int image_w = max(1, params.width / divider);
@@ -190,8 +191,9 @@ void TileManager::set_samples(int num_samples_)
} }
} }
/* If sliced is false, splits image into tiles and assigns an equal number of tiles to every render device. /* If sliced is false, splits image into tiles and assigns an equal number of tiles to every
* If sliced is true, slices image into as many pieces as there are devices rendering this image. */ * render device. If sliced is true, slices image into as many pieces as there are devices
* rendering this image. */
int TileManager::gen_tiles(bool sliced) int TileManager::gen_tiles(bool sliced)
{ {
int resolution = state.resolution_divider; int resolution = state.resolution_divider;
@@ -255,7 +257,8 @@ int TileManager::gen_tiles(bool sliced)
} }
int2 pos = block * block_size + tile * tile_size + offset; int2 pos = block * block_size + tile * tile_size + offset;
/* Only add tiles which are in the image (tiles outside of the image can be generated since the spiral is always square). */ /* Only add tiles which are in the image (tiles outside of the image can be generated since
* the spiral is always square). */
if (pos.x >= 0 && pos.y >= 0 && pos.x < image_w && pos.y < image_h) { if (pos.x >= 0 && pos.y >= 0 && pos.x < image_w && pos.y < image_h) {
int w = min(tile_size.x, image_w - pos.x); int w = min(tile_size.x, image_w - pos.x);
int h = min(tile_size.y, image_h - pos.y); int h = min(tile_size.y, image_h - pos.y);
@@ -336,7 +339,8 @@ int TileManager::gen_tiles(bool sliced)
cur_tiles++; cur_tiles++;
if (cur_tiles == tiles_per_device) { if (cur_tiles == tiles_per_device) {
/* Tiles are already generated in Bottom-to-Top order, so no sort is necessary in that case. */ /* Tiles are already generated in Bottom-to-Top order, so no sort is necessary in that
* case. */
if (tile_order != TILE_BOTTOM_TO_TOP) { if (tile_order != TILE_BOTTOM_TO_TOP) {
tile_list->sort(TileComparator(tile_order, center, &state.tiles[0])); tile_list->sort(TileComparator(tile_order, center, &state.tiles[0]));
} }
@@ -398,7 +402,8 @@ int TileManager::get_neighbor_index(int index, int neighbor)
return ny * state.tile_stride + nx; return ny * state.tile_stride + nx;
} }
/* Checks whether all neighbors of a tile (as well as the tile itself) are at least at state min_state. */ /* Checks whether all neighbors of a tile (as well as the tile itself) are at least at state
* min_state. */
bool TileManager::check_neighbor_state(int index, Tile::State min_state) bool TileManager::check_neighbor_state(int index, Tile::State min_state)
{ {
if (index < 0 || state.tiles[index].state < min_state) { if (index < 0 || state.tiles[index].state < min_state) {
@@ -415,7 +420,8 @@ bool TileManager::check_neighbor_state(int index, Tile::State min_state)
return true; return true;
} }
/* Returns whether the tile should be written (and freed if no denoising is used) instead of updating. */ /* Returns whether the tile should be written (and freed if no denoising is used) instead of
* updating. */
bool TileManager::finish_tile(int index, bool &delete_tile) bool TileManager::finish_tile(int index, bool &delete_tile)
{ {
delete_tile = false; delete_tile = false;
@@ -432,7 +438,8 @@ bool TileManager::finish_tile(int index, bool &delete_tile)
return true; return true;
} }
state.tiles[index].state = Tile::RENDERED; state.tiles[index].state = Tile::RENDERED;
/* For each neighbor and the tile itself, check whether all of its neighbors have been rendered. If yes, it can be denoised. */ /* For each neighbor and the tile itself, check whether all of its neighbors have been
* rendered. If yes, it can be denoised. */
for (int neighbor = 0; neighbor < 9; neighbor++) { for (int neighbor = 0; neighbor < 9; neighbor++) {
int nindex = get_neighbor_index(index, neighbor); int nindex = get_neighbor_index(index, neighbor);
if (check_neighbor_state(nindex, Tile::RENDERED)) { if (check_neighbor_state(nindex, Tile::RENDERED)) {
@@ -444,13 +451,15 @@ bool TileManager::finish_tile(int index, bool &delete_tile)
} }
case Tile::DENOISE: { case Tile::DENOISE: {
state.tiles[index].state = Tile::DENOISED; state.tiles[index].state = Tile::DENOISED;
/* For each neighbor and the tile itself, check whether all of its neighbors have been denoised. If yes, it can be freed. */ /* For each neighbor and the tile itself, check whether all of its neighbors have been
* denoised. If yes, it can be freed. */
for (int neighbor = 0; neighbor < 9; neighbor++) { for (int neighbor = 0; neighbor < 9; neighbor++) {
int nindex = get_neighbor_index(index, neighbor); int nindex = get_neighbor_index(index, neighbor);
if (check_neighbor_state(nindex, Tile::DENOISED)) { if (check_neighbor_state(nindex, Tile::DENOISED)) {
state.tiles[nindex].state = Tile::DONE; state.tiles[nindex].state = Tile::DONE;
/* It can happen that the tile just finished denoising and already can be freed here. /* It can happen that the tile just finished denoising and already can be freed here.
* However, in that case it still has to be written before deleting, so we can't delete it yet. */ * However, in that case it still has to be written before deleting, so we can't delete
* it yet. */
if (neighbor == 8) { if (neighbor == 8) {
delete_tile = true; delete_tile = true;
} }
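The neighbor bookkeeping above iterates indices 0..8; a sketch of the indexing this implies (hypothetical ordering in which 8 is the tile itself; the real code works with a tile_stride over the active tile range):

    /* Map (tile index, neighbor 0..8) to a tile index on a w x h grid,
     * returning -1 when the neighbor falls outside the grid. */
    static int neighbor_index(int index, int neighbor, int w, int h)
    {
      static const int dx[9] = {-1, 0, 1, -1, 1, -1, 0, 1, 0};
      static const int dy[9] = {-1, -1, -1, 0, 0, 1, 1, 1, 0};
      int nx = index % w + dx[neighbor];
      int ny = index / w + dy[neighbor];
      if (nx < 0 || ny < 0 || nx >= w || ny >= h)
        return -1;
      return ny * w + nx;
    }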

View File

@@ -141,7 +141,7 @@ void DiagSplit::split(QuadDice::SubPatch &sub, QuadDice::EdgeFactors &ef, int de
bool split_v = (ef.tv0 == DSPLIT_NON_UNIFORM || ef.tv1 == DSPLIT_NON_UNIFORM); bool split_v = (ef.tv0 == DSPLIT_NON_UNIFORM || ef.tv1 == DSPLIT_NON_UNIFORM);
/* Split subpatches such that the ratio of T for opposite edges doesn't /* Split subpatches such that the ratio of T for opposite edges doesn't
* exceed 1.5, this reduces over tessellation for some patches * exceed 1.5, this reduces over tessellation for some patches
*/ */
bool tmp_split_v = split_v; bool tmp_split_v = split_v;
if (!split_u && min(ef.tu0, ef.tu1) > 8 && min(ef.tu0, ef.tu1) * 1.5f < max(ef.tu0, ef.tu1)) if (!split_u && min(ef.tu0, ef.tu1) > 8 && min(ef.tu0, ef.tu1) * 1.5f < max(ef.tu0, ef.tu1))

View File

@@ -167,7 +167,8 @@ ccl_device float3 xyY_to_xyz(float x, float y, float Y)
#ifdef __KERNEL_SSE2__ #ifdef __KERNEL_SSE2__
/* /*
* Calculate initial guess for arg^exp based on float representation * Calculate initial guess for arg^exp based on float representation
* This method gives a constant bias, which can be easily compensated by multiplication with bias_coeff. * This method gives a constant bias,
* which can be easily compensated by multiplication with bias_coeff.
* Gives better results for exponents near 1 (e. g. 4/5). * Gives better results for exponents near 1 (e. g. 4/5).
* exp = exponent, encoded as uint32_t * exp = exponent, encoded as uint32_t
* e2coeff = 2^(127/exponent - 127) * bias_coeff^(1/exponent), encoded as uint32_t * e2coeff = 2^(127/exponent - 127) * bias_coeff^(1/exponent), encoded as uint32_t
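The scalar form of this float-representation trick, as a sketch of the general idea (not the SSE code that follows): the bit pattern of a positive float is roughly 2^23 * (log2(x) + 127), so scaling it around the exponent bias approximates a power function:

    #include <cstdint>
    #include <cstring>

    /* Rough initial guess for x^p, assuming x > 0; the constant bias this
     * introduces can be folded into a coefficient as described above. */
    static float fastpow_guess(float x, float p)
    {
      int32_t i;
      std::memcpy(&i, &x, sizeof(i));
      i = (int32_t)(p * (float)(i - 1065353216)) + 1065353216; /* 127 << 23 */
      float r;
      std::memcpy(&r, &i, sizeof(r));
      return r;
    }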

View File

@@ -141,7 +141,8 @@ class DebugFlags {
/* Use debug version of the kernel. */ /* Use debug version of the kernel. */
bool debug; bool debug;
/* TODO(mai): Currently this is only for OpenCL, but we should have it implemented for all devices. */ /* TODO(mai): Currently this is only for OpenCL, but we should have it implemented for all
* devices. */
/* Artificial memory limit in bytes (0 if disabled). */ /* Artificial memory limit in bytes (0 if disabled). */
size_t mem_limit; size_t mem_limit;
}; };

View File

@@ -36,7 +36,8 @@ CCL_NAMESPACE_BEGIN
/* CUDA has its own half data type, no need to define it then */ /* CUDA has its own half data type, no need to define it then */
# ifndef __KERNEL_CUDA__ # ifndef __KERNEL_CUDA__
/* Implementing this as a class rather than a typedef so that the compiler can tell it apart from unsigned shorts. */ /* Implementing this as a class rather than a typedef so that the compiler can tell it apart from
* unsigned shorts. */
class half { class half {
public: public:
half() : v(0) half() : v(0)

View File

@@ -155,7 +155,8 @@ bool IESFile::parse(ustring ies)
type = (IESType)parser.get_long(); /* Photometric type */ type = (IESType)parser.get_long(); /* Photometric type */
/* TODO(lukas): Test whether the current type B processing can also deal with type A files. /* TODO(lukas): Test whether the current type B processing can also deal with type A files.
* In theory the only difference should be orientation, which we ignore anyway, but with IES you never know... * In theory the only difference should be orientation, which we ignore anyway, but with IES you
* never know...
*/ */
if (type != TYPE_B && type != TYPE_C) { if (type != TYPE_B && type != TYPE_C) {
return false; return false;
@@ -173,12 +174,13 @@ bool IESFile::parse(ustring ies)
* Cycles expects radiometric quantities, though, which requires a conversion. * Cycles expects radiometric quantities, though, which requires a conversion.
* However, the Luminous efficacy (ratio of lumens per Watt) depends on the spectral distribution * However, the Luminous efficacy (ratio of lumens per Watt) depends on the spectral distribution
* of the light source since lumens take human perception into account. * of the light source since lumens take human perception into account.
* Since this spectral distribution is not known from the IES file, a typical one must be assumed. * Since this spectral distribution is not known from the IES file, a typical one must be
* The D65 standard illuminant has a Luminous efficacy of 177.83, which is used here to convert to Watt/sr. * assumed. The D65 standard illuminant has a Luminous efficacy of 177.83, which is used here to
* A more advanced approach would be to add a Blackbody Temperature input to the node and numerically * convert to Watt/sr. A more advanced approach would be to add a Blackbody Temperature input to
* integrate the Luminous efficacy from the resulting spectral distribution. * the node and numerically integrate the Luminous efficacy from the resulting spectral
* Also, the Watt/sr value must be multiplied by 4*pi to get the Watt value that Cycles expects * distribution. Also, the Watt/sr value must be multiplied by 4*pi to get the Watt value that
* for lamp strength. Therefore, the conversion here uses 4*pi/177.83 as a Candela to Watt factor. * Cycles expects for lamp strength. Therefore, the conversion here uses 4*pi/177.83 as a Candela
* to Watt factor.
*/ */
factor *= 0.0706650768394; factor *= 0.0706650768394;
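Checking the constant against the comment: 4*pi / 177.83 = 12.566371 / 177.83 ≈ 0.0706650768, the Candela-to-Watt factor applied here.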
@@ -294,7 +296,8 @@ bool IESFile::process_type_b()
bool IESFile::process_type_c() bool IESFile::process_type_c()
{ {
if (h_angles[0] == 90.0f) { if (h_angles[0] == 90.0f) {
/* Some files are stored from 90° to 270°, so we just rotate them to the regular 0°-180° range here. */ /* Some files are stored from 90° to 270°, so we just rotate them to the regular 0°-180° range
* here. */
for (int i = 0; i < h_angles.size(); i++) { for (int i = 0; i < h_angles.size(); i++) {
h_angles[i] -= 90.0f; h_angles[i] -= 90.0f;
} }
@@ -311,8 +314,9 @@ bool IESFile::process_type_c()
if (h_angles[h_angles.size() - 1] == 90.0f) { if (h_angles[h_angles.size() - 1] == 90.0f) {
/* Only one quadrant is defined, so we need to mirror twice (from one to two, then to four). /* Only one quadrant is defined, so we need to mirror twice (from one to two, then to four).
* Since the two->four mirroring step might also be required if we get an input of two quadrants, * Since the two->four mirroring step might also be required if we get an input of two
* we only do the first mirror here and later do the second mirror in either case. */ * quadrants, we only do the first mirror here and later do the second mirror in either case.
*/
int hnum = h_angles.size(); int hnum = h_angles.size();
for (int i = hnum - 2; i >= 0; i--) { for (int i = hnum - 2; i >= 0; i--) {
h_angles.push_back(180.0f - h_angles[i]); h_angles.push_back(180.0f - h_angles[i]);
@@ -329,8 +333,8 @@ bool IESFile::process_type_c()
} }
} }
/* Some files skip the 360° entry (contrary to standard) because it's supposed to be identical to the 0° entry. /* Some files skip the 360° entry (contrary to standard) because it's supposed to be identical to
* If the file has a discernible order in its spacing, just fix this. */ * the 0° entry. If the file has a discernible order in its spacing, just fix this. */
if (h_angles[h_angles.size() - 1] != 360.0f) { if (h_angles[h_angles.size() - 1] != 360.0f) {
int hnum = h_angles.size(); int hnum = h_angles.size();
float last_step = h_angles[hnum - 1] - h_angles[hnum - 2]; float last_step = h_angles[hnum - 1] - h_angles[hnum - 2];

View File

@@ -282,8 +282,10 @@ ccl_device float fast_acosf(float x)
const float m = (f < 1.0f) ? 1.0f - (1.0f - f) : 1.0f; const float m = (f < 1.0f) ? 1.0f - (1.0f - f) : 1.0f;
/* Based on http://www.pouet.net/topic.php?which=9132&page=2 /* Based on http://www.pouet.net/topic.php?which=9132&page=2
* 85% accurate (ulp 0) * 85% accurate (ulp 0)
* Examined 2130706434 values of acos: 15.2000597 avg ulp diff, 4492 max ulp, 4.51803e-05 max error // without "denormal crush" * Examined 2130706434 values of acos:
* Examined 2130706434 values of acos: 15.2007108 avg ulp diff, 4492 max ulp, 4.51803e-05 max error // with "denormal crush" * 15.2000597 avg ulp diff, 4492 max ulp, 4.51803e-05 max error // without "denormal crush"
* Examined 2130706434 values of acos:
* 15.2007108 avg ulp diff, 4492 max ulp, 4.51803e-05 max error // with "denormal crush"
*/ */
const float a = sqrtf(1.0f - m) * const float a = sqrtf(1.0f - m) *
(1.5707963267f + m * (-0.213300989f + m * (0.077980478f + m * -0.02164095f))); (1.5707963267f + m * (-0.213300989f + m * (0.077980478f + m * -0.02164095f)));
@@ -312,8 +314,10 @@ ccl_device float fast_atanf(float x)
const float s = 1.0f - (1.0f - k); /* Crush denormals. */ const float s = 1.0f - (1.0f - k); /* Crush denormals. */
const float t = s * s; const float t = s * s;
/* http://mathforum.org/library/drmath/view/62672.html /* http://mathforum.org/library/drmath/view/62672.html
* Examined 4278190080 values of atan: 2.36864877 avg ulp diff, 302 max ulp, 6.55651e-06 max error // (with denormals) * Examined 4278190080 values of atan:
* Examined 4278190080 values of atan: 171160502 avg ulp diff, 855638016 max ulp, 6.55651e-06 max error // (crush denormals) * 2.36864877 avg ulp diff, 302 max ulp, 6.55651e-06 max error // (with denormals)
* Examined 4278190080 values of atan:
* 171160502 avg ulp diff, 855638016 max ulp, 6.55651e-06 max error // (crush denormals)
*/ */
float r = s * madd(0.43157974f, t, 1.0f) / madd(madd(0.05831938f, t, 0.76443945f), t, 1.0f); float r = s * madd(0.43157974f, t, 1.0f) / madd(madd(0.05831938f, t, 0.76443945f), t, 1.0f);
if (a > 1.0f) { if (a > 1.0f) {

View File

@@ -163,7 +163,7 @@ ccl_device_forceinline bool ray_triangle_intersect(float3 ray_P,
/* Calculate geometry normal and denominator. */ /* Calculate geometry normal and denominator. */
const float3 Ng1 = cross(e1, e0); const float3 Ng1 = cross(e1, e0);
//const Vec3vfM Ng1 = stable_triangle_normal(e2,e1,e0); // const Vec3vfM Ng1 = stable_triangle_normal(e2,e1,e0);
const float3 Ng = Ng1 + Ng1; const float3 Ng = Ng1 + Ng1;
const float den = dot3(Ng, dir); const float den = dot3(Ng, dir);
/* Avoid division by 0. */ /* Avoid division by 0. */

View File

@@ -110,7 +110,8 @@ ccl_device_inline void math_vec3_add_strided(
} }
/* Elementary matrix operations. /* Elementary matrix operations.
* Note: TriMatrix refers to a square matrix that is symmetric, and therefore its upper-triangular part isn't stored. */ * Note: TriMatrix refers to a square matrix that is symmetric,
* and therefore its upper-triangular part isn't stored. */
ccl_device_inline void math_trimatrix_add_diagonal(ccl_global float *A, ccl_device_inline void math_trimatrix_add_diagonal(ccl_global float *A,
int n, int n,
@@ -196,7 +197,8 @@ ccl_device void math_trimatrix_cholesky(ccl_global float *A, int n, int stride)
} }
} }
/* Solve A*S=y for S given A and y, where A is symmetrical positive-semidefinite and both inputs are destroyed in the process. /* Solve A*S=y for S given A and y,
* where A is symmetrical positive-semidefinite and both inputs are destroyed in the process.
* *
* We can apply Cholesky decomposition to find a lower triangular L so that L*Lt = A. * We can apply Cholesky decomposition to find a lower triangular L so that L*Lt = A.
* With that we get (L*Lt)*S = L*(Lt*S) = L*b = y, defining b as Lt*S. * With that we get (L*Lt)*S = L*(Lt*S) = L*b = y, defining b as Lt*S.
@@ -204,15 +206,16 @@ ccl_device void math_trimatrix_cholesky(ccl_global float *A, int n, int stride)
* Then, the remaining problem is Lt*S = b, which again can be solved easily. * Then, the remaining problem is Lt*S = b, which again can be solved easily.
* *
* This is useful for solving the normal equation S=inv(Xt*W*X)*Xt*W*y, since Xt*W*X is * This is useful for solving the normal equation S=inv(Xt*W*X)*Xt*W*y, since Xt*W*X is
* symmetrical positive-semidefinite by construction, so we can just use this function with A=Xt*W*X and y=Xt*W*y. */ * symmetrical positive-semidefinite by construction,
* so we can just use this function with A=Xt*W*X and y=Xt*W*y. */
ccl_device_inline void math_trimatrix_vec3_solve(ccl_global float *A, ccl_device_inline void math_trimatrix_vec3_solve(ccl_global float *A,
ccl_global float3 *y, ccl_global float3 *y,
int n, int n,
int stride) int stride)
{ {
/* Since the first entry of the design row is always 1, the upper-left element of XtWX is a good /* Since the first entry of the design row is always 1, the upper-left element of XtWX is a good
* heuristic for the amount of pixels considered (with weighting), therefore the amount of correction * heuristic for the amount of pixels considered (with weighting),
* is scaled based on it. */ * therefore the amount of correction is scaled based on it. */
math_trimatrix_add_diagonal(A, n, 3e-7f * A[0], stride); /* Improve the numerical stability. */ math_trimatrix_add_diagonal(A, n, 3e-7f * A[0], stride); /* Improve the numerical stability. */
math_trimatrix_cholesky(A, n, stride); /* Replace A with L so that L*Lt = A. */ math_trimatrix_cholesky(A, n, stride); /* Replace A with L so that L*Lt = A. */
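A dense reference version of the solve described above (a sketch using full row-major storage for clarity; the kernel stores only the lower triangle and solves for float3 right-hand sides with strided access):

    #include <math.h>

    /* Solve A*x = y for symmetric positive-definite A via Cholesky.
     * Overwrites A with L (in its lower triangle) and y with the solution. */
    static void cholesky_solve(float *A, float *y, int n)
    {
      /* Decompose A = L * L^T. */
      for (int i = 0; i < n; i++) {
        for (int j = 0; j <= i; j++) {
          float sum = A[i * n + j];
          for (int k = 0; k < j; k++)
            sum -= A[i * n + k] * A[j * n + k];
          A[i * n + j] = (i == j) ? sqrtf(sum) : sum / A[j * n + j];
        }
      }
      /* Forward substitution: L * b = y. */
      for (int i = 0; i < n; i++) {
        for (int k = 0; k < i; k++)
          y[i] -= A[i * n + k] * y[k];
        y[i] /= A[i * n + i];
      }
      /* Back substitution: L^T * x = b. */
      for (int i = n - 1; i >= 0; i--) {
        for (int k = i + 1; k < n; k++)
          y[i] -= A[k * n + i] * y[k];
        y[i] /= A[i * n + i];
      }
    }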
@@ -234,8 +237,8 @@ ccl_device_inline void math_trimatrix_vec3_solve(ccl_global float *A,
} }
/* Perform the Jacobi Eigenvalue Method on matrix A. /* Perform the Jacobi Eigenvalue Method on matrix A.
* A is assumed to be a symmetrical matrix, therefore only the lower-triangular part is ever accessed. * A is assumed to be a symmetrical matrix, therefore only the lower-triangular part is ever
* The algorithm overwrites the contents of A. * accessed. The algorithm overwrites the contents of A.
* *
* After returning, A will be overwritten with D, which is (almost) diagonal, * After returning, A will be overwritten with D, which is (almost) diagonal,
* and V will contain the eigenvectors of the original A in its rows (!), * and V will contain the eigenvectors of the original A in its rows (!),
@@ -263,7 +266,8 @@ ccl_device void math_matrix_jacobi_eigendecomposition(float *A,
} }
if (off_diagonal < 1e-7f) { if (off_diagonal < 1e-7f) {
/* The matrix has nearly reached diagonal form. /* The matrix has nearly reached diagonal form.
* Since the eigenvalues are only used to determine truncation, their exact values aren't required - a relative error of a few ULPs won't matter at all. */ * Since the eigenvalues are only used to determine truncation, their exact values aren't
* required - a relative error of a few ULPs won't matter at all. */
break; break;
} }
@@ -277,7 +281,8 @@ ccl_device void math_matrix_jacobi_eigendecomposition(float *A,
float element = MAT(A, n, row, col); float element = MAT(A, n, row, col);
float abs_element = fabsf(element); float abs_element = fabsf(element);
/* If we're in a later sweep and the element already is very small, just set it to zero and skip the rotation. */ /* If we're in a later sweep and the element already is very small,
* just set it to zero and skip the rotation. */
if (sweep > 3 && abs_element <= singular_epsilon * fabsf(MAT(A, n, row, row)) && if (sweep > 3 && abs_element <= singular_epsilon * fabsf(MAT(A, n, row, row)) &&
abs_element <= singular_epsilon * fabsf(MAT(A, n, col, col))) { abs_element <= singular_epsilon * fabsf(MAT(A, n, col, col))) {
MAT(A, n, row, col) = 0.0f; MAT(A, n, row, col) = 0.0f;
@@ -288,13 +293,16 @@ ccl_device void math_matrix_jacobi_eigendecomposition(float *A,
continue; continue;
} }
/* If we're in one of the first sweeps and the element is smaller than the threshold, skip it. */ /* If we're in one of the first sweeps and the element is smaller than the threshold,
* skip it. */
if (sweep < 3 && (abs_element < threshold)) { if (sweep < 3 && (abs_element < threshold)) {
continue; continue;
} }
/* Determine rotation: The rotation is characterized by its angle phi - or, in the actual implementation, sin(phi) and cos(phi). /* Determine rotation: The rotation is characterized by its angle phi - or,
* To find those, we first compute their ratio - that might be unstable if the angle approaches 90°, so there's a fallback for that case. * in the actual implementation, sin(phi) and cos(phi).
* To find those, we first compute their ratio - that might be unstable if the angle
* approaches 90°, so there's a fallback for that case.
* Then, we compute sin(phi) and cos(phi) themselves. */ * Then, we compute sin(phi) and cos(phi) themselves. */
float singular_diff = MAT(A, n, row, row) - MAT(A, n, col, col); float singular_diff = MAT(A, n, row, row) - MAT(A, n, col, col);
float ratio; float ratio;
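The fallback mentioned in the comment can be sketched as follows. This mirrors the widely used Numerical Recipes formulation of the Jacobi rotation ratio; it is an illustration, not necessarily the exact branch the kernel takes:

#include <cmath>

/* Hedged sketch of a stable way to get ratio = tan(phi): the naive
 * tan(2*phi) = 2 * element / singular_diff blows up as singular_diff -> 0
 * (presumably the near-90-degree case, for 2*phi, that the comment warns
 * about). Taking the smaller root of t^2 + 2*t*cot(2*phi) - 1 = 0 instead
 * stays bounded: as theta -> 0 it smoothly yields t -> 1, i.e. phi -> 45
 * degrees. */
static float stable_tan_phi(float singular_diff, float element)
{
  const float theta = 0.5f * singular_diff / element; /* cot(2*phi) */
  const float t = 1.0f / (fabsf(theta) + sqrtf(1.0f + theta * theta));
  return (theta >= 0.0f) ? t : -t;
}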
@@ -310,7 +318,8 @@ ccl_device void math_matrix_jacobi_eigendecomposition(float *A,
float c = 1.0f / sqrtf(1.0f + ratio * ratio); float c = 1.0f / sqrtf(1.0f + ratio * ratio);
float s = ratio * c; float s = ratio * c;
/* To improve numerical stability by avoiding cancellation, the update equations are reformulated to use sin(phi) and tan(phi/2) instead. */ /* To improve numerical stability by avoiding cancellation, the update equations are
* reformulated to use sin(phi) and tan(phi/2) instead. */
float tan_phi_2 = s / (1.0f + c); float tan_phi_2 = s / (1.0f + c);
/* Update the singular values in the diagonal. */ /* Update the singular values in the diagonal. */
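The cancellation-avoiding identity behind that reformulation is cos(phi) = 1 - sin(phi) * tan(phi/2), so each rotated value becomes the old value plus a small correction rather than a difference of two nearly equal products. A tiny self-contained check (hypothetical names, not kernel code):

#include <cmath>
#include <cassert>

/* Verify numerically that
 *   c*x - s*y == x - s*(y + tan_phi_2 * x)
 *   s*x + c*y == y + s*(x - tan_phi_2 * y)
 * where tan_phi_2 = s / (1 + c) is tan(phi/2), as in the patch. */
int main()
{
  const float phi = 0.01f, x = 3.0f, y = 4.0f;
  const float c = cosf(phi), s = sinf(phi);
  const float tan_phi_2 = s / (1.0f + c);
  assert(fabsf((c * x - s * y) - (x - s * (y + tan_phi_2 * x))) < 1e-5f);
  assert(fabsf((s * x + c * y) - (y + s * (x - tan_phi_2 * y))) < 1e-5f);
  return 0;
}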
@@ -330,7 +339,8 @@ ccl_device void math_matrix_jacobi_eigendecomposition(float *A,
MATS(M, n, r2, c2, stride) += s * (M1 - tan_phi_2 * M2); \ MATS(M, n, r2, c2, stride) += s * (M1 - tan_phi_2 * M2); \
} }
/* Split into three parts to ensure correct accesses since we only store the lower-triangular part of A. */ /* Split into three parts to ensure correct accesses since we only store the
* lower-triangular part of A. */
for (int i = 0; i < col; i++) for (int i = 0; i < col; i++)
ROT(A, col, i, row, i, 1); ROT(A, col, i, row, i, 1);
for (int i = col + 1; i < row; i++) for (int i = col + 1; i < row; i++)
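A note on the split, with a hypothetical accessor to make the reason concrete: only the lower triangle of the symmetric A is stored, so an element (r, c) with c > r has to be read at its mirrored position (c, r), and the loop over the rotated pair crosses that boundary twice, at i = col and at i = row:

/* Illustrative accessor for a symmetric matrix stored as its lower triangle
 * (full row-major storage here for simplicity; the kernel uses a packed,
 * strided layout). The three loops above are the manually unswitched form
 * of this branch. */
static inline float &sym_lower(float *A, int n, int r, int c)
{
  return (c <= r) ? A[r * n + c] : A[c * n + r];
}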

View File

@@ -47,7 +47,8 @@ void Profiler::run()
} }
if (cur_shader >= 0 && cur_shader < shader_samples.size()) { if (cur_shader >= 0 && cur_shader < shader_samples.size()) {
/* Only consider the active shader during events whose runtime significantly depends on it. */ /* Only consider the active shader during events whose runtime significantly depends on it.
*/
if (((cur_event >= PROFILING_SHADER_EVAL) && (cur_event <= PROFILING_SUBSURFACE)) || if (((cur_event >= PROFILING_SHADER_EVAL) && (cur_event <= PROFILING_SUBSURFACE)) ||
((cur_event >= PROFILING_CLOSURE_EVAL) && ((cur_event >= PROFILING_CLOSURE_EVAL) &&
(cur_event <= PROFILING_CLOSURE_VOLUME_SAMPLE))) { (cur_event <= PROFILING_CLOSURE_VOLUME_SAMPLE))) {

View File

@@ -362,7 +362,8 @@ class Progress {
* It's used to display the sample count if only one tile is active. */ * It's used to display the sample count if only one tile is active. */
int current_tile_sample; int current_tile_sample;
/* Stores the number of tiles that have already finished. /* Stores the number of tiles that have already finished.
* Used to determine whether all but the last tile are finished rendering, in which case the current_tile_sample is displayed. */ * Used to determine whether all but the last tile are finished rendering,
* in which case the current_tile_sample is displayed. */
int rendered_tiles, denoised_tiles; int rendered_tiles, denoised_tiles;
double start_time, render_start_time; double start_time, render_start_time;
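A minimal sketch of the display rule those counters feed, with hypothetical names (the actual Progress logic also distinguishes rendered from denoised tiles):

/* Show the per-tile sample counter only when at most one tile is still
 * being rendered, i.e. all but the last tile are finished. Sketch only. */
static bool show_current_tile_sample(int rendered_tiles, int total_tiles)
{
  return rendered_tiles >= total_tiles - 1;
}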

View File

@@ -261,7 +261,7 @@ vector<int> distribute_threads_on_nodes(const int num_threads)
const int num_nodes = num_per_node_processors.size(); const int num_nodes = num_per_node_processors.size();
int thread_index = 0; int thread_index = 0;
/* First pass: fill in all the nodes to their maximum. /* First pass: fill in all the nodes to their maximum.
* *
* If there are fewer threads than the overall node capacity, some of the * If there are fewer threads than the overall node capacity, some of the
* nodes or parts of them will idle. * nodes or parts of them will idle.
* *
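An illustrative sketch of that first pass, under the stated assumptions (hypothetical names; the real distribute_threads_on_nodes() adds further passes on top):

#include <vector>

/* Assign threads to NUMA nodes up to each node's processor capacity; if
 * there are fewer threads than total capacity, the remaining capacity
 * simply idles. Returns the node index chosen for each thread. */
static std::vector<int> fill_nodes_first_pass(const std::vector<int> &node_capacity,
                                              int num_threads)
{
  std::vector<int> node_of_thread;
  node_of_thread.reserve(num_threads);
  for (int node = 0; node < (int)node_capacity.size(); node++) {
    for (int i = 0; i < node_capacity[node]; i++) {
      if ((int)node_of_thread.size() == num_threads) {
        return node_of_thread; /* Fewer threads than capacity: rest idles. */
      }
      node_of_thread.push_back(node);
    }
  }
  return node_of_thread;
}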

View File

@@ -1,30 +1,30 @@
/* /*
* Original code Copyright 2017, Intel Corporation * Original code Copyright 2017, Intel Corporation
* Modifications Copyright 2018, Blender Foundation. * Modifications Copyright 2018, Blender Foundation.
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
* *
* * Redistributions of source code must retain the above copyright notice, * * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer. * this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright * * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the * notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution. * documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors * * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software * may be used to endorse or promote products derived from this software
* without specific prior written permission. * without specific prior written permission.
* *
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/ */
#ifndef __UTIL_TYPES_FLOAT8_H__ #ifndef __UTIL_TYPES_FLOAT8_H__
#define __UTIL_TYPES_FLOAT8_H__ #define __UTIL_TYPES_FLOAT8_H__

View File

@@ -1,30 +1,30 @@
/* /*
* Original code Copyright 2017, Intel Corporation * Original code Copyright 2017, Intel Corporation
* Modifications Copyright 2018, Blender Foundation. * Modifications Copyright 2018, Blender Foundation.
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
* *
* * Redistributions of source code must retain the above copyright notice, * * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer. * this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright * * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the * notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution. * documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors * * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software * may be used to endorse or promote products derived from this software
* without specific prior written permission. * without specific prior written permission.
* *
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/ */
#ifndef __UTIL_TYPES_FLOAT8_IMPL_H__ #ifndef __UTIL_TYPES_FLOAT8_IMPL_H__
#define __UTIL_TYPES_FLOAT8_IMPL_H__ #define __UTIL_TYPES_FLOAT8_IMPL_H__