Cycles: Tweaks to support CUDA 8 toolkit

All the changes are mainly giving explicit tips on inlining functions,
so they match how inlining worked with previous toolkit.

This make kernel compiled by CUDA 8 render in average with same speed
as previous kernels. Some scenes are somewhat faster, some of them are
somewhat slower. But slowdown is within 1% so far.

On a positive side it allows us to enable newer generation cards on
buildbots (so GTX 10x0 will be officially supported soon).
This commit is contained in:
Sergey Sharybin
2016-08-01 15:40:46 +02:00
parent 7065022f7a
commit 6353ecb996
29 changed files with 250 additions and 126 deletions

View File

@@ -211,7 +211,10 @@ ccl_device void camera_sample_orthographic(KernelGlobals *kg, float raster_x, fl
/* Panorama Camera */
ccl_device void camera_sample_panorama(KernelGlobals *kg, float raster_x, float raster_y, float lens_u, float lens_v, ccl_addr_space Ray *ray)
ccl_device_inline void camera_sample_panorama(KernelGlobals *kg,
float raster_x, float raster_y,
float lens_u, float lens_v,
ccl_addr_space Ray *ray)
{
Transform rastertocamera = kernel_data.cam.rastertocamera;
float3 Pcamera = transform_perspective(&rastertocamera, make_float3(raster_x, raster_y, 0.0f));
@@ -303,8 +306,12 @@ ccl_device void camera_sample_panorama(KernelGlobals *kg, float raster_x, float
/* Common */
ccl_device void camera_sample(KernelGlobals *kg, int x, int y, float filter_u, float filter_v,
float lens_u, float lens_v, float time, ccl_addr_space Ray *ray)
ccl_device_inline void camera_sample(KernelGlobals *kg,
int x, int y,
float filter_u, float filter_v,
float lens_u, float lens_v,
float time,
ccl_addr_space Ray *ray)
{
/* pixel filter */
int filter_table_offset = kernel_data.film.filter_table_offset;