Cycles: avoid using float3 in kernel constant memory, just so we're sure alignment
is working compatible between cpu and gpu.
This commit is contained in:
@@ -74,8 +74,8 @@ __device void camera_sample_perspective(KernelGlobals *kg, float raster_x, float
|
|||||||
ray->dP.dx = make_float3(0.0f, 0.0f, 0.0f);
|
ray->dP.dx = make_float3(0.0f, 0.0f, 0.0f);
|
||||||
ray->dP.dy = make_float3(0.0f, 0.0f, 0.0f);
|
ray->dP.dy = make_float3(0.0f, 0.0f, 0.0f);
|
||||||
|
|
||||||
ray->dD.dx = normalize(Ddiff + kernel_data.cam.dx) - normalize(Ddiff);
|
ray->dD.dx = normalize(Ddiff + float4_to_float3(kernel_data.cam.dx)) - normalize(Ddiff);
|
||||||
ray->dD.dy = normalize(Ddiff + kernel_data.cam.dy) - normalize(Ddiff);
|
ray->dD.dy = normalize(Ddiff + float4_to_float3(kernel_data.cam.dy)) - normalize(Ddiff);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef __CAMERA_CLIPPING__
|
#ifdef __CAMERA_CLIPPING__
|
||||||
@@ -107,8 +107,8 @@ __device void camera_sample_orthographic(KernelGlobals *kg, float raster_x, floa
|
|||||||
|
|
||||||
#ifdef __RAY_DIFFERENTIALS__
|
#ifdef __RAY_DIFFERENTIALS__
|
||||||
/* ray differential */
|
/* ray differential */
|
||||||
ray->dP.dx = kernel_data.cam.dx;
|
ray->dP.dx = float4_to_float3(kernel_data.cam.dx);
|
||||||
ray->dP.dy = kernel_data.cam.dy;
|
ray->dP.dy = float4_to_float3(kernel_data.cam.dy);
|
||||||
|
|
||||||
ray->dD.dx = make_float3(0.0f, 0.0f, 0.0f);
|
ray->dD.dx = make_float3(0.0f, 0.0f, 0.0f);
|
||||||
ray->dD.dy = make_float3(0.0f, 0.0f, 0.0f);
|
ray->dD.dy = make_float3(0.0f, 0.0f, 0.0f);
|
||||||
|
@@ -295,7 +295,11 @@ typedef struct ShaderData {
|
|||||||
#endif
|
#endif
|
||||||
} ShaderData;
|
} ShaderData;
|
||||||
|
|
||||||
/* Constrant Kernel Data */
|
/* Constrant Kernel Data
|
||||||
|
*
|
||||||
|
* These structs are passed from CPU to various devices, and the struct layout
|
||||||
|
* must match exactly. Structs are padded to ensure 16 byte alignment, and we
|
||||||
|
* do not use float3 because its size may not be the same on all devices. */
|
||||||
|
|
||||||
typedef struct KernelCamera {
|
typedef struct KernelCamera {
|
||||||
/* type */
|
/* type */
|
||||||
@@ -307,14 +311,8 @@ typedef struct KernelCamera {
|
|||||||
Transform rastertocamera;
|
Transform rastertocamera;
|
||||||
|
|
||||||
/* differentials */
|
/* differentials */
|
||||||
float3 dx;
|
float4 dx;
|
||||||
#ifndef WITH_OPENCL
|
float4 dy;
|
||||||
float pad1;
|
|
||||||
#endif
|
|
||||||
float3 dy;
|
|
||||||
#ifndef WITH_OPENCL
|
|
||||||
float pad2;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* depth of field */
|
/* depth of field */
|
||||||
float aperturesize;
|
float aperturesize;
|
||||||
@@ -355,10 +353,6 @@ typedef struct KernelBackground {
|
|||||||
typedef struct KernelSunSky {
|
typedef struct KernelSunSky {
|
||||||
/* sun direction in spherical and cartesian */
|
/* sun direction in spherical and cartesian */
|
||||||
float theta, phi, pad3, pad4;
|
float theta, phi, pad3, pad4;
|
||||||
float3 dir;
|
|
||||||
#ifndef WITH_OPENCL
|
|
||||||
float pad;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* perez function parameters */
|
/* perez function parameters */
|
||||||
float zenith_Y, zenith_x, zenith_y, pad2;
|
float zenith_Y, zenith_x, zenith_y, pad2;
|
||||||
|
@@ -150,8 +150,8 @@ void Camera::device_update(Device *device, DeviceScene *dscene)
|
|||||||
kcam->ortho = ortho;
|
kcam->ortho = ortho;
|
||||||
|
|
||||||
/* store differentials */
|
/* store differentials */
|
||||||
kcam->dx = dx;
|
kcam->dx = float3_to_float4(dx);
|
||||||
kcam->dy = dy;
|
kcam->dy = float3_to_float4(dy);
|
||||||
|
|
||||||
/* clipping */
|
/* clipping */
|
||||||
kcam->nearclip = nearclip;
|
kcam->nearclip = nearclip;
|
||||||
|
@@ -273,7 +273,6 @@ static void sky_texture_precompute(KernelSunSky *ksunsky, float3 dir, float turb
|
|||||||
|
|
||||||
ksunsky->theta = theta;
|
ksunsky->theta = theta;
|
||||||
ksunsky->phi = phi;
|
ksunsky->phi = phi;
|
||||||
ksunsky->dir = dir;
|
|
||||||
|
|
||||||
float theta2 = theta*theta;
|
float theta2 = theta*theta;
|
||||||
float theta3 = theta*theta*theta;
|
float theta3 = theta*theta*theta;
|
||||||
|
@@ -536,6 +536,11 @@ __device_inline float3 float4_to_float3(const float4 a)
|
|||||||
return make_float3(a.x, a.y, a.z);
|
return make_float3(a.x, a.y, a.z);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
__device_inline float4 float3_to_float4(const float3 a)
|
||||||
|
{
|
||||||
|
return make_float4(a.x, a.y, a.z, 1.0f);
|
||||||
|
}
|
||||||
|
|
||||||
#ifndef __KERNEL_GPU__
|
#ifndef __KERNEL_GPU__
|
||||||
|
|
||||||
__device_inline void print_float3(const char *label, const float3& a)
|
__device_inline void print_float3(const char *label, const float3& a)
|
||||||
|
Reference in New Issue
Block a user