Cycles: avoid using float3 in kernel constant memory, just so we're sure alignment

is working compatible between cpu and gpu.
This commit is contained in:
Brecht Van Lommel
2011-12-20 12:25:45 +00:00
parent 72d2d05770
commit 40259cfe7b
5 changed files with 18 additions and 20 deletions

View File

@@ -74,8 +74,8 @@ __device void camera_sample_perspective(KernelGlobals *kg, float raster_x, float
ray->dP.dx = make_float3(0.0f, 0.0f, 0.0f); ray->dP.dx = make_float3(0.0f, 0.0f, 0.0f);
ray->dP.dy = make_float3(0.0f, 0.0f, 0.0f); ray->dP.dy = make_float3(0.0f, 0.0f, 0.0f);
ray->dD.dx = normalize(Ddiff + kernel_data.cam.dx) - normalize(Ddiff); ray->dD.dx = normalize(Ddiff + float4_to_float3(kernel_data.cam.dx)) - normalize(Ddiff);
ray->dD.dy = normalize(Ddiff + kernel_data.cam.dy) - normalize(Ddiff); ray->dD.dy = normalize(Ddiff + float4_to_float3(kernel_data.cam.dy)) - normalize(Ddiff);
#endif #endif
#ifdef __CAMERA_CLIPPING__ #ifdef __CAMERA_CLIPPING__
@@ -107,8 +107,8 @@ __device void camera_sample_orthographic(KernelGlobals *kg, float raster_x, floa
#ifdef __RAY_DIFFERENTIALS__ #ifdef __RAY_DIFFERENTIALS__
/* ray differential */ /* ray differential */
ray->dP.dx = kernel_data.cam.dx; ray->dP.dx = float4_to_float3(kernel_data.cam.dx);
ray->dP.dy = kernel_data.cam.dy; ray->dP.dy = float4_to_float3(kernel_data.cam.dy);
ray->dD.dx = make_float3(0.0f, 0.0f, 0.0f); ray->dD.dx = make_float3(0.0f, 0.0f, 0.0f);
ray->dD.dy = make_float3(0.0f, 0.0f, 0.0f); ray->dD.dy = make_float3(0.0f, 0.0f, 0.0f);

View File

@@ -295,7 +295,11 @@ typedef struct ShaderData {
#endif #endif
} ShaderData; } ShaderData;
/* Constrant Kernel Data */ /* Constrant Kernel Data
*
* These structs are passed from CPU to various devices, and the struct layout
* must match exactly. Structs are padded to ensure 16 byte alignment, and we
* do not use float3 because its size may not be the same on all devices. */
typedef struct KernelCamera { typedef struct KernelCamera {
/* type */ /* type */
@@ -307,14 +311,8 @@ typedef struct KernelCamera {
Transform rastertocamera; Transform rastertocamera;
/* differentials */ /* differentials */
float3 dx; float4 dx;
#ifndef WITH_OPENCL float4 dy;
float pad1;
#endif
float3 dy;
#ifndef WITH_OPENCL
float pad2;
#endif
/* depth of field */ /* depth of field */
float aperturesize; float aperturesize;
@@ -355,10 +353,6 @@ typedef struct KernelBackground {
typedef struct KernelSunSky { typedef struct KernelSunSky {
/* sun direction in spherical and cartesian */ /* sun direction in spherical and cartesian */
float theta, phi, pad3, pad4; float theta, phi, pad3, pad4;
float3 dir;
#ifndef WITH_OPENCL
float pad;
#endif
/* perez function parameters */ /* perez function parameters */
float zenith_Y, zenith_x, zenith_y, pad2; float zenith_Y, zenith_x, zenith_y, pad2;

View File

@@ -150,8 +150,8 @@ void Camera::device_update(Device *device, DeviceScene *dscene)
kcam->ortho = ortho; kcam->ortho = ortho;
/* store differentials */ /* store differentials */
kcam->dx = dx; kcam->dx = float3_to_float4(dx);
kcam->dy = dy; kcam->dy = float3_to_float4(dy);
/* clipping */ /* clipping */
kcam->nearclip = nearclip; kcam->nearclip = nearclip;

View File

@@ -273,7 +273,6 @@ static void sky_texture_precompute(KernelSunSky *ksunsky, float3 dir, float turb
ksunsky->theta = theta; ksunsky->theta = theta;
ksunsky->phi = phi; ksunsky->phi = phi;
ksunsky->dir = dir;
float theta2 = theta*theta; float theta2 = theta*theta;
float theta3 = theta*theta*theta; float theta3 = theta*theta*theta;

View File

@@ -536,6 +536,11 @@ __device_inline float3 float4_to_float3(const float4 a)
return make_float3(a.x, a.y, a.z); return make_float3(a.x, a.y, a.z);
} }
__device_inline float4 float3_to_float4(const float3 a)
{
return make_float4(a.x, a.y, a.z, 1.0f);
}
#ifndef __KERNEL_GPU__ #ifndef __KERNEL_GPU__
__device_inline void print_float3(const char *label, const float3& a) __device_inline void print_float3(const char *label, const float3& a)