Code refactor: make Transform always affine, dropping last row.

This save a little memory and copying in the kernel by storing only a 4x3
matrix instead of a 4x4 matrix. We already did this in a few places, and
those don't need to be special exceptions anymore now.
This commit is contained in:
Brecht Van Lommel
2018-03-08 06:48:14 +01:00
parent 623141f339
commit b66efbecf4
25 changed files with 120 additions and 158 deletions

View File

@@ -40,6 +40,7 @@
#include "util/util_foreach.h"
#include "util/util_path.h"
#include "util/util_projection.h"
#include "util/util_transform.h"
#include "util/util_xml.h"
@@ -546,8 +547,10 @@ static void xml_read_transform(xml_node node, Transform& tfm)
{
if(node.attribute("matrix")) {
vector<float> matrix;
if(xml_read_float_array(matrix, node, "matrix") && matrix.size() == 16)
tfm = tfm * transform_transpose((*(Transform*)&matrix[0]));
if(xml_read_float_array(matrix, node, "matrix") && matrix.size() == 16) {
ProjectionTransform projection = *(ProjectionTransform*)&matrix[0];
tfm = tfm * projection_to_transform(projection_transpose(projection));
}
}
if(node.attribute("translate")) {

View File

@@ -246,8 +246,7 @@ static Transform blender_camera_matrix(const Transform& tfm,
result = tfm *
make_transform(1.0f, 0.0f, 0.0f, 0.0f,
0.0f, 0.0f, 1.0f, 0.0f,
0.0f, 1.0f, 0.0f, 0.0f,
0.0f, 0.0f, 0.0f, 1.0f);
0.0f, 1.0f, 0.0f, 0.0f);
}
else {
/* Make it so environment camera needs to be pointed in the direction
@@ -257,8 +256,7 @@ static Transform blender_camera_matrix(const Transform& tfm,
result = tfm *
make_transform( 0.0f, -1.0f, 0.0f, 0.0f,
0.0f, 0.0f, 1.0f, 0.0f,
-1.0f, 0.0f, 0.0f, 0.0f,
0.0f, 0.0f, 0.0f, 1.0f);
-1.0f, 0.0f, 0.0f, 0.0f);
}
}
else {

View File

@@ -247,14 +247,15 @@ static inline float *image_get_float_pixels_for_frame(BL::Image& image,
static inline Transform get_transform(const BL::Array<float, 16>& array)
{
Transform tfm;
ProjectionTransform projection;
/* we assume both types to be just 16 floats, and transpose because blender
* use column major matrix order while we use row major */
memcpy(&tfm, &array, sizeof(float)*16);
tfm = transform_transpose(tfm);
/* We assume both types to be just 16 floats, and transpose because blender
* use column major matrix order while we use row major. */
memcpy(&projection, &array, sizeof(float)*16);
projection = projection_transpose(projection);
return tfm;
/* Drop last row, matrix is assumed to be affine transform. */
return projection_to_transform(projection);
}
static inline float2 get_float2(const BL::Array<float, 2>& array)

View File

@@ -77,7 +77,7 @@ size_t SocketType::max_size()
void *SocketType::zero_default_value()
{
static Transform zero_transform = {{0, 0, 0, 0}, {0, 0, 0, 0}, {0, 0, 0, 0}, {0, 0, 0, 0}};
static Transform zero_transform = {{0, 0, 0, 0}, {0, 0, 0, 0}, {0, 0, 0, 0}};
return &zero_transform;
}

View File

@@ -196,7 +196,7 @@ void xml_read_node(XMLReader& reader, Node *node, xml_node xml_node)
case SocketType::TRANSFORM:
{
array<Transform> value;
xml_read_float_array<16>(value, attr);
xml_read_float_array<12>(value, attr);
if(value.size() == 1) {
node->set(socket, value[0]);
}
@@ -205,7 +205,7 @@ void xml_read_node(XMLReader& reader, Node *node, xml_node xml_node)
case SocketType::TRANSFORM_ARRAY:
{
array<Transform> value;
xml_read_float_array<16>(value, attr);
xml_read_float_array<12>(value, attr);
node->set(socket, value);
break;
}
@@ -400,12 +400,10 @@ xml_node xml_write_node(Node *node, xml_node xml_root)
{
Transform tfm = node->get_transform(socket);
std::stringstream ss;
for(int i = 0; i < 4; i++) {
ss << string_printf("%g %g %g %g", (double)tfm[i][0], (double)tfm[i][1], (double)tfm[i][2], (double)tfm[i][3]);
if(i != 3) {
ss << " ";
}
for(int i = 0; i < 3; i++) {
ss << string_printf("%g %g %g %g ", (double)tfm[i][0], (double)tfm[i][1], (double)tfm[i][2], (double)tfm[i][3]);
}
ss << string_printf("%g %g %g %g", 0.0, 0.0, 0.0, 1.0);
attr = ss.str().c_str();
break;
}
@@ -416,11 +414,12 @@ xml_node xml_write_node(Node *node, xml_node xml_root)
for(size_t j = 0; j < value.size(); j++) {
const Transform& tfm = value[j];
for(int i = 0; i < 4; i++) {
ss << string_printf("%g %g %g %g", (double)tfm[i][0], (double)tfm[i][1], (double)tfm[i][2], (double)tfm[i][3]);
if(j != value.size() - 1 || i != 3) {
ss << " ";
}
for(int i = 0; i < 3; i++) {
ss << string_printf("%g %g %g %g ", (double)tfm[i][0], (double)tfm[i][1], (double)tfm[i][2], (double)tfm[i][3]);
}
ss << string_printf("%g %g %g %g", 0.0, 0.0, 0.0, 1.0);
if(j != value.size() - 1) {
ss << " ";
}
}
attr = ss.str().c_str();

View File

@@ -25,7 +25,6 @@ ccl_device_forceinline Transform bvh_unaligned_node_fetch_space(KernelGlobals *k
space.x = kernel_tex_fetch(__bvh_nodes, child_addr+1);
space.y = kernel_tex_fetch(__bvh_nodes, child_addr+2);
space.z = kernel_tex_fetch(__bvh_nodes, child_addr+3);
space.w = make_float4(0.0f, 0.0f, 0.0f, 1.0f);
return space;
}

View File

@@ -103,7 +103,6 @@ ccl_device Transform primitive_attribute_matrix(KernelGlobals *kg, const ShaderD
tfm.x = kernel_tex_fetch(__attributes_float3, desc.offset + 0);
tfm.y = kernel_tex_fetch(__attributes_float3, desc.offset + 1);
tfm.z = kernel_tex_fetch(__attributes_float3, desc.offset + 2);
tfm.w = kernel_tex_fetch(__attributes_float3, desc.offset + 3);
return tfm;
}

View File

@@ -170,8 +170,7 @@ ccl_device_forceinline bool cardinal_curve_intersect(
htfm = make_transform(
dir.z / d, 0, -dir.x /d, 0,
-dir.x * dir.y /d, d, -dir.y * dir.z /d, 0,
dir.x, dir.y, dir.z, 0,
0, 0, 0, 1);
dir.x, dir.y, dir.z, 0);
float4 v00 = kernel_tex_fetch(__curves, prim);

View File

@@ -28,12 +28,12 @@ CCL_NAMESPACE_BEGIN
enum ObjectTransform {
OBJECT_TRANSFORM = 0,
OBJECT_INVERSE_TRANSFORM = 4,
OBJECT_INVERSE_TRANSFORM = 1,
};
enum ObjectVectorTransform {
OBJECT_VECTOR_MOTION_PRE = 0,
OBJECT_VECTOR_MOTION_POST = 3
OBJECT_VECTOR_MOTION_POST = 1
};
/* Object to world space transformation */
@@ -51,8 +51,6 @@ ccl_device_inline Transform object_fetch_transform(KernelGlobals *kg, int object
tfm.y = kernel_tex_fetch(__objects, object).tfm.pre.y;
tfm.z = kernel_tex_fetch(__objects, object).tfm.pre.z;
}
tfm.w = make_float4(0.0f, 0.0f, 0.0f, 1.0f);
return tfm;
}
@@ -60,20 +58,12 @@ ccl_device_inline Transform object_fetch_transform(KernelGlobals *kg, int object
ccl_device_inline Transform lamp_fetch_transform(KernelGlobals *kg, int lamp, bool inverse)
{
Transform tfm;
if(inverse) {
tfm.x = kernel_tex_fetch(__lights, lamp).itfm[0];
tfm.y = kernel_tex_fetch(__lights, lamp).itfm[1];
tfm.z = kernel_tex_fetch(__lights, lamp).itfm[2];
return kernel_tex_fetch(__lights, lamp).itfm;
}
else {
tfm.x = kernel_tex_fetch(__lights, lamp).tfm[0];
tfm.y = kernel_tex_fetch(__lights, lamp).tfm[1];
tfm.z = kernel_tex_fetch(__lights, lamp).tfm[2];
return kernel_tex_fetch(__lights, lamp).tfm;
}
tfm.w = make_float4(0.0f, 0.0f, 0.0f, 1.0f);
return tfm;
}
/* Object to world space transformation for motion vectors */
@@ -81,14 +71,7 @@ ccl_device_inline Transform lamp_fetch_transform(KernelGlobals *kg, int lamp, bo
ccl_device_inline Transform object_fetch_vector_transform(KernelGlobals *kg, int object, enum ObjectVectorTransform type)
{
int offset = object*OBJECT_VECTOR_SIZE + (int)type;
Transform tfm;
tfm.x = kernel_tex_fetch(__objects_vector, offset + 0);
tfm.y = kernel_tex_fetch(__objects_vector, offset + 1);
tfm.z = kernel_tex_fetch(__objects_vector, offset + 2);
tfm.w = make_float4(0.0f, 0.0f, 0.0f, 1.0f);
return tfm;
return kernel_tex_fetch(__objects_vector, offset);
}
/* Motion blurred object transformations */

View File

@@ -32,7 +32,7 @@ KERNEL_TEX(float2, __prim_time)
/* objects */
KERNEL_TEX(KernelObject, __objects)
KERNEL_TEX(float4, __objects_vector)
KERNEL_TEX(Transform, __objects_vector)
/* triangles */
KERNEL_TEX(uint, __tri_shader)

View File

@@ -35,10 +35,10 @@
CCL_NAMESPACE_BEGIN
/* Constants */
#define OBJECT_VECTOR_SIZE 6
#define OBJECT_VECTOR_SIZE 2
#define FILTER_TABLE_SIZE 1024
#define RAMP_TABLE_SIZE 256
#define SHUTTER_TABLE_SIZE 256
#define SHUTTER_TABLE_SIZE 256
#define BSSRDF_MIN_RADIUS 1e-8f
#define BSSRDF_MAX_HITS 4
@@ -1483,8 +1483,8 @@ typedef struct KernelLight {
int samples;
float max_bounces;
float random;
float4 tfm[3];
float4 itfm[3];
Transform tfm;
Transform itfm;
union {
KernelSpotLight spot;
KernelAreaLight area;

View File

@@ -64,8 +64,7 @@ CCL_NAMESPACE_BEGIN
static void copy_matrix(OSL::Matrix44& m, const Transform& tfm)
{
// TODO: remember when making affine
Transform t = transform_transpose(tfm);
ProjectionTransform t = projection_transpose(ProjectionTransform(tfm));
memcpy(&m, &t, sizeof(m));
}
@@ -553,8 +552,7 @@ static bool set_attribute_float3_3(float3 P[3], TypeDesc type, bool derivatives,
static bool set_attribute_matrix(const Transform& tfm, TypeDesc type, void *val)
{
if(type == TypeDesc::TypeMatrix) {
Transform transpose = transform_transpose(tfm);
memcpy(val, &transpose, sizeof(Transform));
copy_matrix(*(OSL::Matrix44*)val, tfm);
return true;
}

View File

@@ -26,7 +26,6 @@ ccl_device void svm_node_mapping(KernelGlobals *kg, ShaderData *sd, float *stack
tfm.x = read_node_float(kg, offset);
tfm.y = read_node_float(kg, offset);
tfm.z = read_node_float(kg, offset);
tfm.w = read_node_float(kg, offset);
float3 r = transform_point(&tfm, v);
stack_store_float3(stack, out_offset, r);

View File

@@ -42,7 +42,6 @@ ccl_device void svm_node_tex_coord(KernelGlobals *kg,
tfm.x = read_node_float(kg, offset);
tfm.y = read_node_float(kg, offset);
tfm.z = read_node_float(kg, offset);
tfm.w = read_node_float(kg, offset);
data = transform_point(&tfm, data);
}
break;
@@ -123,7 +122,6 @@ ccl_device void svm_node_tex_coord_bump_dx(KernelGlobals *kg,
tfm.x = read_node_float(kg, offset);
tfm.y = read_node_float(kg, offset);
tfm.z = read_node_float(kg, offset);
tfm.w = read_node_float(kg, offset);
data = transform_point(&tfm, data);
}
break;
@@ -207,7 +205,6 @@ ccl_device void svm_node_tex_coord_bump_dy(KernelGlobals *kg,
tfm.x = read_node_float(kg, offset);
tfm.y = read_node_float(kg, offset);
tfm.z = read_node_float(kg, offset);
tfm.w = read_node_float(kg, offset);
data = transform_point(&tfm, data);
}
break;

View File

@@ -39,7 +39,6 @@ ccl_device void svm_node_tex_voxel(KernelGlobals *kg,
tfm.x = read_node_float(kg, offset);
tfm.y = read_node_float(kg, offset);
tfm.z = read_node_float(kg, offset);
tfm.w = read_node_float(kg, offset);
co = transform_point(&tfm, co);
}

View File

@@ -785,11 +785,8 @@ void LightManager::device_update_points(Device *,
klights[light_index].max_bounces = max_bounces;
klights[light_index].random = random;
Transform tfm = light->tfm;
Transform itfm = transform_inverse(tfm);
memcpy(&klights[light_index].tfm, &tfm, sizeof(float4)*3);
memcpy(&klights[light_index].itfm, &itfm, sizeof(float4)*3);
klights[light_index].tfm = light->tfm;
klights[light_index].itfm = transform_inverse(light->tfm);
light_index++;
}
@@ -825,10 +822,8 @@ void LightManager::device_update_points(Device *,
klights[light_index].area.dir[0] = dir.x;
klights[light_index].area.dir[1] = dir.y;
klights[light_index].area.dir[2] = dir.z;
Transform tfm = light->tfm;
Transform itfm = transform_inverse(tfm);
memcpy(&klights[light_index].tfm, &tfm, sizeof(float4)*3);
memcpy(&klights[light_index].itfm, &itfm, sizeof(float4)*3);
klights[light_index].tfm = light->tfm;
klights[light_index].itfm = transform_inverse(light->tfm);
light_index++;
}

View File

@@ -1445,11 +1445,11 @@ static void update_attribute_element_offset(Mesh *mesh,
Transform *tfm = mattr->data_transform();
offset = attr_float3_offset;
assert(attr_float3.size() >= offset + size * 4);
for(size_t k = 0; k < size*4; k++) {
assert(attr_float3.size() >= offset + size * 3);
for(size_t k = 0; k < size*3; k++) {
attr_float3[offset+k] = (&tfm->x)[k];
}
attr_float3_offset += size * 4;
attr_float3_offset += size * 3;
}
else {
float4 *data = mattr->data_float4();

View File

@@ -117,8 +117,7 @@ Transform TextureMapping::compute_transform()
case NORMAL:
/* no translation for normals, and inverse transpose */
mat = rmat*smat;
mat = transform_inverse(mat);
mat = transform_transpose(mat);
mat = transform_transposed_inverse(mat);
break;
}
@@ -153,7 +152,6 @@ void TextureMapping::compile(SVMCompiler& compiler, int offset_in, int offset_ou
compiler.add_node(tfm.x);
compiler.add_node(tfm.y);
compiler.add_node(tfm.z);
compiler.add_node(tfm.w);
if(use_minmax) {
compiler.add_node(NODE_MIN_MAX, offset_out, offset_out);
@@ -193,9 +191,7 @@ void TextureMapping::compile_end(SVMCompiler& compiler, ShaderInput *vector_in,
void TextureMapping::compile(OSLCompiler &compiler)
{
if(!skip()) {
Transform tfm = transform_transpose(compute_transform());
compiler.parameter("mapping", tfm);
compiler.parameter("mapping", compute_transform());
compiler.parameter("use_mapping", 1);
}
}
@@ -1434,7 +1430,6 @@ void PointDensityTextureNode::compile(SVMCompiler& compiler)
compiler.add_node(tfm.x);
compiler.add_node(tfm.y);
compiler.add_node(tfm.z);
compiler.add_node(tfm.w);
}
}
else {
@@ -1478,7 +1473,7 @@ void PointDensityTextureNode::compile(OSLCompiler& compiler)
compiler.parameter("filename", string_printf("@%d", slot).c_str());
}
if(space == NODE_TEX_VOXEL_SPACE_WORLD) {
compiler.parameter("mapping", transform_transpose(tfm));
compiler.parameter("mapping", tfm);
compiler.parameter("use_mapping", 1);
}
compiler.parameter(this, "interpolation");
@@ -1558,8 +1553,7 @@ void MappingNode::compile(SVMCompiler& compiler)
void MappingNode::compile(OSLCompiler& compiler)
{
Transform tfm = transform_transpose(tex_mapping.compute_transform());
compiler.parameter("Matrix", tfm);
compiler.parameter("Matrix", tex_mapping.compute_transform());
compiler.parameter_point("mapping_min", tex_mapping.min);
compiler.parameter_point("mapping_max", tex_mapping.max);
compiler.parameter("use_minmax", tex_mapping.use_minmax);
@@ -3220,7 +3214,6 @@ void TextureCoordinateNode::compile(SVMCompiler& compiler)
compiler.add_node(ob_itfm.x);
compiler.add_node(ob_itfm.y);
compiler.add_node(ob_itfm.z);
compiler.add_node(ob_itfm.w);
}
}
@@ -3259,7 +3252,7 @@ void TextureCoordinateNode::compile(OSLCompiler& compiler)
if(compiler.output_type() == SHADER_TYPE_VOLUME)
compiler.parameter("is_volume", true);
compiler.parameter(this, "use_transform");
Transform ob_itfm = transform_transpose(transform_inverse(ob_tfm));
Transform ob_itfm = transform_transposed_inverse(ob_tfm);
compiler.parameter("object_itfm", ob_itfm);
compiler.parameter(this, "from_dupli");

View File

@@ -132,7 +132,7 @@ void Object::apply_transform(bool apply_to_motion)
/* store matrix to transform later. when accessing these as attributes we
* do not want the transform to be applied for consistency between static
* and dynamic BVH, so we do it on packing. */
mesh->transform_normal = transform_transpose(transform_inverse(tfm));
mesh->transform_normal = transform_transposed_inverse(tfm);
/* apply to mesh vertices */
for(size_t i = 0; i < mesh->verts.size(); i++)
@@ -290,7 +290,7 @@ void ObjectManager::device_update_object_transform(UpdateObjectTransformState *s
int object_index)
{
KernelObject& kobject = state->objects[object_index];
float4 *objects_vector = state->objects_vector;
Transform *objects_vector = state->objects_vector;
Mesh *mesh = ob->mesh;
uint flag = 0;
@@ -357,11 +357,8 @@ void ObjectManager::device_update_object_transform(UpdateObjectTransformState *s
}
}
/* OBJECT_TRANSFORM */
memcpy(&kobject.tfm.pre, &tfm, sizeof(float4)*3);
/* OBJECT_INVERSE_TRANSFORM */
memcpy(&kobject.tfm.mid, &itfm, sizeof(float4)*3);
/* OBJECT_PROPERTIES */
memcpy(&kobject.tfm.pre, &tfm, sizeof(tfm));
memcpy(&kobject.tfm.mid, &itfm, sizeof(itfm));
kobject.surface_area = surface_area;
kobject.pass_id = pass_id;
kobject.random_number = random_number;
@@ -395,8 +392,8 @@ void ObjectManager::device_update_object_transform(UpdateObjectTransformState *s
mtfm.post = mtfm.post * itfm;
}
memcpy(&objects_vector[object_index*OBJECT_VECTOR_SIZE+0], &mtfm.pre, sizeof(float4)*3);
memcpy(&objects_vector[object_index*OBJECT_VECTOR_SIZE+3], &mtfm.post, sizeof(float4)*3);
objects_vector[object_index*OBJECT_VECTOR_SIZE+0] = mtfm.pre;
objects_vector[object_index*OBJECT_VECTOR_SIZE+1] = mtfm.post;
}
else if(state->need_motion == Scene::MOTION_BLUR) {
if(ob->use_motion) {

View File

@@ -135,7 +135,7 @@ protected:
/* Packed object arrays. Those will be filled in. */
uint *object_flag;
KernelObject *objects;
float4 *objects_vector;
Transform *objects_vector;
/* Flags which will be synchronized to Integrator. */
bool have_motion;

View File

@@ -34,6 +34,7 @@
#include "util/util_md5.h"
#include "util/util_path.h"
#include "util/util_progress.h"
#include "util/util_projection.h"
#endif
@@ -832,7 +833,9 @@ void OSLCompiler::parameter(ShaderNode* node, const char *name)
case SocketType::TRANSFORM:
{
Transform value = node->get_transform(socket);
ss->Parameter(uname, TypeDesc::TypeMatrix, &value);
ProjectionTransform projection(value);
projection = projection_transpose(projection);
ss->Parameter(uname, TypeDesc::TypeMatrix, &projection);
break;
}
case SocketType::BOOLEAN_ARRAY:
@@ -900,7 +903,11 @@ void OSLCompiler::parameter(ShaderNode* node, const char *name)
case SocketType::TRANSFORM_ARRAY:
{
const array<Transform>& value = node->get_transform_array(socket);
ss->Parameter(uname, array_typedesc(TypeDesc::TypeMatrix, value.size()), value.data());
array<ProjectionTransform> fvalue(value.size());
for(size_t i = 0; i < value.size(); i++) {
fvalue[i] = projection_transpose(ProjectionTransform(value[i]));
}
ss->Parameter(uname, array_typedesc(TypeDesc::TypeMatrix, fvalue.size()), fvalue.data());
break;
}
case SocketType::CLOSURE:
@@ -967,7 +974,9 @@ void OSLCompiler::parameter(const char *name, ustring s)
void OSLCompiler::parameter(const char *name, const Transform& tfm)
{
OSL::ShadingSystem *ss = (OSL::ShadingSystem*)shadingsys;
ss->Parameter(name, TypeDesc::TypeMatrix, (float*)&tfm);
ProjectionTransform projection(tfm);
projection = projection_transpose(projection);
ss->Parameter(name, TypeDesc::TypeMatrix, (float*)&projection);
}
void OSLCompiler::parameter_array(const char *name, const float f[], int arraylen)

View File

@@ -87,7 +87,7 @@ public:
/* objects */
device_vector<KernelObject> objects;
device_vector<float4> objects_vector;
device_vector<Transform> objects_vector;
/* attributes */
device_vector<uint4> attributes_map;

View File

@@ -35,7 +35,7 @@ typedef struct ProjectionTransform {
: x(tfm.x),
y(tfm.y),
z(tfm.z),
w(tfm.w)
w(make_float4(0.0f, 0.0f, 0.0f, 1.0f))
{
}
#endif
@@ -69,6 +69,12 @@ ccl_device_inline float3 transform_perspective_direction(const ProjectionTransfo
#ifndef __KERNEL_GPU__
ccl_device_inline Transform projection_to_transform(const ProjectionTransform& a)
{
Transform tfm = {a.x, a.y, a.z};
return tfm;
}
ccl_device_inline ProjectionTransform projection_transpose(const ProjectionTransform& a)
{
ProjectionTransform t;
@@ -81,12 +87,7 @@ ccl_device_inline ProjectionTransform projection_transpose(const ProjectionTrans
return t;
}
ccl_device_inline ProjectionTransform projection_inverse(const ProjectionTransform& a)
{
Transform t = {a.x, a.y, a.z, a.w};
t = transform_inverse(t);
return ProjectionTransform(t);
}
ProjectionTransform projection_inverse(const ProjectionTransform& a);
ccl_device_inline ProjectionTransform make_projection(
float a, float b, float c, float d,

View File

@@ -46,6 +46,7 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "util/util_projection.h"
#include "util/util_transform.h"
#include "util/util_boundbox.h"
@@ -129,9 +130,9 @@ static bool transform_matrix4_gj_inverse(float R[][4], float M[][4])
return true;
}
Transform transform_inverse(const Transform& tfm)
ProjectionTransform projection_inverse(const ProjectionTransform& tfm)
{
Transform tfmR = transform_identity();
ProjectionTransform tfmR = projection_identity();
float M[4][4], R[4][4];
memcpy(R, &tfmR, sizeof(R));
@@ -145,7 +146,7 @@ Transform transform_inverse(const Transform& tfm)
M[2][2] += 1e-8f;
if(UNLIKELY(!transform_matrix4_gj_inverse(R, M))) {
return transform_identity();
return projection_identity();
}
}
@@ -154,6 +155,19 @@ Transform transform_inverse(const Transform& tfm)
return tfmR;
}
Transform transform_inverse(const Transform& tfm)
{
ProjectionTransform projection(tfm);
return projection_to_transform(projection_inverse(projection));
}
Transform transform_transposed_inverse(const Transform& tfm)
{
ProjectionTransform projection(tfm);
ProjectionTransform iprojection = projection_inverse(projection);
return projection_to_transform(projection_transpose(iprojection));
}
/* Motion Transform */
float4 transform_to_quat(const Transform& tfm)
@@ -209,7 +223,7 @@ static void transform_decompose(DecomposedTransform *decomp, const Transform *tf
/* extract rotation */
Transform M = *tfm;
M.x.w = 0.0f; M.y.w = 0.0f; M.z.w = 0.0f; M.w.w = 1.0f;
M.x.w = 0.0f; M.y.w = 0.0f; M.z.w = 0.0f;
Transform R = M;
float norm;
@@ -217,9 +231,9 @@ static void transform_decompose(DecomposedTransform *decomp, const Transform *tf
do {
Transform Rnext;
Transform Rit = transform_inverse(transform_transpose(R));
Transform Rit = transform_transposed_inverse(R);
for(int i = 0; i < 4; i++)
for(int i = 0; i < 3; i++)
for(int j = 0; j < 4; j++)
Rnext[i][j] = 0.5f * (R[i][j] + Rit[i][j]);

View File

@@ -26,10 +26,10 @@
CCL_NAMESPACE_BEGIN
/* Data Types */
/* Affine transformation, stored as 4x3 matrix. */
typedef struct Transform {
float4 x, y, z, w; /* rows */
float4 x, y, z;
#ifndef __KERNEL_GPU__
float4 operator[](int i) const { return *(&x + i); }
@@ -69,7 +69,7 @@ ccl_device_inline float3 transform_point(const Transform *t, const float3 a)
x = _mm_loadu_ps(&t->x.x);
y = _mm_loadu_ps(&t->y.x);
z = _mm_loadu_ps(&t->z.x);
w = _mm_loadu_ps(&t->w.x);
w = _mm_set_ps(1.0f, 0.0f, 0.0f, 0.0f);
_MM_TRANSPOSE4_PS(x, y, z, w);
@@ -125,29 +125,15 @@ ccl_device_inline float3 transform_direction_transposed(const Transform *t, cons
return make_float3(dot(x, a), dot(y, a), dot(z, a));
}
ccl_device_inline Transform transform_transpose(const Transform a)
{
Transform t;
t.x.x = a.x.x; t.x.y = a.y.x; t.x.z = a.z.x; t.x.w = a.w.x;
t.y.x = a.x.y; t.y.y = a.y.y; t.y.z = a.z.y; t.y.w = a.w.y;
t.z.x = a.x.z; t.z.y = a.y.z; t.z.z = a.z.z; t.z.w = a.w.z;
t.w.x = a.x.w; t.w.y = a.y.w; t.w.z = a.z.w; t.w.w = a.w.w;
return t;
}
ccl_device_inline Transform make_transform(float a, float b, float c, float d,
float e, float f, float g, float h,
float i, float j, float k, float l,
float m, float n, float o, float p)
float i, float j, float k, float l)
{
Transform t;
t.x.x = a; t.x.y = b; t.x.z = c; t.x.w = d;
t.y.x = e; t.y.y = f; t.y.z = g; t.y.w = h;
t.z.x = i; t.z.y = j; t.z.z = k; t.z.w = l;
t.w.x = m; t.w.y = n; t.w.z = o; t.w.w = p;
return t;
}
@@ -161,21 +147,22 @@ ccl_device_inline Transform make_transform_frame(float3 N)
const float3 dy = normalize(cross(N, dx));
return make_transform(dx.x, dx.y, dx.z, 0.0f,
dy.x, dy.y, dy.z, 0.0f,
N.x , N.y, N.z, 0.0f,
0.0f, 0.0f, 0.0f, 1.0f);
N.x , N.y, N.z, 0.0f);
}
#ifndef __KERNEL_GPU__
ccl_device_inline Transform operator*(const Transform a, const Transform b)
{
Transform c = transform_transpose(b);
Transform t;
float4 c_x = make_float4(b.x.x, b.y.x, b.z.x, 0.0f);
float4 c_y = make_float4(b.x.y, b.y.y, b.z.y, 0.0f);
float4 c_z = make_float4(b.x.z, b.y.z, b.z.z, 0.0f);
float4 c_w = make_float4(b.x.w, b.y.w, b.z.w, 1.0f);
t.x = make_float4(dot(a.x, c.x), dot(a.x, c.y), dot(a.x, c.z), dot(a.x, c.w));
t.y = make_float4(dot(a.y, c.x), dot(a.y, c.y), dot(a.y, c.z), dot(a.y, c.w));
t.z = make_float4(dot(a.z, c.x), dot(a.z, c.y), dot(a.z, c.z), dot(a.z, c.w));
t.w = make_float4(dot(a.w, c.x), dot(a.w, c.y), dot(a.w, c.z), dot(a.w, c.w));
Transform t;
t.x = make_float4(dot(a.x, c_x), dot(a.x, c_y), dot(a.x, c_z), dot(a.x, c_w));
t.y = make_float4(dot(a.y, c_x), dot(a.y, c_y), dot(a.y, c_z), dot(a.y, c_w));
t.z = make_float4(dot(a.z, c_x), dot(a.z, c_y), dot(a.z, c_z), dot(a.z, c_w));
return t;
}
@@ -185,7 +172,6 @@ ccl_device_inline void print_transform(const char *label, const Transform& t)
print_float4(label, t.x);
print_float4(label, t.y);
print_float4(label, t.z);
print_float4(label, t.w);
printf("\n");
}
@@ -194,8 +180,7 @@ ccl_device_inline Transform transform_translate(float3 t)
return make_transform(
1, 0, 0, t.x,
0, 1, 0, t.y,
0, 0, 1, t.z,
0, 0, 0, 1);
0, 0, 1, t.z);
}
ccl_device_inline Transform transform_translate(float x, float y, float z)
@@ -208,8 +193,7 @@ ccl_device_inline Transform transform_scale(float3 s)
return make_transform(
s.x, 0, 0, 0,
0, s.y, 0, 0,
0, 0, s.z, 0,
0, 0, 0, 1);
0, 0, s.z, 0);
}
ccl_device_inline Transform transform_scale(float x, float y, float z)
@@ -239,9 +223,7 @@ ccl_device_inline Transform transform_rotate(float angle, float3 axis)
axis.z*axis.x*t - s*axis.y,
axis.z*axis.y*t + s*axis.x,
axis.z*axis.z*t + c,
0.0f,
0.0f, 0.0f, 0.0f, 1.0f);
0.0f);
}
/* Euler is assumed to be in XYZ order. */
@@ -281,20 +263,20 @@ ccl_device_inline void transform_set_column(Transform *t, int column, float3 val
}
Transform transform_inverse(const Transform& a);
Transform transform_transposed_inverse(const Transform& a);
ccl_device_inline bool transform_uniform_scale(const Transform& tfm, float& scale)
{
/* the epsilon here is quite arbitrary, but this function is only used for
* surface area and bump, where we except it to not be so sensitive */
Transform ttfm = transform_transpose(tfm);
* surface area and bump, where we expect it to not be so sensitive */
float eps = 1e-6f;
float sx = len_squared(float4_to_float3(tfm.x));
float sy = len_squared(float4_to_float3(tfm.y));
float sz = len_squared(float4_to_float3(tfm.z));
float stx = len_squared(float4_to_float3(ttfm.x));
float sty = len_squared(float4_to_float3(ttfm.y));
float stz = len_squared(float4_to_float3(ttfm.z));
float stx = len_squared(transform_get_column(&tfm, 0));
float sty = len_squared(transform_get_column(&tfm, 1));
float stz = len_squared(transform_get_column(&tfm, 2));
if(fabsf(sx - sy) < eps && fabsf(sx - sz) < eps &&
fabsf(sx - stx) < eps && fabsf(sx - sty) < eps &&
@@ -330,7 +312,6 @@ ccl_device_inline Transform transform_clear_scale(const Transform& tfm)
ccl_device_inline Transform transform_empty()
{
return make_transform(
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0);
@@ -389,7 +370,6 @@ ccl_device_inline Transform transform_quick_inverse(Transform M)
R.x = make_float4(Rx.x, Rx.y, Rx.z, dot(Rx, T));
R.y = make_float4(Ry.x, Ry.y, Ry.z, dot(Ry, T));
R.z = make_float4(Rz.x, Rz.y, Rz.z, dot(Rz, T));
R.w = make_float4(0.0f, 0.0f, 0.0f, 1.0f);
return R;
}
@@ -427,7 +407,6 @@ ccl_device_inline void transform_compose(Transform *tfm, const DecomposedTransfo
tfm->x = make_float4(dot(rotation_x, scale_x), dot(rotation_x, scale_y), dot(rotation_x, scale_z), decomp->y.x);
tfm->y = make_float4(dot(rotation_y, scale_x), dot(rotation_y, scale_y), dot(rotation_y, scale_z), decomp->y.y);
tfm->z = make_float4(dot(rotation_z, scale_x), dot(rotation_z, scale_y), dot(rotation_z, scale_z), decomp->y.z);
tfm->w = make_float4(0.0f, 0.0f, 0.0f, 1.0f);
}
ccl_device void transform_motion_interpolate(Transform *tfm, const ccl_global DecomposedMotionTransform *motion, float t)