mikktspace: minor optimization

Add a safe version of normalize: since all uses of normalize
did zero-length checks, move that check into the function itself.

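Also avoid unnecessary conversions (use cosf instead of casting the result of cos, and a float copy of g_iCells instead of converting the int).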

Gives a minor speedup here (approx. 3-5%).
This commit is contained in:
Campbell Barton
2017-08-03 07:03:59 +10:00
parent f879cac032
commit ba98f06acc

View File

@@ -93,9 +93,23 @@ static float Length( const SVec3 v )
return sqrtf(LengthSquared(v)); return sqrtf(LengthSquared(v));
} }
#if 0 // UNUSED
static SVec3 Normalize( const SVec3 v ) static SVec3 Normalize( const SVec3 v )
{ {
return vscale(1 / Length(v), v); return vscale(1.0f / Length(v), v);
}
#endif
static SVec3 NormalizeSafe( const SVec3 v )
{
const float len = Length(v);
if (len != 0.0f) {
return vscale(1.0f / len, v);
}
else
{
return v;
}
} }
static float vdot( const SVec3 v1, const SVec3 v2) static float vdot( const SVec3 v1, const SVec3 v2)
@@ -110,12 +124,13 @@ static tbool NotZero(const float fX)
return fabsf(fX) > FLT_MIN; return fabsf(fX) > FLT_MIN;
} }
#if 0 // UNUSED
static tbool VNotZero(const SVec3 v) static tbool VNotZero(const SVec3 v)
{ {
// might change this to an epsilon based test // might change this to an epsilon based test
return NotZero(v.x) || NotZero(v.y) || NotZero(v.z); return NotZero(v.x) || NotZero(v.y) || NotZero(v.z);
} }
#endif
typedef struct { typedef struct {
@@ -202,8 +217,8 @@ static STSpace AvgTSpace(const STSpace * pTS0, const STSpace * pTS1)
ts_res.fMagT = 0.5f*(pTS0->fMagT+pTS1->fMagT); ts_res.fMagT = 0.5f*(pTS0->fMagT+pTS1->fMagT);
ts_res.vOs = vadd(pTS0->vOs,pTS1->vOs); ts_res.vOs = vadd(pTS0->vOs,pTS1->vOs);
ts_res.vOt = vadd(pTS0->vOt,pTS1->vOt); ts_res.vOt = vadd(pTS0->vOt,pTS1->vOt);
if ( VNotZero(ts_res.vOs) ) ts_res.vOs = Normalize(ts_res.vOs); ts_res.vOs = NormalizeSafe(ts_res.vOs);
if ( VNotZero(ts_res.vOt) ) ts_res.vOt = Normalize(ts_res.vOt); ts_res.vOt = NormalizeSafe(ts_res.vOt);
} }
return ts_res; return ts_res;
@@ -238,7 +253,7 @@ tbool genTangSpace(const SMikkTSpaceContext * pContext, const float fAngularThre
int iNrActiveGroups = 0, index = 0; int iNrActiveGroups = 0, index = 0;
const int iNrFaces = pContext->m_pInterface->m_getNumFaces(pContext); const int iNrFaces = pContext->m_pInterface->m_getNumFaces(pContext);
tbool bRes = TFALSE; tbool bRes = TFALSE;
const float fThresCos = (float) cos((fAngularThreshold*(float)M_PI)/180.0f); const float fThresCos = cosf((fAngularThreshold*(float)M_PI)/180.0f);
// verify all call-backs have been set // verify all call-backs have been set
if ( pContext->m_pInterface->m_getNumFaces==NULL || if ( pContext->m_pInterface->m_getNumFaces==NULL ||
@@ -258,7 +273,7 @@ tbool genTangSpace(const SMikkTSpaceContext * pContext, const float fAngularThre
if (iNrTrianglesIn<=0) return TFALSE; if (iNrTrianglesIn<=0) return TFALSE;
// allocate memory for an index list // allocate memory for an index list
piTriListIn = (int *) malloc(sizeof(int)*3*iNrTrianglesIn); piTriListIn = (int *) malloc(sizeof(int[3])*iNrTrianglesIn);
pTriInfos = (STriInfo *) malloc(sizeof(STriInfo)*iNrTrianglesIn); pTriInfos = (STriInfo *) malloc(sizeof(STriInfo)*iNrTrianglesIn);
if (piTriListIn==NULL || pTriInfos==NULL) if (piTriListIn==NULL || pTriInfos==NULL)
{ {
@@ -311,7 +326,7 @@ tbool genTangSpace(const SMikkTSpaceContext * pContext, const float fAngularThre
// based on the 4 rules, identify groups based on connectivity // based on the 4 rules, identify groups based on connectivity
iNrMaxGroups = iNrTrianglesIn*3; iNrMaxGroups = iNrTrianglesIn*3;
pGroups = (SGroup *) malloc(sizeof(SGroup)*iNrMaxGroups); pGroups = (SGroup *) malloc(sizeof(SGroup)*iNrMaxGroups);
piGroupTrianglesBuffer = (int *) malloc(sizeof(int)*iNrTrianglesIn*3); piGroupTrianglesBuffer = (int *) malloc(sizeof(int[3])*iNrTrianglesIn);
if (pGroups==NULL || piGroupTrianglesBuffer==NULL) if (pGroups==NULL || piGroupTrianglesBuffer==NULL)
{ {
if (pGroups!=NULL) free(pGroups); if (pGroups!=NULL) free(pGroups);
@@ -427,6 +442,7 @@ typedef struct {
} STmpVert; } STmpVert;
static const int g_iCells = 2048; static const int g_iCells = 2048;
static const float g_iCells_fl = 2048.0f;
#ifdef _MSC_VER #ifdef _MSC_VER
# define NOINLINE __declspec(noinline) # define NOINLINE __declspec(noinline)
@@ -439,7 +455,7 @@ static const int g_iCells = 2048;
// results for the same effective input value fVal. // results for the same effective input value fVal.
static NOINLINE int FindGridCell(const float fMin, const float fMax, const float fVal) static NOINLINE int FindGridCell(const float fMin, const float fMax, const float fVal)
{ {
const float fIndex = g_iCells * ((fVal-fMin)/(fMax-fMin)); const float fIndex = g_iCells_fl * ((fVal-fMin)/(fMax-fMin));
const int iIndex = (int)fIndex; const int iIndex = (int)fIndex;
return iIndex < g_iCells ? (iIndex >= 0 ? iIndex : 0) : (g_iCells - 1); return iIndex < g_iCells ? (iIndex >= 0 ? iIndex : 0) : (g_iCells - 1);
} }
@@ -488,7 +504,7 @@ static void GenerateSharedVerticesIndexList(int piTriList_in_and_out[], const SM
} }
// make allocations // make allocations
piHashTable = (int *) malloc(sizeof(int)*iNrTrianglesIn*3); piHashTable = (int *) malloc(sizeof(int[3])*iNrTrianglesIn);
piHashCount = (int *) malloc(sizeof(int)*g_iCells); piHashCount = (int *) malloc(sizeof(int)*g_iCells);
piHashOffsets = (int *) malloc(sizeof(int)*g_iCells); piHashOffsets = (int *) malloc(sizeof(int)*g_iCells);
piHashCount2 = (int *) malloc(sizeof(int)*g_iCells); piHashCount2 = (int *) malloc(sizeof(int)*g_iCells);
@@ -544,7 +560,7 @@ static void GenerateSharedVerticesIndexList(int piTriList_in_and_out[], const SM
if (iMaxCount<piHashCount[k]) if (iMaxCount<piHashCount[k])
iMaxCount=piHashCount[k]; iMaxCount=piHashCount[k];
pTmpVert = (STmpVert *) malloc(sizeof(STmpVert)*iMaxCount); pTmpVert = (STmpVert *) malloc(sizeof(STmpVert)*iMaxCount);
// complete the merge // complete the merge
for (k=0; k<g_iCells; k++) for (k=0; k<g_iCells; k++)
@@ -1048,7 +1064,7 @@ static void InitTriInfo(STriInfo pTriInfos[], const int piTriListIn[], const SMi
// match up edge pairs // match up edge pairs
{ {
SEdge * pEdges = (SEdge *) malloc(sizeof(SEdge)*iNrTrianglesIn*3); SEdge * pEdges = (SEdge *) malloc(sizeof(SEdge[3])*iNrTrianglesIn);
if (pEdges==NULL) if (pEdges==NULL)
BuildNeighborsSlow(pTriInfos, piTriListIn, iNrTrianglesIn); BuildNeighborsSlow(pTriInfos, piTriListIn, iNrTrianglesIn);
else else
@@ -1247,10 +1263,8 @@ static tbool GenerateTSpaces(STSpace psTspace[], const STriInfo pTriInfos[], con
n = GetNormal(pContext, iVertIndex); n = GetNormal(pContext, iVertIndex);
// project // project
vOs = vsub(pTriInfos[f].vOs, vscale(vdot(n,pTriInfos[f].vOs), n)); vOs = NormalizeSafe(vsub(pTriInfos[f].vOs, vscale(vdot(n,pTriInfos[f].vOs), n)));
vOt = vsub(pTriInfos[f].vOt, vscale(vdot(n,pTriInfos[f].vOt), n)); vOt = NormalizeSafe(vsub(pTriInfos[f].vOt, vscale(vdot(n,pTriInfos[f].vOt), n)));
if ( VNotZero(vOs) ) vOs = Normalize(vOs);
if ( VNotZero(vOt) ) vOt = Normalize(vOt);
// original face number // original face number
iOF_1 = pTriInfos[f].iOrgFaceNumber; iOF_1 = pTriInfos[f].iOrgFaceNumber;
@@ -1262,10 +1276,8 @@ static tbool GenerateTSpaces(STSpace psTspace[], const STriInfo pTriInfos[], con
const int iOF_2 = pTriInfos[t].iOrgFaceNumber; const int iOF_2 = pTriInfos[t].iOrgFaceNumber;
// project // project
SVec3 vOs2 = vsub(pTriInfos[t].vOs, vscale(vdot(n,pTriInfos[t].vOs), n)); SVec3 vOs2 = NormalizeSafe(vsub(pTriInfos[t].vOs, vscale(vdot(n,pTriInfos[t].vOs), n)));
SVec3 vOt2 = vsub(pTriInfos[t].vOt, vscale(vdot(n,pTriInfos[t].vOt), n)); SVec3 vOt2 = NormalizeSafe(vsub(pTriInfos[t].vOt, vscale(vdot(n,pTriInfos[t].vOt), n)));
if ( VNotZero(vOs2) ) vOs2 = Normalize(vOs2);
if ( VNotZero(vOt2) ) vOt2 = Normalize(vOt2);
{ {
const tbool bAny = ( (pTriInfos[f].iFlag | pTriInfos[t].iFlag) & GROUP_WITH_ANY )!=0 ? TTRUE : TFALSE; const tbool bAny = ( (pTriInfos[f].iFlag | pTriInfos[t].iFlag) & GROUP_WITH_ANY )!=0 ? TTRUE : TFALSE;
@@ -1321,7 +1333,7 @@ static tbool GenerateTSpaces(STSpace psTspace[], const STriInfo pTriInfos[], con
} }
pUniSubGroups[iUniqueSubGroups].iNrFaces = iMembers; pUniSubGroups[iUniqueSubGroups].iNrFaces = iMembers;
pUniSubGroups[iUniqueSubGroups].pTriMembers = pIndices; pUniSubGroups[iUniqueSubGroups].pTriMembers = pIndices;
memcpy(pIndices, tmp_group.pTriMembers, iMembers*sizeof(int)); memcpy(pIndices, tmp_group.pTriMembers, sizeof(int)*iMembers);
pSubGroupTspace[iUniqueSubGroups] = pSubGroupTspace[iUniqueSubGroups] =
EvalTspace(tmp_group.pTriMembers, iMembers, piTriListIn, pTriInfos, pContext, pGroup->iVertexRepresentitive); EvalTspace(tmp_group.pTriMembers, iMembers, piTriListIn, pTriInfos, pContext, pGroup->iVertexRepresentitive);
++iUniqueSubGroups; ++iUniqueSubGroups;
@@ -1392,10 +1404,8 @@ static STSpace EvalTspace(int face_indices[], const int iFaces, const int piTriL
// project // project
index = piTriListIn[3*f+i]; index = piTriListIn[3*f+i];
n = GetNormal(pContext, index); n = GetNormal(pContext, index);
vOs = vsub(pTriInfos[f].vOs, vscale(vdot(n,pTriInfos[f].vOs), n)); vOs = NormalizeSafe(vsub(pTriInfos[f].vOs, vscale(vdot(n,pTriInfos[f].vOs), n)));
vOt = vsub(pTriInfos[f].vOt, vscale(vdot(n,pTriInfos[f].vOt), n)); vOt = NormalizeSafe(vsub(pTriInfos[f].vOt, vscale(vdot(n,pTriInfos[f].vOt), n)));
if ( VNotZero(vOs) ) vOs = Normalize(vOs);
if ( VNotZero(vOt) ) vOt = Normalize(vOt);
i2 = piTriListIn[3*f + (i<2?(i+1):0)]; i2 = piTriListIn[3*f + (i<2?(i+1):0)];
i1 = piTriListIn[3*f + i]; i1 = piTriListIn[3*f + i];
@@ -1408,8 +1418,8 @@ static STSpace EvalTspace(int face_indices[], const int iFaces, const int piTriL
v2 = vsub(p2,p1); v2 = vsub(p2,p1);
// project // project
v1 = vsub(v1, vscale(vdot(n,v1),n)); if ( VNotZero(v1) ) v1 = Normalize(v1); v1 = NormalizeSafe(vsub(v1, vscale(vdot(n,v1),n)));
v2 = vsub(v2, vscale(vdot(n,v2),n)); if ( VNotZero(v2) ) v2 = Normalize(v2); v2 = NormalizeSafe(vsub(v2, vscale(vdot(n,v2),n)));
// weight contribution by the angle // weight contribution by the angle
// between the two edge vectors // between the two edge vectors
@@ -1427,8 +1437,8 @@ static STSpace EvalTspace(int face_indices[], const int iFaces, const int piTriL
} }
// normalize // normalize
if ( VNotZero(res.vOs) ) res.vOs = Normalize(res.vOs); res.vOs = NormalizeSafe(res.vOs);
if ( VNotZero(res.vOt) ) res.vOt = Normalize(res.vOt); res.vOt = NormalizeSafe(res.vOt);
if (fAngleSum>0) if (fAngleSum>0)
{ {
res.fMagS /= fAngleSum; res.fMagS /= fAngleSum;
@@ -1464,7 +1474,7 @@ static void QuickSort(int* pSortBuffer, int iLeft, int iRight, unsigned int uSee
iL=iLeft; iR=iRight; iL=iLeft; iR=iRight;
n = (iR-iL)+1; n = (iR-iL)+1;
assert(n>=0); assert(n>=0);
index = (int) (uSeed%n); index = (int) (uSeed%(unsigned int)n);
iMid=pSortBuffer[index + iL]; iMid=pSortBuffer[index + iL];
@@ -1672,7 +1682,7 @@ static void QuickSortEdges(SEdge * pSortBuffer, int iLeft, int iRight, const int
iR = iRight; iR = iRight;
n = (iR-iL)+1; n = (iR-iL)+1;
assert(n>=0); assert(n>=0);
index = (int) (uSeed%n); index = (int) (uSeed%(unsigned int)n);
iMid=pSortBuffer[index + iL].array[channel]; iMid=pSortBuffer[index + iL].array[channel];