OpenNL: modify SuperLU to use doubles rather than floats, for better precision.
This helps to improve the accuracy of UV unwrapping and laplacian deform for high poly meshes, which could get warped quite badly. It's not much slower, doubles are pretty fast on modern CPUs, but it does double memory usage. This seems acceptable as otherwise high poly meshes would not work correctly anyway. Fixes T39004.
This commit is contained in:
18
intern/opennl/extern/ONL_opennl.h
vendored
18
intern/opennl/extern/ONL_opennl.h
vendored
@@ -100,11 +100,11 @@ NLContext nlGetCurrent(void);
|
|||||||
|
|
||||||
/* State get/set */
|
/* State get/set */
|
||||||
|
|
||||||
void nlSolverParameterf(NLenum pname, NLfloat param);
|
void nlSolverParameterf(NLenum pname, NLdouble param);
|
||||||
void nlSolverParameteri(NLenum pname, NLint param);
|
void nlSolverParameteri(NLenum pname, NLint param);
|
||||||
|
|
||||||
void nlGetBooleanv(NLenum pname, NLboolean* params);
|
void nlGetBooleanv(NLenum pname, NLboolean* params);
|
||||||
void nlGetFloatv(NLenum pname, NLfloat* params);
|
void nlGetFloatv(NLenum pname, NLdouble* params);
|
||||||
void nlGetIntergerv(NLenum pname, NLint* params);
|
void nlGetIntergerv(NLenum pname, NLint* params);
|
||||||
|
|
||||||
void nlEnable(NLenum pname);
|
void nlEnable(NLenum pname);
|
||||||
@@ -113,8 +113,8 @@ NLboolean nlIsEnabled(NLenum pname);
|
|||||||
|
|
||||||
/* Variables */
|
/* Variables */
|
||||||
|
|
||||||
void nlSetVariable(NLuint rhsindex, NLuint index, NLfloat value);
|
void nlSetVariable(NLuint rhsindex, NLuint index, NLdouble value);
|
||||||
NLfloat nlGetVariable(NLuint rhsindex, NLuint index);
|
NLdouble nlGetVariable(NLuint rhsindex, NLuint index);
|
||||||
void nlLockVariable(NLuint index);
|
void nlLockVariable(NLuint index);
|
||||||
void nlUnlockVariable(NLuint index);
|
void nlUnlockVariable(NLuint index);
|
||||||
NLboolean nlVariableIsLocked(NLuint index);
|
NLboolean nlVariableIsLocked(NLuint index);
|
||||||
@@ -126,13 +126,9 @@ void nlEnd(NLenum primitive);
|
|||||||
|
|
||||||
/* Setting elements in matrix/vector */
|
/* Setting elements in matrix/vector */
|
||||||
|
|
||||||
void nlMatrixAdd(NLuint row, NLuint col, NLfloat value);
|
void nlMatrixAdd(NLuint row, NLuint col, NLdouble value);
|
||||||
void nlRightHandSideAdd(NLuint rhsindex, NLuint index, NLfloat value);
|
void nlRightHandSideAdd(NLuint rhsindex, NLuint index, NLdouble value);
|
||||||
void nlRightHandSideSet(NLuint rhsindex, NLuint index, NLfloat value);
|
void nlRightHandSideSet(NLuint rhsindex, NLuint index, NLdouble value);
|
||||||
|
|
||||||
/* Multiply */
|
|
||||||
|
|
||||||
void nlMatrixMultiply(NLfloat *x, NLfloat *y);
|
|
||||||
|
|
||||||
/* Solve */
|
/* Solve */
|
||||||
|
|
||||||
|
@@ -69,7 +69,7 @@ static void __nl_assertion_failed(char* cond, char* file, int line) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
static void __nl_range_assertion_failed(
|
static void __nl_range_assertion_failed(
|
||||||
float x, float min_val, float max_val, char* file, int line
|
double x, double min_val, double max_val, char* file, int line
|
||||||
) {
|
) {
|
||||||
fprintf(
|
fprintf(
|
||||||
stderr,
|
stderr,
|
||||||
@@ -151,7 +151,7 @@ static void __nl_should_not_have_reached(char* file, int line) {
|
|||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
NLuint index;
|
NLuint index;
|
||||||
NLfloat value;
|
NLdouble value;
|
||||||
} __NLCoeff;
|
} __NLCoeff;
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
@@ -183,7 +183,7 @@ static void __nlRowColumnGrow(__NLRowColumn* c) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void __nlRowColumnAdd(__NLRowColumn* c, NLint index, NLfloat value) {
|
static void __nlRowColumnAdd(__NLRowColumn* c, NLint index, NLdouble value) {
|
||||||
NLuint i;
|
NLuint i;
|
||||||
for(i=0; i<c->size; i++) {
|
for(i=0; i<c->size; i++) {
|
||||||
if(c->coeff[i].index == (NLuint)index) {
|
if(c->coeff[i].index == (NLuint)index) {
|
||||||
@@ -200,7 +200,7 @@ static void __nlRowColumnAdd(__NLRowColumn* c, NLint index, NLfloat value) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* Does not check whether the index already exists */
|
/* Does not check whether the index already exists */
|
||||||
static void __nlRowColumnAppend(__NLRowColumn* c, NLint index, NLfloat value) {
|
static void __nlRowColumnAppend(__NLRowColumn* c, NLint index, NLdouble value) {
|
||||||
if(c->size == c->capacity) {
|
if(c->size == c->capacity) {
|
||||||
__nlRowColumnGrow(c);
|
__nlRowColumnGrow(c);
|
||||||
}
|
}
|
||||||
@@ -229,7 +229,7 @@ typedef struct {
|
|||||||
NLenum storage;
|
NLenum storage;
|
||||||
__NLRowColumn* row;
|
__NLRowColumn* row;
|
||||||
__NLRowColumn* column;
|
__NLRowColumn* column;
|
||||||
NLfloat* diag;
|
NLdouble* diag;
|
||||||
} __NLSparseMatrix;
|
} __NLSparseMatrix;
|
||||||
|
|
||||||
|
|
||||||
@@ -259,7 +259,7 @@ static void __nlSparseMatrixConstruct(
|
|||||||
}
|
}
|
||||||
|
|
||||||
M->diag_size = MIN(m,n);
|
M->diag_size = MIN(m,n);
|
||||||
M->diag = __NL_NEW_ARRAY(NLfloat, M->diag_size);
|
M->diag = __NL_NEW_ARRAY(NLdouble, M->diag_size);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void __nlSparseMatrixDestroy(__NLSparseMatrix* M) {
|
static void __nlSparseMatrixDestroy(__NLSparseMatrix* M) {
|
||||||
@@ -283,7 +283,7 @@ static void __nlSparseMatrixDestroy(__NLSparseMatrix* M) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
static void __nlSparseMatrixAdd(
|
static void __nlSparseMatrixAdd(
|
||||||
__NLSparseMatrix* M, NLuint i, NLuint j, NLfloat value
|
__NLSparseMatrix* M, NLuint i, NLuint j, NLdouble value
|
||||||
) {
|
) {
|
||||||
__nl_parano_range_assert(i, 0, M->m - 1);
|
__nl_parano_range_assert(i, 0, M->m - 1);
|
||||||
__nl_parano_range_assert(j, 0, M->n - 1);
|
__nl_parano_range_assert(j, 0, M->n - 1);
|
||||||
@@ -313,7 +313,7 @@ static void __nlSparseMatrixClear( __NLSparseMatrix* M) {
|
|||||||
__nlRowColumnClear(&(M->column[i]));
|
__nlRowColumnClear(&(M->column[i]));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
__NL_CLEAR_ARRAY(NLfloat, M->diag, M->diag_size);
|
__NL_CLEAR_ARRAY(NLdouble, M->diag, M->diag_size);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Returns the number of non-zero coefficients */
|
/* Returns the number of non-zero coefficients */
|
||||||
@@ -338,7 +338,7 @@ static NLuint __nlSparseMatrixNNZ( __NLSparseMatrix* M) {
|
|||||||
/* SparseMatrix x Vector routines, internal helper routines */
|
/* SparseMatrix x Vector routines, internal helper routines */
|
||||||
|
|
||||||
static void __nlSparseMatrix_mult_rows_symmetric(
|
static void __nlSparseMatrix_mult_rows_symmetric(
|
||||||
__NLSparseMatrix* A, NLfloat* x, NLfloat* y
|
__NLSparseMatrix* A, NLdouble* x, NLdouble* y
|
||||||
) {
|
) {
|
||||||
NLuint m = A->m;
|
NLuint m = A->m;
|
||||||
NLuint i,ij;
|
NLuint i,ij;
|
||||||
@@ -358,7 +358,7 @@ static void __nlSparseMatrix_mult_rows_symmetric(
|
|||||||
}
|
}
|
||||||
|
|
||||||
static void __nlSparseMatrix_mult_rows(
|
static void __nlSparseMatrix_mult_rows(
|
||||||
__NLSparseMatrix* A, NLfloat* x, NLfloat* y
|
__NLSparseMatrix* A, NLdouble* x, NLdouble* y
|
||||||
) {
|
) {
|
||||||
NLuint m = A->m;
|
NLuint m = A->m;
|
||||||
NLuint i,ij;
|
NLuint i,ij;
|
||||||
@@ -375,7 +375,7 @@ static void __nlSparseMatrix_mult_rows(
|
|||||||
}
|
}
|
||||||
|
|
||||||
static void __nlSparseMatrix_mult_cols_symmetric(
|
static void __nlSparseMatrix_mult_cols_symmetric(
|
||||||
__NLSparseMatrix* A, NLfloat* x, NLfloat* y
|
__NLSparseMatrix* A, NLdouble* x, NLdouble* y
|
||||||
) {
|
) {
|
||||||
NLuint n = A->n;
|
NLuint n = A->n;
|
||||||
NLuint j,ii;
|
NLuint j,ii;
|
||||||
@@ -395,13 +395,13 @@ static void __nlSparseMatrix_mult_cols_symmetric(
|
|||||||
}
|
}
|
||||||
|
|
||||||
static void __nlSparseMatrix_mult_cols(
|
static void __nlSparseMatrix_mult_cols(
|
||||||
__NLSparseMatrix* A, NLfloat* x, NLfloat* y
|
__NLSparseMatrix* A, NLdouble* x, NLdouble* y
|
||||||
) {
|
) {
|
||||||
NLuint n = A->n;
|
NLuint n = A->n;
|
||||||
NLuint j,ii;
|
NLuint j,ii;
|
||||||
__NLRowColumn* Cj = NULL;
|
__NLRowColumn* Cj = NULL;
|
||||||
__NLCoeff* c = NULL;
|
__NLCoeff* c = NULL;
|
||||||
__NL_CLEAR_ARRAY(NLfloat, y, A->m);
|
__NL_CLEAR_ARRAY(NLdouble, y, A->m);
|
||||||
for(j=0; j<n; j++) {
|
for(j=0; j<n; j++) {
|
||||||
Cj = &(A->column[j]);
|
Cj = &(A->column[j]);
|
||||||
for(ii=0; ii<Cj->size; ii++) {
|
for(ii=0; ii<Cj->size; ii++) {
|
||||||
@@ -414,7 +414,7 @@ static void __nlSparseMatrix_mult_cols(
|
|||||||
/************************************************************************************/
|
/************************************************************************************/
|
||||||
/* SparseMatrix x Vector routines, main driver routine */
|
/* SparseMatrix x Vector routines, main driver routine */
|
||||||
|
|
||||||
static void __nlSparseMatrixMult(__NLSparseMatrix* A, NLfloat* x, NLfloat* y) {
|
static void __nlSparseMatrixMult(__NLSparseMatrix* A, NLdouble* x, NLdouble* y) {
|
||||||
if(A->storage & __NL_ROWS) {
|
if(A->storage & __NL_ROWS) {
|
||||||
if(A->storage & __NL_SYMMETRIC) {
|
if(A->storage & __NL_SYMMETRIC) {
|
||||||
__nlSparseMatrix_mult_rows_symmetric(A, x, y);
|
__nlSparseMatrix_mult_rows_symmetric(A, x, y);
|
||||||
@@ -440,7 +440,7 @@ static void __nlSparseMatrix_square(
|
|||||||
NLuint i, j0, j1;
|
NLuint i, j0, j1;
|
||||||
__NLRowColumn *Ri = NULL;
|
__NLRowColumn *Ri = NULL;
|
||||||
__NLCoeff *c0 = NULL, *c1 = NULL;
|
__NLCoeff *c0 = NULL, *c1 = NULL;
|
||||||
float value;
|
double value;
|
||||||
|
|
||||||
__nlSparseMatrixConstruct(AtA, n, n, A->storage);
|
__nlSparseMatrixConstruct(AtA, n, n, A->storage);
|
||||||
|
|
||||||
@@ -460,7 +460,7 @@ static void __nlSparseMatrix_square(
|
|||||||
}
|
}
|
||||||
|
|
||||||
static void __nlSparseMatrix_transpose_mult_rows(
|
static void __nlSparseMatrix_transpose_mult_rows(
|
||||||
__NLSparseMatrix* A, NLfloat* x, NLfloat* y
|
__NLSparseMatrix* A, NLdouble* x, NLdouble* y
|
||||||
) {
|
) {
|
||||||
NLuint m = A->m;
|
NLuint m = A->m;
|
||||||
NLuint n = A->n;
|
NLuint n = A->n;
|
||||||
@@ -468,7 +468,7 @@ static void __nlSparseMatrix_transpose_mult_rows(
|
|||||||
__NLRowColumn* Ri = NULL;
|
__NLRowColumn* Ri = NULL;
|
||||||
__NLCoeff* c = NULL;
|
__NLCoeff* c = NULL;
|
||||||
|
|
||||||
__NL_CLEAR_ARRAY(NLfloat, y, n);
|
__NL_CLEAR_ARRAY(NLdouble, y, n);
|
||||||
|
|
||||||
for(i=0; i<m; i++) {
|
for(i=0; i<m; i++) {
|
||||||
Ri = &(A->row[i]);
|
Ri = &(A->row[i]);
|
||||||
@@ -482,10 +482,10 @@ static void __nlSparseMatrix_transpose_mult_rows(
|
|||||||
/************************************************************************************/
|
/************************************************************************************/
|
||||||
/* NLContext data structure */
|
/* NLContext data structure */
|
||||||
|
|
||||||
typedef void(*__NLMatrixFunc)(float* x, float* y);
|
typedef void(*__NLMatrixFunc)(double* x, double* y);
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
NLfloat value[4];
|
NLdouble value[4];
|
||||||
NLboolean locked;
|
NLboolean locked;
|
||||||
NLuint index;
|
NLuint index;
|
||||||
__NLRowColumn *a;
|
__NLRowColumn *a;
|
||||||
@@ -503,11 +503,11 @@ typedef struct {
|
|||||||
NLuint n;
|
NLuint n;
|
||||||
NLuint m;
|
NLuint m;
|
||||||
__NLVariable* variable;
|
__NLVariable* variable;
|
||||||
NLfloat* b;
|
NLdouble* b;
|
||||||
NLfloat* Mtb;
|
NLdouble* Mtb;
|
||||||
__NLSparseMatrix M;
|
__NLSparseMatrix M;
|
||||||
__NLSparseMatrix MtM;
|
__NLSparseMatrix MtM;
|
||||||
NLfloat* x;
|
NLdouble* x;
|
||||||
NLuint nb_variables;
|
NLuint nb_variables;
|
||||||
NLuint nb_rows;
|
NLuint nb_rows;
|
||||||
NLboolean least_squares;
|
NLboolean least_squares;
|
||||||
@@ -520,7 +520,7 @@ typedef struct {
|
|||||||
NLboolean alloc_x;
|
NLboolean alloc_x;
|
||||||
NLboolean alloc_b;
|
NLboolean alloc_b;
|
||||||
NLboolean alloc_Mtb;
|
NLboolean alloc_Mtb;
|
||||||
NLfloat error;
|
NLdouble error;
|
||||||
__NLMatrixFunc matrix_vector_prod;
|
__NLMatrixFunc matrix_vector_prod;
|
||||||
|
|
||||||
struct __NLSuperLUContext {
|
struct __NLSuperLUContext {
|
||||||
@@ -533,7 +533,7 @@ typedef struct {
|
|||||||
|
|
||||||
static __NLContext* __nlCurrentContext = NULL;
|
static __NLContext* __nlCurrentContext = NULL;
|
||||||
|
|
||||||
static void __nlMatrixVectorProd_default(NLfloat* x, NLfloat* y) {
|
static void __nlMatrixVectorProd_default(NLdouble* x, NLdouble* y) {
|
||||||
__nlSparseMatrixMult(&(__nlCurrentContext->M), x, y);
|
__nlSparseMatrixMult(&(__nlCurrentContext->M), x, y);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -611,7 +611,7 @@ static void __nlTransition(NLenum from_state, NLenum to_state) {
|
|||||||
/************************************************************************************/
|
/************************************************************************************/
|
||||||
/* Get/Set parameters */
|
/* Get/Set parameters */
|
||||||
|
|
||||||
void nlSolverParameterf(NLenum pname, NLfloat param) {
|
void nlSolverParameterf(NLenum pname, NLdouble param) {
|
||||||
__nlCheckState(__NL_STATE_INITIAL);
|
__nlCheckState(__NL_STATE_INITIAL);
|
||||||
switch(pname) {
|
switch(pname) {
|
||||||
case NL_NB_VARIABLES: {
|
case NL_NB_VARIABLES: {
|
||||||
@@ -677,22 +677,22 @@ void nlGetBooleanv(NLenum pname, NLboolean* params) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void nlGetFloatv(NLenum pname, NLfloat* params) {
|
void nlGetFloatv(NLenum pname, NLdouble* params) {
|
||||||
switch(pname) {
|
switch(pname) {
|
||||||
case NL_NB_VARIABLES: {
|
case NL_NB_VARIABLES: {
|
||||||
*params = (NLfloat)(__nlCurrentContext->nb_variables);
|
*params = (NLdouble)(__nlCurrentContext->nb_variables);
|
||||||
} break;
|
} break;
|
||||||
case NL_NB_ROWS: {
|
case NL_NB_ROWS: {
|
||||||
*params = (NLfloat)(__nlCurrentContext->nb_rows);
|
*params = (NLdouble)(__nlCurrentContext->nb_rows);
|
||||||
} break;
|
} break;
|
||||||
case NL_LEAST_SQUARES: {
|
case NL_LEAST_SQUARES: {
|
||||||
*params = (NLfloat)(__nlCurrentContext->least_squares);
|
*params = (NLdouble)(__nlCurrentContext->least_squares);
|
||||||
} break;
|
} break;
|
||||||
case NL_SYMMETRIC: {
|
case NL_SYMMETRIC: {
|
||||||
*params = (NLfloat)(__nlCurrentContext->symmetric);
|
*params = (NLdouble)(__nlCurrentContext->symmetric);
|
||||||
} break;
|
} break;
|
||||||
case NL_ERROR: {
|
case NL_ERROR: {
|
||||||
*params = (NLfloat)(__nlCurrentContext->error);
|
*params = (NLdouble)(__nlCurrentContext->error);
|
||||||
} break;
|
} break;
|
||||||
default: {
|
default: {
|
||||||
__nl_assert_not_reached;
|
__nl_assert_not_reached;
|
||||||
@@ -751,13 +751,13 @@ NLboolean nlIsEnabled(NLenum pname) {
|
|||||||
/************************************************************************************/
|
/************************************************************************************/
|
||||||
/* Get/Set Lock/Unlock variables */
|
/* Get/Set Lock/Unlock variables */
|
||||||
|
|
||||||
void nlSetVariable(NLuint rhsindex, NLuint index, NLfloat value) {
|
void nlSetVariable(NLuint rhsindex, NLuint index, NLdouble value) {
|
||||||
__nlCheckState(__NL_STATE_SYSTEM);
|
__nlCheckState(__NL_STATE_SYSTEM);
|
||||||
__nl_parano_range_assert(index, 0, __nlCurrentContext->nb_variables - 1);
|
__nl_parano_range_assert(index, 0, __nlCurrentContext->nb_variables - 1);
|
||||||
__nlCurrentContext->variable[index].value[rhsindex] = value;
|
__nlCurrentContext->variable[index].value[rhsindex] = value;
|
||||||
}
|
}
|
||||||
|
|
||||||
NLfloat nlGetVariable(NLuint rhsindex, NLuint index) {
|
NLdouble nlGetVariable(NLuint rhsindex, NLuint index) {
|
||||||
__nl_assert(__nlCurrentContext->state != __NL_STATE_INITIAL);
|
__nl_assert(__nlCurrentContext->state != __NL_STATE_INITIAL);
|
||||||
__nl_parano_range_assert(index, 0, __nlCurrentContext->nb_variables - 1);
|
__nl_parano_range_assert(index, 0, __nlCurrentContext->nb_variables - 1);
|
||||||
return __nlCurrentContext->variable[index].value[rhsindex];
|
return __nlCurrentContext->variable[index].value[rhsindex];
|
||||||
@@ -870,15 +870,15 @@ static void __nlBeginMatrix() {
|
|||||||
__nlSparseMatrixConstruct(&context->M, m, n, storage);
|
__nlSparseMatrixConstruct(&context->M, m, n, storage);
|
||||||
context->alloc_M = NL_TRUE;
|
context->alloc_M = NL_TRUE;
|
||||||
|
|
||||||
context->b = __NL_NEW_ARRAY(NLfloat, m*context->nb_rhs);
|
context->b = __NL_NEW_ARRAY(NLdouble, m*context->nb_rhs);
|
||||||
context->alloc_b = NL_TRUE;
|
context->alloc_b = NL_TRUE;
|
||||||
|
|
||||||
context->x = __NL_NEW_ARRAY(NLfloat, n*context->nb_rhs);
|
context->x = __NL_NEW_ARRAY(NLdouble, n*context->nb_rhs);
|
||||||
context->alloc_x = NL_TRUE;
|
context->alloc_x = NL_TRUE;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
/* need to recompute b only, A is not constructed anymore */
|
/* need to recompute b only, A is not constructed anymore */
|
||||||
__NL_CLEAR_ARRAY(NLfloat, context->b, context->m*context->nb_rhs);
|
__NL_CLEAR_ARRAY(NLdouble, context->b, context->m*context->nb_rhs);
|
||||||
}
|
}
|
||||||
|
|
||||||
__nlVariablesToVector();
|
__nlVariablesToVector();
|
||||||
@@ -888,7 +888,7 @@ static void __nlEndMatrixRHS(NLuint rhs) {
|
|||||||
__NLContext *context = __nlCurrentContext;
|
__NLContext *context = __nlCurrentContext;
|
||||||
__NLVariable *variable;
|
__NLVariable *variable;
|
||||||
__NLRowColumn *a;
|
__NLRowColumn *a;
|
||||||
NLfloat *b, *Mtb;
|
NLdouble *b, *Mtb;
|
||||||
NLuint i, j;
|
NLuint i, j;
|
||||||
|
|
||||||
b = context->b + context->m*rhs;
|
b = context->b + context->m*rhs;
|
||||||
@@ -922,7 +922,7 @@ static void __nlEndMatrix() {
|
|||||||
context->alloc_MtM = NL_TRUE;
|
context->alloc_MtM = NL_TRUE;
|
||||||
|
|
||||||
context->Mtb =
|
context->Mtb =
|
||||||
__NL_NEW_ARRAY(NLfloat, context->n*context->nb_rhs);
|
__NL_NEW_ARRAY(NLdouble, context->n*context->nb_rhs);
|
||||||
context->alloc_Mtb = NL_TRUE;
|
context->alloc_Mtb = NL_TRUE;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -931,7 +931,7 @@ static void __nlEndMatrix() {
|
|||||||
__nlEndMatrixRHS(i);
|
__nlEndMatrixRHS(i);
|
||||||
}
|
}
|
||||||
|
|
||||||
void nlMatrixAdd(NLuint row, NLuint col, NLfloat value)
|
void nlMatrixAdd(NLuint row, NLuint col, NLdouble value)
|
||||||
{
|
{
|
||||||
__NLContext *context = __nlCurrentContext;
|
__NLContext *context = __nlCurrentContext;
|
||||||
|
|
||||||
@@ -960,10 +960,10 @@ void nlMatrixAdd(NLuint row, NLuint col, NLfloat value)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void nlRightHandSideAdd(NLuint rhsindex, NLuint index, NLfloat value)
|
void nlRightHandSideAdd(NLuint rhsindex, NLuint index, NLdouble value)
|
||||||
{
|
{
|
||||||
__NLContext *context = __nlCurrentContext;
|
__NLContext *context = __nlCurrentContext;
|
||||||
NLfloat* b = context->b;
|
NLdouble* b = context->b;
|
||||||
|
|
||||||
__nlCheckState(__NL_STATE_MATRIX);
|
__nlCheckState(__NL_STATE_MATRIX);
|
||||||
|
|
||||||
@@ -981,10 +981,10 @@ void nlRightHandSideAdd(NLuint rhsindex, NLuint index, NLfloat value)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void nlRightHandSideSet(NLuint rhsindex, NLuint index, NLfloat value)
|
void nlRightHandSideSet(NLuint rhsindex, NLuint index, NLdouble value)
|
||||||
{
|
{
|
||||||
__NLContext *context = __nlCurrentContext;
|
__NLContext *context = __nlCurrentContext;
|
||||||
NLfloat* b = context->b;
|
NLdouble* b = context->b;
|
||||||
|
|
||||||
__nlCheckState(__NL_STATE_MATRIX);
|
__nlCheckState(__NL_STATE_MATRIX);
|
||||||
|
|
||||||
@@ -1047,8 +1047,8 @@ static NLboolean __nlFactorize_SUPERLU(__NLContext *context, NLint *permutation)
|
|||||||
|
|
||||||
/* Compressed Row Storage matrix representation */
|
/* Compressed Row Storage matrix representation */
|
||||||
NLint *xa = __NL_NEW_ARRAY(NLint, n+1);
|
NLint *xa = __NL_NEW_ARRAY(NLint, n+1);
|
||||||
NLfloat *rhs = __NL_NEW_ARRAY(NLfloat, n);
|
NLdouble *rhs = __NL_NEW_ARRAY(NLdouble, n);
|
||||||
NLfloat *a = __NL_NEW_ARRAY(NLfloat, nnz);
|
NLdouble *a = __NL_NEW_ARRAY(NLdouble, nnz);
|
||||||
NLint *asub = __NL_NEW_ARRAY(NLint, nnz);
|
NLint *asub = __NL_NEW_ARRAY(NLint, nnz);
|
||||||
NLint *etree = __NL_NEW_ARRAY(NLint, n);
|
NLint *etree = __NL_NEW_ARRAY(NLint, n);
|
||||||
|
|
||||||
@@ -1083,7 +1083,7 @@ static NLboolean __nlFactorize_SUPERLU(__NLContext *context, NLint *permutation)
|
|||||||
sCreate_CompCol_Matrix(
|
sCreate_CompCol_Matrix(
|
||||||
&At, n, n, nnz, a, asub, xa,
|
&At, n, n, nnz, a, asub, xa,
|
||||||
SLU_NC, /* Colum wise, no supernode */
|
SLU_NC, /* Colum wise, no supernode */
|
||||||
SLU_S, /* floats */
|
SLU_S, /* doubles */
|
||||||
SLU_GE /* general storage */
|
SLU_GE /* general storage */
|
||||||
);
|
);
|
||||||
|
|
||||||
@@ -1136,8 +1136,8 @@ static NLboolean __nlFactorize_SUPERLU(__NLContext *context, NLint *permutation)
|
|||||||
static NLboolean __nlInvert_SUPERLU(__NLContext *context) {
|
static NLboolean __nlInvert_SUPERLU(__NLContext *context) {
|
||||||
|
|
||||||
/* OpenNL Context */
|
/* OpenNL Context */
|
||||||
NLfloat* b = (context->least_squares)? context->Mtb: context->b;
|
NLdouble* b = (context->least_squares)? context->Mtb: context->b;
|
||||||
NLfloat* x = context->x;
|
NLdouble* x = context->x;
|
||||||
NLuint n = context->n, j;
|
NLuint n = context->n, j;
|
||||||
|
|
||||||
/* SuperLU variables */
|
/* SuperLU variables */
|
||||||
@@ -1149,7 +1149,7 @@ static NLboolean __nlInvert_SUPERLU(__NLContext *context) {
|
|||||||
sCreate_Dense_Matrix(
|
sCreate_Dense_Matrix(
|
||||||
&B, n, 1, b, n,
|
&B, n, 1, b, n,
|
||||||
SLU_DN, /* Fortran-type column-wise storage */
|
SLU_DN, /* Fortran-type column-wise storage */
|
||||||
SLU_S, /* floats */
|
SLU_S, /* doubles */
|
||||||
SLU_GE /* general */
|
SLU_GE /* general */
|
||||||
);
|
);
|
||||||
|
|
||||||
@@ -1184,12 +1184,12 @@ void nlPrintMatrix(void) {
|
|||||||
__NLContext *context = __nlCurrentContext;
|
__NLContext *context = __nlCurrentContext;
|
||||||
__NLSparseMatrix* M = &(context->M);
|
__NLSparseMatrix* M = &(context->M);
|
||||||
__NLSparseMatrix* MtM = &(context->MtM);
|
__NLSparseMatrix* MtM = &(context->MtM);
|
||||||
float *b = context->b;
|
double *b = context->b;
|
||||||
NLuint i, jj, k;
|
NLuint i, jj, k;
|
||||||
NLuint m = context->m;
|
NLuint m = context->m;
|
||||||
NLuint n = context->n;
|
NLuint n = context->n;
|
||||||
__NLRowColumn* Ri = NULL;
|
__NLRowColumn* Ri = NULL;
|
||||||
float *value = malloc(sizeof(*value)*(n+m));
|
double *value = malloc(sizeof(*value)*(n+m));
|
||||||
|
|
||||||
printf("A:\n");
|
printf("A:\n");
|
||||||
for(i=0; i<m; i++) {
|
for(i=0; i<m; i++) {
|
||||||
|
@@ -29,9 +29,9 @@
|
|||||||
/*
|
/*
|
||||||
* Function prototypes
|
* Function prototypes
|
||||||
*/
|
*/
|
||||||
void susolve(int, int, float*, float*);
|
void susolve(int, int, double*, double*);
|
||||||
void slsolve(int, int, float*, float*);
|
void slsolve(int, int, double*, double*);
|
||||||
void smatvec(int, int, int, float*, float*, float*);
|
void smatvec(int, int, int, double*, double*, double*);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@@ -42,8 +42,8 @@ int
|
|||||||
scolumn_bmod (
|
scolumn_bmod (
|
||||||
const int jcol, /* in */
|
const int jcol, /* in */
|
||||||
const int nseg, /* in */
|
const int nseg, /* in */
|
||||||
float *dense, /* in */
|
double *dense, /* in */
|
||||||
float *tempv, /* working array */
|
double *tempv, /* working array */
|
||||||
int *segrep, /* in */
|
int *segrep, /* in */
|
||||||
int *repfnz, /* in */
|
int *repfnz, /* in */
|
||||||
int fpanelc, /* in -- first column in the current panel */
|
int fpanelc, /* in -- first column in the current panel */
|
||||||
@@ -67,7 +67,7 @@ scolumn_bmod (
|
|||||||
|
|
||||||
#ifdef USE_VENDOR_BLAS
|
#ifdef USE_VENDOR_BLAS
|
||||||
int incx = 1, incy = 1;
|
int incx = 1, incy = 1;
|
||||||
float alpha, beta;
|
double alpha, beta;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* krep = representative of current k-th supernode
|
/* krep = representative of current k-th supernode
|
||||||
@@ -78,7 +78,7 @@ scolumn_bmod (
|
|||||||
* kfnz = first nonz in the k-th supernodal segment
|
* kfnz = first nonz in the k-th supernodal segment
|
||||||
* no_zeros = no of leading zeros in a supernodal U-segment
|
* no_zeros = no of leading zeros in a supernodal U-segment
|
||||||
*/
|
*/
|
||||||
float ukj, ukj1, ukj2;
|
double ukj, ukj1, ukj2;
|
||||||
int luptr, luptr1, luptr2;
|
int luptr, luptr1, luptr2;
|
||||||
int fsupc, nsupc, nsupr, segsze;
|
int fsupc, nsupc, nsupr, segsze;
|
||||||
int nrow; /* No of rows in the matrix of matrix-vector */
|
int nrow; /* No of rows in the matrix of matrix-vector */
|
||||||
@@ -91,14 +91,14 @@ scolumn_bmod (
|
|||||||
panel and the first column of the current snode. */
|
panel and the first column of the current snode. */
|
||||||
int *xsup, *supno;
|
int *xsup, *supno;
|
||||||
int *lsub, *xlsub;
|
int *lsub, *xlsub;
|
||||||
float *lusup;
|
double *lusup;
|
||||||
int *xlusup;
|
int *xlusup;
|
||||||
int nzlumax;
|
int nzlumax;
|
||||||
float *tempv1;
|
double *tempv1;
|
||||||
float zero = 0.0;
|
double zero = 0.0;
|
||||||
#ifdef USE_VENDOR_BLAS
|
#ifdef USE_VENDOR_BLAS
|
||||||
float one = 1.0;
|
double one = 1.0;
|
||||||
float none = -1.0;
|
double none = -1.0;
|
||||||
#endif
|
#endif
|
||||||
int mem_error;
|
int mem_error;
|
||||||
flops_t *ops = stat->ops;
|
flops_t *ops = stat->ops;
|
||||||
|
@@ -33,7 +33,7 @@ scopy_to_ucol(
|
|||||||
int *segrep, /* in */
|
int *segrep, /* in */
|
||||||
int *repfnz, /* in */
|
int *repfnz, /* in */
|
||||||
int *perm_r, /* in */
|
int *perm_r, /* in */
|
||||||
float *dense, /* modified - reset to zero on return */
|
double *dense, /* modified - reset to zero on return */
|
||||||
GlobalLU_t *Glu /* modified */
|
GlobalLU_t *Glu /* modified */
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
@@ -47,11 +47,11 @@ scopy_to_ucol(
|
|||||||
int new_next, mem_error;
|
int new_next, mem_error;
|
||||||
int *xsup, *supno;
|
int *xsup, *supno;
|
||||||
int *lsub, *xlsub;
|
int *lsub, *xlsub;
|
||||||
float *ucol;
|
double *ucol;
|
||||||
int *usub, *xusub;
|
int *usub, *xusub;
|
||||||
int nzumax;
|
int nzumax;
|
||||||
|
|
||||||
float zero = 0.0;
|
double zero = 0.0;
|
||||||
|
|
||||||
xsup = Glu->xsup;
|
xsup = Glu->xsup;
|
||||||
supno = Glu->supno;
|
supno = Glu->supno;
|
||||||
|
@@ -119,7 +119,7 @@ sgssv(superlu_options_t *options, SuperMatrix *A, int *perm_c, int *perm_r,
|
|||||||
* On exit, the solution matrix if info = 0;
|
* On exit, the solution matrix if info = 0;
|
||||||
*
|
*
|
||||||
* stat (output) SuperLUStat_t*
|
* stat (output) SuperLUStat_t*
|
||||||
* Record the statistics on runtime and floating-point operation count.
|
* Record the statistics on runtime and doubleing-point operation count.
|
||||||
* See util.h for the definition of 'SuperLUStat_t'.
|
* See util.h for the definition of 'SuperLUStat_t'.
|
||||||
*
|
*
|
||||||
* info (output) int*
|
* info (output) int*
|
||||||
|
@@ -56,7 +56,7 @@ sgstrf (superlu_options_t *options, SuperMatrix *A,
|
|||||||
* (A->nrow, A->ncol). The type of A can be:
|
* (A->nrow, A->ncol). The type of A can be:
|
||||||
* Stype = SLU_NCP; Dtype = SLU_S; Mtype = SLU_GE.
|
* Stype = SLU_NCP; Dtype = SLU_S; Mtype = SLU_GE.
|
||||||
*
|
*
|
||||||
* drop_tol (input) float (NOT IMPLEMENTED)
|
* drop_tol (input) double (NOT IMPLEMENTED)
|
||||||
* Drop tolerance parameter. At step j of the Gaussian elimination,
|
* Drop tolerance parameter. At step j of the Gaussian elimination,
|
||||||
* if abs(A_ij)/(max_i abs(A_ij)) < drop_tol, drop entry A_ij.
|
* if abs(A_ij)/(max_i abs(A_ij)) < drop_tol, drop entry A_ij.
|
||||||
* 0 <= drop_tol <= 1. The default value of drop_tol is 0.
|
* 0 <= drop_tol <= 1. The default value of drop_tol is 0.
|
||||||
@@ -119,7 +119,7 @@ sgstrf (superlu_options_t *options, SuperMatrix *A,
|
|||||||
* Dtype = SLU_S, Mtype = SLU_TRU.
|
* Dtype = SLU_S, Mtype = SLU_TRU.
|
||||||
*
|
*
|
||||||
* stat (output) SuperLUStat_t*
|
* stat (output) SuperLUStat_t*
|
||||||
* Record the statistics on runtime and floating-point operation count.
|
* Record the statistics on runtime and doubleing-point operation count.
|
||||||
* See util.h for the definition of 'SuperLUStat_t'.
|
* See util.h for the definition of 'SuperLUStat_t'.
|
||||||
*
|
*
|
||||||
* info (output) int*
|
* info (output) int*
|
||||||
@@ -189,14 +189,14 @@ sgstrf (superlu_options_t *options, SuperMatrix *A,
|
|||||||
used when options->Fact == SamePattern_SameRowPerm */
|
used when options->Fact == SamePattern_SameRowPerm */
|
||||||
int *iperm_c; /* inverse of perm_c */
|
int *iperm_c; /* inverse of perm_c */
|
||||||
int *iwork;
|
int *iwork;
|
||||||
float *swork;
|
double *swork;
|
||||||
int *segrep, *repfnz, *parent, *xplore;
|
int *segrep, *repfnz, *parent, *xplore;
|
||||||
int *panel_lsub; /* dense[]/panel_lsub[] pair forms a w-wide SPA */
|
int *panel_lsub; /* dense[]/panel_lsub[] pair forms a w-wide SPA */
|
||||||
int *xprune;
|
int *xprune;
|
||||||
int *marker;
|
int *marker;
|
||||||
float *dense, *tempv;
|
double *dense, *tempv;
|
||||||
int *relax_end;
|
int *relax_end;
|
||||||
float *a;
|
double *a;
|
||||||
int *asub;
|
int *asub;
|
||||||
int *xa_begin, *xa_end;
|
int *xa_begin, *xa_end;
|
||||||
int *xsup, *supno;
|
int *xsup, *supno;
|
||||||
|
@@ -28,10 +28,10 @@
|
|||||||
/*
|
/*
|
||||||
* Function prototypes
|
* Function prototypes
|
||||||
*/
|
*/
|
||||||
void susolve(int, int, float*, float*);
|
void susolve(int, int, double*, double*);
|
||||||
void slsolve(int, int, float*, float*);
|
void slsolve(int, int, double*, double*);
|
||||||
void smatvec(int, int, int, float*, float*, float*);
|
void smatvec(int, int, int, double*, double*, double*);
|
||||||
void sprint_soln(int , float *);
|
void sprint_soln(int , double *);
|
||||||
|
|
||||||
void
|
void
|
||||||
sgstrs (trans_t trans, SuperMatrix *L, SuperMatrix *U,
|
sgstrs (trans_t trans, SuperMatrix *L, SuperMatrix *U,
|
||||||
@@ -82,7 +82,7 @@ sgstrs (trans_t trans, SuperMatrix *L, SuperMatrix *U,
|
|||||||
* On exit, the solution matrix if info = 0;
|
* On exit, the solution matrix if info = 0;
|
||||||
*
|
*
|
||||||
* stat (output) SuperLUStat_t*
|
* stat (output) SuperLUStat_t*
|
||||||
* Record the statistics on runtime and floating-point operation count.
|
* Record the statistics on runtime and doubleing-point operation count.
|
||||||
* See util.h for the definition of 'SuperLUStat_t'.
|
* See util.h for the definition of 'SuperLUStat_t'.
|
||||||
*
|
*
|
||||||
* info (output) int*
|
* info (output) int*
|
||||||
@@ -94,17 +94,17 @@ sgstrs (trans_t trans, SuperMatrix *L, SuperMatrix *U,
|
|||||||
_fcd ftcs1, ftcs2, ftcs3, ftcs4;
|
_fcd ftcs1, ftcs2, ftcs3, ftcs4;
|
||||||
#endif
|
#endif
|
||||||
#ifdef USE_VENDOR_BLAS
|
#ifdef USE_VENDOR_BLAS
|
||||||
float alpha = 1.0, beta = 1.0;
|
double alpha = 1.0, beta = 1.0;
|
||||||
float *work_col;
|
double *work_col;
|
||||||
#endif
|
#endif
|
||||||
DNformat *Bstore;
|
DNformat *Bstore;
|
||||||
float *Bmat;
|
double *Bmat;
|
||||||
SCformat *Lstore;
|
SCformat *Lstore;
|
||||||
NCformat *Ustore;
|
NCformat *Ustore;
|
||||||
float *Lval, *Uval;
|
double *Lval, *Uval;
|
||||||
int fsupc, nrow, nsupr, nsupc, luptr, istart, irow;
|
int fsupc, nrow, nsupr, nsupc, luptr, istart, irow;
|
||||||
int i, j, k, iptr, jcol, n, ldb, nrhs;
|
int i, j, k, iptr, jcol, n, ldb, nrhs;
|
||||||
float *work, *rhs_work, *soln;
|
double *work, *rhs_work, *soln;
|
||||||
flops_t solve_ops;
|
flops_t solve_ops;
|
||||||
void sprint_soln();
|
void sprint_soln();
|
||||||
|
|
||||||
@@ -130,9 +130,9 @@ sgstrs (trans_t trans, SuperMatrix *L, SuperMatrix *U,
|
|||||||
}
|
}
|
||||||
|
|
||||||
n = L->nrow;
|
n = L->nrow;
|
||||||
work = floatCalloc(n * nrhs);
|
work = doubleCalloc(n * nrhs);
|
||||||
if ( !work ) ABORT("Malloc fails for local work[].");
|
if ( !work ) ABORT("Malloc fails for local work[].");
|
||||||
soln = floatMalloc(n);
|
soln = doubleMalloc(n);
|
||||||
if ( !soln ) ABORT("Malloc fails for local soln[].");
|
if ( !soln ) ABORT("Malloc fails for local soln[].");
|
||||||
|
|
||||||
Bmat = Bstore->nzval;
|
Bmat = Bstore->nzval;
|
||||||
@@ -325,7 +325,7 @@ sgstrs (trans_t trans, SuperMatrix *L, SuperMatrix *U,
|
|||||||
* Diagnostic print of the solution vector
|
* Diagnostic print of the solution vector
|
||||||
*/
|
*/
|
||||||
void
|
void
|
||||||
sprint_soln(int n, float *soln)
|
sprint_soln(int n, double *soln)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
|
@@ -24,8 +24,8 @@
|
|||||||
|
|
||||||
/* Internal prototypes */
|
/* Internal prototypes */
|
||||||
void *sexpand (int *, MemType,int, int, GlobalLU_t *);
|
void *sexpand (int *, MemType,int, int, GlobalLU_t *);
|
||||||
int sLUWorkInit (int, int, int, int **, float **, LU_space_t);
|
int sLUWorkInit (int, int, int, int **, double **, LU_space_t);
|
||||||
void copy_mem_float (int, void *, void *);
|
void copy_mem_double (int, void *, void *);
|
||||||
void sStackCompress (GlobalLU_t *);
|
void sStackCompress (GlobalLU_t *);
|
||||||
void sSetupSpace (void *, int, LU_space_t *);
|
void sSetupSpace (void *, int, LU_space_t *);
|
||||||
void *suser_malloc (int, int);
|
void *suser_malloc (int, int);
|
||||||
@@ -59,7 +59,7 @@ static int no_expand;
|
|||||||
#define NotDoubleAlign(addr) ( (intptr_t)addr & 7 )
|
#define NotDoubleAlign(addr) ( (intptr_t)addr & 7 )
|
||||||
#define DoubleAlign(addr) ( ((intptr_t)addr + 7) & ~7L )
|
#define DoubleAlign(addr) ( ((intptr_t)addr + 7) & ~7L )
|
||||||
#define TempSpace(m, w) ( (2*w + 4 + NO_MARKER) * m * sizeof(int) + \
|
#define TempSpace(m, w) ( (2*w + 4 + NO_MARKER) * m * sizeof(int) + \
|
||||||
(w + 1) * m * sizeof(float) )
|
(w + 1) * m * sizeof(double) )
|
||||||
#define Reduce(alpha) ((alpha + 1) / 2) /* i.e. (alpha-1)/2 + 1 */
|
#define Reduce(alpha) ((alpha + 1) / 2) /* i.e. (alpha-1)/2 + 1 */
|
||||||
|
|
||||||
|
|
||||||
@@ -119,9 +119,9 @@ void suser_free(int bytes, int which_end)
|
|||||||
|
|
||||||
/*
|
/*
|
||||||
* mem_usage consists of the following fields:
|
* mem_usage consists of the following fields:
|
||||||
* - for_lu (float)
|
* - for_lu (double)
|
||||||
* The amount of space used in bytes for the L\U data structures.
|
* The amount of space used in bytes for the L\U data structures.
|
||||||
* - total_needed (float)
|
* - total_needed (double)
|
||||||
* The amount of space needed in bytes to perform factorization.
|
* The amount of space needed in bytes to perform factorization.
|
||||||
* - expansions (int)
|
* - expansions (int)
|
||||||
* Number of memory expansions during the LU factorization.
|
* Number of memory expansions during the LU factorization.
|
||||||
@@ -136,17 +136,17 @@ int sQuerySpace(SuperMatrix *L, SuperMatrix *U, mem_usage_t *mem_usage)
|
|||||||
Ustore = U->Store;
|
Ustore = U->Store;
|
||||||
n = L->ncol;
|
n = L->ncol;
|
||||||
iword = sizeof(int);
|
iword = sizeof(int);
|
||||||
dword = sizeof(float);
|
dword = sizeof(double);
|
||||||
|
|
||||||
/* For LU factors */
|
/* For LU factors */
|
||||||
mem_usage->for_lu = (float)( (4*n + 3) * iword + Lstore->nzval_colptr[n] *
|
mem_usage->for_lu = (double)( (4*n + 3) * iword + Lstore->nzval_colptr[n] *
|
||||||
dword + Lstore->rowind_colptr[n] * iword );
|
dword + Lstore->rowind_colptr[n] * iword );
|
||||||
mem_usage->for_lu += (float)( (n + 1) * iword +
|
mem_usage->for_lu += (double)( (n + 1) * iword +
|
||||||
Ustore->colptr[n] * (dword + iword) );
|
Ustore->colptr[n] * (dword + iword) );
|
||||||
|
|
||||||
/* Working storage to support factorization */
|
/* Working storage to support factorization */
|
||||||
mem_usage->total_needed = mem_usage->for_lu +
|
mem_usage->total_needed = mem_usage->for_lu +
|
||||||
(float)( (2 * panel_size + 4 + NO_MARKER) * n * iword +
|
(double)( (2 * panel_size + 4 + NO_MARKER) * n * iword +
|
||||||
(panel_size + 1) * n * dword );
|
(panel_size + 1) * n * dword );
|
||||||
|
|
||||||
mem_usage->expansions = --no_expand;
|
mem_usage->expansions = --no_expand;
|
||||||
@@ -165,16 +165,16 @@ int sQuerySpace(SuperMatrix *L, SuperMatrix *U, mem_usage_t *mem_usage)
|
|||||||
int
|
int
|
||||||
sLUMemInit(fact_t fact, void *work, int lwork, int m, int n, int annz,
|
sLUMemInit(fact_t fact, void *work, int lwork, int m, int n, int annz,
|
||||||
int panel_size, SuperMatrix *L, SuperMatrix *U, GlobalLU_t *Glu,
|
int panel_size, SuperMatrix *L, SuperMatrix *U, GlobalLU_t *Glu,
|
||||||
int **iwork, float **dwork)
|
int **iwork, double **dwork)
|
||||||
{
|
{
|
||||||
int info, iword, dword;
|
int info, iword, dword;
|
||||||
SCformat *Lstore;
|
SCformat *Lstore;
|
||||||
NCformat *Ustore;
|
NCformat *Ustore;
|
||||||
int *xsup, *supno;
|
int *xsup, *supno;
|
||||||
int *lsub, *xlsub;
|
int *lsub, *xlsub;
|
||||||
float *lusup;
|
double *lusup;
|
||||||
int *xlusup;
|
int *xlusup;
|
||||||
float *ucol;
|
double *ucol;
|
||||||
int *usub, *xusub;
|
int *usub, *xusub;
|
||||||
int nzlmax, nzumax, nzlumax;
|
int nzlmax, nzumax, nzlumax;
|
||||||
int FILL = sp_ienv(6);
|
int FILL = sp_ienv(6);
|
||||||
@@ -182,7 +182,7 @@ sLUMemInit(fact_t fact, void *work, int lwork, int m, int n, int annz,
|
|||||||
Glu->n = n;
|
Glu->n = n;
|
||||||
no_expand = 0;
|
no_expand = 0;
|
||||||
iword = sizeof(int);
|
iword = sizeof(int);
|
||||||
dword = sizeof(float);
|
dword = sizeof(double);
|
||||||
|
|
||||||
if ( !expanders )
|
if ( !expanders )
|
||||||
expanders = (ExpHeader*)SUPERLU_MALLOC(NO_MEMTYPE * sizeof(ExpHeader));
|
expanders = (ExpHeader*)SUPERLU_MALLOC(NO_MEMTYPE * sizeof(ExpHeader));
|
||||||
@@ -220,8 +220,8 @@ sLUMemInit(fact_t fact, void *work, int lwork, int m, int n, int annz,
|
|||||||
xusub = (int *)suser_malloc((n+1) * iword, HEAD);
|
xusub = (int *)suser_malloc((n+1) * iword, HEAD);
|
||||||
}
|
}
|
||||||
|
|
||||||
lusup = (float *) sexpand( &nzlumax, LUSUP, 0, 0, Glu );
|
lusup = (double *) sexpand( &nzlumax, LUSUP, 0, 0, Glu );
|
||||||
ucol = (float *) sexpand( &nzumax, UCOL, 0, 0, Glu );
|
ucol = (double *) sexpand( &nzumax, UCOL, 0, 0, Glu );
|
||||||
lsub = (int *) sexpand( &nzlmax, LSUB, 0, 0, Glu );
|
lsub = (int *) sexpand( &nzlmax, LSUB, 0, 0, Glu );
|
||||||
usub = (int *) sexpand( &nzumax, USUB, 0, 1, Glu );
|
usub = (int *) sexpand( &nzumax, USUB, 0, 1, Glu );
|
||||||
|
|
||||||
@@ -241,8 +241,8 @@ sLUMemInit(fact_t fact, void *work, int lwork, int m, int n, int annz,
|
|||||||
printf("Not enough memory to perform factorization.\n");
|
printf("Not enough memory to perform factorization.\n");
|
||||||
return (smemory_usage(nzlmax, nzumax, nzlumax, n) + n);
|
return (smemory_usage(nzlmax, nzumax, nzlumax, n) + n);
|
||||||
}
|
}
|
||||||
lusup = (float *) sexpand( &nzlumax, LUSUP, 0, 0, Glu );
|
lusup = (double *) sexpand( &nzlumax, LUSUP, 0, 0, Glu );
|
||||||
ucol = (float *) sexpand( &nzumax, UCOL, 0, 0, Glu );
|
ucol = (double *) sexpand( &nzumax, UCOL, 0, 0, Glu );
|
||||||
lsub = (int *) sexpand( &nzlmax, LSUB, 0, 0, Glu );
|
lsub = (int *) sexpand( &nzlmax, LSUB, 0, 0, Glu );
|
||||||
usub = (int *) sexpand( &nzumax, USUB, 0, 1, Glu );
|
usub = (int *) sexpand( &nzumax, USUB, 0, 1, Glu );
|
||||||
}
|
}
|
||||||
@@ -307,16 +307,16 @@ sLUMemInit(fact_t fact, void *work, int lwork, int m, int n, int annz,
|
|||||||
returns the number of bytes allocated so far when failure occurred. */
|
returns the number of bytes allocated so far when failure occurred. */
|
||||||
int
|
int
|
||||||
sLUWorkInit(int m, int n, int panel_size, int **iworkptr,
|
sLUWorkInit(int m, int n, int panel_size, int **iworkptr,
|
||||||
float **dworkptr, LU_space_t MemModel)
|
double **dworkptr, LU_space_t MemModel)
|
||||||
{
|
{
|
||||||
int isize, dsize, extra;
|
int isize, dsize, extra;
|
||||||
float *old_ptr;
|
double *old_ptr;
|
||||||
int maxsuper = sp_ienv(3),
|
int maxsuper = sp_ienv(3),
|
||||||
rowblk = sp_ienv(4);
|
rowblk = sp_ienv(4);
|
||||||
|
|
||||||
isize = ( (2 * panel_size + 3 + NO_MARKER ) * m + n ) * sizeof(int);
|
isize = ( (2 * panel_size + 3 + NO_MARKER ) * m + n ) * sizeof(int);
|
||||||
dsize = (m * panel_size +
|
dsize = (m * panel_size +
|
||||||
NUM_TEMPV(m,panel_size,maxsuper,rowblk)) * sizeof(float);
|
NUM_TEMPV(m,panel_size,maxsuper,rowblk)) * sizeof(double);
|
||||||
|
|
||||||
if ( MemModel == SYSTEM )
|
if ( MemModel == SYSTEM )
|
||||||
*iworkptr = (int *) intCalloc(isize/sizeof(int));
|
*iworkptr = (int *) intCalloc(isize/sizeof(int));
|
||||||
@@ -328,13 +328,13 @@ sLUWorkInit(int m, int n, int panel_size, int **iworkptr,
|
|||||||
}
|
}
|
||||||
|
|
||||||
if ( MemModel == SYSTEM )
|
if ( MemModel == SYSTEM )
|
||||||
*dworkptr = (float *) SUPERLU_MALLOC(dsize);
|
*dworkptr = (double *) SUPERLU_MALLOC(dsize);
|
||||||
else {
|
else {
|
||||||
*dworkptr = (float *) suser_malloc(dsize, TAIL);
|
*dworkptr = (double *) suser_malloc(dsize, TAIL);
|
||||||
if ( NotDoubleAlign(*dworkptr) ) {
|
if ( NotDoubleAlign(*dworkptr) ) {
|
||||||
old_ptr = *dworkptr;
|
old_ptr = *dworkptr;
|
||||||
*dworkptr = (float*) DoubleAlign(*dworkptr);
|
*dworkptr = (double*) DoubleAlign(*dworkptr);
|
||||||
*dworkptr = (float*) ((double*)*dworkptr - 1);
|
*dworkptr = (double*) ((double*)*dworkptr - 1);
|
||||||
extra = (char*)old_ptr - (char*)*dworkptr;
|
extra = (char*)old_ptr - (char*)*dworkptr;
|
||||||
#ifdef DEBUG
|
#ifdef DEBUG
|
||||||
printf("sLUWorkInit: not aligned, extra %d\n", extra);
|
printf("sLUWorkInit: not aligned, extra %d\n", extra);
|
||||||
@@ -356,10 +356,10 @@ sLUWorkInit(int m, int n, int panel_size, int **iworkptr,
|
|||||||
* Set up pointers for real working arrays.
|
* Set up pointers for real working arrays.
|
||||||
*/
|
*/
|
||||||
void
|
void
|
||||||
sSetRWork(int m, int panel_size, float *dworkptr,
|
sSetRWork(int m, int panel_size, double *dworkptr,
|
||||||
float **dense, float **tempv)
|
double **dense, double **tempv)
|
||||||
{
|
{
|
||||||
float zero = 0.0;
|
double zero = 0.0;
|
||||||
|
|
||||||
int maxsuper = sp_ienv(3),
|
int maxsuper = sp_ienv(3),
|
||||||
rowblk = sp_ienv(4);
|
rowblk = sp_ienv(4);
|
||||||
@@ -372,7 +372,7 @@ sSetRWork(int m, int panel_size, float *dworkptr,
|
|||||||
/*
|
/*
|
||||||
* Free the working storage used by factor routines.
|
* Free the working storage used by factor routines.
|
||||||
*/
|
*/
|
||||||
void sLUWorkFree(int *iwork, float *dwork, GlobalLU_t *Glu)
|
void sLUWorkFree(int *iwork, double *dwork, GlobalLU_t *Glu)
|
||||||
{
|
{
|
||||||
if ( Glu->MemModel == SYSTEM ) {
|
if ( Glu->MemModel == SYSTEM ) {
|
||||||
SUPERLU_FREE (iwork);
|
SUPERLU_FREE (iwork);
|
||||||
@@ -421,11 +421,11 @@ sLUMemXpand(int jcol,
|
|||||||
|
|
||||||
switch ( mem_type ) {
|
switch ( mem_type ) {
|
||||||
case LUSUP:
|
case LUSUP:
|
||||||
Glu->lusup = (float *) new_mem;
|
Glu->lusup = (double *) new_mem;
|
||||||
Glu->nzlumax = *maxlen;
|
Glu->nzlumax = *maxlen;
|
||||||
break;
|
break;
|
||||||
case UCOL:
|
case UCOL:
|
||||||
Glu->ucol = (float *) new_mem;
|
Glu->ucol = (double *) new_mem;
|
||||||
Glu->nzumax = *maxlen;
|
Glu->nzumax = *maxlen;
|
||||||
break;
|
break;
|
||||||
case LSUB:
|
case LSUB:
|
||||||
@@ -445,11 +445,11 @@ sLUMemXpand(int jcol,
|
|||||||
|
|
||||||
|
|
||||||
void
|
void
|
||||||
copy_mem_float(int howmany, void *old, void *new)
|
copy_mem_double(int howmany, void *old, void *new)
|
||||||
{
|
{
|
||||||
register int i;
|
register int i;
|
||||||
float *dold = old;
|
double *dold = old;
|
||||||
float *dnew = new;
|
double *dnew = new;
|
||||||
for (i = 0; i < howmany; i++) dnew[i] = dold[i];
|
for (i = 0; i < howmany; i++) dnew[i] = dold[i];
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -466,8 +466,8 @@ void
|
|||||||
GlobalLU_t *Glu /* modified - global LU data structures */
|
GlobalLU_t *Glu /* modified - global LU data structures */
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
float EXPAND = 1.5;
|
double EXPAND = 1.5;
|
||||||
float alpha;
|
double alpha;
|
||||||
void *new_mem, *old_mem;
|
void *new_mem, *old_mem;
|
||||||
int new_len, tries, lword, extra, bytes_to_copy;
|
int new_len, tries, lword, extra, bytes_to_copy;
|
||||||
|
|
||||||
@@ -480,7 +480,7 @@ void
|
|||||||
}
|
}
|
||||||
|
|
||||||
if ( type == LSUB || type == USUB ) lword = sizeof(int);
|
if ( type == LSUB || type == USUB ) lword = sizeof(int);
|
||||||
else lword = sizeof(float);
|
else lword = sizeof(double);
|
||||||
|
|
||||||
if ( Glu->MemModel == SYSTEM ) {
|
if ( Glu->MemModel == SYSTEM ) {
|
||||||
new_mem = (void *) SUPERLU_MALLOC(new_len * lword);
|
new_mem = (void *) SUPERLU_MALLOC(new_len * lword);
|
||||||
@@ -501,7 +501,7 @@ void
|
|||||||
if ( type == LSUB || type == USUB ) {
|
if ( type == LSUB || type == USUB ) {
|
||||||
copy_mem_int(len_to_copy, expanders[type].mem, new_mem);
|
copy_mem_int(len_to_copy, expanders[type].mem, new_mem);
|
||||||
} else {
|
} else {
|
||||||
copy_mem_float(len_to_copy, expanders[type].mem, new_mem);
|
copy_mem_double(len_to_copy, expanders[type].mem, new_mem);
|
||||||
}
|
}
|
||||||
SUPERLU_FREE (expanders[type].mem);
|
SUPERLU_FREE (expanders[type].mem);
|
||||||
}
|
}
|
||||||
@@ -585,12 +585,12 @@ sStackCompress(GlobalLU_t *Glu)
|
|||||||
register int iword, dword, ndim;
|
register int iword, dword, ndim;
|
||||||
char *last, *fragment;
|
char *last, *fragment;
|
||||||
int *ifrom, *ito;
|
int *ifrom, *ito;
|
||||||
float *dfrom, *dto;
|
double *dfrom, *dto;
|
||||||
int *xlsub, *lsub, *xusub, *usub, *xlusup;
|
int *xlsub, *lsub, *xusub, *usub, *xlusup;
|
||||||
float *ucol, *lusup;
|
double *ucol, *lusup;
|
||||||
|
|
||||||
iword = sizeof(int);
|
iword = sizeof(int);
|
||||||
dword = sizeof(float);
|
dword = sizeof(double);
|
||||||
ndim = Glu->n;
|
ndim = Glu->n;
|
||||||
|
|
||||||
xlsub = Glu->xlsub;
|
xlsub = Glu->xlsub;
|
||||||
@@ -602,8 +602,8 @@ sStackCompress(GlobalLU_t *Glu)
|
|||||||
lusup = Glu->lusup;
|
lusup = Glu->lusup;
|
||||||
|
|
||||||
dfrom = ucol;
|
dfrom = ucol;
|
||||||
dto = (float *)((char*)lusup + xlusup[ndim] * dword);
|
dto = (double *)((char*)lusup + xlusup[ndim] * dword);
|
||||||
copy_mem_float(xusub[ndim], dfrom, dto);
|
copy_mem_double(xusub[ndim], dfrom, dto);
|
||||||
ucol = dto;
|
ucol = dto;
|
||||||
|
|
||||||
ifrom = lsub;
|
ifrom = lsub;
|
||||||
@@ -637,32 +637,32 @@ sStackCompress(GlobalLU_t *Glu)
|
|||||||
* Allocate storage for original matrix A
|
* Allocate storage for original matrix A
|
||||||
*/
|
*/
|
||||||
void
|
void
|
||||||
sallocateA(int n, int nnz, float **a, int **asub, int **xa)
|
sallocateA(int n, int nnz, double **a, int **asub, int **xa)
|
||||||
{
|
{
|
||||||
*a = (float *) floatMalloc(nnz);
|
*a = (double *) doubleMalloc(nnz);
|
||||||
*asub = (int *) intMalloc(nnz);
|
*asub = (int *) intMalloc(nnz);
|
||||||
*xa = (int *) intMalloc(n+1);
|
*xa = (int *) intMalloc(n+1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
float *floatMalloc(int n)
|
double *doubleMalloc(int n)
|
||||||
{
|
{
|
||||||
float *buf;
|
double *buf;
|
||||||
buf = (float *) SUPERLU_MALLOC(n * sizeof(float));
|
buf = (double *) SUPERLU_MALLOC(n * sizeof(double));
|
||||||
if ( !buf ) {
|
if ( !buf ) {
|
||||||
ABORT("SUPERLU_MALLOC failed for buf in floatMalloc()\n");
|
ABORT("SUPERLU_MALLOC failed for buf in doubleMalloc()\n");
|
||||||
}
|
}
|
||||||
return (buf);
|
return (buf);
|
||||||
}
|
}
|
||||||
|
|
||||||
float *floatCalloc(int n)
|
double *doubleCalloc(int n)
|
||||||
{
|
{
|
||||||
float *buf;
|
double *buf;
|
||||||
register int i;
|
register int i;
|
||||||
float zero = 0.0;
|
double zero = 0.0;
|
||||||
buf = (float *) SUPERLU_MALLOC(n * sizeof(float));
|
buf = (double *) SUPERLU_MALLOC(n * sizeof(double));
|
||||||
if ( !buf ) {
|
if ( !buf ) {
|
||||||
ABORT("SUPERLU_MALLOC failed for buf in floatCalloc()\n");
|
ABORT("SUPERLU_MALLOC failed for buf in doubleCalloc()\n");
|
||||||
}
|
}
|
||||||
for (i = 0; i < n; ++i) buf[i] = zero;
|
for (i = 0; i < n; ++i) buf[i] = zero;
|
||||||
return (buf);
|
return (buf);
|
||||||
@@ -675,7 +675,7 @@ int smemory_usage(const int nzlmax, const int nzumax,
|
|||||||
register int iword, dword;
|
register int iword, dword;
|
||||||
|
|
||||||
iword = sizeof(int);
|
iword = sizeof(int);
|
||||||
dword = sizeof(float);
|
dword = sizeof(double);
|
||||||
|
|
||||||
return (10 * n * iword +
|
return (10 * n * iword +
|
||||||
nzlmax * iword + nzumax * (iword + dword) + nzlumax * dword);
|
nzlmax * iword + nzumax * (iword + dword) + nzlumax * dword);
|
||||||
|
@@ -25,17 +25,17 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
/* local prototypes*/
|
/* local prototypes*/
|
||||||
void slsolve ( int, int, float *, float *);
|
void slsolve ( int, int, double *, double *);
|
||||||
void susolve ( int, int, float *, float *);
|
void susolve ( int, int, double *, double *);
|
||||||
void smatvec ( int, int, int, float *, float *, float *);
|
void smatvec ( int, int, int, double *, double *, double *);
|
||||||
|
|
||||||
|
|
||||||
void slsolve ( int ldm, int ncol, float *M, float *rhs )
|
void slsolve ( int ldm, int ncol, double *M, double *rhs )
|
||||||
{
|
{
|
||||||
int k;
|
int k;
|
||||||
float x0, x1, x2, x3, x4, x5, x6, x7;
|
double x0, x1, x2, x3, x4, x5, x6, x7;
|
||||||
float *M0;
|
double *M0;
|
||||||
register float *Mki0, *Mki1, *Mki2, *Mki3, *Mki4, *Mki5, *Mki6, *Mki7;
|
register double *Mki0, *Mki1, *Mki2, *Mki3, *Mki4, *Mki5, *Mki6, *Mki7;
|
||||||
register int firstcol = 0;
|
register int firstcol = 0;
|
||||||
|
|
||||||
M0 = &M[0];
|
M0 = &M[0];
|
||||||
@@ -131,10 +131,10 @@ void
|
|||||||
susolve ( ldm, ncol, M, rhs )
|
susolve ( ldm, ncol, M, rhs )
|
||||||
int ldm; /* in */
|
int ldm; /* in */
|
||||||
int ncol; /* in */
|
int ncol; /* in */
|
||||||
float *M; /* in */
|
double *M; /* in */
|
||||||
float *rhs; /* modified */
|
double *rhs; /* modified */
|
||||||
{
|
{
|
||||||
float xj;
|
double xj;
|
||||||
int jcol, j, irow;
|
int jcol, j, irow;
|
||||||
|
|
||||||
jcol = ncol - 1;
|
jcol = ncol - 1;
|
||||||
@@ -162,14 +162,14 @@ void smatvec ( ldm, nrow, ncol, M, vec, Mxvec )
|
|||||||
int ldm; /* in -- leading dimension of M */
|
int ldm; /* in -- leading dimension of M */
|
||||||
int nrow; /* in */
|
int nrow; /* in */
|
||||||
int ncol; /* in */
|
int ncol; /* in */
|
||||||
float *M; /* in */
|
double *M; /* in */
|
||||||
float *vec; /* in */
|
double *vec; /* in */
|
||||||
float *Mxvec; /* in/out */
|
double *Mxvec; /* in/out */
|
||||||
|
|
||||||
{
|
{
|
||||||
float vi0, vi1, vi2, vi3, vi4, vi5, vi6, vi7;
|
double vi0, vi1, vi2, vi3, vi4, vi5, vi6, vi7;
|
||||||
float *M0;
|
double *M0;
|
||||||
register float *Mki0, *Mki1, *Mki2, *Mki3, *Mki4, *Mki5, *Mki6, *Mki7;
|
register double *Mki0, *Mki1, *Mki2, *Mki3, *Mki4, *Mki5, *Mki6, *Mki7;
|
||||||
register int firstcol = 0;
|
register int firstcol = 0;
|
||||||
int k;
|
int k;
|
||||||
|
|
||||||
|
@@ -29,8 +29,8 @@
|
|||||||
/*
|
/*
|
||||||
* Function prototypes
|
* Function prototypes
|
||||||
*/
|
*/
|
||||||
void slsolve(int, int, float *, float *);
|
void slsolve(int, int, double *, double *);
|
||||||
void smatvec(int, int, int, float *, float *, float *);
|
void smatvec(int, int, int, double *, double *, double *);
|
||||||
extern void scheck_tempv();
|
extern void scheck_tempv();
|
||||||
|
|
||||||
void
|
void
|
||||||
@@ -39,8 +39,8 @@ spanel_bmod (
|
|||||||
const int w, /* in */
|
const int w, /* in */
|
||||||
const int jcol, /* in */
|
const int jcol, /* in */
|
||||||
const int nseg, /* in */
|
const int nseg, /* in */
|
||||||
float *dense, /* out, of size n by w */
|
double *dense, /* out, of size n by w */
|
||||||
float *tempv, /* working array */
|
double *tempv, /* working array */
|
||||||
int *segrep, /* in */
|
int *segrep, /* in */
|
||||||
int *repfnz, /* in, of size n by w */
|
int *repfnz, /* in, of size n by w */
|
||||||
GlobalLU_t *Glu, /* modified */
|
GlobalLU_t *Glu, /* modified */
|
||||||
@@ -71,13 +71,13 @@ spanel_bmod (
|
|||||||
ftcs3 = _cptofcd("U", strlen("U"));
|
ftcs3 = _cptofcd("U", strlen("U"));
|
||||||
#endif
|
#endif
|
||||||
int incx = 1, incy = 1;
|
int incx = 1, incy = 1;
|
||||||
float alpha, beta;
|
double alpha, beta;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
register int k, ksub;
|
register int k, ksub;
|
||||||
int fsupc, nsupc, nsupr, nrow;
|
int fsupc, nsupc, nsupr, nrow;
|
||||||
int krep, krep_ind;
|
int krep, krep_ind;
|
||||||
float ukj, ukj1, ukj2;
|
double ukj, ukj1, ukj2;
|
||||||
int luptr, luptr1, luptr2;
|
int luptr, luptr1, luptr2;
|
||||||
int segsze;
|
int segsze;
|
||||||
int block_nrow; /* no of rows in a block row */
|
int block_nrow; /* no of rows in a block row */
|
||||||
@@ -87,13 +87,13 @@ spanel_bmod (
|
|||||||
register int jj; /* Index through each column in the panel */
|
register int jj; /* Index through each column in the panel */
|
||||||
int *xsup, *supno;
|
int *xsup, *supno;
|
||||||
int *lsub, *xlsub;
|
int *lsub, *xlsub;
|
||||||
float *lusup;
|
double *lusup;
|
||||||
int *xlusup;
|
int *xlusup;
|
||||||
int *repfnz_col; /* repfnz[] for a column in the panel */
|
int *repfnz_col; /* repfnz[] for a column in the panel */
|
||||||
float *dense_col; /* dense[] for a column in the panel */
|
double *dense_col; /* dense[] for a column in the panel */
|
||||||
float *tempv1; /* Used in 1-D update */
|
double *tempv1; /* Used in 1-D update */
|
||||||
float *TriTmp, *MatvecTmp; /* used in 2-D update */
|
double *TriTmp, *MatvecTmp; /* used in 2-D update */
|
||||||
float zero = 0.0;
|
double zero = 0.0;
|
||||||
register int ldaTmp;
|
register int ldaTmp;
|
||||||
register int r_ind, r_hi;
|
register int r_ind, r_hi;
|
||||||
static int first = 1, maxsuper, rowblk, colblk;
|
static int first = 1, maxsuper, rowblk, colblk;
|
||||||
|
@@ -34,7 +34,7 @@ spanel_dfs (
|
|||||||
SuperMatrix *A, /* in - original matrix */
|
SuperMatrix *A, /* in - original matrix */
|
||||||
int *perm_r, /* in */
|
int *perm_r, /* in */
|
||||||
int *nseg, /* out */
|
int *nseg, /* out */
|
||||||
float *dense, /* out */
|
double *dense, /* out */
|
||||||
int *panel_lsub, /* out */
|
int *panel_lsub, /* out */
|
||||||
int *segrep, /* out */
|
int *segrep, /* out */
|
||||||
int *repfnz, /* out */
|
int *repfnz, /* out */
|
||||||
@@ -74,7 +74,7 @@ spanel_dfs (
|
|||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
NCPformat *Astore;
|
NCPformat *Astore;
|
||||||
float *a;
|
double *a;
|
||||||
int *asub;
|
int *asub;
|
||||||
int *xa_begin, *xa_end;
|
int *xa_begin, *xa_end;
|
||||||
int krep, chperm, chmark, chrep, oldrep, kchild, myfnz;
|
int krep, chperm, chmark, chrep, oldrep, kchild, myfnz;
|
||||||
@@ -84,7 +84,7 @@ spanel_dfs (
|
|||||||
int *marker1; /* marker1[jj] >= jcol if vertex jj was visited
|
int *marker1; /* marker1[jj] >= jcol if vertex jj was visited
|
||||||
by a previous column within this panel. */
|
by a previous column within this panel. */
|
||||||
int *repfnz_col; /* start of each column in the panel */
|
int *repfnz_col; /* start of each column in the panel */
|
||||||
float *dense_col; /* start of each column in the panel */
|
double *dense_col; /* start of each column in the panel */
|
||||||
int nextl_col; /* next available position in panel_lsub[*,jj] */
|
int nextl_col; /* next available position in panel_lsub[*,jj] */
|
||||||
int *xsup, *supno;
|
int *xsup, *supno;
|
||||||
int *lsub, *xlsub;
|
int *lsub, *xlsub;
|
||||||
|
@@ -31,7 +31,7 @@
|
|||||||
int
|
int
|
||||||
spivotL(
|
spivotL(
|
||||||
const int jcol, /* in */
|
const int jcol, /* in */
|
||||||
const float u, /* in - diagonal pivoting threshold */
|
const double u, /* in - diagonal pivoting threshold */
|
||||||
int *usepr, /* re-use the pivot sequence given by perm_r/iperm_r */
|
int *usepr, /* re-use the pivot sequence given by perm_r/iperm_r */
|
||||||
int *perm_r, /* may be modified */
|
int *perm_r, /* may be modified */
|
||||||
int *iperm_r, /* in - inverse of perm_r */
|
int *iperm_r, /* in - inverse of perm_r */
|
||||||
@@ -67,14 +67,14 @@ spivotL(
|
|||||||
int nsupr; /* no of rows in the supernode */
|
int nsupr; /* no of rows in the supernode */
|
||||||
int lptr; /* points to the starting subscript of the supernode */
|
int lptr; /* points to the starting subscript of the supernode */
|
||||||
int pivptr, old_pivptr, diag, diagind;
|
int pivptr, old_pivptr, diag, diagind;
|
||||||
float pivmax, rtemp, thresh;
|
double pivmax, rtemp, thresh;
|
||||||
float temp;
|
double temp;
|
||||||
float *lu_sup_ptr;
|
double *lu_sup_ptr;
|
||||||
float *lu_col_ptr;
|
double *lu_col_ptr;
|
||||||
int *lsub_ptr;
|
int *lsub_ptr;
|
||||||
int isub, icol, k, itemp;
|
int isub, icol, k, itemp;
|
||||||
int *lsub, *xlsub;
|
int *lsub, *xlsub;
|
||||||
float *lusup;
|
double *lusup;
|
||||||
int *xlusup;
|
int *xlusup;
|
||||||
flops_t *ops = stat->ops;
|
flops_t *ops = stat->ops;
|
||||||
|
|
||||||
|
@@ -45,13 +45,13 @@ spruneL(
|
|||||||
* contains the current pivot row "pivrow"
|
* contains the current pivot row "pivrow"
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
float utemp;
|
double utemp;
|
||||||
int jsupno, irep, irep1, kmin, kmax, krow, movnum;
|
int jsupno, irep, irep1, kmin, kmax, krow, movnum;
|
||||||
int i, ktemp, minloc, maxloc;
|
int i, ktemp, minloc, maxloc;
|
||||||
int do_prune; /* logical variable */
|
int do_prune; /* logical variable */
|
||||||
int *xsup, *supno;
|
int *xsup, *supno;
|
||||||
int *lsub, *xlsub;
|
int *lsub, *xlsub;
|
||||||
float *lusup;
|
double *lusup;
|
||||||
int *xlusup;
|
int *xlusup;
|
||||||
|
|
||||||
xsup = Glu->xsup;
|
xsup = Glu->xsup;
|
||||||
|
@@ -24,8 +24,8 @@
|
|||||||
|
|
||||||
#include "ssp_defs.h"
|
#include "ssp_defs.h"
|
||||||
|
|
||||||
void slsolve(int, int, float*, float*);
|
void slsolve(int, int, double*, double*);
|
||||||
void smatvec(int, int, int, float*, float*, float*);
|
void smatvec(int, int, int, double*, double*, double*);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Performs numeric block updates within the relaxed snode.
|
* Performs numeric block updates within the relaxed snode.
|
||||||
@@ -34,8 +34,8 @@ int
|
|||||||
ssnode_bmod (
|
ssnode_bmod (
|
||||||
const int jcol, /* in */
|
const int jcol, /* in */
|
||||||
const int fsupc, /* in */
|
const int fsupc, /* in */
|
||||||
float *dense, /* in */
|
double *dense, /* in */
|
||||||
float *tempv, /* working array */
|
double *tempv, /* working array */
|
||||||
GlobalLU_t *Glu, /* modified */
|
GlobalLU_t *Glu, /* modified */
|
||||||
SuperLUStat_t *stat /* output */
|
SuperLUStat_t *stat /* output */
|
||||||
)
|
)
|
||||||
@@ -47,14 +47,14 @@ ssnode_bmod (
|
|||||||
ftcs3 = _cptofcd("U", strlen("U"));
|
ftcs3 = _cptofcd("U", strlen("U"));
|
||||||
#endif
|
#endif
|
||||||
int incx = 1, incy = 1;
|
int incx = 1, incy = 1;
|
||||||
float alpha = -1.0, beta = 1.0;
|
double alpha = -1.0, beta = 1.0;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
int luptr, nsupc, nsupr, nrow;
|
int luptr, nsupc, nsupr, nrow;
|
||||||
int isub, irow, i, iptr;
|
int isub, irow, i, iptr;
|
||||||
register int ufirst, nextlu;
|
register int ufirst, nextlu;
|
||||||
int *lsub, *xlsub;
|
int *lsub, *xlsub;
|
||||||
float *lusup;
|
double *lusup;
|
||||||
int *xlusup;
|
int *xlusup;
|
||||||
flops_t *ops = stat->ops;
|
flops_t *ops = stat->ops;
|
||||||
|
|
||||||
|
@@ -19,14 +19,14 @@
|
|||||||
/*
|
/*
|
||||||
* Function prototypes
|
* Function prototypes
|
||||||
*/
|
*/
|
||||||
void susolve(int, int, float*, float*);
|
void susolve(int, int, double*, double*);
|
||||||
void slsolve(int, int, float*, float*);
|
void slsolve(int, int, double*, double*);
|
||||||
void smatvec(int, int, int, float*, float*, float*);
|
void smatvec(int, int, int, double*, double*, double*);
|
||||||
int strsv_(char*, char*, char*, int*, float*, int*, float*, int*);
|
int strsv_(char*, char*, char*, int*, double*, int*, double*, int*);
|
||||||
|
|
||||||
int
|
int
|
||||||
sp_strsv(char *uplo, char *trans, char *diag, SuperMatrix *L,
|
sp_strsv(char *uplo, char *trans, char *diag, SuperMatrix *L,
|
||||||
SuperMatrix *U, float *x, SuperLUStat_t *stat, int *info)
|
SuperMatrix *U, double *x, SuperLUStat_t *stat, int *info)
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
* Purpose
|
* Purpose
|
||||||
@@ -71,7 +71,7 @@ sp_strsv(char *uplo, char *trans, char *diag, SuperMatrix *L,
|
|||||||
* The factor U from the factorization Pr*A*Pc=L*U.
|
* The factor U from the factorization Pr*A*Pc=L*U.
|
||||||
* U has types: Stype = NC, Dtype = SLU_S, Mtype = TRU.
|
* U has types: Stype = NC, Dtype = SLU_S, Mtype = TRU.
|
||||||
*
|
*
|
||||||
* x - (input/output) float*
|
* x - (input/output) double*
|
||||||
* Before entry, the incremented array X must contain the n
|
* Before entry, the incremented array X must contain the n
|
||||||
* element right-hand side vector b. On exit, X is overwritten
|
* element right-hand side vector b. On exit, X is overwritten
|
||||||
* with the solution vector x.
|
* with the solution vector x.
|
||||||
@@ -87,12 +87,12 @@ sp_strsv(char *uplo, char *trans, char *diag, SuperMatrix *L,
|
|||||||
#endif
|
#endif
|
||||||
SCformat *Lstore;
|
SCformat *Lstore;
|
||||||
NCformat *Ustore;
|
NCformat *Ustore;
|
||||||
float *Lval, *Uval;
|
double *Lval, *Uval;
|
||||||
int incx = 1;
|
int incx = 1;
|
||||||
int nrow;
|
int nrow;
|
||||||
int fsupc, nsupr, nsupc, luptr, istart, irow;
|
int fsupc, nsupr, nsupc, luptr, istart, irow;
|
||||||
int i, k, iptr, jcol;
|
int i, k, iptr, jcol;
|
||||||
float *work;
|
double *work;
|
||||||
flops_t solve_ops;
|
flops_t solve_ops;
|
||||||
|
|
||||||
/* Test the input parameters */
|
/* Test the input parameters */
|
||||||
@@ -115,7 +115,7 @@ sp_strsv(char *uplo, char *trans, char *diag, SuperMatrix *L,
|
|||||||
Uval = Ustore->nzval;
|
Uval = Ustore->nzval;
|
||||||
solve_ops = 0;
|
solve_ops = 0;
|
||||||
|
|
||||||
if ( !(work = floatCalloc(L->nrow)) )
|
if ( !(work = doubleCalloc(L->nrow)) )
|
||||||
ABORT("Malloc fails for work in sp_strsv().");
|
ABORT("Malloc fails for work in sp_strsv().");
|
||||||
|
|
||||||
if ( lsame_(trans, "N") ) { /* Form x := inv(A)*x. */
|
if ( lsame_(trans, "N") ) { /* Form x := inv(A)*x. */
|
||||||
@@ -306,8 +306,8 @@ sp_strsv(char *uplo, char *trans, char *diag, SuperMatrix *L,
|
|||||||
|
|
||||||
|
|
||||||
int
|
int
|
||||||
sp_sgemv(char *trans, float alpha, SuperMatrix *A, float *x,
|
sp_sgemv(char *trans, double alpha, SuperMatrix *A, double *x,
|
||||||
int incx, float beta, float *y, int incy)
|
int incx, double beta, double *y, int incy)
|
||||||
{
|
{
|
||||||
/* Purpose
|
/* Purpose
|
||||||
=======
|
=======
|
||||||
@@ -327,7 +327,7 @@ sp_sgemv(char *trans, float alpha, SuperMatrix *A, float *x,
|
|||||||
TRANS = 'T' or 't' y := alpha*A'*x + beta*y.
|
TRANS = 'T' or 't' y := alpha*A'*x + beta*y.
|
||||||
TRANS = 'C' or 'c' y := alpha*A'*x + beta*y.
|
TRANS = 'C' or 'c' y := alpha*A'*x + beta*y.
|
||||||
|
|
||||||
ALPHA - (input) float
|
ALPHA - (input) double
|
||||||
On entry, ALPHA specifies the scalar alpha.
|
On entry, ALPHA specifies the scalar alpha.
|
||||||
|
|
||||||
A - (input) SuperMatrix*
|
A - (input) SuperMatrix*
|
||||||
@@ -336,7 +336,7 @@ sp_sgemv(char *trans, float alpha, SuperMatrix *A, float *x,
|
|||||||
Stype = NC or NCP; Dtype = SLU_S; Mtype = GE.
|
Stype = NC or NCP; Dtype = SLU_S; Mtype = GE.
|
||||||
In the future, more general A can be handled.
|
In the future, more general A can be handled.
|
||||||
|
|
||||||
X - (input) float*, array of DIMENSION at least
|
X - (input) double*, array of DIMENSION at least
|
||||||
( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n'
|
( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n'
|
||||||
and at least
|
and at least
|
||||||
( 1 + ( m - 1 )*abs( INCX ) ) otherwise.
|
( 1 + ( m - 1 )*abs( INCX ) ) otherwise.
|
||||||
@@ -347,11 +347,11 @@ sp_sgemv(char *trans, float alpha, SuperMatrix *A, float *x,
|
|||||||
On entry, INCX specifies the increment for the elements of
|
On entry, INCX specifies the increment for the elements of
|
||||||
X. INCX must not be zero.
|
X. INCX must not be zero.
|
||||||
|
|
||||||
BETA - (input) float
|
BETA - (input) double
|
||||||
On entry, BETA specifies the scalar beta. When BETA is
|
On entry, BETA specifies the scalar beta. When BETA is
|
||||||
supplied as zero then Y need not be set on input.
|
supplied as zero then Y need not be set on input.
|
||||||
|
|
||||||
Y - (output) float*, array of DIMENSION at least
|
Y - (output) double*, array of DIMENSION at least
|
||||||
( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n'
|
( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n'
|
||||||
and at least
|
and at least
|
||||||
( 1 + ( n - 1 )*abs( INCY ) ) otherwise.
|
( 1 + ( n - 1 )*abs( INCY ) ) otherwise.
|
||||||
@@ -368,9 +368,9 @@ sp_sgemv(char *trans, float alpha, SuperMatrix *A, float *x,
|
|||||||
|
|
||||||
/* Local variables */
|
/* Local variables */
|
||||||
NCformat *Astore;
|
NCformat *Astore;
|
||||||
float *Aval;
|
double *Aval;
|
||||||
int info;
|
int info;
|
||||||
float temp;
|
double temp;
|
||||||
int lenx, leny, i, j, irow;
|
int lenx, leny, i, j, irow;
|
||||||
int iy, jx, jy, kx, ky;
|
int iy, jx, jy, kx, ky;
|
||||||
int notran;
|
int notran;
|
||||||
|
@@ -20,8 +20,8 @@
|
|||||||
|
|
||||||
int
|
int
|
||||||
sp_sgemm(char *transa, int n,
|
sp_sgemm(char *transa, int n,
|
||||||
float alpha, SuperMatrix *A, float *b, int ldb,
|
double alpha, SuperMatrix *A, double *b, int ldb,
|
||||||
float beta, float *c, int ldc)
|
double beta, double *c, int ldc)
|
||||||
{
|
{
|
||||||
/* Purpose
|
/* Purpose
|
||||||
=======
|
=======
|
||||||
@@ -74,7 +74,7 @@ sp_sgemm(char *transa, int n,
|
|||||||
be at least zero.
|
be at least zero.
|
||||||
Unchanged on exit.
|
Unchanged on exit.
|
||||||
|
|
||||||
ALPHA - (input) float
|
ALPHA - (input) double
|
||||||
On entry, ALPHA specifies the scalar alpha.
|
On entry, ALPHA specifies the scalar alpha.
|
||||||
|
|
||||||
A - (input) SuperMatrix*
|
A - (input) SuperMatrix*
|
||||||
@@ -96,7 +96,7 @@ sp_sgemm(char *transa, int n,
|
|||||||
in the calling (sub) program. LDB must be at least max( 1, n ).
|
in the calling (sub) program. LDB must be at least max( 1, n ).
|
||||||
Unchanged on exit.
|
Unchanged on exit.
|
||||||
|
|
||||||
BETA - (input) float
|
BETA - (input) double
|
||||||
On entry, BETA specifies the scalar beta. When BETA is
|
On entry, BETA specifies the scalar beta. When BETA is
|
||||||
supplied as zero then C need not be set on input.
|
supplied as zero then C need not be set on input.
|
||||||
|
|
||||||
|
@@ -88,9 +88,9 @@ typedef struct {
|
|||||||
int *supno;
|
int *supno;
|
||||||
int *lsub; /* compressed L subscripts */
|
int *lsub; /* compressed L subscripts */
|
||||||
int *xlsub;
|
int *xlsub;
|
||||||
float *lusup; /* L supernodes */
|
double *lusup; /* L supernodes */
|
||||||
int *xlusup;
|
int *xlusup;
|
||||||
float *ucol; /* U columns */
|
double *ucol; /* U columns */
|
||||||
int *usub;
|
int *usub;
|
||||||
int *xusub;
|
int *xusub;
|
||||||
int nzlmax; /* current max size of lsub */
|
int nzlmax; /* current max size of lsub */
|
||||||
@@ -101,8 +101,8 @@ typedef struct {
|
|||||||
} GlobalLU_t;
|
} GlobalLU_t;
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
float for_lu;
|
double for_lu;
|
||||||
float total_needed;
|
double total_needed;
|
||||||
int expansions;
|
int expansions;
|
||||||
} mem_usage_t;
|
} mem_usage_t;
|
||||||
|
|
||||||
@@ -116,61 +116,61 @@ sgssv(superlu_options_t *, SuperMatrix *, int *, int *, SuperMatrix *,
|
|||||||
SuperMatrix *, SuperMatrix *, SuperLUStat_t *, int *);
|
SuperMatrix *, SuperMatrix *, SuperLUStat_t *, int *);
|
||||||
extern void
|
extern void
|
||||||
sgssvx(superlu_options_t *, SuperMatrix *, int *, int *, int *,
|
sgssvx(superlu_options_t *, SuperMatrix *, int *, int *, int *,
|
||||||
char *, float *, float *, SuperMatrix *, SuperMatrix *,
|
char *, double *, double *, SuperMatrix *, SuperMatrix *,
|
||||||
void *, int, SuperMatrix *, SuperMatrix *,
|
void *, int, SuperMatrix *, SuperMatrix *,
|
||||||
float *, float *, float *, float *,
|
double *, double *, double *, double *,
|
||||||
mem_usage_t *, SuperLUStat_t *, int *);
|
mem_usage_t *, SuperLUStat_t *, int *);
|
||||||
|
|
||||||
/* Supernodal LU factor related */
|
/* Supernodal LU factor related */
|
||||||
extern void
|
extern void
|
||||||
sCreate_CompCol_Matrix(SuperMatrix *, int, int, int, float *,
|
sCreate_CompCol_Matrix(SuperMatrix *, int, int, int, double *,
|
||||||
int *, int *, Stype_t, Dtype_t, Mtype_t);
|
int *, int *, Stype_t, Dtype_t, Mtype_t);
|
||||||
extern void
|
extern void
|
||||||
sCreate_CompRow_Matrix(SuperMatrix *, int, int, int, float *,
|
sCreate_CompRow_Matrix(SuperMatrix *, int, int, int, double *,
|
||||||
int *, int *, Stype_t, Dtype_t, Mtype_t);
|
int *, int *, Stype_t, Dtype_t, Mtype_t);
|
||||||
extern void
|
extern void
|
||||||
sCopy_CompCol_Matrix(SuperMatrix *, SuperMatrix *);
|
sCopy_CompCol_Matrix(SuperMatrix *, SuperMatrix *);
|
||||||
extern void
|
extern void
|
||||||
sCreate_Dense_Matrix(SuperMatrix *, int, int, float *, int,
|
sCreate_Dense_Matrix(SuperMatrix *, int, int, double *, int,
|
||||||
Stype_t, Dtype_t, Mtype_t);
|
Stype_t, Dtype_t, Mtype_t);
|
||||||
extern void
|
extern void
|
||||||
sCreate_SuperNode_Matrix(SuperMatrix *, int, int, int, float *,
|
sCreate_SuperNode_Matrix(SuperMatrix *, int, int, int, double *,
|
||||||
int *, int *, int *, int *, int *,
|
int *, int *, int *, int *, int *,
|
||||||
Stype_t, Dtype_t, Mtype_t);
|
Stype_t, Dtype_t, Mtype_t);
|
||||||
extern void
|
extern void
|
||||||
sCopy_Dense_Matrix(int, int, float *, int, float *, int);
|
sCopy_Dense_Matrix(int, int, double *, int, double *, int);
|
||||||
|
|
||||||
extern void countnz (const int, int *, int *, int *, GlobalLU_t *);
|
extern void countnz (const int, int *, int *, int *, GlobalLU_t *);
|
||||||
extern void fixupL (const int, const int *, GlobalLU_t *);
|
extern void fixupL (const int, const int *, GlobalLU_t *);
|
||||||
|
|
||||||
extern void sallocateA (int, int, float **, int **, int **);
|
extern void sallocateA (int, int, double **, int **, int **);
|
||||||
extern void sgstrf (superlu_options_t*, SuperMatrix*,
|
extern void sgstrf (superlu_options_t*, SuperMatrix*,
|
||||||
int, int, int*, void *, int, int *, int *,
|
int, int, int*, void *, int, int *, int *,
|
||||||
SuperMatrix *, SuperMatrix *, SuperLUStat_t*, int *);
|
SuperMatrix *, SuperMatrix *, SuperLUStat_t*, int *);
|
||||||
extern int ssnode_dfs (const int, const int, const int *, const int *,
|
extern int ssnode_dfs (const int, const int, const int *, const int *,
|
||||||
const int *, int *, int *, GlobalLU_t *);
|
const int *, int *, int *, GlobalLU_t *);
|
||||||
extern int ssnode_bmod (const int, const int, float *,
|
extern int ssnode_bmod (const int, const int, double *,
|
||||||
float *, GlobalLU_t *, SuperLUStat_t*);
|
double *, GlobalLU_t *, SuperLUStat_t*);
|
||||||
extern void spanel_dfs (const int, const int, const int, SuperMatrix *,
|
extern void spanel_dfs (const int, const int, const int, SuperMatrix *,
|
||||||
int *, int *, float *, int *, int *, int *,
|
int *, int *, double *, int *, int *, int *,
|
||||||
int *, int *, int *, int *, GlobalLU_t *);
|
int *, int *, int *, int *, GlobalLU_t *);
|
||||||
extern void spanel_bmod (const int, const int, const int, const int,
|
extern void spanel_bmod (const int, const int, const int, const int,
|
||||||
float *, float *, int *, int *,
|
double *, double *, int *, int *,
|
||||||
GlobalLU_t *, SuperLUStat_t*);
|
GlobalLU_t *, SuperLUStat_t*);
|
||||||
extern int scolumn_dfs (const int, const int, int *, int *, int *, int *,
|
extern int scolumn_dfs (const int, const int, int *, int *, int *, int *,
|
||||||
int *, int *, int *, int *, int *, GlobalLU_t *);
|
int *, int *, int *, int *, int *, GlobalLU_t *);
|
||||||
extern int scolumn_bmod (const int, const int, float *,
|
extern int scolumn_bmod (const int, const int, double *,
|
||||||
float *, int *, int *, int,
|
double *, int *, int *, int,
|
||||||
GlobalLU_t *, SuperLUStat_t*);
|
GlobalLU_t *, SuperLUStat_t*);
|
||||||
extern int scopy_to_ucol (int, int, int *, int *, int *,
|
extern int scopy_to_ucol (int, int, int *, int *, int *,
|
||||||
float *, GlobalLU_t *);
|
double *, GlobalLU_t *);
|
||||||
extern int spivotL (const int, const float, int *, int *,
|
extern int spivotL (const int, const double, int *, int *,
|
||||||
int *, int *, int *, GlobalLU_t *, SuperLUStat_t*);
|
int *, int *, int *, GlobalLU_t *, SuperLUStat_t*);
|
||||||
extern void spruneL (const int, const int *, const int, const int,
|
extern void spruneL (const int, const int *, const int, const int,
|
||||||
const int *, const int *, int *, GlobalLU_t *);
|
const int *, const int *, int *, GlobalLU_t *);
|
||||||
extern void sreadmt (int *, int *, int *, float **, int **, int **);
|
extern void sreadmt (int *, int *, int *, double **, int **, int **);
|
||||||
extern void sGenXtrue (int, int, float *, int);
|
extern void sGenXtrue (int, int, double *, int);
|
||||||
extern void sFillRHS (trans_t, int, float *, int, SuperMatrix *,
|
extern void sFillRHS (trans_t, int, double *, int, SuperMatrix *,
|
||||||
SuperMatrix *);
|
SuperMatrix *);
|
||||||
extern void sgstrs (trans_t, SuperMatrix *, SuperMatrix *, int *, int *,
|
extern void sgstrs (trans_t, SuperMatrix *, SuperMatrix *, int *, int *,
|
||||||
SuperMatrix *, SuperLUStat_t*, int *);
|
SuperMatrix *, SuperLUStat_t*, int *);
|
||||||
@@ -178,56 +178,56 @@ extern void sgstrs (trans_t, SuperMatrix *, SuperMatrix *, int *, int *,
|
|||||||
|
|
||||||
/* Driver related */
|
/* Driver related */
|
||||||
|
|
||||||
extern void sgsequ (SuperMatrix *, float *, float *, float *,
|
extern void sgsequ (SuperMatrix *, double *, double *, double *,
|
||||||
float *, float *, int *);
|
double *, double *, int *);
|
||||||
extern void slaqgs (SuperMatrix *, float *, float *, float,
|
extern void slaqgs (SuperMatrix *, double *, double *, double,
|
||||||
float, float, char *);
|
double, double, char *);
|
||||||
extern void sgscon (char *, SuperMatrix *, SuperMatrix *,
|
extern void sgscon (char *, SuperMatrix *, SuperMatrix *,
|
||||||
float, float *, SuperLUStat_t*, int *);
|
double, double *, SuperLUStat_t*, int *);
|
||||||
extern float sPivotGrowth(int, SuperMatrix *, int *,
|
extern double sPivotGrowth(int, SuperMatrix *, int *,
|
||||||
SuperMatrix *, SuperMatrix *);
|
SuperMatrix *, SuperMatrix *);
|
||||||
extern void sgsrfs (trans_t, SuperMatrix *, SuperMatrix *,
|
extern void sgsrfs (trans_t, SuperMatrix *, SuperMatrix *,
|
||||||
SuperMatrix *, int *, int *, char *, float *,
|
SuperMatrix *, int *, int *, char *, double *,
|
||||||
float *, SuperMatrix *, SuperMatrix *,
|
double *, SuperMatrix *, SuperMatrix *,
|
||||||
float *, float *, SuperLUStat_t*, int *);
|
double *, double *, SuperLUStat_t*, int *);
|
||||||
|
|
||||||
extern int sp_strsv (char *, char *, char *, SuperMatrix *,
|
extern int sp_strsv (char *, char *, char *, SuperMatrix *,
|
||||||
SuperMatrix *, float *, SuperLUStat_t*, int *);
|
SuperMatrix *, double *, SuperLUStat_t*, int *);
|
||||||
extern int sp_sgemv (char *, float, SuperMatrix *, float *,
|
extern int sp_sgemv (char *, double, SuperMatrix *, double *,
|
||||||
int, float, float *, int);
|
int, double, double *, int);
|
||||||
|
|
||||||
extern int sp_sgemm (char *, int, float,
|
extern int sp_sgemm (char *, int, double,
|
||||||
SuperMatrix *, float *, int, float,
|
SuperMatrix *, double *, int, double,
|
||||||
float *, int);
|
double *, int);
|
||||||
|
|
||||||
/* Memory-related */
|
/* Memory-related */
|
||||||
extern int sLUMemInit (fact_t, void *, int, int, int, int, int,
|
extern int sLUMemInit (fact_t, void *, int, int, int, int, int,
|
||||||
SuperMatrix *, SuperMatrix *,
|
SuperMatrix *, SuperMatrix *,
|
||||||
GlobalLU_t *, int **, float **);
|
GlobalLU_t *, int **, double **);
|
||||||
extern void sSetRWork (int, int, float *, float **, float **);
|
extern void sSetRWork (int, int, double *, double **, double **);
|
||||||
extern void sLUWorkFree (int *, float *, GlobalLU_t *);
|
extern void sLUWorkFree (int *, double *, GlobalLU_t *);
|
||||||
extern int sLUMemXpand (int, int, MemType, int *, GlobalLU_t *);
|
extern int sLUMemXpand (int, int, MemType, int *, GlobalLU_t *);
|
||||||
|
|
||||||
extern float *floatMalloc(int);
|
extern double *doubleMalloc(int);
|
||||||
extern float *floatCalloc(int);
|
extern double *doubleCalloc(int);
|
||||||
extern int smemory_usage(const int, const int, const int, const int);
|
extern int smemory_usage(const int, const int, const int, const int);
|
||||||
extern int sQuerySpace (SuperMatrix *, SuperMatrix *, mem_usage_t *);
|
extern int sQuerySpace (SuperMatrix *, SuperMatrix *, mem_usage_t *);
|
||||||
|
|
||||||
/* Auxiliary routines */
|
/* Auxiliary routines */
|
||||||
extern void sreadhb(int *, int *, int *, float **, int **, int **);
|
extern void sreadhb(int *, int *, int *, double **, int **, int **);
|
||||||
extern void sCompRow_to_CompCol(int, int, int, float*, int*, int*,
|
extern void sCompRow_to_CompCol(int, int, int, double*, int*, int*,
|
||||||
float **, int **, int **);
|
double **, int **, int **);
|
||||||
extern void sfill (float *, int, float);
|
extern void sfill (double *, int, double);
|
||||||
extern void sinf_norm_error (int, SuperMatrix *, float *);
|
extern void sinf_norm_error (int, SuperMatrix *, double *);
|
||||||
extern void PrintPerf (SuperMatrix *, SuperMatrix *, mem_usage_t *,
|
extern void PrintPerf (SuperMatrix *, SuperMatrix *, mem_usage_t *,
|
||||||
float, float, float *, float *, char *);
|
double, double, double *, double *, char *);
|
||||||
|
|
||||||
/* Routines for debugging */
|
/* Routines for debugging */
|
||||||
extern void sPrint_CompCol_Matrix(char *, SuperMatrix *);
|
extern void sPrint_CompCol_Matrix(char *, SuperMatrix *);
|
||||||
extern void sPrint_SuperNode_Matrix(char *, SuperMatrix *);
|
extern void sPrint_SuperNode_Matrix(char *, SuperMatrix *);
|
||||||
extern void sPrint_Dense_Matrix(char *, SuperMatrix *);
|
extern void sPrint_Dense_Matrix(char *, SuperMatrix *);
|
||||||
extern void print_lu_col(char *, int, int, int *, GlobalLU_t *);
|
extern void print_lu_col(char *, int, int, int *, GlobalLU_t *);
|
||||||
extern void check_tempv(int, float *);
|
extern void check_tempv(int, double *);
|
||||||
extern int print_int_vec(char *what, int n, int *vec);
|
extern int print_int_vec(char *what, int n, int *vec);
|
||||||
|
|
||||||
extern int sp_symetree(int *acolst, int *acolend, int *arow, int n, int *parent);
|
extern int sp_symetree(int *acolst, int *acolend, int *arow, int n, int *parent);
|
||||||
|
@@ -1,17 +1,17 @@
|
|||||||
/** \file opennl/superlu/strsv.c
|
/** \file opennl/superlu/strsv.c
|
||||||
* \ingroup opennl
|
* \ingroup opennl
|
||||||
*/
|
*/
|
||||||
int strsv_(char *, char *, char *, int *, float *, int *, float *, int *);
|
int strsv_(char *, char *, char *, int *, double *, int *, double *, int *);
|
||||||
|
|
||||||
|
|
||||||
/* Subroutine */ int strsv_(char *uplo, char *trans, char *diag, int *n,
|
/* Subroutine */ int strsv_(char *uplo, char *trans, char *diag, int *n,
|
||||||
float *a, int *lda, float *x, int *incx)
|
double *a, int *lda, double *x, int *incx)
|
||||||
{
|
{
|
||||||
|
|
||||||
|
|
||||||
/* Local variables */
|
/* Local variables */
|
||||||
static int info;
|
static int info;
|
||||||
static float temp;
|
static double temp;
|
||||||
static int i, j;
|
static int i, j;
|
||||||
extern int lsame_(char *, char *);
|
extern int lsame_(char *, char *);
|
||||||
static int ix, jx, kx;
|
static int ix, jx, kx;
|
||||||
|
@@ -27,15 +27,15 @@
|
|||||||
|
|
||||||
/* prototypes */
|
/* prototypes */
|
||||||
void sprint_lu_col(char *msg, int jcol, int pivrow, int *xprune, GlobalLU_t *Glu);
|
void sprint_lu_col(char *msg, int jcol, int pivrow, int *xprune, GlobalLU_t *Glu);
|
||||||
void scheck_tempv(int n, float *tempv);
|
void scheck_tempv(int n, double *tempv);
|
||||||
void sPrintPerf(SuperMatrix *, SuperMatrix *, mem_usage_t *,float , float ,
|
void sPrintPerf(SuperMatrix *, SuperMatrix *, mem_usage_t *,double , double ,
|
||||||
float *, float *, char *, SuperLUStat_t *);
|
double *, double *, char *, SuperLUStat_t *);
|
||||||
int print_float_vec(char *what, int n, float *vec);
|
int print_double_vec(char *what, int n, double *vec);
|
||||||
/* ********** */
|
/* ********** */
|
||||||
|
|
||||||
void
|
void
|
||||||
sCreate_CompCol_Matrix(SuperMatrix *A, int m, int n, int nnz,
|
sCreate_CompCol_Matrix(SuperMatrix *A, int m, int n, int nnz,
|
||||||
float *nzval, int *rowind, int *colptr,
|
double *nzval, int *rowind, int *colptr,
|
||||||
Stype_t stype, Dtype_t dtype, Mtype_t mtype)
|
Stype_t stype, Dtype_t dtype, Mtype_t mtype)
|
||||||
{
|
{
|
||||||
NCformat *Astore;
|
NCformat *Astore;
|
||||||
@@ -56,7 +56,7 @@ sCreate_CompCol_Matrix(SuperMatrix *A, int m, int n, int nnz,
|
|||||||
|
|
||||||
void
|
void
|
||||||
sCreate_CompRow_Matrix(SuperMatrix *A, int m, int n, int nnz,
|
sCreate_CompRow_Matrix(SuperMatrix *A, int m, int n, int nnz,
|
||||||
float *nzval, int *colind, int *rowptr,
|
double *nzval, int *colind, int *rowptr,
|
||||||
Stype_t stype, Dtype_t dtype, Mtype_t mtype)
|
Stype_t stype, Dtype_t dtype, Mtype_t mtype)
|
||||||
{
|
{
|
||||||
NRformat *Astore;
|
NRformat *Astore;
|
||||||
@@ -91,14 +91,14 @@ sCopy_CompCol_Matrix(SuperMatrix *A, SuperMatrix *B)
|
|||||||
Bstore = (NCformat *) B->Store;
|
Bstore = (NCformat *) B->Store;
|
||||||
Bstore->nnz = nnz = Astore->nnz;
|
Bstore->nnz = nnz = Astore->nnz;
|
||||||
for (i = 0; i < nnz; ++i)
|
for (i = 0; i < nnz; ++i)
|
||||||
((float *)Bstore->nzval)[i] = ((float *)Astore->nzval)[i];
|
((double *)Bstore->nzval)[i] = ((double *)Astore->nzval)[i];
|
||||||
for (i = 0; i < nnz; ++i) Bstore->rowind[i] = Astore->rowind[i];
|
for (i = 0; i < nnz; ++i) Bstore->rowind[i] = Astore->rowind[i];
|
||||||
for (i = 0; i <= ncol; ++i) Bstore->colptr[i] = Astore->colptr[i];
|
for (i = 0; i <= ncol; ++i) Bstore->colptr[i] = Astore->colptr[i];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void
|
void
|
||||||
sCreate_Dense_Matrix(SuperMatrix *X, int m, int n, float *x, int ldx,
|
sCreate_Dense_Matrix(SuperMatrix *X, int m, int n, double *x, int ldx,
|
||||||
Stype_t stype, Dtype_t dtype, Mtype_t mtype)
|
Stype_t stype, Dtype_t dtype, Mtype_t mtype)
|
||||||
{
|
{
|
||||||
DNformat *Xstore;
|
DNformat *Xstore;
|
||||||
@@ -112,12 +112,12 @@ sCreate_Dense_Matrix(SuperMatrix *X, int m, int n, float *x, int ldx,
|
|||||||
if ( !(X->Store) ) ABORT("SUPERLU_MALLOC fails for X->Store");
|
if ( !(X->Store) ) ABORT("SUPERLU_MALLOC fails for X->Store");
|
||||||
Xstore = (DNformat *) X->Store;
|
Xstore = (DNformat *) X->Store;
|
||||||
Xstore->lda = ldx;
|
Xstore->lda = ldx;
|
||||||
Xstore->nzval = (float *) x;
|
Xstore->nzval = (double *) x;
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
sCopy_Dense_Matrix(int M, int N, float *X, int ldx,
|
sCopy_Dense_Matrix(int M, int N, double *X, int ldx,
|
||||||
float *Y, int ldy)
|
double *Y, int ldy)
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
*
|
*
|
||||||
@@ -135,7 +135,7 @@ sCopy_Dense_Matrix(int M, int N, float *X, int ldx,
|
|||||||
|
|
||||||
void
|
void
|
||||||
sCreate_SuperNode_Matrix(SuperMatrix *L, int m, int n, int nnz,
|
sCreate_SuperNode_Matrix(SuperMatrix *L, int m, int n, int nnz,
|
||||||
float *nzval, int *nzval_colptr, int *rowind,
|
double *nzval, int *nzval_colptr, int *rowind,
|
||||||
int *rowind_colptr, int *col_to_sup, int *sup_to_col,
|
int *rowind_colptr, int *col_to_sup, int *sup_to_col,
|
||||||
Stype_t stype, Dtype_t dtype, Mtype_t mtype)
|
Stype_t stype, Dtype_t dtype, Mtype_t mtype)
|
||||||
{
|
{
|
||||||
@@ -166,14 +166,14 @@ sCreate_SuperNode_Matrix(SuperMatrix *L, int m, int n, int nnz,
|
|||||||
*/
|
*/
|
||||||
void
|
void
|
||||||
sCompRow_to_CompCol(int m, int n, int nnz,
|
sCompRow_to_CompCol(int m, int n, int nnz,
|
||||||
float *a, int *colind, int *rowptr,
|
double *a, int *colind, int *rowptr,
|
||||||
float **at, int **rowind, int **colptr)
|
double **at, int **rowind, int **colptr)
|
||||||
{
|
{
|
||||||
register int i, j, col, relpos;
|
register int i, j, col, relpos;
|
||||||
int *marker;
|
int *marker;
|
||||||
|
|
||||||
/* Allocate storage for another copy of the matrix. */
|
/* Allocate storage for another copy of the matrix. */
|
||||||
*at = (float *) floatMalloc(nnz);
|
*at = (double *) doubleMalloc(nnz);
|
||||||
*rowind = (int *) intMalloc(nnz);
|
*rowind = (int *) intMalloc(nnz);
|
||||||
*colptr = (int *) intMalloc(n+1);
|
*colptr = (int *) intMalloc(n+1);
|
||||||
marker = (int *) intCalloc(n);
|
marker = (int *) intCalloc(n);
|
||||||
@@ -207,13 +207,13 @@ sPrint_CompCol_Matrix(char *what, SuperMatrix *A)
|
|||||||
{
|
{
|
||||||
NCformat *Astore;
|
NCformat *Astore;
|
||||||
register int i,n;
|
register int i,n;
|
||||||
float *dp;
|
double *dp;
|
||||||
|
|
||||||
printf("\nCompCol matrix %s:\n", what);
|
printf("\nCompCol matrix %s:\n", what);
|
||||||
printf("Stype %d, Dtype %d, Mtype %d\n", A->Stype,A->Dtype,A->Mtype);
|
printf("Stype %d, Dtype %d, Mtype %d\n", A->Stype,A->Dtype,A->Mtype);
|
||||||
n = A->ncol;
|
n = A->ncol;
|
||||||
Astore = (NCformat *) A->Store;
|
Astore = (NCformat *) A->Store;
|
||||||
dp = (float *) Astore->nzval;
|
dp = (double *) Astore->nzval;
|
||||||
printf("nrow %d, ncol %d, nnz %d\n", A->nrow,A->ncol,Astore->nnz);
|
printf("nrow %d, ncol %d, nnz %d\n", A->nrow,A->ncol,Astore->nnz);
|
||||||
printf("nzval: ");
|
printf("nzval: ");
|
||||||
for (i = 0; i < Astore->colptr[n]; ++i) printf("%f ", dp[i]);
|
for (i = 0; i < Astore->colptr[n]; ++i) printf("%f ", dp[i]);
|
||||||
@@ -230,14 +230,14 @@ sPrint_SuperNode_Matrix(char *what, SuperMatrix *A)
|
|||||||
{
|
{
|
||||||
SCformat *Astore;
|
SCformat *Astore;
|
||||||
register int i, j, k, c, d, n, nsup;
|
register int i, j, k, c, d, n, nsup;
|
||||||
float *dp;
|
double *dp;
|
||||||
int *col_to_sup, *sup_to_col, *rowind, *rowind_colptr;
|
int *col_to_sup, *sup_to_col, *rowind, *rowind_colptr;
|
||||||
|
|
||||||
printf("\nSuperNode matrix %s:\n", what);
|
printf("\nSuperNode matrix %s:\n", what);
|
||||||
printf("Stype %d, Dtype %d, Mtype %d\n", A->Stype,A->Dtype,A->Mtype);
|
printf("Stype %d, Dtype %d, Mtype %d\n", A->Stype,A->Dtype,A->Mtype);
|
||||||
n = A->ncol;
|
n = A->ncol;
|
||||||
Astore = (SCformat *) A->Store;
|
Astore = (SCformat *) A->Store;
|
||||||
dp = (float *) Astore->nzval;
|
dp = (double *) Astore->nzval;
|
||||||
col_to_sup = Astore->col_to_sup;
|
col_to_sup = Astore->col_to_sup;
|
||||||
sup_to_col = Astore->sup_to_col;
|
sup_to_col = Astore->sup_to_col;
|
||||||
rowind_colptr = Astore->rowind_colptr;
|
rowind_colptr = Astore->rowind_colptr;
|
||||||
@@ -279,12 +279,12 @@ sPrint_Dense_Matrix(char *what, SuperMatrix *A)
|
|||||||
{
|
{
|
||||||
DNformat *Astore;
|
DNformat *Astore;
|
||||||
register int i;
|
register int i;
|
||||||
float *dp;
|
double *dp;
|
||||||
|
|
||||||
printf("\nDense matrix %s:\n", what);
|
printf("\nDense matrix %s:\n", what);
|
||||||
printf("Stype %d, Dtype %d, Mtype %d\n", A->Stype,A->Dtype,A->Mtype);
|
printf("Stype %d, Dtype %d, Mtype %d\n", A->Stype,A->Dtype,A->Mtype);
|
||||||
Astore = (DNformat *) A->Store;
|
Astore = (DNformat *) A->Store;
|
||||||
dp = (float *) Astore->nzval;
|
dp = (double *) Astore->nzval;
|
||||||
printf("nrow %d, ncol %d, lda %d\n", A->nrow,A->ncol,Astore->lda);
|
printf("nrow %d, ncol %d, lda %d\n", A->nrow,A->ncol,Astore->lda);
|
||||||
printf("\nnzval: ");
|
printf("\nnzval: ");
|
||||||
for (i = 0; i < A->nrow; ++i) printf("%f ", dp[i]);
|
for (i = 0; i < A->nrow; ++i) printf("%f ", dp[i]);
|
||||||
@@ -301,9 +301,9 @@ sprint_lu_col(char *msg, int jcol, int pivrow, int *xprune, GlobalLU_t *Glu)
|
|||||||
int i, k, fsupc;
|
int i, k, fsupc;
|
||||||
int *xsup, *supno;
|
int *xsup, *supno;
|
||||||
int *xlsub, *lsub;
|
int *xlsub, *lsub;
|
||||||
float *lusup;
|
double *lusup;
|
||||||
int *xlusup;
|
int *xlusup;
|
||||||
float *ucol;
|
double *ucol;
|
||||||
int *usub, *xusub;
|
int *usub, *xusub;
|
||||||
|
|
||||||
xsup = Glu->xsup;
|
xsup = Glu->xsup;
|
||||||
@@ -339,7 +339,7 @@ sprint_lu_col(char *msg, int jcol, int pivrow, int *xprune, GlobalLU_t *Glu)
|
|||||||
* Check whether tempv[] == 0. This should be true before and after
|
* Check whether tempv[] == 0. This should be true before and after
|
||||||
* calling any numeric routines, i.e., "panel_bmod" and "column_bmod".
|
* calling any numeric routines, i.e., "panel_bmod" and "column_bmod".
|
||||||
*/
|
*/
|
||||||
void scheck_tempv(int n, float *tempv)
|
void scheck_tempv(int n, double *tempv)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
@@ -354,12 +354,12 @@ void scheck_tempv(int n, float *tempv)
|
|||||||
|
|
||||||
|
|
||||||
void
|
void
|
||||||
sGenXtrue(int n, int nrhs, float *x, int ldx)
|
sGenXtrue(int n, int nrhs, double *x, int ldx)
|
||||||
{
|
{
|
||||||
int i, j;
|
int i, j;
|
||||||
for (j = 0; j < nrhs; ++j)
|
for (j = 0; j < nrhs; ++j)
|
||||||
for (i = 0; i < n; ++i) {
|
for (i = 0; i < n; ++i) {
|
||||||
x[i + j*ldx] = 1.0;/* + (float)(i+1.)/n;*/
|
x[i + j*ldx] = 1.0;/* + (double)(i+1.)/n;*/
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -367,13 +367,13 @@ sGenXtrue(int n, int nrhs, float *x, int ldx)
|
|||||||
* Let rhs[i] = sum of i-th row of A, so the solution vector is all 1's
|
* Let rhs[i] = sum of i-th row of A, so the solution vector is all 1's
|
||||||
*/
|
*/
|
||||||
void
|
void
|
||||||
sFillRHS(trans_t trans, int nrhs, float *x, int ldx,
|
sFillRHS(trans_t trans, int nrhs, double *x, int ldx,
|
||||||
SuperMatrix *A, SuperMatrix *B)
|
SuperMatrix *A, SuperMatrix *B)
|
||||||
{
|
{
|
||||||
DNformat *Bstore;
|
DNformat *Bstore;
|
||||||
float *rhs;
|
double *rhs;
|
||||||
float one = 1.0;
|
double one = 1.0;
|
||||||
float zero = 0.0;
|
double zero = 0.0;
|
||||||
int ldc;
|
int ldc;
|
||||||
char transc[1];
|
char transc[1];
|
||||||
|
|
||||||
@@ -390,10 +390,10 @@ sFillRHS(trans_t trans, int nrhs, float *x, int ldx,
|
|||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Fills a float precision array with a given value.
|
* Fills a double precision array with a given value.
|
||||||
*/
|
*/
|
||||||
void
|
void
|
||||||
sfill(float *a, int alen, float dval)
|
sfill(double *a, int alen, double dval)
|
||||||
{
|
{
|
||||||
register int i;
|
register int i;
|
||||||
for (i = 0; i < alen; i++) a[i] = dval;
|
for (i = 0; i < alen; i++) a[i] = dval;
|
||||||
@@ -404,11 +404,11 @@ sfill(float *a, int alen, float dval)
|
|||||||
/*
|
/*
|
||||||
* Check the inf-norm of the error vector
|
* Check the inf-norm of the error vector
|
||||||
*/
|
*/
|
||||||
void sinf_norm_error(int nrhs, SuperMatrix *X, float *xtrue)
|
void sinf_norm_error(int nrhs, SuperMatrix *X, double *xtrue)
|
||||||
{
|
{
|
||||||
DNformat *Xstore;
|
DNformat *Xstore;
|
||||||
float err, xnorm;
|
double err, xnorm;
|
||||||
float *Xmat, *soln_work;
|
double *Xmat, *soln_work;
|
||||||
int i, j;
|
int i, j;
|
||||||
|
|
||||||
Xstore = X->Store;
|
Xstore = X->Store;
|
||||||
@@ -431,8 +431,8 @@ void sinf_norm_error(int nrhs, SuperMatrix *X, float *xtrue)
|
|||||||
/* Print performance of the code. */
|
/* Print performance of the code. */
|
||||||
void
|
void
|
||||||
sPrintPerf(SuperMatrix *L, SuperMatrix *U, mem_usage_t *mem_usage,
|
sPrintPerf(SuperMatrix *L, SuperMatrix *U, mem_usage_t *mem_usage,
|
||||||
float rpg, float rcond, float *ferr,
|
double rpg, double rcond, double *ferr,
|
||||||
float *berr, char *equed, SuperLUStat_t *stat)
|
double *berr, char *equed, SuperLUStat_t *stat)
|
||||||
{
|
{
|
||||||
SCformat *Lstore;
|
SCformat *Lstore;
|
||||||
NCformat *Ustore;
|
NCformat *Ustore;
|
||||||
@@ -475,7 +475,7 @@ sPrintPerf(SuperMatrix *L, SuperMatrix *U, mem_usage_t *mem_usage,
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
int print_float_vec(char *what, int n, float *vec)
|
int print_double_vec(char *what, int n, double *vec)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
printf("%s: n %d\n", what, n);
|
printf("%s: n %d\n", what, n);
|
||||||
|
@@ -28,8 +28,8 @@
|
|||||||
/* prototypes */
|
/* prototypes */
|
||||||
flops_t LUFactFlops(SuperLUStat_t *stat);
|
flops_t LUFactFlops(SuperLUStat_t *stat);
|
||||||
flops_t LUSolveFlops(SuperLUStat_t *stat);
|
flops_t LUSolveFlops(SuperLUStat_t *stat);
|
||||||
float SpaSize(int n, int np, float sum_npw);
|
double SpaSize(int n, int np, double sum_npw);
|
||||||
float DenseSize(int n, float sum_nw);
|
double DenseSize(int n, double sum_nw);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Global statistics variale
|
* Global statistics variale
|
||||||
@@ -332,27 +332,27 @@ void super_stats(int nsuper, int *xsup)
|
|||||||
|
|
||||||
for (i = 0; i <= nsuper; i++) {
|
for (i = 0; i <= nsuper; i++) {
|
||||||
isize = xsup[i+1] - xsup[i];
|
isize = xsup[i+1] - xsup[i];
|
||||||
whichb = (float) isize / max_sup_size * NBUCKS;
|
whichb = (double) isize / max_sup_size * NBUCKS;
|
||||||
if (whichb >= NBUCKS) whichb = NBUCKS - 1;
|
if (whichb >= NBUCKS) whichb = NBUCKS - 1;
|
||||||
bucket[whichb]++;
|
bucket[whichb]++;
|
||||||
}
|
}
|
||||||
|
|
||||||
printf("\tHistogram of supernode sizes:\n");
|
printf("\tHistogram of supernode sizes:\n");
|
||||||
for (i = 0; i < NBUCKS; i++) {
|
for (i = 0; i < NBUCKS; i++) {
|
||||||
bl = (float) i * max_sup_size / NBUCKS;
|
bl = (double) i * max_sup_size / NBUCKS;
|
||||||
bh = (float) (i+1) * max_sup_size / NBUCKS;
|
bh = (double) (i+1) * max_sup_size / NBUCKS;
|
||||||
printf("\tsnode: %d-%d\t\t%d\n", bl+1, bh, bucket[i]);
|
printf("\tsnode: %d-%d\t\t%d\n", bl+1, bh, bucket[i]);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
float SpaSize(int n, int np, float sum_npw)
|
double SpaSize(int n, int np, double sum_npw)
|
||||||
{
|
{
|
||||||
return (sum_npw*8 + np*8 + n*4)/1024.;
|
return (sum_npw*8 + np*8 + n*4)/1024.;
|
||||||
}
|
}
|
||||||
|
|
||||||
float DenseSize(int n, float sum_nw)
|
double DenseSize(int n, double sum_nw)
|
||||||
{
|
{
|
||||||
return (sum_nw*8 + n*8)/1024.;;
|
return (sum_nw*8 + n*8)/1024.;;
|
||||||
}
|
}
|
||||||
|
@@ -97,7 +97,7 @@ typedef enum {
|
|||||||
RCOND, /* estimate reciprocal condition number */
|
RCOND, /* estimate reciprocal condition number */
|
||||||
SOLVE, /* forward and back solves */
|
SOLVE, /* forward and back solves */
|
||||||
REFINE, /* perform iterative refinement */
|
REFINE, /* perform iterative refinement */
|
||||||
SLU_FLOAT, /* time spent in floating-point operations */
|
SLU_FLOAT, /* time spent in doubleing-point operations */
|
||||||
TRSV, /* fraction of FACT spent in xTRSV */
|
TRSV, /* fraction of FACT spent in xTRSV */
|
||||||
GEMV, /* fraction of FACT spent in xGEMV */
|
GEMV, /* fraction of FACT spent in xGEMV */
|
||||||
FERR, /* estimate error bounds after iterative refinement */
|
FERR, /* estimate error bounds after iterative refinement */
|
||||||
@@ -108,7 +108,7 @@ typedef enum {
|
|||||||
/***********************************************************************
|
/***********************************************************************
|
||||||
* Type definitions
|
* Type definitions
|
||||||
***********************************************************************/
|
***********************************************************************/
|
||||||
typedef float flops_t;
|
typedef double flops_t;
|
||||||
typedef unsigned char Logical;
|
typedef unsigned char Logical;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
Reference in New Issue
Block a user