Added block_size templates to BlockedMatrix and Reorder functions

This commit is contained in:
T.D. (Tongdong) Qiu 2020-06-25 11:51:41 +02:00
parent 8f9fa15ecd
commit b692b66d3a
7 changed files with 114 additions and 73 deletions

View File

@ -65,6 +65,7 @@ namespace bda
template <unsigned int block_size>
bool BILU0<block_size>::init(BlockedMatrix *mat)
{
const unsigned int bs = block_size;
double t1 = 0.0, t2 = 0.0;
BlockedMatrix *CSCmat = nullptr;
@ -79,13 +80,13 @@ namespace bda
CSCmat = new BlockedMatrix();
CSCmat->Nb = Nb;
CSCmat->nnzbs = nnzbs;
CSCmat->nnzValues = new Block[nnzbs];
CSCmat->nnzValues = new double[nnzbs * bs * bs];
CSCmat->colIndices = new int[nnzbs];
CSCmat->rowPointers = new int[Nb + 1];
if(verbosity >= 3){
t1 = second();
}
bcsr_to_bcsc(mat->nnzValues, mat->colIndices, mat->rowPointers, CSCmat->nnzValues, CSCmat->colIndices, CSCmat->rowPointers, mat->Nb);
bcsr_to_bcsc<block_size>(mat->nnzValues, mat->colIndices, mat->rowPointers, CSCmat->nnzValues, CSCmat->colIndices, CSCmat->rowPointers, mat->Nb);
if(verbosity >= 3){
t2 = second();
std::ostringstream out;
@ -97,7 +98,7 @@ namespace bda
if(verbosity >= 3){
t1 = second();
}
rMat = allocateBlockedMatrix(mat->Nb, mat->nnzbs);
rMat = allocateBlockedMatrix<block_size>(mat->Nb, mat->nnzbs);
LUMat = soft_copyBlockedMatrix(rMat);
if (level_scheduling) {
rowsPerColor = findLevelScheduling(mat->colIndices, mat->rowPointers, CSCmat->colIndices, CSCmat->rowPointers, mat->Nb, &numColors, toOrder, fromOrder);
@ -115,12 +116,12 @@ namespace bda
}
diagIndex = new int[mat->Nb];
invDiagVals = new Block[mat->Nb];
invDiagVals = new double[mat->Nb * bs * bs];
LMat = allocateBlockedMatrix(mat->Nb, (mat->nnzbs - mat->Nb) / 2);
UMat = allocateBlockedMatrix(mat->Nb, (mat->nnzbs - mat->Nb) / 2);
LMat = allocateBlockedMatrix<block_size>(mat->Nb, (mat->nnzbs - mat->Nb) / 2);
UMat = allocateBlockedMatrix<block_size>(mat->Nb, (mat->nnzbs - mat->Nb) / 2);
LUMat->nnzValues = new Block[mat->nnzbs];
LUMat->nnzValues = new double[mat->nnzbs * bs * bs];
if (level_scheduling) {
delete[] CSCmat->nnzValues;
@ -130,22 +131,22 @@ namespace bda
}
s.Lvals = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(Block) * LMat->nnzbs);
s.Uvals = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(Block) * UMat->nnzbs);
s.Lvals = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(double) * bs * bs * LMat->nnzbs);
s.Uvals = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(double) * bs * bs * UMat->nnzbs);
s.Lcols = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * LMat->nnzbs);
s.Ucols = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * UMat->nnzbs);
s.Lrows = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * (LMat->Nb + 1));
s.Urows = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * (UMat->Nb + 1));
s.invDiagVals = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(Block) * mat->Nb);
s.invDiagVals = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(double) * bs * bs * mat->Nb);
s.rowsPerColor = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * (numColors + 1));
queue->enqueueWriteBuffer(s.Lvals, CL_TRUE, 0, LMat->nnzbs * sizeof(Block), LMat->nnzValues);
queue->enqueueWriteBuffer(s.Uvals, CL_TRUE, 0, UMat->nnzbs * sizeof(Block), UMat->nnzValues);
queue->enqueueWriteBuffer(s.Lvals, CL_TRUE, 0, LMat->nnzbs * sizeof(double) * bs * bs, LMat->nnzValues);
queue->enqueueWriteBuffer(s.Uvals, CL_TRUE, 0, UMat->nnzbs * sizeof(double) * bs * bs, UMat->nnzValues);
queue->enqueueWriteBuffer(s.Lcols, CL_TRUE, 0, LMat->nnzbs * sizeof(int), LMat->colIndices);
queue->enqueueWriteBuffer(s.Ucols, CL_TRUE, 0, UMat->nnzbs * sizeof(int), UMat->colIndices);
queue->enqueueWriteBuffer(s.Lrows, CL_TRUE, 0, (LMat->Nb + 1) * sizeof(int), LMat->rowPointers);
queue->enqueueWriteBuffer(s.Urows, CL_TRUE, 0, (UMat->Nb + 1) * sizeof(int), UMat->rowPointers);
queue->enqueueWriteBuffer(s.invDiagVals, CL_TRUE, 0, mat->Nb * sizeof(Block), invDiagVals);
queue->enqueueWriteBuffer(s.invDiagVals, CL_TRUE, 0, mat->Nb * sizeof(double) * bs * bs, invDiagVals);
int *rowsPerColorPrefix = new int[numColors + 1];
int prefix = 0;
@ -163,11 +164,12 @@ namespace bda
template <unsigned int block_size>
bool BILU0<block_size>::create_preconditioner(BlockedMatrix *mat)
{
const unsigned int bs = block_size;
double t1 = 0.0, t2 = 0.0;
if (verbosity >= 3){
t1 = second();
}
blocked_reorder_matrix_by_pattern(mat, toOrder, fromOrder, rMat);
blocked_reorder_matrix_by_pattern<block_size>(mat, toOrder, fromOrder, rMat);
if (verbosity >= 3){
t2 = second();
std::ostringstream out;
@ -179,7 +181,7 @@ namespace bda
if (verbosity >= 3){
t1 = second();
}
memcpy(LUMat->nnzValues, rMat->nnzValues, sizeof(Block) * rMat->nnzbs);
memcpy(LUMat->nnzValues, rMat->nnzValues, sizeof(double) * bs * bs * rMat->nnzbs);
if (verbosity >= 3){
t2 = second();
std::ostringstream out;
@ -189,10 +191,9 @@ namespace bda
int i, j, ij, ik, jk;
int iRowStart, iRowEnd, jRowEnd;
Block pivot;
double pivot[bs * bs];
int LSize = 0;
const int blockSquare = block_size * block_size;
if (verbosity >= 3){
t1 = second();
@ -221,9 +222,9 @@ namespace bda
LSize++;
// calculate the pivot of this row
blockMult(LUMat->nnzValues[ij], invDiagVals[j], pivot);
blockMult<bs>(LUMat->nnzValues + ij * bs * bs, invDiagVals + j * bs * bs, &pivot[0]);
memcpy(LUMat->nnzValues[ij], &pivot, sizeof(double)*block_size * block_size);
memcpy(LUMat->nnzValues + ij * bs * bs, &pivot[0], sizeof(double) * block_size * block_size);
jRowEnd = LUMat->rowPointers[j + 1];
jk = diagIndex[j] + 1;
@ -231,7 +232,7 @@ namespace bda
// substract that row scaled by the pivot from this row.
while (ik < iRowEnd && jk < jRowEnd) {
if (LUMat->colIndices[ik] == LUMat->colIndices[jk]) {
blockMultSub(LUMat->nnzValues[ik], pivot, LUMat->nnzValues[jk]);
blockMultSub<bs>(LUMat->nnzValues + ik * bs * bs, pivot, LUMat->nnzValues + jk * bs * bs);
ik++;
jk++;
} else {
@ -243,9 +244,9 @@ namespace bda
}
}
// store the inverse in the diagonal!
blockInvert3x3(LUMat->nnzValues[ij], invDiagVals[i]);
blockInvert3x3(LUMat->nnzValues + ij * bs * bs, invDiagVals + i * bs * bs);
memcpy(LUMat->nnzValues[ij], invDiagVals[i], sizeof(double)*block_size * block_size);
memcpy(LUMat->nnzValues + ij * bs * bs, invDiagVals + i * bs * bs, sizeof(double) * bs * bs);
}
LMat->rowPointers[0] = 0;
@ -256,7 +257,7 @@ namespace bda
int offsetL = LMat->rowPointers[i];
int rowSize = diagIndex[i] - LUMat->rowPointers[i];
int offsetLU = LUMat->rowPointers[i];
memcpy(LMat->nnzValues[offsetL], LUMat->nnzValues[offsetLU], sizeof(double) * blockSquare * rowSize);
memcpy(LMat->nnzValues + offsetL * bs * bs, LUMat->nnzValues + offsetLU * bs * bs, sizeof(double) * bs * bs * rowSize);
memcpy(LMat->colIndices + offsetL, LUMat->colIndices + offsetLU, sizeof(int) * rowSize);
offsetL += rowSize;
LMat->rowPointers[i + 1] = offsetL;
@ -268,7 +269,7 @@ namespace bda
int offsetU = UMat->rowPointers[URowIndex];
int rowSize = LUMat->rowPointers[i + 1] - diagIndex[i] - 1;
int offsetLU = diagIndex[i] + 1;
memcpy(UMat->nnzValues[offsetU], LUMat->nnzValues[offsetLU], sizeof(double) * blockSquare * rowSize);
memcpy(UMat->nnzValues + offsetU * bs * bs, LUMat->nnzValues + offsetLU * bs * bs, sizeof(double) * bs * bs * rowSize);
memcpy(UMat->colIndices + offsetU, LUMat->colIndices + offsetLU, sizeof(int) * rowSize);
offsetU += rowSize;
UMat->rowPointers[URowIndex + 1] = offsetU;
@ -291,9 +292,9 @@ namespace bda
queue->enqueueWriteBuffer(s.Urows, CL_TRUE, 0, (UMat->Nb + 1) * sizeof(int), UMat->rowPointers);
pattern_uploaded = true;
}
queue->enqueueWriteBuffer(s.Lvals, CL_TRUE, 0, LMat->nnzbs * sizeof(Block), LMat->nnzValues);
queue->enqueueWriteBuffer(s.Uvals, CL_TRUE, 0, UMat->nnzbs * sizeof(Block), UMat->nnzValues);
queue->enqueueWriteBuffer(s.invDiagVals, CL_TRUE, 0, Nb * sizeof(Block), invDiagVals);
queue->enqueueWriteBuffer(s.Lvals, CL_TRUE, 0, LMat->nnzbs * sizeof(double) * bs * bs, LMat->nnzValues);
queue->enqueueWriteBuffer(s.Uvals, CL_TRUE, 0, UMat->nnzbs * sizeof(double) * bs * bs, UMat->nnzValues);
queue->enqueueWriteBuffer(s.invDiagVals, CL_TRUE, 0, Nb * sizeof(double) * bs * bs, invDiagVals);
if (verbosity >= 3) {
t2 = second();
std::ostringstream out;

View File

@ -47,7 +47,7 @@ namespace bda
int nnzbs; // number of blocks of the matrix
BlockedMatrix *LMat, *UMat, *LUMat;
BlockedMatrix *rMat = nullptr; // only used with PAR_SIM
Block *invDiagVals;
double *invDiagVals;
int *diagIndex, *rowsPerColor;
int *toOrder, *fromOrder;
int numColors;

View File

@ -30,10 +30,11 @@ using bda::BlockedMatrix;
namespace bda
{
template <unsigned int block_size>
BlockedMatrix *allocateBlockedMatrix(int Nb, int nnzbs) {
BlockedMatrix *mat = new BlockedMatrix();
mat->nnzValues = new Block[nnzbs];
mat->nnzValues = new double[nnzbs * block_size * block_size];
mat->colIndices = new int[nnzbs];
mat->rowPointers = new int[Nb + 1];
mat->Nb = Nb;
@ -65,11 +66,13 @@ BlockedMatrix *soft_copyBlockedMatrix(BlockedMatrix *mat) {
/*Sort a row of matrix elements from a blocked CSR-format.*/
void sortBlockedRow(int *colIndices, Block *data, int left, int right) {
template <unsigned int block_size>
void sortBlockedRow(int *colIndices, double *data, int left, int right) {
const unsigned int bs = block_size;
int l = left;
int r = right;
int middle = colIndices[(l + r) >> 1];
double lDatum[BLOCK_SIZE * BLOCK_SIZE];
double lDatum[bs * bs];
do {
while (colIndices[l] < middle)
l++;
@ -79,9 +82,9 @@ void sortBlockedRow(int *colIndices, Block *data, int left, int right) {
int lColIndex = colIndices[l];
colIndices[l] = colIndices[r];
colIndices[r] = lColIndex;
memcpy(lDatum, data + l, sizeof(double) * BLOCK_SIZE * BLOCK_SIZE);
memcpy(data + l, data + r, sizeof(double) * BLOCK_SIZE * BLOCK_SIZE);
memcpy(data + r, lDatum, sizeof(double) * BLOCK_SIZE * BLOCK_SIZE);
memcpy(lDatum, data + l * bs * bs, sizeof(double) * bs * bs);
memcpy(data + l * bs * bs, data + r * bs * bs, sizeof(double) * bs * bs);
memcpy(data + r * bs * bs, lDatum, sizeof(double) * bs * bs);
l++;
r--;
@ -89,38 +92,40 @@ void sortBlockedRow(int *colIndices, Block *data, int left, int right) {
} while (l < r);
if (left < r)
sortBlockedRow(colIndices, data, left, r);
sortBlockedRow<bs>(colIndices, data, left, r);
if (right > l)
sortBlockedRow(colIndices, data, l, right);
sortBlockedRow<bs>(colIndices, data, l, right);
}
// LUMat->nnzValues[ik] = LUMat->nnzValues[ik] - (pivot * LUMat->nnzValues[jk]) in ilu decomposition
// a = a - (b * c)
void blockMultSub(Block a, Block b, Block c)
template <unsigned int block_size>
void blockMultSub(double *a, double *b, double *c)
{
for (int row = 0; row < BLOCK_SIZE; row++) {
for (int col = 0; col < BLOCK_SIZE; col++) {
for (int row = 0; row < block_size; row++) {
for (int col = 0; col < block_size; col++) {
double temp = 0.0;
for (int k = 0; k < BLOCK_SIZE; k++) {
temp += b[BLOCK_SIZE * row + k] * c[BLOCK_SIZE * k + col];
for (int k = 0; k < block_size; k++) {
temp += b[block_size * row + k] * c[block_size * k + col];
}
a[BLOCK_SIZE * row + col] -= temp;
a[block_size * row + col] -= temp;
}
}
}
/*Perform a 3x3 matrix-matrix multiplicationj on two blocks*/
void blockMult(Block mat1, Block mat2, Block resMat) {
for (int row = 0; row < BLOCK_SIZE; row++) {
for (int col = 0; col < BLOCK_SIZE; col++) {
template <unsigned int block_size>
void blockMult(double *mat1, double *mat2, double *resMat) {
for (int row = 0; row < block_size; row++) {
for (int col = 0; col < block_size; col++) {
double temp = 0;
for (int k = 0; k < BLOCK_SIZE; k++) {
temp += mat1[BLOCK_SIZE * row + k] * mat2[BLOCK_SIZE * k + col];
for (int k = 0; k < block_size; k++) {
temp += mat1[block_size * row + k] * mat2[block_size * k + col];
}
resMat[BLOCK_SIZE * row + col] = temp;
resMat[block_size * row + col] = temp;
}
}
}
@ -128,7 +133,7 @@ void blockMult(Block mat1, Block mat2, Block resMat) {
/* Calculate the inverse of a block. This function is specific for only 3x3 block size.*/
void blockInvert3x3(Block mat, Block res) {
void blockInvert3x3(double *mat, double *res) {
// code generated by maple, copied from DUNE
double t4 = mat[0] * mat[4];
double t6 = mat[0] * mat[5];
@ -153,4 +158,17 @@ void blockInvert3x3(Block mat, Block res) {
}
#define INSTANTIATE_BDA_FUNCTIONS(n) \
template BlockedMatrix *allocateBlockedMatrix<n>(int Nb, int nnzbs); \
template void sortBlockedRow<n>(int *, double *, int, int); \
template void blockMultSub<n>(double *, double *, double *); \
template void blockMult<n>(double *, double *, double *); \
INSTANTIATE_BDA_FUNCTIONS(1);
INSTANTIATE_BDA_FUNCTIONS(2);
INSTANTIATE_BDA_FUNCTIONS(3);
INSTANTIATE_BDA_FUNCTIONS(4);
#undef INSTANTIATE_BDA_FUNCTIONS
} // end namespace bda

View File

@ -20,15 +20,11 @@
#ifndef BLOCKED_MATRIX_HPP
#define BLOCKED_MATRIX_HPP
#define BLOCK_SIZE 3
typedef double Block[9];
namespace bda
{
typedef struct {
Block *nnzValues;
double *nnzValues;
int *colIndices;
int *rowPointers;
int Nb;
@ -39,6 +35,7 @@ typedef struct {
/// \param[in] Nb number of blockrows
/// \param[in] nnzbs number of nonzero blocks
/// \return pointer to BlockedMatrix
template <unsigned int block_size>
BlockedMatrix *allocateBlockedMatrix(int Nb, int nnzbs);
/// Allocate BlockedMatrix, but copy data pointers instead of allocating new memory
@ -55,25 +52,28 @@ void freeBlockedMatrix(BlockedMatrix **mat);
/// \param[inout] data
/// \param[in] left lower index of data of row
/// \param[in] right upper index of data of row
void sortBlockedRow(int *colIndices, Block *data, int left, int right);
template <unsigned int block_size>
void sortBlockedRow(int *colIndices, double *data, int left, int right);
/// Multiply and subtract blocks
/// a = a - (b * c)
/// \param[inout] a block to be subtracted from
/// \param[in] b input block
/// \param[in] c input block
void blockMultSub(Block a, Block b, Block c);
template <unsigned int block_size>
void blockMultSub(double *a, double *b, double *c);
/// Perform a 3x3 matrix-matrix multiplication on two blocks
/// \param[in] mat1 input block 1
/// \param[in] mat2 input block 2
/// \param[inout] resMat output block
void blockMult(Block mat1, Block mat2, Block resMat);
template <unsigned int block_size>
void blockMult(double *mat1, double *mat2, double *resMat);
/// Calculate the inverse of a block. This function is specific for only 3x3 block size.
/// \param[in] mat input block
/// \param[inout] res output block
void blockInvert3x3(Block mat, Block res);
void blockInvert3x3(double *mat, double *res);
} // end namespace bda

View File

@ -131,9 +131,12 @@ int colorBlockedNodes(int rows, const int *rowPointers, const int *colIndices, s
* Both a to order array, which contains for every node from the old matrix where it will move in the new matrix,
* and the from order, which contains for every node in the new matrix where it came from in the old matrix.*/
template <unsigned int block_size>
void blocked_reorder_matrix_by_pattern(BlockedMatrix *mat, int *toOrder, int *fromOrder, BlockedMatrix *rMat) {
const unsigned int bs = block_size;
int rIndex = 0;
int i, j, k;
int i, k;
unsigned int j;
rMat->rowPointers[0] = 0;
for (i = 0; i < mat->Nb; i++) {
@ -141,8 +144,9 @@ void blocked_reorder_matrix_by_pattern(BlockedMatrix *mat, int *toOrder, int *fr
// put thisRow from the old matrix into row i of the new matrix
rMat->rowPointers[i + 1] = rMat->rowPointers[i] + mat->rowPointers[thisRow + 1] - mat->rowPointers[thisRow];
for (k = mat->rowPointers[thisRow]; k < mat->rowPointers[thisRow + 1]; k++) {
for (j = 0; j < BLOCK_SIZE * BLOCK_SIZE; j++)
rMat->nnzValues[rIndex][j] = mat->nnzValues[k][j];
for (j = 0; j < bs * bs; j++){
rMat->nnzValues[rIndex * bs * bs + j] = mat->nnzValues[k * bs * bs + j];
}
rMat->colIndices[rIndex] = mat->colIndices[k];
rIndex++;
}
@ -153,7 +157,7 @@ void blocked_reorder_matrix_by_pattern(BlockedMatrix *mat, int *toOrder, int *fr
}
// re-sort the column indices of every row.
for (i = 0; i < mat->Nb; i++) {
sortBlockedRow(rMat->colIndices, rMat->nnzValues, rMat->rowPointers[i], rMat->rowPointers[i + 1] - 1);
sortBlockedRow<bs>(rMat->colIndices, rMat->nnzValues, rMat->rowPointers[i], rMat->rowPointers[i + 1] - 1);
}
}
@ -182,10 +186,11 @@ void colorsToReordering(int Nb, std::vector<int>& colors, int *toOrder, int *fro
// Reorder a matrix according to a reordering pattern
template <unsigned int block_size>
void blocked_reorder_vector_by_pattern(int Nb, double *vector, int *fromOrder, double *rVector) {
for (int i = 0; i < Nb; i++) {
for (int j = 0; j < BLOCK_SIZE; j++) {
rVector[BLOCK_SIZE * i + j] = vector[BLOCK_SIZE * fromOrder[i] + j];
for (unsigned int j = 0; j < block_size; j++) {
rVector[block_size * i + j] = vector[block_size * fromOrder[i] + j];
}
}
}
@ -304,7 +309,8 @@ int* findGraphColoring(int *colIndices, int *rowPointers, int Nb, int maxRowsPer
// input : matrix A via Avals, Acols, Arows, Nb
// output: matrix B via Bvals, Bcols, Brows
// arrays for B must be preallocated
void bcsr_to_bcsc(Block *Avals, int *Acols, int *Arows, Block *Bvals, int *Bcols, int *Brows, int Nb) {
template <unsigned int block_size>
void bcsr_to_bcsc(double *Avals, int *Acols, int *Arows, double *Bvals, int *Bcols, int *Brows, int Nb) {
int nnz = Arows[Nb];
@ -328,7 +334,7 @@ void bcsr_to_bcsc(Block *Avals, int *Acols, int *Arows, Block *Bvals, int *Bcols
int col = Acols[j];
int dest = Brows[col];
Bcols[dest] = row;
memcpy(Bvals + dest, Avals + dest, sizeof(Block));
memcpy(Bvals + dest, Avals + dest, sizeof(double) * block_size * block_size);
Brows[col]++;
}
}
@ -340,4 +346,17 @@ void bcsr_to_bcsc(Block *Avals, int *Acols, int *Arows, Block *Bvals, int *Bcols
}
}
#define INSTANTIATE_BDA_FUNCTIONS(n) \
template void blocked_reorder_matrix_by_pattern<n>(BlockedMatrix *, int *, int *, BlockedMatrix *); \
template void blocked_reorder_vector_by_pattern<n>(int, double*, int*, double*); \
template void bcsr_to_bcsc<n>(double *, int *, int *, double *, int *, int *, int ); \
INSTANTIATE_BDA_FUNCTIONS(1);
INSTANTIATE_BDA_FUNCTIONS(2);
INSTANTIATE_BDA_FUNCTIONS(3);
INSTANTIATE_BDA_FUNCTIONS(4);
#undef INSTANTIATE_BDA_FUNCTIONS
} //namespace bda

View File

@ -45,6 +45,7 @@ int colorBlockedNodes(int rows, const int *rowPointers, const int *colIndices, s
/// \param[in] toOrder reorder pattern that lists for each index in the original order, to which index in the new order it should be moved
/// \param[in] fromOrder reorder pattern that lists for each index in the new order, from which index in the original order it was moved
/// \param[inout] rMat reordered Matrix
template <unsigned int block_size>
void blocked_reorder_matrix_by_pattern(BlockedMatrix *mat, int *toOrder, int *fromOrder, BlockedMatrix *rMat);
/// Compute reorder mapping from the color that each node has received
@ -63,6 +64,7 @@ void colorsToReordering(int Nb, std::vector<int>& colors, int *toOrder, int *fro
/// \param[in] toOrder reorder pattern that lists for each index in the original order, to which index in the new order it should be moved
/// \param[in] fromOrder reorder pattern that lists for each index in the new order, from which index in the original order it was moved
/// \param[inout] rVector reordered vector
template <unsigned int block_size>
void blocked_reorder_vector_by_pattern(int Nb, double *vector, int *fromOrder, double *rVector);
/// Determine whether all rows that a certain row depends on are done already
@ -109,7 +111,8 @@ int* findGraphColoring(int *colIndices, int *rowPointers, int Nb, int maxRowsPer
/// \param[inout] Bcols row indices of the result BCSC matrix
/// \param[inout] Brows column pointers of the result BCSC matrix
/// \param[in] Nb number of blockrows in the matrix
void bcsr_to_bcsc(Block *Avals, int *Acols, int *Arows, Block *Bvals, int *Bcols, int *Brows, int Nb);
template <unsigned int block_size>
void bcsr_to_bcsc(double *Avals, int *Acols, int *Arows, double *Bvals, int *Bcols, int *Brows, int Nb);
}

View File

@ -497,10 +497,10 @@ void openclSolverBackend<block_size>::initialize(int N_, int nnz_, int dim, doub
vals_contiguous = new double[N];
#endif
mat = (BlockedMatrix *)malloc(sizeof(BlockedMatrix));
mat = new BlockedMatrix();
mat->Nb = Nb;
mat->nnzbs = nnzb;
mat->nnzValues = (Block*)vals;
mat->nnzValues = vals;
mat->colIndices = cols;
mat->rowPointers = rows;
@ -547,6 +547,7 @@ void openclSolverBackend<block_size>::initialize(int N_, int nnz_, int dim, doub
template <unsigned int block_size>
void openclSolverBackend<block_size>::finalize() {
delete mat;
delete[] rb;
delete[] tmp;
#if COPY_ROW_BY_ROW
@ -668,9 +669,8 @@ void openclSolverBackend<block_size>::update_system(double *vals, double *b) {
t1 = second();
}
mat->nnzValues = (Block*)vals;
//mat->nnzValues = static_cast<Block*>(vals);
blocked_reorder_vector_by_pattern(mat->Nb, b, fromOrder, rb);
mat->nnzValues = vals;
blocked_reorder_vector_by_pattern<block_size>(mat->Nb, b, fromOrder, rb);
if (verbosity > 2) {
t2 = second();
@ -732,7 +732,7 @@ void openclSolverBackend<block_size>::get_result(double *x) {
}
queue->enqueueReadBuffer(d_x, CL_TRUE, 0, sizeof(double) * N, rb);
blocked_reorder_vector_by_pattern(mat->Nb, rb, toOrder, x);
blocked_reorder_vector_by_pattern<block_size>(mat->Nb, rb, toOrder, x);
if (verbosity > 2) {
t2 = second();