Renamed functions to all used Camel case, and renamed parameters to better represent what is stored in them.

Fixed mistake of using wrong sparsity pattern data to call canBeStarted function, and removed nnzValues of CSCmat, which were never used.
This commit is contained in:
Tom Hogervorst 2020-07-01 15:16:14 +02:00 committed by T.D. (Tongdong) Qiu
parent 7f8faa018b
commit 38c58bffae
7 changed files with 160 additions and 171 deletions

View File

@ -48,19 +48,20 @@ namespace bda
delete[] invDiagVals; delete[] invDiagVals;
delete[] diagIndex; delete[] diagIndex;
delete[] rowsPerColor; delete[] rowsPerColor;
freeBlockedMatrix(&LUMat); freeBlockedMatrix(&LUmat);
freeBlockedMatrix(&LMat); freeBlockedMatrix(&Lmat);
freeBlockedMatrix(&UMat); freeBlockedMatrix(&Umat);
delete[] toOrder; delete[] toOrder;
delete[] fromOrder; delete[] fromOrder;
freeBlockedMatrix(&rMat); freeBlockedMatrix(&rmat);
} }
template <unsigned int block_size> template <unsigned int block_size>
bool BILU0<block_size>::init(BlockedMatrix *mat) bool BILU0<block_size>::init(BlockedMatrix *mat)
{ {
const unsigned int bs = block_size; const unsigned int bs = block_size;
BlockedMatrix *CSCmat = nullptr; int *CSCRowIndices = nullptr;
int *CSCColPointers = nullptr;
this->N = mat->Nb * block_size; this->N = mat->Nb * block_size;
this->Nb = mat->Nb; this->Nb = mat->Nb;
@ -70,14 +71,11 @@ namespace bda
toOrder = new int[N]; toOrder = new int[N];
fromOrder = new int[N]; fromOrder = new int[N];
if (level_scheduling) { if (level_scheduling) {
CSCmat = new BlockedMatrix(); CSCRowIndices = new int[nnzbs];
CSCmat->Nb = Nb; CSCColPointers = new int[Nb + 1];
CSCmat->nnzbs = nnzbs;
CSCmat->nnzValues = new double[nnzbs * bs * bs];
CSCmat->colIndices = new int[nnzbs];
CSCmat->rowPointers = new int[Nb + 1];
Timer t_convert; Timer t_convert;
bcsr_to_bcsc<block_size>(mat->nnzValues, mat->colIndices, mat->rowPointers, CSCmat->nnzValues, CSCmat->colIndices, CSCmat->rowPointers, mat->Nb); csrPatternToCsc(mat->colIndices, mat->rowPointers, CSCRowIndices, CSCColPointers, mat->Nb);
if(verbosity >= 3){ if(verbosity >= 3){
std::ostringstream out; std::ostringstream out;
out << "BILU0 convert CSR to CSC: " << t_convert.stop() << " s\n"; out << "BILU0 convert CSR to CSC: " << t_convert.stop() << " s\n";
@ -86,10 +84,11 @@ namespace bda
} }
Timer t_analysis; Timer t_analysis;
rMat = allocateBlockedMatrix<block_size>(mat->Nb, mat->nnzbs); rmat = allocateBlockedMatrix<block_size>(mat->Nb, mat->nnzbs);
LUMat = soft_copyBlockedMatrix(rMat); LUmat = softCopyBlockedMatrix(rmat);
if (level_scheduling) { if (level_scheduling) {
rowsPerColor = findLevelScheduling(mat->colIndices, mat->rowPointers, CSCmat->colIndices, CSCmat->rowPointers, mat->Nb, &numColors, toOrder, fromOrder); rowsPerColor = findLevelScheduling(mat->colIndices, mat->rowPointers, CSCRowIndices, CSCColPointers, mat->Nb, &numColors, toOrder, fromOrder);
} else if (graph_coloring) { } else if (graph_coloring) {
rowsPerColor = findGraphColoring<block_size>(mat->colIndices, mat->rowPointers, mat->Nb, mat->Nb, mat->Nb, &numColors, toOrder, fromOrder); rowsPerColor = findGraphColoring<block_size>(mat->colIndices, mat->rowPointers, mat->Nb, mat->Nb, mat->Nb, &numColors, toOrder, fromOrder);
} }
@ -105,34 +104,32 @@ namespace bda
diagIndex = new int[mat->Nb]; diagIndex = new int[mat->Nb];
invDiagVals = new double[mat->Nb * bs * bs]; invDiagVals = new double[mat->Nb * bs * bs];
LMat = allocateBlockedMatrix<block_size>(mat->Nb, (mat->nnzbs - mat->Nb) / 2); Lmat = allocateBlockedMatrix<block_size>(mat->Nb, (mat->nnzbs - mat->Nb) / 2);
UMat = allocateBlockedMatrix<block_size>(mat->Nb, (mat->nnzbs - mat->Nb) / 2); Umat = allocateBlockedMatrix<block_size>(mat->Nb, (mat->nnzbs - mat->Nb) / 2);
LUMat->nnzValues = new double[mat->nnzbs * bs * bs]; LUmat->nnzValues = new double[mat->nnzbs * bs * bs];
if (level_scheduling) { if (level_scheduling) {
delete[] CSCmat->nnzValues; delete[] CSCRowIndices;
delete[] CSCmat->colIndices; delete[] CSCColPointers;
delete[] CSCmat->rowPointers;
delete CSCmat;
} }
s.Lvals = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(double) * bs * bs * LMat->nnzbs); s.Lvals = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(double) * bs * bs * Lmat->nnzbs);
s.Uvals = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(double) * bs * bs * UMat->nnzbs); s.Uvals = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(double) * bs * bs * Umat->nnzbs);
s.Lcols = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * LMat->nnzbs); s.Lcols = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * Lmat->nnzbs);
s.Ucols = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * UMat->nnzbs); s.Ucols = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * Umat->nnzbs);
s.Lrows = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * (LMat->Nb + 1)); s.Lrows = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * (Lmat->Nb + 1));
s.Urows = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * (UMat->Nb + 1)); s.Urows = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * (Umat->Nb + 1));
s.invDiagVals = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(double) * bs * bs * mat->Nb); s.invDiagVals = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(double) * bs * bs * mat->Nb);
s.rowsPerColor = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * (numColors + 1)); s.rowsPerColor = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * (numColors + 1));
queue->enqueueWriteBuffer(s.Lvals, CL_TRUE, 0, LMat->nnzbs * sizeof(double) * bs * bs, LMat->nnzValues); queue->enqueueWriteBuffer(s.Lvals, CL_TRUE, 0, Lmat->nnzbs * sizeof(double) * bs * bs, Lmat->nnzValues);
queue->enqueueWriteBuffer(s.Uvals, CL_TRUE, 0, UMat->nnzbs * sizeof(double) * bs * bs, UMat->nnzValues); queue->enqueueWriteBuffer(s.Uvals, CL_TRUE, 0, Umat->nnzbs * sizeof(double) * bs * bs, Umat->nnzValues);
queue->enqueueWriteBuffer(s.Lcols, CL_TRUE, 0, LMat->nnzbs * sizeof(int), LMat->colIndices); queue->enqueueWriteBuffer(s.Lcols, CL_TRUE, 0, Lmat->nnzbs * sizeof(int), Lmat->colIndices);
queue->enqueueWriteBuffer(s.Ucols, CL_TRUE, 0, UMat->nnzbs * sizeof(int), UMat->colIndices); queue->enqueueWriteBuffer(s.Ucols, CL_TRUE, 0, Umat->nnzbs * sizeof(int), Umat->colIndices);
queue->enqueueWriteBuffer(s.Lrows, CL_TRUE, 0, (LMat->Nb + 1) * sizeof(int), LMat->rowPointers); queue->enqueueWriteBuffer(s.Lrows, CL_TRUE, 0, (Lmat->Nb + 1) * sizeof(int), Lmat->rowPointers);
queue->enqueueWriteBuffer(s.Urows, CL_TRUE, 0, (UMat->Nb + 1) * sizeof(int), UMat->rowPointers); queue->enqueueWriteBuffer(s.Urows, CL_TRUE, 0, (Umat->Nb + 1) * sizeof(int), Umat->rowPointers);
queue->enqueueWriteBuffer(s.invDiagVals, CL_TRUE, 0, mat->Nb * sizeof(double) * bs * bs, invDiagVals); queue->enqueueWriteBuffer(s.invDiagVals, CL_TRUE, 0, mat->Nb * sizeof(double) * bs * bs, invDiagVals);
int *rowsPerColorPrefix = new int[numColors + 1]; int *rowsPerColorPrefix = new int[numColors + 1];
@ -154,7 +151,7 @@ namespace bda
const unsigned int bs = block_size; const unsigned int bs = block_size;
Timer t_reorder; Timer t_reorder;
blocked_reorder_matrix_by_pattern<block_size>(mat, toOrder, fromOrder, rMat); reorderBlockedMatrixByPattern<block_size>(mat, toOrder, fromOrder, rmat);
if (verbosity >= 3){ if (verbosity >= 3){
std::ostringstream out; std::ostringstream out;
out << "BILU0 reorder matrix: " << t_reorder.stop() << " s"; out << "BILU0 reorder matrix: " << t_reorder.stop() << " s";
@ -163,7 +160,7 @@ namespace bda
// TODO: remove this copy by replacing inplace ilu decomp by out-of-place ilu decomp // TODO: remove this copy by replacing inplace ilu decomp by out-of-place ilu decomp
Timer t_copy; Timer t_copy;
memcpy(LUMat->nnzValues, rMat->nnzValues, sizeof(double) * bs * bs * rMat->nnzbs); memcpy(LUmat->nnzValues, rmat->nnzValues, sizeof(double) * bs * bs * rmat->nnzbs);
if (verbosity >= 3){ if (verbosity >= 3){
std::ostringstream out; std::ostringstream out;
out << "BILU0 memcpy: " << t_copy.stop() << " s"; out << "BILU0 memcpy: " << t_copy.stop() << " s";
@ -179,13 +176,13 @@ namespace bda
Timer t_decomposition; Timer t_decomposition;
// go through all rows // go through all rows
for (i = 0; i < LUMat->Nb; i++) { for (i = 0; i < LUmat->Nb; i++) {
iRowStart = LUMat->rowPointers[i]; iRowStart = LUmat->rowPointers[i];
iRowEnd = LUMat->rowPointers[i + 1]; iRowEnd = LUmat->rowPointers[i + 1];
// go through all elements of the row // go through all elements of the row
for (ij = iRowStart; ij < iRowEnd; ij++) { for (ij = iRowStart; ij < iRowEnd; ij++) {
j = LUMat->colIndices[ij]; j = LUmat->colIndices[ij];
// if the element is the diagonal, store the index and go to next row // if the element is the diagonal, store the index and go to next row
if (j == i) { if (j == i) {
diagIndex[i] = ij; diagIndex[i] = ij;
@ -202,21 +199,21 @@ namespace bda
LSize++; LSize++;
// calculate the pivot of this row // calculate the pivot of this row
blockMult<bs>(LUMat->nnzValues + ij * bs * bs, invDiagVals + j * bs * bs, &pivot[0]); blockMult<bs>(LUmat->nnzValues + ij * bs * bs, invDiagVals + j * bs * bs, &pivot[0]);
memcpy(LUMat->nnzValues + ij * bs * bs, &pivot[0], sizeof(double) * block_size * block_size); memcpy(LUmat->nnzValues + ij * bs * bs, &pivot[0], sizeof(double) * block_size * block_size);
jRowEnd = LUMat->rowPointers[j + 1]; jRowEnd = LUmat->rowPointers[j + 1];
jk = diagIndex[j] + 1; jk = diagIndex[j] + 1;
ik = ij + 1; ik = ij + 1;
// substract that row scaled by the pivot from this row. // substract that row scaled by the pivot from this row.
while (ik < iRowEnd && jk < jRowEnd) { while (ik < iRowEnd && jk < jRowEnd) {
if (LUMat->colIndices[ik] == LUMat->colIndices[jk]) { if (LUmat->colIndices[ik] == LUmat->colIndices[jk]) {
blockMultSub<bs>(LUMat->nnzValues + ik * bs * bs, pivot, LUMat->nnzValues + jk * bs * bs); blockMultSub<bs>(LUmat->nnzValues + ik * bs * bs, pivot, LUmat->nnzValues + jk * bs * bs);
ik++; ik++;
jk++; jk++;
} else { } else {
if (LUMat->colIndices[ik] < LUMat->colIndices[jk]) if (LUmat->colIndices[ik] < LUmat->colIndices[jk])
{ ik++; } { ik++; }
else else
{ jk++; } { jk++; }
@ -224,35 +221,35 @@ namespace bda
} }
} }
// store the inverse in the diagonal! // store the inverse in the diagonal!
blockInvert3x3(LUMat->nnzValues + ij * bs * bs, invDiagVals + i * bs * bs); blockInvert3x3(LUmat->nnzValues + ij * bs * bs, invDiagVals + i * bs * bs);
memcpy(LUMat->nnzValues + ij * bs * bs, invDiagVals + i * bs * bs, sizeof(double) * bs * bs); memcpy(LUmat->nnzValues + ij * bs * bs, invDiagVals + i * bs * bs, sizeof(double) * bs * bs);
} }
LMat->rowPointers[0] = 0; Lmat->rowPointers[0] = 0;
UMat->rowPointers[0] = 0; Umat->rowPointers[0] = 0;
// Split the LU matrix into two by comparing column indices to diagonal indices // Split the LU matrix into two by comparing column indices to diagonal indices
for (i = 0; i < LUMat->Nb; i++) { for (i = 0; i < LUmat->Nb; i++) {
int offsetL = LMat->rowPointers[i]; int offsetL = Lmat->rowPointers[i];
int rowSize = diagIndex[i] - LUMat->rowPointers[i]; int rowSize = diagIndex[i] - LUmat->rowPointers[i];
int offsetLU = LUMat->rowPointers[i]; int offsetLU = LUmat->rowPointers[i];
memcpy(LMat->nnzValues + offsetL * bs * bs, LUMat->nnzValues + offsetLU * bs * bs, sizeof(double) * bs * bs * rowSize); memcpy(Lmat->nnzValues + offsetL * bs * bs, LUmat->nnzValues + offsetLU * bs * bs, sizeof(double) * bs * bs * rowSize);
memcpy(LMat->colIndices + offsetL, LUMat->colIndices + offsetLU, sizeof(int) * rowSize); memcpy(Lmat->colIndices + offsetL, LUmat->colIndices + offsetLU, sizeof(int) * rowSize);
offsetL += rowSize; offsetL += rowSize;
LMat->rowPointers[i + 1] = offsetL; Lmat->rowPointers[i + 1] = offsetL;
} }
// Reverse the order or the (blocked) rows for the U matrix, // Reverse the order or the (blocked) rows for the U matrix,
// because the rows are accessed in reverse order when applying the ILU0 // because the rows are accessed in reverse order when applying the ILU0
int URowIndex = 0; int URowIndex = 0;
for (i = LUMat->Nb - 1; i >= 0; i--) { for (i = LUmat->Nb - 1; i >= 0; i--) {
int offsetU = UMat->rowPointers[URowIndex]; int offsetU = Umat->rowPointers[URowIndex];
int rowSize = LUMat->rowPointers[i + 1] - diagIndex[i] - 1; int rowSize = LUmat->rowPointers[i + 1] - diagIndex[i] - 1;
int offsetLU = diagIndex[i] + 1; int offsetLU = diagIndex[i] + 1;
memcpy(UMat->nnzValues + offsetU * bs * bs, LUMat->nnzValues + offsetLU * bs * bs, sizeof(double) * bs * bs * rowSize); memcpy(Umat->nnzValues + offsetU * bs * bs, LUmat->nnzValues + offsetLU * bs * bs, sizeof(double) * bs * bs * rowSize);
memcpy(UMat->colIndices + offsetU, LUMat->colIndices + offsetLU, sizeof(int) * rowSize); memcpy(Umat->colIndices + offsetU, LUmat->colIndices + offsetLU, sizeof(int) * rowSize);
offsetU += rowSize; offsetU += rowSize;
UMat->rowPointers[URowIndex + 1] = offsetU; Umat->rowPointers[URowIndex + 1] = offsetU;
URowIndex++; URowIndex++;
} }
if (verbosity >= 3) { if (verbosity >= 3) {
@ -263,14 +260,14 @@ namespace bda
Timer t_copyToGpu; Timer t_copyToGpu;
if (pattern_uploaded == false) { if (pattern_uploaded == false) {
queue->enqueueWriteBuffer(s.Lcols, CL_TRUE, 0, LMat->nnzbs * sizeof(int), LMat->colIndices); queue->enqueueWriteBuffer(s.Lcols, CL_TRUE, 0, Lmat->nnzbs * sizeof(int), Lmat->colIndices);
queue->enqueueWriteBuffer(s.Ucols, CL_TRUE, 0, UMat->nnzbs * sizeof(int), UMat->colIndices); queue->enqueueWriteBuffer(s.Ucols, CL_TRUE, 0, Umat->nnzbs * sizeof(int), Umat->colIndices);
queue->enqueueWriteBuffer(s.Lrows, CL_TRUE, 0, (LMat->Nb + 1) * sizeof(int), LMat->rowPointers); queue->enqueueWriteBuffer(s.Lrows, CL_TRUE, 0, (Lmat->Nb + 1) * sizeof(int), Lmat->rowPointers);
queue->enqueueWriteBuffer(s.Urows, CL_TRUE, 0, (UMat->Nb + 1) * sizeof(int), UMat->rowPointers); queue->enqueueWriteBuffer(s.Urows, CL_TRUE, 0, (Umat->Nb + 1) * sizeof(int), Umat->rowPointers);
pattern_uploaded = true; pattern_uploaded = true;
} }
queue->enqueueWriteBuffer(s.Lvals, CL_TRUE, 0, LMat->nnzbs * sizeof(double) * bs * bs, LMat->nnzValues); queue->enqueueWriteBuffer(s.Lvals, CL_TRUE, 0, Lmat->nnzbs * sizeof(double) * bs * bs, Lmat->nnzValues);
queue->enqueueWriteBuffer(s.Uvals, CL_TRUE, 0, UMat->nnzbs * sizeof(double) * bs * bs, UMat->nnzValues); queue->enqueueWriteBuffer(s.Uvals, CL_TRUE, 0, Umat->nnzbs * sizeof(double) * bs * bs, Umat->nnzValues);
queue->enqueueWriteBuffer(s.invDiagVals, CL_TRUE, 0, Nb * sizeof(double) * bs * bs, invDiagVals); queue->enqueueWriteBuffer(s.invDiagVals, CL_TRUE, 0, Nb * sizeof(double) * bs * bs, invDiagVals);
if (verbosity >= 3) { if (verbosity >= 3) {
std::ostringstream out; std::ostringstream out;

View File

@ -39,8 +39,8 @@ namespace bda
int Nb; // number of blockrows of the matrix int Nb; // number of blockrows of the matrix
int nnz; // number of nonzeroes of the matrix (scalar) int nnz; // number of nonzeroes of the matrix (scalar)
int nnzbs; // number of blocks of the matrix int nnzbs; // number of blocks of the matrix
BlockedMatrix *LMat, *UMat, *LUMat; BlockedMatrix *Lmat, *Umat, *LUmat;
BlockedMatrix *rMat = nullptr; // only used with PAR_SIM BlockedMatrix *rmat = nullptr; // only used with PAR_SIM
double *invDiagVals; double *invDiagVals;
int *diagIndex, *rowsPerColor; int *diagIndex, *rowsPerColor;
int *toOrder, *fromOrder; int *toOrder, *fromOrder;
@ -101,7 +101,7 @@ namespace bda
BlockedMatrix* getRMat() BlockedMatrix* getRMat()
{ {
return rMat; return rmat;
} }
}; };

View File

@ -50,7 +50,7 @@ void freeBlockedMatrix(BlockedMatrix **mat) {
} }
} }
BlockedMatrix *soft_copyBlockedMatrix(BlockedMatrix *mat) { BlockedMatrix *softCopyBlockedMatrix(BlockedMatrix *mat) {
BlockedMatrix *res = new BlockedMatrix(); BlockedMatrix *res = new BlockedMatrix();
res->nnzValues = mat->nnzValues; res->nnzValues = mat->nnzValues;
res->colIndices = mat->colIndices; res->colIndices = mat->colIndices;

View File

@ -41,7 +41,7 @@ BlockedMatrix *allocateBlockedMatrix(int Nb, int nnzbs);
/// Allocate BlockedMatrix, but copy data pointers instead of allocating new memory /// Allocate BlockedMatrix, but copy data pointers instead of allocating new memory
/// \param[in] mat matrix to be copied /// \param[in] mat matrix to be copied
/// \return pointer to BlockedMatrix /// \return pointer to BlockedMatrix
BlockedMatrix *soft_copyBlockedMatrix(BlockedMatrix *mat); BlockedMatrix *softCopyBlockedMatrix(BlockedMatrix *mat);
/// Free BlockedMatrix and its data /// Free BlockedMatrix and its data
/// \param[in] mat matrix to be free'd /// \param[in] mat matrix to be free'd

View File

@ -35,7 +35,9 @@ namespace bda
/* Give every node in the matrix (of which only the sparsity pattern in the /* Give every node in the matrix (of which only the sparsity pattern in the
* form of row pointers and column indices arrays are in the input), a color * form of row pointers and column indices arrays are in the input), a color
* in the colors array. Also return the amount of colors in the return integer. */ * in the colors array. Also return the amount of colors in the return integer.
* This graph-coloring algorithm is based on the Jones-Plassmann algorithm, proposed in:
* "A Parallel Graph Coloring Heuristic" by M.T. Jones and P.E. Plassmann in SIAM Journal of Scientific Computing 14 (1993) */
template <unsigned int block_size> template <unsigned int block_size>
int colorBlockedNodes(int rows, const int *rowPointers, const int *colIndices, std::vector<int>& colors, int maxRowsPerColor, int maxColsPerColor) int colorBlockedNodes(int rows, const int *rowPointers, const int *colIndices, std::vector<int>& colors, int maxRowsPerColor, int maxColsPerColor)
@ -145,7 +147,7 @@ int colorBlockedNodes(int rows, const int *rowPointers, const int *colIndices, s
* and the from order, which contains for every node in the new matrix where it came from in the old matrix.*/ * and the from order, which contains for every node in the new matrix where it came from in the old matrix.*/
template <unsigned int block_size> template <unsigned int block_size>
void blocked_reorder_matrix_by_pattern(BlockedMatrix *mat, int *toOrder, int *fromOrder, BlockedMatrix *rMat) { void reorderBlockedMatrixByPattern(BlockedMatrix *mat, int *toOrder, int *fromOrder, BlockedMatrix *rMat) {
const unsigned int bs = block_size; const unsigned int bs = block_size;
int rIndex = 0; int rIndex = 0;
int i, k; int i, k;
@ -176,18 +178,18 @@ void blocked_reorder_matrix_by_pattern(BlockedMatrix *mat, int *toOrder, int *fr
/* Reorder a matrix according to the colors that every node of the matrix has received*/ /* Reorder a matrix according to the colors that every node of the matrix has received*/
void colorsToReordering(int Nb, std::vector<int>& colors, int *toOrder, int *fromOrder, int *iters) { void colorsToReordering(int Nb, std::vector<int>& colors, int numColors, int *toOrder, int *fromOrder, int *rowsPerColor) {
int reordered = 0; int reordered = 0;
int i, c; int i, c;
for (i = 0; i < MAX_COLORS; i++) { for (i = 0; i < numColors; i++) {
iters[i] = 0; rowsPerColor[i] = 0;
} }
// Find reordering patterns // Find reordering patterns
for (c = 0; c < MAX_COLORS; c++) { for (c = 0; c < numColors; c++) {
for (i = 0; i < Nb; i++) { for (i = 0; i < Nb; i++) {
if (colors[i] == c) { if (colors[i] == c) {
iters[c]++; rowsPerColor[c]++;
toOrder[i] = reordered; toOrder[i] = reordered;
fromOrder[reordered] = i; fromOrder[reordered] = i;
@ -197,10 +199,10 @@ void colorsToReordering(int Nb, std::vector<int>& colors, int *toOrder, int *fro
} }
} }
// Reorder a matrix according to a reordering pattern // Reorder a vector according to a reordering pattern
template <unsigned int block_size> template <unsigned int block_size>
void blocked_reorder_vector_by_pattern(int Nb, double *vector, int *fromOrder, double *rVector) { void reorderBlockedVectorByPattern(int Nb, double *vector, int *fromOrder, double *rVector) {
for (int i = 0; i < Nb; i++) { for (int i = 0; i < Nb; i++) {
for (unsigned int j = 0; j < block_size; j++) { for (unsigned int j = 0; j < block_size; j++) {
rVector[block_size * i + j] = vector[block_size * fromOrder[i] + j]; rVector[block_size * i + j] = vector[block_size * fromOrder[i] + j];
@ -212,7 +214,7 @@ void blocked_reorder_vector_by_pattern(int Nb, double *vector, int *fromOrder, d
/* Check is operations on a node in the matrix can be started /* Check is operations on a node in the matrix can be started
* A node can only be started if all nodes that it depends on during sequential execution have already completed.*/ * A node can only be started if all nodes that it depends on during sequential execution have already completed.*/
bool canBeStarted(int rowIndex, int *rowPointers, int *colIndices, std::vector<bool>& doneRows) { bool canBeStarted(const int rowIndex, const int *rowPointers, const int *colIndices, const std::vector<bool>& doneRows) {
bool canStart = !doneRows[rowIndex]; bool canStart = !doneRows[rowIndex];
int i, thisDependency; int i, thisDependency;
if (canStart) { if (canStart) {
@ -231,41 +233,43 @@ bool canBeStarted(int rowIndex, int *rowPointers, int *colIndices, std::vector<b
} }
/* /*
* The level scheduling of a non-symmetric, blocked matrix requires access to a CSC encoding and a CSR encoding of the same matrix. * The level scheduling of a non-symmetric, blocked matrix requires access to a CSC encoding and a CSR encoding of the sparsity pattern of the input matrix.
*/ * This function is based on a standard level scheduling algorithm, like the one described in:
* "Iterative methods for Sparse Linear Systems" by Yousef Saad in section 11.6.3
*/
int *findLevelScheduling(int *CSRColIndices, int *CSRRowPointers, int *CSCColIndices, int *CSCRowPointers, int Nb, int *iters, int *toOrder, int* fromOrder) { int *findLevelScheduling(int *CSRColIndices, int *CSRRowPointers, int *CSCRowIndices, int *CSCColPointers, int Nb, int *numColors, int *toOrder, int* fromOrder) {
int activeRowIndex = 0, iterEnd, nextActiveRowIndex = 0; int activeRowIndex = 0, colorEnd, nextActiveRowIndex = 0;
int thisRow; int thisRow;
std::vector<bool> doneRows(Nb, false); std::vector<bool> doneRows(Nb, false);
std::vector<int> rowsPerIter; std::vector<int> rowsPerColor;
rowsPerIter.reserve(Nb); rowsPerColor.reserve(Nb);
int *resRowsPerIter; int *resRowsPerColor;
std::vector <int> rowsToStart; std::vector <int> rowsToStart;
// find starting rows: rows that are independent from all rows that come before them. // find starting rows: rows that are independent from all rows that come before them.
for (thisRow = 0; thisRow < Nb; thisRow++) { for (thisRow = 0; thisRow < Nb; thisRow++) {
if (canBeStarted(thisRow, CSRRowPointers, CSCColIndices, doneRows)) { if (canBeStarted(thisRow, CSCColPointers, CSCRowIndices, doneRows)) {
fromOrder[nextActiveRowIndex] = thisRow; fromOrder[nextActiveRowIndex] = thisRow;
toOrder[thisRow] = nextActiveRowIndex; toOrder[thisRow] = nextActiveRowIndex;
nextActiveRowIndex++; nextActiveRowIndex++;
} }
} }
// 'do' compute on all active rows // 'do' compute on all active rows
for (iterEnd = 0; iterEnd < nextActiveRowIndex; iterEnd++) { for (colorEnd = 0; colorEnd < nextActiveRowIndex; colorEnd++) {
doneRows[fromOrder[iterEnd]] = true; doneRows[fromOrder[colorEnd]] = true;
} }
rowsPerIter.emplace_back(nextActiveRowIndex - activeRowIndex); rowsPerColor.emplace_back(nextActiveRowIndex - activeRowIndex);
while (iterEnd < Nb) { while (colorEnd < Nb) {
// Go over all rows active from the last iteration, and check which of their neighbours can be activated this iteration // Go over all rows active from the last color, and check which of their neighbours can be activated this color
for (; activeRowIndex < iterEnd; activeRowIndex++) { for (; activeRowIndex < colorEnd; activeRowIndex++) {
thisRow = fromOrder[activeRowIndex]; thisRow = fromOrder[activeRowIndex];
for (int i = CSCRowPointers[thisRow]; i < CSCRowPointers[thisRow + 1]; i++) { for (int i = CSCColPointers[thisRow]; i < CSCColPointers[thisRow + 1]; i++) {
int thatRow = CSCColIndices[i]; int thatRow = CSCRowIndices[i];
if (canBeStarted(thatRow, CSRRowPointers, CSRColIndices, doneRows)) { if (canBeStarted(thatRow, CSRRowPointers, CSRColIndices, doneRows)) {
rowsToStart.emplace_back(thatRow); rowsToStart.emplace_back(thatRow);
@ -282,78 +286,69 @@ int *findLevelScheduling(int *CSRColIndices, int *CSRRowPointers, int *CSCColInd
nextActiveRowIndex++; nextActiveRowIndex++;
} }
} }
iterEnd = nextActiveRowIndex; colorEnd = nextActiveRowIndex;
rowsPerIter.emplace_back(nextActiveRowIndex - activeRowIndex); rowsPerColor.emplace_back(nextActiveRowIndex - activeRowIndex);
} }
// Crop the rowsPerIter array to it minimum size. // Crop the rowsPerColor array to it minimum size.
int numColors = rowsPerIter.size(); resRowsPerColor = new int[rowsPerColor.size()];
resRowsPerIter = new int[numColors]; for (unsigned int i = 0; i < rowsPerColor.size(); i++) {
for (int i = 0; i < numColors; i++) { resRowsPerColor[i] = rowsPerColor[i];
resRowsPerIter[i] = rowsPerIter[i];
} }
*iters = rowsPerIter.size(); *numColors = rowsPerColor.size();
return resRowsPerIter; return resRowsPerColor;
} }
/* Perform the complete graph coloring algorithm on a matrix. Return an array with the amount of nodes per color.*/ /* Perform the complete graph coloring algorithm on a matrix. Return an array with the amount of nodes per color.*/
template <unsigned int block_size> template <unsigned int block_size>
int* findGraphColoring(int *colIndices, int *rowPointers, int Nb, int maxRowsPerColor, int maxColsPerColor, int *numColors, int *toOrder, int* fromOrder) { int* findGraphColoring(const int *colIndices, const int *rowPointers, int Nb, int maxRowsPerColor, int maxColsPerColor, int *numColors, int *toOrder, int* fromOrder) {
std::vector<int> rowColor; std::vector<int> rowColor;
rowColor.resize(Nb); rowColor.resize(Nb);
int *rowsPerColor = new int[MAX_COLORS]; int *rowsPerColor = new int[MAX_COLORS];
colorBlockedNodes<block_size>(Nb, rowPointers, colIndices, rowColor, maxRowsPerColor, maxColsPerColor); *numColors = colorBlockedNodes<block_size>(Nb, rowPointers, colIndices, rowColor, maxRowsPerColor, maxColsPerColor);
colorsToReordering(Nb, rowColor, toOrder, fromOrder, rowsPerColor); colorsToReordering(Nb, rowColor, *numColors, toOrder, fromOrder, rowsPerColor);
*numColors = MAX_COLORS; // The rowsPerColor array contains a non-zero value for each color denoting how many rows are in that color. It has a size of *numColors.
while (rowsPerColor[*numColors - 1] == 0) {
*numColors = *numColors - 1;
}
return rowsPerColor; return rowsPerColor;
} }
// based on the scipy package from python, scipy/sparse/sparsetools/csr.h on github // based on the scipy package from python, scipy/sparse/sparsetools/csr.h on github
// input : matrix A via Avals, Acols, Arows, Nb void csrPatternToCsc(int *CSRColIndices, int *CSRRowPointers, int *CSCRowIndices, int *CSCColPointers, int Nb) {
// output: matrix B via Bvals, Bcols, Brows
// arrays for B must be preallocated
template <unsigned int block_size>
void bcsr_to_bcsc(double *Avals, int *Acols, int *Arows, double *Bvals, int *Bcols, int *Brows, int Nb) {
int nnz = Arows[Nb]; int nnz = CSRRowPointers[Nb];
// compute number of nnzs per column // compute number of nnzs per column
std::fill(Brows, Brows + Nb, 0); std::fill(CSCColPointers, CSCColPointers + Nb, 0);
for (int n = 0; n < nnz; ++n) { for (int n = 0; n < nnz; ++n) {
Brows[Acols[n]]++; CSCColPointers[CSRColIndices[n]]++;
} }
// cumsum the nnz per col to get Brows // cumsum the nnz per col to get CSCColPointers
for (int col = 0, cumsum = 0; col < Nb; ++col) { for (int col = 0, cumsum = 0; col < Nb; ++col) {
int temp = Brows[col]; int temp = CSCColPointers[col];
Brows[col] = cumsum; CSCColPointers[col] = cumsum;
cumsum += temp; cumsum += temp;
} }
Brows[Nb] = nnz; CSCColPointers[Nb] = nnz;
for (int row = 0; row < Nb; ++row) { for (int row = 0; row < Nb; ++row) {
for (int j = Arows[row]; j < Arows[row + 1]; ++j) { for (int j = CSRRowPointers[row]; j < CSRRowPointers[row + 1]; ++j) {
int col = Acols[j]; int col = CSRColIndices[j];
int dest = Brows[col]; int dest = CSCColPointers[col];
Bcols[dest] = row; CSCRowIndices[dest] = row;
memcpy(Bvals + dest, Avals + dest, sizeof(double) * block_size * block_size); CSCColPointers[col]++;
Brows[col]++;
} }
} }
for (int col = 0, last = 0; col <= Nb; ++col) { for (int col = 0, last = 0; col <= Nb; ++col) {
int temp = Brows[col]; int temp = CSCColPointers[col];
Brows[col] = last; CSCColPointers[col] = last;
last = temp; last = temp;
} }
} }
@ -361,10 +356,9 @@ void bcsr_to_bcsc(double *Avals, int *Acols, int *Arows, double *Bvals, int *Bco
#define INSTANTIATE_BDA_FUNCTIONS(n) \ #define INSTANTIATE_BDA_FUNCTIONS(n) \
template int colorBlockedNodes<n>(int, const int *, const int *, std::vector<int>&, int, int); \ template int colorBlockedNodes<n>(int, const int *, const int *, std::vector<int>&, int, int); \
template void blocked_reorder_matrix_by_pattern<n>(BlockedMatrix *, int *, int *, BlockedMatrix *); \ template void reorderBlockedMatrixByPattern<n>(BlockedMatrix *, int *, int *, BlockedMatrix *); \
template void blocked_reorder_vector_by_pattern<n>(int, double*, int*, double*); \ template void reorderBlockedVectorByPattern<n>(int, double*, int*, double*); \
template int* findGraphColoring<n>(int *, int *, int, int, int, int *, int *, int *); \ template int* findGraphColoring<n>(const int *, const int *, int, int, int, int *, int *, int *); \
template void bcsr_to_bcsc<n>(double *, int *, int *, double *, int *, int *, int); \
INSTANTIATE_BDA_FUNCTIONS(1); INSTANTIATE_BDA_FUNCTIONS(1);
INSTANTIATE_BDA_FUNCTIONS(2); INSTANTIATE_BDA_FUNCTIONS(2);

View File

@ -48,18 +48,19 @@ int colorBlockedNodes(int rows, const int *rowPointers, const int *colIndices, s
/// \param[in] fromOrder reorder pattern that lists for each index in the new order, from which index in the original order it was moved /// \param[in] fromOrder reorder pattern that lists for each index in the new order, from which index in the original order it was moved
/// \param[inout] rMat reordered Matrix /// \param[inout] rMat reordered Matrix
template <unsigned int block_size> template <unsigned int block_size>
void blocked_reorder_matrix_by_pattern(BlockedMatrix *mat, int *toOrder, int *fromOrder, BlockedMatrix *rMat); void reorderBlockedMatrixByPattern(BlockedMatrix *mat, int *toOrder, int *fromOrder, BlockedMatrix *rMat);
/// Compute reorder mapping from the color that each node has received /// Compute reorder mapping from the color that each node has received
/// The toOrder, fromOrder and iters arrays must be allocated already /// The toOrder, fromOrder and iters arrays must be allocated already
/// \param[in] Nb number of blocks in the vector /// \param[in] Nb number of blocks in the vector
/// \param[in] colors array containing the number of the color that each row is assigned to /// \param[in] colors array containing the number of the color that each row is assigned to
/// \param[in] numColors the total number of colors into which all rows have been divided
/// \param[inout] toOrder reorder pattern that lists for each index in the original order, to which index in the new order it should be moved /// \param[inout] toOrder reorder pattern that lists for each index in the original order, to which index in the new order it should be moved
/// \param[inout] fromOrder reorder pattern that lists for each index in the new order, from which index in the original order it was moved /// \param[inout] fromOrder reorder pattern that lists for each index in the new order, from which index in the original order it was moved
/// \param[inout] iters array containing for each color the number of rows that it contains /// \param[inout] rowsPerColor array containing for each color the number of rows that it contains
void colorsToReordering(int Nb, std::vector<int>& colors, int *toOrder, int *fromOrder, int *iters); void colorsToReordering(int Nb, std::vector<int>& colors, int numColors, int *toOrder, int *fromOrder, int *rowsPerColor);
/// Reorder a vector according to the mapping in toOrder and fromOrder /// Reorder a vector according to the mapping in fromOrder
/// The rVector array must be allocated already /// The rVector array must be allocated already
/// \param[in] Nb number of blocks in the vector /// \param[in] Nb number of blocks in the vector
/// \param[in] vector vector to be reordered /// \param[in] vector vector to be reordered
@ -67,7 +68,7 @@ void colorsToReordering(int Nb, std::vector<int>& colors, int *toOrder, int *fro
/// \param[in] fromOrder reorder pattern that lists for each index in the new order, from which index in the original order it was moved /// \param[in] fromOrder reorder pattern that lists for each index in the new order, from which index in the original order it was moved
/// \param[inout] rVector reordered vector /// \param[inout] rVector reordered vector
template <unsigned int block_size> template <unsigned int block_size>
void blocked_reorder_vector_by_pattern(int Nb, double *vector, int *fromOrder, double *rVector); void reorderBlockedVectorByPattern(int Nb, double *vector, int *fromOrder, double *rVector);
/// Determine whether all rows that a certain row depends on are done already /// Determine whether all rows that a certain row depends on are done already
/// \param[in] rowIndex index of the row that needs to be checked for /// \param[in] rowIndex index of the row that needs to be checked for
@ -75,7 +76,7 @@ void blocked_reorder_vector_by_pattern(int Nb, double *vector, int *fromOrder, d
/// \param[in] colIndices column indices of the matrix that the row is in /// \param[in] colIndices column indices of the matrix that the row is in
/// \param[in] doneRows array that for each row lists whether it is done or not /// \param[in] doneRows array that for each row lists whether it is done or not
/// \return true iff all dependencies are done and if the result itself was not done yet /// \return true iff all dependencies are done and if the result itself was not done yet
bool canBeStarted(int rowIndex, int *rowPointers, int *colIndices, std::vector<bool>& doneRows); bool canBeStarted(const int rowIndex, const int *rowPointers, const int *colIndices, const std::vector<bool>& doneRows);
/// Find a level scheduling reordering for an input matrix /// Find a level scheduling reordering for an input matrix
/// The toOrder and fromOrder arrays must be allocated already /// The toOrder and fromOrder arrays must be allocated already
@ -84,11 +85,11 @@ bool canBeStarted(int rowIndex, int *rowPointers, int *colIndices, std::vector<b
/// \param[in] CSCColIndices row indices array, obtained from storing the input matrix in the CSC format /// \param[in] CSCColIndices row indices array, obtained from storing the input matrix in the CSC format
/// \param[in] CSCRowPointers column pointers array, obtained from storing the input matrix in the CSC format /// \param[in] CSCRowPointers column pointers array, obtained from storing the input matrix in the CSC format
/// \param[in] Nb number of blockrows in the matrix /// \param[in] Nb number of blockrows in the matrix
/// \param[out] iters a pointer to the number of colors needed for the level scheduling /// \param[out] numColors a pointer to the number of colors needed for the level scheduling
/// \param[inout] toOrder the reorder pattern that was found, which lists for each index in the original order, to which index in the new order it should be moved /// \param[inout] toOrder the reorder pattern that was found, which lists for each index in the original order, to which index in the new order it should be moved
/// \param[inout] fromOrder the reorder pattern that was found, which lists for each index in the new order, from which index in the original order it was moved /// \param[inout] fromOrder the reorder pattern that was found, which lists for each index in the new order, from which index in the original order it was moved
/// \return a pointer to an array that contains for each color, the number of rows that that color contains /// \return a pointer to an array that contains for each color, the number of rows that that color contains
int* findLevelScheduling(int *CSRColIndices, int *CSRRowPointers, int *CSCColIndices, int *CSCRowPointers, int Nb, int *iters, int *toOrder, int* fromOrder); int* findLevelScheduling(int *CSRColIndices, int *CSRRowPointers, int *CSCColIndices, int *CSCRowPointers, int Nb, int *numColors, int *toOrder, int* fromOrder);
/// Find a graph coloring reordering for an input matrix /// Find a graph coloring reordering for an input matrix
/// The toOrder and fromOrder arrays must be allocated already /// The toOrder and fromOrder arrays must be allocated already
@ -102,20 +103,17 @@ int* findLevelScheduling(int *CSRColIndices, int *CSRRowPointers, int *CSCColInd
/// \param[inout] fromOrder the reorder pattern that was found, which lists for each index in the new order, from which index in the original order it was moved /// \param[inout] fromOrder the reorder pattern that was found, which lists for each index in the new order, from which index in the original order it was moved
/// \return a pointer to an array that contains for each color, the number of rows that that color contains /// \return a pointer to an array that contains for each color, the number of rows that that color contains
template <unsigned int block_size> template <unsigned int block_size>
int* findGraphColoring(int *colIndices, int *rowPointers, int Nb, int maxRowsPerColor, int maxColsPerColor, int *numColors, int *toOrder, int* fromOrder); int* findGraphColoring(const int *colIndices, const int *rowPointers, int Nb, int maxRowsPerColor, int maxColsPerColor, int *numColors, int *toOrder, int* fromOrder);
/// Convert BCSR matrix to BCSC /// Convert a sparsity pattern stored in the CSR format to the CSC format
/// Arrays for output matrix B must be allocated already /// CSCRowIndices and CSCColPointers arrays must be allocated already
/// Based on the csr_tocsc() function from the scipy package from python, https://github.com/scipy/scipy/blob/master/scipy/sparse/sparsetools/csr.h /// Based on the csr_tocsc() function from the scipy package from python, https://github.com/scipy/scipy/blob/master/scipy/sparse/sparsetools/csr.h
/// \param[in] Avals non-zero values of the BCSR matrix /// \param[in] CSRColIndices column indices of the CSR representation of the pattern
/// \param[in] Acols column indices of the BCSR matrix /// \param[in] CSRRowPointers row pointers of the CSR representation of the pattern
/// \param[in] Arows row pointers of the BCSR matrix /// \param[inout] CSCRowIndices row indices of the result CSC representation of the pattern
/// \param[inout] Bvals non-zero values of the result BCSC matrix /// \param[inout] CSCColPointers column pointers of the result CSC representation of the pattern
/// \param[inout] Bcols row indices of the result BCSC matrix
/// \param[inout] Brows column pointers of the result BCSC matrix
/// \param[in] Nb number of blockrows in the matrix /// \param[in] Nb number of blockrows in the matrix
template <unsigned int block_size> void csrPatternToCsc(int *CSRColIndices, int *CSRRowPointers, int *CSCRowIndices, int *CSCColPointers, int Nb);
void bcsr_to_bcsc(double *Avals, int *Acols, int *Arows, double *Bvals, int *Bcols, int *Brows, int Nb);
} }

View File

@ -318,9 +318,9 @@ void openclSolverBackend<block_size>::gpu_pbicgstab(WellContributions& wellContr
} }
if (verbosity >= 4) { if (verbosity >= 4) {
std::ostringstream out; std::ostringstream out;
out << "openclSolver::ily_apply: " << prec_time << "s\n"; out << "openclSolver::ily_apply: " << t_prec.elapsed() << "s\n";
out << "openclSolver::spmv: " << spmv_time << "s\n"; out << "openclSolver::spmv: " << t_spmv.elapsed() << "s\n";
out << "openclSolver::rest: " << rest_time << "s\n"; out << "openclSolver::rest: " << t_rest.elapsed() << "s\n";
out << "openclSolver::total_solve: " << res.elapsed << "s\n"; out << "openclSolver::total_solve: " << res.elapsed << "s\n";
OpmLog::info(out.str()); OpmLog::info(out.str());
} }
@ -614,7 +614,7 @@ void openclSolverBackend<block_size>::update_system(double *vals, double *b) {
Timer t; Timer t;
mat->nnzValues = vals; mat->nnzValues = vals;
blocked_reorder_vector_by_pattern<block_size>(mat->Nb, b, fromOrder, rb); reorderBlockedVectorByPattern<block_size>(mat->Nb, b, fromOrder, rb);
if (verbosity > 2) { if (verbosity > 2) {
std::ostringstream out; std::ostringstream out;
@ -662,7 +662,7 @@ void openclSolverBackend<block_size>::get_result(double *x) {
Timer t; Timer t;
queue->enqueueReadBuffer(d_x, CL_TRUE, 0, sizeof(double) * N, rb); queue->enqueueReadBuffer(d_x, CL_TRUE, 0, sizeof(double) * N, rb);
blocked_reorder_vector_by_pattern<block_size>(mat->Nb, rb, toOrder, x); reorderBlockedVectorByPattern<block_size>(mat->Nb, rb, toOrder, x);
if (verbosity > 2) { if (verbosity > 2) {
std::ostringstream out; std::ostringstream out;