mirror of
https://github.com/OPM/opm-simulators.git
synced 2024-12-27 09:40:59 -06:00
Renamed functions to all used Camel case, and renamed parameters to better represent what is stored in them.
Fixed mistake of using wrong sparsity pattern data to call canBeStarted function, and removed nnzValues of CSCmat, which were never used.
This commit is contained in:
parent
7f8faa018b
commit
38c58bffae
@ -48,19 +48,20 @@ namespace bda
|
||||
delete[] invDiagVals;
|
||||
delete[] diagIndex;
|
||||
delete[] rowsPerColor;
|
||||
freeBlockedMatrix(&LUMat);
|
||||
freeBlockedMatrix(&LMat);
|
||||
freeBlockedMatrix(&UMat);
|
||||
freeBlockedMatrix(&LUmat);
|
||||
freeBlockedMatrix(&Lmat);
|
||||
freeBlockedMatrix(&Umat);
|
||||
delete[] toOrder;
|
||||
delete[] fromOrder;
|
||||
freeBlockedMatrix(&rMat);
|
||||
freeBlockedMatrix(&rmat);
|
||||
}
|
||||
|
||||
template <unsigned int block_size>
|
||||
bool BILU0<block_size>::init(BlockedMatrix *mat)
|
||||
{
|
||||
const unsigned int bs = block_size;
|
||||
BlockedMatrix *CSCmat = nullptr;
|
||||
int *CSCRowIndices = nullptr;
|
||||
int *CSCColPointers = nullptr;
|
||||
|
||||
this->N = mat->Nb * block_size;
|
||||
this->Nb = mat->Nb;
|
||||
@ -70,14 +71,11 @@ namespace bda
|
||||
toOrder = new int[N];
|
||||
fromOrder = new int[N];
|
||||
if (level_scheduling) {
|
||||
CSCmat = new BlockedMatrix();
|
||||
CSCmat->Nb = Nb;
|
||||
CSCmat->nnzbs = nnzbs;
|
||||
CSCmat->nnzValues = new double[nnzbs * bs * bs];
|
||||
CSCmat->colIndices = new int[nnzbs];
|
||||
CSCmat->rowPointers = new int[Nb + 1];
|
||||
CSCRowIndices = new int[nnzbs];
|
||||
CSCColPointers = new int[Nb + 1];
|
||||
|
||||
Timer t_convert;
|
||||
bcsr_to_bcsc<block_size>(mat->nnzValues, mat->colIndices, mat->rowPointers, CSCmat->nnzValues, CSCmat->colIndices, CSCmat->rowPointers, mat->Nb);
|
||||
csrPatternToCsc(mat->colIndices, mat->rowPointers, CSCRowIndices, CSCColPointers, mat->Nb);
|
||||
if(verbosity >= 3){
|
||||
std::ostringstream out;
|
||||
out << "BILU0 convert CSR to CSC: " << t_convert.stop() << " s\n";
|
||||
@ -86,10 +84,11 @@ namespace bda
|
||||
}
|
||||
|
||||
Timer t_analysis;
|
||||
rMat = allocateBlockedMatrix<block_size>(mat->Nb, mat->nnzbs);
|
||||
LUMat = soft_copyBlockedMatrix(rMat);
|
||||
rmat = allocateBlockedMatrix<block_size>(mat->Nb, mat->nnzbs);
|
||||
LUmat = softCopyBlockedMatrix(rmat);
|
||||
|
||||
if (level_scheduling) {
|
||||
rowsPerColor = findLevelScheduling(mat->colIndices, mat->rowPointers, CSCmat->colIndices, CSCmat->rowPointers, mat->Nb, &numColors, toOrder, fromOrder);
|
||||
rowsPerColor = findLevelScheduling(mat->colIndices, mat->rowPointers, CSCRowIndices, CSCColPointers, mat->Nb, &numColors, toOrder, fromOrder);
|
||||
} else if (graph_coloring) {
|
||||
rowsPerColor = findGraphColoring<block_size>(mat->colIndices, mat->rowPointers, mat->Nb, mat->Nb, mat->Nb, &numColors, toOrder, fromOrder);
|
||||
}
|
||||
@ -105,34 +104,32 @@ namespace bda
|
||||
diagIndex = new int[mat->Nb];
|
||||
invDiagVals = new double[mat->Nb * bs * bs];
|
||||
|
||||
LMat = allocateBlockedMatrix<block_size>(mat->Nb, (mat->nnzbs - mat->Nb) / 2);
|
||||
UMat = allocateBlockedMatrix<block_size>(mat->Nb, (mat->nnzbs - mat->Nb) / 2);
|
||||
Lmat = allocateBlockedMatrix<block_size>(mat->Nb, (mat->nnzbs - mat->Nb) / 2);
|
||||
Umat = allocateBlockedMatrix<block_size>(mat->Nb, (mat->nnzbs - mat->Nb) / 2);
|
||||
|
||||
LUMat->nnzValues = new double[mat->nnzbs * bs * bs];
|
||||
LUmat->nnzValues = new double[mat->nnzbs * bs * bs];
|
||||
|
||||
if (level_scheduling) {
|
||||
delete[] CSCmat->nnzValues;
|
||||
delete[] CSCmat->colIndices;
|
||||
delete[] CSCmat->rowPointers;
|
||||
delete CSCmat;
|
||||
delete[] CSCRowIndices;
|
||||
delete[] CSCColPointers;
|
||||
}
|
||||
|
||||
|
||||
s.Lvals = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(double) * bs * bs * LMat->nnzbs);
|
||||
s.Uvals = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(double) * bs * bs * UMat->nnzbs);
|
||||
s.Lcols = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * LMat->nnzbs);
|
||||
s.Ucols = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * UMat->nnzbs);
|
||||
s.Lrows = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * (LMat->Nb + 1));
|
||||
s.Urows = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * (UMat->Nb + 1));
|
||||
s.Lvals = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(double) * bs * bs * Lmat->nnzbs);
|
||||
s.Uvals = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(double) * bs * bs * Umat->nnzbs);
|
||||
s.Lcols = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * Lmat->nnzbs);
|
||||
s.Ucols = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * Umat->nnzbs);
|
||||
s.Lrows = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * (Lmat->Nb + 1));
|
||||
s.Urows = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * (Umat->Nb + 1));
|
||||
s.invDiagVals = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(double) * bs * bs * mat->Nb);
|
||||
s.rowsPerColor = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * (numColors + 1));
|
||||
|
||||
queue->enqueueWriteBuffer(s.Lvals, CL_TRUE, 0, LMat->nnzbs * sizeof(double) * bs * bs, LMat->nnzValues);
|
||||
queue->enqueueWriteBuffer(s.Uvals, CL_TRUE, 0, UMat->nnzbs * sizeof(double) * bs * bs, UMat->nnzValues);
|
||||
queue->enqueueWriteBuffer(s.Lcols, CL_TRUE, 0, LMat->nnzbs * sizeof(int), LMat->colIndices);
|
||||
queue->enqueueWriteBuffer(s.Ucols, CL_TRUE, 0, UMat->nnzbs * sizeof(int), UMat->colIndices);
|
||||
queue->enqueueWriteBuffer(s.Lrows, CL_TRUE, 0, (LMat->Nb + 1) * sizeof(int), LMat->rowPointers);
|
||||
queue->enqueueWriteBuffer(s.Urows, CL_TRUE, 0, (UMat->Nb + 1) * sizeof(int), UMat->rowPointers);
|
||||
queue->enqueueWriteBuffer(s.Lvals, CL_TRUE, 0, Lmat->nnzbs * sizeof(double) * bs * bs, Lmat->nnzValues);
|
||||
queue->enqueueWriteBuffer(s.Uvals, CL_TRUE, 0, Umat->nnzbs * sizeof(double) * bs * bs, Umat->nnzValues);
|
||||
queue->enqueueWriteBuffer(s.Lcols, CL_TRUE, 0, Lmat->nnzbs * sizeof(int), Lmat->colIndices);
|
||||
queue->enqueueWriteBuffer(s.Ucols, CL_TRUE, 0, Umat->nnzbs * sizeof(int), Umat->colIndices);
|
||||
queue->enqueueWriteBuffer(s.Lrows, CL_TRUE, 0, (Lmat->Nb + 1) * sizeof(int), Lmat->rowPointers);
|
||||
queue->enqueueWriteBuffer(s.Urows, CL_TRUE, 0, (Umat->Nb + 1) * sizeof(int), Umat->rowPointers);
|
||||
queue->enqueueWriteBuffer(s.invDiagVals, CL_TRUE, 0, mat->Nb * sizeof(double) * bs * bs, invDiagVals);
|
||||
|
||||
int *rowsPerColorPrefix = new int[numColors + 1];
|
||||
@ -154,7 +151,7 @@ namespace bda
|
||||
const unsigned int bs = block_size;
|
||||
|
||||
Timer t_reorder;
|
||||
blocked_reorder_matrix_by_pattern<block_size>(mat, toOrder, fromOrder, rMat);
|
||||
reorderBlockedMatrixByPattern<block_size>(mat, toOrder, fromOrder, rmat);
|
||||
if (verbosity >= 3){
|
||||
std::ostringstream out;
|
||||
out << "BILU0 reorder matrix: " << t_reorder.stop() << " s";
|
||||
@ -163,7 +160,7 @@ namespace bda
|
||||
|
||||
// TODO: remove this copy by replacing inplace ilu decomp by out-of-place ilu decomp
|
||||
Timer t_copy;
|
||||
memcpy(LUMat->nnzValues, rMat->nnzValues, sizeof(double) * bs * bs * rMat->nnzbs);
|
||||
memcpy(LUmat->nnzValues, rmat->nnzValues, sizeof(double) * bs * bs * rmat->nnzbs);
|
||||
if (verbosity >= 3){
|
||||
std::ostringstream out;
|
||||
out << "BILU0 memcpy: " << t_copy.stop() << " s";
|
||||
@ -179,13 +176,13 @@ namespace bda
|
||||
Timer t_decomposition;
|
||||
|
||||
// go through all rows
|
||||
for (i = 0; i < LUMat->Nb; i++) {
|
||||
iRowStart = LUMat->rowPointers[i];
|
||||
iRowEnd = LUMat->rowPointers[i + 1];
|
||||
for (i = 0; i < LUmat->Nb; i++) {
|
||||
iRowStart = LUmat->rowPointers[i];
|
||||
iRowEnd = LUmat->rowPointers[i + 1];
|
||||
|
||||
// go through all elements of the row
|
||||
for (ij = iRowStart; ij < iRowEnd; ij++) {
|
||||
j = LUMat->colIndices[ij];
|
||||
j = LUmat->colIndices[ij];
|
||||
// if the element is the diagonal, store the index and go to next row
|
||||
if (j == i) {
|
||||
diagIndex[i] = ij;
|
||||
@ -202,21 +199,21 @@ namespace bda
|
||||
|
||||
LSize++;
|
||||
// calculate the pivot of this row
|
||||
blockMult<bs>(LUMat->nnzValues + ij * bs * bs, invDiagVals + j * bs * bs, &pivot[0]);
|
||||
blockMult<bs>(LUmat->nnzValues + ij * bs * bs, invDiagVals + j * bs * bs, &pivot[0]);
|
||||
|
||||
memcpy(LUMat->nnzValues + ij * bs * bs, &pivot[0], sizeof(double) * block_size * block_size);
|
||||
memcpy(LUmat->nnzValues + ij * bs * bs, &pivot[0], sizeof(double) * block_size * block_size);
|
||||
|
||||
jRowEnd = LUMat->rowPointers[j + 1];
|
||||
jRowEnd = LUmat->rowPointers[j + 1];
|
||||
jk = diagIndex[j] + 1;
|
||||
ik = ij + 1;
|
||||
// substract that row scaled by the pivot from this row.
|
||||
while (ik < iRowEnd && jk < jRowEnd) {
|
||||
if (LUMat->colIndices[ik] == LUMat->colIndices[jk]) {
|
||||
blockMultSub<bs>(LUMat->nnzValues + ik * bs * bs, pivot, LUMat->nnzValues + jk * bs * bs);
|
||||
if (LUmat->colIndices[ik] == LUmat->colIndices[jk]) {
|
||||
blockMultSub<bs>(LUmat->nnzValues + ik * bs * bs, pivot, LUmat->nnzValues + jk * bs * bs);
|
||||
ik++;
|
||||
jk++;
|
||||
} else {
|
||||
if (LUMat->colIndices[ik] < LUMat->colIndices[jk])
|
||||
if (LUmat->colIndices[ik] < LUmat->colIndices[jk])
|
||||
{ ik++; }
|
||||
else
|
||||
{ jk++; }
|
||||
@ -224,35 +221,35 @@ namespace bda
|
||||
}
|
||||
}
|
||||
// store the inverse in the diagonal!
|
||||
blockInvert3x3(LUMat->nnzValues + ij * bs * bs, invDiagVals + i * bs * bs);
|
||||
blockInvert3x3(LUmat->nnzValues + ij * bs * bs, invDiagVals + i * bs * bs);
|
||||
|
||||
memcpy(LUMat->nnzValues + ij * bs * bs, invDiagVals + i * bs * bs, sizeof(double) * bs * bs);
|
||||
memcpy(LUmat->nnzValues + ij * bs * bs, invDiagVals + i * bs * bs, sizeof(double) * bs * bs);
|
||||
}
|
||||
|
||||
LMat->rowPointers[0] = 0;
|
||||
UMat->rowPointers[0] = 0;
|
||||
Lmat->rowPointers[0] = 0;
|
||||
Umat->rowPointers[0] = 0;
|
||||
|
||||
// Split the LU matrix into two by comparing column indices to diagonal indices
|
||||
for (i = 0; i < LUMat->Nb; i++) {
|
||||
int offsetL = LMat->rowPointers[i];
|
||||
int rowSize = diagIndex[i] - LUMat->rowPointers[i];
|
||||
int offsetLU = LUMat->rowPointers[i];
|
||||
memcpy(LMat->nnzValues + offsetL * bs * bs, LUMat->nnzValues + offsetLU * bs * bs, sizeof(double) * bs * bs * rowSize);
|
||||
memcpy(LMat->colIndices + offsetL, LUMat->colIndices + offsetLU, sizeof(int) * rowSize);
|
||||
for (i = 0; i < LUmat->Nb; i++) {
|
||||
int offsetL = Lmat->rowPointers[i];
|
||||
int rowSize = diagIndex[i] - LUmat->rowPointers[i];
|
||||
int offsetLU = LUmat->rowPointers[i];
|
||||
memcpy(Lmat->nnzValues + offsetL * bs * bs, LUmat->nnzValues + offsetLU * bs * bs, sizeof(double) * bs * bs * rowSize);
|
||||
memcpy(Lmat->colIndices + offsetL, LUmat->colIndices + offsetLU, sizeof(int) * rowSize);
|
||||
offsetL += rowSize;
|
||||
LMat->rowPointers[i + 1] = offsetL;
|
||||
Lmat->rowPointers[i + 1] = offsetL;
|
||||
}
|
||||
// Reverse the order or the (blocked) rows for the U matrix,
|
||||
// because the rows are accessed in reverse order when applying the ILU0
|
||||
int URowIndex = 0;
|
||||
for (i = LUMat->Nb - 1; i >= 0; i--) {
|
||||
int offsetU = UMat->rowPointers[URowIndex];
|
||||
int rowSize = LUMat->rowPointers[i + 1] - diagIndex[i] - 1;
|
||||
for (i = LUmat->Nb - 1; i >= 0; i--) {
|
||||
int offsetU = Umat->rowPointers[URowIndex];
|
||||
int rowSize = LUmat->rowPointers[i + 1] - diagIndex[i] - 1;
|
||||
int offsetLU = diagIndex[i] + 1;
|
||||
memcpy(UMat->nnzValues + offsetU * bs * bs, LUMat->nnzValues + offsetLU * bs * bs, sizeof(double) * bs * bs * rowSize);
|
||||
memcpy(UMat->colIndices + offsetU, LUMat->colIndices + offsetLU, sizeof(int) * rowSize);
|
||||
memcpy(Umat->nnzValues + offsetU * bs * bs, LUmat->nnzValues + offsetLU * bs * bs, sizeof(double) * bs * bs * rowSize);
|
||||
memcpy(Umat->colIndices + offsetU, LUmat->colIndices + offsetLU, sizeof(int) * rowSize);
|
||||
offsetU += rowSize;
|
||||
UMat->rowPointers[URowIndex + 1] = offsetU;
|
||||
Umat->rowPointers[URowIndex + 1] = offsetU;
|
||||
URowIndex++;
|
||||
}
|
||||
if (verbosity >= 3) {
|
||||
@ -263,14 +260,14 @@ namespace bda
|
||||
|
||||
Timer t_copyToGpu;
|
||||
if (pattern_uploaded == false) {
|
||||
queue->enqueueWriteBuffer(s.Lcols, CL_TRUE, 0, LMat->nnzbs * sizeof(int), LMat->colIndices);
|
||||
queue->enqueueWriteBuffer(s.Ucols, CL_TRUE, 0, UMat->nnzbs * sizeof(int), UMat->colIndices);
|
||||
queue->enqueueWriteBuffer(s.Lrows, CL_TRUE, 0, (LMat->Nb + 1) * sizeof(int), LMat->rowPointers);
|
||||
queue->enqueueWriteBuffer(s.Urows, CL_TRUE, 0, (UMat->Nb + 1) * sizeof(int), UMat->rowPointers);
|
||||
queue->enqueueWriteBuffer(s.Lcols, CL_TRUE, 0, Lmat->nnzbs * sizeof(int), Lmat->colIndices);
|
||||
queue->enqueueWriteBuffer(s.Ucols, CL_TRUE, 0, Umat->nnzbs * sizeof(int), Umat->colIndices);
|
||||
queue->enqueueWriteBuffer(s.Lrows, CL_TRUE, 0, (Lmat->Nb + 1) * sizeof(int), Lmat->rowPointers);
|
||||
queue->enqueueWriteBuffer(s.Urows, CL_TRUE, 0, (Umat->Nb + 1) * sizeof(int), Umat->rowPointers);
|
||||
pattern_uploaded = true;
|
||||
}
|
||||
queue->enqueueWriteBuffer(s.Lvals, CL_TRUE, 0, LMat->nnzbs * sizeof(double) * bs * bs, LMat->nnzValues);
|
||||
queue->enqueueWriteBuffer(s.Uvals, CL_TRUE, 0, UMat->nnzbs * sizeof(double) * bs * bs, UMat->nnzValues);
|
||||
queue->enqueueWriteBuffer(s.Lvals, CL_TRUE, 0, Lmat->nnzbs * sizeof(double) * bs * bs, Lmat->nnzValues);
|
||||
queue->enqueueWriteBuffer(s.Uvals, CL_TRUE, 0, Umat->nnzbs * sizeof(double) * bs * bs, Umat->nnzValues);
|
||||
queue->enqueueWriteBuffer(s.invDiagVals, CL_TRUE, 0, Nb * sizeof(double) * bs * bs, invDiagVals);
|
||||
if (verbosity >= 3) {
|
||||
std::ostringstream out;
|
||||
|
@ -39,8 +39,8 @@ namespace bda
|
||||
int Nb; // number of blockrows of the matrix
|
||||
int nnz; // number of nonzeroes of the matrix (scalar)
|
||||
int nnzbs; // number of blocks of the matrix
|
||||
BlockedMatrix *LMat, *UMat, *LUMat;
|
||||
BlockedMatrix *rMat = nullptr; // only used with PAR_SIM
|
||||
BlockedMatrix *Lmat, *Umat, *LUmat;
|
||||
BlockedMatrix *rmat = nullptr; // only used with PAR_SIM
|
||||
double *invDiagVals;
|
||||
int *diagIndex, *rowsPerColor;
|
||||
int *toOrder, *fromOrder;
|
||||
@ -101,7 +101,7 @@ namespace bda
|
||||
|
||||
BlockedMatrix* getRMat()
|
||||
{
|
||||
return rMat;
|
||||
return rmat;
|
||||
}
|
||||
|
||||
};
|
||||
|
@ -50,7 +50,7 @@ void freeBlockedMatrix(BlockedMatrix **mat) {
|
||||
}
|
||||
}
|
||||
|
||||
BlockedMatrix *soft_copyBlockedMatrix(BlockedMatrix *mat) {
|
||||
BlockedMatrix *softCopyBlockedMatrix(BlockedMatrix *mat) {
|
||||
BlockedMatrix *res = new BlockedMatrix();
|
||||
res->nnzValues = mat->nnzValues;
|
||||
res->colIndices = mat->colIndices;
|
||||
|
@ -41,7 +41,7 @@ BlockedMatrix *allocateBlockedMatrix(int Nb, int nnzbs);
|
||||
/// Allocate BlockedMatrix, but copy data pointers instead of allocating new memory
|
||||
/// \param[in] mat matrix to be copied
|
||||
/// \return pointer to BlockedMatrix
|
||||
BlockedMatrix *soft_copyBlockedMatrix(BlockedMatrix *mat);
|
||||
BlockedMatrix *softCopyBlockedMatrix(BlockedMatrix *mat);
|
||||
|
||||
/// Free BlockedMatrix and its data
|
||||
/// \param[in] mat matrix to be free'd
|
||||
|
@ -35,7 +35,9 @@ namespace bda
|
||||
|
||||
/* Give every node in the matrix (of which only the sparsity pattern in the
|
||||
* form of row pointers and column indices arrays are in the input), a color
|
||||
* in the colors array. Also return the amount of colors in the return integer. */
|
||||
* in the colors array. Also return the amount of colors in the return integer.
|
||||
* This graph-coloring algorithm is based on the Jones-Plassmann algorithm, proposed in:
|
||||
* "A Parallel Graph Coloring Heuristic" by M.T. Jones and P.E. Plassmann in SIAM Journal of Scientific Computing 14 (1993) */
|
||||
|
||||
template <unsigned int block_size>
|
||||
int colorBlockedNodes(int rows, const int *rowPointers, const int *colIndices, std::vector<int>& colors, int maxRowsPerColor, int maxColsPerColor)
|
||||
@ -145,7 +147,7 @@ int colorBlockedNodes(int rows, const int *rowPointers, const int *colIndices, s
|
||||
* and the from order, which contains for every node in the new matrix where it came from in the old matrix.*/
|
||||
|
||||
template <unsigned int block_size>
|
||||
void blocked_reorder_matrix_by_pattern(BlockedMatrix *mat, int *toOrder, int *fromOrder, BlockedMatrix *rMat) {
|
||||
void reorderBlockedMatrixByPattern(BlockedMatrix *mat, int *toOrder, int *fromOrder, BlockedMatrix *rMat) {
|
||||
const unsigned int bs = block_size;
|
||||
int rIndex = 0;
|
||||
int i, k;
|
||||
@ -176,18 +178,18 @@ void blocked_reorder_matrix_by_pattern(BlockedMatrix *mat, int *toOrder, int *fr
|
||||
|
||||
/* Reorder a matrix according to the colors that every node of the matrix has received*/
|
||||
|
||||
void colorsToReordering(int Nb, std::vector<int>& colors, int *toOrder, int *fromOrder, int *iters) {
|
||||
void colorsToReordering(int Nb, std::vector<int>& colors, int numColors, int *toOrder, int *fromOrder, int *rowsPerColor) {
|
||||
int reordered = 0;
|
||||
int i, c;
|
||||
for (i = 0; i < MAX_COLORS; i++) {
|
||||
iters[i] = 0;
|
||||
for (i = 0; i < numColors; i++) {
|
||||
rowsPerColor[i] = 0;
|
||||
}
|
||||
|
||||
// Find reordering patterns
|
||||
for (c = 0; c < MAX_COLORS; c++) {
|
||||
for (c = 0; c < numColors; c++) {
|
||||
for (i = 0; i < Nb; i++) {
|
||||
if (colors[i] == c) {
|
||||
iters[c]++;
|
||||
rowsPerColor[c]++;
|
||||
toOrder[i] = reordered;
|
||||
|
||||
fromOrder[reordered] = i;
|
||||
@ -197,10 +199,10 @@ void colorsToReordering(int Nb, std::vector<int>& colors, int *toOrder, int *fro
|
||||
}
|
||||
}
|
||||
|
||||
// Reorder a matrix according to a reordering pattern
|
||||
// Reorder a vector according to a reordering pattern
|
||||
|
||||
template <unsigned int block_size>
|
||||
void blocked_reorder_vector_by_pattern(int Nb, double *vector, int *fromOrder, double *rVector) {
|
||||
void reorderBlockedVectorByPattern(int Nb, double *vector, int *fromOrder, double *rVector) {
|
||||
for (int i = 0; i < Nb; i++) {
|
||||
for (unsigned int j = 0; j < block_size; j++) {
|
||||
rVector[block_size * i + j] = vector[block_size * fromOrder[i] + j];
|
||||
@ -212,7 +214,7 @@ void blocked_reorder_vector_by_pattern(int Nb, double *vector, int *fromOrder, d
|
||||
/* Check is operations on a node in the matrix can be started
|
||||
* A node can only be started if all nodes that it depends on during sequential execution have already completed.*/
|
||||
|
||||
bool canBeStarted(int rowIndex, int *rowPointers, int *colIndices, std::vector<bool>& doneRows) {
|
||||
bool canBeStarted(const int rowIndex, const int *rowPointers, const int *colIndices, const std::vector<bool>& doneRows) {
|
||||
bool canStart = !doneRows[rowIndex];
|
||||
int i, thisDependency;
|
||||
if (canStart) {
|
||||
@ -231,41 +233,43 @@ bool canBeStarted(int rowIndex, int *rowPointers, int *colIndices, std::vector<b
|
||||
}
|
||||
|
||||
/*
|
||||
* The level scheduling of a non-symmetric, blocked matrix requires access to a CSC encoding and a CSR encoding of the same matrix.
|
||||
*/
|
||||
* The level scheduling of a non-symmetric, blocked matrix requires access to a CSC encoding and a CSR encoding of the sparsity pattern of the input matrix.
|
||||
* This function is based on a standard level scheduling algorithm, like the one described in:
|
||||
* "Iterative methods for Sparse Linear Systems" by Yousef Saad in section 11.6.3
|
||||
*/
|
||||
|
||||
int *findLevelScheduling(int *CSRColIndices, int *CSRRowPointers, int *CSCColIndices, int *CSCRowPointers, int Nb, int *iters, int *toOrder, int* fromOrder) {
|
||||
int activeRowIndex = 0, iterEnd, nextActiveRowIndex = 0;
|
||||
int *findLevelScheduling(int *CSRColIndices, int *CSRRowPointers, int *CSCRowIndices, int *CSCColPointers, int Nb, int *numColors, int *toOrder, int* fromOrder) {
|
||||
int activeRowIndex = 0, colorEnd, nextActiveRowIndex = 0;
|
||||
int thisRow;
|
||||
std::vector<bool> doneRows(Nb, false);
|
||||
std::vector<int> rowsPerIter;
|
||||
rowsPerIter.reserve(Nb);
|
||||
int *resRowsPerIter;
|
||||
std::vector<int> rowsPerColor;
|
||||
rowsPerColor.reserve(Nb);
|
||||
int *resRowsPerColor;
|
||||
|
||||
std::vector <int> rowsToStart;
|
||||
|
||||
// find starting rows: rows that are independent from all rows that come before them.
|
||||
for (thisRow = 0; thisRow < Nb; thisRow++) {
|
||||
if (canBeStarted(thisRow, CSRRowPointers, CSCColIndices, doneRows)) {
|
||||
if (canBeStarted(thisRow, CSCColPointers, CSCRowIndices, doneRows)) {
|
||||
fromOrder[nextActiveRowIndex] = thisRow;
|
||||
toOrder[thisRow] = nextActiveRowIndex;
|
||||
nextActiveRowIndex++;
|
||||
}
|
||||
}
|
||||
// 'do' compute on all active rows
|
||||
for (iterEnd = 0; iterEnd < nextActiveRowIndex; iterEnd++) {
|
||||
doneRows[fromOrder[iterEnd]] = true;
|
||||
for (colorEnd = 0; colorEnd < nextActiveRowIndex; colorEnd++) {
|
||||
doneRows[fromOrder[colorEnd]] = true;
|
||||
}
|
||||
|
||||
rowsPerIter.emplace_back(nextActiveRowIndex - activeRowIndex);
|
||||
rowsPerColor.emplace_back(nextActiveRowIndex - activeRowIndex);
|
||||
|
||||
while (iterEnd < Nb) {
|
||||
// Go over all rows active from the last iteration, and check which of their neighbours can be activated this iteration
|
||||
for (; activeRowIndex < iterEnd; activeRowIndex++) {
|
||||
while (colorEnd < Nb) {
|
||||
// Go over all rows active from the last color, and check which of their neighbours can be activated this color
|
||||
for (; activeRowIndex < colorEnd; activeRowIndex++) {
|
||||
thisRow = fromOrder[activeRowIndex];
|
||||
|
||||
for (int i = CSCRowPointers[thisRow]; i < CSCRowPointers[thisRow + 1]; i++) {
|
||||
int thatRow = CSCColIndices[i];
|
||||
for (int i = CSCColPointers[thisRow]; i < CSCColPointers[thisRow + 1]; i++) {
|
||||
int thatRow = CSCRowIndices[i];
|
||||
|
||||
if (canBeStarted(thatRow, CSRRowPointers, CSRColIndices, doneRows)) {
|
||||
rowsToStart.emplace_back(thatRow);
|
||||
@ -282,89 +286,79 @@ int *findLevelScheduling(int *CSRColIndices, int *CSRRowPointers, int *CSCColInd
|
||||
nextActiveRowIndex++;
|
||||
}
|
||||
}
|
||||
iterEnd = nextActiveRowIndex;
|
||||
rowsPerIter.emplace_back(nextActiveRowIndex - activeRowIndex);
|
||||
colorEnd = nextActiveRowIndex;
|
||||
rowsPerColor.emplace_back(nextActiveRowIndex - activeRowIndex);
|
||||
}
|
||||
// Crop the rowsPerIter array to it minimum size.
|
||||
int numColors = rowsPerIter.size();
|
||||
resRowsPerIter = new int[numColors];
|
||||
for (int i = 0; i < numColors; i++) {
|
||||
resRowsPerIter[i] = rowsPerIter[i];
|
||||
// Crop the rowsPerColor array to it minimum size.
|
||||
resRowsPerColor = new int[rowsPerColor.size()];
|
||||
for (unsigned int i = 0; i < rowsPerColor.size(); i++) {
|
||||
resRowsPerColor[i] = rowsPerColor[i];
|
||||
}
|
||||
|
||||
*iters = rowsPerIter.size();
|
||||
*numColors = rowsPerColor.size();
|
||||
|
||||
return resRowsPerIter;
|
||||
return resRowsPerColor;
|
||||
}
|
||||
|
||||
/* Perform the complete graph coloring algorithm on a matrix. Return an array with the amount of nodes per color.*/
|
||||
|
||||
template <unsigned int block_size>
|
||||
int* findGraphColoring(int *colIndices, int *rowPointers, int Nb, int maxRowsPerColor, int maxColsPerColor, int *numColors, int *toOrder, int* fromOrder) {
|
||||
int* findGraphColoring(const int *colIndices, const int *rowPointers, int Nb, int maxRowsPerColor, int maxColsPerColor, int *numColors, int *toOrder, int* fromOrder) {
|
||||
std::vector<int> rowColor;
|
||||
rowColor.resize(Nb);
|
||||
int *rowsPerColor = new int[MAX_COLORS];
|
||||
|
||||
colorBlockedNodes<block_size>(Nb, rowPointers, colIndices, rowColor, maxRowsPerColor, maxColsPerColor);
|
||||
*numColors = colorBlockedNodes<block_size>(Nb, rowPointers, colIndices, rowColor, maxRowsPerColor, maxColsPerColor);
|
||||
|
||||
colorsToReordering(Nb, rowColor, toOrder, fromOrder, rowsPerColor);
|
||||
colorsToReordering(Nb, rowColor, *numColors, toOrder, fromOrder, rowsPerColor);
|
||||
|
||||
*numColors = MAX_COLORS;
|
||||
while (rowsPerColor[*numColors - 1] == 0) {
|
||||
*numColors = *numColors - 1;
|
||||
}
|
||||
// The rowsPerColor array contains a non-zero value for each color denoting how many rows are in that color. It has a size of *numColors.
|
||||
|
||||
return rowsPerColor;
|
||||
}
|
||||
|
||||
// based on the scipy package from python, scipy/sparse/sparsetools/csr.h on github
|
||||
// input : matrix A via Avals, Acols, Arows, Nb
|
||||
// output: matrix B via Bvals, Bcols, Brows
|
||||
// arrays for B must be preallocated
|
||||
template <unsigned int block_size>
|
||||
void bcsr_to_bcsc(double *Avals, int *Acols, int *Arows, double *Bvals, int *Bcols, int *Brows, int Nb) {
|
||||
void csrPatternToCsc(int *CSRColIndices, int *CSRRowPointers, int *CSCRowIndices, int *CSCColPointers, int Nb) {
|
||||
|
||||
int nnz = Arows[Nb];
|
||||
int nnz = CSRRowPointers[Nb];
|
||||
|
||||
// compute number of nnzs per column
|
||||
std::fill(Brows, Brows + Nb, 0);
|
||||
std::fill(CSCColPointers, CSCColPointers + Nb, 0);
|
||||
|
||||
for (int n = 0; n < nnz; ++n) {
|
||||
Brows[Acols[n]]++;
|
||||
CSCColPointers[CSRColIndices[n]]++;
|
||||
}
|
||||
|
||||
// cumsum the nnz per col to get Brows
|
||||
// cumsum the nnz per col to get CSCColPointers
|
||||
for (int col = 0, cumsum = 0; col < Nb; ++col) {
|
||||
int temp = Brows[col];
|
||||
Brows[col] = cumsum;
|
||||
int temp = CSCColPointers[col];
|
||||
CSCColPointers[col] = cumsum;
|
||||
cumsum += temp;
|
||||
}
|
||||
Brows[Nb] = nnz;
|
||||
CSCColPointers[Nb] = nnz;
|
||||
|
||||
for (int row = 0; row < Nb; ++row) {
|
||||
for (int j = Arows[row]; j < Arows[row + 1]; ++j) {
|
||||
int col = Acols[j];
|
||||
int dest = Brows[col];
|
||||
Bcols[dest] = row;
|
||||
memcpy(Bvals + dest, Avals + dest, sizeof(double) * block_size * block_size);
|
||||
Brows[col]++;
|
||||
for (int j = CSRRowPointers[row]; j < CSRRowPointers[row + 1]; ++j) {
|
||||
int col = CSRColIndices[j];
|
||||
int dest = CSCColPointers[col];
|
||||
CSCRowIndices[dest] = row;
|
||||
CSCColPointers[col]++;
|
||||
}
|
||||
}
|
||||
|
||||
for (int col = 0, last = 0; col <= Nb; ++col) {
|
||||
int temp = Brows[col];
|
||||
Brows[col] = last;
|
||||
int temp = CSCColPointers[col];
|
||||
CSCColPointers[col] = last;
|
||||
last = temp;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#define INSTANTIATE_BDA_FUNCTIONS(n) \
|
||||
template int colorBlockedNodes<n>(int, const int *, const int *, std::vector<int>&, int, int); \
|
||||
template void blocked_reorder_matrix_by_pattern<n>(BlockedMatrix *, int *, int *, BlockedMatrix *); \
|
||||
template void blocked_reorder_vector_by_pattern<n>(int, double*, int*, double*); \
|
||||
template int* findGraphColoring<n>(int *, int *, int, int, int, int *, int *, int *); \
|
||||
template void bcsr_to_bcsc<n>(double *, int *, int *, double *, int *, int *, int); \
|
||||
#define INSTANTIATE_BDA_FUNCTIONS(n) \
|
||||
template int colorBlockedNodes<n>(int, const int *, const int *, std::vector<int>&, int, int); \
|
||||
template void reorderBlockedMatrixByPattern<n>(BlockedMatrix *, int *, int *, BlockedMatrix *); \
|
||||
template void reorderBlockedVectorByPattern<n>(int, double*, int*, double*); \
|
||||
template int* findGraphColoring<n>(const int *, const int *, int, int, int, int *, int *, int *); \
|
||||
|
||||
INSTANTIATE_BDA_FUNCTIONS(1);
|
||||
INSTANTIATE_BDA_FUNCTIONS(2);
|
||||
|
@ -48,18 +48,19 @@ int colorBlockedNodes(int rows, const int *rowPointers, const int *colIndices, s
|
||||
/// \param[in] fromOrder reorder pattern that lists for each index in the new order, from which index in the original order it was moved
|
||||
/// \param[inout] rMat reordered Matrix
|
||||
template <unsigned int block_size>
|
||||
void blocked_reorder_matrix_by_pattern(BlockedMatrix *mat, int *toOrder, int *fromOrder, BlockedMatrix *rMat);
|
||||
void reorderBlockedMatrixByPattern(BlockedMatrix *mat, int *toOrder, int *fromOrder, BlockedMatrix *rMat);
|
||||
|
||||
/// Compute reorder mapping from the color that each node has received
|
||||
/// The toOrder, fromOrder and iters arrays must be allocated already
|
||||
/// \param[in] Nb number of blocks in the vector
|
||||
/// \param[in] colors array containing the number of the color that each row is assigned to
|
||||
/// \param[inout] toOrder reorder pattern that lists for each index in the original order, to which index in the new order it should be moved
|
||||
/// \param[inout] fromOrder reorder pattern that lists for each index in the new order, from which index in the original order it was moved
|
||||
/// \param[inout] iters array containing for each color the number of rows that it contains
|
||||
void colorsToReordering(int Nb, std::vector<int>& colors, int *toOrder, int *fromOrder, int *iters);
|
||||
/// \param[in] Nb number of blocks in the vector
|
||||
/// \param[in] colors array containing the number of the color that each row is assigned to
|
||||
/// \param[in] numColors the total number of colors into which all rows have been divided
|
||||
/// \param[inout] toOrder reorder pattern that lists for each index in the original order, to which index in the new order it should be moved
|
||||
/// \param[inout] fromOrder reorder pattern that lists for each index in the new order, from which index in the original order it was moved
|
||||
/// \param[inout] rowsPerColor array containing for each color the number of rows that it contains
|
||||
void colorsToReordering(int Nb, std::vector<int>& colors, int numColors, int *toOrder, int *fromOrder, int *rowsPerColor);
|
||||
|
||||
/// Reorder a vector according to the mapping in toOrder and fromOrder
|
||||
/// Reorder a vector according to the mapping in fromOrder
|
||||
/// The rVector array must be allocated already
|
||||
/// \param[in] Nb number of blocks in the vector
|
||||
/// \param[in] vector vector to be reordered
|
||||
@ -67,7 +68,7 @@ void colorsToReordering(int Nb, std::vector<int>& colors, int *toOrder, int *fro
|
||||
/// \param[in] fromOrder reorder pattern that lists for each index in the new order, from which index in the original order it was moved
|
||||
/// \param[inout] rVector reordered vector
|
||||
template <unsigned int block_size>
|
||||
void blocked_reorder_vector_by_pattern(int Nb, double *vector, int *fromOrder, double *rVector);
|
||||
void reorderBlockedVectorByPattern(int Nb, double *vector, int *fromOrder, double *rVector);
|
||||
|
||||
/// Determine whether all rows that a certain row depends on are done already
|
||||
/// \param[in] rowIndex index of the row that needs to be checked for
|
||||
@ -75,7 +76,7 @@ void blocked_reorder_vector_by_pattern(int Nb, double *vector, int *fromOrder, d
|
||||
/// \param[in] colIndices column indices of the matrix that the row is in
|
||||
/// \param[in] doneRows array that for each row lists whether it is done or not
|
||||
/// \return true iff all dependencies are done and if the result itself was not done yet
|
||||
bool canBeStarted(int rowIndex, int *rowPointers, int *colIndices, std::vector<bool>& doneRows);
|
||||
bool canBeStarted(const int rowIndex, const int *rowPointers, const int *colIndices, const std::vector<bool>& doneRows);
|
||||
|
||||
/// Find a level scheduling reordering for an input matrix
|
||||
/// The toOrder and fromOrder arrays must be allocated already
|
||||
@ -84,11 +85,11 @@ bool canBeStarted(int rowIndex, int *rowPointers, int *colIndices, std::vector<b
|
||||
/// \param[in] CSCColIndices row indices array, obtained from storing the input matrix in the CSC format
|
||||
/// \param[in] CSCRowPointers column pointers array, obtained from storing the input matrix in the CSC format
|
||||
/// \param[in] Nb number of blockrows in the matrix
|
||||
/// \param[out] iters a pointer to the number of colors needed for the level scheduling
|
||||
/// \param[out] numColors a pointer to the number of colors needed for the level scheduling
|
||||
/// \param[inout] toOrder the reorder pattern that was found, which lists for each index in the original order, to which index in the new order it should be moved
|
||||
/// \param[inout] fromOrder the reorder pattern that was found, which lists for each index in the new order, from which index in the original order it was moved
|
||||
/// \return a pointer to an array that contains for each color, the number of rows that that color contains
|
||||
int* findLevelScheduling(int *CSRColIndices, int *CSRRowPointers, int *CSCColIndices, int *CSCRowPointers, int Nb, int *iters, int *toOrder, int* fromOrder);
|
||||
int* findLevelScheduling(int *CSRColIndices, int *CSRRowPointers, int *CSCColIndices, int *CSCRowPointers, int Nb, int *numColors, int *toOrder, int* fromOrder);
|
||||
|
||||
/// Find a graph coloring reordering for an input matrix
|
||||
/// The toOrder and fromOrder arrays must be allocated already
|
||||
@ -102,20 +103,17 @@ int* findLevelScheduling(int *CSRColIndices, int *CSRRowPointers, int *CSCColInd
|
||||
/// \param[inout] fromOrder the reorder pattern that was found, which lists for each index in the new order, from which index in the original order it was moved
|
||||
/// \return a pointer to an array that contains for each color, the number of rows that that color contains
|
||||
template <unsigned int block_size>
|
||||
int* findGraphColoring(int *colIndices, int *rowPointers, int Nb, int maxRowsPerColor, int maxColsPerColor, int *numColors, int *toOrder, int* fromOrder);
|
||||
int* findGraphColoring(const int *colIndices, const int *rowPointers, int Nb, int maxRowsPerColor, int maxColsPerColor, int *numColors, int *toOrder, int* fromOrder);
|
||||
|
||||
/// Convert BCSR matrix to BCSC
|
||||
/// Arrays for output matrix B must be allocated already
|
||||
/// Convert a sparsity pattern stored in the CSR format to the CSC format
|
||||
/// CSCRowIndices and CSCColPointers arrays must be allocated already
|
||||
/// Based on the csr_tocsc() function from the scipy package from python, https://github.com/scipy/scipy/blob/master/scipy/sparse/sparsetools/csr.h
|
||||
/// \param[in] Avals non-zero values of the BCSR matrix
|
||||
/// \param[in] Acols column indices of the BCSR matrix
|
||||
/// \param[in] Arows row pointers of the BCSR matrix
|
||||
/// \param[inout] Bvals non-zero values of the result BCSC matrix
|
||||
/// \param[inout] Bcols row indices of the result BCSC matrix
|
||||
/// \param[inout] Brows column pointers of the result BCSC matrix
|
||||
/// \param[in] Nb number of blockrows in the matrix
|
||||
template <unsigned int block_size>
|
||||
void bcsr_to_bcsc(double *Avals, int *Acols, int *Arows, double *Bvals, int *Bcols, int *Brows, int Nb);
|
||||
/// \param[in] CSRColIndices column indices of the CSR representation of the pattern
|
||||
/// \param[in] CSRRowPointers row pointers of the CSR representation of the pattern
|
||||
/// \param[inout] CSCRowIndices row indices of the result CSC representation of the pattern
|
||||
/// \param[inout] CSCColPointers column pointers of the result CSC representation of the pattern
|
||||
/// \param[in] Nb number of blockrows in the matrix
|
||||
void csrPatternToCsc(int *CSRColIndices, int *CSRRowPointers, int *CSCRowIndices, int *CSCColPointers, int Nb);
|
||||
|
||||
}
|
||||
|
||||
|
@ -318,9 +318,9 @@ void openclSolverBackend<block_size>::gpu_pbicgstab(WellContributions& wellContr
|
||||
}
|
||||
if (verbosity >= 4) {
|
||||
std::ostringstream out;
|
||||
out << "openclSolver::ily_apply: " << prec_time << "s\n";
|
||||
out << "openclSolver::spmv: " << spmv_time << "s\n";
|
||||
out << "openclSolver::rest: " << rest_time << "s\n";
|
||||
out << "openclSolver::ily_apply: " << t_prec.elapsed() << "s\n";
|
||||
out << "openclSolver::spmv: " << t_spmv.elapsed() << "s\n";
|
||||
out << "openclSolver::rest: " << t_rest.elapsed() << "s\n";
|
||||
out << "openclSolver::total_solve: " << res.elapsed << "s\n";
|
||||
OpmLog::info(out.str());
|
||||
}
|
||||
@ -614,7 +614,7 @@ void openclSolverBackend<block_size>::update_system(double *vals, double *b) {
|
||||
Timer t;
|
||||
|
||||
mat->nnzValues = vals;
|
||||
blocked_reorder_vector_by_pattern<block_size>(mat->Nb, b, fromOrder, rb);
|
||||
reorderBlockedVectorByPattern<block_size>(mat->Nb, b, fromOrder, rb);
|
||||
|
||||
if (verbosity > 2) {
|
||||
std::ostringstream out;
|
||||
@ -662,7 +662,7 @@ void openclSolverBackend<block_size>::get_result(double *x) {
|
||||
Timer t;
|
||||
|
||||
queue->enqueueReadBuffer(d_x, CL_TRUE, 0, sizeof(double) * N, rb);
|
||||
blocked_reorder_vector_by_pattern<block_size>(mat->Nb, rb, toOrder, x);
|
||||
reorderBlockedVectorByPattern<block_size>(mat->Nb, rb, toOrder, x);
|
||||
|
||||
if (verbosity > 2) {
|
||||
std::ostringstream out;
|
||||
|
Loading…
Reference in New Issue
Block a user