Speed up reordering for opencl

This commit is contained in:
Tong Dong Qiu 2022-03-23 17:46:25 +01:00
parent fc298d8f9c
commit 68b604c85f
6 changed files with 83 additions and 45 deletions

View File

@ -37,14 +37,12 @@ namespace Accelerator
using Opm::OpmLog;
/*Sort a row of matrix elements from a blocked CSR-format.*/
void sortBlockedRow(int *colIndices, double *data, int left, int right, unsigned block_size) {
const unsigned int bs = block_size;
void sortRow(int *colIndices, int *data, int left, int right) {
int l = left;
int r = right;
int middle = colIndices[(l + r) >> 1];
double lDatum[bs * bs];
do {
while (colIndices[l] < middle)
l++;
@ -54,9 +52,9 @@ void sortBlockedRow(int *colIndices, double *data, int left, int right, unsigned
int lColIndex = colIndices[l];
colIndices[l] = colIndices[r];
colIndices[r] = lColIndex;
memcpy(lDatum, data + l * bs * bs, sizeof(double) * bs * bs);
memcpy(data + l * bs * bs, data + r * bs * bs, sizeof(double) * bs * bs);
memcpy(data + r * bs * bs, lDatum, sizeof(double) * bs * bs);
int tmp = data[l];
data[l] = data[r];
data[r] = tmp;
l++;
r--;
@ -64,10 +62,10 @@ void sortBlockedRow(int *colIndices, double *data, int left, int right, unsigned
} while (l < r);
if (left < r)
sortBlockedRow(colIndices, data, left, r, bs);
sortRow(colIndices, data, left, r);
if (right > l)
sortBlockedRow(colIndices, data, l, right, bs);
sortRow(colIndices, data, l, right);
}

View File

@ -127,13 +127,13 @@ public:
};
/// Sort a row of matrix elements from a blocked CSR-format
/// \param[inout] colIndices
/// \param[inout] data
/// Sort a row of matrix elements from a CSR-format, where the nonzeroes are ints
/// These ints aren't actually nonzeroes, but represent a mapping used later
/// \param[inout] colIndices represent keys in sorting
/// \param[inout] data sorted according to the colIndices
/// \param[in] left lower index of data of row
/// \param[in] right upper index of data of row
/// \param[in] block_size size of blocks in the row
void sortBlockedRow(int *colIndices, double *data, int left, int right, unsigned block_size);
void sortRow(int *colIndices, int *data, int left, int right);
/// Multiply and subtract blocks
/// a = a - (b * c)

View File

@ -29,6 +29,7 @@
#include <random>
#include <sstream>
#include <vector>
#include <cstring>
namespace {
@ -176,39 +177,58 @@ int colorBlockedNodes(int rows, const int *CSRRowPointers, const int *CSRColIndi
/* Reorder a matrix by a specified input order.
* Both a to order array, which contains for every node from the old matrix where it will move in the new matrix,
* and the from order, which contains for every node in the new matrix where it came from in the old matrix.*/
void reorderBlockedMatrixByPattern(BlockedMatrix *mat, int *toOrder, int *fromOrder, BlockedMatrix *rmat) {
assert(mat->block_size == rmat->block_size);
* and the from order, which contains for every node in the new matrix where it came from in the old matrix.
* reordermapping_nonzeroes is filled with increasing indices, and reordered using the translated colIndices as keys,
* this means the resulting reordermapping_nonzeroes array contains the mapping
*/
void reorderBlockedMatrixByPattern(BlockedMatrix *mat, std::vector<int>& reordermapping_nonzeroes, int *toOrder, int *fromOrder, BlockedMatrix *rmat){
const unsigned int bs = mat->block_size;
int rIndex = 0;
int i, k;
unsigned int j;
std::vector<int> tmp(mat->nnzbs);
reordermapping_nonzeroes.resize(mat->nnzbs);
for(int i = 0; i < mat->nnzbs; ++i){
reordermapping_nonzeroes[i] = i;
}
rmat->rowPointers[0] = 0;
for (i = 0; i < mat->Nb; i++) {
for(int i = 0; i < mat->Nb; i++){
int thisRow = fromOrder[i];
// put thisRow from the old matrix into row i of the new matrix
rmat->rowPointers[i + 1] = rmat->rowPointers[i] + mat->rowPointers[thisRow + 1] - mat->rowPointers[thisRow];
for (k = mat->rowPointers[thisRow]; k < mat->rowPointers[thisRow + 1]; k++) {
for (j = 0; j < bs * bs; j++) {
rmat->nnzValues[rIndex * bs * bs + j] = mat->nnzValues[k * bs * bs + j];
}
rmat->rowPointers[i+1] = rmat->rowPointers[i] + mat->rowPointers[thisRow+1] - mat->rowPointers[thisRow];
for(int k = mat->rowPointers[thisRow]; k < mat->rowPointers[thisRow+1]; k++){
tmp[rIndex] = reordermapping_nonzeroes[k]; // only get 1 entry per block
rmat->colIndices[rIndex] = mat->colIndices[k];
rIndex++;
}
}
// re-assign column indices according to the new positions of the nodes referenced by the column indices
for (i = 0; i < mat->nnzbs; i++) {
for(int i = 0; i < mat->nnzbs; i++){
rmat->colIndices[i] = toOrder[rmat->colIndices[i]];
}
// re-sort the column indices of every row.
for (i = 0; i < mat->Nb; i++) {
sortBlockedRow(rmat->colIndices, rmat->nnzValues, rmat->rowPointers[i], rmat->rowPointers[i + 1] - 1, bs);
for(int i = 0; i < mat->Nb; i++){
sortRow(rmat->colIndices, tmp.data(), rmat->rowPointers[i], rmat->rowPointers[i+1]-1);
}
for(int i = 0; i < mat->nnzbs; i++){
reordermapping_nonzeroes[i] = tmp[i];
}
// std::copy();
}
/* Reorder an array of nonzero blocks into another array, using a mapping */
void reorderNonzeroes(BlockedMatrix *mat, std::vector<int>& reordermapping_nonzeroes, BlockedMatrix *rmat){
assert(mat->block_size == rmat->block_size);
const unsigned int bs = mat->block_size;
for(int i = 0; i < mat->nnzbs; i++){
int old_idx = reordermapping_nonzeroes[i];
memcpy(rmat->nnzValues+i*bs*bs, mat->nnzValues+old_idx*bs*bs, sizeof(double)*bs*bs); // copy nnz block
}
}
/* Reorder a matrix according to the colors that every node of the matrix has received*/
/* Find a reorder mapping according to the colors that every node of the matrix has received */
void colorsToReordering(int Nb, std::vector<int>& colors, int numColors, int *toOrder, int *fromOrder, std::vector<int>& rowsPerColor) {
int reordered = 0;

View File

@ -47,13 +47,22 @@ namespace Accelerator
template <unsigned int block_size>
int colorBlockedNodes(int rows, const int *CSRRowPointers, const int *CSRColIndices, const int *CSCColPointers, const int *CSCRowIndices, std::vector<int>& colors, int maxRowsPerColor, int maxColsPerColor);
/// Reorder the rows of the matrix according to the mapping in toOrder and fromOrder
/// rMat must be allocated already
/// \param[in] mat matrix to be reordered
/// \param[in] toOrder reorder pattern that lists for each index in the original order, to which index in the new order it should be moved
/// \param[in] fromOrder reorder pattern that lists for each index in the new order, from which index in the original order it was moved
/// \param[inout] rMat reordered Matrix
void reorderBlockedMatrixByPattern(BlockedMatrix *mat, int *toOrder, int *fromOrder, BlockedMatrix *rmat);
/// Reorder the sparsity pattern of the matrix according to the mapping in toOrder and fromOrder
/// Also find mapping for nnz blocks
/// rmat must be allocated already
/// \param[in] mat matrix to be reordered
/// \param[out] reordermapping_nonzeroes contains new index for every nnz block
/// \param[in] toOrder reorder pattern that lists for each index in the original order, to which index in the new order it should be moved
/// \param[in] fromOrder reorder pattern that lists for each index in the new order, from which index in the original order it was moved
/// \param[out] rmat reordered Matrix
void reorderBlockedMatrixByPattern(BlockedMatrix *mat, std::vector<int>& reordermapping_nonzeroes, int *toOrder, int *fromOrder, BlockedMatrix *rmat);
/// Write nnz blocks from mat to rmat, according to the mapping in reordermapping_nonzeroes
/// rmat must be allocated already
/// \param[in] mat matrix to be reordered
/// \param[in] reordermapping_nonzeroes contains old index for every nnz block, so rmat_nnz[i] == mat_nnz[mapping[i]]
/// \param[inout] rmat reordered Matrix
void reorderNonzeroes(BlockedMatrix *mat, std::vector<int>& reordermapping_nonzeroes, BlockedMatrix *rmat);
/// Compute reorder mapping from the color that each node has received
/// The toOrder, fromOrder and iters arrays must be allocated already

View File

@ -101,9 +101,17 @@ bool BILU0<block_size>::analyze_matrix(BlockedMatrix *mat, BlockedMatrix *jacMat
if (opencl_ilu_reorder == ILUReorder::LEVEL_SCHEDULING) {
out << "BILU0 reordering strategy: " << "level_scheduling\n";
findLevelScheduling(matToDecompose->colIndices, matToDecompose->rowPointers, CSCRowIndices.data(), CSCColPointers.data(), Nb, &numColors, toOrder.data(), fromOrder.data(), rowsPerColor);
reorderBlockedMatrixByPattern(mat, reordermappingNonzeroes, toOrder.data(), fromOrder.data(), rmat.get());
if (jacMat) {
reorderBlockedMatrixByPattern(jacMat, jacReordermappingNonzeroes, toOrder.data(), fromOrder.data(), rJacMat.get());
}
} else if (opencl_ilu_reorder == ILUReorder::GRAPH_COLORING) {
out << "BILU0 reordering strategy: " << "graph_coloring\n";
findGraphColoring<block_size>(matToDecompose->colIndices, matToDecompose->rowPointers, CSCRowIndices.data(), CSCColPointers.data(), Nb, Nb, Nb, &numColors, toOrder.data(), fromOrder.data(), rowsPerColor);
reorderBlockedMatrixByPattern(mat, reordermappingNonzeroes, toOrder.data(), fromOrder.data(), rmat.get());
if (jacMat) {
reorderBlockedMatrixByPattern(jacMat, jacReordermappingNonzeroes, toOrder.data(), fromOrder.data(), rJacMat.get());
}
} else if (opencl_ilu_reorder == ILUReorder::NONE) {
out << "BILU0 reordering strategy: none\n";
// numColors = 1;
@ -187,11 +195,11 @@ bool BILU0<block_size>::create_preconditioner(BlockedMatrix *mat, BlockedMatrix
Timer t_reorder;
if (jacMat) {
matToDecompose = rJacMat.get();
reorderBlockedMatrixByPattern(mat, toOrder.data(), fromOrder.data(), rmat.get());
reorderBlockedMatrixByPattern(jacMat, toOrder.data(), fromOrder.data(), rJacMat.get());
reorderNonzeroes(mat, reordermappingNonzeroes, rmat.get());
reorderNonzeroes(jacMat, jacReordermappingNonzeroes, rJacMat.get());
} else {
matToDecompose = rmat.get();
reorderBlockedMatrixByPattern(mat, toOrder.data(), fromOrder.data(), rmat.get());
reorderNonzeroes(mat, reordermappingNonzeroes, rmat.get());
}
if (verbosity >= 3){

View File

@ -69,6 +69,9 @@ private:
ILUReorder opencl_ilu_reorder;
std::vector<int> reordermappingNonzeroes; // maps nonzero blocks to new location in reordered matrix
std::vector<int> jacReordermappingNonzeroes; // same but for jacMatrix
typedef struct {
cl::Buffer invDiagVals;
cl::Buffer diagIndex;
@ -117,6 +120,11 @@ public:
return rmat.get();
}
BlockedMatrix* getRJacMat()
{
return rJacMat.get();
}
std::tuple<std::vector<int>, std::vector<int>, std::vector<int>> get_preconditioner_structure()
{
return {{LUmat->rowPointers, LUmat->rowPointers + (Nb + 1)}, {LUmat->colIndices, LUmat->colIndices + nnzb}, diagIndex};
@ -130,11 +138,6 @@ public:
return std::make_pair(s.LUvals, s.invDiagVals);
#endif
}
BlockedMatrix* getRJacMat()
{
return rJacMat.get();
}
};
} // namespace Accelerator