opm-simulators/opm/simulators/linalg/gpubridge/Reorder.cpp
2025-01-23 07:57:44 +01:00

172 lines
5.7 KiB
C++

/*
Copyright 2019 Equinor ASA
This file is part of the Open Porous Media project (OPM).
OPM is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
OPM is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with OPM. If not, see <http://www.gnu.org/licenses/>.
*/
#if HAVE_CONFIG_H
#include "config.h"
#endif // HAVE_CONFIG_H
#include <opm/simulators/linalg/gpubridge/Reorder.hpp>
#include <algorithm>
#include <cassert>
#include <stdexcept>
#include <vector>
namespace Opm::Accelerator {
/* Check if operations on a node (row) of the matrix can be started.
 * A node can only be started if it is not done yet and all nodes that it
 * depends on during sequential execution have already completed.
 *
 * rowIndex:    node to test
 * rowPointers: start offsets into colIndices; entries rowIndex and rowIndex + 1 are read
 * colIndices:  indices of the nodes each node is connected to
 * doneRows:    doneRows[k] is true once node k has completed
 *
 * Entries are visited in stored order and the scan stops at the first index
 * that is not smaller than rowIndex, so any smaller index stored after such
 * an entry is not inspected.
 */
bool canBeStarted(const int rowIndex,
                  const int* rowPointers,
                  const int* colIndices,
                  const std::vector<bool>& doneRows)
{
    // A node that has already completed must not be started again.
    if (doneRows[rowIndex]) {
        return false;
    }

    const int rowEnd = rowPointers[rowIndex + 1];
    for (int pos = rowPointers[rowIndex]; pos < rowEnd; ++pos) {
        const int dependency = colIndices[pos];

        // Only nodes that execute before the current one are relevant;
        // reaching this point means none of them is still pending.
        if (dependency >= rowIndex) {
            return true;
        }

        // An unresolved dependency blocks the node.
        if (!doneRows[dependency]) {
            return false;
        }
    }
    return true;
}
/*
 * The level scheduling of a non-symmetric, blocked matrix requires access to a CSC
 * encoding and a CSR encoding of the sparsity pattern of the input matrix.
 * This function is based on a standard level scheduling algorithm, like the one described in:
 * "Iterative methods for Sparse Linear Systems" by Yousef Saad in section 11.6.3
 *
 * Inputs:
 *   CSRColIndices, CSRRowPointers: CSR encoding of the pattern (CSRRowPointers is read up to index Nb)
 *   CSCRowIndices, CSCColPointers: CSC encoding of the pattern (CSCColPointers is read up to index Nb)
 *   Nb:                            number of rows
 * Outputs:
 *   numColors:    number of colors (levels) found, equal to rowsPerColor.size()
 *   toOrder:      toOrder[row] is the position of 'row' in the new ordering (Nb entries written)
 *   fromOrder:    fromOrder[k] is the original row placed at position k (Nb entries written)
 *   rowsPerColor: number of rows assigned to each color; must be empty on entry
 *
 * After the first color, a row is only considered when it appears in the CSC column
 * of a row completed in the previous color. This finds every row when the pattern is
 * structurally symmetric with ascending indices. For other patterns a color may start
 * no row at all; that case used to loop forever while growing rowsPerColor, and now
 * throws std::logic_error instead. When it throws, numColors is not written and the
 * other outputs are only partially filled.
 */
void findLevelScheduling(const int* CSRColIndices,
                         const int* CSRRowPointers,
                         const int* CSCRowIndices,
                         const int* CSCColPointers,
                         int Nb,
                         int* numColors,
                         int* toOrder,
                         int* fromOrder,
                         std::vector<int>& rowsPerColor)
{
    // since emplace_back() is used to fill, the vector must be empty
    assert(rowsPerColor.empty());

    std::vector<bool> doneRows(Nb, false);
    std::vector<int> rowsToStart;
    int activeRowIndex = 0;     // first position in fromOrder whose neighbours are still to be visited
    int nextActiveRowIndex = 0; // next free position in fromOrder

    // find starting rows: rows that are independent from all rows that come before them.
    // doneRows is still all false here, so a row qualifies only if the first
    // entry of its CSC column is not smaller than its own index.
    for (int row = 0; row < Nb; ++row) {
        if (canBeStarted(row, CSCColPointers, CSCRowIndices, doneRows)) {
            fromOrder[nextActiveRowIndex] = row;
            toOrder[row] = nextActiveRowIndex;
            ++nextActiveRowIndex;
        }
    }

    // 'do' compute on all active rows
    for (int pos = 0; pos < nextActiveRowIndex; ++pos) {
        doneRows[fromOrder[pos]] = true;
    }
    int colorEnd = nextActiveRowIndex;
    rowsPerColor.emplace_back(nextActiveRowIndex - activeRowIndex);

    while (colorEnd < Nb) {
        // Go over all rows active from the last color, and check which of
        // their neighbours can be activated this color
        for (; activeRowIndex < colorEnd; ++activeRowIndex) {
            const int finishedRow = fromOrder[activeRowIndex];
            for (int i = CSCColPointers[finishedRow]; i < CSCColPointers[finishedRow + 1]; ++i) {
                const int candidate = CSCRowIndices[i];
                if (canBeStarted(candidate, CSRRowPointers, CSRColIndices, doneRows)) {
                    rowsToStart.emplace_back(candidate);
                }
            }
        }

        // 'do' compute on all active rows; a row can be listed more than once
        // when several finished rows share it as a neighbour, so skip repeats.
        for (const int row : rowsToStart) {
            if (!doneRows[row]) {
                doneRows[row] = true;
                fromOrder[nextActiveRowIndex] = row;
                toOrder[row] = nextActiveRowIndex;
                ++nextActiveRowIndex;
            }
        }
        rowsToStart.clear();

        // No row was started: the next pass would see exactly the same state,
        // so the loop could never terminate. Report it instead of hanging.
        if (nextActiveRowIndex == colorEnd) {
            throw std::logic_error("findLevelScheduling: no row could be started for the next color; "
                                   "the CSR/CSC sparsity pattern is inconsistent or not structurally symmetric");
        }

        colorEnd = nextActiveRowIndex;
        rowsPerColor.emplace_back(nextActiveRowIndex - activeRowIndex);
    }

    *numColors = static_cast<int>(rowsPerColor.size());
}
// Convert the CSR sparsity pattern of an Nb x Nb matrix into its CSC pattern.
// based on the scipy package from python, scipy/sparse/sparsetools/csr.h on github
//
// CSRColIndices, CSRRowPointers: input pattern; CSRRowPointers[Nb] is the number of nonzeros
// CSCRowIndices:                 output, one entry per nonzero
// CSCColPointers:                output, Nb + 1 entries
// Within each column the row indices come out in ascending order, because
// the rows are visited in ascending order.
void csrPatternToCsc(const int* CSRColIndices,
                     const int* CSRRowPointers,
                     int* CSCRowIndices,
                     int* CSCColPointers,
                     int Nb)
{
    const int numNonzeros = CSRRowPointers[Nb];

    // Step 1: count the nonzeros in every column.
    std::fill(CSCColPointers, CSCColPointers + Nb, 0);
    for (int k = 0; k < numNonzeros; ++k) {
        ++CSCColPointers[CSRColIndices[k]];
    }

    // Step 2: turn the counts into start offsets (exclusive prefix sum).
    int running = 0;
    for (int c = 0; c < Nb; ++c) {
        const int count = CSCColPointers[c];
        CSCColPointers[c] = running;
        running += count;
    }
    CSCColPointers[Nb] = numNonzeros;

    // Step 3: scatter the row indices, using each column pointer as a write
    // cursor. Afterwards CSCColPointers[c] holds the end of column c.
    for (int r = 0; r < Nb; ++r) {
        const int rowEnd = CSRRowPointers[r + 1];
        for (int k = CSRRowPointers[r]; k < rowEnd; ++k) {
            const int c = CSRColIndices[k];
            CSCRowIndices[CSCColPointers[c]++] = r;
        }
    }

    // Step 4: the end of column c - 1 is the start of column c, so shift the
    // cursors one slot to the right to restore the start offsets.
    for (int c = Nb; c > 0; --c) {
        CSCColPointers[c] = CSCColPointers[c - 1];
    }
    CSCColPointers[0] = 0;
}
} // namespace Opm::Accelerator