2020-12-22 05:57:01 -06:00
|
|
|
/*
|
|
|
|
Copyright 2020 Equinor ASA
|
|
|
|
|
|
|
|
This file is part of the Open Porous Media project (OPM).
|
|
|
|
|
|
|
|
OPM is free software: you can redistribute it and/or modify
|
|
|
|
it under the terms of the GNU General Public License as published by
|
|
|
|
the Free Software Foundation, either version 3 of the License, or
|
|
|
|
(at your option) any later version.
|
|
|
|
|
|
|
|
OPM is distributed in the hope that it will be useful,
|
|
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
GNU General Public License for more details.
|
|
|
|
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
|
|
along with OPM. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include <opm/common/OpmLog/OpmLog.hpp>
|
|
|
|
#include <opm/common/ErrorMacros.hpp>
|
|
|
|
|
|
|
|
#include <opm/simulators/linalg/bda/FPGAMatrix.hpp>
|
|
|
|
#include <opm/simulators/linalg/bda/FPGAUtils.hpp>
|
|
|
|
|
2021-10-25 04:08:06 -05:00
|
|
|
namespace Opm
|
|
|
|
{
|
|
|
|
namespace Accelerator
|
2020-12-22 05:57:01 -06:00
|
|
|
{
|
|
|
|
|
|
|
|
/*Sort a row of matrix elements from a CSR-format.*/
|
|
|
|
void sortRow(int *colIndices, double *data, int left, int right) {
|
|
|
|
int l = left;
|
|
|
|
int r = right;
|
|
|
|
int middle = colIndices[(l + r) >> 1];
|
|
|
|
do {
|
|
|
|
while (colIndices[l] < middle)
|
|
|
|
l++;
|
|
|
|
while (colIndices[r] > middle)
|
|
|
|
r--;
|
|
|
|
if (l <= r) {
|
|
|
|
int lColIndex = colIndices[l];
|
|
|
|
colIndices[l] = colIndices[r];
|
|
|
|
colIndices[r] = lColIndex;
|
|
|
|
double lDatum = data[l];
|
|
|
|
data[l] = data[r];
|
|
|
|
data[r] = lDatum;
|
|
|
|
|
|
|
|
l++;
|
|
|
|
r--;
|
|
|
|
}
|
|
|
|
} while (l < r);
|
|
|
|
if (left < r)
|
|
|
|
sortRow(colIndices, data, left, r);
|
|
|
|
if (right > l)
|
|
|
|
sortRow(colIndices, data, l, right);
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Write all data used by the VHDL testbenches to raw data arrays. The arrays are as follows:
|
|
|
|
* - The "colorSizes" array, which first contains the number of rows, columns, non-zero values
|
|
|
|
* and colors, and the size, in elements, of the NROffsets array, followed by:
|
|
|
|
* the number of rows (rounded to the nearest 32), the number of rows (not rounded),
|
|
|
|
* the number of columns (not rounded) and the number of non-zero values
|
|
|
|
* (rounded to the nearest 32) for every partition.
|
|
|
|
* This array is zero padded up to the nearest 64-byte cacheline.
|
|
|
|
* - The "colIndicesInColor" array, which contains for every partition, from which elements
|
|
|
|
* in the global X vector the elements of that X vector partition came.
|
|
|
|
* For example, if a matrix partition only has non-zero values in columns 1, 3 and 6, then
|
|
|
|
* that X vector partition will only have three elements, and the color_col_indices array
|
|
|
|
* will contain 1, 3 and 6 for that partition.
|
|
|
|
* This array is zero padded up to the nearest 64-byte cacheline for every partition.
|
|
|
|
* - The "nnzValues" array contains all non-zero values of each partition of the matrix.
|
|
|
|
* This array is zero-padded so that each color has a multiple of 32 elements (to have the
|
|
|
|
* same number of elements per partition as the column indices array).
|
|
|
|
* - The "colIndices" array contains all column indices of each partition of the matrix.
|
|
|
|
* These column indices are the local indices for that partition, so to be used, first a
|
|
|
|
* local X vector partition needs to be loaded into some local memory (this is done using
|
|
|
|
* data from the _color_col_indices array), before these column indices can be used as
|
|
|
|
* addresses to that local memory to read the desired X vector values.
|
|
|
|
* This array is zero-padded so that data for every partition fills up a number of complete
|
|
|
|
* cachelines (this means every color has a multiple of 32 elements).
|
|
|
|
* - "NROffsets" is the name of the array that contains the new row offsets for
|
|
|
|
* all elements of every partition of the matrix. New row offsets are 8-bit values which
|
|
|
|
* are 0 if that element is not the first element in a row, or which, if that element is
|
|
|
|
* the first element of a row) is equal to the amount of empty rows between that new row
|
|
|
|
* and the row before it plus 1. This array is zero-padded so that data for every partition
|
|
|
|
* fills up a number of complete cachelines (this means every color has a multiple of 64 elements).
|
|
|
|
*/
|
|
|
|
int Matrix::toRDF(int numColors, std::vector<int>& nodesPerColor,
|
|
|
|
std::vector<std::vector<int> >& colIndicesInColor, int nnzsThisRowLimit,
|
|
|
|
std::vector<std::vector<double> >& ubNnzValues, short int *ubColIndices, int *nnzValsSizes, unsigned char *NROffsets, int *colorSizes)
|
|
|
|
{
|
|
|
|
auto mat = this;
|
|
|
|
|
|
|
|
int doneRows = 0;
|
|
|
|
int totalRowNum = 0; // total number of non-empty rows
|
|
|
|
int nnzsPerColor = 0; // total number of nnzs in current color, padded to multiple of 32 for each color
|
|
|
|
int maxNNZsPerColor = 0; // max of nnzsPerColor
|
|
|
|
|
|
|
|
int totalValSize = 0; // sum of nnzsPerColor, padded
|
|
|
|
|
|
|
|
std::vector<int> nnzRowsPerColor(numColors);
|
|
|
|
|
|
|
|
// find number of nnzs per color and number of non-empty rows
|
|
|
|
for (int c = 0; c < numColors; c++) {
|
|
|
|
int numRows = 0;
|
|
|
|
nnzRowsPerColor[c] = 0;
|
|
|
|
int firstNnzOfColor = mat->rowPointers[doneRows];
|
|
|
|
int lastNnzOfColor = mat->rowPointers[doneRows + nodesPerColor[c]];
|
|
|
|
nnzsPerColor = roundUpTo(lastNnzOfColor - firstNnzOfColor, 32); // round up to nearest 16 for short ints of column indices
|
|
|
|
totalValSize += nnzsPerColor;
|
|
|
|
maxNNZsPerColor = std::max(nnzsPerColor, maxNNZsPerColor);
|
|
|
|
int row = doneRows;
|
|
|
|
for (; row < doneRows + nodesPerColor[c]; row++) {
|
|
|
|
if ( mat->rowPointers[row] != mat->rowPointers[row + 1]) {
|
|
|
|
numRows++;
|
|
|
|
nnzRowsPerColor[c] = nnzRowsPerColor[c] + 1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
doneRows = row;
|
|
|
|
totalRowNum += numRows;
|
|
|
|
}
|
|
|
|
|
|
|
|
int conseqZeroRows = 0; // number of consecutive empty rows
|
|
|
|
int maxConseqZeroRows = 0;
|
|
|
|
int numEmptyRows = 0; // total number of empty rows
|
|
|
|
std::vector<int> rowOffsets(totalRowNum);
|
|
|
|
std::vector<int> nnzRowPointers(totalRowNum + 1, 0); // rowPointers, but only for non empty rows
|
|
|
|
std::vector<int> colorValPointers(numColors + 1); // points to first nnz of first row of each color
|
|
|
|
std::vector<int> colorValZeroPointers(numColors); // points to first padded zero for each color
|
|
|
|
|
|
|
|
int nonEmptyRowIdx = 0; // read all rows, but only keep non empty rows, this idx keeps track of how many non empty rows where seen
|
|
|
|
doneRows = 0;
|
|
|
|
|
|
|
|
int totalPaddingSize = 0; // number of padded zeros from previous colors
|
|
|
|
int NROffsetSize = 0; // number of NROffsets entries, padded to multiple of 64 for each color
|
|
|
|
int maxRows = 0;
|
|
|
|
int maxNNZsPerRow = 0;
|
|
|
|
|
|
|
|
// determine the row offset of each row (amount of zero rows between it and the previous non-zero row)
|
|
|
|
// this is later converted to rowOffset for each nnz
|
|
|
|
for (int c = 0; c < numColors; c++) {
|
|
|
|
conseqZeroRows = 0;
|
|
|
|
for (int row = doneRows; row < doneRows + nodesPerColor[c]; row++) {
|
|
|
|
int nnzsThisRow = mat->rowPointers[row + 1] - mat->rowPointers[row];
|
|
|
|
if (nnzsThisRow == 0) {
|
|
|
|
conseqZeroRows++;
|
|
|
|
numEmptyRows++;
|
|
|
|
} else {
|
|
|
|
maxNNZsPerRow = std::max(nnzsThisRow, maxNNZsPerRow);
|
|
|
|
nnzRowPointers[nonEmptyRowIdx + 1] = mat->rowPointers[row + 1];
|
|
|
|
rowOffsets[nonEmptyRowIdx] = conseqZeroRows;
|
|
|
|
maxConseqZeroRows = std::max(conseqZeroRows, maxConseqZeroRows);
|
|
|
|
conseqZeroRows = 0;
|
|
|
|
nonEmptyRowIdx++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
// calculate sizes that include zeropadding
|
|
|
|
colorValZeroPointers[c] = nnzRowPointers[nonEmptyRowIdx] + totalPaddingSize;
|
|
|
|
colorValPointers[c + 1] = roundUpTo(colorValZeroPointers[c], 32);
|
|
|
|
totalPaddingSize += colorValPointers[c + 1] - colorValZeroPointers[c];
|
|
|
|
NROffsetSize += roundUpTo(colorValPointers[c + 1] - colorValPointers[c], 64);
|
|
|
|
|
|
|
|
doneRows += nodesPerColor[c];
|
|
|
|
maxRows = std::max(nodesPerColor[c], maxRows);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (maxNNZsPerRow > nnzsThisRowLimit) {
|
|
|
|
std::ostringstream errorstring;
|
|
|
|
errorstring << "ERROR: Current reordering exceeds maximum number of non-zero values per row limit: " << maxNNZsPerRow << " > " << nnzsThisRowLimit;
|
|
|
|
OPM_THROW(std::logic_error, errorstring.str());
|
|
|
|
}
|
|
|
|
|
|
|
|
// create and fill RDF arrays
|
|
|
|
colorSizes[3] = colorValPointers[numColors]; // total number of nnzs the FPGA has to process, including zeropadding
|
|
|
|
colorSizes[5] = NROffsetSize;
|
|
|
|
|
|
|
|
for (int c = 0; c < numColors; c++) {
|
|
|
|
colorSizes[c * 4 + 8] = nnzRowsPerColor[c];
|
|
|
|
colorSizes[c * 4 + 11] = colorValPointers[c + 1] - colorValPointers[c];
|
|
|
|
}
|
|
|
|
|
|
|
|
int rowIndex = 0; // keep track of where to read/write
|
|
|
|
int valIndex = 0;
|
|
|
|
int NRIndex = 0;
|
|
|
|
int halfwayPoint = colorValPointers[numColors] / 2;
|
|
|
|
nnzValsSizes[0] = colorValPointers[numColors];
|
|
|
|
|
|
|
|
colorSizes[7] = halfwayPoint;
|
|
|
|
|
|
|
|
for (int c = 0; c < numColors; c++) {
|
|
|
|
int nnzsThisRow;
|
|
|
|
// make sure 32 values are written in batches (pad with zeros if needed)
|
|
|
|
for (int v = colorValPointers[c]; v < colorValPointers[c + 1]; v += 32) {
|
|
|
|
for (int vb = 0; vb < 32; vb++) {
|
|
|
|
|
|
|
|
// if there are enough values for the whole cacheline
|
|
|
|
if (v + vb < colorValZeroPointers[c]) {
|
|
|
|
ubNnzValues[0][v + vb] = mat->nnzValues[valIndex];
|
|
|
|
ubColIndices[v + vb] = static_cast<short int>(colIndicesInColor[c][mat->colIndices[valIndex]]);
|
|
|
|
|
|
|
|
// if this val is the first of a row
|
|
|
|
if (nnzRowPointers[rowIndex] == valIndex) {
|
|
|
|
|
|
|
|
if (rowOffsets[rowIndex] + 1 >= 255) {
|
|
|
|
std::ostringstream errorstring;
|
|
|
|
errorstring << "ERROR: row offset size exceeded in row " << rowIndex << " with an offset of " << rowOffsets[rowIndex] + 1;
|
|
|
|
OPM_THROW(std::logic_error, errorstring.str());
|
|
|
|
}
|
|
|
|
|
|
|
|
NROffsets[NRIndex] = static_cast<unsigned char>(rowOffsets[rowIndex] + 1);
|
|
|
|
|
|
|
|
// skip all empty rows
|
|
|
|
while (rowIndex < mat->N && nnzRowPointers[rowIndex] == valIndex) {
|
|
|
|
rowIndex++;
|
|
|
|
nnzsThisRow = 0;
|
|
|
|
}
|
|
|
|
nnzsThisRow++;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
NROffsets[NRIndex] = (unsigned char) 0;
|
|
|
|
nnzsThisRow++;
|
|
|
|
}
|
|
|
|
valIndex++;
|
|
|
|
}
|
|
|
|
else // zeropadding is needed
|
|
|
|
{
|
|
|
|
ubNnzValues[0][v + vb] = 0.0;
|
|
|
|
ubColIndices[v + vb] = static_cast<short int>(colIndicesInColor[c][mat->colIndices[valIndex - 1]]);
|
|
|
|
NROffsets[NRIndex] = 0;
|
|
|
|
}
|
|
|
|
NRIndex++;
|
|
|
|
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// zeropad the NROffsets file
|
|
|
|
while (NRIndex % 64 != 0) {
|
|
|
|
NROffsets[NRIndex] = 0;
|
|
|
|
NRIndex++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2021-10-25 04:08:06 -05:00
|
|
|
} // namespace Accelerator
|
|
|
|
} // namespace Opm
|