mirror of
https://github.com/OPM/opm-simulators.git
synced 2024-12-27 09:40:59 -06:00
192 lines
7.4 KiB
C++
192 lines
7.4 KiB
C++
/*
|
|
Copyright 2019 Equinor ASA
|
|
|
|
This file is part of the Open Porous Media project (OPM).
|
|
|
|
OPM is free software: you can redistribute it and/or modify
|
|
it under the terms of the GNU General Public License as published by
|
|
the Free Software Foundation, either version 3 of the License, or
|
|
(at your option) any later version.
|
|
|
|
OPM is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with OPM. If not, see <http://www.gnu.org/licenses/>.
|
|
*/
|
|
|
|
#ifndef OPM_BLOCKED_MATRIX_HPP
|
|
#define OPM_BLOCKED_MATRIX_HPP
|
|
|
|
#if HAVE_FPGA
|
|
#include <vector>
|
|
#include <opm/simulators/linalg/bda/Matrix.hpp>
|
|
#endif
|
|
|
|
|
|
namespace Opm
|
|
{
|
|
namespace Accelerator
|
|
{
|
|
|
|
/// This struct resembles a blocked csr matrix, like Dune::BCRSMatrix.
|
|
/// The data is stored in contiguous memory, such that they can be copied to a device in one transfer.
|
|
class BlockedMatrix
|
|
{
|
|
|
|
public:
|
|
|
|
/// Allocate BlockedMatrix and data arrays with given sizes
|
|
/// \param[in] Nb number of blockrows
|
|
/// \param[in] nnzbs number of nonzero blocks
|
|
/// \param[in] block_size the number of rows and columns for each block
|
|
BlockedMatrix(int Nb_, int nnzbs_, unsigned int block_size_)
|
|
: nnzValues(new double[nnzbs_*block_size_*block_size_]),
|
|
colIndices(new int[nnzbs_*block_size_*block_size_]),
|
|
rowPointers(new int[Nb_+1]),
|
|
Nb(Nb_),
|
|
nnzbs(nnzbs_),
|
|
block_size(block_size_),
|
|
deleteNnzs(true),
|
|
deleteSparsity(true)
|
|
{}
|
|
|
|
/// Allocate BlockedMatrix, but copy sparsity pattern instead of allocating new memory
|
|
/// \param[in] M matrix to be copied
|
|
BlockedMatrix(const BlockedMatrix& M)
|
|
: nnzValues(new double[M.nnzbs*M.block_size*M.block_size]),
|
|
colIndices(M.colIndices),
|
|
rowPointers(M.rowPointers),
|
|
Nb(M.Nb),
|
|
nnzbs(M.nnzbs),
|
|
block_size(M.block_size),
|
|
deleteNnzs(true),
|
|
deleteSparsity(false)
|
|
{}
|
|
|
|
/// Allocate BlockedMatrix, but let data arrays point to existing arrays
|
|
/// \param[in] Nb number of blockrows
|
|
/// \param[in] nnzbs number of nonzero blocks
|
|
/// \param[in] block_size the number of rows and columns for each block
|
|
/// \param[in] nnzValues array of nonzero values, contains nnzb*block_size*block_size scalars
|
|
/// \param[in] colIndices array of column indices, contains nnzb entries
|
|
/// \param[in] rowPointers array of row pointers, contains Nb+1 entries
|
|
BlockedMatrix(int Nb_, int nnzbs_, unsigned int block_size_, double *nnzValues_, int *colIndices_, int *rowPointers_)
|
|
: nnzValues(nnzValues_),
|
|
colIndices(colIndices_),
|
|
rowPointers(rowPointers_),
|
|
Nb(Nb_),
|
|
nnzbs(nnzbs_),
|
|
block_size(block_size_),
|
|
deleteNnzs(false),
|
|
deleteSparsity(false)
|
|
{}
|
|
|
|
~BlockedMatrix(){
|
|
if (deleteNnzs) {
|
|
delete[] nnzValues;
|
|
}
|
|
if (deleteSparsity) {
|
|
delete[] colIndices;
|
|
delete[] rowPointers;
|
|
}
|
|
}
|
|
|
|
#if HAVE_FPGA
|
|
constexpr static double nnzThreshold = 1e-80; // for unblocking, a nonzero must be bigger than this threshold to be considered a nonzero
|
|
|
|
int countUnblockedNnzs();
|
|
|
|
void unblock(Matrix *mat, bool isUMatrix);
|
|
|
|
/// Converts this matrix to the dataformat used by the FPGA
|
|
/// Is done every linear solve. The exact sparsity pattern can change every time since the zeros are removed during unblocking
|
|
int toRDF(int numColors, int *nodesPerColor, bool isUMatrix,
|
|
std::vector<std::vector<int> >& colIndicesInColor, int nnzsPerRowLimit, int *nnzValsSizes,
|
|
std::vector<std::vector<double> >& nnzValues, short int *colIndices, unsigned char *NROffsets, int *colorSizes, int *valSize);
|
|
|
|
/// Analyses the sparsity pattern and prepares for toRDF()
|
|
/// Is only called once
|
|
int findPartitionColumns(int numColors, int *nodesPerColor,
|
|
int rowsPerColorLimit, int columnsPerColorLimit,
|
|
std::vector<std::vector<int> >& colIndicesInColor, int *PIndicesAddr, int *colorSizes,
|
|
std::vector<std::vector<int> >& LColIndicesInColor, int *LPIndicesAddr, int *LColorSizes,
|
|
std::vector<std::vector<int> >& UColIndicesInColor, int *UPIndicesAddr, int *UColorSizes);
|
|
#endif
|
|
|
|
double *nnzValues;
|
|
int *colIndices;
|
|
int *rowPointers;
|
|
int Nb;
|
|
int nnzbs;
|
|
unsigned int block_size;
|
|
bool deleteNnzs;
|
|
bool deleteSparsity;
|
|
};
|
|
|
|
|
|
/// Sort a row of matrix elements from a CSR-format, where the nonzeroes are ints
|
|
/// These ints aren't actually nonzeroes, but represent a mapping used later
|
|
/// \param[inout] colIndices represent keys in sorting
|
|
/// \param[inout] data sorted according to the colIndices
|
|
/// \param[in] left lower index of data of row
|
|
/// \param[in] right upper index of data of row
|
|
void sortRow(int *colIndices, int *data, int left, int right);
|
|
|
|
/// Multiply and subtract blocks
|
|
/// a = a - (b * c)
|
|
/// \param[inout] a block to be subtracted from
|
|
/// \param[in] b input block
|
|
/// \param[in] c input block
|
|
/// \param[in] block_size size of block
|
|
void blockMultSub(double *a, double *b, double *c, unsigned int block_size);
|
|
|
|
/// Perform a matrix-matrix multiplication on two blocks
|
|
/// resMat = mat1 * mat2
|
|
/// \param[in] mat1 input block 1
|
|
/// \param[in] mat2 input block 2
|
|
/// \param[out] resMat output block
|
|
/// \param[in] block_size size of block
|
|
void blockMult(double *mat1, double *mat2, double *resMat, unsigned int block_size);
|
|
|
|
|
|
#if HAVE_FPGA
|
|
/// Perform a matrix-matrix subtraction on two blocks, element-wise
|
|
/// resMat = mat1 - mat2
|
|
/// \param[in] mat1 input block 1
|
|
/// \param[in] mat2 input block 2
|
|
/// \param[out] resMat output block
|
|
/// \param[in] block_size size of block
|
|
void blockSub(double *mat1, double *mat2, double *resMat, unsigned int block_size);
|
|
|
|
/// Perform a matrix-vector multiplication
|
|
/// resVect = mat * vect
|
|
/// resVect += mat * vect
|
|
/// \param[in] mat input matrix
|
|
/// \param[in] vect input vector
|
|
/// \param[in] scale multiply output with this factor
|
|
/// \param[inout] resVect output vector
|
|
/// \param[in] resetRes if true, overwrite resVect, otherwise add to it
|
|
/// \param[in] block_size size of block
|
|
void blockVectMult(double *mat, double *vect, double scale, double *resVect, bool resetRes, unsigned int block_size);
|
|
|
|
/// Convert a blocked inverse diagonal to the FPGA format.
|
|
/// This is the only blocked structure on the FPGA, since it needs blocked matrix-vector multiplication after the backwards substitution of U.
|
|
/// Since the rows of U are reversed, the rows of the diag are also reversed.
|
|
/// The cachelines can hold 8 doubles, a block has 9 doubles.
|
|
/// The format converts 3x3 blocks to 3x4 blocks, so 1 cacheline holds 2 unblocked rows.
|
|
/// Then 2 blocks (24 doubles) fit on 3 cachelines.
|
|
/// Example:
|
|
/// [1 2 3] [1 2 3 0] [1 2 3 0 4 5 6 0]
|
|
/// [4 5 6] -> [4 5 6 0] -> hardware: [7 8 9 0 block2 row1]
|
|
/// [7 8 9] [7 8 9 0] [block2 row2 block2 row3]
|
|
void blockedDiagtoRDF(double *blockedDiagVals, int rowSize, int numColors, std::vector<int>& rowsPerColor, double *RDFDiag);
|
|
#endif
|
|
|
|
} // namespace Accelerator
|
|
} // namespace Opm
|
|
|
|
#endif
|