BlockMatrix: template Scalar type

This commit is contained in:
Arne Morten Kvarving 2024-04-15 16:13:22 +02:00
parent b9ee637d78
commit 25374b0e54
25 changed files with 242 additions and 183 deletions

View File

@ -221,12 +221,16 @@ void BdaBridge<BridgeMatrix, BridgeVector, block_size>::solve_system(BridgeMatri
return;
}
using Mat = Accelerator::BlockedMatrix<double>;
if (!matrix) {
h_rows.reserve(Nb+1);
h_cols.reserve(nnzb);
copySparsityPatternFromISTL(*bridgeMat, h_rows, h_cols);
checkMemoryContiguous(*bridgeMat);
matrix = std::make_unique<Opm::Accelerator::BlockedMatrix>(Nb, nnzb, block_size, static_cast<double*>(&(((*bridgeMat)[0][0][0][0]))), h_cols.data(), h_rows.data());
matrix = std::make_unique<Mat>(Nb, nnzb, block_size,
static_cast<double*>(&(((*bridgeMat)[0][0][0][0]))),
h_cols.data(),
h_rows.data());
}
Dune::Timer t_zeros;
@ -245,7 +249,10 @@ void BdaBridge<BridgeMatrix, BridgeVector, block_size>::solve_system(BridgeMatri
h_jacCols.reserve(jacNnzb);
copySparsityPatternFromISTL(*jacMat, h_jacRows, h_jacCols);
checkMemoryContiguous(*jacMat);
jacMatrix = std::make_unique<Opm::Accelerator::BlockedMatrix>(Nb, jacNnzb, block_size, static_cast<double*>(&(((*jacMat)[0][0][0][0]))), h_jacCols.data(), h_jacRows.data());
jacMatrix = std::make_unique<Mat>(Nb, jacNnzb, block_size,
static_cast<double*>(&(((*jacMat)[0][0][0][0]))),
h_jacCols.data(),
h_jacRows.data());
}
Dune::Timer t_zeros2;

View File

@ -39,9 +39,9 @@ private:
int verbosity = 0;
bool use_gpu = false;
std::string accelerator_mode;
std::unique_ptr<Opm::Accelerator::BdaSolver<block_size> > backend;
std::shared_ptr<Opm::Accelerator::BlockedMatrix> matrix; // 'stores' matrix, actually points to h_rows, h_cols and the received BridgeMatrix for the nonzeroes
std::shared_ptr<Opm::Accelerator::BlockedMatrix> jacMatrix; // 'stores' preconditioner matrix, actually points to h_rows, h_cols and the received BridgeMatrix for the nonzeroes
std::unique_ptr<Accelerator::BdaSolver<block_size> > backend;
std::shared_ptr<Accelerator::BlockedMatrix<double>> matrix; // 'stores' matrix, actually points to h_rows, h_cols and the received BridgeMatrix for the nonzeroes
std::shared_ptr<Accelerator::BlockedMatrix<double>> jacMatrix; // 'stores' preconditioner matrix, actually points to h_rows, h_cols and the received BridgeMatrix for the nonzeroes
std::vector<int> h_rows, h_cols; // store the sparsity pattern of the matrix
std::vector<int> h_jacRows, h_jacCols; // store the sparsity pattern of the jacMatrix
std::vector<typename BridgeMatrix::size_type> diagIndices; // contains offsets of the diagonal blocks wrt start of the row, used for replaceZeroDiagonal()

View File

@ -83,8 +83,10 @@ namespace Accelerator {
virtual ~BdaSolver() {};
/// Define as pure virtual functions, so derivedclass must implement them
virtual SolverStatus solve_system(std::shared_ptr<BlockedMatrix> matrix, double *b,
std::shared_ptr<BlockedMatrix> jacMatrix, WellContributions& wellContribs, BdaResult &res) = 0;
virtual SolverStatus solve_system(std::shared_ptr<BlockedMatrix<double>> matrix,
double *b,
std::shared_ptr<BlockedMatrix<double>> jacMatrix,
WellContributions& wellContribs, BdaResult& res) = 0;
virtual void get_result(double *x) = 0;

View File

@ -17,9 +17,6 @@
along with OPM. If not, see <http://www.gnu.org/licenses/>.
*/
#include <cstring>
#include <cmath>
#include <config.h>
#include <opm/common/OpmLog/OpmLog.hpp>
@ -29,16 +26,10 @@
#include <opm/simulators/linalg/bda/Matrix.hpp>
#include <opm/simulators/linalg/bda/Matrix.hpp>
namespace Opm
namespace Opm::Accelerator {
void sortRow(int *colIndices, int *data, int left, int right)
{
namespace Accelerator
{
using Opm::OpmLog;
void sortRow(int *colIndices, int *data, int left, int right) {
int l = left;
int r = right;
int middle = colIndices[(l + r) >> 1];
@ -67,14 +58,14 @@ void sortRow(int *colIndices, int *data, int left, int right) {
sortRow(colIndices, data, l, right);
}
// LUMat->nnzValues[ik] = LUMat->nnzValues[ik] - (pivot * LUMat->nnzValues[jk]) in ilu decomposition
// a = a - (b * c)
void blockMultSub(double *a, double *b, double *c, unsigned int block_size)
template<class Scalar>
void blockMultSub(Scalar* a, Scalar* b, Scalar* c, unsigned int block_size)
{
for (unsigned int row = 0; row < block_size; row++) {
for (unsigned int col = 0; col < block_size; col++) {
double temp = 0.0;
Scalar temp = 0.0;
for (unsigned int k = 0; k < block_size; k++) {
temp += b[block_size * row + k] * c[block_size * k + col];
}
@ -84,11 +75,12 @@ void blockMultSub(double *a, double *b, double *c, unsigned int block_size)
}
/*Perform a 3x3 matrix-matrix multiplicationj on two blocks*/
void blockMult(double *mat1, double *mat2, double *resMat, unsigned int block_size) {
template<class Scalar>
void blockMult(Scalar* mat1, Scalar* mat2, Scalar* resMat, unsigned int block_size)
{
for (unsigned int row = 0; row < block_size; row++) {
for (unsigned int col = 0; col < block_size; col++) {
double temp = 0;
Scalar temp = 0;
for (unsigned int k = 0; k < block_size; k++) {
temp += mat1[block_size * row + k] * mat2[block_size * k + col];
}
@ -97,5 +89,10 @@ void blockMult(double *mat1, double *mat2, double *resMat, unsigned int block_si
}
}
} // namespace Accelerator
} // namespace Opm
#define INSTANCE_TYPE(T) \
template void blockMultSub(double*, double*, double*, unsigned int); \
template void blockMult(double*, double*, double*, unsigned int);
INSTANCE_TYPE(double)
} // namespace Opm::Accelerator

View File

@ -20,44 +20,40 @@
#ifndef OPM_BLOCKED_MATRIX_HPP
#define OPM_BLOCKED_MATRIX_HPP
namespace Opm
{
namespace Accelerator
{
namespace Opm::Accelerator {
/// This struct resembles a blocked csr matrix, like Dune::BCRSMatrix.
/// The data is stored in contiguous memory, such that they can be copied to a device in one transfer.
template<class Scalar>
class BlockedMatrix
{
public:
/// Allocate BlockedMatrix and data arrays with given sizes
/// \param[in] Nb number of blockrows
/// \param[in] nnzbs number of nonzero blocks
/// \param[in] block_size the number of rows and columns for each block
BlockedMatrix(int Nb_, int nnzbs_, unsigned int block_size_)
: nnzValues(new double[nnzbs_*block_size_*block_size_]),
colIndices(new int[nnzbs_*block_size_*block_size_]),
rowPointers(new int[Nb_+1]),
Nb(Nb_),
nnzbs(nnzbs_),
block_size(block_size_),
deleteNnzs(true),
deleteSparsity(true)
: nnzValues(new Scalar[nnzbs_*block_size_*block_size_])
, colIndices(new int[nnzbs_*block_size_*block_size_])
, rowPointers(new int[Nb_+1])
, Nb(Nb_)
, nnzbs(nnzbs_)
, block_size(block_size_)
, deleteNnzs(true)
, deleteSparsity(true)
{}
/// Allocate BlockedMatrix, but copy sparsity pattern instead of allocating new memory
/// \param[in] M matrix to be copied
BlockedMatrix(const BlockedMatrix& M)
: nnzValues(new double[M.nnzbs*M.block_size*M.block_size]),
colIndices(M.colIndices),
rowPointers(M.rowPointers),
Nb(M.Nb),
nnzbs(M.nnzbs),
block_size(M.block_size),
deleteNnzs(true),
deleteSparsity(false)
: nnzValues(new Scalar[M.nnzbs*M.block_size*M.block_size])
, colIndices(M.colIndices)
, rowPointers(M.rowPointers)
, Nb(M.Nb)
, nnzbs(M.nnzbs)
, block_size(M.block_size)
, deleteNnzs(true)
, deleteSparsity(false)
{}
/// Allocate BlockedMatrix, but let data arrays point to existing arrays
@ -67,18 +63,20 @@ public:
/// \param[in] nnzValues array of nonzero values, contains nnzb*block_size*block_size scalars
/// \param[in] colIndices array of column indices, contains nnzb entries
/// \param[in] rowPointers array of row pointers, contains Nb+1 entries
BlockedMatrix(int Nb_, int nnzbs_, unsigned int block_size_, double *nnzValues_, int *colIndices_, int *rowPointers_)
: nnzValues(nnzValues_),
colIndices(colIndices_),
rowPointers(rowPointers_),
Nb(Nb_),
nnzbs(nnzbs_),
block_size(block_size_),
deleteNnzs(false),
deleteSparsity(false)
BlockedMatrix(int Nb_, int nnzbs_, unsigned int block_size_,
Scalar* nnzValues_, int *colIndices_, int *rowPointers_)
: nnzValues(nnzValues_)
, colIndices(colIndices_)
, rowPointers(rowPointers_)
, Nb(Nb_)
, nnzbs(nnzbs_)
, block_size(block_size_)
, deleteNnzs(false)
, deleteSparsity(false)
{}
~BlockedMatrix(){
~BlockedMatrix()
{
if (deleteNnzs) {
delete[] nnzValues;
}
@ -88,8 +86,7 @@ public:
}
}
double *nnzValues;
Scalar* nnzValues;
int *colIndices;
int *rowPointers;
int Nb;
@ -99,14 +96,13 @@ public:
bool deleteSparsity;
};
/// Sort a row of matrix elements from a CSR-format, where the nonzeroes are ints
/// These ints aren't actually nonzeroes, but represent a mapping used later
/// \param[inout] colIndices represent keys in sorting
/// \param[inout] data sorted according to the colIndices
/// \param[in] left lower index of data of row
/// \param[in] right upper index of data of row
void sortRow(int *colIndices, int *data, int left, int right);
void sortRow(int* colIndices, int* data, int left, int right);
/// Multiply and subtract blocks
/// a = a - (b * c)
@ -114,7 +110,8 @@ void sortRow(int *colIndices, int *data, int left, int right);
/// \param[in] b input block
/// \param[in] c input block
/// \param[in] block_size size of block
void blockMultSub(double *a, double *b, double *c, unsigned int block_size);
template<class Scalar>
void blockMultSub(Scalar* a, Scalar* b, Scalar* c, unsigned int block_size);
/// Perform a matrix-matrix multiplication on two blocks
/// resMat = mat1 * mat2
@ -122,9 +119,9 @@ void blockMultSub(double *a, double *b, double *c, unsigned int block_size);
/// \param[in] mat2 input block 2
/// \param[out] resMat output block
/// \param[in] block_size size of block
void blockMult(double *mat1, double *mat2, double *resMat, unsigned int block_size);
template<class Scalar>
void blockMult(Scalar* mat1, Scalar* mat2, Scalar* resMat, unsigned int block_size);
} // namespace Accelerator
} // namespace Opm
} // namespace Opm::Accelerator
#endif

View File

@ -403,13 +403,13 @@ void amgclSolverBackend<block_size>::get_result(double *x_) {
}
} // end get_result()
template <unsigned int block_size>
SolverStatus amgclSolverBackend<block_size>::solve_system(std::shared_ptr<BlockedMatrix> matrix,
double *b,
[[maybe_unused]] std::shared_ptr<BlockedMatrix> jacMatrix,
[[maybe_unused]] WellContributions& wellContribs,
BdaResult &res)
SolverStatus amgclSolverBackend<block_size>::
solve_system(std::shared_ptr<BlockedMatrix<double>> matrix,
double *b,
[[maybe_unused]] std::shared_ptr<BlockedMatrix<double>> jacMatrix,
[[maybe_unused]] WellContributions& wellContribs,
BdaResult& res)
{
if (initialized == false) {
initialize(matrix->Nb, matrix->nnzbs);

View File

@ -140,8 +140,11 @@ public:
/// \param[in] wellContribs WellContributions, to apply them separately, instead of adding them to matrix A
/// \param[inout] res summary of solver result
/// \return status code
SolverStatus solve_system(std::shared_ptr<BlockedMatrix> matrix, double *b,
std::shared_ptr<BlockedMatrix> jacMatrix, WellContributions& wellContribs, BdaResult &res) override;
SolverStatus solve_system(std::shared_ptr<BlockedMatrix<double>> matrix,
double *b,
std::shared_ptr<BlockedMatrix<double>> jacMatrix,
WellContributions& wellContribs,
BdaResult& res) override;
/// Get result after linear solve, and peform postprocessing if necessary
/// \param[inout] x resulting x vector, caller must guarantee that x points to a valid array

View File

@ -216,9 +216,11 @@ void cusparseSolverBackend<block_size>::gpu_pbicgstab(WellContributions& wellCon
}
}
template <unsigned int block_size>
void cusparseSolverBackend<block_size>::initialize(std::shared_ptr<BlockedMatrix> matrix, std::shared_ptr<BlockedMatrix> jacMatrix) {
void cusparseSolverBackend<block_size>::
initialize(std::shared_ptr<BlockedMatrix<double>> matrix,
std::shared_ptr<BlockedMatrix<double>> jacMatrix)
{
this->Nb = matrix->Nb;
this->N = Nb * block_size;
this->nnzb = matrix->nnzbs;
@ -309,7 +311,11 @@ void cusparseSolverBackend<block_size>::finalize() {
template <unsigned int block_size>
void cusparseSolverBackend<block_size>::copy_system_to_gpu(std::shared_ptr<BlockedMatrix> matrix, double *b, std::shared_ptr<BlockedMatrix> jacMatrix) {
void cusparseSolverBackend<block_size>::
copy_system_to_gpu(std::shared_ptr<BlockedMatrix<double>> matrix,
double *b,
std::shared_ptr<BlockedMatrix<double>> jacMatrix)
{
Timer t;
cudaMemcpyAsync(d_bCols, matrix->colIndices, nnzb * sizeof(int), cudaMemcpyHostToDevice, stream);
@ -356,7 +362,11 @@ void cusparseSolverBackend<block_size>::copy_system_to_gpu(std::shared_ptr<Block
// don't copy rowpointers and colindices, they stay the same
template <unsigned int block_size>
void cusparseSolverBackend<block_size>::update_system_on_gpu(std::shared_ptr<BlockedMatrix> matrix, double *b, std::shared_ptr<BlockedMatrix> jacMatrix) {
void cusparseSolverBackend<block_size>::
update_system_on_gpu(std::shared_ptr<BlockedMatrix<double>> matrix,
double *b,
std::shared_ptr<BlockedMatrix<double>> jacMatrix)
{
Timer t;
cudaMemcpyAsync(d_b, b, N * sizeof(double), cudaMemcpyHostToDevice, stream);
@ -523,14 +533,13 @@ void cusparseSolverBackend<block_size>::get_result(double *x) {
}
} // end get_result()
template <unsigned int block_size>
SolverStatus cusparseSolverBackend<block_size>::solve_system(std::shared_ptr<BlockedMatrix> matrix,
double *b,
std::shared_ptr<BlockedMatrix> jacMatrix,
WellContributions& wellContribs,
BdaResult &res)
SolverStatus cusparseSolverBackend<block_size>::
solve_system(std::shared_ptr<BlockedMatrix<double>> matrix,
double *b,
std::shared_ptr<BlockedMatrix<double>> jacMatrix,
WellContributions& wellContribs,
BdaResult& res)
{
if (initialized == false) {
initialize(matrix, jacMatrix);

View File

@ -82,7 +82,8 @@ private:
/// Initialize GPU and allocate memory
/// \param[in] matrix matrix for spmv
/// \param[in] jacMatrix matrix for preconditioner
void initialize(std::shared_ptr<BlockedMatrix> matrix, std::shared_ptr<BlockedMatrix> jacMatrix);
void initialize(std::shared_ptr<BlockedMatrix<double>> matrix,
std::shared_ptr<BlockedMatrix<double>> jacMatrix);
/// Clean memory
void finalize();
@ -92,14 +93,18 @@ private:
/// \param[in] matrix matrix for spmv
/// \param[in] b input vector, contains N values
/// \param[in] jacMatrix matrix for preconditioner
void copy_system_to_gpu(std::shared_ptr<BlockedMatrix> matrix, double *b, std::shared_ptr<BlockedMatrix> jacMatrix);
void copy_system_to_gpu(std::shared_ptr<BlockedMatrix<double>> matrix,
double *b,
std::shared_ptr<BlockedMatrix<double>> jacMatrix);
/// Update linear system on GPU, don't copy rowpointers and colindices, they stay the same
/// also copy matrix for preconditioner if needed
/// \param[in] matrix matrix for spmv
/// \param[in] b input vector, contains N values
/// \param[in] jacMatrix matrix for preconditioner
void update_system_on_gpu(std::shared_ptr<BlockedMatrix> matrix, double *b, std::shared_ptr<BlockedMatrix> jacMatrix);
void update_system_on_gpu(std::shared_ptr<BlockedMatrix<double>> matrix,
double *b,
std::shared_ptr<BlockedMatrix<double>> jacMatrix);
/// Analyse sparsity pattern to extract parallelism
/// \return true iff analysis was successful
@ -134,8 +139,10 @@ public:
/// \param[in] wellContribs contains all WellContributions, to apply them separately, instead of adding them to matrix A
/// \param[inout] res summary of solver result
/// \return status code
SolverStatus solve_system(std::shared_ptr<BlockedMatrix> matrix, double *b,
std::shared_ptr<BlockedMatrix> jacMatrix, WellContributions& wellContribs, BdaResult &res) override;
SolverStatus solve_system(std::shared_ptr<BlockedMatrix<double>> matrix,
double *b,
std::shared_ptr<BlockedMatrix<double>> jacMatrix,
WellContributions& wellContribs, BdaResult& res) override;
/// Get resulting vector x after linear solve, also includes post processing if necessary
/// \param[inout] x resulting x vector, caller must guarantee that x points to a valid array

View File

@ -50,14 +50,15 @@ BILU0<block_size>::BILU0(bool opencl_ilu_parallel_, int verbosity_) :
template <unsigned int block_size>
bool BILU0<block_size>::analyze_matrix(BlockedMatrix *mat)
bool BILU0<block_size>::analyze_matrix(BlockedMatrix<double>* mat)
{
return analyze_matrix(mat, nullptr);
}
template <unsigned int block_size>
bool BILU0<block_size>::analyze_matrix(BlockedMatrix *mat, BlockedMatrix *jacMat)
bool BILU0<block_size>::analyze_matrix(BlockedMatrix<double>* mat,
BlockedMatrix<double>* jacMat)
{
const unsigned int bs = block_size;
@ -77,7 +78,7 @@ bool BILU0<block_size>::analyze_matrix(BlockedMatrix *mat, BlockedMatrix *jacMat
CSCRowIndices.resize(matToDecompose->nnzbs);
CSCColPointers.resize(Nb + 1);
LUmat = std::make_unique<BlockedMatrix>(*matToDecompose);
LUmat = std::make_unique<BlockedMatrix<double>>(*matToDecompose);
Timer t_convert;
csrPatternToCsc(matToDecompose->colIndices, matToDecompose->rowPointers, CSCRowIndices.data(), CSCColPointers.data(), Nb);
@ -87,7 +88,7 @@ bool BILU0<block_size>::analyze_matrix(BlockedMatrix *mat, BlockedMatrix *jacMat
OpmLog::info(out.str());
}
} else {
LUmat = std::make_unique<BlockedMatrix>(*matToDecompose);
LUmat = std::make_unique<BlockedMatrix<double>>(*matToDecompose);
}
Timer t_analysis;
@ -168,17 +169,16 @@ bool BILU0<block_size>::analyze_matrix(BlockedMatrix *mat, BlockedMatrix *jacMat
return true;
}
template <unsigned int block_size>
bool BILU0<block_size>::create_preconditioner(BlockedMatrix *mat)
bool BILU0<block_size>::create_preconditioner(BlockedMatrix<double>* mat)
{
return create_preconditioner(mat, nullptr);
}
template <unsigned int block_size>
bool BILU0<block_size>::create_preconditioner(BlockedMatrix *mat, BlockedMatrix *jacMat)
bool BILU0<block_size>::
create_preconditioner(BlockedMatrix<double>* mat,
BlockedMatrix<double>* jacMat)
{
const unsigned int bs = block_size;

View File

@ -53,9 +53,9 @@ class BILU0 : public Preconditioner<block_size>
using Base::err;
private:
std::unique_ptr<BlockedMatrix> LUmat = nullptr;
std::unique_ptr<BlockedMatrix<double>> LUmat = nullptr;
#if CHOW_PATEL
std::unique_ptr<BlockedMatrix> Lmat = nullptr, Umat = nullptr;
std::unique_ptr<BlockedMatrix<double>> Lmat = nullptr, Umat = nullptr;
#endif
std::vector<double> invDiagVals;
std::vector<int> diagIndex;
@ -93,12 +93,14 @@ public:
BILU0(bool opencl_ilu_parallel, int verbosity);
// analysis, extract parallelism if specified
bool analyze_matrix(BlockedMatrix *mat) override;
bool analyze_matrix(BlockedMatrix *mat, BlockedMatrix *jacMat) override;
bool analyze_matrix(BlockedMatrix<double>* mat) override;
bool analyze_matrix(BlockedMatrix<double>* mat,
BlockedMatrix<double>* jacMat) override;
// ilu_decomposition
bool create_preconditioner(BlockedMatrix *mat) override;
bool create_preconditioner(BlockedMatrix *mat, BlockedMatrix *jacMat) override;
bool create_preconditioner(BlockedMatrix<double>* mat) override;
bool create_preconditioner(BlockedMatrix<double>* mat,
BlockedMatrix<double>* jacMat) override;
// apply preconditioner, x = prec(y)
// via Lz = y

View File

@ -78,13 +78,14 @@ std::vector<int> buildCsrToCscOffsetMap(std::vector<int> colPointers, std::vecto
}
template <unsigned int block_size>
bool BISAI<block_size>::analyze_matrix(BlockedMatrix *mat)
bool BISAI<block_size>::analyze_matrix(BlockedMatrix<double>* mat)
{
return analyze_matrix(mat, nullptr);
}
template <unsigned int block_size>
bool BISAI<block_size>::analyze_matrix(BlockedMatrix *mat, BlockedMatrix *jacMat)
bool BISAI<block_size>::analyze_matrix(BlockedMatrix<double>* mat,
BlockedMatrix<double>* jacMat)
{
const unsigned int bs = block_size;
auto *m = mat;
@ -176,7 +177,9 @@ void BISAI<block_size>::buildUpperSubsystemsStructures(){
}
template <unsigned int block_size>
bool BISAI<block_size>::create_preconditioner(BlockedMatrix *mat, BlockedMatrix *jacMat)
bool BISAI<block_size>::
create_preconditioner(BlockedMatrix<double>* mat,
BlockedMatrix<double>* jacMat)
{
const unsigned int bs = block_size;
@ -274,7 +277,7 @@ bool BISAI<block_size>::create_preconditioner(BlockedMatrix *mat, BlockedMatrix
}
template <unsigned int block_size>
bool BISAI<block_size>::create_preconditioner(BlockedMatrix *mat)
bool BISAI<block_size>::create_preconditioner(BlockedMatrix<double>* mat)
{
return create_preconditioner(mat, nullptr);
}

View File

@ -31,7 +31,7 @@ namespace Opm
namespace Accelerator
{
class BlockedMatrix;
template<class Scalar> class BlockedMatrix;
/// This class implements a Blocked version of the Incomplete Sparse Approximate Inverse (ISAI) preconditioner.
/// Inspired by the paper "Incomplete Sparse Approximate Inverses for Parallel Preconditioning" by Anzt et. al.
@ -116,12 +116,14 @@ public:
void setOpencl(std::shared_ptr<cl::Context>& context, std::shared_ptr<cl::CommandQueue>& queue) override;
// analysis, extract parallelism
bool analyze_matrix(BlockedMatrix *mat) override;
bool analyze_matrix(BlockedMatrix *mat, BlockedMatrix *jacMat) override;
bool analyze_matrix(BlockedMatrix<double>* mat) override;
bool analyze_matrix(BlockedMatrix<double>* mat,
BlockedMatrix<double>* jacMat) override;
// ilu_decomposition
bool create_preconditioner(BlockedMatrix *mat) override;
bool create_preconditioner(BlockedMatrix *mat, BlockedMatrix *jacMat) override;
bool create_preconditioner(BlockedMatrix<double>* mat) override;
bool create_preconditioner(BlockedMatrix<double>* mat,
BlockedMatrix<double>* jacMat) override;
// apply preconditioner, x = prec(y)
void apply(const cl::Buffer& y, cl::Buffer& x) override;

View File

@ -61,10 +61,9 @@ void CPR<block_size>::setOpencl(std::shared_ptr<cl::Context>& context_, std::sha
bilu0->setOpencl(context, queue);
}
template <unsigned int block_size>
bool CPR<block_size>::analyze_matrix(BlockedMatrix *mat_) {
bool CPR<block_size>::analyze_matrix(BlockedMatrix<double>* mat_)
{
this->Nb = mat_->Nb;
this->nnzb = mat_->nnzbs;
this->N = Nb * block_size;
@ -76,7 +75,9 @@ bool CPR<block_size>::analyze_matrix(BlockedMatrix *mat_) {
}
template <unsigned int block_size>
bool CPR<block_size>::analyze_matrix(BlockedMatrix *mat_, BlockedMatrix *jacMat) {
bool CPR<block_size>::analyze_matrix(BlockedMatrix<double>* mat_,
BlockedMatrix<double>* jacMat)
{
this->Nb = mat_->Nb;
this->nnzb = mat_->nnzbs;
this->N = Nb * block_size;
@ -89,7 +90,10 @@ bool CPR<block_size>::analyze_matrix(BlockedMatrix *mat_, BlockedMatrix *jacMat)
}
template <unsigned int block_size>
bool CPR<block_size>::create_preconditioner(BlockedMatrix *mat_, BlockedMatrix *jacMat) {
bool CPR<block_size>::
create_preconditioner(BlockedMatrix<double>* mat_,
BlockedMatrix<double>* jacMat)
{
Dune::Timer t_bilu0;
bool result = bilu0->create_preconditioner(mat_, jacMat);
if (verbosity >= 3) {
@ -109,7 +113,8 @@ bool CPR<block_size>::create_preconditioner(BlockedMatrix *mat_, BlockedMatrix *
}
template <unsigned int block_size>
bool CPR<block_size>::create_preconditioner(BlockedMatrix *mat_) {
bool CPR<block_size>::create_preconditioner(BlockedMatrix<double>* mat_)
{
Dune::Timer t_bilu0;
bool result = bilu0->create_preconditioner(mat_);
if (verbosity >= 3) {
@ -212,9 +217,9 @@ void CPR<block_size>::opencl_upload() {
}
}
template <unsigned int block_size>
void CPR<block_size>::create_preconditioner_amg(BlockedMatrix *mat_) {
void CPR<block_size>::create_preconditioner_amg(BlockedMatrix<double>* mat_)
{
this->mat = mat_;
coarse_vals.resize(nnzb);

View File

@ -38,7 +38,7 @@ namespace Opm
namespace Accelerator
{
class BlockedMatrix;
template<class Scalar> class BlockedMatrix;
/// This class implements a Constrained Pressure Residual (CPR) preconditioner
template <unsigned int block_size>
@ -73,7 +73,7 @@ private:
std::once_flag opencl_buffers_allocated; // only allocate OpenCL Buffers once
std::unique_ptr<BILU0<block_size> > bilu0; // Blocked ILU0 preconditioner
BlockedMatrix *mat = nullptr; // input matrix, blocked
BlockedMatrix<double>* mat = nullptr; // input matrix, blocked
using DuneMat = Dune::BCRSMatrix<Dune::FieldMatrix<double, 1, 1> >;
using DuneVec = Dune::BlockVector<Dune::FieldVector<double, 1> >;
@ -112,13 +112,14 @@ private:
void amg_cycle_gpu(const int level, cl::Buffer &y, cl::Buffer &x);
void create_preconditioner_amg(BlockedMatrix *mat);
void create_preconditioner_amg(BlockedMatrix<double>* mat);
public:
CPR(bool opencl_ilu_parallel, int verbosity);
bool analyze_matrix(BlockedMatrix *mat) override;
bool analyze_matrix(BlockedMatrix *mat, BlockedMatrix *jacMat) override;
bool analyze_matrix(BlockedMatrix<double>* mat) override;
bool analyze_matrix(BlockedMatrix<double>* mat,
BlockedMatrix<double>* jacMat) override;
// set own Opencl variables, but also that of the bilu0 preconditioner
void setOpencl(std::shared_ptr<cl::Context>& context, std::shared_ptr<cl::CommandQueue>& queue) override;
@ -127,8 +128,9 @@ public:
// also applies amg for pressure component
void apply(const cl::Buffer& y, cl::Buffer& x) override;
bool create_preconditioner(BlockedMatrix *mat) override;
bool create_preconditioner(BlockedMatrix *mat, BlockedMatrix *jacMat) override;
bool create_preconditioner(BlockedMatrix<double>* mat) override;
bool create_preconditioner(BlockedMatrix<double>* mat,
BlockedMatrix<double>* jacMat) override;
};
// solve A^T * x = b

View File

@ -54,7 +54,8 @@ void OpenclMatrix::upload(cl::CommandQueue *queue, Matrix *matrix) {
upload(queue, matrix->nnzValues.data(), matrix->colIndices.data(), matrix->rowPointers.data());
}
void OpenclMatrix::upload(cl::CommandQueue *queue, BlockedMatrix *matrix) {
void OpenclMatrix::upload(cl::CommandQueue* queue, BlockedMatrix<double>* matrix)
{
if (matrix->block_size != block_size) {
OPM_THROW(std::logic_error, "Error trying to upload a BlockedMatrix to OpenclMatrix with different block_size");
}

View File

@ -30,7 +30,7 @@ namespace Accelerator
{
class Matrix;
class BlockedMatrix;
template<class Scalar> class BlockedMatrix;
/// This struct resembles a csr matrix, only doubles are supported
/// The matrix data is stored in OpenCL Buffers
@ -50,7 +50,7 @@ public:
void upload(cl::CommandQueue *queue, double *vals, int *cols, int *rows);
void upload(cl::CommandQueue *queue, Matrix *matrix);
void upload(cl::CommandQueue *queue, BlockedMatrix *matrix);
void upload(cl::CommandQueue* queue, BlockedMatrix<double>* matrix);
cl::Buffer nnzValues;
cl::Buffer colIndices;

View File

@ -60,21 +60,26 @@ Preconditioner<block_size>::create(Type type, bool opencl_ilu_parallel, int verb
}
template <unsigned int block_size>
bool Preconditioner<block_size>::analyze_matrix(BlockedMatrix *mat, [[maybe_unused]] BlockedMatrix *jacMat) {
bool Preconditioner<block_size>::
analyze_matrix(BlockedMatrix<double>* mat,
[[maybe_unused]] BlockedMatrix<double>* jacMat)
{
return analyze_matrix(mat);
}
template <unsigned int block_size>
bool Preconditioner<block_size>::create_preconditioner(BlockedMatrix *mat, [[maybe_unused]] BlockedMatrix *jacMat) {
bool Preconditioner<block_size>::
create_preconditioner(BlockedMatrix<double>* mat,
[[maybe_unused]] BlockedMatrix<double>* jacMat)
{
return create_preconditioner(mat);
}
#define INSTANTIATE_BDA_FUNCTIONS(n) \
template std::unique_ptr<Preconditioner<n> > Preconditioner<n>::create(Type, bool, int); \
template void Preconditioner<n>::setOpencl(std::shared_ptr<cl::Context>&, std::shared_ptr<cl::CommandQueue>&); \
template bool Preconditioner<n>::analyze_matrix(BlockedMatrix *, BlockedMatrix *); \
template bool Preconditioner<n>::create_preconditioner(BlockedMatrix *, BlockedMatrix *);
template bool Preconditioner<n>::analyze_matrix(BlockedMatrix<double> *, BlockedMatrix<double> *); \
template bool Preconditioner<n>::create_preconditioner(BlockedMatrix<double> *, BlockedMatrix<double> *);
INSTANTIATE_BDA_FUNCTIONS(1);
INSTANTIATE_BDA_FUNCTIONS(2);

View File

@ -29,7 +29,7 @@ namespace Opm
namespace Accelerator
{
class BlockedMatrix;
template<class Scalar> class BlockedMatrix;
template <unsigned int block_size>
class Preconditioner
@ -73,13 +73,15 @@ public:
// analyze matrix, e.g. the sparsity pattern
// probably only called once
// the version with two params can be overloaded, if not, it will default to using the one param version
virtual bool analyze_matrix(BlockedMatrix *mat) = 0;
virtual bool analyze_matrix(BlockedMatrix *mat, BlockedMatrix *jacMat);
virtual bool analyze_matrix(BlockedMatrix<double>* mat) = 0;
virtual bool analyze_matrix(BlockedMatrix<double>* mat,
BlockedMatrix<double>* jacMat);
// create/update preconditioner, probably used every linear solve
// the version with two params can be overloaded, if not, it will default to using the one param version
virtual bool create_preconditioner(BlockedMatrix *mat) = 0;
virtual bool create_preconditioner(BlockedMatrix *mat, BlockedMatrix *jacMat);
virtual bool create_preconditioner(BlockedMatrix<double>* mat) = 0;
virtual bool create_preconditioner(BlockedMatrix<double>* mat,
BlockedMatrix<double>* jacMat);
};
} //namespace Accelerator

View File

@ -405,9 +405,11 @@ void openclSolverBackend<block_size>::gpu_pbicgstab(WellContributions& wellContr
}
}
template <unsigned int block_size>
void openclSolverBackend<block_size>::initialize(std::shared_ptr<BlockedMatrix> matrix, std::shared_ptr<BlockedMatrix> jacMatrix) {
void openclSolverBackend<block_size>::
initialize(std::shared_ptr<BlockedMatrix<double>> matrix,
std::shared_ptr<BlockedMatrix<double>> jacMatrix)
{
this->Nb = matrix->Nb;
this->N = Nb * block_size;
this->nnzb = matrix->nnzbs;
@ -637,13 +639,13 @@ void openclSolverBackend<block_size>::get_result(double *x) {
}
} // end get_result()
template <unsigned int block_size>
SolverStatus openclSolverBackend<block_size>::solve_system(std::shared_ptr<BlockedMatrix> matrix,
double *b,
std::shared_ptr<BlockedMatrix> jacMatrix,
WellContributions& wellContribs,
BdaResult &res)
SolverStatus openclSolverBackend<block_size>::
solve_system(std::shared_ptr<BlockedMatrix<double>> matrix,
double *b,
std::shared_ptr<BlockedMatrix<double>> jacMatrix,
WellContributions& wellContribs,
BdaResult& res)
{
if (initialized == false) {
initialize(matrix, jacMatrix);

View File

@ -67,8 +67,8 @@ private:
// can perform blocked ILU0 and AMG on pressure component
bool is_root; // allow for nested solvers, the root solver is called by BdaBridge
bool analysis_done = false;
std::shared_ptr<BlockedMatrix> mat = nullptr; // original matrix
std::shared_ptr<BlockedMatrix> jacMat = nullptr; // matrix for preconditioner
std::shared_ptr<BlockedMatrix<double>> mat{}; // original matrix
std::shared_ptr<BlockedMatrix<double>> jacMat{}; // matrix for preconditioner
bool opencl_ilu_parallel; // parallelize ILU operations (with level_scheduling)
std::vector<cl::Event> events;
cl_int err;
@ -81,7 +81,8 @@ private:
/// Initialize GPU and allocate memory
/// \param[in] matrix matrix A
/// \param[in] jacMatrix matrix for preconditioner
void initialize(std::shared_ptr<BlockedMatrix> matrix, std::shared_ptr<BlockedMatrix> jacMatrix);
void initialize(std::shared_ptr<BlockedMatrix<double>> matrix,
std::shared_ptr<BlockedMatrix<double>> jacMatrix);
/// Copy linear system to GPU
void copy_system_to_gpu();
@ -134,8 +135,11 @@ public:
/// \param[in] wellContribs WellContributions, to apply them separately, instead of adding them to matrix A
/// \param[inout] res summary of solver result
/// \return status code
SolverStatus solve_system(std::shared_ptr<BlockedMatrix> matrix, double *b,
std::shared_ptr<BlockedMatrix> jacMatrix, WellContributions& wellContribs, BdaResult &res) override;
SolverStatus solve_system(std::shared_ptr<BlockedMatrix<double>> matrix,
double *b,
std::shared_ptr<BlockedMatrix<double>> jacMatrix,
WellContributions& wellContribs,
BdaResult& res) override;
/// Solve scalar linear system, for example a coarse system of an AMG preconditioner
/// Data is already on the GPU

View File

@ -76,9 +76,9 @@ rocalutionSolverBackend<block_size>::~rocalutionSolverBackend() {
rocalution::stop_rocalution();
}
template <unsigned int block_size>
void rocalutionSolverBackend<block_size>::initialize(BlockedMatrix *matrix) {
void rocalutionSolverBackend<block_size>::initialize(BlockedMatrix<double>* matrix)
{
this->Nb = matrix->Nb;
this->N = Nb * block_size;
this->nnzb = matrix->nnzbs;
@ -94,9 +94,9 @@ void rocalutionSolverBackend<block_size>::initialize(BlockedMatrix *matrix) {
initialized = true;
} // end initialize()
template <unsigned int block_size>
void rocalutionSolverBackend<block_size>::convert_matrix(BlockedMatrix *matrix) {
void rocalutionSolverBackend<block_size>::convert_matrix(BlockedMatrix<double>* matrix)
{
Timer t;
for(int i = 0; i < Nb+1; ++i){
@ -147,13 +147,13 @@ void rocalutionSolverBackend<block_size>::get_result(double *x) {
}
} // end get_result()
template <unsigned int block_size>
SolverStatus rocalutionSolverBackend<block_size>::solve_system(std::shared_ptr<BlockedMatrix> matrix,
double *b,
[[maybe_unused]] std::shared_ptr<BlockedMatrix> jacMatrix,
[[maybe_unused]] WellContributions& wellContribs,
BdaResult &res)
SolverStatus rocalutionSolverBackend<block_size>::
solve_system(std::shared_ptr<BlockedMatrix<double>> matrix,
double *b,
[[maybe_unused]] std::shared_ptr<BlockedMatrix<double>> jacMatrix,
[[maybe_unused]] WellContributions& wellContribs,
BdaResult& res)
{
if (initialized == false) {
initialize(matrix.get());

View File

@ -65,12 +65,12 @@ private:
/// Initialize sizes and allocate memory
/// \param[in] matrix matrix A
void initialize(BlockedMatrix *matrix);
void initialize(BlockedMatrix<double>* matrix);
/// Convert matrix to rocalution format
/// copy matrix to raw pointers, which are given to and freed by rocalution
/// \param[in] matrix matrix A
void convert_matrix(BlockedMatrix *matrix);
void convert_matrix(BlockedMatrix<double>* matrix);
public:
@ -91,8 +91,11 @@ public:
/// \param[in] wellContribs WellContributions, to apply them separately, instead of adding them to matrix A
/// \param[inout] res summary of solver result
/// \return status code
SolverStatus solve_system(std::shared_ptr<BlockedMatrix> matrix, double *b,
std::shared_ptr<BlockedMatrix> jacMatrix, WellContributions& wellContribs, BdaResult &res) override;
SolverStatus solve_system(std::shared_ptr<BlockedMatrix<double>> matrix,
double *b,
std::shared_ptr<BlockedMatrix<double>> jacMatrix,
WellContributions& wellContribs,
BdaResult& res) override;
/// Get result after linear solve, and peform postprocessing if necessary
/// \param[inout] x resulting x vector, caller must guarantee that x points to a valid array

View File

@ -375,9 +375,11 @@ void rocsparseSolverBackend<block_size>::gpu_pbicgstab([[maybe_unused]] WellCont
}
}
template <unsigned int block_size>
void rocsparseSolverBackend<block_size>::initialize(std::shared_ptr<BlockedMatrix> matrix, std::shared_ptr<BlockedMatrix> jacMatrix) {
void rocsparseSolverBackend<block_size>::
initialize(std::shared_ptr<BlockedMatrix<double>> matrix,
std::shared_ptr<BlockedMatrix<double>> jacMatrix)
{
this->Nb = matrix->Nb;
this->N = Nb * block_size;
this->nnzb = matrix->nnzbs;
@ -634,11 +636,12 @@ void rocsparseSolverBackend<block_size>::get_result(double *x) {
template <unsigned int block_size>
SolverStatus rocsparseSolverBackend<block_size>::solve_system(std::shared_ptr<BlockedMatrix> matrix,
double *b,
std::shared_ptr<BlockedMatrix> jacMatrix,
WellContributions& wellContribs,
BdaResult &res)
SolverStatus rocsparseSolverBackend<block_size>::
solve_system(std::shared_ptr<BlockedMatrix<double>> matrix,
double *b,
std::shared_ptr<BlockedMatrix<double>> jacMatrix,
WellContributions& wellContribs,
BdaResult& res)
{
if (initialized == false) {
initialize(matrix, jacMatrix);

View File

@ -54,14 +54,13 @@ class rocsparseSolverBackend : public BdaSolver<block_size>
using Base::initialized;
private:
double c_copy = 0.0; // cummulative timer measuring the total time it takes to transfer the data to the GPU
bool useJacMatrix = false;
bool analysis_done = false;
std::shared_ptr<BlockedMatrix> mat = nullptr; // original matrix
std::shared_ptr<BlockedMatrix> jacMat = nullptr; // matrix for preconditioner
std::shared_ptr<BlockedMatrix<double>> mat{}; // original matrix
std::shared_ptr<BlockedMatrix<double>> jacMat{}; // matrix for preconditioner
int nnzbs_prec = 0; // number of nnz blocks in preconditioner matrix M
rocsparse_direction dir = rocsparse_direction_row;
@ -93,7 +92,8 @@ private:
/// Initialize GPU and allocate memory
/// \param[in] matrix matrix A
/// \param[in] jacMatrix matrix for preconditioner
void initialize(std::shared_ptr<BlockedMatrix> matrix, std::shared_ptr<BlockedMatrix> jacMatrix);
void initialize(std::shared_ptr<BlockedMatrix<double>> matrix,
std::shared_ptr<BlockedMatrix<double>> jacMatrix);
/// Copy linear system to GPU
/// \param[in] b input vector, contains N values
@ -138,8 +138,11 @@ public:
/// \param[in] wellContribs WellContributions, to apply them separately, instead of adding them to matrix A
/// \param[inout] res summary of solver result
/// \return status code
SolverStatus solve_system(std::shared_ptr<BlockedMatrix> matrix, double *b,
std::shared_ptr<BlockedMatrix> jacMatrix, WellContributions& wellContribs, BdaResult &res) override;
SolverStatus solve_system(std::shared_ptr<BlockedMatrix<double>> matrix,
double* b,
std::shared_ptr<BlockedMatrix<double>> jacMatrix,
WellContributions& wellContribs,
BdaResult& res) override;
/// Solve scalar linear system, for example a coarse system of an AMG preconditioner
/// Data is already on the GPU