mirror of
https://github.com/OPM/opm-simulators.git
synced 2024-07-07 04:53:03 -05:00
BlockMatrix: template Scalar type
This commit is contained in:
parent
b9ee637d78
commit
25374b0e54
|
@ -221,12 +221,16 @@ void BdaBridge<BridgeMatrix, BridgeVector, block_size>::solve_system(BridgeMatri
|
|||
return;
|
||||
}
|
||||
|
||||
using Mat = Accelerator::BlockedMatrix<double>;
|
||||
if (!matrix) {
|
||||
h_rows.reserve(Nb+1);
|
||||
h_cols.reserve(nnzb);
|
||||
copySparsityPatternFromISTL(*bridgeMat, h_rows, h_cols);
|
||||
checkMemoryContiguous(*bridgeMat);
|
||||
matrix = std::make_unique<Opm::Accelerator::BlockedMatrix>(Nb, nnzb, block_size, static_cast<double*>(&(((*bridgeMat)[0][0][0][0]))), h_cols.data(), h_rows.data());
|
||||
matrix = std::make_unique<Mat>(Nb, nnzb, block_size,
|
||||
static_cast<double*>(&(((*bridgeMat)[0][0][0][0]))),
|
||||
h_cols.data(),
|
||||
h_rows.data());
|
||||
}
|
||||
|
||||
Dune::Timer t_zeros;
|
||||
|
@ -245,7 +249,10 @@ void BdaBridge<BridgeMatrix, BridgeVector, block_size>::solve_system(BridgeMatri
|
|||
h_jacCols.reserve(jacNnzb);
|
||||
copySparsityPatternFromISTL(*jacMat, h_jacRows, h_jacCols);
|
||||
checkMemoryContiguous(*jacMat);
|
||||
jacMatrix = std::make_unique<Opm::Accelerator::BlockedMatrix>(Nb, jacNnzb, block_size, static_cast<double*>(&(((*jacMat)[0][0][0][0]))), h_jacCols.data(), h_jacRows.data());
|
||||
jacMatrix = std::make_unique<Mat>(Nb, jacNnzb, block_size,
|
||||
static_cast<double*>(&(((*jacMat)[0][0][0][0]))),
|
||||
h_jacCols.data(),
|
||||
h_jacRows.data());
|
||||
}
|
||||
|
||||
Dune::Timer t_zeros2;
|
||||
|
|
|
@ -39,9 +39,9 @@ private:
|
|||
int verbosity = 0;
|
||||
bool use_gpu = false;
|
||||
std::string accelerator_mode;
|
||||
std::unique_ptr<Opm::Accelerator::BdaSolver<block_size> > backend;
|
||||
std::shared_ptr<Opm::Accelerator::BlockedMatrix> matrix; // 'stores' matrix, actually points to h_rows, h_cols and the received BridgeMatrix for the nonzeroes
|
||||
std::shared_ptr<Opm::Accelerator::BlockedMatrix> jacMatrix; // 'stores' preconditioner matrix, actually points to h_rows, h_cols and the received BridgeMatrix for the nonzeroes
|
||||
std::unique_ptr<Accelerator::BdaSolver<block_size> > backend;
|
||||
std::shared_ptr<Accelerator::BlockedMatrix<double>> matrix; // 'stores' matrix, actually points to h_rows, h_cols and the received BridgeMatrix for the nonzeroes
|
||||
std::shared_ptr<Accelerator::BlockedMatrix<double>> jacMatrix; // 'stores' preconditioner matrix, actually points to h_rows, h_cols and the received BridgeMatrix for the nonzeroes
|
||||
std::vector<int> h_rows, h_cols; // store the sparsity pattern of the matrix
|
||||
std::vector<int> h_jacRows, h_jacCols; // store the sparsity pattern of the jacMatrix
|
||||
std::vector<typename BridgeMatrix::size_type> diagIndices; // contains offsets of the diagonal blocks wrt start of the row, used for replaceZeroDiagonal()
|
||||
|
|
|
@ -83,8 +83,10 @@ namespace Accelerator {
|
|||
virtual ~BdaSolver() {};
|
||||
|
||||
/// Define as pure virtual functions, so derivedclass must implement them
|
||||
virtual SolverStatus solve_system(std::shared_ptr<BlockedMatrix> matrix, double *b,
|
||||
std::shared_ptr<BlockedMatrix> jacMatrix, WellContributions& wellContribs, BdaResult &res) = 0;
|
||||
virtual SolverStatus solve_system(std::shared_ptr<BlockedMatrix<double>> matrix,
|
||||
double *b,
|
||||
std::shared_ptr<BlockedMatrix<double>> jacMatrix,
|
||||
WellContributions& wellContribs, BdaResult& res) = 0;
|
||||
|
||||
virtual void get_result(double *x) = 0;
|
||||
|
||||
|
|
|
@ -17,9 +17,6 @@
|
|||
along with OPM. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <cstring>
|
||||
#include <cmath>
|
||||
|
||||
#include <config.h>
|
||||
|
||||
#include <opm/common/OpmLog/OpmLog.hpp>
|
||||
|
@ -29,16 +26,10 @@
|
|||
#include <opm/simulators/linalg/bda/Matrix.hpp>
|
||||
#include <opm/simulators/linalg/bda/Matrix.hpp>
|
||||
|
||||
namespace Opm
|
||||
namespace Opm::Accelerator {
|
||||
|
||||
void sortRow(int *colIndices, int *data, int left, int right)
|
||||
{
|
||||
namespace Accelerator
|
||||
{
|
||||
|
||||
using Opm::OpmLog;
|
||||
|
||||
|
||||
|
||||
void sortRow(int *colIndices, int *data, int left, int right) {
|
||||
int l = left;
|
||||
int r = right;
|
||||
int middle = colIndices[(l + r) >> 1];
|
||||
|
@ -67,14 +58,14 @@ void sortRow(int *colIndices, int *data, int left, int right) {
|
|||
sortRow(colIndices, data, l, right);
|
||||
}
|
||||
|
||||
|
||||
// LUMat->nnzValues[ik] = LUMat->nnzValues[ik] - (pivot * LUMat->nnzValues[jk]) in ilu decomposition
|
||||
// a = a - (b * c)
|
||||
void blockMultSub(double *a, double *b, double *c, unsigned int block_size)
|
||||
template<class Scalar>
|
||||
void blockMultSub(Scalar* a, Scalar* b, Scalar* c, unsigned int block_size)
|
||||
{
|
||||
for (unsigned int row = 0; row < block_size; row++) {
|
||||
for (unsigned int col = 0; col < block_size; col++) {
|
||||
double temp = 0.0;
|
||||
Scalar temp = 0.0;
|
||||
for (unsigned int k = 0; k < block_size; k++) {
|
||||
temp += b[block_size * row + k] * c[block_size * k + col];
|
||||
}
|
||||
|
@ -84,11 +75,12 @@ void blockMultSub(double *a, double *b, double *c, unsigned int block_size)
|
|||
}
|
||||
|
||||
/*Perform a 3x3 matrix-matrix multiplicationj on two blocks*/
|
||||
|
||||
void blockMult(double *mat1, double *mat2, double *resMat, unsigned int block_size) {
|
||||
template<class Scalar>
|
||||
void blockMult(Scalar* mat1, Scalar* mat2, Scalar* resMat, unsigned int block_size)
|
||||
{
|
||||
for (unsigned int row = 0; row < block_size; row++) {
|
||||
for (unsigned int col = 0; col < block_size; col++) {
|
||||
double temp = 0;
|
||||
Scalar temp = 0;
|
||||
for (unsigned int k = 0; k < block_size; k++) {
|
||||
temp += mat1[block_size * row + k] * mat2[block_size * k + col];
|
||||
}
|
||||
|
@ -97,5 +89,10 @@ void blockMult(double *mat1, double *mat2, double *resMat, unsigned int block_si
|
|||
}
|
||||
}
|
||||
|
||||
} // namespace Accelerator
|
||||
} // namespace Opm
|
||||
#define INSTANCE_TYPE(T) \
|
||||
template void blockMultSub(double*, double*, double*, unsigned int); \
|
||||
template void blockMult(double*, double*, double*, unsigned int);
|
||||
|
||||
INSTANCE_TYPE(double)
|
||||
|
||||
} // namespace Opm::Accelerator
|
||||
|
|
|
@ -20,44 +20,40 @@
|
|||
#ifndef OPM_BLOCKED_MATRIX_HPP
|
||||
#define OPM_BLOCKED_MATRIX_HPP
|
||||
|
||||
namespace Opm
|
||||
{
|
||||
namespace Accelerator
|
||||
{
|
||||
namespace Opm::Accelerator {
|
||||
|
||||
/// This struct resembles a blocked csr matrix, like Dune::BCRSMatrix.
|
||||
/// The data is stored in contiguous memory, such that they can be copied to a device in one transfer.
|
||||
template<class Scalar>
|
||||
class BlockedMatrix
|
||||
{
|
||||
|
||||
public:
|
||||
|
||||
/// Allocate BlockedMatrix and data arrays with given sizes
|
||||
/// \param[in] Nb number of blockrows
|
||||
/// \param[in] nnzbs number of nonzero blocks
|
||||
/// \param[in] block_size the number of rows and columns for each block
|
||||
BlockedMatrix(int Nb_, int nnzbs_, unsigned int block_size_)
|
||||
: nnzValues(new double[nnzbs_*block_size_*block_size_]),
|
||||
colIndices(new int[nnzbs_*block_size_*block_size_]),
|
||||
rowPointers(new int[Nb_+1]),
|
||||
Nb(Nb_),
|
||||
nnzbs(nnzbs_),
|
||||
block_size(block_size_),
|
||||
deleteNnzs(true),
|
||||
deleteSparsity(true)
|
||||
: nnzValues(new Scalar[nnzbs_*block_size_*block_size_])
|
||||
, colIndices(new int[nnzbs_*block_size_*block_size_])
|
||||
, rowPointers(new int[Nb_+1])
|
||||
, Nb(Nb_)
|
||||
, nnzbs(nnzbs_)
|
||||
, block_size(block_size_)
|
||||
, deleteNnzs(true)
|
||||
, deleteSparsity(true)
|
||||
{}
|
||||
|
||||
/// Allocate BlockedMatrix, but copy sparsity pattern instead of allocating new memory
|
||||
/// \param[in] M matrix to be copied
|
||||
BlockedMatrix(const BlockedMatrix& M)
|
||||
: nnzValues(new double[M.nnzbs*M.block_size*M.block_size]),
|
||||
colIndices(M.colIndices),
|
||||
rowPointers(M.rowPointers),
|
||||
Nb(M.Nb),
|
||||
nnzbs(M.nnzbs),
|
||||
block_size(M.block_size),
|
||||
deleteNnzs(true),
|
||||
deleteSparsity(false)
|
||||
: nnzValues(new Scalar[M.nnzbs*M.block_size*M.block_size])
|
||||
, colIndices(M.colIndices)
|
||||
, rowPointers(M.rowPointers)
|
||||
, Nb(M.Nb)
|
||||
, nnzbs(M.nnzbs)
|
||||
, block_size(M.block_size)
|
||||
, deleteNnzs(true)
|
||||
, deleteSparsity(false)
|
||||
{}
|
||||
|
||||
/// Allocate BlockedMatrix, but let data arrays point to existing arrays
|
||||
|
@ -67,18 +63,20 @@ public:
|
|||
/// \param[in] nnzValues array of nonzero values, contains nnzb*block_size*block_size scalars
|
||||
/// \param[in] colIndices array of column indices, contains nnzb entries
|
||||
/// \param[in] rowPointers array of row pointers, contains Nb+1 entries
|
||||
BlockedMatrix(int Nb_, int nnzbs_, unsigned int block_size_, double *nnzValues_, int *colIndices_, int *rowPointers_)
|
||||
: nnzValues(nnzValues_),
|
||||
colIndices(colIndices_),
|
||||
rowPointers(rowPointers_),
|
||||
Nb(Nb_),
|
||||
nnzbs(nnzbs_),
|
||||
block_size(block_size_),
|
||||
deleteNnzs(false),
|
||||
deleteSparsity(false)
|
||||
BlockedMatrix(int Nb_, int nnzbs_, unsigned int block_size_,
|
||||
Scalar* nnzValues_, int *colIndices_, int *rowPointers_)
|
||||
: nnzValues(nnzValues_)
|
||||
, colIndices(colIndices_)
|
||||
, rowPointers(rowPointers_)
|
||||
, Nb(Nb_)
|
||||
, nnzbs(nnzbs_)
|
||||
, block_size(block_size_)
|
||||
, deleteNnzs(false)
|
||||
, deleteSparsity(false)
|
||||
{}
|
||||
|
||||
~BlockedMatrix(){
|
||||
~BlockedMatrix()
|
||||
{
|
||||
if (deleteNnzs) {
|
||||
delete[] nnzValues;
|
||||
}
|
||||
|
@ -88,8 +86,7 @@ public:
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
double *nnzValues;
|
||||
Scalar* nnzValues;
|
||||
int *colIndices;
|
||||
int *rowPointers;
|
||||
int Nb;
|
||||
|
@ -99,14 +96,13 @@ public:
|
|||
bool deleteSparsity;
|
||||
};
|
||||
|
||||
|
||||
/// Sort a row of matrix elements from a CSR-format, where the nonzeroes are ints
|
||||
/// These ints aren't actually nonzeroes, but represent a mapping used later
|
||||
/// \param[inout] colIndices represent keys in sorting
|
||||
/// \param[inout] data sorted according to the colIndices
|
||||
/// \param[in] left lower index of data of row
|
||||
/// \param[in] right upper index of data of row
|
||||
void sortRow(int *colIndices, int *data, int left, int right);
|
||||
void sortRow(int* colIndices, int* data, int left, int right);
|
||||
|
||||
/// Multiply and subtract blocks
|
||||
/// a = a - (b * c)
|
||||
|
@ -114,7 +110,8 @@ void sortRow(int *colIndices, int *data, int left, int right);
|
|||
/// \param[in] b input block
|
||||
/// \param[in] c input block
|
||||
/// \param[in] block_size size of block
|
||||
void blockMultSub(double *a, double *b, double *c, unsigned int block_size);
|
||||
template<class Scalar>
|
||||
void blockMultSub(Scalar* a, Scalar* b, Scalar* c, unsigned int block_size);
|
||||
|
||||
/// Perform a matrix-matrix multiplication on two blocks
|
||||
/// resMat = mat1 * mat2
|
||||
|
@ -122,9 +119,9 @@ void blockMultSub(double *a, double *b, double *c, unsigned int block_size);
|
|||
/// \param[in] mat2 input block 2
|
||||
/// \param[out] resMat output block
|
||||
/// \param[in] block_size size of block
|
||||
void blockMult(double *mat1, double *mat2, double *resMat, unsigned int block_size);
|
||||
template<class Scalar>
|
||||
void blockMult(Scalar* mat1, Scalar* mat2, Scalar* resMat, unsigned int block_size);
|
||||
|
||||
} // namespace Accelerator
|
||||
} // namespace Opm
|
||||
} // namespace Opm::Accelerator
|
||||
|
||||
#endif
|
||||
|
|
|
@ -403,13 +403,13 @@ void amgclSolverBackend<block_size>::get_result(double *x_) {
|
|||
}
|
||||
} // end get_result()
|
||||
|
||||
|
||||
template <unsigned int block_size>
|
||||
SolverStatus amgclSolverBackend<block_size>::solve_system(std::shared_ptr<BlockedMatrix> matrix,
|
||||
double *b,
|
||||
[[maybe_unused]] std::shared_ptr<BlockedMatrix> jacMatrix,
|
||||
[[maybe_unused]] WellContributions& wellContribs,
|
||||
BdaResult &res)
|
||||
SolverStatus amgclSolverBackend<block_size>::
|
||||
solve_system(std::shared_ptr<BlockedMatrix<double>> matrix,
|
||||
double *b,
|
||||
[[maybe_unused]] std::shared_ptr<BlockedMatrix<double>> jacMatrix,
|
||||
[[maybe_unused]] WellContributions& wellContribs,
|
||||
BdaResult& res)
|
||||
{
|
||||
if (initialized == false) {
|
||||
initialize(matrix->Nb, matrix->nnzbs);
|
||||
|
|
|
@ -140,8 +140,11 @@ public:
|
|||
/// \param[in] wellContribs WellContributions, to apply them separately, instead of adding them to matrix A
|
||||
/// \param[inout] res summary of solver result
|
||||
/// \return status code
|
||||
SolverStatus solve_system(std::shared_ptr<BlockedMatrix> matrix, double *b,
|
||||
std::shared_ptr<BlockedMatrix> jacMatrix, WellContributions& wellContribs, BdaResult &res) override;
|
||||
SolverStatus solve_system(std::shared_ptr<BlockedMatrix<double>> matrix,
|
||||
double *b,
|
||||
std::shared_ptr<BlockedMatrix<double>> jacMatrix,
|
||||
WellContributions& wellContribs,
|
||||
BdaResult& res) override;
|
||||
|
||||
/// Get result after linear solve, and peform postprocessing if necessary
|
||||
/// \param[inout] x resulting x vector, caller must guarantee that x points to a valid array
|
||||
|
|
|
@ -216,9 +216,11 @@ void cusparseSolverBackend<block_size>::gpu_pbicgstab(WellContributions& wellCon
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
template <unsigned int block_size>
|
||||
void cusparseSolverBackend<block_size>::initialize(std::shared_ptr<BlockedMatrix> matrix, std::shared_ptr<BlockedMatrix> jacMatrix) {
|
||||
void cusparseSolverBackend<block_size>::
|
||||
initialize(std::shared_ptr<BlockedMatrix<double>> matrix,
|
||||
std::shared_ptr<BlockedMatrix<double>> jacMatrix)
|
||||
{
|
||||
this->Nb = matrix->Nb;
|
||||
this->N = Nb * block_size;
|
||||
this->nnzb = matrix->nnzbs;
|
||||
|
@ -309,7 +311,11 @@ void cusparseSolverBackend<block_size>::finalize() {
|
|||
|
||||
|
||||
template <unsigned int block_size>
|
||||
void cusparseSolverBackend<block_size>::copy_system_to_gpu(std::shared_ptr<BlockedMatrix> matrix, double *b, std::shared_ptr<BlockedMatrix> jacMatrix) {
|
||||
void cusparseSolverBackend<block_size>::
|
||||
copy_system_to_gpu(std::shared_ptr<BlockedMatrix<double>> matrix,
|
||||
double *b,
|
||||
std::shared_ptr<BlockedMatrix<double>> jacMatrix)
|
||||
{
|
||||
Timer t;
|
||||
|
||||
cudaMemcpyAsync(d_bCols, matrix->colIndices, nnzb * sizeof(int), cudaMemcpyHostToDevice, stream);
|
||||
|
@ -356,7 +362,11 @@ void cusparseSolverBackend<block_size>::copy_system_to_gpu(std::shared_ptr<Block
|
|||
|
||||
// don't copy rowpointers and colindices, they stay the same
|
||||
template <unsigned int block_size>
|
||||
void cusparseSolverBackend<block_size>::update_system_on_gpu(std::shared_ptr<BlockedMatrix> matrix, double *b, std::shared_ptr<BlockedMatrix> jacMatrix) {
|
||||
void cusparseSolverBackend<block_size>::
|
||||
update_system_on_gpu(std::shared_ptr<BlockedMatrix<double>> matrix,
|
||||
double *b,
|
||||
std::shared_ptr<BlockedMatrix<double>> jacMatrix)
|
||||
{
|
||||
Timer t;
|
||||
|
||||
cudaMemcpyAsync(d_b, b, N * sizeof(double), cudaMemcpyHostToDevice, stream);
|
||||
|
@ -523,14 +533,13 @@ void cusparseSolverBackend<block_size>::get_result(double *x) {
|
|||
}
|
||||
} // end get_result()
|
||||
|
||||
|
||||
|
||||
template <unsigned int block_size>
|
||||
SolverStatus cusparseSolverBackend<block_size>::solve_system(std::shared_ptr<BlockedMatrix> matrix,
|
||||
double *b,
|
||||
std::shared_ptr<BlockedMatrix> jacMatrix,
|
||||
WellContributions& wellContribs,
|
||||
BdaResult &res)
|
||||
SolverStatus cusparseSolverBackend<block_size>::
|
||||
solve_system(std::shared_ptr<BlockedMatrix<double>> matrix,
|
||||
double *b,
|
||||
std::shared_ptr<BlockedMatrix<double>> jacMatrix,
|
||||
WellContributions& wellContribs,
|
||||
BdaResult& res)
|
||||
{
|
||||
if (initialized == false) {
|
||||
initialize(matrix, jacMatrix);
|
||||
|
|
|
@ -82,7 +82,8 @@ private:
|
|||
/// Initialize GPU and allocate memory
|
||||
/// \param[in] matrix matrix for spmv
|
||||
/// \param[in] jacMatrix matrix for preconditioner
|
||||
void initialize(std::shared_ptr<BlockedMatrix> matrix, std::shared_ptr<BlockedMatrix> jacMatrix);
|
||||
void initialize(std::shared_ptr<BlockedMatrix<double>> matrix,
|
||||
std::shared_ptr<BlockedMatrix<double>> jacMatrix);
|
||||
|
||||
/// Clean memory
|
||||
void finalize();
|
||||
|
@ -92,14 +93,18 @@ private:
|
|||
/// \param[in] matrix matrix for spmv
|
||||
/// \param[in] b input vector, contains N values
|
||||
/// \param[in] jacMatrix matrix for preconditioner
|
||||
void copy_system_to_gpu(std::shared_ptr<BlockedMatrix> matrix, double *b, std::shared_ptr<BlockedMatrix> jacMatrix);
|
||||
void copy_system_to_gpu(std::shared_ptr<BlockedMatrix<double>> matrix,
|
||||
double *b,
|
||||
std::shared_ptr<BlockedMatrix<double>> jacMatrix);
|
||||
|
||||
/// Update linear system on GPU, don't copy rowpointers and colindices, they stay the same
|
||||
/// also copy matrix for preconditioner if needed
|
||||
/// \param[in] matrix matrix for spmv
|
||||
/// \param[in] b input vector, contains N values
|
||||
/// \param[in] jacMatrix matrix for preconditioner
|
||||
void update_system_on_gpu(std::shared_ptr<BlockedMatrix> matrix, double *b, std::shared_ptr<BlockedMatrix> jacMatrix);
|
||||
void update_system_on_gpu(std::shared_ptr<BlockedMatrix<double>> matrix,
|
||||
double *b,
|
||||
std::shared_ptr<BlockedMatrix<double>> jacMatrix);
|
||||
|
||||
/// Analyse sparsity pattern to extract parallelism
|
||||
/// \return true iff analysis was successful
|
||||
|
@ -134,8 +139,10 @@ public:
|
|||
/// \param[in] wellContribs contains all WellContributions, to apply them separately, instead of adding them to matrix A
|
||||
/// \param[inout] res summary of solver result
|
||||
/// \return status code
|
||||
SolverStatus solve_system(std::shared_ptr<BlockedMatrix> matrix, double *b,
|
||||
std::shared_ptr<BlockedMatrix> jacMatrix, WellContributions& wellContribs, BdaResult &res) override;
|
||||
SolverStatus solve_system(std::shared_ptr<BlockedMatrix<double>> matrix,
|
||||
double *b,
|
||||
std::shared_ptr<BlockedMatrix<double>> jacMatrix,
|
||||
WellContributions& wellContribs, BdaResult& res) override;
|
||||
|
||||
/// Get resulting vector x after linear solve, also includes post processing if necessary
|
||||
/// \param[inout] x resulting x vector, caller must guarantee that x points to a valid array
|
||||
|
|
|
@ -50,14 +50,15 @@ BILU0<block_size>::BILU0(bool opencl_ilu_parallel_, int verbosity_) :
|
|||
|
||||
|
||||
template <unsigned int block_size>
|
||||
bool BILU0<block_size>::analyze_matrix(BlockedMatrix *mat)
|
||||
bool BILU0<block_size>::analyze_matrix(BlockedMatrix<double>* mat)
|
||||
{
|
||||
return analyze_matrix(mat, nullptr);
|
||||
}
|
||||
|
||||
|
||||
template <unsigned int block_size>
|
||||
bool BILU0<block_size>::analyze_matrix(BlockedMatrix *mat, BlockedMatrix *jacMat)
|
||||
bool BILU0<block_size>::analyze_matrix(BlockedMatrix<double>* mat,
|
||||
BlockedMatrix<double>* jacMat)
|
||||
{
|
||||
const unsigned int bs = block_size;
|
||||
|
||||
|
@ -77,7 +78,7 @@ bool BILU0<block_size>::analyze_matrix(BlockedMatrix *mat, BlockedMatrix *jacMat
|
|||
CSCRowIndices.resize(matToDecompose->nnzbs);
|
||||
CSCColPointers.resize(Nb + 1);
|
||||
|
||||
LUmat = std::make_unique<BlockedMatrix>(*matToDecompose);
|
||||
LUmat = std::make_unique<BlockedMatrix<double>>(*matToDecompose);
|
||||
|
||||
Timer t_convert;
|
||||
csrPatternToCsc(matToDecompose->colIndices, matToDecompose->rowPointers, CSCRowIndices.data(), CSCColPointers.data(), Nb);
|
||||
|
@ -87,7 +88,7 @@ bool BILU0<block_size>::analyze_matrix(BlockedMatrix *mat, BlockedMatrix *jacMat
|
|||
OpmLog::info(out.str());
|
||||
}
|
||||
} else {
|
||||
LUmat = std::make_unique<BlockedMatrix>(*matToDecompose);
|
||||
LUmat = std::make_unique<BlockedMatrix<double>>(*matToDecompose);
|
||||
}
|
||||
|
||||
Timer t_analysis;
|
||||
|
@ -168,17 +169,16 @@ bool BILU0<block_size>::analyze_matrix(BlockedMatrix *mat, BlockedMatrix *jacMat
|
|||
return true;
|
||||
}
|
||||
|
||||
|
||||
|
||||
template <unsigned int block_size>
|
||||
bool BILU0<block_size>::create_preconditioner(BlockedMatrix *mat)
|
||||
bool BILU0<block_size>::create_preconditioner(BlockedMatrix<double>* mat)
|
||||
{
|
||||
return create_preconditioner(mat, nullptr);
|
||||
}
|
||||
|
||||
|
||||
template <unsigned int block_size>
|
||||
bool BILU0<block_size>::create_preconditioner(BlockedMatrix *mat, BlockedMatrix *jacMat)
|
||||
bool BILU0<block_size>::
|
||||
create_preconditioner(BlockedMatrix<double>* mat,
|
||||
BlockedMatrix<double>* jacMat)
|
||||
{
|
||||
const unsigned int bs = block_size;
|
||||
|
||||
|
|
|
@ -53,9 +53,9 @@ class BILU0 : public Preconditioner<block_size>
|
|||
using Base::err;
|
||||
|
||||
private:
|
||||
std::unique_ptr<BlockedMatrix> LUmat = nullptr;
|
||||
std::unique_ptr<BlockedMatrix<double>> LUmat = nullptr;
|
||||
#if CHOW_PATEL
|
||||
std::unique_ptr<BlockedMatrix> Lmat = nullptr, Umat = nullptr;
|
||||
std::unique_ptr<BlockedMatrix<double>> Lmat = nullptr, Umat = nullptr;
|
||||
#endif
|
||||
std::vector<double> invDiagVals;
|
||||
std::vector<int> diagIndex;
|
||||
|
@ -93,12 +93,14 @@ public:
|
|||
BILU0(bool opencl_ilu_parallel, int verbosity);
|
||||
|
||||
// analysis, extract parallelism if specified
|
||||
bool analyze_matrix(BlockedMatrix *mat) override;
|
||||
bool analyze_matrix(BlockedMatrix *mat, BlockedMatrix *jacMat) override;
|
||||
bool analyze_matrix(BlockedMatrix<double>* mat) override;
|
||||
bool analyze_matrix(BlockedMatrix<double>* mat,
|
||||
BlockedMatrix<double>* jacMat) override;
|
||||
|
||||
// ilu_decomposition
|
||||
bool create_preconditioner(BlockedMatrix *mat) override;
|
||||
bool create_preconditioner(BlockedMatrix *mat, BlockedMatrix *jacMat) override;
|
||||
bool create_preconditioner(BlockedMatrix<double>* mat) override;
|
||||
bool create_preconditioner(BlockedMatrix<double>* mat,
|
||||
BlockedMatrix<double>* jacMat) override;
|
||||
|
||||
// apply preconditioner, x = prec(y)
|
||||
// via Lz = y
|
||||
|
|
|
@ -78,13 +78,14 @@ std::vector<int> buildCsrToCscOffsetMap(std::vector<int> colPointers, std::vecto
|
|||
}
|
||||
|
||||
template <unsigned int block_size>
|
||||
bool BISAI<block_size>::analyze_matrix(BlockedMatrix *mat)
|
||||
bool BISAI<block_size>::analyze_matrix(BlockedMatrix<double>* mat)
|
||||
{
|
||||
return analyze_matrix(mat, nullptr);
|
||||
}
|
||||
|
||||
template <unsigned int block_size>
|
||||
bool BISAI<block_size>::analyze_matrix(BlockedMatrix *mat, BlockedMatrix *jacMat)
|
||||
bool BISAI<block_size>::analyze_matrix(BlockedMatrix<double>* mat,
|
||||
BlockedMatrix<double>* jacMat)
|
||||
{
|
||||
const unsigned int bs = block_size;
|
||||
auto *m = mat;
|
||||
|
@ -176,7 +177,9 @@ void BISAI<block_size>::buildUpperSubsystemsStructures(){
|
|||
}
|
||||
|
||||
template <unsigned int block_size>
|
||||
bool BISAI<block_size>::create_preconditioner(BlockedMatrix *mat, BlockedMatrix *jacMat)
|
||||
bool BISAI<block_size>::
|
||||
create_preconditioner(BlockedMatrix<double>* mat,
|
||||
BlockedMatrix<double>* jacMat)
|
||||
{
|
||||
const unsigned int bs = block_size;
|
||||
|
||||
|
@ -274,7 +277,7 @@ bool BISAI<block_size>::create_preconditioner(BlockedMatrix *mat, BlockedMatrix
|
|||
}
|
||||
|
||||
template <unsigned int block_size>
|
||||
bool BISAI<block_size>::create_preconditioner(BlockedMatrix *mat)
|
||||
bool BISAI<block_size>::create_preconditioner(BlockedMatrix<double>* mat)
|
||||
{
|
||||
return create_preconditioner(mat, nullptr);
|
||||
}
|
||||
|
|
|
@ -31,7 +31,7 @@ namespace Opm
|
|||
namespace Accelerator
|
||||
{
|
||||
|
||||
class BlockedMatrix;
|
||||
template<class Scalar> class BlockedMatrix;
|
||||
|
||||
/// This class implements a Blocked version of the Incomplete Sparse Approximate Inverse (ISAI) preconditioner.
|
||||
/// Inspired by the paper "Incomplete Sparse Approximate Inverses for Parallel Preconditioning" by Anzt et. al.
|
||||
|
@ -116,12 +116,14 @@ public:
|
|||
void setOpencl(std::shared_ptr<cl::Context>& context, std::shared_ptr<cl::CommandQueue>& queue) override;
|
||||
|
||||
// analysis, extract parallelism
|
||||
bool analyze_matrix(BlockedMatrix *mat) override;
|
||||
bool analyze_matrix(BlockedMatrix *mat, BlockedMatrix *jacMat) override;
|
||||
bool analyze_matrix(BlockedMatrix<double>* mat) override;
|
||||
bool analyze_matrix(BlockedMatrix<double>* mat,
|
||||
BlockedMatrix<double>* jacMat) override;
|
||||
|
||||
// ilu_decomposition
|
||||
bool create_preconditioner(BlockedMatrix *mat) override;
|
||||
bool create_preconditioner(BlockedMatrix *mat, BlockedMatrix *jacMat) override;
|
||||
bool create_preconditioner(BlockedMatrix<double>* mat) override;
|
||||
bool create_preconditioner(BlockedMatrix<double>* mat,
|
||||
BlockedMatrix<double>* jacMat) override;
|
||||
|
||||
// apply preconditioner, x = prec(y)
|
||||
void apply(const cl::Buffer& y, cl::Buffer& x) override;
|
||||
|
|
|
@ -61,10 +61,9 @@ void CPR<block_size>::setOpencl(std::shared_ptr<cl::Context>& context_, std::sha
|
|||
bilu0->setOpencl(context, queue);
|
||||
}
|
||||
|
||||
|
||||
template <unsigned int block_size>
|
||||
bool CPR<block_size>::analyze_matrix(BlockedMatrix *mat_) {
|
||||
|
||||
bool CPR<block_size>::analyze_matrix(BlockedMatrix<double>* mat_)
|
||||
{
|
||||
this->Nb = mat_->Nb;
|
||||
this->nnzb = mat_->nnzbs;
|
||||
this->N = Nb * block_size;
|
||||
|
@ -76,7 +75,9 @@ bool CPR<block_size>::analyze_matrix(BlockedMatrix *mat_) {
|
|||
}
|
||||
|
||||
template <unsigned int block_size>
|
||||
bool CPR<block_size>::analyze_matrix(BlockedMatrix *mat_, BlockedMatrix *jacMat) {
|
||||
bool CPR<block_size>::analyze_matrix(BlockedMatrix<double>* mat_,
|
||||
BlockedMatrix<double>* jacMat)
|
||||
{
|
||||
this->Nb = mat_->Nb;
|
||||
this->nnzb = mat_->nnzbs;
|
||||
this->N = Nb * block_size;
|
||||
|
@ -89,7 +90,10 @@ bool CPR<block_size>::analyze_matrix(BlockedMatrix *mat_, BlockedMatrix *jacMat)
|
|||
}
|
||||
|
||||
template <unsigned int block_size>
|
||||
bool CPR<block_size>::create_preconditioner(BlockedMatrix *mat_, BlockedMatrix *jacMat) {
|
||||
bool CPR<block_size>::
|
||||
create_preconditioner(BlockedMatrix<double>* mat_,
|
||||
BlockedMatrix<double>* jacMat)
|
||||
{
|
||||
Dune::Timer t_bilu0;
|
||||
bool result = bilu0->create_preconditioner(mat_, jacMat);
|
||||
if (verbosity >= 3) {
|
||||
|
@ -109,7 +113,8 @@ bool CPR<block_size>::create_preconditioner(BlockedMatrix *mat_, BlockedMatrix *
|
|||
}
|
||||
|
||||
template <unsigned int block_size>
|
||||
bool CPR<block_size>::create_preconditioner(BlockedMatrix *mat_) {
|
||||
bool CPR<block_size>::create_preconditioner(BlockedMatrix<double>* mat_)
|
||||
{
|
||||
Dune::Timer t_bilu0;
|
||||
bool result = bilu0->create_preconditioner(mat_);
|
||||
if (verbosity >= 3) {
|
||||
|
@ -212,9 +217,9 @@ void CPR<block_size>::opencl_upload() {
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
template <unsigned int block_size>
|
||||
void CPR<block_size>::create_preconditioner_amg(BlockedMatrix *mat_) {
|
||||
void CPR<block_size>::create_preconditioner_amg(BlockedMatrix<double>* mat_)
|
||||
{
|
||||
this->mat = mat_;
|
||||
|
||||
coarse_vals.resize(nnzb);
|
||||
|
|
|
@ -38,7 +38,7 @@ namespace Opm
|
|||
namespace Accelerator
|
||||
{
|
||||
|
||||
class BlockedMatrix;
|
||||
template<class Scalar> class BlockedMatrix;
|
||||
|
||||
/// This class implements a Constrained Pressure Residual (CPR) preconditioner
|
||||
template <unsigned int block_size>
|
||||
|
@ -73,7 +73,7 @@ private:
|
|||
std::once_flag opencl_buffers_allocated; // only allocate OpenCL Buffers once
|
||||
|
||||
std::unique_ptr<BILU0<block_size> > bilu0; // Blocked ILU0 preconditioner
|
||||
BlockedMatrix *mat = nullptr; // input matrix, blocked
|
||||
BlockedMatrix<double>* mat = nullptr; // input matrix, blocked
|
||||
|
||||
using DuneMat = Dune::BCRSMatrix<Dune::FieldMatrix<double, 1, 1> >;
|
||||
using DuneVec = Dune::BlockVector<Dune::FieldVector<double, 1> >;
|
||||
|
@ -112,13 +112,14 @@ private:
|
|||
|
||||
void amg_cycle_gpu(const int level, cl::Buffer &y, cl::Buffer &x);
|
||||
|
||||
void create_preconditioner_amg(BlockedMatrix *mat);
|
||||
void create_preconditioner_amg(BlockedMatrix<double>* mat);
|
||||
|
||||
public:
|
||||
CPR(bool opencl_ilu_parallel, int verbosity);
|
||||
|
||||
bool analyze_matrix(BlockedMatrix *mat) override;
|
||||
bool analyze_matrix(BlockedMatrix *mat, BlockedMatrix *jacMat) override;
|
||||
bool analyze_matrix(BlockedMatrix<double>* mat) override;
|
||||
bool analyze_matrix(BlockedMatrix<double>* mat,
|
||||
BlockedMatrix<double>* jacMat) override;
|
||||
|
||||
// set own Opencl variables, but also that of the bilu0 preconditioner
|
||||
void setOpencl(std::shared_ptr<cl::Context>& context, std::shared_ptr<cl::CommandQueue>& queue) override;
|
||||
|
@ -127,8 +128,9 @@ public:
|
|||
// also applies amg for pressure component
|
||||
void apply(const cl::Buffer& y, cl::Buffer& x) override;
|
||||
|
||||
bool create_preconditioner(BlockedMatrix *mat) override;
|
||||
bool create_preconditioner(BlockedMatrix *mat, BlockedMatrix *jacMat) override;
|
||||
bool create_preconditioner(BlockedMatrix<double>* mat) override;
|
||||
bool create_preconditioner(BlockedMatrix<double>* mat,
|
||||
BlockedMatrix<double>* jacMat) override;
|
||||
};
|
||||
|
||||
// solve A^T * x = b
|
||||
|
|
|
@ -54,7 +54,8 @@ void OpenclMatrix::upload(cl::CommandQueue *queue, Matrix *matrix) {
|
|||
upload(queue, matrix->nnzValues.data(), matrix->colIndices.data(), matrix->rowPointers.data());
|
||||
}
|
||||
|
||||
void OpenclMatrix::upload(cl::CommandQueue *queue, BlockedMatrix *matrix) {
|
||||
void OpenclMatrix::upload(cl::CommandQueue* queue, BlockedMatrix<double>* matrix)
|
||||
{
|
||||
if (matrix->block_size != block_size) {
|
||||
OPM_THROW(std::logic_error, "Error trying to upload a BlockedMatrix to OpenclMatrix with different block_size");
|
||||
}
|
||||
|
|
|
@ -30,7 +30,7 @@ namespace Accelerator
|
|||
{
|
||||
|
||||
class Matrix;
|
||||
class BlockedMatrix;
|
||||
template<class Scalar> class BlockedMatrix;
|
||||
|
||||
/// This struct resembles a csr matrix, only doubles are supported
|
||||
/// The matrix data is stored in OpenCL Buffers
|
||||
|
@ -50,7 +50,7 @@ public:
|
|||
|
||||
void upload(cl::CommandQueue *queue, double *vals, int *cols, int *rows);
|
||||
void upload(cl::CommandQueue *queue, Matrix *matrix);
|
||||
void upload(cl::CommandQueue *queue, BlockedMatrix *matrix);
|
||||
void upload(cl::CommandQueue* queue, BlockedMatrix<double>* matrix);
|
||||
|
||||
cl::Buffer nnzValues;
|
||||
cl::Buffer colIndices;
|
||||
|
|
|
@ -60,21 +60,26 @@ Preconditioner<block_size>::create(Type type, bool opencl_ilu_parallel, int verb
|
|||
}
|
||||
|
||||
template <unsigned int block_size>
|
||||
bool Preconditioner<block_size>::analyze_matrix(BlockedMatrix *mat, [[maybe_unused]] BlockedMatrix *jacMat) {
|
||||
bool Preconditioner<block_size>::
|
||||
analyze_matrix(BlockedMatrix<double>* mat,
|
||||
[[maybe_unused]] BlockedMatrix<double>* jacMat)
|
||||
{
|
||||
return analyze_matrix(mat);
|
||||
}
|
||||
|
||||
template <unsigned int block_size>
|
||||
bool Preconditioner<block_size>::create_preconditioner(BlockedMatrix *mat, [[maybe_unused]] BlockedMatrix *jacMat) {
|
||||
bool Preconditioner<block_size>::
|
||||
create_preconditioner(BlockedMatrix<double>* mat,
|
||||
[[maybe_unused]] BlockedMatrix<double>* jacMat)
|
||||
{
|
||||
return create_preconditioner(mat);
|
||||
}
|
||||
|
||||
#define INSTANTIATE_BDA_FUNCTIONS(n) \
|
||||
template std::unique_ptr<Preconditioner<n> > Preconditioner<n>::create(Type, bool, int); \
|
||||
template void Preconditioner<n>::setOpencl(std::shared_ptr<cl::Context>&, std::shared_ptr<cl::CommandQueue>&); \
|
||||
template bool Preconditioner<n>::analyze_matrix(BlockedMatrix *, BlockedMatrix *); \
|
||||
template bool Preconditioner<n>::create_preconditioner(BlockedMatrix *, BlockedMatrix *);
|
||||
|
||||
template bool Preconditioner<n>::analyze_matrix(BlockedMatrix<double> *, BlockedMatrix<double> *); \
|
||||
template bool Preconditioner<n>::create_preconditioner(BlockedMatrix<double> *, BlockedMatrix<double> *);
|
||||
|
||||
INSTANTIATE_BDA_FUNCTIONS(1);
|
||||
INSTANTIATE_BDA_FUNCTIONS(2);
|
||||
|
|
|
@ -29,7 +29,7 @@ namespace Opm
|
|||
namespace Accelerator
|
||||
{
|
||||
|
||||
class BlockedMatrix;
|
||||
template<class Scalar> class BlockedMatrix;
|
||||
|
||||
template <unsigned int block_size>
|
||||
class Preconditioner
|
||||
|
@ -73,13 +73,15 @@ public:
|
|||
// analyze matrix, e.g. the sparsity pattern
|
||||
// probably only called once
|
||||
// the version with two params can be overloaded, if not, it will default to using the one param version
|
||||
virtual bool analyze_matrix(BlockedMatrix *mat) = 0;
|
||||
virtual bool analyze_matrix(BlockedMatrix *mat, BlockedMatrix *jacMat);
|
||||
virtual bool analyze_matrix(BlockedMatrix<double>* mat) = 0;
|
||||
virtual bool analyze_matrix(BlockedMatrix<double>* mat,
|
||||
BlockedMatrix<double>* jacMat);
|
||||
|
||||
// create/update preconditioner, probably used every linear solve
|
||||
// the version with two params can be overloaded, if not, it will default to using the one param version
|
||||
virtual bool create_preconditioner(BlockedMatrix *mat) = 0;
|
||||
virtual bool create_preconditioner(BlockedMatrix *mat, BlockedMatrix *jacMat);
|
||||
virtual bool create_preconditioner(BlockedMatrix<double>* mat) = 0;
|
||||
virtual bool create_preconditioner(BlockedMatrix<double>* mat,
|
||||
BlockedMatrix<double>* jacMat);
|
||||
};
|
||||
|
||||
} //namespace Accelerator
|
||||
|
|
|
@ -405,9 +405,11 @@ void openclSolverBackend<block_size>::gpu_pbicgstab(WellContributions& wellContr
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
template <unsigned int block_size>
|
||||
void openclSolverBackend<block_size>::initialize(std::shared_ptr<BlockedMatrix> matrix, std::shared_ptr<BlockedMatrix> jacMatrix) {
|
||||
void openclSolverBackend<block_size>::
|
||||
initialize(std::shared_ptr<BlockedMatrix<double>> matrix,
|
||||
std::shared_ptr<BlockedMatrix<double>> jacMatrix)
|
||||
{
|
||||
this->Nb = matrix->Nb;
|
||||
this->N = Nb * block_size;
|
||||
this->nnzb = matrix->nnzbs;
|
||||
|
@ -637,13 +639,13 @@ void openclSolverBackend<block_size>::get_result(double *x) {
|
|||
}
|
||||
} // end get_result()
|
||||
|
||||
|
||||
template <unsigned int block_size>
|
||||
SolverStatus openclSolverBackend<block_size>::solve_system(std::shared_ptr<BlockedMatrix> matrix,
|
||||
double *b,
|
||||
std::shared_ptr<BlockedMatrix> jacMatrix,
|
||||
WellContributions& wellContribs,
|
||||
BdaResult &res)
|
||||
SolverStatus openclSolverBackend<block_size>::
|
||||
solve_system(std::shared_ptr<BlockedMatrix<double>> matrix,
|
||||
double *b,
|
||||
std::shared_ptr<BlockedMatrix<double>> jacMatrix,
|
||||
WellContributions& wellContribs,
|
||||
BdaResult& res)
|
||||
{
|
||||
if (initialized == false) {
|
||||
initialize(matrix, jacMatrix);
|
||||
|
|
|
@ -67,8 +67,8 @@ private:
|
|||
// can perform blocked ILU0 and AMG on pressure component
|
||||
bool is_root; // allow for nested solvers, the root solver is called by BdaBridge
|
||||
bool analysis_done = false;
|
||||
std::shared_ptr<BlockedMatrix> mat = nullptr; // original matrix
|
||||
std::shared_ptr<BlockedMatrix> jacMat = nullptr; // matrix for preconditioner
|
||||
std::shared_ptr<BlockedMatrix<double>> mat{}; // original matrix
|
||||
std::shared_ptr<BlockedMatrix<double>> jacMat{}; // matrix for preconditioner
|
||||
bool opencl_ilu_parallel; // parallelize ILU operations (with level_scheduling)
|
||||
std::vector<cl::Event> events;
|
||||
cl_int err;
|
||||
|
@ -81,7 +81,8 @@ private:
|
|||
/// Initialize GPU and allocate memory
|
||||
/// \param[in] matrix matrix A
|
||||
/// \param[in] jacMatrix matrix for preconditioner
|
||||
void initialize(std::shared_ptr<BlockedMatrix> matrix, std::shared_ptr<BlockedMatrix> jacMatrix);
|
||||
void initialize(std::shared_ptr<BlockedMatrix<double>> matrix,
|
||||
std::shared_ptr<BlockedMatrix<double>> jacMatrix);
|
||||
|
||||
/// Copy linear system to GPU
|
||||
void copy_system_to_gpu();
|
||||
|
@ -134,8 +135,11 @@ public:
|
|||
/// \param[in] wellContribs WellContributions, to apply them separately, instead of adding them to matrix A
|
||||
/// \param[inout] res summary of solver result
|
||||
/// \return status code
|
||||
SolverStatus solve_system(std::shared_ptr<BlockedMatrix> matrix, double *b,
|
||||
std::shared_ptr<BlockedMatrix> jacMatrix, WellContributions& wellContribs, BdaResult &res) override;
|
||||
SolverStatus solve_system(std::shared_ptr<BlockedMatrix<double>> matrix,
|
||||
double *b,
|
||||
std::shared_ptr<BlockedMatrix<double>> jacMatrix,
|
||||
WellContributions& wellContribs,
|
||||
BdaResult& res) override;
|
||||
|
||||
/// Solve scalar linear system, for example a coarse system of an AMG preconditioner
|
||||
/// Data is already on the GPU
|
||||
|
|
|
@ -76,9 +76,9 @@ rocalutionSolverBackend<block_size>::~rocalutionSolverBackend() {
|
|||
rocalution::stop_rocalution();
|
||||
}
|
||||
|
||||
|
||||
template <unsigned int block_size>
|
||||
void rocalutionSolverBackend<block_size>::initialize(BlockedMatrix *matrix) {
|
||||
void rocalutionSolverBackend<block_size>::initialize(BlockedMatrix<double>* matrix)
|
||||
{
|
||||
this->Nb = matrix->Nb;
|
||||
this->N = Nb * block_size;
|
||||
this->nnzb = matrix->nnzbs;
|
||||
|
@ -94,9 +94,9 @@ void rocalutionSolverBackend<block_size>::initialize(BlockedMatrix *matrix) {
|
|||
initialized = true;
|
||||
} // end initialize()
|
||||
|
||||
|
||||
template <unsigned int block_size>
|
||||
void rocalutionSolverBackend<block_size>::convert_matrix(BlockedMatrix *matrix) {
|
||||
void rocalutionSolverBackend<block_size>::convert_matrix(BlockedMatrix<double>* matrix)
|
||||
{
|
||||
Timer t;
|
||||
|
||||
for(int i = 0; i < Nb+1; ++i){
|
||||
|
@ -147,13 +147,13 @@ void rocalutionSolverBackend<block_size>::get_result(double *x) {
|
|||
}
|
||||
} // end get_result()
|
||||
|
||||
|
||||
template <unsigned int block_size>
|
||||
SolverStatus rocalutionSolverBackend<block_size>::solve_system(std::shared_ptr<BlockedMatrix> matrix,
|
||||
double *b,
|
||||
[[maybe_unused]] std::shared_ptr<BlockedMatrix> jacMatrix,
|
||||
[[maybe_unused]] WellContributions& wellContribs,
|
||||
BdaResult &res)
|
||||
SolverStatus rocalutionSolverBackend<block_size>::
|
||||
solve_system(std::shared_ptr<BlockedMatrix<double>> matrix,
|
||||
double *b,
|
||||
[[maybe_unused]] std::shared_ptr<BlockedMatrix<double>> jacMatrix,
|
||||
[[maybe_unused]] WellContributions& wellContribs,
|
||||
BdaResult& res)
|
||||
{
|
||||
if (initialized == false) {
|
||||
initialize(matrix.get());
|
||||
|
|
|
@ -65,12 +65,12 @@ private:
|
|||
|
||||
/// Initialize sizes and allocate memory
|
||||
/// \param[in] matrix matrix A
|
||||
void initialize(BlockedMatrix *matrix);
|
||||
void initialize(BlockedMatrix<double>* matrix);
|
||||
|
||||
/// Convert matrix to rocalution format
|
||||
/// copy matrix to raw pointers, which are given to and freed by rocalution
|
||||
/// \param[in] matrix matrix A
|
||||
void convert_matrix(BlockedMatrix *matrix);
|
||||
void convert_matrix(BlockedMatrix<double>* matrix);
|
||||
|
||||
public:
|
||||
|
||||
|
@ -91,8 +91,11 @@ public:
|
|||
/// \param[in] wellContribs WellContributions, to apply them separately, instead of adding them to matrix A
|
||||
/// \param[inout] res summary of solver result
|
||||
/// \return status code
|
||||
SolverStatus solve_system(std::shared_ptr<BlockedMatrix> matrix, double *b,
|
||||
std::shared_ptr<BlockedMatrix> jacMatrix, WellContributions& wellContribs, BdaResult &res) override;
|
||||
SolverStatus solve_system(std::shared_ptr<BlockedMatrix<double>> matrix,
|
||||
double *b,
|
||||
std::shared_ptr<BlockedMatrix<double>> jacMatrix,
|
||||
WellContributions& wellContribs,
|
||||
BdaResult& res) override;
|
||||
|
||||
/// Get result after linear solve, and peform postprocessing if necessary
|
||||
/// \param[inout] x resulting x vector, caller must guarantee that x points to a valid array
|
||||
|
|
|
@ -375,9 +375,11 @@ void rocsparseSolverBackend<block_size>::gpu_pbicgstab([[maybe_unused]] WellCont
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
template <unsigned int block_size>
|
||||
void rocsparseSolverBackend<block_size>::initialize(std::shared_ptr<BlockedMatrix> matrix, std::shared_ptr<BlockedMatrix> jacMatrix) {
|
||||
void rocsparseSolverBackend<block_size>::
|
||||
initialize(std::shared_ptr<BlockedMatrix<double>> matrix,
|
||||
std::shared_ptr<BlockedMatrix<double>> jacMatrix)
|
||||
{
|
||||
this->Nb = matrix->Nb;
|
||||
this->N = Nb * block_size;
|
||||
this->nnzb = matrix->nnzbs;
|
||||
|
@ -634,11 +636,12 @@ void rocsparseSolverBackend<block_size>::get_result(double *x) {
|
|||
|
||||
|
||||
template <unsigned int block_size>
|
||||
SolverStatus rocsparseSolverBackend<block_size>::solve_system(std::shared_ptr<BlockedMatrix> matrix,
|
||||
double *b,
|
||||
std::shared_ptr<BlockedMatrix> jacMatrix,
|
||||
WellContributions& wellContribs,
|
||||
BdaResult &res)
|
||||
SolverStatus rocsparseSolverBackend<block_size>::
|
||||
solve_system(std::shared_ptr<BlockedMatrix<double>> matrix,
|
||||
double *b,
|
||||
std::shared_ptr<BlockedMatrix<double>> jacMatrix,
|
||||
WellContributions& wellContribs,
|
||||
BdaResult& res)
|
||||
{
|
||||
if (initialized == false) {
|
||||
initialize(matrix, jacMatrix);
|
||||
|
|
|
@ -54,14 +54,13 @@ class rocsparseSolverBackend : public BdaSolver<block_size>
|
|||
using Base::initialized;
|
||||
|
||||
private:
|
||||
|
||||
double c_copy = 0.0; // cummulative timer measuring the total time it takes to transfer the data to the GPU
|
||||
|
||||
bool useJacMatrix = false;
|
||||
|
||||
bool analysis_done = false;
|
||||
std::shared_ptr<BlockedMatrix> mat = nullptr; // original matrix
|
||||
std::shared_ptr<BlockedMatrix> jacMat = nullptr; // matrix for preconditioner
|
||||
std::shared_ptr<BlockedMatrix<double>> mat{}; // original matrix
|
||||
std::shared_ptr<BlockedMatrix<double>> jacMat{}; // matrix for preconditioner
|
||||
int nnzbs_prec = 0; // number of nnz blocks in preconditioner matrix M
|
||||
|
||||
rocsparse_direction dir = rocsparse_direction_row;
|
||||
|
@ -93,7 +92,8 @@ private:
|
|||
/// Initialize GPU and allocate memory
|
||||
/// \param[in] matrix matrix A
|
||||
/// \param[in] jacMatrix matrix for preconditioner
|
||||
void initialize(std::shared_ptr<BlockedMatrix> matrix, std::shared_ptr<BlockedMatrix> jacMatrix);
|
||||
void initialize(std::shared_ptr<BlockedMatrix<double>> matrix,
|
||||
std::shared_ptr<BlockedMatrix<double>> jacMatrix);
|
||||
|
||||
/// Copy linear system to GPU
|
||||
/// \param[in] b input vector, contains N values
|
||||
|
@ -138,8 +138,11 @@ public:
|
|||
/// \param[in] wellContribs WellContributions, to apply them separately, instead of adding them to matrix A
|
||||
/// \param[inout] res summary of solver result
|
||||
/// \return status code
|
||||
SolverStatus solve_system(std::shared_ptr<BlockedMatrix> matrix, double *b,
|
||||
std::shared_ptr<BlockedMatrix> jacMatrix, WellContributions& wellContribs, BdaResult &res) override;
|
||||
SolverStatus solve_system(std::shared_ptr<BlockedMatrix<double>> matrix,
|
||||
double* b,
|
||||
std::shared_ptr<BlockedMatrix<double>> jacMatrix,
|
||||
WellContributions& wellContribs,
|
||||
BdaResult& res) override;
|
||||
|
||||
/// Solve scalar linear system, for example a coarse system of an AMG preconditioner
|
||||
/// Data is already on the GPU
|
||||
|
|
Loading…
Reference in New Issue
Block a user