BdaBridge: template Scalar type

This commit is contained in:
Arne Morten Kvarving 2024-04-16 12:28:49 +02:00
parent 3eed028978
commit ad1d862426
2 changed files with 98 additions and 76 deletions

View File

@ -52,28 +52,29 @@
typedef Dune::InverseOperatorResult InverseOperatorResult;
namespace Opm
{
namespace Opm {
using Opm::Accelerator::BdaResult;
using Opm::Accelerator::BdaSolver;
using Opm::Accelerator::SolverStatus;
using Accelerator::BdaResult;
using Accelerator::BdaSolver;
using Accelerator::SolverStatus;
template <class BridgeMatrix, class BridgeVector, int block_size>
BdaBridge<BridgeMatrix, BridgeVector, block_size>::BdaBridge(std::string accelerator_mode_,
int linear_solver_verbosity,
[[maybe_unused]] int maxit,
[[maybe_unused]] double tolerance,
[[maybe_unused]] unsigned int platformID,
[[maybe_unused]] unsigned int deviceID,
[[maybe_unused]] bool opencl_ilu_parallel,
[[maybe_unused]] std::string linsolver)
: verbosity(linear_solver_verbosity), accelerator_mode(accelerator_mode_)
template<class BridgeMatrix, class BridgeVector, int block_size>
BdaBridge<BridgeMatrix, BridgeVector, block_size>::
BdaBridge(std::string accelerator_mode_,
int linear_solver_verbosity,
[[maybe_unused]] int maxit,
[[maybe_unused]] Scalar tolerance,
[[maybe_unused]] unsigned int platformID,
[[maybe_unused]] unsigned int deviceID,
[[maybe_unused]] bool opencl_ilu_parallel,
[[maybe_unused]] std::string linsolver)
: verbosity(linear_solver_verbosity)
, accelerator_mode(accelerator_mode_)
{
if (accelerator_mode.compare("cusparse") == 0) {
#if HAVE_CUDA
use_gpu = true;
using CU = Accelerator::cusparseSolverBackend<double,block_size>;
using CU = Accelerator::cusparseSolverBackend<Scalar,block_size>;
backend = std::make_unique<CU>(linear_solver_verbosity, maxit, tolerance, deviceID);
#else
OPM_THROW(std::logic_error, "Error cusparseSolver was chosen, but CUDA was not found by CMake");
@ -81,7 +82,7 @@ BdaBridge<BridgeMatrix, BridgeVector, block_size>::BdaBridge(std::string acceler
} else if (accelerator_mode.compare("opencl") == 0) {
#if HAVE_OPENCL
use_gpu = true;
using OCL = Accelerator::openclSolverBackend<double,block_size>;
using OCL = Accelerator::openclSolverBackend<Scalar,block_size>;
backend = std::make_unique<OCL>(linear_solver_verbosity,
maxit,
tolerance,
@ -95,7 +96,7 @@ BdaBridge<BridgeMatrix, BridgeVector, block_size>::BdaBridge(std::string acceler
} else if (accelerator_mode.compare("amgcl") == 0) {
#if HAVE_AMGCL
use_gpu = true; // should be replaced by a 'use_bridge' boolean
using AMGCL = Accelerator::amgclSolverBackend<double,block_size>;
using AMGCL = Accelerator::amgclSolverBackend<Scalar,block_size>;
backend = std::make_unique<AMGCL>(linear_solver_verbosity, maxit,
tolerance, platformID, deviceID);
#else
@ -104,7 +105,7 @@ BdaBridge<BridgeMatrix, BridgeVector, block_size>::BdaBridge(std::string acceler
} else if (accelerator_mode.compare("rocalution") == 0) {
#if HAVE_ROCALUTION
use_gpu = true; // should be replaced by a 'use_bridge' boolean
using ROCA = Accelerator::rocalutionSolverBackend<double,block_size>;
using ROCA = Accelerator::rocalutionSolverBackend<Scalar,block_size>;
backend = std::make_unique<ROCA>(linear_solver_verbosity, maxit, tolerance);
#else
OPM_THROW(std::logic_error, "Error rocalutionSolver was chosen, but rocalution was not found by CMake");
@ -112,7 +113,7 @@ BdaBridge<BridgeMatrix, BridgeVector, block_size>::BdaBridge(std::string acceler
} else if (accelerator_mode.compare("rocsparse") == 0) {
#if HAVE_ROCSPARSE
use_gpu = true; // should be replaced by a 'use_bridge' boolean
using ROCS = Accelerator::rocsparseSolverBackend<double,block_size>;
using ROCS = Accelerator::rocsparseSolverBackend<Scalar,block_size>;
backend = std::make_unique<ROCS>(linear_solver_verbosity, maxit,
tolerance, platformID, deviceID);
#else
@ -125,13 +126,14 @@ BdaBridge<BridgeMatrix, BridgeVector, block_size>::BdaBridge(std::string acceler
}
}
template <class BridgeMatrix>
int replaceZeroDiagonal(BridgeMatrix& mat, std::vector<typename BridgeMatrix::size_type>& diag_indices) {
int replaceZeroDiagonal(BridgeMatrix& mat,
std::vector<typename BridgeMatrix::size_type>& diag_indices)
{
using Scalar = typename BridgeMatrix::field_type;
int numZeros = 0;
const int dim = mat[0][0].N(); // might be replaced with BridgeMatrix::block_type::size()
const double zero_replace = 1e-15;
const Scalar zero_replace = 1e-15;
if (diag_indices.empty()) {
int Nb = mat.N();
diag_indices.reserve(Nb);
@ -147,7 +149,7 @@ int replaceZeroDiagonal(BridgeMatrix& mat, std::vector<typename BridgeMatrix::si
}
diag_indices.emplace_back(diag.offset());
}
}else{
} else {
for (typename BridgeMatrix::iterator r = mat.begin(); r != mat.end(); ++r) {
typename BridgeMatrix::size_type offset = diag_indices[r.index()];
auto& diag_block = r->getptr()[offset]; // diag_block is a reference to MatrixBlock, located on column r of row r
@ -164,13 +166,15 @@ int replaceZeroDiagonal(BridgeMatrix& mat, std::vector<typename BridgeMatrix::si
return numZeros;
}
// iterate sparsity pattern from Matrix and put colIndices and rowPointers in arrays
// sparsity pattern should stay the same
// this could be removed if Dune::BCRSMatrix features an API call that returns colIndices and rowPointers
template <class BridgeMatrix, class BridgeVector, int block_size>
void BdaBridge<BridgeMatrix, BridgeVector, block_size>::copySparsityPatternFromISTL(const BridgeMatrix& mat, std::vector<int> &h_rows, std::vector<int> &h_cols) {
void BdaBridge<BridgeMatrix, BridgeVector, block_size>::
copySparsityPatternFromISTL(const BridgeMatrix& mat,
std::vector<int>& h_rows,
std::vector<int>& h_cols)
{
h_rows.clear();
h_cols.clear();
@ -185,17 +189,19 @@ void BdaBridge<BridgeMatrix, BridgeVector, block_size>::copySparsityPatternFromI
// h_rows and h_cols could be changed to 'unsigned int', but cusparse expects 'int'
if (static_cast<unsigned int>(h_rows[mat.N()]) != mat.nonzeroes()) {
OPM_THROW(std::logic_error, "Error size of rows do not sum to number of nonzeroes in BdaBridge::copySparsityPatternFromISTL()");
OPM_THROW(std::logic_error,
"Error size of rows do not sum to number of nonzeroes "
"in BdaBridge::copySparsityPatternFromISTL()");
}
}
// check if the nnz values of the matrix are in contiguous memory
// this is done by checking if the distance between the last value of the last block of row 0 and
// the first value of the first row of row 1 is equal to 1
// if the matrix only has 1 row, it is always contiguous
template <class BridgeMatrix>
void checkMemoryContiguous(const BridgeMatrix& mat) {
void checkMemoryContiguous(const BridgeMatrix& mat)
{
auto block_size = mat[0][0].N();
auto row = mat.begin();
auto last_of_row0 = row->begin();
@ -212,14 +218,13 @@ void checkMemoryContiguous(const BridgeMatrix& mat) {
}
}
template <class BridgeMatrix, class BridgeVector, int block_size>
void BdaBridge<BridgeMatrix, BridgeVector, block_size>::
solve_system(BridgeMatrix* bridgeMat,
BridgeMatrix* jacMat,
int numJacobiBlocks,
BridgeVector& b,
WellContributions<double>& wellContribs,
WellContributions<Scalar>& wellContribs,
InverseOperatorResult& res)
{
if (use_gpu) {
@ -235,14 +240,14 @@ solve_system(BridgeMatrix* bridgeMat,
return;
}
using Mat = Accelerator::BlockedMatrix<double>;
using Mat = Accelerator::BlockedMatrix<Scalar>;
if (!matrix) {
h_rows.reserve(Nb+1);
h_cols.reserve(nnzb);
copySparsityPatternFromISTL(*bridgeMat, h_rows, h_cols);
checkMemoryContiguous(*bridgeMat);
matrix = std::make_unique<Mat>(Nb, nnzb, block_size,
static_cast<double*>(&(((*bridgeMat)[0][0][0][0]))),
static_cast<Scalar*>(&(((*bridgeMat)[0][0][0][0]))),
h_cols.data(),
h_rows.data());
}
@ -251,12 +256,14 @@ solve_system(BridgeMatrix* bridgeMat,
int numZeros = replaceZeroDiagonal(*bridgeMat, diagIndices);
if (verbosity >= 2) {
std::ostringstream out;
out << "Checking zeros took: " << t_zeros.stop() << " s, found " << numZeros << " zeros";
out << "Checking zeros took: " << t_zeros.stop() << " s, found "
<< numZeros << " zeros";
OpmLog::info(out.str());
}
if (numJacobiBlocks >= 2) {
const int jacNnzb = (h_jacRows.empty()) ? jacMat->nonzeroes() : h_jacRows.back();
const int jacNnzb = (h_jacRows.empty()) ? jacMat->nonzeroes()
: h_jacRows.back();
if (!jacMatrix) {
h_jacRows.reserve(Nb+1);
@ -264,7 +271,7 @@ solve_system(BridgeMatrix* bridgeMat,
copySparsityPatternFromISTL(*jacMat, h_jacRows, h_jacCols);
checkMemoryContiguous(*jacMat);
jacMatrix = std::make_unique<Mat>(Nb, jacNnzb, block_size,
static_cast<double*>(&(((*jacMat)[0][0][0][0]))),
static_cast<Scalar*>(&(((*jacMat)[0][0][0][0]))),
h_jacCols.data(),
h_jacRows.data());
}
@ -273,7 +280,8 @@ solve_system(BridgeMatrix* bridgeMat,
int jacNumZeros = replaceZeroDiagonal(*jacMat, jacDiagIndices);
if (verbosity >= 2) {
std::ostringstream out;
out << "Checking zeros for jacMat took: " << t_zeros2.stop() << " s, found " << jacNumZeros << " zeros";
out << "Checking zeros for jacMat took: " << t_zeros2.stop()
<< " s, found " << jacNumZeros << " zeros";
OpmLog::info(out.str());
}
}
@ -281,17 +289,23 @@ solve_system(BridgeMatrix* bridgeMat,
/////////////////////////
// actually solve
// assume that underlying data (nonzeroes) from b (Dune::BlockVector) are contiguous, if this is not the case, the chosen BdaSolver is expected to perform undefined behaviour
SolverStatus status = backend->solve_system(matrix, static_cast<double*>(&(b[0][0])), jacMatrix, wellContribs, result);
SolverStatus status = backend->solve_system(matrix,
static_cast<Scalar*>(&(b[0][0])),
jacMatrix, wellContribs, result);
switch(status) {
switch (status) {
case SolverStatus::BDA_SOLVER_SUCCESS:
//OpmLog::info("BdaSolver converged");
break;
case SolverStatus::BDA_SOLVER_ANALYSIS_FAILED:
OpmLog::warning("BdaSolver could not analyse level information of matrix, perhaps there is still a 0.0 on the diagonal of a block on the diagonal");
OpmLog::warning("BdaSolver could not analyse level information of matrix, "
"perhaps there is still a 0.0 on the diagonal of a "
"block on the diagonal");
break;
case SolverStatus::BDA_SOLVER_CREATE_PRECONDITIONER_FAILED:
OpmLog::warning("BdaSolver could not create preconditioner, perhaps there is still a 0.0 on the diagonal of a block on the diagonal");
OpmLog::warning("BdaSolver could not create preconditioner, "
"perhaps there is still a 0.0 on the diagonal "
"of a block on the diagonal");
break;
default:
OpmLog::warning("BdaSolver returned unknown status code");
@ -307,24 +321,25 @@ solve_system(BridgeMatrix* bridgeMat,
}
}
template <class BridgeMatrix, class BridgeVector, int block_size>
void BdaBridge<BridgeMatrix, BridgeVector, block_size>::get_result([[maybe_unused]] BridgeVector& x) {
void BdaBridge<BridgeMatrix, BridgeVector, block_size>::
get_result([[maybe_unused]] BridgeVector& x)
{
if (use_gpu) {
backend->get_result(static_cast<double*>(&(x[0][0])));
backend->get_result(static_cast<Scalar*>(&(x[0][0])));
}
}
template <class BridgeMatrix, class BridgeVector, int block_size>
void BdaBridge<BridgeMatrix, BridgeVector, block_size>::
initWellContributions([[maybe_unused]] WellContributions<double>& wellContribs,
initWellContributions([[maybe_unused]] WellContributions<Scalar>& wellContribs,
[[maybe_unused]] unsigned N)
{
if (accelerator_mode.compare("opencl") == 0) {
#if HAVE_OPENCL
using OCL = Accelerator::openclSolverBackend<double,block_size>;
using OCL = Accelerator::openclSolverBackend<Scalar,block_size>;
const auto openclBackend = static_cast<const OCL*>(backend.get());
using WCOCL = WellContributionsOCL<double>;
using WCOCL = WellContributionsOCL<Scalar>;
static_cast<WCOCL&>(wellContribs).setOpenCLEnv(openclBackend->context.get(),
openclBackend->queue.get());
#else
@ -335,23 +350,20 @@ initWellContributions([[maybe_unused]] WellContributions<double>& wellContribs,
}
// the tests use Dune::FieldMatrix, Flow uses Opm::MatrixBlock
#define INSTANTIATE_BDA_FUNCTIONS(n) \
template class BdaBridge<Dune::BCRSMatrix<Opm::MatrixBlock<double, n, n>, std::allocator<Opm::MatrixBlock<double, n, n> > >, \
Dune::BlockVector<Dune::FieldVector<double, n>, std::allocator<Dune::FieldVector<double, n> > >, \
n>; \
\
template class BdaBridge<Dune::BCRSMatrix<Dune::FieldMatrix<double, n, n>, std::allocator<Dune::FieldMatrix<double, n, n> > >, \
Dune::BlockVector<Dune::FieldVector<double, n>, std::allocator<Dune::FieldVector<double, n> > >, \
n>;
#define INSTANTIATE_BDA_FUNCTIONS(T,n) \
template class BdaBridge<Dune::BCRSMatrix<MatrixBlock<T,n,n>>, \
Dune::BlockVector<Dune::FieldVector<T,n>>,n>; \
template class BdaBridge<Dune::BCRSMatrix<Dune::FieldMatrix<T,n,n>>, \
Dune::BlockVector<Dune::FieldVector<T,n>>,n>;
#define INSTANTIATE_TYPE(T) \
INSTANTIATE_BDA_FUNCTIONS(T,1) \
INSTANTIATE_BDA_FUNCTIONS(T,2) \
INSTANTIATE_BDA_FUNCTIONS(T,3) \
INSTANTIATE_BDA_FUNCTIONS(T,4) \
INSTANTIATE_BDA_FUNCTIONS(T,5) \
INSTANTIATE_BDA_FUNCTIONS(T,6)
INSTANTIATE_BDA_FUNCTIONS(1);
INSTANTIATE_BDA_FUNCTIONS(2);
INSTANTIATE_BDA_FUNCTIONS(3);
INSTANTIATE_BDA_FUNCTIONS(4);
INSTANTIATE_BDA_FUNCTIONS(5);
INSTANTIATE_BDA_FUNCTIONS(6);
#undef INSTANTIATE_BDA_FUNCTIONS
INSTANTIATE_TYPE(double)
} // namespace Opm

View File

@ -36,12 +36,13 @@ template <class BridgeMatrix, class BridgeVector, int block_size>
class BdaBridge
{
private:
using Scalar = typename BridgeVector::field_type;
int verbosity = 0;
bool use_gpu = false;
std::string accelerator_mode;
std::unique_ptr<Accelerator::BdaSolver<double,block_size>> backend;
std::shared_ptr<Accelerator::BlockedMatrix<double>> matrix; // 'stores' matrix, actually points to h_rows, h_cols and the received BridgeMatrix for the nonzeroes
std::shared_ptr<Accelerator::BlockedMatrix<double>> jacMatrix; // 'stores' preconditioner matrix, actually points to h_rows, h_cols and the received BridgeMatrix for the nonzeroes
std::unique_ptr<Accelerator::BdaSolver<Scalar,block_size>> backend;
std::shared_ptr<Accelerator::BlockedMatrix<Scalar>> matrix; // 'stores' matrix, actually points to h_rows, h_cols and the received BridgeMatrix for the nonzeroes
std::shared_ptr<Accelerator::BlockedMatrix<Scalar>> jacMatrix; // 'stores' preconditioner matrix, actually points to h_rows, h_cols and the received BridgeMatrix for the nonzeroes
std::vector<int> h_rows, h_cols; // store the sparsity pattern of the matrix
std::vector<int> h_jacRows, h_jacCols; // store the sparsity pattern of the jacMatrix
std::vector<typename BridgeMatrix::size_type> diagIndices; // contains offsets of the diagonal blocks wrt start of the row, used for replaceZeroDiagonal()
@ -57,8 +58,14 @@ public:
/// \param[in] deviceID the device ID to be used by the cusparse- and openclSolvers, too high values could cause runtime errors
/// \param[in] opencl_ilu_parallel whether to parallelize the ILU decomposition and application in OpenCL with level_scheduling
/// \param[in] linsolver indicating the preconditioner, equal to the --linear-solver cmdline argument
BdaBridge(std::string accelerator_mode, int linear_solver_verbosity, int maxit, double tolerance,
unsigned int platformID, unsigned int deviceID, bool opencl_ilu_parallel, std::string linsolver);
BdaBridge(std::string accelerator_mode,
int linear_solver_verbosity,
int maxit,
Scalar tolerance,
unsigned int platformID,
unsigned int deviceID,
bool opencl_ilu_parallel,
std::string linsolver);
/// Solve linear system, A*x = b
@ -73,7 +80,7 @@ public:
BridgeMatrix* jacMat,
int numJacobiBlocks,
BridgeVector& b,
WellContributions<double>& wellContribs,
WellContributions<Scalar>& wellContribs,
InverseOperatorResult &result);
/// Get the resulting x vector
@ -82,7 +89,8 @@ public:
/// Return whether the BdaBridge will use the GPU or not
/// return whether the BdaBridge will use the GPU or not
bool getUseGpu(){
bool getUseGpu()
{
return use_gpu;
}
@ -90,19 +98,21 @@ public:
/// \param[in] mat input matrix, probably BCRSMatrix
/// \param[out] h_rows rowpointers
/// \param[out] h_cols columnindices
static void copySparsityPatternFromISTL(const BridgeMatrix& mat, std::vector<int>& h_rows, std::vector<int>& h_cols);
static void copySparsityPatternFromISTL(const BridgeMatrix& mat,
std::vector<int>& h_rows,
std::vector<int>& h_cols);
/// Initialize the WellContributions object with opencl context and queue
/// those must be set before calling BlackOilWellModel::getWellContributions() in ISTL
/// \param[in] wellContribs container to hold all WellContributions
/// \param[in] N number of rows in scalar vector that wellContribs will be applied on
void initWellContributions(WellContributions<double>& wellContribs, unsigned N);
void initWellContributions(WellContributions<Scalar>& wellContribs, unsigned N);
/// Return the selected accelerator mode, this is input via the command-line
std::string getAccleratorName(){
std::string getAccleratorName()
{
return accelerator_mode;
}
}; // end class BdaBridge
}