amgclSolverBackend: template Scalar type

This commit is contained in:
Arne Morten Kvarving 2024-04-16 12:15:06 +02:00
parent 23250b87e3
commit 0b22b62205
4 changed files with 114 additions and 118 deletions

View File

@ -95,7 +95,9 @@ BdaBridge<BridgeMatrix, BridgeVector, block_size>::BdaBridge(std::string acceler
} else if (accelerator_mode.compare("amgcl") == 0) {
#if HAVE_AMGCL
use_gpu = true; // should be replaced by a 'use_bridge' boolean
backend.reset(new Opm::Accelerator::amgclSolverBackend<block_size>(linear_solver_verbosity, maxit, tolerance, platformID, deviceID));
using AMGCL = Accelerator::amgclSolverBackend<double,block_size>;
backend = std::make_unique<AMGCL>(linear_solver_verbosity, maxit,
tolerance, platformID, deviceID);
#else
OPM_THROW(std::logic_error, "Error amgclSolver was chosen, but amgcl was not found by CMake");
#endif

View File

@ -46,37 +46,35 @@
#include <tuple>
#include <vector>
namespace Opm
{
namespace Accelerator
{
namespace Opm::Accelerator {
using Opm::OpmLog;
using Dune::Timer;
template <unsigned int block_size>
amgclSolverBackend<block_size>::
template<class Scalar, unsigned int block_size>
amgclSolverBackend<Scalar,block_size>::
amgclSolverBackend(const int verbosity_,
const int maxit_,
const double tolerance_,
const Scalar tolerance_,
const unsigned int platformID_,
const unsigned int deviceID_)
: Base(verbosity_, maxit_, tolerance_, platformID_, deviceID_)
{}
template <unsigned int block_size>
amgclSolverBackend<block_size>::~amgclSolverBackend() {}
template<class Scalar, unsigned int block_size>
amgclSolverBackend<Scalar,block_size>::~amgclSolverBackend()
{}
template <unsigned int block_size>
void amgclSolverBackend<block_size>::initialize(int Nb_, int nnzbs) {
template<class Scalar, unsigned int block_size>
void amgclSolverBackend<Scalar,block_size>::initialize(int Nb_, int nnzbs)
{
this->Nb = Nb_;
this->N = Nb * block_size;
this->nnzb = nnzbs;
this->nnz = nnzbs * block_size * block_size;
std::ostringstream out;
out << "Initializing amgclSolverBackend, matrix size: " << Nb << " blockrows, nnzb: " << nnzb << " blocks\n";
out << "Initializing amgclSolverBackend, matrix size: " << Nb
<< " blockrows, nnzb: " << nnzb << " blocks\n";
out << "Maxit: " << maxit << std::scientific << ", tolerance: " << tolerance << "\n";
out << "DeviceID: " << deviceID << "\n";
OpmLog::info(out.str());
@ -119,7 +117,8 @@ void amgclSolverBackend<block_size>::initialize(int Nb_, int nnzbs) {
prm.put("solver.maxiter", t3);
bool t4 = prm.get("solver.verbose", verbosity >= 2);
prm.put("solver.verbose", t4);
out << "Using parameters from " << filename << " (with default values for omitted parameters):\n";
out << "Using parameters from " << filename
<< " (with default values for omitted parameters):\n";
} else { // otherwise use default parameters, same as Dune
prm.put("backend_type", "cpu"); // put it in the tree so it gets printed
prm.put("precond.class", "relaxation");
@ -143,7 +142,8 @@ void amgclSolverBackend<block_size>::initialize(int Nb_, int nnzbs) {
} else if (backend_type_string == "vexcl") {
backend_type = Amgcl_backend_type::vexcl;
} else {
OPM_THROW(std::logic_error, "Error unknown value for amgcl parameter 'backend_type', use [cpu|cuda|vexcl]");
OPM_THROW(std::logic_error,
"Error unknown value for amgcl parameter 'backend_type', use [cpu|cuda|vexcl]");
}
if (backend_type == Amgcl_backend_type::cuda) {
@ -161,9 +161,10 @@ void amgclSolverBackend<block_size>::initialize(int Nb_, int nnzbs) {
initialized = true;
} // end initialize()
template <unsigned int block_size>
void amgclSolverBackend<block_size>::convert_sparsity_pattern(int *rows, int *cols) {
template<class Scalar, unsigned int block_size>
void amgclSolverBackend<Scalar,block_size>::
convert_sparsity_pattern(int* rows, int* cols)
{
Timer t;
const unsigned int bs = block_size;
int idx = 0; // indicates the unblocked write index
@ -190,9 +191,10 @@ void amgclSolverBackend<block_size>::convert_sparsity_pattern(int *rows, int *co
}
} // end convert_sparsity_pattern()
template <unsigned int block_size>
void amgclSolverBackend<block_size>::convert_data(double *vals, int *rows) {
template<class Scalar, unsigned int block_size>
void amgclSolverBackend<Scalar,block_size>::
convert_data(Scalar* vals, int* rows)
{
Timer t;
const unsigned int bs = block_size;
int idx = 0; // indicates the unblocked write index
@ -218,7 +220,9 @@ void amgclSolverBackend<block_size>::convert_data(double *vals, int *rows) {
} // end convert_data()
#if HAVE_VEXCL
void initialize_vexcl(std::vector<cl::CommandQueue>& ctx, unsigned int platformID, unsigned int deviceID) {
void initialize_vexcl(std::vector<cl::CommandQueue>& ctx,
unsigned int platformID, unsigned int deviceID)
{
std::vector<cl::Platform> platforms;
std::vector<cl::Device> devices;
cl::Platform::get(&platforms);
@ -246,19 +250,20 @@ void initialize_vexcl(std::vector<cl::CommandQueue>& ctx, unsigned int platformI
OpmLog::info(out.str());
}
template <typename vexcl_matrix_type, typename vexcl_vector_type, unsigned int block_size, typename AIJInfo>
void solve_vexcl(
const AIJInfo& A,
const boost::property_tree::ptree prm,
const std::vector<cl::CommandQueue>& ctx,
double *b,
std::vector<double>& x,
const int N,
int& iters,
double& error)
template <typename vexcl_matrix_type, typename vexcl_vector_type,
unsigned int block_size, typename Scalar, typename AIJInfo>
void solve_vexcl(const AIJInfo& A,
const boost::property_tree::ptree prm,
const std::vector<cl::CommandQueue>& ctx,
Scalar* b,
std::vector<Scalar>& x,
const int N,
int& iters,
Scalar& error)
{
typedef amgcl::backend::vexcl<vexcl_matrix_type> Backend;
typedef amgcl::make_solver<amgcl::runtime::preconditioner<Backend>, amgcl::runtime::solver::wrapper<Backend> > Solver;
using Backend = amgcl::backend::vexcl<vexcl_matrix_type>;
using Solver = amgcl::make_solver<amgcl::runtime::preconditioner<Backend>,
amgcl::runtime::solver::wrapper<Backend>>;
typename Solver::backend_params bprm;
bprm.q = ctx; // set vexcl context
@ -276,8 +281,10 @@ void solve_vexcl(
}
#endif
template <unsigned int block_size>
void amgclSolverBackend<block_size>::solve_system(double *b, BdaResult &res) {
template<class Scalar, unsigned int block_size>
void amgclSolverBackend<Scalar,block_size>::
solve_system(Scalar* b, BdaResult& res)
{
Timer t;
try {
@ -307,7 +314,7 @@ void amgclSolverBackend<block_size>::solve_system(double *b, BdaResult &res) {
// reset x vector
std::fill(x.begin(), x.end(), 0.0);
std::vector<double> b_(b, b + N);
std::vector<Scalar> b_(b, b + N);
// create numa vectors
typename CPU_Backend::params bprm;
@ -350,10 +357,11 @@ void amgclSolverBackend<block_size>::solve_system(double *b, BdaResult &res) {
if constexpr(block_size == 1){
auto A = std::tie(N, A_rows, A_cols, A_vals);
solve_vexcl<double, double, block_size>(A, prm, ctx, b, x, N, iters, error);
solve_vexcl<Scalar, Scalar, block_size>(A, prm, ctx, b, x, N, iters, error);
} else {
// allow vexcl to use blocked matrices
vex::scoped_program_header h1(ctx, amgcl::backend::vexcl_static_matrix_declaration<double, block_size>());
vex::scoped_program_header h1(ctx,
amgcl::backend::vexcl_static_matrix_declaration<Scalar, block_size>());
auto Atmp = std::tie(N, A_rows, A_cols, A_vals);
auto A = amgcl::adapter::block_matrix<dmat_type>(Atmp);
@ -376,8 +384,8 @@ void amgclSolverBackend<block_size>::solve_system(double *b, BdaResult &res) {
if (verbosity >= 1) {
std::ostringstream out;
out << "=== converged: " << res.converged << ", time: " << res.elapsed << \
", time per iteration: " << res.elapsed / iters << ", iterations: " << iters;
out << "=== converged: " << res.converged << ", time: " << res.elapsed
<< ", time per iteration: " << res.elapsed / iters << ", iterations: " << iters;
OpmLog::info(out.str());
}
if (verbosity >= 3) {
@ -385,14 +393,13 @@ void amgclSolverBackend<block_size>::solve_system(double *b, BdaResult &res) {
out << "amgclSolverBackend::solve_system(): " << time_elapsed << " s";
OpmLog::info(out.str());
}
} // end solve_system()
// copy result to host memory
// caller must be sure that x is a valid array
template <unsigned int block_size>
void amgclSolverBackend<block_size>::get_result(double *x_) {
template<class Scalar, unsigned int block_size>
void amgclSolverBackend<Scalar,block_size>::get_result(Scalar* x_)
{
Timer t;
std::copy(x.begin(), x.end(), x_);
@ -404,12 +411,12 @@ void amgclSolverBackend<block_size>::get_result(double *x_) {
}
} // end get_result()
template <unsigned int block_size>
SolverStatus amgclSolverBackend<block_size>::
solve_system(std::shared_ptr<BlockedMatrix<double>> matrix,
double *b,
[[maybe_unused]] std::shared_ptr<BlockedMatrix<double>> jacMatrix,
[[maybe_unused]] WellContributions<double>& wellContribs,
template<class Scalar, unsigned int block_size>
SolverStatus amgclSolverBackend<Scalar,block_size>::
solve_system(std::shared_ptr<BlockedMatrix<Scalar>> matrix,
Scalar* b,
[[maybe_unused]] std::shared_ptr<BlockedMatrix<Scalar>> jacMatrix,
[[maybe_unused]] WellContributions<Scalar>& wellContribs,
BdaResult& res)
{
if (initialized == false) {
@ -421,15 +428,14 @@ solve_system(std::shared_ptr<BlockedMatrix<double>> matrix,
return SolverStatus::BDA_SOLVER_SUCCESS;
}
#define INSTANTIATE_TYPE(T) \
template class amgclSolverBackend<1>; \
template class amgclSolverBackend<2>; \
template class amgclSolverBackend<3>; \
template class amgclSolverBackend<4>; \
template class amgclSolverBackend<5>; \
template class amgclSolverBackend<6>;
#define INSTANTIATE_TYPE(T) \
template class amgclSolverBackend<T,1>; \
template class amgclSolverBackend<T,2>; \
template class amgclSolverBackend<T,3>; \
template class amgclSolverBackend<T,4>; \
template class amgclSolverBackend<T,5>; \
template class amgclSolverBackend<T,6>;
INSTANTIATE_TYPE(double)
} // namespace Accelerator
} // namespace Opm
} // namespace Opm::Accelerator

View File

@ -41,17 +41,14 @@
#include <type_traits>
#include <vector>
namespace Opm
{
namespace Accelerator
{
namespace Opm::Accelerator {
/// This class does not implement a solver, but converts the BCSR format to normal CSR and uses amgcl for solving
/// Note amgcl also implements blocked solvers, but looks like it needs unblocked input data
template <unsigned int block_size>
class amgclSolverBackend : public BdaSolver<double,block_size>
template<class Scalar, unsigned int block_size>
class amgclSolverBackend : public BdaSolver<Scalar,block_size>
{
using Base = BdaSolver<double,block_size>;
using Base = BdaSolver<Scalar,block_size>;
using Base::N;
using Base::Nb;
@ -64,10 +61,10 @@ class amgclSolverBackend : public BdaSolver<double,block_size>
using Base::tolerance;
using Base::initialized;
using dmat_type = amgcl::static_matrix<double, block_size, block_size>; // matrix value type in double precision
using dvec_type = amgcl::static_matrix<double, block_size, 1>; // the corresponding vector value type
using dmat_type = amgcl::static_matrix<Scalar, block_size, block_size>; // matrix value type in double precision
using dvec_type = amgcl::static_matrix<Scalar, block_size, 1>; // the corresponding vector value type
using CPU_Backend = std::conditional_t<block_size == 1,
amgcl::backend::builtin<double>,
amgcl::backend::builtin<Scalar>,
amgcl::backend::builtin<dmat_type>>;
using CPU_Solver = amgcl::make_solver<amgcl::runtime::preconditioner<CPU_Backend>,
@ -83,18 +80,18 @@ private:
// store matrix in CSR format
std::vector<unsigned> A_rows, A_cols;
std::vector<double> A_vals, rhs;
std::vector<double> x;
std::vector<Scalar> A_vals, rhs;
std::vector<Scalar> x;
std::once_flag print_info;
Amgcl_backend_type backend_type = cpu;
boost::property_tree::ptree prm; // amgcl parameters
int iters = 0;
double error = 0.0;
Scalar error = 0.0;
#if HAVE_CUDA
std::once_flag cuda_initialize;
void solve_cuda(double *b);
void solve_cuda(Scalar* b);
#endif
#if HAVE_VEXCL
@ -113,21 +110,23 @@ private:
/// Convert the BCSR nonzero data to a CSR format
/// \param[in] vals array of nonzeroes, each block is stored row-wise and contiguous, contains nnz values
/// \param[in] rows array of rowPointers, contains N/dim+1 values
void convert_data(double *vals, int *rows);
void convert_data(Scalar* vals, int* rows);
/// Solve linear system
/// \param[in] b pointer to b vector
/// \param[inout] res summary of solver result
void solve_system(double *b, BdaResult &res);
void solve_system(Scalar* b, BdaResult& res);
public:
/// Construct a openclSolver
/// \param[in] linear_solver_verbosity verbosity of openclSolver
/// \param[in] maxit maximum number of iterations for openclSolver
/// \param[in] tolerance required relative tolerance for openclSolver
/// Construct an amgcl solver
/// \param[in] linear_solver_verbosity verbosity of amgclSolver
/// \param[in] maxit maximum number of iterations for amgclSolver
/// \param[in] tolerance required relative tolerance for amgclSolver
/// \param[in] platformID the OpenCL platform to be used
/// \param[in] deviceID the device to be used
amgclSolverBackend(int linear_solver_verbosity, int maxit, double tolerance, unsigned int platformID, unsigned int deviceID);
amgclSolverBackend(int linear_solver_verbosity, int maxit,
Scalar tolerance, unsigned int platformID,
unsigned int deviceID);
/// Destroy a openclSolver, and free memory
~amgclSolverBackend();
@ -139,21 +138,18 @@ public:
/// \param[in] wellContribs WellContributions, to apply them separately, instead of adding them to matrix A
/// \param[inout] res summary of solver result
/// \return status code
SolverStatus solve_system(std::shared_ptr<BlockedMatrix<double>> matrix,
double *b,
std::shared_ptr<BlockedMatrix<double>> jacMatrix,
WellContributions<double>& wellContribs,
SolverStatus solve_system(std::shared_ptr<BlockedMatrix<Scalar>> matrix,
Scalar* b,
std::shared_ptr<BlockedMatrix<Scalar>> jacMatrix,
WellContributions<Scalar>& wellContribs,
BdaResult& res) override;
/// Get result after linear solve, and peform postprocessing if necessary
/// \param[inout] x resulting x vector, caller must guarantee that x points to a valid array
void get_result(double *x) override;
void get_result(Scalar* x) override;
}; // end class amgclSolverBackend
} // namespace Accelerator
} // namespace Opm
} // namespace Opm::Accelerator
#endif

View File

@ -28,18 +28,14 @@
/// This file is only compiled when both amgcl and CUDA are found by CMake
namespace Opm
namespace Opm::Accelerator {
template<class Scalar, unsigned int block_size>
void amgclSolverBackend<Scalar,block_size>::solve_cuda(Scalar* b)
{
namespace Accelerator
{
using Opm::OpmLog;
template <unsigned int block_size>
void amgclSolverBackend<block_size>::solve_cuda(double *b) {
typedef amgcl::backend::cuda<double> CUDA_Backend;
typedef amgcl::make_solver<amgcl::runtime::preconditioner<CUDA_Backend>, amgcl::runtime::solver::wrapper<CUDA_Backend> > CUDA_Solver;
using CUDA_Backend = amgcl::backend::cuda<Scalar>;
using CUDA_Solver = amgcl::make_solver<amgcl::runtime::preconditioner<CUDA_Backend>,
amgcl::runtime::solver::wrapper<CUDA_Backend>>;
static typename CUDA_Backend::params CUDA_bprm; // amgcl backend parameters, only used for cusparseHandle
@ -67,8 +63,8 @@ void amgclSolverBackend<block_size>::solve_cuda(double *b) {
OpmLog::info(out.str());
});
thrust::device_vector<double> B(b, b + N);
thrust::device_vector<double> X(N, 0.0);
thrust::device_vector<Scalar> B(b, b + N);
thrust::device_vector<Scalar> X(N, 0.0);
// actually solve
std::tie(iters, error) = solve(B, X);
@ -76,19 +72,15 @@ void amgclSolverBackend<block_size>::solve_cuda(double *b) {
thrust::copy(X.begin(), X.end(), x.begin());
}
#define INSTANTIATE_TYPE(T) \
template void amgclSolverBackend<T,1>::solve_cuda(T*); \
template void amgclSolverBackend<T,2>::solve_cuda(T*); \
template void amgclSolverBackend<T,3>::solve_cuda(T*); \
template void amgclSolverBackend<T,4>::solve_cuda(T*); \
template void amgclSolverBackend<T,5>::solve_cuda(T*); \
template void amgclSolverBackend<T,6>::solve_cuda(T*);
#define INSTANTIATE_BDA_FUNCTIONS(n) \
template void amgclSolverBackend<n>::solve_cuda(double*); \
INSTANTIATE_TYPE(double)
INSTANTIATE_BDA_FUNCTIONS(1);
INSTANTIATE_BDA_FUNCTIONS(2);
INSTANTIATE_BDA_FUNCTIONS(3);
INSTANTIATE_BDA_FUNCTIONS(4);
INSTANTIATE_BDA_FUNCTIONS(5);
INSTANTIATE_BDA_FUNCTIONS(6);
#undef INSTANTIATE_BDA_FUNCTIONS
} // namespace Accelerator
} // namespace Opm
} // namespace Opm::Accelerator