amgclSolverBackend: template Scalar type

2025-02-25 18:55:30 -06:00 · 2024-04-16 12:15:06 +02:00 · 2024-04-16 12:15:06 +02:00 · 0b22b62205
commit 0b22b62205
parent 23250b87e3
4 changed files with 114 additions and 118 deletions
--- a/opm/simulators/linalg/bda/BdaBridge.cpp
+++ b/opm/simulators/linalg/bda/BdaBridge.cpp
@ -95,7 +95,9 @@ BdaBridge<BridgeMatrix, BridgeVector, block_size>::BdaBridge(std::string acceler
    } else if (accelerator_mode.compare("amgcl") == 0) {
 #if HAVE_AMGCL
        use_gpu = true; // should be replaced by a 'use_bridge' boolean
-        backend.reset(new Opm::Accelerator::amgclSolverBackend<block_size>(linear_solver_verbosity, maxit, tolerance, platformID, deviceID));
+        using AMGCL = Accelerator::amgclSolverBackend<double,block_size>;
+        backend = std::make_unique<AMGCL>(linear_solver_verbosity, maxit,
+                                          tolerance, platformID, deviceID);
 #else
        OPM_THROW(std::logic_error, "Error amgclSolver was chosen, but amgcl was not found by CMake");
 #endif
--- a/opm/simulators/linalg/bda/amgclSolverBackend.cpp
+++ b/opm/simulators/linalg/bda/amgclSolverBackend.cpp
@ -46,37 +46,35 @@
 #include <tuple>
 #include <vector>

-namespace Opm
-{
-namespace Accelerator
-{
+namespace Opm::Accelerator {

-using Opm::OpmLog;
 using Dune::Timer;

-template <unsigned int block_size>
-amgclSolverBackend<block_size>::
+template<class Scalar, unsigned int block_size>
+amgclSolverBackend<Scalar,block_size>::
 amgclSolverBackend(const int          verbosity_,
                   const int          maxit_,
-                   const double       tolerance_,
+                   const Scalar       tolerance_,
                   const unsigned int platformID_,
                   const unsigned int deviceID_)
    : Base(verbosity_, maxit_, tolerance_, platformID_, deviceID_)
 {}

-template <unsigned int block_size>
-amgclSolverBackend<block_size>::~amgclSolverBackend() {}
+template<class Scalar, unsigned int block_size>
+amgclSolverBackend<Scalar,block_size>::~amgclSolverBackend()
+{}

-
-template <unsigned int block_size>
-void amgclSolverBackend<block_size>::initialize(int Nb_, int nnzbs) {
+template<class Scalar, unsigned int block_size>
+void amgclSolverBackend<Scalar,block_size>::initialize(int Nb_, int nnzbs)
+{
    this->Nb = Nb_;
    this->N = Nb * block_size;
    this->nnzb = nnzbs;
    this->nnz = nnzbs * block_size * block_size;

    std::ostringstream out;
-    out << "Initializing amgclSolverBackend, matrix size: " << Nb << " blockrows, nnzb: " << nnzb << " blocks\n";
+    out << "Initializing amgclSolverBackend, matrix size: " << Nb
+        << " blockrows, nnzb: " << nnzb << " blocks\n";
    out << "Maxit: " << maxit << std::scientific << ", tolerance: " << tolerance << "\n";
    out << "DeviceID: " << deviceID << "\n";
    OpmLog::info(out.str());
@ -119,7 +117,8 @@ void amgclSolverBackend<block_size>::initialize(int Nb_, int nnzbs) {
        prm.put("solver.maxiter", t3);
        bool t4 = prm.get("solver.verbose", verbosity >= 2);
        prm.put("solver.verbose", t4);
-        out << "Using parameters from " << filename << " (with default values for omitted parameters):\n";
+        out << "Using parameters from " << filename
+            << " (with default values for omitted parameters):\n";
    } else { // otherwise use default parameters, same as Dune
        prm.put("backend_type", "cpu"); // put it in the tree so it gets printed
        prm.put("precond.class", "relaxation");
@ -143,7 +142,8 @@ void amgclSolverBackend<block_size>::initialize(int Nb_, int nnzbs) {
    } else if (backend_type_string == "vexcl") {
        backend_type = Amgcl_backend_type::vexcl;
    } else {
-        OPM_THROW(std::logic_error, "Error unknown value for amgcl parameter 'backend_type', use [cpu|cuda|vexcl]");
+        OPM_THROW(std::logic_error,
+                  "Error unknown value for amgcl parameter 'backend_type', use [cpu|cuda|vexcl]");
    }

    if (backend_type == Amgcl_backend_type::cuda) {
@ -161,9 +161,10 @@ void amgclSolverBackend<block_size>::initialize(int Nb_, int nnzbs) {
    initialized = true;
 } // end initialize()

-
-template <unsigned int block_size>
-void amgclSolverBackend<block_size>::convert_sparsity_pattern(int *rows, int *cols) {
+template<class Scalar, unsigned int block_size>
+void amgclSolverBackend<Scalar,block_size>::
+convert_sparsity_pattern(int* rows, int* cols)
+{
    Timer t;
    const unsigned int bs = block_size;
    int idx = 0; // indicates the unblocked write index
@ -190,9 +191,10 @@ void amgclSolverBackend<block_size>::convert_sparsity_pattern(int *rows, int *co
    }
 } // end convert_sparsity_pattern()

-
-template <unsigned int block_size>
-void amgclSolverBackend<block_size>::convert_data(double *vals, int *rows) {
+template<class Scalar, unsigned int block_size>
+void amgclSolverBackend<Scalar,block_size>::
+convert_data(Scalar* vals, int* rows)
+{
    Timer t;
    const unsigned int bs = block_size;
    int idx = 0; // indicates the unblocked write index
@ -218,7 +220,9 @@ void amgclSolverBackend<block_size>::convert_data(double *vals, int *rows) {
 } // end convert_data()

 #if HAVE_VEXCL
-void initialize_vexcl(std::vector<cl::CommandQueue>& ctx, unsigned int platformID, unsigned int deviceID) {
+void initialize_vexcl(std::vector<cl::CommandQueue>& ctx,
+                      unsigned int platformID, unsigned int deviceID)
+{
    std::vector<cl::Platform> platforms;
    std::vector<cl::Device> devices;
    cl::Platform::get(&platforms);
@ -246,19 +250,20 @@ void initialize_vexcl(std::vector<cl::CommandQueue>& ctx, unsigned int platformI
    OpmLog::info(out.str());
 }

-template <typename vexcl_matrix_type, typename vexcl_vector_type, unsigned int block_size, typename AIJInfo>
-void solve_vexcl(
-    const AIJInfo& A,
-    const boost::property_tree::ptree prm,
-    const std::vector<cl::CommandQueue>& ctx,
-    double *b,
-    std::vector<double>& x,
-    const int N,
-    int& iters,
-    double& error)
+template <typename vexcl_matrix_type, typename vexcl_vector_type,
+          unsigned int block_size, typename Scalar, typename AIJInfo>
+void solve_vexcl(const AIJInfo& A,
+                 const boost::property_tree::ptree prm,
+                 const std::vector<cl::CommandQueue>& ctx,
+                 Scalar* b,
+                 std::vector<Scalar>& x,
+                 const int N,
+                 int& iters,
+                 Scalar& error)
 {
-    typedef amgcl::backend::vexcl<vexcl_matrix_type> Backend;
-    typedef amgcl::make_solver<amgcl::runtime::preconditioner<Backend>, amgcl::runtime::solver::wrapper<Backend> > Solver;
+    using Backend = amgcl::backend::vexcl<vexcl_matrix_type>;
+    using Solver = amgcl::make_solver<amgcl::runtime::preconditioner<Backend>,
+                                      amgcl::runtime::solver::wrapper<Backend>>;

    typename Solver::backend_params bprm;
    bprm.q = ctx;  // set vexcl context
@ -276,8 +281,10 @@ void solve_vexcl(
 }
 #endif

-template <unsigned int block_size>
-void amgclSolverBackend<block_size>::solve_system(double *b, BdaResult &res) {
+template<class Scalar, unsigned int block_size>
+void amgclSolverBackend<Scalar,block_size>::
+solve_system(Scalar* b, BdaResult& res)
+{
    Timer t;

    try {
@ -307,7 +314,7 @@ void amgclSolverBackend<block_size>::solve_system(double *b, BdaResult &res) {
                // reset x vector
                std::fill(x.begin(), x.end(), 0.0);

-                std::vector<double> b_(b, b + N);
+                std::vector<Scalar> b_(b, b + N);

                // create numa vectors
                typename CPU_Backend::params bprm;
@ -350,10 +357,11 @@ void amgclSolverBackend<block_size>::solve_system(double *b, BdaResult &res) {
            if constexpr(block_size == 1){
                auto A = std::tie(N, A_rows, A_cols, A_vals);

-                solve_vexcl<double, double, block_size>(A, prm, ctx, b, x, N, iters, error);
+                solve_vexcl<Scalar, Scalar, block_size>(A, prm, ctx, b, x, N, iters, error);
            } else {
                // allow vexcl to use blocked matrices
-                vex::scoped_program_header h1(ctx, amgcl::backend::vexcl_static_matrix_declaration<double, block_size>());
+                vex::scoped_program_header h1(ctx,
+                                              amgcl::backend::vexcl_static_matrix_declaration<Scalar, block_size>());

                auto Atmp = std::tie(N, A_rows, A_cols, A_vals);
                auto A = amgcl::adapter::block_matrix<dmat_type>(Atmp);
@ -376,8 +384,8 @@ void amgclSolverBackend<block_size>::solve_system(double *b, BdaResult &res) {

    if (verbosity >= 1) {
        std::ostringstream out;
-        out << "=== converged: " << res.converged << ", time: " << res.elapsed << \
-            ", time per iteration: " << res.elapsed / iters << ", iterations: " << iters;
+        out << "=== converged: " << res.converged << ", time: " << res.elapsed
+            << ", time per iteration: " << res.elapsed / iters << ", iterations: " << iters;
        OpmLog::info(out.str());
    }
    if (verbosity >= 3) {
@ -385,14 +393,13 @@ void amgclSolverBackend<block_size>::solve_system(double *b, BdaResult &res) {
        out << "amgclSolverBackend::solve_system(): " << time_elapsed << " s";
        OpmLog::info(out.str());
    }
-
 } // end solve_system()

-
 // copy result to host memory
 // caller must be sure that x is a valid array
-template <unsigned int block_size>
-void amgclSolverBackend<block_size>::get_result(double *x_) {
+template<class Scalar, unsigned int block_size>
+void amgclSolverBackend<Scalar,block_size>::get_result(Scalar* x_)
+{
    Timer t;

    std::copy(x.begin(), x.end(), x_);
@ -404,12 +411,12 @@ void amgclSolverBackend<block_size>::get_result(double *x_) {
    }
 } // end get_result()

-template <unsigned int block_size>
-SolverStatus amgclSolverBackend<block_size>::
-solve_system(std::shared_ptr<BlockedMatrix<double>> matrix,
-             double *b,
-             [[maybe_unused]] std::shared_ptr<BlockedMatrix<double>> jacMatrix,
-             [[maybe_unused]] WellContributions<double>& wellContribs,
+template<class Scalar, unsigned int block_size>
+SolverStatus amgclSolverBackend<Scalar,block_size>::
+solve_system(std::shared_ptr<BlockedMatrix<Scalar>> matrix,
+             Scalar* b,
+             [[maybe_unused]] std::shared_ptr<BlockedMatrix<Scalar>> jacMatrix,
+             [[maybe_unused]] WellContributions<Scalar>& wellContribs,
             BdaResult& res)
 {
    if (initialized == false) {
@ -421,15 +428,14 @@ solve_system(std::shared_ptr<BlockedMatrix<double>> matrix,
    return SolverStatus::BDA_SOLVER_SUCCESS;
 }

-#define INSTANTIATE_TYPE(T)               \
-    template class amgclSolverBackend<1>; \
-    template class amgclSolverBackend<2>; \
-    template class amgclSolverBackend<3>; \
-    template class amgclSolverBackend<4>; \
-    template class amgclSolverBackend<5>; \
-    template class amgclSolverBackend<6>;
+#define INSTANTIATE_TYPE(T)                 \
+    template class amgclSolverBackend<T,1>; \
+    template class amgclSolverBackend<T,2>; \
+    template class amgclSolverBackend<T,3>; \
+    template class amgclSolverBackend<T,4>; \
+    template class amgclSolverBackend<T,5>; \
+    template class amgclSolverBackend<T,6>;

 INSTANTIATE_TYPE(double)

-} // namespace Accelerator
-} // namespace Opm
+} // namespace Opm::Accelerator
--- a/opm/simulators/linalg/bda/amgclSolverBackend.hpp
+++ b/opm/simulators/linalg/bda/amgclSolverBackend.hpp
@ -41,17 +41,14 @@
 #include <type_traits>
 #include <vector>

-namespace Opm
-{
-namespace Accelerator
-{
+namespace Opm::Accelerator {

 /// This class does not implement a solver, but converts the BCSR format to normal CSR and uses amgcl for solving
 /// Note amgcl also implements blocked solvers, but looks like it needs unblocked input data
-template <unsigned int block_size>
-class amgclSolverBackend : public BdaSolver<double,block_size>
+template<class Scalar, unsigned int block_size>
+class amgclSolverBackend : public BdaSolver<Scalar,block_size>
 {
-    using Base = BdaSolver<double,block_size>;
+    using Base = BdaSolver<Scalar,block_size>;

    using Base::N;
    using Base::Nb;
@ -64,10 +61,10 @@ class amgclSolverBackend : public BdaSolver<double,block_size>
    using Base::tolerance;
    using Base::initialized;

-    using dmat_type = amgcl::static_matrix<double, block_size, block_size>; // matrix value type in double precision
-    using dvec_type = amgcl::static_matrix<double, block_size, 1>; // the corresponding vector value type
+    using dmat_type = amgcl::static_matrix<Scalar, block_size, block_size>; // matrix value type in double precision
+    using dvec_type = amgcl::static_matrix<Scalar, block_size, 1>; // the corresponding vector value type
    using CPU_Backend = std::conditional_t<block_size == 1,
-                                           amgcl::backend::builtin<double>,
+                                           amgcl::backend::builtin<Scalar>,
                                           amgcl::backend::builtin<dmat_type>>;

    using CPU_Solver = amgcl::make_solver<amgcl::runtime::preconditioner<CPU_Backend>,
@ -83,18 +80,18 @@ private:

    // store matrix in CSR format
    std::vector<unsigned> A_rows, A_cols;
-    std::vector<double> A_vals, rhs;
-    std::vector<double> x;
+    std::vector<Scalar> A_vals, rhs;
+    std::vector<Scalar> x;
    std::once_flag print_info;
    Amgcl_backend_type backend_type = cpu;

    boost::property_tree::ptree prm;         // amgcl parameters
    int iters = 0;
-    double error = 0.0;
+    Scalar error = 0.0;

 #if HAVE_CUDA
    std::once_flag cuda_initialize;
-    void solve_cuda(double *b);
+    void solve_cuda(Scalar* b);
 #endif

 #if HAVE_VEXCL
@ -113,21 +110,23 @@ private:
    /// Convert the BCSR nonzero data to a CSR format
    /// \param[in] vals           array of nonzeroes, each block is stored row-wise and contiguous, contains nnz values
    /// \param[in] rows           array of rowPointers, contains N/dim+1 values
-    void convert_data(double *vals, int *rows);
+    void convert_data(Scalar* vals, int* rows);

    /// Solve linear system
    /// \param[in] b              pointer to b vector
    /// \param[inout] res         summary of solver result
-    void solve_system(double *b, BdaResult &res);
+    void solve_system(Scalar* b, BdaResult& res);

 public:
-    /// Construct a openclSolver
-    /// \param[in] linear_solver_verbosity    verbosity of openclSolver
-    /// \param[in] maxit                      maximum number of iterations for openclSolver
-    /// \param[in] tolerance                  required relative tolerance for openclSolver
+    /// Construct an amgcl solver
+    /// \param[in] linear_solver_verbosity    verbosity of amgclSolver
+    /// \param[in] maxit                      maximum number of iterations for amgclSolver
+    /// \param[in] tolerance                  required relative tolerance for amgclSolver
    /// \param[in] platformID                 the OpenCL platform to be used
    /// \param[in] deviceID                   the device to be used
-    amgclSolverBackend(int linear_solver_verbosity, int maxit, double tolerance, unsigned int platformID, unsigned int deviceID);
+    amgclSolverBackend(int linear_solver_verbosity, int maxit,
+                       Scalar tolerance, unsigned int platformID,
+                       unsigned int deviceID);

    /// Destroy a openclSolver, and free memory
    ~amgclSolverBackend();
@ -139,21 +138,18 @@ public:
    /// \param[in] wellContribs   WellContributions, to apply them separately, instead of adding them to matrix A
    /// \param[inout] res         summary of solver result
    /// \return                   status code
-    SolverStatus solve_system(std::shared_ptr<BlockedMatrix<double>> matrix,
-                              double *b,
-                              std::shared_ptr<BlockedMatrix<double>> jacMatrix,
-                              WellContributions<double>& wellContribs,
+    SolverStatus solve_system(std::shared_ptr<BlockedMatrix<Scalar>> matrix,
+                              Scalar* b,
+                              std::shared_ptr<BlockedMatrix<Scalar>> jacMatrix,
+                              WellContributions<Scalar>& wellContribs,
                              BdaResult& res) override;
    
    /// Get result after linear solve, and peform postprocessing if necessary
    /// \param[inout] x          resulting x vector, caller must guarantee that x points to a valid array
-    void get_result(double *x) override;
+    void get_result(Scalar* x) override;

 }; // end class amgclSolverBackend

-} // namespace Accelerator
-} // namespace Opm
+} // namespace Opm::Accelerator

 #endif
-
-
--- a/opm/simulators/linalg/bda/cuda/amgclSolverBackend.cu
+++ b/opm/simulators/linalg/bda/cuda/amgclSolverBackend.cu
@ -28,18 +28,14 @@

 /// This file is only compiled when both amgcl and CUDA are found by CMake

-namespace Opm
+namespace Opm::Accelerator {
+
+template<class Scalar, unsigned int block_size>
+void amgclSolverBackend<Scalar,block_size>::solve_cuda(Scalar* b)
 {
-namespace Accelerator
-{
-
-using Opm::OpmLog;
-
-
-template <unsigned int block_size>
-void amgclSolverBackend<block_size>::solve_cuda(double *b) {
-    typedef amgcl::backend::cuda<double> CUDA_Backend;
-    typedef amgcl::make_solver<amgcl::runtime::preconditioner<CUDA_Backend>, amgcl::runtime::solver::wrapper<CUDA_Backend> > CUDA_Solver;
+    using CUDA_Backend = amgcl::backend::cuda<Scalar>;
+    using CUDA_Solver = amgcl::make_solver<amgcl::runtime::preconditioner<CUDA_Backend>,
+                                           amgcl::runtime::solver::wrapper<CUDA_Backend>>;

    static typename CUDA_Backend::params CUDA_bprm; // amgcl backend parameters, only used for cusparseHandle

@ -67,8 +63,8 @@ void amgclSolverBackend<block_size>::solve_cuda(double *b) {
        OpmLog::info(out.str());
    });

-    thrust::device_vector<double> B(b, b + N);
-    thrust::device_vector<double> X(N, 0.0);
+    thrust::device_vector<Scalar> B(b, b + N);
+    thrust::device_vector<Scalar> X(N, 0.0);

    // actually solve
    std::tie(iters, error) = solve(B, X);
@ -76,19 +72,15 @@ void amgclSolverBackend<block_size>::solve_cuda(double *b) {
    thrust::copy(X.begin(), X.end(), x.begin());
 }

+#define INSTANTIATE_TYPE(T)                                \
+    template void amgclSolverBackend<T,1>::solve_cuda(T*); \
+    template void amgclSolverBackend<T,2>::solve_cuda(T*); \
+    template void amgclSolverBackend<T,3>::solve_cuda(T*); \
+    template void amgclSolverBackend<T,4>::solve_cuda(T*); \
+    template void amgclSolverBackend<T,5>::solve_cuda(T*); \
+    template void amgclSolverBackend<T,6>::solve_cuda(T*);

-#define INSTANTIATE_BDA_FUNCTIONS(n)                      \
-template void amgclSolverBackend<n>::solve_cuda(double*); \
+INSTANTIATE_TYPE(double)

-INSTANTIATE_BDA_FUNCTIONS(1);
-INSTANTIATE_BDA_FUNCTIONS(2);
-INSTANTIATE_BDA_FUNCTIONS(3);
-INSTANTIATE_BDA_FUNCTIONS(4);
-INSTANTIATE_BDA_FUNCTIONS(5);
-INSTANTIATE_BDA_FUNCTIONS(6);
-
-#undef INSTANTIATE_BDA_FUNCTIONS
-
-} // namespace Accelerator
-} // namespace Opm
+} // namespace Opm::Accelerator