BdaSolver: template Scalar type

2025-02-25 18:55:30 -06:00 · 2024-04-16 10:29:33 +02:00
parent e03f2394b9
commit d0773ef4f7
12 changed files with 76 additions and 41 deletions
--- a/opm/simulators/linalg/bda/BdaBridge.hpp
+++ b/opm/simulators/linalg/bda/BdaBridge.hpp
@@ -39,7 +39,7 @@ private:
    int verbosity = 0;
    bool use_gpu = false;
    std::string accelerator_mode;
-    std::unique_ptr<Accelerator::BdaSolver<block_size>> backend;
+    std::unique_ptr<Accelerator::BdaSolver<double,block_size>> backend;
    std::shared_ptr<Accelerator::BlockedMatrix<double>> matrix;  // 'stores' matrix, actually points to h_rows, h_cols and the received BridgeMatrix for the nonzeroes
    std::shared_ptr<Accelerator::BlockedMatrix<double>> jacMatrix;  // 'stores' preconditioner matrix, actually points to h_rows, h_cols and the received BridgeMatrix for the nonzeroes
    std::vector<int> h_rows, h_cols;  // store the sparsity pattern of the matrix
--- a/opm/simulators/linalg/bda/BdaSolver.hpp
+++ b/opm/simulators/linalg/bda/BdaSolver.hpp
@@ -25,7 +25,6 @@
 #include <opm/simulators/linalg/bda/BlockedMatrix.hpp>

 #include <memory>
-#include <string>

 namespace Opm {

@@ -42,7 +41,7 @@ enum class SolverStatus {

 /// This class serves to simplify choosing between different backend solvers, such as cusparseSolver and openclSolver
 /// This class is abstract, no instantiations can of it can be made, only of its children
-template <unsigned int block_size>
+template<class Scalar, unsigned int block_size>
 class BdaSolver
 {
 protected:
@@ -51,11 +50,10 @@ protected:
    // 1: print number of iterations and final norm
    // 2: also print norm each iteration
    // 3: also print timings of different backend functions
-
    int verbosity = 0;

    int maxit = 200;
-    double tolerance = 1e-2;
+    Scalar tolerance = 1e-2;

    int N;           // number of rows
    int Nb;          // number of blocked rows (Nb*block_size == N)
@@ -74,22 +72,38 @@ public:
    /// \param[in] tolerance                  required relative tolerance for solver
    /// \param[in] platformID                 the OpenCL platform to be used, only used in openclSolver
    /// \param[in] deviceID                   the device to be used
-    BdaSolver(int linear_solver_verbosity, int max_it, double tolerance_) : verbosity(linear_solver_verbosity), maxit(max_it), tolerance(tolerance_) {};
-    BdaSolver(int linear_solver_verbosity, int max_it, double tolerance_, unsigned int deviceID_) : verbosity(linear_solver_verbosity), maxit(max_it), tolerance(tolerance_), deviceID(deviceID_) {};
-    BdaSolver(int linear_solver_verbosity, int max_it, double tolerance_, unsigned int platformID_, unsigned int deviceID_) : verbosity(linear_solver_verbosity), maxit(max_it), tolerance(tolerance_), platformID(platformID_), deviceID(deviceID_) {};
+    BdaSolver(int linear_solver_verbosity, int max_it, Scalar tolerance_)  // overload 1: verbosity, iteration limit and tolerance only
+        : verbosity(linear_solver_verbosity)
+        , maxit(max_it)
+        , tolerance(tolerance_)
+    {}
+    BdaSolver(int linear_solver_verbosity, int max_it,  // overload 2: additionally sets deviceID
+              Scalar tolerance_, unsigned int deviceID_)
+        : verbosity(linear_solver_verbosity)
+        , maxit(max_it)
+        , tolerance(tolerance_)
+        , deviceID(deviceID_) {}
+    BdaSolver(int linear_solver_verbosity, int max_it,  // overload 3: additionally sets platformID and deviceID
+              Scalar tolerance_, unsigned int platformID_,  // Scalar (not double), matching the other overloads and the tolerance member
+              unsigned int deviceID_)
+        : verbosity(linear_solver_verbosity)
+        , maxit(max_it)
+        , tolerance(tolerance_)
+        , platformID(platformID_)
+        , deviceID(deviceID_)
+    {}

    /// Define virtual destructor, so that the derivedclass destructor will be called
-    virtual ~BdaSolver() {};
+    virtual ~BdaSolver() = default;

    /// Define as pure virtual functions, so derivedclass must implement them
-    virtual SolverStatus solve_system(std::shared_ptr<BlockedMatrix<double>> matrix,
-                                      double *b,
-                                      std::shared_ptr<BlockedMatrix<double>> jacMatrix,
-                                      WellContributions<double>& wellContribs,
+    virtual SolverStatus solve_system(std::shared_ptr<BlockedMatrix<Scalar>> matrix,  // matrix and vector arguments use the solver's Scalar type
+                                      Scalar* b,  // NOTE(review): presumably the right-hand side; confirm in the backends
+                                      std::shared_ptr<BlockedMatrix<Scalar>> jacMatrix,  // NOTE(review): presumably the preconditioner matrix; confirm
+                                      WellContributions<Scalar>& wellContribs,
+                                      BdaResult& res) = 0;  // pure virtual: each derived backend provides the implementation

-    virtual void get_result(double *x) = 0;
-
+    virtual void get_result(Scalar* x) = 0;  // pure virtual; NOTE(review): x presumably receives the solution as Scalar values; confirm in the backends
 }; // end class BdaSolver

 } // namespace Accelerator
--- a/opm/simulators/linalg/bda/amgclSolverBackend.cpp
+++ b/opm/simulators/linalg/bda/amgclSolverBackend.cpp
@@ -55,12 +55,13 @@ using Opm::OpmLog;
 using Dune::Timer;

 template <unsigned int block_size>
-amgclSolverBackend<block_size>::amgclSolverBackend(const int          verbosity_,
-                                                   const int          maxit_,
-                                                   const double       tolerance_,
-                                                   const unsigned int platformID_,
-                                                   const unsigned int deviceID_)
-    : BdaSolver<block_size>(verbosity_, maxit_, tolerance_, platformID_, deviceID_)
+amgclSolverBackend<block_size>::
+amgclSolverBackend(const int          verbosity_,
+                   const int          maxit_,
+                   const double       tolerance_,
+                   const unsigned int platformID_,
+                   const unsigned int deviceID_)
+    : Base(verbosity_, maxit_, tolerance_, platformID_, deviceID_)  // Base is BdaSolver<double,block_size>, so tolerance_ remains double for this backend
 {}

 template <unsigned int block_size>
--- a/opm/simulators/linalg/bda/amgclSolverBackend.hpp
+++ b/opm/simulators/linalg/bda/amgclSolverBackend.hpp
@@ -49,9 +49,9 @@ namespace Accelerator
 /// This class does not implement a solver, but converts the BCSR format to normal CSR and uses amgcl for solving
 /// Note amgcl also implements blocked solvers, but looks like it needs unblocked input data
 template <unsigned int block_size>
-class amgclSolverBackend : public BdaSolver<block_size>
+class amgclSolverBackend : public BdaSolver<double,block_size>
 {
-    typedef BdaSolver<block_size> Base;
+    using Base = BdaSolver<double,block_size>;

    using Base::N;
    using Base::Nb;
@@ -74,7 +74,6 @@ class amgclSolverBackend : public BdaSolver<block_size>
                                          amgcl::runtime::solver::wrapper<CPU_Backend>>;

 private:
-
    // amgcl can use different backends, this lets the user choose
    enum Amgcl_backend_type {
        cpu,
--- a/opm/simulators/linalg/bda/cuda/cusparseSolverBackend.cu
+++ b/opm/simulators/linalg/bda/cuda/cusparseSolverBackend.cu
@@ -58,8 +58,11 @@ const cusparseDirection_t order = CUSPARSE_DIRECTION_ROW;


 template <unsigned int block_size>
-cusparseSolverBackend<block_size>::cusparseSolverBackend(int verbosity_, int maxit_, double tolerance_, unsigned int deviceID_) : BdaSolver<block_size>(verbosity_, maxit_, tolerance_, deviceID_) {
-
+cusparseSolverBackend<block_size>::
+cusparseSolverBackend(int verbosity_, int maxit_,
+                      double tolerance_, unsigned int deviceID_)
+    : Base(verbosity_, maxit_, tolerance_, deviceID_)
+{
    // initialize CUDA device, stream and libraries
    cudaSetDevice(deviceID);
    cudaCheckLastError("Could not get device");
--- a/opm/simulators/linalg/bda/cuda/cusparseSolverBackend.hpp
+++ b/opm/simulators/linalg/bda/cuda/cusparseSolverBackend.hpp
@@ -35,9 +35,9 @@ namespace Accelerator

 /// This class implements a cusparse-based ilu0-bicgstab solver on GPU
 template <unsigned int block_size>
-class cusparseSolverBackend : public BdaSolver<block_size> {
+class cusparseSolverBackend : public BdaSolver<double,block_size> {

-    typedef BdaSolver<block_size> Base;
+    using Base = BdaSolver<double,block_size>;

    using Base::N;
    using Base::Nb;
@@ -50,7 +50,6 @@ class cusparseSolverBackend : public BdaSolver<block_size> {
    using Base::initialized;

 private:
-
    cublasHandle_t cublasHandle;
    cusparseHandle_t cusparseHandle;
    cudaStream_t stream;
--- a/opm/simulators/linalg/bda/opencl/openclSolverBackend.cpp
+++ b/opm/simulators/linalg/bda/opencl/openclSolverBackend.cpp
@@ -46,8 +46,17 @@ using Opm::OpmLog;
 using Dune::Timer;

 template <unsigned int block_size>
-openclSolverBackend<block_size>::openclSolverBackend(int verbosity_, int maxit_, double tolerance_, unsigned int platformID_, unsigned int deviceID_, bool opencl_ilu_parallel_, std::string linsolver) : BdaSolver<block_size>(verbosity_, maxit_, tolerance_, platformID_, deviceID_), opencl_ilu_parallel(opencl_ilu_parallel_) {
-
+openclSolverBackend<block_size>::
+openclSolverBackend(int verbosity_,
+                    int maxit_,
+                    double tolerance_,
+                    unsigned int platformID_,
+                    unsigned int deviceID_,
+                    bool opencl_ilu_parallel_,
+                    std::string linsolver)
+    : Base(verbosity_, maxit_, tolerance_, platformID_, deviceID_)
+    , opencl_ilu_parallel(opencl_ilu_parallel_)
+{
    bool use_cpr, use_isai;

    if (linsolver.compare("ilu0") == 0) {
@@ -221,8 +230,11 @@ openclSolverBackend<block_size>::openclSolverBackend(int verbosity_, int maxit_,
 }

 template <unsigned int block_size>
-openclSolverBackend<block_size>::openclSolverBackend(int verbosity_, int maxit_, double tolerance_, bool opencl_ilu_parallel_) :
-    BdaSolver<block_size>(verbosity_, maxit_, tolerance_), opencl_ilu_parallel(opencl_ilu_parallel_)
+openclSolverBackend<block_size>::
+openclSolverBackend(int verbosity_, int maxit_,
+                    double tolerance_, bool opencl_ilu_parallel_)
+    : Base(verbosity_, maxit_, tolerance_)
+    , opencl_ilu_parallel(opencl_ilu_parallel_)
 {
    // prec = std::make_unique<BILU0<block_size> >(opencl_ilu_parallel, verbosity_);
    // cpr = std::make_unique<CPR<block_size> >(verbosity_, opencl_ilu_parallel, /*use_amg=*/false);
--- a/opm/simulators/linalg/bda/opencl/openclSolverBackend.hpp
+++ b/opm/simulators/linalg/bda/opencl/openclSolverBackend.hpp
@@ -34,9 +34,9 @@ namespace Accelerator

 /// This class implements a opencl-based ilu0-bicgstab solver on GPU
 template <unsigned int block_size>
-class openclSolverBackend : public BdaSolver<block_size>
+class openclSolverBackend : public BdaSolver<double,block_size>
 {
-    typedef BdaSolver<block_size> Base;
+    using Base = BdaSolver<double,block_size>;

    using Base::N;
    using Base::Nb;
--- a/opm/simulators/linalg/bda/rocalutionSolverBackend.cpp
+++ b/opm/simulators/linalg/bda/rocalutionSolverBackend.cpp
@@ -56,7 +56,10 @@ using Opm::OpmLog;
 using Dune::Timer;

 template <unsigned int block_size>
-rocalutionSolverBackend<block_size>::rocalutionSolverBackend(int verbosity_, int maxit_, double tolerance_) : BdaSolver<block_size>(verbosity_, maxit_, tolerance_) {
+rocalutionSolverBackend<block_size>::
+rocalutionSolverBackend(int verbosity_, int maxit_, double tolerance_)
+    : Base(verbosity_, maxit_, tolerance_)
+{
    rocalution::init_rocalution();
    rocalution::info_rocalution();
    roc_solver = std::make_unique<rocalution::BiCGStab<rocalution::LocalMatrix<double>, rocalution::LocalVector<double>, double> >();
--- a/opm/simulators/linalg/bda/rocalutionSolverBackend.hpp
+++ b/opm/simulators/linalg/bda/rocalutionSolverBackend.hpp
@@ -39,9 +39,9 @@ namespace Accelerator
 /// This class implements a rocalution based linear solver solver on GPU
 /// It uses ilu0-bicgstab
 template <unsigned int block_size>
-class rocalutionSolverBackend : public BdaSolver<block_size>
+class rocalutionSolverBackend : public BdaSolver<double,block_size>
 {
-    typedef BdaSolver<block_size> Base;
+    using Base = BdaSolver<double,block_size>;

    using Base::N;
    using Base::Nb;
--- a/opm/simulators/linalg/bda/rocsparseSolverBackend.cpp
+++ b/opm/simulators/linalg/bda/rocsparseSolverBackend.cpp
@@ -102,7 +102,11 @@ using Opm::OpmLog;
 using Dune::Timer;

 template <unsigned int block_size>
-rocsparseSolverBackend<block_size>::rocsparseSolverBackend(int verbosity_, int maxit_, double tolerance_, unsigned int platformID_, unsigned int deviceID_) : BdaSolver<block_size>(verbosity_, maxit_, tolerance_, platformID_, deviceID_) {
+rocsparseSolverBackend<block_size>::
+rocsparseSolverBackend(int verbosity_, int maxit_, double tolerance_,
+                       unsigned int platformID_, unsigned int deviceID_)
+    : Base(verbosity_, maxit_, tolerance_, platformID_, deviceID_)
+{
    int numDevices = 0;
    HIP_CHECK(hipGetDeviceCount(&numDevices));
    if (static_cast<int>(deviceID) >= numDevices) {
--- a/opm/simulators/linalg/bda/rocsparseSolverBackend.hpp
+++ b/opm/simulators/linalg/bda/rocsparseSolverBackend.hpp
@@ -38,9 +38,9 @@ namespace Accelerator

 /// This class implements a rocsparse-based ilu0-bicgstab solver on GPU
 template <unsigned int block_size>
-class rocsparseSolverBackend : public BdaSolver<block_size>
+class rocsparseSolverBackend : public BdaSolver<double,block_size>
 {
-    typedef BdaSolver<block_size> Base;
+    using Base = BdaSolver<double,block_size>;

    using Base::N;
    using Base::Nb;