Added amgclSolverBackend

2025-02-25 18:55:30 -06:00 · 2021-06-02 16:19:00 +02:00 · 2021-06-02 16:19:00 +02:00 · c2869810e2
commit c2869810e2
parent 5918c64b54
9 changed files with 453 additions and 24 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -213,6 +213,18 @@ if(OpenCL_FOUND)
  endif()
 endif()

+# search for amgcl
+find_file(AMG_HPP amgcl/amg.hpp HINTS ${AMGCL_INCLUDE_DIRS})
+if(AMG_HPP)
+  set(HAVE_AMGCL 1)
+  set(AMGCL_FOUND 1)
+  include_directories(${AMGCL_INCLUDE_DIRS})
+  set_source_files_properties(opm/simulators/linalg/bda/amgclSolverBackend.cpp PROPERTIES LANGUAGE CUDA)
+else()
+  set(HAVE_AMGCL 0)
+  set(AMGCL_FOUND 0)
+endif()
+
 # read the list of components from this file (in the project directory);
 # it should set various lists with the names of the files to include
 include (CMakeLists_files.cmake)
--- a/CMakeLists_files.cmake
+++ b/CMakeLists_files.cmake
@ -114,7 +114,9 @@ if(HAVE_FPGA)
  list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/MultisegmentWellContribution.cpp)
  list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/WellContributions.cpp)
 endif()
-
+if(AMGCL_FOUND)
+  list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/amgclSolverBackend.cpp)
+endif()
 if(MPI_FOUND)
  list(APPEND MAIN_SOURCE_FILES opm/simulators/utils/ParallelEclipseState.cpp
                                opm/simulators/utils/ParallelSerialization.cpp)
@ -233,6 +235,7 @@ list (APPEND PUBLIC_HEADER_FILES
  opm/simulators/aquifers/AquiferNumerical.hpp
  opm/simulators/aquifers/BlackoilAquiferModel.hpp
  opm/simulators/aquifers/BlackoilAquiferModel_impl.hpp
+  opm/simulators/linalg/bda/amgclSolverBackend.hpp
  opm/simulators/linalg/bda/BdaBridge.hpp
  opm/simulators/linalg/bda/BdaResult.hpp
  opm/simulators/linalg/bda/BdaSolver.hpp
--- a/opm-simulators-prereqs.cmake
+++ b/opm-simulators-prereqs.cmake
@ -8,6 +8,7 @@ set (opm-simulators_CONFIG_VAR
  HAVE_CUDA
  HAVE_OPENCL
  HAVE_FPGA
+  HAVE_AMGCL
  HAVE_SUITESPARSE_UMFPACK_H
  HAVE_DUNE_ISTL
  DUNE_ISTL_VERSION_MAJOR
--- a/opm/simulators/linalg/FlowLinearSolverParameters.hpp
+++ b/opm/simulators/linalg/FlowLinearSolverParameters.hpp
@ -314,7 +314,7 @@ namespace Opm
            EWOMS_REGISTER_PARAM(TypeTag, int, CprMaxEllIter, "MaxIterations of the elliptic pressure part of the cpr solver");
            EWOMS_REGISTER_PARAM(TypeTag, int, CprReuseSetup, "Reuse preconditioner setup. Valid options are 0: recreate the preconditioner for every linear solve, 1: recreate once every timestep, 2: recreate if last linear solve took more than 10 iterations, 3: never recreate");
            EWOMS_REGISTER_PARAM(TypeTag, std::string, Linsolver, "Configuration of solver. Valid options are: ilu0 (default), cpr (an alias for cpr_trueimpes), cpr_quasiimpes, cpr_trueimpes or amg. Alternatively, you can request a configuration to be read from a JSON file by giving the filename here, ending with '.json.'");
-            EWOMS_REGISTER_PARAM(TypeTag, std::string, AcceleratorMode, "Use GPU (cusparseSolver or openclSolver) or FPGA (fpgaSolver) as the linear solver, usage: '--accelerator-mode=[none|cusparse|opencl|fpga]'");
+            EWOMS_REGISTER_PARAM(TypeTag, std::string, AcceleratorMode, "Use GPU (cusparseSolver or openclSolver) or FPGA (fpgaSolver) as the linear solver, usage: '--accelerator-mode=[none|cusparse|opencl|fpga|amgcl]'");
            EWOMS_REGISTER_PARAM(TypeTag, int, BdaDeviceId, "Choose device ID for cusparseSolver or openclSolver, use 'nvidia-smi' or 'clinfo' to determine valid IDs");
            EWOMS_REGISTER_PARAM(TypeTag, int, OpenclPlatformId, "Choose platform ID for openclSolver, use 'clinfo' to determine valid platform IDs");
            EWOMS_REGISTER_PARAM(TypeTag, std::string, OpenclIluReorder, "Choose the reordering strategy for ILU for openclSolver and fpgaSolver, usage: '--opencl-ilu-reorder=[level_scheduling|graph_coloring], level_scheduling behaves like Dune and cusparse, graph_coloring is more aggressive and likely to be faster, but is random-based and generally increases the number of linear solves and linear iterations significantly.");
--- a/opm/simulators/linalg/bda/BdaBridge.cpp
+++ b/opm/simulators/linalg/bda/BdaBridge.cpp
@ -38,8 +38,9 @@
 #include <opm/simulators/linalg/bda/FPGASolverBackend.hpp>
 #endif

-
-#define PRINT_TIMERS_BRIDGE 0
+#if HAVE_AMGCL
+#include <opm/simulators/linalg/bda/amgclSolverBackend.hpp>
+#endif

 typedef Dune::InverseOperatorResult InverseOperatorResult;

@ -59,7 +60,7 @@ BdaBridge<BridgeMatrix, BridgeVector, block_size>::BdaBridge(std::string acceler
                                                             [[maybe_unused]] unsigned int platformID,
                                                             unsigned int deviceID,
                                                             [[maybe_unused]] std::string opencl_ilu_reorder)
-: accelerator_mode(accelerator_mode_)
+: verbosity(linear_solver_verbosity), accelerator_mode(accelerator_mode_)
 {
    if (accelerator_mode.compare("cusparse") == 0) {
 #if HAVE_CUDA
@ -103,12 +104,19 @@ BdaBridge<BridgeMatrix, BridgeVector, block_size>::BdaBridge(std::string acceler
        backend.reset(new bda::FpgaSolverBackend<block_size>(fpga_bitstream, linear_solver_verbosity, maxit, tolerance, ilu_reorder));
 #else
        OPM_THROW(std::logic_error, "Error fpgaSolver was chosen, but FPGA was not enabled by CMake");
+#endif
+    } else if (accelerator_mode.compare("amgcl") == 0) {
+#if HAVE_AMGCL
+        use_gpu = true; // should be replaced by a 'use_bridge' boolean
+        backend.reset(new bda::amgclSolverBackend<block_size>(linear_solver_verbosity, maxit, tolerance, platformID, deviceID));
+#else
+        OPM_THROW(std::logic_error, "Error amgclSolver was chosen, but amgcl was not found by CMake");
 #endif
    } else if (accelerator_mode.compare("none") == 0) {
        use_gpu = false;
        use_fpga = false;
    } else {
-        OPM_THROW(std::logic_error, "Error unknown value for parameter 'AcceleratorMode', should be passed like '--accelerator-mode=[none|cusparse|opencl|fpga]");
+        OPM_THROW(std::logic_error, "Error unknown value for parameter 'AcceleratorMode', should be passed like '--accelerator-mode=[none|cusparse|opencl|fpga|amgcl]");
    }
 }

@ -197,40 +205,30 @@ void BdaBridge<BridgeMatrix, BridgeVector, block_size>::solve_system(BridgeMatri
        const int nnz = nnzb * dim * dim;

        if (dim != 3) {
-            OpmLog::warning("cusparseSolver only accepts blocksize = 3 at this time, will use Dune for the remainder of the program");
-            use_gpu = false;
+            OpmLog::warning("BdaSolver only accepts blocksize = 3 at this time, will use Dune for the remainder of the program");
+            use_gpu = use_fpga = false;
            return;
        }

        if (h_rows.capacity() == 0) {
            h_rows.reserve(Nb+1);
            h_cols.reserve(nnzb);
-#if PRINT_TIMERS_BRIDGE
-            Dune::Timer t;
-#endif
            getSparsityPattern(*mat, h_rows, h_cols);
-#if PRINT_TIMERS_BRIDGE
-            std::ostringstream out;
-            out << "getSparsityPattern() took: " << t.stop() << " s";
-            OpmLog::info(out.str());
-#endif
        }

-#if PRINT_TIMERS_BRIDGE
        Dune::Timer t_zeros;
        int numZeros = checkZeroDiagonal(*mat);
-        std::ostringstream out;
-        out << "Checking zeros took: " << t_zeros.stop() << " s, found " << numZeros << " zeros";
-        OpmLog::info(out.str());
-#else
-        checkZeroDiagonal(*mat);
-#endif
+        if (verbosity >= 2) {
+            std::ostringstream out;
+            out << "Checking zeros took: " << t_zeros.stop() << " s, found " << numZeros << " zeros";
+            OpmLog::info(out.str());
+        }


        /////////////////////////
        // actually solve

-        // assume that underlying data (nonzeroes) from mat (Dune::BCRSMatrix) are contiguous, if this is not the case, cusparseSolver is expected to perform undefined behaviour
+        // assume that underlying data (nonzeroes) from mat (Dune::BCRSMatrix) are contiguous, if this is not the case, the chosen BdaSolver is expected to perform undefined behaviour
        SolverStatus status = backend->solve_system(N, nnz, dim, static_cast<double*>(&(((*mat)[0][0][0][0]))), h_rows.data(), h_cols.data(), static_cast<double*>(&(b[0][0])), wellContribs, result);
        switch(status) {
        case SolverStatus::BDA_SOLVER_SUCCESS:
--- a/opm/simulators/linalg/bda/BdaBridge.hpp
+++ b/opm/simulators/linalg/bda/BdaBridge.hpp
@ -45,6 +45,7 @@ template <class BridgeMatrix, class BridgeVector, int block_size>
 class BdaBridge
 {
 private:
+    int verbosity = 0;
    bool use_gpu = false;
    bool use_fpga = false;
    std::string accelerator_mode;
--- a/opm/simulators/linalg/bda/WellContributions.cpp
+++ b/opm/simulators/linalg/bda/WellContributions.cpp
@ -38,6 +38,11 @@ WellContributions::WellContributions(std::string accelerator_mode, bool useWellC
    else if(accelerator_mode.compare("fpga") == 0){
        // unused for FPGA, but must be defined to avoid error
    }
+    else if(accelerator_mode.compare("amgcl") == 0){
+        if (!useWellConn) {
+            OPM_THROW(std::logic_error, "Error amgcl requires --matrix-add-well-contributions=true");
+        }
+    }
    else{
        OPM_THROW(std::logic_error, "Invalid accelerator mode");
    }
--- a/opm/simulators/linalg/bda/amgclSolverBackend.cpp
+++ b/opm/simulators/linalg/bda/amgclSolverBackend.cpp
@ -0,0 +1,285 @@
+/*
+  Copyright 2020 Equinor ASA
+
+  This file is part of the Open Porous Media project (OPM).
+
+  OPM is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+
+  OPM is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with OPM.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <config.h>
+#include <sstream>
+
+#include <opm/common/OpmLog/OpmLog.hpp>
+#include <opm/common/ErrorMacros.hpp>
+#include <dune/common/timer.hh>
+
+#include <opm/simulators/linalg/bda/BdaResult.hpp>
+#include <opm/simulators/linalg/bda/amgclSolverBackend.hpp>
+
+#include <amgcl/backend/builtin.hpp>
+#include <amgcl/backend/cuda.hpp>
+#include <amgcl/relaxation/cusparse_ilu0.hpp>
+#include <amgcl/adapter/crs_tuple.hpp>
+#include <amgcl/make_block_solver.hpp>
+#include <amgcl/relaxation/as_preconditioner.hpp>
+#include <amgcl/relaxation/ilu0.hpp>
+#include <amgcl/solver/bicgstab.hpp>
+
+#include <amgcl/value_type/static_matrix.hpp>
+#include <amgcl/adapter/block_matrix.hpp>
+
+
+namespace bda
+{
+
+using Opm::OpmLog;
+using Dune::Timer;
+
+template <unsigned int block_size>
+amgclSolverBackend<block_size>::amgclSolverBackend(int verbosity_, int maxit_, double tolerance_, unsigned int platformID_, unsigned int deviceID_) : BdaSolver<block_size>(verbosity_, maxit_, tolerance_, platformID_, deviceID_) {}
+
+
+template <unsigned int block_size>
+amgclSolverBackend<block_size>::~amgclSolverBackend() {}
+
+
+template <unsigned int block_size>
+void amgclSolverBackend<block_size>::initialize(int N_, int nnz_, int dim, double *vals, int *rows, int *cols) {
+    this->N = N_;
+    this->nnz = nnz_;
+    this->nnzb = nnz_ / block_size / block_size;
+
+    Nb = (N + dim - 1) / dim;
+    std::ostringstream out;
+    out << "Initializing amgclSolverBackend, matrix size: " << N << " blocks, nnzb: " << nnzb << "\n";
+    out << "Maxit: " << maxit << std::scientific << ", tolerance: " << tolerance << "\n";
+    out << "PlatformID: " << platformID << ", deviceID: " << deviceID << "\n";
+    OpmLog::info(out.str());
+    out.str("");
+    out.clear();
+
+    A_vals.resize(nnz);
+    A_cols.resize(nnz);
+    A_rows.resize(N + 1);
+    rhs.resize(N);
+    x.resize(N);
+
+    initialized = true;
+} // end initialize()
+
+
+
+template <unsigned int block_size>
+void amgclSolverBackend<block_size>::convert_sparsity_pattern(int *rows, int *cols) {
+    Timer t;
+    const unsigned int bs = block_size;
+    int idx = 0; // indicates the unblocked write index
+
+    A_rows[0] = 0;
+    for (int row = 0; row < Nb; ++row) {
+        int rowStart = rows[row];
+        int rowEnd = rows[row+1];
+        for (unsigned r = 0; r < bs; ++r) {
+            for (int ij = rowStart; ij < rowEnd; ++ij) {
+                for (unsigned c = 0; c < bs; ++c) {
+                    A_cols[idx] = cols[ij] * bs + c;
+                    idx++;
+                }
+            }
+            A_rows[row*bs + r + 1] = idx;
+        }
+    }
+
+    if (verbosity >= 3) {
+        std::ostringstream out;
+        out << "amgclSolverBackend::convert_sparsity_pattern(): " << t.stop() << " s";
+        OpmLog::info(out.str());
+    }
+} // end convert_sparsity_pattern()
+
+template <unsigned int block_size>
+void amgclSolverBackend<block_size>::convert_data(double *vals, int *rows) {
+    Timer t;
+    const unsigned int bs = block_size;
+    int idx = 0; // indicates the unblocked write index
+
+    for (int row = 0; row < Nb; ++row) {
+        int rowStart = rows[row];
+        int rowEnd = rows[row+1];
+        for (unsigned r = 0; r < bs; ++r) {
+            for (int ij = rowStart; ij < rowEnd; ++ij) {
+                for (unsigned c = 0; c < bs; ++c) {
+                    A_vals[idx] = vals[ij*bs*bs + r*bs + c];
+                    idx++;
+                }
+            }
+        }
+    }
+
+    if (verbosity >= 3) {
+        std::ostringstream out;
+        out << "amgclSolverBackend::convert_data(): " << t.stop() << " s";
+        OpmLog::info(out.str());
+    }
+} // end convert_data()
+
+
+template <unsigned int block_size>
+void amgclSolverBackend<block_size>::solve_system(double *b, WellContributions &wellContribs, BdaResult &res) {
+    Timer t;
+    int iters = 0;
+    double error = 0.0;
+
+#define AMGCL_CUDA 0
+
+    try {
+#if AMGCL_CUDA
+#if !HAVE_CUDA
+        #error "Error amgcl is trying to use CUDA, but CUDA was not found by CMake"
+#endif
+        typedef amgcl::backend::cuda<double> Backend;
+
+        Backend::params bprm;
+        cusparseCreate(&bprm.cusparse_handle);
+#else
+        typedef amgcl::static_matrix<double, block_size, block_size> dmat_type; // matrix value type in double precision
+        typedef amgcl::static_matrix<double, block_size, 1> dvec_type; // the corresponding vector value type
+        typedef amgcl::backend::builtin<dmat_type> Backend;
+
+        typename Backend::params bprm; // leave empty
+#endif
+
+        // choose linear solver components
+        typedef amgcl::relaxation::as_preconditioner<Backend, amgcl::relaxation::ilu0> Precond;
+        typedef amgcl::solver::bicgstab<Backend> IterSolver;
+
+#if AMGCL_CUDA
+        typedef amgcl::make_solver<Precond, IterSolver> Solver;
+#else
+        typedef amgcl::make_block_solver<Precond, IterSolver> Solver;
+#endif
+
+        // create matrix object
+        auto A = std::tie(N, A_rows, A_cols, A_vals);
+
+        // set parameters
+        typename Solver::params prm;
+        prm.precond.damping = 0.9;
+        prm.solver.maxiter = maxit;
+        prm.solver.tol = tolerance;
+        prm.solver.verbose = (verbosity >= 2);
+
+        // create solver
+        Solver solve(A, prm, bprm);
+
+        // print info (once)
+        std::call_once(print_info, [&](){
+#if AMGCL_CUDA
+            cudaDeviceProp prop;
+            cudaGetDeviceProperties(&prop, deviceID);
+            std::cout << prop.name << std::endl;
+#endif
+            // print solver structure
+            std::cout << solve << std::endl;
+        });
+
+#if AMGCL_CUDA
+        thrust::device_vector<double> B(b, b + N);
+        thrust::device_vector<double> X(N, 0.0);
+
+        // actually solve
+        std::tie(iters, error) = solve(B, X);
+
+        thrust::copy(X.begin(), X.end(), x.begin());
+#else
+        // reset x vector
+        std::fill(x.begin(), x.end(), 0.0);
+
+        // create blocked vectors
+        auto b_ptr = reinterpret_cast<dvec_type*>(b);
+        auto x_ptr = reinterpret_cast<dvec_type*>(x.data());
+        auto B = amgcl::make_iterator_range(b_ptr, b_ptr + N / block_size);
+        auto X = amgcl::make_iterator_range(x_ptr, x_ptr + N / block_size);
+
+        // actually solve
+        std::tie(iters, error) = solve(A, B, X);
+#endif
+
+    } catch (const std::exception& ex) {
+        std::cerr << "Caught exception: " << ex.what() << std::endl;
+        throw ex;
+    }
+
+    double time_elapsed = t.stop();
+
+    res.iterations = iters;
+    res.reduction = 0.0;
+    res.elapsed = time_elapsed;
+    res.converged = (iters != maxit);
+
+    if (verbosity >= 1) {
+        std::ostringstream out;
+        out << "=== converged: " << res.converged << ", time: " << res.elapsed << \
+            ", time per iteration: " << res.elapsed / iters << ", iterations: " << iters;
+        OpmLog::info(out.str());
+    }
+    if (verbosity >= 3) {
+        std::ostringstream out;
+        out << "amgclSolverBackend::solve_system(): " << time_elapsed << " s";
+        OpmLog::info(out.str());
+    }
+
+} // end solve_system()
+
+
+// copy result to host memory
+// caller must be sure that x is a valid array
+template <unsigned int block_size>
+void amgclSolverBackend<block_size>::get_result(double *x_) {
+    Timer t;
+
+    std::copy(x.begin(), x.end(), x_);
+
+    if (verbosity >= 3) {
+        std::ostringstream out;
+        out << "amgclSolverBackend::get_result(): " << t.stop() << " s";
+        OpmLog::info(out.str());
+    }
+} // end get_result()
+
+
+template <unsigned int block_size>
+SolverStatus amgclSolverBackend<block_size>::solve_system(int N_, int nnz_, int dim, double *vals, int *rows, int *cols, double *b, WellContributions& wellContribs, BdaResult &res) {
+    if (initialized == false) {
+        initialize(N_, nnz_, dim, vals, rows, cols);
+        convert_sparsity_pattern(rows, cols);
+    }
+    convert_data(vals, rows);
+    solve_system(b, wellContribs, res);
+    return SolverStatus::BDA_SOLVER_SUCCESS;
+}
+
+
+#define INSTANTIATE_BDA_FUNCTIONS(n)                                                                \
+template amgclSolverBackend<n>::amgclSolverBackend(int, int, double, unsigned int, unsigned int);   \
+
+INSTANTIATE_BDA_FUNCTIONS(1);
+INSTANTIATE_BDA_FUNCTIONS(2);
+INSTANTIATE_BDA_FUNCTIONS(3);
+INSTANTIATE_BDA_FUNCTIONS(4);
+
+#undef INSTANTIATE_BDA_FUNCTIONS
+
+} // namespace bda
+
--- a/opm/simulators/linalg/bda/amgclSolverBackend.hpp
+++ b/opm/simulators/linalg/bda/amgclSolverBackend.hpp
@ -0,0 +1,124 @@
+/*
+  Copyright 2020 Equinor ASA
+
+  This file is part of the Open Porous Media project (OPM).
+
+  OPM is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+
+  OPM is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with OPM.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef OPM_AMGCLSOLVER_BACKEND_HEADER_INCLUDED
+#define OPM_AMGCLSOLVER_BACKEND_HEADER_INCLUDED
+
+#include <mutex>
+
+#include <opm/simulators/linalg/bda/BdaResult.hpp>
+#include <opm/simulators/linalg/bda/BdaSolver.hpp>
+#include <opm/simulators/linalg/bda/WellContributions.hpp>
+
+#include <amgcl/amg.hpp>
+
+namespace bda
+{
+
+/// This class does not implement a solver, but converts the BCSR format to normal CSR and uses amgcl for solving
+/// Note amgcl also implements blocked solvers, but looks like it needs unblocked input data
+template <unsigned int block_size>
+class amgclSolverBackend : public BdaSolver<block_size>
+{
+    typedef BdaSolver<block_size> Base;
+
+    using Base::N;
+    using Base::Nb;
+    using Base::nnz;
+    using Base::nnzb;
+    using Base::verbosity;
+    using Base::platformID;
+    using Base::deviceID;
+    using Base::maxit;
+    using Base::tolerance;
+    using Base::initialized;
+
+private:
+    // store matrix in CSR format
+    std::vector<unsigned> A_rows, A_cols;
+    std::vector<double> A_vals, rhs;
+    std::vector<double> x;
+    std::once_flag print_info;
+
+
+    /// Initialize GPU and allocate memory
+    /// \param[in] N              number of nonzeroes, divide by dim*dim to get number of blocks
+    /// \param[in] nnz            number of nonzeroes, divide by dim*dim to get number of blocks
+    /// \param[in] dim            size of block
+    /// \param[in] vals           array of nonzeroes, each block is stored row-wise and contiguous, contains nnz values
+    /// \param[in] rows           array of rowPointers, contains N/dim+1 values
+    /// \param[in] cols           array of columnIndices, contains nnz values
+    void initialize(int N, int nnz, int dim, double *vals, int *rows, int *cols);
+
+    /// Convert the BCSR sparsity pattern to a CSR one
+    /// \param[in] rows           array of rowPointers, contains N/dim+1 values
+    /// \param[in] cols           array of columnIndices, contains nnz values
+    void convert_sparsity_pattern(int *rows, int *cols);
+
+    /// Convert the BCSR nonzero data to a CSR format
+    /// \param[in] vals           array of nonzeroes, each block is stored row-wise and contiguous, contains nnz values
+    /// \param[in] rows           array of rowPointers, contains N/dim+1 values
+    void convert_data(double *vals, int *rows);
+
+    /// Solve linear system
+    /// \param[in] b              pointer to b vector
+    /// \param[in] wellContribs   WellContributions, to apply them separately, instead of adding them to matrix A
+    /// \param[inout] res         summary of solver result
+    void solve_system(double *b, WellContributions &wellContribs, BdaResult &res);
+
+public:
+    /// Construct a openclSolver
+    /// \param[in] linear_solver_verbosity    verbosity of openclSolver
+    /// \param[in] maxit                      maximum number of iterations for openclSolver
+    /// \param[in] tolerance                  required relative tolerance for openclSolver
+    /// \param[in] platformID                 the OpenCL platform to be used
+    /// \param[in] deviceID                   the device to be used
+    amgclSolverBackend(int linear_solver_verbosity, int maxit, double tolerance, unsigned int platformID, unsigned int deviceID);
+
+    /// Destroy a openclSolver, and free memory
+    ~amgclSolverBackend();
+
+    /// Solve linear system, A*x = b, matrix A must be in blocked-CSR format
+    /// \param[in] N              number of rows, divide by dim to get number of blockrows
+    /// \param[in] nnz            number of nonzeroes, divide by dim*dim to get number of blocks
+    /// \param[in] nnz_prec       number of nonzeroes of matrix for ILU0, divide by dim*dim to get number of blocks
+    /// \param[in] dim            size of block
+    /// \param[in] vals           array of nonzeroes, each block is stored row-wise and contiguous, contains nnz values
+    /// \param[in] rows           array of rowPointers, contains N/dim+1 values
+    /// \param[in] cols           array of columnIndices, contains nnz values
+    /// \param[in] vals_prec      array of nonzeroes for preconditioner
+    /// \param[in] rows_prec      array of rowPointers for preconditioner
+    /// \param[in] cols_prec      array of columnIndices for preconditioner
+    /// \param[in] b              input vector, contains N values
+    /// \param[in] wellContribs   WellContributions, to apply them separately, instead of adding them to matrix A
+    /// \param[inout] res         summary of solver result
+    /// \return                   status code
+    SolverStatus solve_system(int N, int nnz, int dim, double *vals, int *rows, int *cols, double *b, WellContributions& wellContribs, BdaResult &res) override;
+
+    /// Get result after linear solve, and peform postprocessing if necessary
+    /// \param[inout] x          resulting x vector, caller must guarantee that x points to a valid array
+    void get_result(double *x) override;
+
+}; // end class amgclSolverBackend
+
+} // namespace bda
+
+#endif
+
+