refactor preconditioner class

2025-02-25 18:55:30 -06:00 · 2024-06-04 09:47:03 +02:00
parent 83b50f08e6
commit 071f009bf3
12 changed files with 220 additions and 157 deletions
--- a/CMakeLists_files.cmake
+++ b/CMakeLists_files.cmake
@@ -256,7 +256,7 @@ if(USE_BDA_BRIDGE)
    list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/opencl/opencl.cpp)
    list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/opencl/openclKernels.cpp)
    list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/opencl/OpenclMatrix.cpp)
-    list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/opencl/Preconditioner.cpp)
+    list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/opencl/openclPreconditioner.cpp)
    list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/opencl/openclSolverBackend.cpp)
    list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/opencl/openclWellContributions.cpp)
  endif()
@@ -650,6 +650,7 @@ if (USE_BDA_BRIDGE)
    opm/simulators/linalg/bda/BdaBridge.hpp
    opm/simulators/linalg/bda/BdaResult.hpp
    opm/simulators/linalg/bda/BdaSolver.hpp
+    opm/simulators/linalg/bda/Preconditioner.hpp
    opm/simulators/linalg/bda/opencl/openclBILU0.hpp
    opm/simulators/linalg/bda/BlockedMatrix.hpp
    opm/simulators/linalg/bda/opencl/openclCPR.hpp
@@ -661,7 +662,7 @@ if (USE_BDA_BRIDGE)
    opm/simulators/linalg/bda/opencl/opencl.hpp
    opm/simulators/linalg/bda/opencl/openclKernels.hpp
    opm/simulators/linalg/bda/opencl/OpenclMatrix.hpp
-    opm/simulators/linalg/bda/opencl/Preconditioner.hpp
+    opm/simulators/linalg/bda/opencl/openclPreconditioner.hpp
    opm/simulators/linalg/bda/opencl/openclSolverBackend.hpp
    opm/simulators/linalg/bda/opencl/openclWellContributions.hpp
    opm/simulators/linalg/bda/Matrix.hpp
--- a/opm/simulators/linalg/bda/opencl/Preconditioner.hpp
+++ b/opm/simulators/linalg/bda/opencl/Preconditioner.hpp
@@ -1,5 +1,5 @@
 /*
-  Copyright 2021 Equinor ASA
+  Copyright 2024 Equinor ASA

  This file is part of the Open Porous Media project (OPM).

@@ -20,12 +20,18 @@
 #ifndef OPM_PRECONDITIONER_HEADER_INCLUDED
 #define OPM_PRECONDITIONER_HEADER_INCLUDED

+#if HAVE_OPENCL
 #include <opm/simulators/linalg/bda/opencl/opencl.hpp>
-
-#include <memory>
+#endif

 namespace Opm::Accelerator {

+enum PreconditionerType { // selects which concrete preconditioner create() instantiates
+    BILU0, // blocked ILU0
+    CPR,   // Constrained Pressure Residual
+    BISAI  // blocked Incomplete Sparse Approximate Inverse
+};
+
 template<class Scalar> class BlockedMatrix;

 template<class Scalar, unsigned int block_size>
@@ -38,47 +44,38 @@ protected:
    int nnzb = 0;    // number of blocks of the matrix
    int verbosity = 0;

-    std::shared_ptr<cl::Context> context;
-    std::shared_ptr<cl::CommandQueue> queue;
-    std::vector<cl::Event> events;
-    cl_int err;
-
    Preconditioner(int verbosity_) :
    verbosity(verbosity_)
    {};

 public:
-    enum class Type {
-        BILU0,
-        CPR,
-        BISAI
-    };

-    static std::unique_ptr<Preconditioner> create(Type type,
+    virtual ~Preconditioner() = default;
+    
+    static std::unique_ptr<Preconditioner> create(PreconditionerType type,
                                                  bool opencl_ilu_parallel,
                                                  int verbosity);

-    virtual ~Preconditioner() = default;
-
-    // nested Preconditioners might need to override this
-    virtual void setOpencl(std::shared_ptr<cl::Context>& context,
-                           std::shared_ptr<cl::CommandQueue>& queue);
-
+#if HAVE_OPENCL
    // apply preconditioner, x = prec(y)
    virtual void apply(const cl::Buffer& y, cl::Buffer& x) = 0;
+#endif
+
+    // apply preconditioner, x = prec(y)
+    virtual void apply(double& y, double& x) = 0;

    // analyze matrix, e.g. the sparsity pattern
    // probably only called once
    // the version with two params can be overloaded, if not, it will default to using the one param version
    virtual bool analyze_matrix(BlockedMatrix<Scalar>* mat) = 0;
    virtual bool analyze_matrix(BlockedMatrix<Scalar>* mat,
-                                BlockedMatrix<Scalar>* jacMat);
+                                BlockedMatrix<Scalar>* jacMat) = 0;

    // create/update preconditioner, probably used every linear solve
    // the version with two params can be overloaded, if not, it will default to using the one param version
    virtual bool create_preconditioner(BlockedMatrix<Scalar>* mat) = 0;
    virtual bool create_preconditioner(BlockedMatrix<Scalar>* mat,
-                                       BlockedMatrix<Scalar>* jacMat);
+                                       BlockedMatrix<Scalar>* jacMat) = 0;
 };

 } // namespace Opm::Accelerator
--- a/opm/simulators/linalg/bda/opencl/openclBILU0.cpp
+++ b/opm/simulators/linalg/bda/opencl/openclBILU0.cpp
@@ -36,7 +36,7 @@ namespace Opm::Accelerator {
 using Dune::Timer;

 template<class Scalar, unsigned int block_size>
-BILU0<Scalar,block_size>::BILU0(bool opencl_ilu_parallel_, int verbosity_)
+openclBILU0<Scalar,block_size>::openclBILU0(bool opencl_ilu_parallel_, int verbosity_)
    : Base(verbosity_)
    , opencl_ilu_parallel(opencl_ilu_parallel_)
 {
@@ -46,13 +46,13 @@ BILU0<Scalar,block_size>::BILU0(bool opencl_ilu_parallel_, int verbosity_)
 }

 template<class Scalar, unsigned int block_size>
-bool BILU0<Scalar,block_size>::analyze_matrix(BlockedMatrix<Scalar>* mat)
+bool openclBILU0<Scalar,block_size>::analyze_matrix(BlockedMatrix<Scalar>* mat)
 {
    return analyze_matrix(mat, nullptr);
 }

 template<class Scalar, unsigned int block_size>
-bool BILU0<Scalar,block_size>::
+bool openclBILU0<Scalar,block_size>::
 analyze_matrix(BlockedMatrix<Scalar>* mat, BlockedMatrix<Scalar>* jacMat)
 {
    const unsigned int bs = block_size;
@@ -80,7 +80,7 @@ analyze_matrix(BlockedMatrix<Scalar>* mat, BlockedMatrix<Scalar>* jacMat)
                        CSCRowIndices.data(), CSCColPointers.data(), Nb);
        if(verbosity >= 3){
            std::ostringstream out;
-            out << "BILU0 convert CSR to CSC: " << t_convert.stop() << " s";
+            out << "openclBILU0 convert CSR to CSC: " << t_convert.stop() << " s";
            OpmLog::info(out.str());
        }
    } else {
@@ -105,11 +105,11 @@ analyze_matrix(BlockedMatrix<Scalar>* mat, BlockedMatrix<Scalar>* jacMat)
    }

    if (verbosity >= 1) {
-        out << "BILU0 analysis took: " << t_analysis.stop() << " s, " << numColors << " colors\n";
+        out << "openclBILU0 analysis took: " << t_analysis.stop() << " s, " << numColors << " colors\n";
    }

 #if CHOW_PATEL
-    out << "BILU0 CHOW_PATEL: " << CHOW_PATEL << ", CHOW_PATEL_GPU: " << CHOW_PATEL_GPU;
+    out << "openclBILU0 CHOW_PATEL: " << CHOW_PATEL << ", CHOW_PATEL_GPU: " << CHOW_PATEL_GPU;
 #endif
    OpmLog::info(out.str());

@@ -169,20 +169,20 @@ analyze_matrix(BlockedMatrix<Scalar>* mat, BlockedMatrix<Scalar>* jacMat)
    events.clear();
    if (err != CL_SUCCESS) {
        // enqueueWriteBuffer is C and does not throw exceptions like C++ OpenCL
-        OPM_THROW(std::logic_error, "BILU0 OpenCL enqueueWriteBuffer error");
+        OPM_THROW(std::logic_error, "openclBILU0 OpenCL enqueueWriteBuffer error");
    }

    return true;
 }

 template<class Scalar, unsigned int block_size>
-bool BILU0<Scalar,block_size>::create_preconditioner(BlockedMatrix<Scalar>* mat)
+bool openclBILU0<Scalar,block_size>::create_preconditioner(BlockedMatrix<Scalar>* mat)
 {
    return create_preconditioner(mat, nullptr);
 }

 template<class Scalar, unsigned int block_size>
-bool BILU0<Scalar,block_size>::
+bool openclBILU0<Scalar,block_size>::
 create_preconditioner(BlockedMatrix<Scalar>* mat, BlockedMatrix<Scalar>* jacMat)
 {
    const unsigned int bs = block_size;
@@ -196,7 +196,7 @@ create_preconditioner(BlockedMatrix<Scalar>* mat, BlockedMatrix<Scalar>* jacMat)

    if (verbosity >= 3){
        std::ostringstream out;
-        out << "BILU0 memcpy: " << t_copy.stop() << " s";
+        out << "openclBILU0 memcpy: " << t_copy.stop() << " s";
        OpmLog::info(out.str());
    }

@@ -239,12 +239,12 @@ create_preconditioner(BlockedMatrix<Scalar>* mat, BlockedMatrix<Scalar>* jacMat)
    events.clear();
    if (err != CL_SUCCESS) {
        // enqueueWriteBuffer is C and does not throw exceptions like C++ OpenCL
-        OPM_THROW(std::logic_error, "BILU0 OpenCL enqueueWriteBuffer error");
+        OPM_THROW(std::logic_error, "openclBILU0 OpenCL enqueueWriteBuffer error");
    }

    if (verbosity >= 3) {
        std::ostringstream out;
-        out << "BILU0 copy to GPU: " << t_copyToGpu.stop() << " s";
+        out << "openclBILU0 copy to GPU: " << t_copyToGpu.stop() << " s";
        OpmLog::info(out.str());
    }

@@ -264,7 +264,7 @@ create_preconditioner(BlockedMatrix<Scalar>* mat, BlockedMatrix<Scalar>* jacMat)

    if (verbosity >= 3) {
        queue->finish();
-        out << "BILU0 decomposition: " << t_decomposition.stop() << " s";
+        out << "openclBILU0 decomposition: " << t_decomposition.stop() << " s";
        OpmLog::info(out.str());
    }
 #endif // CHOW_PATEL
@@ -276,7 +276,7 @@ create_preconditioner(BlockedMatrix<Scalar>* mat, BlockedMatrix<Scalar>* jacMat)
 // however, if individual kernel calls are timed, waiting for events is needed
 // behavior on other GPUs is untested
 template<class Scalar, unsigned int block_size>
-void BILU0<Scalar,block_size>::apply(const cl::Buffer& y, cl::Buffer& x)
+void openclBILU0<Scalar,block_size>::apply(const cl::Buffer& y, cl::Buffer& x)
 {
    const Scalar relaxation = 0.9;
    cl::Event event;
@@ -311,18 +311,18 @@ void BILU0<Scalar,block_size>::apply(const cl::Buffer& y, cl::Buffer& x)

    if (verbosity >= 4) {
        std::ostringstream out;
-        out << "BILU0 apply: " << t_apply.stop() << " s";
+        out << "openclBILU0 apply: " << t_apply.stop() << " s";
        OpmLog::info(out.str());
    }
 }

 #define INSTANCE_TYPE(T)       \
-    template class BILU0<T,1>; \
-    template class BILU0<T,2>; \
-    template class BILU0<T,3>; \
-    template class BILU0<T,4>; \
-    template class BILU0<T,5>; \
-    template class BILU0<T,6>;
+    template class openclBILU0<T,1>; \
+    template class openclBILU0<T,2>; \
+    template class openclBILU0<T,3>; \
+    template class openclBILU0<T,4>; \
+    template class openclBILU0<T,5>; \
+    template class openclBILU0<T,6>;

 INSTANCE_TYPE(double)

--- a/opm/simulators/linalg/bda/opencl/openclBILU0.hpp
+++ b/opm/simulators/linalg/bda/opencl/openclBILU0.hpp
@@ -17,15 +17,15 @@
  along with OPM.  If not, see <http://www.gnu.org/licenses/>.
 */

-#ifndef BILU0_HPP
-#define BILU0_HPP
+#ifndef OPM_OPENCLBILU0_HPP
+#define OPM_OPENCLBILU0_HPP

 #include <mutex>

 #include <opm/simulators/linalg/bda/BlockedMatrix.hpp>

 #include <opm/simulators/linalg/bda/opencl/opencl.hpp>
-#include <opm/simulators/linalg/bda/opencl/Preconditioner.hpp>
+#include <opm/simulators/linalg/bda/opencl/openclPreconditioner.hpp>
 #include <opm/simulators/linalg/bda/opencl/ChowPatelIlu.hpp>


@@ -35,9 +35,9 @@ namespace Opm::Accelerator {
 /// The decomposition is done on GPU, using exact decomposition, or ChowPatel decomposition
 /// The preconditioner is applied via two exact triangular solves
 template<class Scalar, unsigned int block_size>
-class BILU0 : public Preconditioner<Scalar,block_size>
+class openclBILU0 : public openclPreconditioner<Scalar,block_size>
 {
-    using Base = Preconditioner<Scalar,block_size>;
+    using Base = openclPreconditioner<Scalar,block_size>;

    using Base::N;
    using Base::Nb;
@@ -87,7 +87,7 @@ private:

 public:

-    BILU0(bool opencl_ilu_parallel, int verbosity);
+    openclBILU0(bool opencl_ilu_parallel, int verbosity);

    // analysis, extract parallelism if specified
    bool analyze_matrix(BlockedMatrix<Scalar>* mat) override;
@@ -103,6 +103,7 @@ public:
    // via Lz = y
    // and Ux = z
    void apply(const cl::Buffer& y, cl::Buffer& x) override;
+    void apply([[maybe_unused]] double& y, [[maybe_unused]] double& x) override {} // intentionally empty: only the cl::Buffer overload does work in this class

    std::tuple<std::vector<int>, std::vector<int>, std::vector<int>>
    get_preconditioner_structure()
--- a/opm/simulators/linalg/bda/opencl/openclBISAI.cpp
+++ b/opm/simulators/linalg/bda/opencl/openclBISAI.cpp
@@ -40,17 +40,17 @@ using Opm::OpmLog;
 using Dune::Timer;

 template<class Scalar, unsigned int block_size>
-BISAI<Scalar,block_size>::BISAI(bool opencl_ilu_parallel_, int verbosity_)
+openclBISAI<Scalar,block_size>::openclBISAI(bool opencl_ilu_parallel_, int verbosity_)
    : Base(verbosity_)
 {
 #if CHOW_PATEL
    OPM_THROW(std::logic_error, "Error --linear-solver=isai cannot be used if ChowPatelIlu is used, probably defined by CMake\n");
 #endif
-    bilu0 = std::make_unique<BILU0<Scalar,block_size>>(opencl_ilu_parallel_, verbosity_);
+    bilu0 = std::make_unique<openclBILU0<Scalar,block_size>>(opencl_ilu_parallel_, verbosity_);
 }

 template<class Scalar, unsigned int block_size>
-void BISAI<Scalar,block_size>::
+void openclBISAI<Scalar,block_size>::
 setOpencl(std::shared_ptr<cl::Context>& context_,
          std::shared_ptr<cl::CommandQueue>& queue_)
 {
@@ -79,13 +79,13 @@ buildCsrToCscOffsetMap(std::vector<int> colPointers, std::vector<int> rowIndices
 }

 template<class Scalar, unsigned int block_size>
-bool BISAI<Scalar,block_size>::analyze_matrix(BlockedMatrix<Scalar>* mat)
+bool openclBISAI<Scalar,block_size>::analyze_matrix(BlockedMatrix<Scalar>* mat)
 {
    return analyze_matrix(mat, nullptr);
 }

 template<class Scalar, unsigned int block_size>
-bool BISAI<Scalar,block_size>::
+bool openclBISAI<Scalar,block_size>::
 analyze_matrix(BlockedMatrix<Scalar>* mat, BlockedMatrix<Scalar>* jacMat)
 {
    const unsigned int bs = block_size;
@@ -108,7 +108,7 @@ analyze_matrix(BlockedMatrix<Scalar>* mat, BlockedMatrix<Scalar>* jacMat)
 }

 template<class Scalar, unsigned int block_size>
-void BISAI<Scalar,block_size>::buildLowerSubsystemsStructures()
+void openclBISAI<Scalar,block_size>::buildLowerSubsystemsStructures()
 {
    lower.subsystemPointers.assign(Nb + 1, 0);

@@ -138,14 +138,14 @@ void BISAI<Scalar,block_size>::buildLowerSubsystemsStructures()

    if (verbosity >= 4) {
        std::ostringstream out;
-        out << "BISAI buildLowerSubsystemsStructures time: "
+        out << "openclBISAI buildLowerSubsystemsStructures time: "
            << t_buildLowerSubsystemsStructures.stop() << " s";
        OpmLog::info(out.str());
    }
 }

 template<class Scalar, unsigned int block_size>
-void BISAI<Scalar,block_size>::buildUpperSubsystemsStructures()
+void openclBISAI<Scalar,block_size>::buildUpperSubsystemsStructures()
 {
    upper.subsystemPointers.assign(Nb + 1, 0);

@@ -175,14 +175,14 @@ void BISAI<Scalar,block_size>::buildUpperSubsystemsStructures()

    if (verbosity >= 4) {
        std::ostringstream out;
-        out << "BISAI buildUpperSubsystemsStructures time: "
+        out << "openclBISAI buildUpperSubsystemsStructures time: "
            << t_buildUpperSubsystemsStructures.stop() << " s";
        OpmLog::info(out.str());
    }
 }

 template<class Scalar, unsigned int block_size>
-bool BISAI<Scalar,block_size>::
+bool openclBISAI<Scalar,block_size>::
 create_preconditioner(BlockedMatrix<Scalar>* mat, BlockedMatrix<Scalar>* jacMat)
 {
    const unsigned int bs = block_size;
@@ -300,7 +300,7 @@ create_preconditioner(BlockedMatrix<Scalar>* mat, BlockedMatrix<Scalar>* jacMat)

        if (err != CL_SUCCESS) {
            // enqueueWriteBuffer is C and does not throw exceptions like C++ OpenCL
-            OPM_THROW(std::logic_error, "BISAI OpenCL enqueueWriteBuffer error");
+            OPM_THROW(std::logic_error, "openclBISAI OpenCL enqueueWriteBuffer error");
        }
    });

@@ -326,7 +326,7 @@ create_preconditioner(BlockedMatrix<Scalar>* mat, BlockedMatrix<Scalar>* jacMat)

    if (verbosity >= 4) {
        std::ostringstream out;
-        out << "BISAI createPreconditioner time: " << t_preconditioner.stop() << " s";
+        out << "openclBISAI createPreconditioner time: " << t_preconditioner.stop() << " s";
        OpmLog::info(out.str());
    }

@@ -334,14 +334,14 @@ create_preconditioner(BlockedMatrix<Scalar>* mat, BlockedMatrix<Scalar>* jacMat)
 }

 template<class Scalar, unsigned int block_size>
-bool BISAI<Scalar,block_size>::
+bool openclBISAI<Scalar,block_size>::
 create_preconditioner(BlockedMatrix<Scalar>* mat)
 {
    return create_preconditioner(mat, nullptr);
 }

 template<class Scalar, unsigned int block_size>
-void BISAI<Scalar,block_size>::apply(const cl::Buffer& x, cl::Buffer& y)
+void openclBISAI<Scalar,block_size>::apply(const cl::Buffer& x, cl::Buffer& y)
 {
    const unsigned int bs = block_size;

@@ -354,12 +354,12 @@ void BISAI<Scalar,block_size>::apply(const cl::Buffer& x, cl::Buffer& y)
 }

 #define INSTANCE_TYPE(T)       \
-    template class BISAI<T,1>; \
-    template class BISAI<T,2>; \
-    template class BISAI<T,3>; \
-    template class BISAI<T,4>; \
-    template class BISAI<T,5>; \
-    template class BISAI<T,6>;
+    template class openclBISAI<T,1>; \
+    template class openclBISAI<T,2>; \
+    template class openclBISAI<T,3>; \
+    template class openclBISAI<T,4>; \
+    template class openclBISAI<T,5>; \
+    template class openclBISAI<T,6>;

 INSTANCE_TYPE(double)

--- a/opm/simulators/linalg/bda/opencl/openclBISAI.hpp
+++ b/opm/simulators/linalg/bda/opencl/openclBISAI.hpp
@@ -17,14 +17,14 @@
  along with OPM.  If not, see <http://www.gnu.org/licenses/>.
 */

-#ifndef BISAI_HPP
-#define BISAI_HPP
+#ifndef OPM_OPENCLBISAI_HPP
+#define OPM_OPENCLBISAI_HPP

 #include <mutex>

 #include <opm/simulators/linalg/bda/opencl/opencl.hpp>
 #include <opm/simulators/linalg/bda/opencl/openclBILU0.hpp>
-#include <opm/simulators/linalg/bda/opencl/Preconditioner.hpp>
+#include <opm/simulators/linalg/bda/opencl/openclPreconditioner.hpp>

 namespace Opm::Accelerator {

@@ -33,9 +33,9 @@ template<class Scalar> class BlockedMatrix;
 /// This class implements a Blocked version of the Incomplete Sparse Approximate Inverse (ISAI) preconditioner.
 /// Inspired by the paper "Incomplete Sparse Approximate Inverses for Parallel Preconditioning" by Anzt et. al.
 template<class Scalar, unsigned int block_size>
-class BISAI : public Preconditioner<Scalar,block_size>
+class openclBISAI : public openclPreconditioner<Scalar,block_size>
 {
-    using Base = Preconditioner<Scalar,block_size>;
+    using Base = openclPreconditioner<Scalar,block_size>;

    using Base::N;
    using Base::Nb;
@@ -68,7 +68,7 @@ private:
    cl::Buffer d_invL_x;

    bool opencl_ilu_parallel;
-    std::unique_ptr<BILU0<Scalar,block_size>> bilu0;
+    std::unique_ptr<openclBILU0<Scalar,block_size>> bilu0;

    /// Struct that holds the structure of the small subsystems for each column
    struct subsystemStructure {
@@ -107,7 +107,7 @@ private:
    void buildUpperSubsystemsStructures();

 public:
-    BISAI(bool opencl_ilu_parallel, int verbosity);
+    openclBISAI(bool opencl_ilu_parallel, int verbosity);

    // set own Opencl variables, but also that of the bilu0 preconditioner
    void setOpencl(std::shared_ptr<cl::Context>& context,
@@ -125,6 +125,7 @@ public:

    // apply preconditioner, x = prec(y)
    void apply(const cl::Buffer& y, cl::Buffer& x) override;
+    void apply([[maybe_unused]] double& y, [[maybe_unused]] double& x) override {} // intentionally empty: only the cl::Buffer overload does work in this class
 };

 /// Similar function to csrPatternToCsc. It gives an offset map from CSR to CSC instead of the full CSR to CSC conversion.
--- a/opm/simulators/linalg/bda/opencl/openclCPR.cpp
+++ b/opm/simulators/linalg/bda/opencl/openclCPR.cpp
@@ -39,16 +39,16 @@ namespace Opm::Accelerator {
 using Dune::Timer;

 template<class Scalar, unsigned int block_size>
-CPR<Scalar,block_size>::CPR(bool opencl_ilu_parallel_, int verbosity_)
+openclCPR<Scalar,block_size>::openclCPR(bool opencl_ilu_parallel_, int verbosity_)
    : Base(verbosity_)
    , opencl_ilu_parallel(opencl_ilu_parallel_)
 {
-    bilu0 = std::make_unique<BILU0<Scalar,block_size> >(opencl_ilu_parallel, verbosity_);
+    bilu0 = std::make_unique<openclBILU0<Scalar,block_size> >(opencl_ilu_parallel, verbosity_);
    diagIndices.resize(1);
 }

 template<class Scalar, unsigned int block_size>
-void CPR<Scalar,block_size>::
+void openclCPR<Scalar,block_size>::
 setOpencl(std::shared_ptr<cl::Context>& context_, std::shared_ptr<cl::CommandQueue>& queue_)
 {
    context = context_;
@@ -58,7 +58,7 @@ setOpencl(std::shared_ptr<cl::Context>& context_, std::shared_ptr<cl::CommandQue
 }

 template<class Scalar, unsigned int block_size>
-bool CPR<Scalar,block_size>::analyze_matrix(BlockedMatrix<Scalar>* mat_)
+bool openclCPR<Scalar,block_size>::analyze_matrix(BlockedMatrix<Scalar>* mat_)
 {
    this->Nb = mat_->Nb;
    this->nnzb = mat_->nnzbs;
@@ -71,7 +71,7 @@ bool CPR<Scalar,block_size>::analyze_matrix(BlockedMatrix<Scalar>* mat_)
 }

 template<class Scalar, unsigned int block_size>
-bool CPR<Scalar,block_size>::
+bool openclCPR<Scalar,block_size>::
 analyze_matrix(BlockedMatrix<Scalar>* mat_, BlockedMatrix<Scalar>* jacMat)
 {
    this->Nb = mat_->Nb;
@@ -86,14 +86,14 @@ analyze_matrix(BlockedMatrix<Scalar>* mat_, BlockedMatrix<Scalar>* jacMat)
 }

 template<class Scalar, unsigned int block_size>
-bool CPR<Scalar,block_size>::
+bool openclCPR<Scalar,block_size>::
 create_preconditioner(BlockedMatrix<Scalar>* mat_, BlockedMatrix<Scalar>* jacMat)
 {
    Dune::Timer t_bilu0;
    bool result = bilu0->create_preconditioner(mat_, jacMat);
    if (verbosity >= 3) {
        std::ostringstream out;
-        out << "CPR create_preconditioner bilu0(): " << t_bilu0.stop() << " s";
+        out << "openclCPR create_preconditioner bilu0(): " << t_bilu0.stop() << " s";
        OpmLog::info(out.str());
    }

@@ -101,21 +101,21 @@ create_preconditioner(BlockedMatrix<Scalar>* mat_, BlockedMatrix<Scalar>* jacMat
    create_preconditioner_amg(mat); // already points to bilu0::rmat if needed
    if (verbosity >= 3) {
        std::ostringstream out;
-        out << "CPR create_preconditioner_amg(): " << t_amg.stop() << " s";
+        out << "openclCPR create_preconditioner_amg(): " << t_amg.stop() << " s";
        OpmLog::info(out.str());
    }
    return result;
 }

 template<class Scalar, unsigned int block_size>
-bool CPR<Scalar,block_size>::
+bool openclCPR<Scalar,block_size>::
 create_preconditioner(BlockedMatrix<Scalar>* mat_)
 {
    Dune::Timer t_bilu0;
    bool result = bilu0->create_preconditioner(mat_);
    if (verbosity >= 3) {
        std::ostringstream out;
-        out << "CPR create_preconditioner bilu0(): " << t_bilu0.stop() << " s";
+        out << "openclCPR create_preconditioner bilu0(): " << t_bilu0.stop() << " s";
        OpmLog::info(out.str());
    }

@@ -123,7 +123,7 @@ create_preconditioner(BlockedMatrix<Scalar>* mat_)
    create_preconditioner_amg(mat); // already points to bilu0::rmat if needed
    if (verbosity >= 3) {
        std::ostringstream out;
-        out << "CPR create_preconditioner_amg(): " << t_amg.stop() << " s";
+        out << "openclCPR create_preconditioner_amg(): " << t_amg.stop() << " s";
        OpmLog::info(out.str());
    }
    return result;
@@ -168,7 +168,7 @@ void solve_transposed_3x3(const Scalar* A, const Scalar* b, Scalar* x)
 }

 template<class Scalar, unsigned int block_size>
-void CPR<Scalar, block_size>::init_opencl_buffers()
+void openclCPR<Scalar, block_size>::init_opencl_buffers()
 {
    d_Amatrices.reserve(num_levels);
    d_Rmatrices.reserve(num_levels - 1);
@@ -193,7 +193,7 @@ void CPR<Scalar, block_size>::init_opencl_buffers()
 }

 template<class Scalar, unsigned int block_size>
-void CPR<Scalar,block_size>::opencl_upload()
+void openclCPR<Scalar,block_size>::opencl_upload()
 {
    d_mat->upload(queue.get(), mat);

@@ -215,7 +215,7 @@ void CPR<Scalar,block_size>::opencl_upload()
    events.clear();
    if (err != CL_SUCCESS) {
        // enqueueWriteBuffer is C and does not throw exceptions like C++ OpenCL
-        OPM_THROW(std::logic_error, "CPR OpenCL enqueueWriteBuffer error");
+        OPM_THROW(std::logic_error, "openclCPR OpenCL enqueueWriteBuffer error");
    }
    for (unsigned int i = 0; i < Rmatrices.size(); ++i) {
        d_Rmatrices[i].upload(queue.get(), &Rmatrices[i]);
@@ -223,7 +223,7 @@ void CPR<Scalar,block_size>::opencl_upload()
 }

 template<class Scalar, unsigned int block_size>
-void CPR<Scalar,block_size>::
+void openclCPR<Scalar,block_size>::
 create_preconditioner_amg(BlockedMatrix<Scalar>* mat_)
 {
    this->mat = mat_;
@@ -350,7 +350,7 @@ create_preconditioner_amg(BlockedMatrix<Scalar>* mat_)
        }

        // initialize OpenclMatrices and Buffers if needed
-        auto init_func = std::bind(&CPR::init_opencl_buffers, this);
+        auto init_func = std::bind(&openclCPR::init_opencl_buffers, this);
        std::call_once(opencl_buffers_allocated, init_func);

        // upload matrices and vectors to GPU
@@ -363,7 +363,7 @@ create_preconditioner_amg(BlockedMatrix<Scalar>* mat_)
 }

 template<class Scalar, unsigned int block_size>
-void CPR<Scalar,block_size>::analyzeHierarchy()
+void openclCPR<Scalar,block_size>::analyzeHierarchy()
 {
    const typename DuneAmg::ParallelMatrixHierarchy& matrixHierarchy = dune_amg->matrices();

@@ -418,7 +418,7 @@ void CPR<Scalar,block_size>::analyzeHierarchy()
 }

 template<class Scalar, unsigned int block_size>
-void CPR<Scalar,block_size>::analyzeAggregateMaps()
+void openclCPR<Scalar,block_size>::analyzeAggregateMaps()
 {
    PcolIndices.resize(num_levels - 1);
    Rmatrices.clear();
@@ -458,7 +458,7 @@ void CPR<Scalar,block_size>::analyzeAggregateMaps()
 }

 template<class Scalar, unsigned int block_size>
-void CPR<Scalar,block_size>::amg_cycle_gpu(const int level, cl::Buffer& y, cl::Buffer& x)
+void openclCPR<Scalar,block_size>::amg_cycle_gpu(const int level, cl::Buffer& y, cl::Buffer& x)
 {
    OpenclMatrix<Scalar>* A = &d_Amatrices[level];
    OpenclMatrix<Scalar>* R = &d_Rmatrices[level];
@@ -475,7 +475,7 @@ void CPR<Scalar,block_size>::amg_cycle_gpu(const int level, cl::Buffer& y, cl::B
        events.clear();
        if (err != CL_SUCCESS) {
            // enqueueWriteBuffer is C and does not throw exceptions like C++ OpenCL
-            OPM_THROW(std::logic_error, "CPR OpenCL enqueueReadBuffer error");
+            OPM_THROW(std::logic_error, "openclCPR OpenCL enqueueReadBuffer error");
        }

        // solve coarsest level using umfpack
@@ -488,7 +488,7 @@ void CPR<Scalar,block_size>::amg_cycle_gpu(const int level, cl::Buffer& y, cl::B
        events.clear();
        if (err != CL_SUCCESS) {
            // enqueueWriteBuffer is C and does not throw exceptions like C++ OpenCL
-            OPM_THROW(std::logic_error, "CPR OpenCL enqueueWriteBuffer error");
+            OPM_THROW(std::logic_error, "openclCPR OpenCL enqueueWriteBuffer error");
        }
        return;
    }
@@ -521,7 +521,7 @@ void CPR<Scalar,block_size>::amg_cycle_gpu(const int level, cl::Buffer& y, cl::B

 // x = prec(y)
 template<class Scalar, unsigned int block_size>
-void CPR<Scalar,block_size>::apply_amg(const cl::Buffer& y, cl::Buffer& x)
+void openclCPR<Scalar,block_size>::apply_amg(const cl::Buffer& y, cl::Buffer& x)
 {
    // 0-initialize u and x vectors
    events.resize(d_u.size() + 1);
@@ -548,13 +548,13 @@ void CPR<Scalar,block_size>::apply_amg(const cl::Buffer& y, cl::Buffer& x)
 }

 template<class Scalar, unsigned int block_size>
-void CPR<Scalar,block_size>::apply(const cl::Buffer& y, cl::Buffer& x)
+void openclCPR<Scalar,block_size>::apply(const cl::Buffer& y, cl::Buffer& x)
 {
    Dune::Timer t_bilu0;
    bilu0->apply(y, x);
    if (verbosity >= 4) {
        std::ostringstream out;
-        out << "CPR apply bilu0(): " << t_bilu0.stop() << " s";
+        out << "openclCPR apply bilu0(): " << t_bilu0.stop() << " s";
        OpmLog::info(out.str());
    }

@@ -562,18 +562,18 @@ void CPR<Scalar,block_size>::apply(const cl::Buffer& y, cl::Buffer& x)
    apply_amg(y, x);
    if (verbosity >= 4) {
        std::ostringstream out;
-        out << "CPR apply amg(): " << t_amg.stop() << " s";
+        out << "openclCPR apply amg(): " << t_amg.stop() << " s";
        OpmLog::info(out.str());
    }
 }

 #define INSTANCE_TYPE(T)     \
-    template class CPR<T,1>; \
-    template class CPR<T,2>; \
-    template class CPR<T,3>; \
-    template class CPR<T,4>; \
-    template class CPR<T,5>; \
-    template class CPR<T,6>;
+    template class openclCPR<T,1>; \
+    template class openclCPR<T,2>; \
+    template class openclCPR<T,3>; \
+    template class openclCPR<T,4>; \
+    template class openclCPR<T,5>; \
+    template class openclCPR<T,6>;

 INSTANCE_TYPE(double)

--- a/opm/simulators/linalg/bda/opencl/openclCPR.hpp
+++ b/opm/simulators/linalg/bda/opencl/openclCPR.hpp
@@ -17,8 +17,8 @@
  along with OPM.  If not, see <http://www.gnu.org/licenses/>.
 */

-#ifndef OPM_CPR_HPP
-#define OPM_CPR_HPP
+#ifndef OPM_OPENCLCPR_HPP
+#define OPM_OPENCLCPR_HPP

 #include <mutex>

@@ -29,7 +29,7 @@
 #include <opm/simulators/linalg/bda/opencl/openclBILU0.hpp>
 #include <opm/simulators/linalg/bda/Matrix.hpp>
 #include <opm/simulators/linalg/bda/opencl/OpenclMatrix.hpp>
-#include <opm/simulators/linalg/bda/opencl/Preconditioner.hpp>
+#include <opm/simulators/linalg/bda/opencl/openclPreconditioner.hpp>

 #include <opm/simulators/linalg/bda/opencl/openclSolverBackend.hpp>

@@ -39,9 +39,9 @@ template<class Scalar> class BlockedMatrix;

 /// This class implements a Constrained Pressure Residual (CPR) preconditioner
 template<class Scalar, unsigned int block_size>
-class CPR : public Preconditioner<Scalar,block_size>
+class openclCPR : public openclPreconditioner<Scalar,block_size>
 {
-    using Base = Preconditioner<Scalar,block_size>;
+    using Base = openclPreconditioner<Scalar,block_size>;

    using Base::N;
    using Base::Nb;
@@ -69,7 +69,7 @@ private:
    std::unique_ptr<cl::Buffer> d_coarse_y, d_coarse_x; // stores the scalar vectors
    std::once_flag opencl_buffers_allocated;  // only allocate OpenCL Buffers once

-    std::unique_ptr<BILU0<Scalar,block_size>> bilu0;                    // Blocked ILU0 preconditioner
+    std::unique_ptr<openclBILU0<Scalar,block_size>> bilu0;                    // Blocked ILU0 preconditioner
    BlockedMatrix<Scalar>* mat = nullptr;    // input matrix, blocked

    using DuneMat = Dune::BCRSMatrix<Dune::FieldMatrix<Scalar, 1, 1> >;
@@ -112,7 +112,7 @@ private:
    void create_preconditioner_amg(BlockedMatrix<Scalar>* mat);

 public:
-    CPR(bool opencl_ilu_parallel, int verbosity);
+    openclCPR(bool opencl_ilu_parallel, int verbosity);

    bool analyze_matrix(BlockedMatrix<Scalar>* mat) override;
    bool analyze_matrix(BlockedMatrix<Scalar>* mat,
@@ -125,6 +125,7 @@ public:
    // applies blocked ilu0
    // also applies amg for pressure component
    void apply(const cl::Buffer& y, cl::Buffer& x) override;
+    void apply([[maybe_unused]] double& y, [[maybe_unused]] double& x) override {} // intentionally empty: only the cl::Buffer overload does work in this class

    bool create_preconditioner(BlockedMatrix<Scalar>* mat) override;
    bool create_preconditioner(BlockedMatrix<Scalar>* mat,
--- a/opm/simulators/linalg/bda/opencl/openclPreconditioner.cpp
+++ b/opm/simulators/linalg/bda/opencl/openclPreconditioner.cpp
@@ -18,7 +18,6 @@
 */

 #include <config.h>
-#include <opm/simulators/linalg/bda/opencl/Preconditioner.hpp>

 #include <opm/common/TimingMacros.hpp>
 #include <opm/common/ErrorMacros.hpp>
@@ -26,6 +25,7 @@
 #include <opm/simulators/linalg/bda/opencl/openclBILU0.hpp>
 #include <opm/simulators/linalg/bda/opencl/openclBISAI.hpp>
 #include <opm/simulators/linalg/bda/opencl/openclCPR.hpp>
+#include <opm/simulators/linalg/bda/opencl/openclPreconditioner.hpp>

 #include <memory>
 #include <string>
@@ -33,25 +33,16 @@
 namespace Opm::Accelerator {

 template<class Scalar, unsigned int block_size>
-void Preconditioner<Scalar,block_size>::
- setOpencl(std::shared_ptr<cl::Context>& context_,
-           std::shared_ptr<cl::CommandQueue>& queue_)
-{
-    context = context_;
-    queue = queue_;
-}
-
-template<class Scalar, unsigned int block_size>
-std::unique_ptr<Preconditioner<Scalar,block_size>>
-Preconditioner<Scalar,block_size>::create(Type type, bool opencl_ilu_parallel, int verbosity)
+std::unique_ptr<openclPreconditioner<Scalar,block_size>>
+openclPreconditioner<Scalar,block_size>::create(PreconditionerType type, int verbosity, bool opencl_ilu_parallel)
 {
    switch (type ) {
-    case Type::BILU0:
-        return std::make_unique<BILU0<Scalar,block_size>>(opencl_ilu_parallel, verbosity);
-    case Type::CPR:
-        return std::make_unique<CPR<Scalar,block_size>>(opencl_ilu_parallel, verbosity);
-    case Type::BISAI:
-        return std::make_unique<BISAI<Scalar,block_size>>(opencl_ilu_parallel, verbosity);
+    case PreconditionerType::BILU0:
+        return std::make_unique<openclBILU0<Scalar,block_size>>(opencl_ilu_parallel, verbosity);
+    case PreconditionerType::CPR:
+        return std::make_unique<openclCPR<Scalar,block_size>>(opencl_ilu_parallel, verbosity);
+    case PreconditionerType::BISAI:
+        return std::make_unique<openclBISAI<Scalar,block_size>>(opencl_ilu_parallel, verbosity);
    }

    OPM_THROW(std::logic_error,
@@ -59,28 +50,37 @@ Preconditioner<Scalar,block_size>::create(Type type, bool opencl_ilu_parallel, i
 }

 template<class Scalar, unsigned int block_size>
-bool Preconditioner<Scalar,block_size>::
-analyze_matrix(BlockedMatrix<Scalar>* mat,
-               [[maybe_unused]] BlockedMatrix<Scalar>* jacMat)
+void openclPreconditioner<Scalar,block_size>:: // stores the OpenCL context and command queue handed in by the caller
+ setOpencl(std::shared_ptr<cl::Context>& context_,
+           std::shared_ptr<cl::CommandQueue>& queue_)
 {
-    return analyze_matrix(mat);
+    context = context_; // copies the shared_ptr, so the context stays alive while this object holds it
+    queue = queue_; // same for the command queue
 }

-template<class Scalar, unsigned int block_size>
-bool Preconditioner<Scalar,block_size>::
-create_preconditioner(BlockedMatrix<Scalar>* mat,
-                      [[maybe_unused]] BlockedMatrix<Scalar>* jacMat)
-{
-    return create_preconditioner(mat);
-}
+// template<class Scalar, unsigned int block_size>
+// bool Preconditioner<Scalar,block_size>::
+// analyze_matrix(BlockedMatrix<Scalar>* mat,
+//                [[maybe_unused]] BlockedMatrix<Scalar>* jacMat)
+// {
+//     return analyze_matrix(mat);
+// }
+
+// template<class Scalar, unsigned int block_size>
+// bool Preconditioner<Scalar,block_size>::
+// create_preconditioner(BlockedMatrix<Scalar>* mat,
+//                       [[maybe_unused]] BlockedMatrix<Scalar>* jacMat)
+// {
+//     return create_preconditioner(mat);
+// }

 #define INSTANCE_TYPE(T)                \
-    template class Preconditioner<T,1>; \
-    template class Preconditioner<T,2>; \
-    template class Preconditioner<T,3>; \
-    template class Preconditioner<T,4>; \
-    template class Preconditioner<T,5>; \
-    template class Preconditioner<T,6>;
+    template class openclPreconditioner<T,1>; \
+    template class openclPreconditioner<T,2>; \
+    template class openclPreconditioner<T,3>; \
+    template class openclPreconditioner<T,4>; \
+    template class openclPreconditioner<T,5>; \
+    template class openclPreconditioner<T,6>;

 INSTANCE_TYPE(double)

--- a/opm/simulators/linalg/bda/opencl/openclPreconditioner.hpp
+++ b/opm/simulators/linalg/bda/opencl/openclPreconditioner.hpp
@@ -0,0 +1,62 @@
+/*
+  Copyright 2021 Equinor ASA
+
+  This file is part of the Open Porous Media project (OPM).
+
+  OPM is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+
+  OPM is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with OPM.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef OPM_OPENCLPRECONDITIONER_HEADER_INCLUDED
+#define OPM_OPENCLPRECONDITIONER_HEADER_INCLUDED
+
+#include <opm/simulators/linalg/bda/opencl/opencl.hpp>
+#include <opm/simulators/linalg/bda/Preconditioner.hpp>
+
+namespace Opm::Accelerator {
+
+template<class Scalar> class BlockedMatrix;
+
+template <class Scalar, unsigned int block_size>
+class openclPreconditioner : public Preconditioner<Scalar, block_size>
+{
+    // Base class of the OpenCL preconditioners; holds the OpenCL context and command queue as protected members.
+protected:
+    std::shared_ptr<cl::Context> context; // assigned by setOpencl()
+    std::shared_ptr<cl::CommandQueue> queue; // assigned by setOpencl()
+    std::vector<cl::Event> events;
+    cl_int err; // NOTE(review): not initialized by the constructor below
+
+    openclPreconditioner(int verbosity_) :
+    Preconditioner<Scalar, block_size>(verbosity_)
+    {};
+
+public:
+    virtual ~openclPreconditioner() = default;
+    // factory: returns the openclBILU0, openclCPR or openclBISAI implementation selected by type
+    static std::unique_ptr<openclPreconditioner<Scalar, block_size>> create(PreconditionerType type, int verbosity, bool opencl_ilu_parallel);
+
+    // stores the given OpenCL context and command queue; nested Preconditioners might need to override this
+    virtual void setOpencl(std::shared_ptr<cl::Context>& context, std::shared_ptr<cl::CommandQueue>& queue);
+
+    // apply preconditioner on OpenCL buffers, x = prec(y)
+    virtual void apply(const cl::Buffer& y, cl::Buffer& x) = 0;
+ 
+    // create/update preconditioner, probably used every linear solve
+    // both overloads are pure virtual here, so every concrete subclass has to implement the one param and the two param (mat, jacMat) version
+    virtual bool create_preconditioner(BlockedMatrix<Scalar> *mat) = 0;
+    virtual bool create_preconditioner(BlockedMatrix<Scalar> *mat, BlockedMatrix<Scalar> *jacMat) = 0;
+};
+} // namespace Opm::Accelerator
+
+#endif
--- a/opm/simulators/linalg/bda/opencl/openclSolverBackend.cpp
+++ b/opm/simulators/linalg/bda/opencl/openclSolverBackend.cpp
@@ -71,15 +71,15 @@ openclSolverBackend(int verbosity_,
        OPM_THROW(std::logic_error, "Error unknown value for argument --linear-solver, " + linsolver);
    }

-    using PreconditionerType = Preconditioner<Scalar,block_size>;
+    using PreconditionerType = typename Opm::Accelerator::PreconditionerType;
     if (use_cpr) {
-        prec = PreconditionerType::create(PreconditionerType::Type::CPR,
-                                          opencl_ilu_parallel, verbosity);
+        prec = openclPreconditioner<Scalar,block_size>::create(PreconditionerType::CPR,
+                                                               verbosity, opencl_ilu_parallel); // create() takes (type, verbosity, opencl_ilu_parallel); bool and int convert silently, so the order matters
     } else if (use_isai) {
-        prec = PreconditionerType::create(PreconditionerType::Type::BISAI,
-                                          opencl_ilu_parallel, verbosity);
+        prec = openclPreconditioner<Scalar,block_size>::create(PreconditionerType::BISAI,
+                                                               verbosity, opencl_ilu_parallel);
     } else {
-        prec = PreconditionerType::create(PreconditionerType::Type::BILU0,
-                                          opencl_ilu_parallel, verbosity);
+        prec = openclPreconditioner<Scalar,block_size>::create(PreconditionerType::BILU0,
+                                                               verbosity, opencl_ilu_parallel);
     }

--- a/opm/simulators/linalg/bda/opencl/openclSolverBackend.hpp
+++ b/opm/simulators/linalg/bda/opencl/openclSolverBackend.hpp
@@ -25,7 +25,7 @@
 #include <opm/simulators/linalg/bda/BdaSolver.hpp>
 #include <opm/simulators/linalg/bda/WellContributions.hpp>

-#include <opm/simulators/linalg/bda/opencl/Preconditioner.hpp>
+#include <opm/simulators/linalg/bda/opencl/openclPreconditioner.hpp>

 namespace Opm::Accelerator {

@@ -60,7 +60,7 @@ private:

    bool useJacMatrix = false;

-    std::unique_ptr<Preconditioner<Scalar,block_size>> prec;
+    std::unique_ptr<openclPreconditioner<Scalar,block_size>> prec;
                                                                  // can perform blocked ILU0 and AMG on pressure component
    bool is_root;                                                 // allow for nested solvers, the root solver is called by BdaBridge
    bool analysis_done = false;