refactor cuistl namespace

Author: Tobias Meyer Andersen
Date:   2024-08-22 13:52:50 +02:00
Parent: c4f686227b
Commit: 3f4ae4ddf4

72 changed files with 304 additions and 304 deletions
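
The change is mechanical: every occurrence of the Opm::cuistl namespace (including its detail sub-namespaces) becomes Opm::gpuistl. Only the namespace is renamed; the include paths (opm/simulators/linalg/cuistl/...) and the Cu-prefixed class names are untouched by this commit. A minimal sketch of what a hypothetical downstream call site sees, using the element-count constructor that appears in the tests at the end of this diff:

    // Hypothetical caller; illustration only.
    #include <opm/simulators/linalg/cuistl/CuVector.hpp>

    void makeDeviceVector()
    {
        // Before this commit: Opm::cuistl::CuVector<double> v(100);
        Opm::gpuistl::CuVector<double> v(100); // after the rename
    }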

View File

@@ -163,7 +163,7 @@ void Main::initMPI()
}
#if HAVE_CUDA
Opm::cuistl::setDevice(FlowGenericVanguard::comm().rank(), FlowGenericVanguard::comm().size());
Opm::gpuistl::setDevice(FlowGenericVanguard::comm().rank(), FlowGenericVanguard::comm().size());
#endif
#endif // HAVE_MPI
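
setDevice binds each MPI rank to a CUDA device before the simulator starts, and is documented further down in this diff as a no-op when no device is present. A sketch of the kind of mapping it performs, assuming a simple round-robin scheme (the actual implementation in set_device.cu also logs the chosen device and may differ in detail):

    #include <cuda_runtime.h>

    // Sketch only: round-robin rank-to-device mapping is an assumption.
    void setDeviceSketch(int mpiRank)
    {
        int deviceCount = 0;
        cudaGetDeviceCount(&deviceCount);
        if (deviceCount == 0) {
            return; // no CUDA device present: do nothing
        }
        cudaSetDevice(mpiRank % deviceCount);
    }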

View File

@@ -201,7 +201,7 @@ namespace Dune
#endif
#if HAVE_CUDA
} else if (solver_type == "cubicgstab") {
linsolver_.reset(new Opm::cuistl::SolverAdapter<Operator, Dune::BiCGSTABSolver, VectorType>(
linsolver_.reset(new Opm::gpuistl::SolverAdapter<Operator, Dune::BiCGSTABSolver, VectorType>(
*linearoperator_for_solver_,
*scalarproduct_,
preconditioner_,

View File

@@ -325,12 +325,12 @@ struct StandardPreconditioners {
F::addCreator("CUILU0", [](const O& op, const P& prm, const std::function<V()>&, std::size_t, const C& comm) {
const double w = prm.get<double>("relaxation", 1.0);
using field_type = typename V::field_type;
using CuILU0 = typename cuistl::
CuSeqILU0<M, cuistl::CuVector<field_type>, cuistl::CuVector<field_type>>;
using CuILU0 = typename gpuistl::
CuSeqILU0<M, gpuistl::CuVector<field_type>, gpuistl::CuVector<field_type>>;
auto cuILU0 = std::make_shared<CuILU0>(op.getmat(), w);
auto adapted = std::make_shared<cuistl::PreconditionerAdapter<V, V, CuILU0>>(cuILU0);
auto wrapped = std::make_shared<cuistl::CuBlockPreconditioner<V, V, Comm>>(adapted, comm);
auto adapted = std::make_shared<gpuistl::PreconditionerAdapter<V, V, CuILU0>>(cuILU0);
auto wrapped = std::make_shared<gpuistl::CuBlockPreconditioner<V, V, Comm>>(adapted, comm);
return wrapped;
});
@@ -338,11 +338,11 @@ struct StandardPreconditioners {
const double w = prm.get<double>("relaxation", 1.0);
using field_type = typename V::field_type;
using CuJac =
typename cuistl::CuJac<M, cuistl::CuVector<field_type>, cuistl::CuVector<field_type>>;
typename gpuistl::CuJac<M, gpuistl::CuVector<field_type>, gpuistl::CuVector<field_type>>;
auto cuJac = std::make_shared<CuJac>(op.getmat(), w);
auto adapted = std::make_shared<cuistl::PreconditionerAdapter<V, V, CuJac>>(cuJac);
auto wrapped = std::make_shared<cuistl::CuBlockPreconditioner<V, V, Comm>>(adapted, comm);
auto adapted = std::make_shared<gpuistl::PreconditionerAdapter<V, V, CuJac>>(cuJac);
auto wrapped = std::make_shared<gpuistl::CuBlockPreconditioner<V, V, Comm>>(adapted, comm);
return wrapped;
});
@@ -350,11 +350,11 @@ struct StandardPreconditioners {
const bool split_matrix = prm.get<bool>("split_matrix", true);
const bool tune_gpu_kernels = prm.get<bool>("tune_gpu_kernels", true);
using field_type = typename V::field_type;
using CuDILU = typename cuistl::CuDILU<M, cuistl::CuVector<field_type>, cuistl::CuVector<field_type>>;
using CuDILU = typename gpuistl::CuDILU<M, gpuistl::CuVector<field_type>, gpuistl::CuVector<field_type>>;
auto cuDILU = std::make_shared<CuDILU>(op.getmat(), split_matrix, tune_gpu_kernels);
auto adapted = std::make_shared<cuistl::PreconditionerAdapter<V, V, CuDILU>>(cuDILU);
auto wrapped = std::make_shared<cuistl::CuBlockPreconditioner<V, V, Comm>>(adapted, comm);
auto adapted = std::make_shared<gpuistl::PreconditionerAdapter<V, V, CuDILU>>(cuDILU);
auto wrapped = std::make_shared<gpuistl::CuBlockPreconditioner<V, V, Comm>>(adapted, comm);
return wrapped;
});
@@ -362,11 +362,11 @@ struct StandardPreconditioners {
const bool split_matrix = prm.get<bool>("split_matrix", true);
const bool tune_gpu_kernels = prm.get<bool>("tune_gpu_kernels", true);
using field_type = typename V::field_type;
using OpmCuILU0 = typename cuistl::OpmCuILU0<M, cuistl::CuVector<field_type>, cuistl::CuVector<field_type>>;
using OpmCuILU0 = typename gpuistl::OpmCuILU0<M, gpuistl::CuVector<field_type>, gpuistl::CuVector<field_type>>;
auto cuilu0 = std::make_shared<OpmCuILU0>(op.getmat(), split_matrix, tune_gpu_kernels);
auto adapted = std::make_shared<cuistl::PreconditionerAdapter<V, V, OpmCuILU0>>(cuilu0);
auto wrapped = std::make_shared<cuistl::CuBlockPreconditioner<V, V, Comm>>(adapted, comm);
auto adapted = std::make_shared<gpuistl::PreconditionerAdapter<V, V, OpmCuILU0>>(cuilu0);
auto wrapped = std::make_shared<gpuistl::CuBlockPreconditioner<V, V, Comm>>(adapted, comm);
return wrapped;
});
#endif
@@ -585,9 +585,9 @@ struct StandardPreconditioners<Operator, Dune::Amg::SequentialInformation> {
F::addCreator("CUILU0", [](const O& op, const P& prm, const std::function<V()>&, std::size_t) {
const double w = prm.get<double>("relaxation", 1.0);
using field_type = typename V::field_type;
using CuILU0 = typename cuistl::
CuSeqILU0<M, cuistl::CuVector<field_type>, cuistl::CuVector<field_type>>;
return std::make_shared<cuistl::PreconditionerAdapter<V, V, CuILU0>>(
using CuILU0 = typename gpuistl::
CuSeqILU0<M, gpuistl::CuVector<field_type>, gpuistl::CuVector<field_type>>;
return std::make_shared<gpuistl::PreconditionerAdapter<V, V, CuILU0>>(
std::make_shared<CuILU0>(op.getmat(), w));
});
@@ -597,10 +597,10 @@ struct StandardPreconditioners<Operator, Dune::Amg::SequentialInformation> {
using VTo = Dune::BlockVector<Dune::FieldVector<float, block_type::dimension>>;
using matrix_type_to =
typename Dune::BCRSMatrix<Dune::FieldMatrix<float, block_type::dimension, block_type::dimension>>;
using CuILU0 = typename cuistl::
CuSeqILU0<matrix_type_to, cuistl::CuVector<float>, cuistl::CuVector<float>>;
using Adapter = typename cuistl::PreconditionerAdapter<VTo, VTo, CuILU0>;
using Converter = typename cuistl::PreconditionerConvertFieldTypeAdapter<Adapter, M, V, V>;
using CuILU0 = typename gpuistl::
CuSeqILU0<matrix_type_to, gpuistl::CuVector<float>, gpuistl::CuVector<float>>;
using Adapter = typename gpuistl::PreconditionerAdapter<VTo, VTo, CuILU0>;
using Converter = typename gpuistl::PreconditionerConvertFieldTypeAdapter<Adapter, M, V, V>;
auto converted = std::make_shared<Converter>(op.getmat());
auto adapted = std::make_shared<Adapter>(std::make_shared<CuILU0>(converted->getConvertedMatrix(), w));
converted->setUnderlyingPreconditioner(adapted);
@@ -611,8 +611,8 @@ struct StandardPreconditioners<Operator, Dune::Amg::SequentialInformation> {
const double w = prm.get<double>("relaxation", 1.0);
using field_type = typename V::field_type;
using CUJac =
typename cuistl::CuJac<M, cuistl::CuVector<field_type>, cuistl::CuVector<field_type>>;
return std::make_shared<cuistl::PreconditionerAdapter<V, V, CUJac>>(
typename gpuistl::CuJac<M, gpuistl::CuVector<field_type>, gpuistl::CuVector<field_type>>;
return std::make_shared<gpuistl::PreconditionerAdapter<V, V, CUJac>>(
std::make_shared<CUJac>(op.getmat(), w));
});
@@ -620,17 +620,17 @@ struct StandardPreconditioners<Operator, Dune::Amg::SequentialInformation> {
const bool split_matrix = prm.get<bool>("split_matrix", true);
const bool tune_gpu_kernels = prm.get<bool>("tune_gpu_kernels", true);
using field_type = typename V::field_type;
using CUILU0 = typename cuistl::OpmCuILU0<M, cuistl::CuVector<field_type>, cuistl::CuVector<field_type>>;
using CUILU0 = typename gpuistl::OpmCuILU0<M, gpuistl::CuVector<field_type>, gpuistl::CuVector<field_type>>;
return std::make_shared<cuistl::PreconditionerAdapter<V, V, CUILU0>>(std::make_shared<CUILU0>(op.getmat(), split_matrix, tune_gpu_kernels));
return std::make_shared<gpuistl::PreconditionerAdapter<V, V, CUILU0>>(std::make_shared<CUILU0>(op.getmat(), split_matrix, tune_gpu_kernels));
});
F::addCreator("CUDILU", [](const O& op, [[maybe_unused]] const P& prm, const std::function<V()>&, std::size_t) {
const bool split_matrix = prm.get<bool>("split_matrix", true);
const bool tune_gpu_kernels = prm.get<bool>("tune_gpu_kernels", true);
using field_type = typename V::field_type;
using CUDILU = typename cuistl::CuDILU<M, cuistl::CuVector<field_type>, cuistl::CuVector<field_type>>;
return std::make_shared<cuistl::PreconditionerAdapter<V, V, CUDILU>>(std::make_shared<CUDILU>(op.getmat(), split_matrix, tune_gpu_kernels));
using CUDILU = typename gpuistl::CuDILU<M, gpuistl::CuVector<field_type>, gpuistl::CuVector<field_type>>;
return std::make_shared<gpuistl::PreconditionerAdapter<V, V, CUDILU>>(std::make_shared<CUDILU>(op.getmat(), split_matrix, tune_gpu_kernels));
});
F::addCreator("CUDILUFloat", [](const O& op, [[maybe_unused]] const P& prm, const std::function<V()>&, std::size_t) {
@@ -639,9 +639,9 @@ struct StandardPreconditioners<Operator, Dune::Amg::SequentialInformation> {
using block_type = typename V::block_type;
using VTo = Dune::BlockVector<Dune::FieldVector<float, block_type::dimension>>;
using matrix_type_to = typename Dune::BCRSMatrix<Dune::FieldMatrix<float, block_type::dimension, block_type::dimension>>;
using CuDILU = typename cuistl::CuDILU<matrix_type_to, cuistl::CuVector<float>, cuistl::CuVector<float>>;
using Adapter = typename cuistl::PreconditionerAdapter<VTo, VTo, CuDILU>;
using Converter = typename cuistl::PreconditionerConvertFieldTypeAdapter<Adapter, M, V, V>;
using CuDILU = typename gpuistl::CuDILU<matrix_type_to, gpuistl::CuVector<float>, gpuistl::CuVector<float>>;
using Adapter = typename gpuistl::PreconditionerAdapter<VTo, VTo, CuDILU>;
using Converter = typename gpuistl::PreconditionerConvertFieldTypeAdapter<Adapter, M, V, V>;
auto converted = std::make_shared<Converter>(op.getmat());
auto adapted = std::make_shared<Adapter>(std::make_shared<CuDILU>(converted->getConvertedMatrix(), split_matrix, tune_gpu_kernels));
converted->setUnderlyingPreconditioner(adapted);
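
Every creator above follows the same wrapping pattern: construct the GPU preconditioner, adapt it to CPU vector types with PreconditionerAdapter, and, in the parallel case, wrap it in CuBlockPreconditioner. A sketch of that pattern with concrete types, assuming a 3x3 blocked double-precision system; the cuistl header names are assumed from the include layout seen elsewhere in this diff:

    #include <dune/common/fmatrix.hh>
    #include <dune/istl/bcrsmatrix.hh>
    #include <dune/istl/bvector.hh>
    // Header names below are assumed from the cuistl file layout.
    #include <opm/simulators/linalg/cuistl/CuBlockPreconditioner.hpp>
    #include <opm/simulators/linalg/cuistl/CuDILU.hpp>
    #include <opm/simulators/linalg/cuistl/CuVector.hpp>
    #include <opm/simulators/linalg/cuistl/PreconditionerAdapter.hpp>
    #include <memory>

    using M = Dune::BCRSMatrix<Dune::FieldMatrix<double, 3, 3>>;
    using V = Dune::BlockVector<Dune::FieldVector<double, 3>>;

    template <class Comm>
    auto makeGpuDilu(const M& matrix, const Comm& comm,
                     bool split_matrix = true, bool tune_gpu_kernels = true)
    {
        using field_type = typename V::field_type;
        using CuDILU = Opm::gpuistl::CuDILU<M,
                                            Opm::gpuistl::CuVector<field_type>,
                                            Opm::gpuistl::CuVector<field_type>>;
        auto cuDILU = std::make_shared<CuDILU>(matrix, split_matrix, tune_gpu_kernels);
        auto adapted = std::make_shared<Opm::gpuistl::PreconditionerAdapter<V, V, CuDILU>>(cuDILU);
        return std::make_shared<Opm::gpuistl::CuBlockPreconditioner<V, V, Comm>>(adapted, comm);
    }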

View File

@@ -25,7 +25,7 @@
#include <opm/simulators/linalg/cuistl/PreconditionerHolder.hpp>
#include <opm/simulators/linalg/cuistl/detail/preconditioner_should_call_post_pre.hpp>
namespace Opm::cuistl
namespace Opm::gpuistl
{
//! @brief Is an adaptation of Dune::BlockPreconditioner that works within the CuISTL framework.
//!
@@ -125,5 +125,5 @@ private:
//! \brief the communication object
std::shared_ptr<const communication_type> m_communication;
};
} // namespace Opm::cuistl
} // namespace Opm::gpuistl
#endif

View File

@@ -24,7 +24,7 @@
#include <opm/simulators/linalg/cuistl/CuView.hpp>
#include <opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp>
namespace Opm::cuistl
namespace Opm::gpuistl
{
template <class T>
@@ -201,4 +201,4 @@ template CuView<const double> make_view<double>(const CuBuffer<double>&);
template CuView<const float> make_view<float>(const CuBuffer<float>&);
template CuView<const int> make_view<int>(const CuBuffer<int>&);
} // namespace Opm::cuistl
} // namespace Opm::gpuistl

View File

@@ -29,7 +29,7 @@
#include <string>
namespace Opm::cuistl
namespace Opm::gpuistl
{
/**
@@ -276,5 +276,5 @@ private:
template <class T>
CuView<const T> make_view(const CuBuffer<T>&);
} // namespace Opm::cuistl
} // namespace Opm::gpuistl
#endif
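
make_view exposes a CuBuffer's device memory as a read-only CuView without copying. A minimal sketch that takes an existing buffer as input, using the signature instantiated above (the CuBuffer header name is assumed from the file layout):

    #include <opm/simulators/linalg/cuistl/CuBuffer.hpp>
    #include <opm/simulators/linalg/cuistl/CuView.hpp>

    // Borrow a non-owning const view of an existing device buffer.
    Opm::gpuistl::CuView<const double>
    borrowView(const Opm::gpuistl::CuBuffer<double>& buf)
    {
        return Opm::gpuistl::make_view(buf);
    }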

View File

@@ -37,7 +37,7 @@
#include <functional>
#include <utility>
#include <string>
namespace Opm::cuistl
namespace Opm::gpuistl
{
template <class M, class X, class Y, int l>
@@ -293,14 +293,14 @@ CuDILU<M, X, Y, l>::tuneThreadBlockSizes()
m_upperSolveThreadBlockSize = detail::tuneThreadBlockSize(tuneUpperSolveThreadBlockSizeInApply, "Kernel computing an upper triangular solve for a level set");
}
} // namespace Opm::cuistl
} // namespace Opm::gpuistl
#define INSTANTIATE_CUDILU_DUNE(realtype, blockdim) \
template class ::Opm::cuistl::CuDILU<Dune::BCRSMatrix<Dune::FieldMatrix<realtype, blockdim, blockdim>>, \
::Opm::cuistl::CuVector<realtype>, \
::Opm::cuistl::CuVector<realtype>>; \
template class ::Opm::cuistl::CuDILU<Dune::BCRSMatrix<Opm::MatrixBlock<realtype, blockdim, blockdim>>, \
::Opm::cuistl::CuVector<realtype>, \
::Opm::cuistl::CuVector<realtype>>
template class ::Opm::gpuistl::CuDILU<Dune::BCRSMatrix<Dune::FieldMatrix<realtype, blockdim, blockdim>>, \
::Opm::gpuistl::CuVector<realtype>, \
::Opm::gpuistl::CuVector<realtype>>; \
template class ::Opm::gpuistl::CuDILU<Dune::BCRSMatrix<Opm::MatrixBlock<realtype, blockdim, blockdim>>, \
::Opm::gpuistl::CuVector<realtype>, \
::Opm::gpuistl::CuVector<realtype>>
INSTANTIATE_CUDILU_DUNE(double, 1);
INSTANTIATE_CUDILU_DUNE(double, 2);

View File

@@ -27,7 +27,7 @@
namespace Opm::cuistl
namespace Opm::gpuistl
{
//! \brief DILU preconditioner on the GPU.
//!
@@ -144,6 +144,6 @@ private:
int m_moveThreadBlockSize = -1;
int m_DILUFactorizationThreadBlockSize = -1;
};
} // end namespace Opm::cuistl
} // end namespace Opm::gpuistl
#endif

View File

@@ -26,7 +26,7 @@
#include <opm/simulators/linalg/cuistl/detail/vector_operations.hpp>
#include <opm/simulators/linalg/matrixblock.hh>
namespace Opm::cuistl
namespace Opm::gpuistl
{
template <class M, class X, class Y, int l>
@@ -108,14 +108,14 @@ CuJac<M, X, Y, l>::invertDiagonalAndFlatten()
m_diagInvFlattened.data());
}
} // namespace Opm::cuistl
} // namespace Opm::gpuistl
#define INSTANTIATE_CUJAC_DUNE(realtype, blockdim) \
template class ::Opm::cuistl::CuJac<Dune::BCRSMatrix<Dune::FieldMatrix<realtype, blockdim, blockdim>>, \
::Opm::cuistl::CuVector<realtype>, \
::Opm::cuistl::CuVector<realtype>>; \
template class ::Opm::cuistl::CuJac<Dune::BCRSMatrix<Opm::MatrixBlock<realtype, blockdim, blockdim>>, \
::Opm::cuistl::CuVector<realtype>, \
::Opm::cuistl::CuVector<realtype>>
template class ::Opm::gpuistl::CuJac<Dune::BCRSMatrix<Dune::FieldMatrix<realtype, blockdim, blockdim>>, \
::Opm::gpuistl::CuVector<realtype>, \
::Opm::gpuistl::CuVector<realtype>>; \
template class ::Opm::gpuistl::CuJac<Dune::BCRSMatrix<Opm::MatrixBlock<realtype, blockdim, blockdim>>, \
::Opm::gpuistl::CuVector<realtype>, \
::Opm::gpuistl::CuVector<realtype>>
INSTANTIATE_CUJAC_DUNE(double, 1);
INSTANTIATE_CUJAC_DUNE(double, 2);

View File

@@ -28,7 +28,7 @@
namespace Opm::cuistl
namespace Opm::gpuistl
{
//! \brief Jacobi preconditioner on the GPU.
//!
@@ -110,6 +110,6 @@ private:
void invertDiagonalAndFlatten();
};
} // end namespace Opm::cuistl
} // end namespace Opm::gpuistl
#endif

View File

@@ -24,7 +24,7 @@
#include <opm/simulators/linalg/cuistl/CuVector.hpp>
#include <vector>
namespace Opm::cuistl
namespace Opm::gpuistl
{
/**
* @brief GPUSender is a wrapper class for classes which will implement copyOwnerToAll

@@ -409,5 +409,5 @@ public:
private:
std::shared_ptr<GPUSender<field_type, OwnerOverlapCopyCommunicationType>> m_sender;
};
} // namespace Opm::cuistl
} // namespace Opm::gpuistl
#endif

View File

@@ -37,7 +37,7 @@
// it highly recommended to read that before proceeding.
namespace Opm::cuistl
namespace Opm::gpuistl
{
template <class M, class X, class Y, int l>
@@ -337,14 +337,14 @@ CuSeqILU0<M, X, Y, l>::updateILUConfiguration()
analyzeMatrix();
createILU();
}
} // namespace Opm::cuistl
} // namespace Opm::gpuistl
#define INSTANTIATE_CUSEQILU0_DUNE(realtype, blockdim) \
template class ::Opm::cuistl::CuSeqILU0<Dune::BCRSMatrix<Dune::FieldMatrix<realtype, blockdim, blockdim>>, \
::Opm::cuistl::CuVector<realtype>, \
::Opm::cuistl::CuVector<realtype>>; \
template class ::Opm::cuistl::CuSeqILU0<Dune::BCRSMatrix<Opm::MatrixBlock<realtype, blockdim, blockdim>>, \
::Opm::cuistl::CuVector<realtype>, \
::Opm::cuistl::CuVector<realtype>>
template class ::Opm::gpuistl::CuSeqILU0<Dune::BCRSMatrix<Dune::FieldMatrix<realtype, blockdim, blockdim>>, \
::Opm::gpuistl::CuVector<realtype>, \
::Opm::gpuistl::CuVector<realtype>>; \
template class ::Opm::gpuistl::CuSeqILU0<Dune::BCRSMatrix<Opm::MatrixBlock<realtype, blockdim, blockdim>>, \
::Opm::gpuistl::CuVector<realtype>, \
::Opm::gpuistl::CuVector<realtype>>
INSTANTIATE_CUSEQILU0_DUNE(double, 1);

View File

@@ -28,7 +28,7 @@
namespace Opm::cuistl
namespace Opm::gpuistl
{
//! \brief Sequential ILU0 preconditioner on the GPU through the CuSparse library.
//!
@@ -133,6 +133,6 @@ private:
void updateILUConfiguration();
};
} // end namespace Opm::cuistl
} // end namespace Opm::gpuistl
#endif

View File

@@ -29,7 +29,7 @@
#include <opm/simulators/linalg/matrixblock.hh>
#include <type_traits>
namespace Opm::cuistl
namespace Opm::gpuistl
{
namespace
@@ -343,4 +343,4 @@ INSTANTIATE_CUSPARSE_DUNE_MATRIX_CONSTRUCTION_FUNTIONS(float, 4);
INSTANTIATE_CUSPARSE_DUNE_MATRIX_CONSTRUCTION_FUNTIONS(float, 5);
INSTANTIATE_CUSPARSE_DUNE_MATRIX_CONSTRUCTION_FUNTIONS(float, 6);
} // namespace Opm::cuistl
} // namespace Opm::gpuistl

View File

@@ -28,7 +28,7 @@
#include <opm/simulators/linalg/cuistl/detail/safe_conversion.hpp>
#include <vector>
namespace Opm::cuistl
namespace Opm::gpuistl
{
/**
@@ -298,5 +298,5 @@ private:
template <class VectorType>
void assertSameSize(const VectorType& vector) const;
};
} // namespace Opm::cuistl
} // namespace Opm::gpuistl
#endif

View File

@@ -26,7 +26,7 @@
#include <opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp>
#include <opm/simulators/linalg/cuistl/detail/vector_operations.hpp>
namespace Opm::cuistl
namespace Opm::gpuistl
{
template <class T>
@@ -304,4 +304,4 @@ template class CuVector<double>;
template class CuVector<float>;
template class CuVector<int>;
} // namespace Opm::cuistl
} // namespace Opm::gpuistl

View File

@@ -29,7 +29,7 @@
#include <string>
namespace Opm::cuistl
namespace Opm::gpuistl
{
/**
@@ -391,5 +391,5 @@ private:
void assertHasElements() const;
};
} // namespace Opm::cuistl
} // namespace Opm::gpuistl
#endif
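
CuVector owns device memory; the tests at the end of this diff use its element-count constructor and raw data() accessor. A sketch built on just those members plus the setVectorValue helper declared in detail/vector_operations.hpp further down:

    #include <opm/simulators/linalg/cuistl/CuVector.hpp>
    #include <opm/simulators/linalg/cuistl/detail/vector_operations.hpp>
    #include <cstddef>

    void fillDeviceVector(std::size_t n)
    {
        Opm::gpuistl::CuVector<double> v(n);                     // device allocation
        Opm::gpuistl::detail::setVectorValue(v.data(), n, 42.0); // fill with a constant
    }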

View File

@@ -23,7 +23,7 @@
#include <opm/simulators/linalg/cuistl/CuView.hpp>
#include <opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp>
namespace Opm::cuistl
namespace Opm::gpuistl
{
template <class T>
@@ -79,4 +79,4 @@ template class CuView<double>;
template class CuView<float>;
template class CuView<int>;
} // namespace Opm::cuistl
} // namespace Opm::gpuistl

View File

@@ -37,7 +37,7 @@
#define OPM_IS_INSIDE_DEVICE_FUNCTION_TEMPORARY 0
#endif
namespace Opm::cuistl
namespace Opm::gpuistl
{
/**
@@ -410,6 +410,6 @@ private:
}
};
} // namespace Opm::cuistl
} // namespace Opm::gpuistl
#endif

View File

@@ -37,7 +37,7 @@
#include <string>
#include <tuple>
#include <utility>
namespace Opm::cuistl
namespace Opm::gpuistl
{
template <class M, class X, class Y, int l>
@@ -289,14 +289,14 @@ OpmCuILU0<M, X, Y, l>::tuneThreadBlockSizes()
tuneUpperSolveThreadBlockSizeInApply, "Kernel computing an upper triangular solve for a level set");
}
} // namespace Opm::cuistl
} // namespace Opm::gpuistl
#define INSTANTIATE_CUDILU_DUNE(realtype, blockdim) \
template class ::Opm::cuistl::OpmCuILU0<Dune::BCRSMatrix<Dune::FieldMatrix<realtype, blockdim, blockdim>>, \
::Opm::cuistl::CuVector<realtype>, \
::Opm::cuistl::CuVector<realtype>>; \
template class ::Opm::cuistl::OpmCuILU0<Dune::BCRSMatrix<Opm::MatrixBlock<realtype, blockdim, blockdim>>, \
::Opm::cuistl::CuVector<realtype>, \
::Opm::cuistl::CuVector<realtype>>
template class ::Opm::gpuistl::OpmCuILU0<Dune::BCRSMatrix<Dune::FieldMatrix<realtype, blockdim, blockdim>>, \
::Opm::gpuistl::CuVector<realtype>, \
::Opm::gpuistl::CuVector<realtype>>; \
template class ::Opm::gpuistl::OpmCuILU0<Dune::BCRSMatrix<Opm::MatrixBlock<realtype, blockdim, blockdim>>, \
::Opm::gpuistl::CuVector<realtype>, \
::Opm::gpuistl::CuVector<realtype>>
INSTANTIATE_CUDILU_DUNE(double, 1);
INSTANTIATE_CUDILU_DUNE(double, 2);

View File

@@ -29,7 +29,7 @@
#include <vector>
namespace Opm::cuistl
namespace Opm::gpuistl
{
//! \brief ILU0 preconditioner on the GPU.
//!
@@ -144,6 +144,6 @@ private:
int m_moveThreadBlockSize = -1;
int m_ILU0FactorizationThreadBlockSize = -1;
};
} // end namespace Opm::cuistl
} // end namespace Opm::gpuistl
#endif

View File

@@ -26,7 +26,7 @@
#include <opm/simulators/linalg/cuistl/detail/preconditioner_should_call_post_pre.hpp>
namespace Opm::cuistl
namespace Opm::gpuistl
{
//!\brief Makes a CUDA preconditioner available to a CPU simulator.
//!
@@ -134,6 +134,6 @@ private:
std::unique_ptr<CuVector<field_type>> m_inputBuffer;
std::unique_ptr<CuVector<field_type>> m_outputBuffer;
};
} // end namespace Opm::cuistl
} // end namespace Opm::gpuistl
#endif

View File

@@ -31,7 +31,7 @@
#include <opm/simulators/linalg/cuistl/detail/preconditioner_should_call_post_pre.hpp>
namespace Opm::cuistl
namespace Opm::gpuistl
{
//! \brief Converts the field type (eg. double to float) to benchmark single precision preconditioners
//!
@@ -64,7 +64,7 @@ namespace Opm::cuistl
//! void applyILU0AsFloat(const MDouble& matrix, const XDouble& x, XDouble& y) {
//!
//! using FloatILU0 = typename Opm::ParallelOverlappingILU0<MFloat, XFloat, XFloat, ParallelInfo>;
//! using DoubleToFloatConverter = typename Opm::cuistl::PreconditionerConvertFieldTypeAdapter<FloatILU0, MDouble,
//! using DoubleToFloatConverter = typename Opm::gpuistl::PreconditionerConvertFieldTypeAdapter<FloatILU0, MDouble,
//! XDouble, XDouble>;
//!
//! // Note that we do not need to make a new instance for every invocation, this
@@ -239,6 +239,6 @@ private:
//! \brief the underlying preconditioner to use
std::shared_ptr<CudaPreconditionerType> m_underlyingPreconditioner;
};
} // end namespace Opm::cuistl
} // end namespace Opm::gpuistl
#endif

View File

@@ -21,7 +21,7 @@
#include <opm/simulators/linalg/PreconditionerWithUpdate.hpp>
namespace Opm::cuistl
namespace Opm::gpuistl
{
//! \brief Common interface for adapters that hold preconditioners.
//!
@@ -38,6 +38,6 @@ public:
*/
virtual std::shared_ptr<Dune::PreconditionerWithUpdate<X, Y>> getUnderlyingPreconditioner() = 0;
};
} // end namespace Opm::cuistl
} // end namespace Opm::gpuistl
#endif

View File

@@ -39,7 +39,7 @@
#endif
#endif
namespace Opm::cuistl
namespace Opm::gpuistl
{
//! @brief Wraps a CUDA solver to work with CPU data.
//!
@@ -56,7 +56,7 @@ public:
using typename Dune::IterativeSolver<X, X>::real_type;
using typename Dune::IterativeSolver<X, X>::scalar_real_type;
static constexpr auto block_size = domain_type::block_type::dimension;
using XGPU = Opm::cuistl::CuVector<real_type>;
using XGPU = Opm::gpuistl::CuVector<real_type>;
// TODO: Use a std::forward
SolverAdapter(Operator& op,
@@ -148,8 +148,8 @@ private:
if (!precAsHolder) {
OPM_THROW(std::invalid_argument,
"The preconditioner needs to be a CUDA preconditioner (eg. CuILU0) wrapped in a "
"Opm::cuistl::PreconditionerAdapter wrapped in a "
"Opm::cuistl::CuBlockPreconditioner. If you are unsure what this means, set "
"Opm::gpuistl::PreconditionerAdapter wrapped in a "
"Opm::gpuistl::CuBlockPreconditioner. If you are unsure what this means, set "
"preconditioner to 'CUILU0'"); // TODO: Suggest a better preconditioner
}
@@ -159,8 +159,8 @@ private:
if (!preconditionerAdapterAsHolder) {
OPM_THROW(std::invalid_argument,
"The preconditioner needs to be a CUDA preconditioner (eg. CuILU0) wrapped in a "
"Opm::cuistl::PreconditionerAdapter wrapped in a "
"Opm::cuistl::CuBlockPreconditioner. If you are unsure what this means, set "
"Opm::gpuistl::PreconditionerAdapter wrapped in a "
"Opm::gpuistl::CuBlockPreconditioner. If you are unsure what this means, set "
"preconditioner to 'CUILU0'"); // TODO: Suggest a better preconditioner
}
// We need to get the underlying preconditioner:
@@ -183,12 +183,12 @@ private:
// TODO add typename Operator communication type as a named type with using
std::shared_ptr<Opm::cuistl::GPUSender<real_type, typename Operator::communication_type>> gpuComm;
std::shared_ptr<Opm::gpuistl::GPUSender<real_type, typename Operator::communication_type>> gpuComm;
if (mpiSupportsCudaAwareAtCompileTime && mpiSupportsCudaAwareAtRunTime){
gpuComm = std::make_shared<Opm::cuistl::GPUAwareMPISender<real_type, block_size, typename Operator::communication_type>>(communication);
gpuComm = std::make_shared<Opm::gpuistl::GPUAwareMPISender<real_type, block_size, typename Operator::communication_type>>(communication);
}
else{
gpuComm = std::make_shared<Opm::cuistl::GPUObliviousMPISender<real_type, block_size, typename Operator::communication_type>>(communication);
gpuComm = std::make_shared<Opm::gpuistl::GPUObliviousMPISender<real_type, block_size, typename Operator::communication_type>>(communication);
}
using CudaCommunication = CuOwnerOverlapCopy<real_type, block_size, typename Operator::communication_type>;
@@ -222,7 +222,7 @@ private:
if (!precAsHolder) {
OPM_THROW(std::invalid_argument,
"The preconditioner needs to be a CUDA preconditioner wrapped in a "
"Opm::cuistl::PreconditionerHolder (eg. CuILU0).");
"Opm::gpuistl::PreconditionerHolder (eg. CuILU0).");
}
auto preconditionerOnGPU = precAsHolder->getUnderlyingPreconditioner();
@@ -237,6 +237,6 @@ private:
std::unique_ptr<XGPU> m_inputBuffer;
std::unique_ptr<XGPU> m_outputBuffer;
};
} // namespace Opm::cuistl
} // namespace Opm::gpuistl
#endif

View File

@@ -19,7 +19,7 @@
#include <cublas_v2.h>
#include <opm/simulators/linalg/cuistl/detail/CuBlasHandle.hpp>
#include <opm/simulators/linalg/cuistl/detail/cublas_safe_call.hpp>
namespace Opm::cuistl::detail
namespace Opm::gpuistl::detail
{
@@ -46,4 +46,4 @@ CuBlasHandle::getInstance()
return instance;
}
} // namespace Opm::cuistl::detail
} // namespace Opm::gpuistl::detail

View File

@@ -21,7 +21,7 @@
#include <cublas_v2.h>
#include <memory>
namespace Opm::cuistl::detail
namespace Opm::gpuistl::detail
{
/**
@@ -31,7 +31,7 @@ namespace Opm::cuistl::detail
* @code{.cpp}
* #include <opm/simulators/linalg/cuistl/detail/CuBlasHandle.hpp>
* void someFunction() {
* auto& cublasHandle = ::Opm::cuistl::detail::CuBlasHandle::getInstance();
* auto& cublasHandle = ::Opm::gpuistl::detail::CuBlasHandle::getInstance();
* int cuBlasVersion = -1;
* OPM_CUBLAS_SAFE_CALL(cublasGetVersion(cublasHandle.get(), &cuBlasVersion));
* }
@@ -64,5 +64,5 @@ private:
CuBlasHandle();
cublasHandle_t m_handle;
};
} // namespace Opm::cuistl::detail
} // namespace Opm::gpuistl::detail
#endif // OPM_CUBLASHANDLE_HPP

View File

@@ -21,7 +21,7 @@
#include <opm/simulators/linalg/cuistl/detail/CuSparseResource.hpp>
#include <opm/simulators/linalg/cuistl/detail/cusparse_safe_call.hpp>
namespace Opm::cuistl::detail
namespace Opm::gpuistl::detail
{
/**
@@ -52,7 +52,7 @@ createMatrixDescription()
/**
* @brief createLowerDiagonalDescription creates a lower diagonal matrix description
* @return a lower diagonal matrix description overlapped with options from ::Opm::cuistl::detail::createMatrixDescription()
* @return a lower diagonal matrix description overlapped with options from ::Opm::gpuistl::detail::createMatrixDescription()
*
* @note This will assume it has a unit diagonal
*/
@@ -67,7 +67,7 @@ createLowerDiagonalDescription()
/**
* @brief createUpperDiagonalDescription creates an upper diagonal matrix description
* @return an upper diagonal matrix description overlapped with options from ::Opm::cuistl::detail::createMatrixDescription()
* @return an upper diagonal matrix description overlapped with options from ::Opm::gpuistl::detail::createMatrixDescription()
*
* @note This will assume it has a non-unit diagonal.
*/
@@ -81,6 +81,6 @@ createUpperDiagonalDescription()
return description;
}
} // namespace Opm::cuistl::detail
} // namespace Opm::gpuistl::detail
#endif // CU_MATRIX_DESCRIPTION_HPP

View File

@@ -18,7 +18,7 @@
*/
#include <opm/simulators/linalg/cuistl/detail/CuSparseHandle.hpp>
#include <opm/simulators/linalg/cuistl/detail/cusparse_safe_call.hpp>
namespace Opm::cuistl::detail
namespace Opm::gpuistl::detail
{
@@ -46,4 +46,4 @@ CuSparseHandle::getInstance()
return instance;
}
} // namespace Opm::cuistl::detail
} // namespace Opm::gpuistl::detail

View File

@@ -21,7 +21,7 @@
#include <cusparse.h>
#include <memory>
namespace Opm::cuistl::detail
namespace Opm::gpuistl::detail
{
/**
@@ -31,7 +31,7 @@ namespace Opm::cuistl::detail
* @code{.cpp}
* #include <opm/simulators/linalg/cuistl/detail/CuSparseHandle.hpp>
* void someFunction() {
* auto& cuSparseHandle = ::Opm::cuistl::detail::CuSparseHandle::getInstance();
* auto& cuSparseHandle = ::Opm::gpuistl::detail::CuSparseHandle::getInstance();
* int cuSparseVersion = -1;
* OPM_CUSPARSE_SAFE_CALL(cusparseGetVersion(cuSparseHandle.get(), &cuSparseVersion));
* }
@@ -63,5 +63,5 @@ private:
CuSparseHandle();
cusparseHandle_t m_handle;
};
} // namespace Opm::cuistl::detail
} // namespace Opm::gpuistl::detail
#endif // OPM_CUSPARSEHANDLE_HPP

View File

@@ -23,7 +23,7 @@
#include <memory>
#include <type_traits>
namespace Opm::cuistl::detail
namespace Opm::gpuistl::detail
{
/**
@@ -94,6 +94,6 @@ private:
DeleterType m_deleter;
};
} // namespace Opm::cuistl::impl
} // namespace Opm::gpuistl::impl
#include <opm/simulators/linalg/cuistl/detail/CuSparseResource_impl.hpp>
#endif // CUSPARSERESOURCE_HPP

View File

@@ -20,7 +20,7 @@
#include <opm/common/ErrorMacros.hpp>
#include <opm/simulators/linalg/cuistl/detail/cusparse_safe_call.hpp>
namespace Opm::cuistl::detail
namespace Opm::gpuistl::detail
{
namespace
@@ -100,4 +100,4 @@ CuSparseResource<T>::~CuSparseResource()
// proper name of the function being called.
OPM_CUSPARSE_WARN_IF_ERROR(m_deleter(m_resource));
}
} // namespace Opm::cuistl::detail
} // namespace Opm::gpuistl::detail

View File

@@ -25,7 +25,7 @@
#include <string>
#include <utility>
namespace Opm::cuistl::detail
namespace Opm::gpuistl::detail
{
/// @brief Function that tests the best thread block size, assumes the provided function depends on threadblock-size
@@ -88,6 +88,6 @@ tuneThreadBlockSize(func& f, std::string descriptionOfFunction)
return bestBlockSize;
}
} // end namespace Opm::cuistl::detail
} // end namespace Opm::gpuistl::detail
#endif
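
tuneThreadBlockSize benchmarks a kernel-launching callable across candidate block sizes and returns the fastest one, which is how the m_*ThreadBlockSize members of CuDILU and OpmCuILU0 are set elsewhere in this diff. A sketch of a caller, assuming the callable is invoked with each candidate block size (the autotuner header name and the kernel wrapper are hypothetical):

    #include <opm/simulators/linalg/cuistl/detail/autotuner.hpp> // assumed header name
    #include <string>

    // Stand-in for a real kernel wrapper; hypothetical.
    void myKernelWrapper(int /*blockSize*/) { /* launch kernel here */ }

    int tuneMyKernel()
    {
        auto runWithBlockSize = [](int blockSize) { myKernelWrapper(blockSize); };
        return Opm::gpuistl::detail::tuneThreadBlockSize(
            runWithBlockSize, "my kernel (hypothetical)");
    }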

View File

@@ -31,16 +31,16 @@ This file contains a collection of utility functions used in the GPU implementation
The functions deal with creating the mappings between reordered and natural indices, as well as
extracting sparsity structures from dune matrices and creating cusparsematrix indices
*/
namespace Opm::cuistl::detail
namespace Opm::gpuistl::detail
{
inline std::vector<int>
createReorderedToNatural(const Opm::SparseTable<size_t>& levelSets)
{
auto res = std::vector<int>(Opm::cuistl::detail::to_size_t(levelSets.dataSize()));
auto res = std::vector<int>(Opm::gpuistl::detail::to_size_t(levelSets.dataSize()));
int globCnt = 0;
for (auto row : levelSets) {
for (auto col : row) {
OPM_ERROR_IF(Opm::cuistl::detail::to_size_t(globCnt) >= res.size(),
OPM_ERROR_IF(Opm::gpuistl::detail::to_size_t(globCnt) >= res.size(),
fmt::format("Internal error. globCnt = {}, res.size() = {}", globCnt, res.size()));
res[globCnt++] = static_cast<int>(col);
}
@@ -51,11 +51,11 @@ createReorderedToNatural(const Opm::SparseTable<size_t>& levelSets)
inline std::vector<int>
createNaturalToReordered(const Opm::SparseTable<size_t>& levelSets)
{
auto res = std::vector<int>(Opm::cuistl::detail::to_size_t(levelSets.dataSize()));
auto res = std::vector<int>(Opm::gpuistl::detail::to_size_t(levelSets.dataSize()));
int globCnt = 0;
for (auto row : levelSets) {
for (auto col : row) {
OPM_ERROR_IF(Opm::cuistl::detail::to_size_t(globCnt) >= res.size(),
OPM_ERROR_IF(Opm::gpuistl::detail::to_size_t(globCnt) >= res.size(),
fmt::format("Internal error. globCnt = {}, res.size() = {}", globCnt, res.size()));
res[col] = globCnt++;
}
@@ -105,6 +105,6 @@ extractLowerAndUpperMatrices(const M& naturalMatrix, const std::vector<int>& reo
return {std::unique_ptr<GPUM>(new auto(GPUM::fromMatrix(reorderedLower, true))),
std::unique_ptr<GPUM>(new auto(GPUM::fromMatrix(reorderedUpper, true)))};
}
} // namespace Opm::cuistl::detail
} // namespace Opm::gpuistl::detail
#endif
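
The two mappings are inverses over the level-set ordering: createReorderedToNatural lists the natural row index of each reordered row, and createNaturalToReordered inverts that. A standalone illustration of the same loop logic with plain vectors instead of the Opm::SparseTable used above:

    #include <cassert>
    #include <vector>

    int main()
    {
        // Level sets {0, 2} and {1} reorder the rows as 0, 2, 1.
        const std::vector<std::vector<int>> levelSets = {{0, 2}, {1}};

        std::vector<int> reorderedToNatural(3), naturalToReordered(3);
        int globCnt = 0;
        for (const auto& row : levelSets) {
            for (const int col : row) {
                reorderedToNatural[globCnt] = col; // same assignment as above
                naturalToReordered[col] = globCnt;
                ++globCnt;
            }
        }
        assert((reorderedToNatural == std::vector<int> {0, 2, 1}));
        assert((naturalToReordered == std::vector<int> {0, 2, 1}));
        return 0;
    }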

View File

@@ -28,7 +28,7 @@
namespace Opm::cuistl::detail
namespace Opm::gpuistl::detail
{
#define CHECK_CUBLAS_ERROR_TYPE(code, x) \
@@ -174,7 +174,7 @@ cublasWarnIfError(cublasStatus_t error,
return error;
}
} // namespace Opm::cuistl::detail
} // namespace Opm::gpuistl::detail
/**
* @brief OPM_CUBLAS_SAFE_CALL checks the return type of the cublas expression (function call) and throws an exception
@@ -194,7 +194,7 @@ cublasWarnIfError(cublasStatus_t error,
* @note This should be used for any call to cuBlas unless you have a good reason not to.
*/
#define OPM_CUBLAS_SAFE_CALL(expression) \
::Opm::cuistl::detail::cublasSafeCall(expression, #expression, __FILE__, __func__, __LINE__)
::Opm::gpuistl::detail::cublasSafeCall(expression, #expression, __FILE__, __func__, __LINE__)
/**
* @brief OPM_CUBLAS_WARN_IF_ERROR checks the return type of the cublas expression (function call) and issues a warning
@@ -214,6 +214,6 @@ cublasWarnIfError(cublasStatus_t error,
* @note Prefer the cublasSafeCall/OPM_CUBLAS_SAFE_CALL counterpart unless you really don't want to throw an exception.
*/
#define OPM_CUBLAS_WARN_IF_ERROR(expression) \
::Opm::cuistl::detail::cublasWarnIfError(expression, #expression, __FILE__, __func__, __LINE__)
::Opm::gpuistl::detail::cublasWarnIfError(expression, #expression, __FILE__, __func__, __LINE__)
#endif // OPM_CUBLAS_SAFE_CALL_HPP

View File

@@ -29,7 +29,7 @@
#include <cublas_v2.h>
#include <opm/common/ErrorMacros.hpp>
namespace Opm::cuistl::detail
namespace Opm::gpuistl::detail
{
inline cublasStatus_t
@@ -164,5 +164,5 @@ cublasNrm2([[maybe_unused]] cublasHandle_t handle,
OPM_THROW(std::runtime_error, "norm2 for integer vectors is not implemented yet.");
}
} // namespace Opm::cuistl::detail
} // namespace Opm::gpuistl::detail
#endif

View File

@@ -24,7 +24,7 @@
#include <opm/common/OpmLog/OpmLog.hpp>
#include <string_view>
namespace Opm::cuistl::detail
namespace Opm::gpuistl::detail
{
/**
* @brief getCudaErrorMessage generates the error message to display for a given error.
@@ -128,7 +128,7 @@ cudaWarnIfError(cudaError_t error,
OpmLog::warning(getCudaErrorMessage(error, expression, filename, functionName, lineNumber));
}
}
} // namespace Opm::cuistl::detail
} // namespace Opm::gpuistl::detail
/**
* @brief OPM_CUDA_SAFE_CALL checks the return type of the CUDA expression (function call) and throws an exception if it
@@ -148,7 +148,7 @@ cudaWarnIfError(cudaError_t error,
* @note This should be used for any call to the CUDA runtime API unless you have a good reason not to.
*/
#define OPM_CUDA_SAFE_CALL(expression) \
::Opm::cuistl::detail::cudaSafeCall(expression, #expression, __FILE__, __func__, __LINE__)
::Opm::gpuistl::detail::cudaSafeCall(expression, #expression, __FILE__, __func__, __LINE__)
/**
@@ -169,6 +169,6 @@ cudaWarnIfError(cudaError_t error,
* @note Prefer the cudaSafeCall/OPM_CUDA_SAFE_CALL counterpart unless you really don't want to throw an exception.
*/
#define OPM_CUDA_WARN_IF_ERROR(expression) \
::Opm::cuistl::detail::cudaWarnIfError(expression, #expression, __FILE__, __func__, __LINE__)
::Opm::gpuistl::detail::cudaWarnIfError(expression, #expression, __FILE__, __func__, __LINE__)
#endif
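
Both macros stringify the expression and forward file, function, and line, so failures are reported at the call site. A minimal usage sketch with real CUDA runtime calls:

    #include <opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp>
    #include <cuda_runtime.h>
    #include <cstddef>

    double* allocateOnDevice(std::size_t n)
    {
        double* devicePtr = nullptr;
        OPM_CUDA_SAFE_CALL(cudaMalloc(&devicePtr, n * sizeof(double)));
        return devicePtr;
    }

    void freeOnDevice(double* devicePtr)
    {
        // Warn instead of throw: suitable for cleanup paths.
        OPM_CUDA_WARN_IF_ERROR(cudaFree(devicePtr));
    }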

View File

@@ -19,7 +19,7 @@
#ifndef CUSPARSE_CONSTANTS_HPP
#define CUSPARSE_CONSTANTS_HPP
#include <cusparse.h>
namespace Opm::cuistl::detail
namespace Opm::gpuistl::detail
{
const constexpr auto CUSPARSE_MATRIX_ORDER = CUSPARSE_DIRECTION_ROW;
}

View File

@@ -23,7 +23,7 @@
#include <opm/simulators/linalg/cuistl/detail/gpuThreadUtils.hpp>
#include <stdexcept>
namespace Opm::cuistl::detail
namespace Opm::gpuistl::detail
{
namespace
{
@@ -110,8 +110,8 @@ copyMatDataToReordered(T* srcMatrix,
int thrBlockSize)
{
int threadBlockSize
= ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(cuMoveDataToReordered<T, blocksize>, thrBlockSize);
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(numberOfRows, threadBlockSize);
= ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(cuMoveDataToReordered<T, blocksize>, thrBlockSize);
int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(numberOfRows, threadBlockSize);
cuMoveDataToReordered<T, blocksize><<<nThreadBlocks, threadBlockSize>>>(
srcMatrix, srcRowIndices, dstMatrix, dstRowIndices, naturalToReordered, numberOfRows);
}
@@ -130,9 +130,9 @@ copyMatDataToReorderedSplit(T* srcMatrix,
size_t numberOfRows,
int thrBlockSize)
{
int threadBlockSize = ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(
int threadBlockSize = ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(
cuMoveDataToReorderedSplit<T, blocksize>, thrBlockSize);
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(numberOfRows, threadBlockSize);
int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(numberOfRows, threadBlockSize);
cuMoveDataToReorderedSplit<T, blocksize><<<nThreadBlocks, threadBlockSize>>>(srcMatrix,
srcRowIndices,
srcColumnIndices,
@@ -161,4 +161,4 @@ INSTANTIATE_KERNEL_WRAPPERS(double, 3);
INSTANTIATE_KERNEL_WRAPPERS(double, 4);
INSTANTIATE_KERNEL_WRAPPERS(double, 5);
INSTANTIATE_KERNEL_WRAPPERS(double, 6);
} // namespace Opm::cuistl::detail
} // namespace Opm::gpuistl::detail
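
Every wrapper in this file sizes its launch the same way: take a recommended thread-block size (or the tuned override), round the row count up to whole blocks, and launch. A sketch of the pattern, assuming getCudaRecomendedThreadBlockSize is backed by the CUDA occupancy API (an implementation detail not shown in this diff):

    #include <cuda_runtime.h>
    #include <cstddef>

    __global__ void scaleKernel(double* x, double a, std::size_t n)
    {
        const std::size_t i = blockDim.x * blockIdx.x + threadIdx.x;
        if (i < n) {
            x[i] *= a;
        }
    }

    void scale(double* x, double a, std::size_t n)
    {
        int minGridSize = 0;
        int blockSize = 0;
        cudaOccupancyMaxPotentialBlockSize(&minGridSize, &blockSize, scaleKernel);
        const int nBlocks = (static_cast<int>(n) + blockSize - 1) / blockSize; // round up
        scaleKernel<<<nBlocks, blockSize>>>(x, a, n);
    }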

View File

@@ -20,7 +20,7 @@
#define OPM_CUISTL_CUSPARSE_MATRIX_OPERATIONS_HPP
#include <cstddef>
#include <vector>
namespace Opm::cuistl::detail
namespace Opm::gpuistl::detail
{
/**
* @brief Reorders the elements of a matrix by copying them from one matrix to another using a permutation list
@@ -68,5 +68,5 @@ void copyMatDataToReorderedSplit(T* srcMatrix,
size_t numberOfRows,
int threadBlockSize);
} // namespace Opm::cuistl::detail
} // namespace Opm::gpuistl::detail
#endif

View File

@@ -24,7 +24,7 @@
#include <opm/common/ErrorMacros.hpp>
#include <opm/common/OpmLog/OpmLog.hpp>
namespace Opm::cuistl::detail
namespace Opm::gpuistl::detail
{
#define CHECK_CUSPARSE_ERROR_TYPE(code, x) \
@@ -161,7 +161,7 @@ cusparseWarnIfError(cusparseStatus_t error,
return error;
}
} // namespace Opm::cuistl::detail
} // namespace Opm::gpuistl::detail
@@ -183,7 +183,7 @@ cusparseWarnIfError(cusparseStatus_t error,
* @note This should be used for any call to cuSparse unless you have a good reason not to.
*/
#define OPM_CUSPARSE_SAFE_CALL(expression) \
::Opm::cuistl::detail::cusparseSafeCall(expression, #expression, __FILE__, __func__, __LINE__)
::Opm::gpuistl::detail::cusparseSafeCall(expression, #expression, __FILE__, __func__, __LINE__)
/**
* @brief OPM_CUSPARSE_WARN_IF_ERROR checks the return type of the cusparse expression (function call) and issues a
@@ -204,5 +204,5 @@ cusparseWarnIfError(cusparseStatus_t error,
* exception.
*/
#define OPM_CUSPARSE_WARN_IF_ERROR(expression) \
::Opm::cuistl::detail::cusparseWarnIfError(expression, #expression, __FILE__, __func__, __LINE__)
::Opm::gpuistl::detail::cusparseWarnIfError(expression, #expression, __FILE__, __func__, __LINE__)
#endif // OPM_CUSPARSE_SAFE_CALL_HPP

View File

@@ -27,7 +27,7 @@
#include <type_traits>
#ifndef OPM_CUSPARSE_WRAPPER_HPP
#define OPM_CUSPARSE_WRAPPER_HPP
namespace Opm::cuistl::detail
namespace Opm::gpuistl::detail
{
inline cusparseStatus_t
@@ -450,5 +450,5 @@ cusparseBsrmv(cusparseHandle_t handle,
beta,
y);
}
} // namespace Opm::cuistl::detail
} // namespace Opm::gpuistl::detail
#endif

View File

@@ -22,7 +22,7 @@
#include <limits>
#include <vector>
namespace Opm::cuistl::detail
namespace Opm::gpuistl::detail
{
/**
@@ -53,6 +53,6 @@ makeMatrixWithNonzeroDiagonal(const Matrix& matrix,
return newMatrix;
}
} // namespace Opm::cuistl::detail
} // namespace Opm::gpuistl::detail
#endif

View File

@@ -26,7 +26,7 @@
/*
This file provides some logic for handling how to choose the correct thread-block size
*/
namespace Opm::cuistl::detail
namespace Opm::gpuistl::detail
{
constexpr inline size_t
getThreads([[maybe_unused]] size_t numberOfRows)
@@ -61,6 +61,6 @@ getNumberOfBlocks(int wantedThreads, int threadBlockSize)
return (wantedThreads + threadBlockSize - 1) / threadBlockSize;
}
} // namespace Opm::cuistl::detail
} // namespace Opm::gpuistl::detail
#endif

View File

@@ -29,7 +29,7 @@
* TODO: Use the requires-keyword once C++20 becomes available (https://en.cppreference.com/w/cpp/language/constraints ).
* With C++20 this file can be removed.
*/
namespace Opm::cuistl::detail
namespace Opm::gpuistl::detail
{
/**
@@ -121,5 +121,5 @@ public:
static constexpr bool value = std::is_same_v<decltype(test<T>(0)), std::true_type>;
};
} // namespace Opm::cuistl::detail
} // namespace Opm::gpuistl::detail
#endif
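
The detection machinery in this file is the classic pre-C++20 member-function test: an overloaded test() whose preferred overload is only well-formed when the probed expression compiles. A self-contained sketch of the same idiom for a made-up member shouldCallPre(), mirroring the 'value' line above:

    #include <type_traits>
    #include <utility>

    // Hypothetical detector; illustration of the idiom only.
    template <class T>
    class has_should_call_pre
    {
        template <class U>
        static auto test(int)
            -> decltype(std::declval<U>().shouldCallPre(), std::true_type {});
        template <class U>
        static std::false_type test(...);

    public:
        static constexpr bool value = std::is_same_v<decltype(test<T>(0)), std::true_type>;
    };

    struct WithHook { bool shouldCallPre() { return true; } };
    struct WithoutHook { };

    static_assert(has_should_call_pre<WithHook>::value);
    static_assert(!has_should_call_pre<WithoutHook>::value);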

View File

@@ -23,7 +23,7 @@
#include <opm/simulators/linalg/cuistl/detail/preconditionerKernels/DILUKernels.hpp>
#include <stdexcept>
namespace Opm::cuistl::detail::DILU
namespace Opm::gpuistl::detail::DILU
{
namespace
{
@@ -282,8 +282,8 @@ solveLowerLevelSet(T* reorderedMat,
int thrBlockSize)
{
int threadBlockSize
= ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(cuSolveLowerLevelSet<T, blocksize>, thrBlockSize);
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
= ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(cuSolveLowerLevelSet<T, blocksize>, thrBlockSize);
int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
cuSolveLowerLevelSet<T, blocksize><<<nThreadBlocks, threadBlockSize>>>(
reorderedMat, rowIndices, colIndices, indexConversion, startIdx, rowsInLevelSet, dInv, d, v);
}
@@ -302,9 +302,9 @@ solveLowerLevelSetSplit(T* reorderedMat,
T* v,
int thrBlockSize)
{
int threadBlockSize = ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(
int threadBlockSize = ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(
cuSolveLowerLevelSetSplit<T, blocksize>, thrBlockSize);
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
cuSolveLowerLevelSetSplit<T, blocksize><<<nThreadBlocks, threadBlockSize>>>(
reorderedMat, rowIndices, colIndices, indexConversion, startIdx, rowsInLevelSet, dInv, d, v);
}
@@ -322,8 +322,8 @@ solveUpperLevelSet(T* reorderedMat,
int thrBlockSize)
{
int threadBlockSize
= ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(cuSolveUpperLevelSet<T, blocksize>, thrBlockSize);
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
= ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(cuSolveUpperLevelSet<T, blocksize>, thrBlockSize);
int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
cuSolveUpperLevelSet<T, blocksize><<<nThreadBlocks, threadBlockSize>>>(
reorderedMat, rowIndices, colIndices, indexConversion, startIdx, rowsInLevelSet, dInv, v);
}
@@ -340,9 +340,9 @@ solveUpperLevelSetSplit(T* reorderedMat,
T* v,
int thrBlockSize)
{
int threadBlockSize = ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(
int threadBlockSize = ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(
cuSolveUpperLevelSetSplit<T, blocksize>, thrBlockSize);
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
cuSolveUpperLevelSetSplit<T, blocksize><<<nThreadBlocks, threadBlockSize>>>(
reorderedMat, rowIndices, colIndices, indexConversion, startIdx, rowsInLevelSet, dInv, v);
}
@@ -360,9 +360,9 @@ computeDiluDiagonal(T* reorderedMat,
int thrBlockSize)
{
if (blocksize <= 3) {
int threadBlockSize = ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(
int threadBlockSize = ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(
cuComputeDiluDiagonal<T, blocksize>, thrBlockSize);
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
cuComputeDiluDiagonal<T, blocksize><<<nThreadBlocks, threadBlockSize>>>(reorderedMat,
rowIndices,
colIndices,
@@ -393,9 +393,9 @@ computeDiluDiagonalSplit(T* reorderedLowerMat,
int thrBlockSize)
{
if (blocksize <= 3) {
int threadBlockSize = ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(
int threadBlockSize = ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(
cuComputeDiluDiagonalSplit<T, blocksize>, thrBlockSize);
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
cuComputeDiluDiagonalSplit<T, blocksize><<<nThreadBlocks, threadBlockSize>>>(reorderedLowerMat,
lowerRowIndices,
lowerColIndices,
@@ -434,4 +434,4 @@ INSTANTIATE_KERNEL_WRAPPERS(double, 3);
INSTANTIATE_KERNEL_WRAPPERS(double, 4);
INSTANTIATE_KERNEL_WRAPPERS(double, 5);
INSTANTIATE_KERNEL_WRAPPERS(double, 6);
} // namespace Opm::cuistl::detail::DILU
} // namespace Opm::gpuistl::detail::DILU

View File

@@ -24,7 +24,7 @@
#include <cuda_runtime.h>
#include <vector>
namespace Opm::cuistl::detail::DILU
namespace Opm::gpuistl::detail::DILU
{
/**
@@ -198,5 +198,5 @@ void computeDiluDiagonalSplit(T* reorderedLowerMat,
T* dInv,
int threadBlockSize);
} // namespace Opm::cuistl::detail::DILU
} // namespace Opm::gpuistl::detail::DILU
#endif

View File

@@ -27,7 +27,7 @@
The LU factorization and apply step is written based on the Dune implementations
*/
namespace Opm::cuistl::detail::ILU0
namespace Opm::gpuistl::detail::ILU0
{
namespace
{
@@ -341,8 +341,8 @@ solveLowerLevelSet(T* reorderedMat,
int thrBlockSize)
{
int threadBlockSize
= ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(cuSolveLowerLevelSet<T, blocksize>, thrBlockSize);
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
= ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(cuSolveLowerLevelSet<T, blocksize>, thrBlockSize);
int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
cuSolveLowerLevelSet<T, blocksize><<<nThreadBlocks, threadBlockSize>>>(
reorderedMat, rowIndices, colIndices, indexConversion, startIdx, rowsInLevelSet, d, v);
}
@@ -359,8 +359,8 @@ solveUpperLevelSet(T* reorderedMat,
int thrBlockSize)
{
int threadBlockSize
= ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(cuSolveUpperLevelSet<T, blocksize>, thrBlockSize);
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
= ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(cuSolveUpperLevelSet<T, blocksize>, thrBlockSize);
int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
cuSolveUpperLevelSet<T, blocksize><<<nThreadBlocks, threadBlockSize>>>(
reorderedMat, rowIndices, colIndices, indexConversion, startIdx, rowsInLevelSet, v);
}
@@ -377,9 +377,9 @@ solveLowerLevelSetSplit(T* reorderedMat,
T* v,
int thrBlockSize)
{
int threadBlockSize = ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(
int threadBlockSize = ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(
cuSolveLowerLevelSetSplit<T, blocksize>, thrBlockSize);
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
cuSolveLowerLevelSetSplit<T, blocksize><<<nThreadBlocks, threadBlockSize>>>(
reorderedMat, rowIndices, colIndices, indexConversion, startIdx, rowsInLevelSet, d, v);
}
@@ -396,9 +396,9 @@ solveUpperLevelSetSplit(T* reorderedMat,
T* v,
int thrBlockSize)
{
int threadBlockSize = ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(
int threadBlockSize = ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(
cuSolveUpperLevelSetSplit<T, blocksize>, thrBlockSize);
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
cuSolveUpperLevelSetSplit<T, blocksize><<<nThreadBlocks, threadBlockSize>>>(
reorderedMat, rowIndices, colIndices, indexConversion, startIdx, rowsInLevelSet, dInv, v);
}
@@ -415,8 +415,8 @@ LUFactorization(T* srcMatrix,
int thrBlockSize)
{
int threadBlockSize
= ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(cuLUFactorization<T, blocksize>, thrBlockSize);
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
= ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(cuLUFactorization<T, blocksize>, thrBlockSize);
int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
cuLUFactorization<T, blocksize><<<nThreadBlocks, threadBlockSize>>>(
srcMatrix, srcRowIndices, srcColumnIndices, naturalToReordered, reorderedToNatual, rowsInLevelSet, startIdx);
}
@@ -437,8 +437,8 @@ LUFactorizationSplit(T* reorderedLowerMat,
int thrBlockSize)
{
int threadBlockSize
= ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(cuLUFactorizationSplit<T, blocksize>, thrBlockSize);
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
= ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(cuLUFactorizationSplit<T, blocksize>, thrBlockSize);
int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
cuLUFactorizationSplit<T, blocksize><<<nThreadBlocks, threadBlockSize>>>(reorderedLowerMat,
lowerRowIndices,
lowerColIndices,
@@ -473,4 +473,4 @@ INSTANTIATE_KERNEL_WRAPPERS(double, 3);
INSTANTIATE_KERNEL_WRAPPERS(double, 4);
INSTANTIATE_KERNEL_WRAPPERS(double, 5);
INSTANTIATE_KERNEL_WRAPPERS(double, 6);
} // namespace Opm::cuistl::detail::ILU0
} // namespace Opm::gpuistl::detail::ILU0

View File

@@ -20,7 +20,7 @@
#define OPM_ILU0_KERNELS_HPP
#include <cstddef>
#include <vector>
namespace Opm::cuistl::detail::ILU0
namespace Opm::gpuistl::detail::ILU0
{
/**
@@ -189,5 +189,5 @@ void LUFactorizationSplit(T* reorderedLowerMat,
int rowsInLevelSet,
int threadBlockSize);
} // namespace Opm::cuistl::detail::ILU0
} // namespace Opm::gpuistl::detail::ILU0
#endif

View File

@@ -23,7 +23,7 @@
#include <opm/simulators/linalg/cuistl/detail/preconditionerKernels/JacKernels.hpp>
#include <stdexcept>
namespace Opm::cuistl::detail::JAC
namespace Opm::gpuistl::detail::JAC
{
namespace
{
@@ -60,8 +60,8 @@ invertDiagonalAndFlatten(T* mat, int* rowIndices, int* colIndices, size_t number
{
if (blocksize <= 3) {
int threadBlockSize
= ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(cuInvertDiagonalAndFlatten<T, blocksize>);
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(numberOfRows, threadBlockSize);
= ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(cuInvertDiagonalAndFlatten<T, blocksize>);
int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(numberOfRows, threadBlockSize);
cuInvertDiagonalAndFlatten<T, blocksize>
<<<nThreadBlocks, threadBlockSize>>>(mat, rowIndices, colIndices, numberOfRows, vec);
} else {
@@ -85,4 +85,4 @@ INSTANTIATE_KERNEL_WRAPPERS(double, 4);
INSTANTIATE_KERNEL_WRAPPERS(double, 5);
INSTANTIATE_KERNEL_WRAPPERS(double, 6);
} // namespace Opm::cuistl::detail::JAC
} // namespace Opm::gpuistl::detail::JAC

View File

@@ -20,7 +20,7 @@
#define OPM_JAC_KERNELS_HPP
#include <cstddef>
#include <vector>
namespace Opm::cuistl::detail::JAC
namespace Opm::gpuistl::detail::JAC
{
/**
@@ -34,5 +34,5 @@ namespace Opm::cuistl::detail::JAC
template <class T, int blocksize>
void invertDiagonalAndFlatten(T* mat, int* rowIndices, int* colIndices, size_t numberOfRows, T* vec);
} // namespace Opm::cuistl::detail::JAC
} // namespace Opm::gpuistl::detail::JAC
#endif

View File

@@ -22,7 +22,7 @@
#include <opm/simulators/linalg/cuistl/detail/has_function.hpp>
namespace Opm::cuistl::detail
namespace Opm::gpuistl::detail
{
//! @brief Tests (compile time) if the preconditioner type needs to call pre() before a call to apply()
@@ -60,5 +60,5 @@ shouldCallPreconditionerPost()
return true;
}
}
} // namespace Opm::cuistl::detail
} // namespace Opm::gpuistl::detail
#endif

View File

@@ -37,7 +37,7 @@
* while Dune::BlockVector (and relatives) use unsigned size_t.
*/
namespace Opm::cuistl::detail
namespace Opm::gpuistl::detail
{
/**
@@ -106,6 +106,6 @@ to_size_t(int i)
return std::size_t(i);
}
} // namespace Opm::cuistl::detail
} // namespace Opm::gpuistl::detail
#endif

View File

@@ -25,7 +25,7 @@
#include <opm/simulators/linalg/cuistl/detail/gpuThreadUtils.hpp>
#include <opm/simulators/linalg/cuistl/detail/vector_operations.hpp>
#include <stdexcept>
namespace Opm::cuistl::detail
namespace Opm::gpuistl::detail
{
namespace
@@ -108,8 +108,8 @@ template <class T>
void
setVectorValue(T* deviceData, size_t numberOfElements, const T& value)
{
int threadBlockSize = ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(setVectorValueKernel<T>);
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(numberOfElements, threadBlockSize);
int threadBlockSize = ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(setVectorValueKernel<T>);
int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(numberOfElements, threadBlockSize);
setVectorValueKernel<<<nThreadBlocks, threadBlockSize>>>(deviceData, numberOfElements, value);
}
@@ -121,8 +121,8 @@ template <class T>
void
setZeroAtIndexSet(T* deviceData, size_t numberOfElements, const int* indices)
{
int threadBlockSize = ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(setZeroAtIndexSetKernel<T>);
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(numberOfElements, threadBlockSize);
int threadBlockSize = ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(setZeroAtIndexSetKernel<T>);
int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(numberOfElements, threadBlockSize);
setZeroAtIndexSetKernel<<<nThreadBlocks, threadBlockSize>>>(deviceData, numberOfElements, indices);
}
template void setZeroAtIndexSet(double*, size_t, const int*);
@@ -138,8 +138,8 @@ innerProductAtIndices(cublasHandle_t cublasHandle,
size_t numberOfElements,
const int* indices)
{
int threadBlockSize = ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(elementWiseMultiplyKernel<T>);
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(numberOfElements, threadBlockSize);
int threadBlockSize = ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(elementWiseMultiplyKernel<T>);
int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(numberOfElements, threadBlockSize);
elementWiseMultiplyKernel<<<nThreadBlocks, threadBlockSize>>>(deviceA, deviceB, buffer, numberOfElements, indices);
// TODO: [perf] Get rid of the allocation here.
@@ -158,8 +158,8 @@ template <class T>
void
prepareSendBuf(const T* deviceA, T* buffer, size_t numberOfElements, const int* indices)
{
int threadBlockSize = ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(prepareSendBufKernel<T>);
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(numberOfElements, threadBlockSize);
int threadBlockSize = ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(prepareSendBufKernel<T>);
int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(numberOfElements, threadBlockSize);
prepareSendBufKernel<<<nThreadBlocks, threadBlockSize>>>(deviceA, buffer, numberOfElements, indices);
OPM_CUDA_SAFE_CALL(cudaDeviceSynchronize()); // The buffers are prepared for MPI. Wait for them to finish.
}
@@ -171,8 +171,8 @@ template <class T>
void
syncFromRecvBuf(T* deviceA, T* buffer, size_t numberOfElements, const int* indices)
{
int threadBlockSize = ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(syncFromRecvBufKernel<T>);
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(numberOfElements, threadBlockSize);
int threadBlockSize = ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(syncFromRecvBufKernel<T>);
int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(numberOfElements, threadBlockSize);
syncFromRecvBufKernel<<<nThreadBlocks, threadBlockSize>>>(deviceA, buffer, numberOfElements, indices);
// cudaDeviceSynchronize(); // Not needed, I guess...
}
@@ -191,20 +191,20 @@ weightedDiagMV(const T* squareBlockVector,
{
switch (blocksize) {
case 1: {
int threadBlockSize = ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(weightedDiagMV<T, 1>);
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(numberOfElements, threadBlockSize);
int threadBlockSize = ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(weightedDiagMV<T, 1>);
int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(numberOfElements, threadBlockSize);
weightedDiagMV<T, 1>
<<<nThreadBlocks, threadBlockSize>>>(squareBlockVector, numberOfElements, relaxationFactor, srcVec, dstVec);
} break;
case 2: {
int threadBlockSize = ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(weightedDiagMV<T, 2>);
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(numberOfElements, threadBlockSize);
int threadBlockSize = ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(weightedDiagMV<T, 2>);
int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(numberOfElements, threadBlockSize);
weightedDiagMV<T, 2>
<<<nThreadBlocks, threadBlockSize>>>(squareBlockVector, numberOfElements, relaxationFactor, srcVec, dstVec);
} break;
case 3: {
int threadBlockSize = ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(weightedDiagMV<T, 3>);
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(numberOfElements, threadBlockSize);
int threadBlockSize = ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(weightedDiagMV<T, 3>);
int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(numberOfElements, threadBlockSize);
weightedDiagMV<T, 3>
<<<nThreadBlocks, threadBlockSize>>>(squareBlockVector, numberOfElements, relaxationFactor, srcVec, dstVec);
} break;
@@ -217,4 +217,4 @@ weightedDiagMV(const T* squareBlockVector,
template void weightedDiagMV(const double*, const size_t, const size_t, double, const double*, double*);
template void weightedDiagMV(const float*, const size_t, const size_t, float, const float*, float*);
} // namespace Opm::cuistl::detail
} // namespace Opm::gpuistl::detail
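For reference, the semantics the tests further down pin on weightedDiagMV are dst = w * D * src, with D the block-diagonal matrix stored as numberOfElements flat, row-major blocksize-by-blocksize blocks. A host-side sketch of that contract (not the production kernel):

template <class T>
void weightedDiagMVReference(const T* blocks, size_t n, size_t bs, T w, const T* src, T* dst)
{
    for (size_t block = 0; block < n; ++block) {
        for (size_t row = 0; row < bs; ++row) {
            T sum = 0;
            for (size_t col = 0; col < bs; ++col) {
                // row-major block layout: block * bs * bs is the block offset
                sum += blocks[block * bs * bs + row * bs + col] * src[block * bs + col];
            }
            dst[block * bs + row] = w * sum;
        }
    }
}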

View File

@@ -20,7 +20,7 @@
#define OPM_CUISTL_VECTOR_OPERATIONS_HPP
#include <cstddef>
#include <cublas_v2.h>
namespace Opm::cuistl::detail
namespace Opm::gpuistl::detail
{
/**
@@ -81,5 +81,5 @@ void weightedDiagMV(const T* squareBlockVector,
T relaxationFactor,
const T* srcVec,
T* dstVec);
} // namespace Opm::cuistl::detail
} // namespace Opm::gpuistl::detail
#endif

View File

@@ -21,7 +21,7 @@
#include <opm/common/OpmLog/OpmLog.hpp>
#include <opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp>
#include <opm/simulators/linalg/cuistl/set_device.hpp>
namespace Opm::cuistl
namespace Opm::gpuistl
{
void
setDevice(int mpiRank, [[maybe_unused]] int numberOfMpiRanks)
@@ -46,4 +46,4 @@ setDevice(int mpiRank, [[maybe_unused]] int numberOfMpiRanks)
OpmLog::info("Set CUDA device to " + std::to_string(deviceId) + " (out of " + std::to_string(deviceCount)
+ " devices).");
}
} // namespace Opm::cuistl
} // namespace Opm::gpuistl
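The body of setDevice lies mostly outside this hunk; judging by the log message, the mapping is presumably a round-robin assignment of MPI ranks to the visible devices, along the lines of:

int deviceCount = 0;
OPM_CUDA_SAFE_CALL(cudaGetDeviceCount(&deviceCount));
if (deviceCount > 0) {
    const int deviceId = mpiRank % deviceCount; // ranks sharing a node spread over its GPUs
    OPM_CUDA_SAFE_CALL(cudaSetDevice(deviceId));
}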

View File

@@ -20,7 +20,7 @@
#ifndef OPM_CUISTL_SET_DEVICE_HEADER
#define OPM_CUISTL_SET_DEVICE_HEADER
namespace Opm::cuistl
namespace Opm::gpuistl
{
//! @brief Sets the correct CUDA device in the setting of MPI
//!
@@ -32,5 +32,5 @@ namespace Opm::cuistl
//!
//! @note If no CUDA device is present, this does nothing.
void setDevice(int mpiRank, int numberOfMpiRanks);
} // namespace Opm::cuistl
} // namespace Opm::gpuistl
#endif

View File

@@ -167,7 +167,7 @@ BOOST_AUTO_TEST_CASE(TestFiniteDifference1D)
expectedOutputVector[i][1] = 43.0;
inputVector[i][0] = 1.0;
auto converter
= Opm::cuistl::PreconditionerConvertFieldTypeAdapter<TestPreconditioner, SpMatrixDouble, XDouble, XDouble>(
= Opm::gpuistl::PreconditionerConvertFieldTypeAdapter<TestPreconditioner, SpMatrixDouble, XDouble, XDouble>(
B);
auto underlyingPreconditioner = std::make_shared<TestPreconditioner>(
converter.getConvertedMatrix(), inputVector, B, expectedOutputVector);

View File

@@ -85,10 +85,10 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(FlattenAndInvertDiagonalWith3By3Blocks, T, Numeric
B[1][1][1][1] = -1.0;
B[1][1][2][2] = -1.0;
Opm::cuistl::CuSparseMatrix<T> m = Opm::cuistl::CuSparseMatrix<T>::fromMatrix(B);
Opm::cuistl::CuVector<T> dInvDiag(blocksize * blocksize * N);
Opm::gpuistl::CuSparseMatrix<T> m = Opm::gpuistl::CuSparseMatrix<T>::fromMatrix(B);
Opm::gpuistl::CuVector<T> dInvDiag(blocksize * blocksize * N);
Opm::cuistl::detail::JAC::invertDiagonalAndFlatten<T, 3>(
Opm::gpuistl::detail::JAC::invertDiagonalAndFlatten<T, 3>(
m.getNonZeroValues().data(), m.getRowIndices().data(), m.getColumnIndices().data(), N, dInvDiag.data());
std::vector<T> expectedInvDiag {-1.0 / 4.0,
@@ -159,10 +159,10 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(FlattenAndInvertDiagonalWith2By2Blocks, T, Numeric
B[1][1][0][0] = -1.0;
B[1][1][1][1] = -1.0;
Opm::cuistl::CuSparseMatrix<T> m = Opm::cuistl::CuSparseMatrix<T>::fromMatrix(B);
Opm::cuistl::CuVector<T> dInvDiag(blocksize * blocksize * N);
Opm::gpuistl::CuSparseMatrix<T> m = Opm::gpuistl::CuSparseMatrix<T>::fromMatrix(B);
Opm::gpuistl::CuVector<T> dInvDiag(blocksize * blocksize * N);
Opm::cuistl::detail::JAC::invertDiagonalAndFlatten<T, 2>(
Opm::gpuistl::detail::JAC::invertDiagonalAndFlatten<T, 2>(
m.getNonZeroValues().data(), m.getRowIndices().data(), m.getColumnIndices().data(), N, dInvDiag.data());
std::vector<T> expectedInvDiag {2.0, -2.0, -1.0 / 2.0, 1.0, -1.0, 0.0, 0.0, -1.0};
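The expected values above are per-block 2x2 inversions laid out row-major. A host reference for a single block (a sketch; invertDiagonalAndFlatten is assumed to apply this to every diagonal block of the BCRS matrix and write the results contiguously):

template <class T>
void invert2x2(const T* a, T* inv) // a and inv are row-major 2x2 blocks
{
    const T det = a[0] * a[3] - a[1] * a[2]; // adjugate over determinant
    inv[0] =  a[3] / det;
    inv[1] = -a[1] / det;
    inv[2] = -a[2] / det;
    inv[3] =  a[0] / det;
}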

View File

@@ -47,11 +47,11 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(ElementWiseMultiplicationOf3By3BlockVectorAndVecto
std::vector<T> hostBlockVector({1.0, 2.0, 3.0, 5.0, 2.0, 3.0, 2.0, 1.0, 2.0});
std::vector<T> hostVecVector({3.0, 2.0, 1.0});
std::vector<T> hostDstVector({0, 0, 0});
Opm::cuistl::CuVector<T> deviceBlockVector(hostBlockVector);
Opm::cuistl::CuVector<T> deviceVecVector(hostVecVector);
Opm::cuistl::CuVector<T> deviceDstVector(hostDstVector);
Opm::gpuistl::CuVector<T> deviceBlockVector(hostBlockVector);
Opm::gpuistl::CuVector<T> deviceVecVector(hostVecVector);
Opm::gpuistl::CuVector<T> deviceDstVector(hostDstVector);
Opm::cuistl::detail::weightedDiagMV(
Opm::gpuistl::detail::weightedDiagMV(
deviceBlockVector.data(), N, blocksize, weight, deviceVecVector.data(), deviceDstVector.data());
std::vector<T> expectedVec {10.0, 22.0, 10.0};
@@ -81,11 +81,11 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(ElementWiseMultiplicationOf2By2BlockVectorAndVecto
std::vector<T> hostBlockVector({1.0, 2.0, 3.0, 4.0, 4.0, 3.0, 2.0, 1.0});
std::vector<T> hostVecVector({1.0, 3.0, 2.0, 4.0});
std::vector<T> hostDstVector({0, 0, 0, 0});
Opm::cuistl::CuVector<T> deviceBlockVector(hostBlockVector);
Opm::cuistl::CuVector<T> deviceVecVector(hostVecVector);
Opm::cuistl::CuVector<T> deviceDstVector(hostDstVector);
Opm::gpuistl::CuVector<T> deviceBlockVector(hostBlockVector);
Opm::gpuistl::CuVector<T> deviceVecVector(hostVecVector);
Opm::gpuistl::CuVector<T> deviceDstVector(hostDstVector);
Opm::cuistl::detail::weightedDiagMV(
Opm::gpuistl::detail::weightedDiagMV(
deviceBlockVector.data(), N, blocksize, weight, deviceVecVector.data(), deviceDstVector.data());
std::vector<T> expectedVec {3.5, 7.5, 10.0, 4.0};
@@ -95,4 +95,4 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(ElementWiseMultiplicationOf2By2BlockVectorAndVecto
for (size_t i = 0; i < expectedVec.size(); i++) {
BOOST_CHECK_CLOSE(expectedVec[i], computedVec[i], 1e-7);
}
}
}
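Both expected vectors can be checked by hand. In the 3x3 case, the block with rows (1,2,3), (5,2,3), (2,1,2) applied to (3,2,1) gives (10, 22, 10), so the test's weight is evidently 1.0; in the 2x2 case, [[1,2],[3,4]] times (1,3) is (7,15) and [[4,3],[2,1]] times (2,4) is (20,8), which match (3.5, 7.5, 10.0, 4.0) with a weight of 0.5. The weight declarations themselves sit outside these hunks.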

View File

@@ -32,7 +32,7 @@ BOOST_AUTO_TEST_CASE(TestGetCublasVersion)
// that checks the version of blas programmatically. Let the test pass for now.
BOOST_CHECK(true);
#else
auto& cublasHandle = ::Opm::cuistl::detail::CuBlasHandle::getInstance();
auto& cublasHandle = ::Opm::gpuistl::detail::CuBlasHandle::getInstance();
int cuBlasVersion = -1;
OPM_CUBLAS_SAFE_CALL(cublasGetVersion(cublasHandle.get(), &cuBlasVersion));

View File

@@ -35,15 +35,15 @@ BOOST_AUTO_TEST_CASE(TestMakeView)
{
// test that we can create buffers and make views of the buffers using the pointer constructor
auto buf = std::vector<int>({1, 2, 3, 4, 5, 6});
const auto gpubuf = ::Opm::cuistl::CuBuffer<int>(buf);
auto gpuview = ::Opm::cuistl::CuView<int>(buf.data(), buf.size());
bool gpuBufCreatedView = std::is_same<::Opm::cuistl::CuView<int>, decltype(gpuview)>::value;
const auto gpubuf = ::Opm::gpuistl::CuBuffer<int>(buf);
auto gpuview = ::Opm::gpuistl::CuView<int>(buf.data(), buf.size());
bool gpuBufCreatedView = std::is_same<::Opm::gpuistl::CuView<int>, decltype(gpuview)>::value;
BOOST_CHECK(gpuBufCreatedView);
// test that we can make views of buffers by using the CuBuffer constructor
auto gpuview2 = ::Opm::cuistl::make_view(gpubuf);
bool gpuBufCreatedView2 = std::is_same<::Opm::cuistl::CuView<const int>, decltype(gpuview2)>::value;
auto gpuview2 = ::Opm::gpuistl::make_view(gpubuf);
bool gpuBufCreatedView2 = std::is_same<::Opm::gpuistl::CuView<const int>, decltype(gpuview2)>::value;
BOOST_CHECK(gpuBufCreatedView2);
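The distinction this test leans on: CuBuffer owns its device allocation (RAII), while CuView is assumed to be a trivially copyable, non-owning (pointer, size) pair, which is what makes it safe to pass by value straight into a __global__ kernel. Schematically (not the actual class layout):

template <class T>
struct ViewSketch {
    T* ptr;           // borrowed device pointer; not freed on destruction
    std::size_t len;
    __host__ __device__ T& operator[](std::size_t i) { return ptr[i]; }
};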

View File

@@ -41,11 +41,11 @@ using B1x1Vec = Dune::BlockVector<Dune::FieldVector<double, 1>>;
using B2x2Vec = Dune::BlockVector<Dune::FieldVector<double, 2>>;
using Sp1x1BlockMatrix = Dune::BCRSMatrix<FM1x1>;
using Sp2x2BlockMatrix = Dune::BCRSMatrix<FM2x2>;
using CuMatrix = Opm::cuistl::CuSparseMatrix<T>;
using CuIntVec = Opm::cuistl::CuVector<int>;
using CuFloatingPointVec = Opm::cuistl::CuVector<T>;
using CuDilu1x1 = Opm::cuistl::CuDILU<Sp1x1BlockMatrix, CuFloatingPointVec, CuFloatingPointVec>;
using CuDilu2x2 = Opm::cuistl::CuDILU<Sp2x2BlockMatrix, CuFloatingPointVec, CuFloatingPointVec>;
using CuMatrix = Opm::gpuistl::CuSparseMatrix<T>;
using CuIntVec = Opm::gpuistl::CuVector<int>;
using CuFloatingPointVec = Opm::gpuistl::CuVector<T>;
using CuDilu1x1 = Opm::gpuistl::CuDILU<Sp1x1BlockMatrix, CuFloatingPointVec, CuFloatingPointVec>;
using CuDilu2x2 = Opm::gpuistl::CuDILU<Sp2x2BlockMatrix, CuFloatingPointVec, CuFloatingPointVec>;
Sp1x1BlockMatrix
get1x1BlockTestMatrix()

View File

@@ -49,7 +49,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(CUJACApplyBlocksize2, T, NumericTypes)
using M = Dune::FieldMatrix<T, blocksize, blocksize>;
using SpMatrix = Dune::BCRSMatrix<M>;
using Vector = Dune::BlockVector<Dune::FieldVector<T, blocksize>>;
using CuJac = Opm::cuistl::CuJac<SpMatrix, Opm::cuistl::CuVector<T>, Opm::cuistl::CuVector<T>>;
using CuJac = Opm::gpuistl::CuJac<SpMatrix, Opm::gpuistl::CuVector<T>, Opm::gpuistl::CuVector<T>>;
SpMatrix B(N, N, nonZeroes, SpMatrix::row_wise);
for (auto row = B.createbegin(); row != B.createend(); ++row) {
@@ -70,7 +70,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(CUJACApplyBlocksize2, T, NumericTypes)
B[1][1][0][0] = -1.0;
B[1][1][1][1] = -1.0;
auto cujac = Opm::cuistl::PreconditionerAdapter<Vector, Vector, CuJac>(std::make_shared<CuJac>(B, 0.5));
auto cujac = Opm::gpuistl::PreconditionerAdapter<Vector, Vector, CuJac>(std::make_shared<CuJac>(B, 0.5));
Vector vVector(2);
Vector dVector(2);
@@ -103,7 +103,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(CUJACApplyBlocksize1, T, NumericTypes)
using M = Dune::FieldMatrix<T, blocksize, blocksize>;
using SpMatrix = Dune::BCRSMatrix<M>;
using Vector = Dune::BlockVector<Dune::FieldVector<T, blocksize>>;
using CuJac = Opm::cuistl::CuJac<SpMatrix, Opm::cuistl::CuVector<T>, Opm::cuistl::CuVector<T>>;
using CuJac = Opm::gpuistl::CuJac<SpMatrix, Opm::gpuistl::CuVector<T>, Opm::gpuistl::CuVector<T>>;
SpMatrix B(N, N, nonZeroes, SpMatrix::row_wise);
for (auto row = B.createbegin(); row != B.createend(); ++row) {
@@ -129,7 +129,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(CUJACApplyBlocksize1, T, NumericTypes)
B[2][2][0][0] = -1.0;
B[3][3][0][0] = -1.0;
auto cujac = Opm::cuistl::PreconditionerAdapter<Vector, Vector, CuJac>(std::make_shared<CuJac>(B, 0.5));
auto cujac = Opm::gpuistl::PreconditionerAdapter<Vector, Vector, CuJac>(std::make_shared<CuJac>(B, 0.5));
Vector vVector(4);
Vector dVector(4);

View File

@@ -46,7 +46,7 @@ main(int argc, char** argv)
int rank, totalRanks;
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &totalRanks);
Opm::cuistl::setDevice(rank, totalRanks);
Opm::gpuistl::setDevice(rank, totalRanks);
boost::unit_test::unit_test_main(&init_unit_test_func, argc, argv);
}
@@ -62,12 +62,12 @@ BOOST_AUTO_TEST_CASE(TestProject)
auto ownerOverlapCopy = Dune::OwnerOverlapCopyCommunication<int>(indexInfo, MPI_COMM_WORLD);
auto xCPU = std::vector<double> {{1.0, 2.0, 3.0}};
auto xGPU = Opm::cuistl::CuVector<double>(xCPU);
auto xGPU = Opm::gpuistl::CuVector<double>(xCPU);
auto gpuComm = std::make_shared<Opm::cuistl::GPUObliviousMPISender<double, 1, Dune::OwnerOverlapCopyCommunication<int>>>(ownerOverlapCopy);
auto gpuComm = std::make_shared<Opm::gpuistl::GPUObliviousMPISender<double, 1, Dune::OwnerOverlapCopyCommunication<int>>>(ownerOverlapCopy);
auto cuOwnerOverlapCopy
= Opm::cuistl::CuOwnerOverlapCopy<double, 1, Dune::OwnerOverlapCopyCommunication<int>>(gpuComm);
= Opm::gpuistl::CuOwnerOverlapCopy<double, 1, Dune::OwnerOverlapCopyCommunication<int>>(gpuComm);
cuOwnerOverlapCopy.project(xGPU);
@@ -94,12 +94,12 @@ BOOST_AUTO_TEST_CASE(TestDot)
indexInfo.addRemoteIndex(std::make_tuple(0, 2, Dune::OwnerOverlapCopyAttributeSet::copy));
auto ownerOverlapCopy = Dune::OwnerOverlapCopyCommunication<int>(indexInfo, MPI_COMM_WORLD);
auto xCPU = std::vector<double> {{1.0, 2.0, 3.0}};
auto xGPU = Opm::cuistl::CuVector<double>(xCPU);
auto xGPU = Opm::gpuistl::CuVector<double>(xCPU);
auto gpuComm = std::make_shared<Opm::cuistl::GPUObliviousMPISender<double, 1, Dune::OwnerOverlapCopyCommunication<int>>>(ownerOverlapCopy);
auto gpuComm = std::make_shared<Opm::gpuistl::GPUObliviousMPISender<double, 1, Dune::OwnerOverlapCopyCommunication<int>>>(ownerOverlapCopy);
auto cuOwnerOverlapCopy
= Opm::cuistl::CuOwnerOverlapCopy<double, 1, Dune::OwnerOverlapCopyCommunication<int>>(gpuComm);
= Opm::gpuistl::CuOwnerOverlapCopy<double, 1, Dune::OwnerOverlapCopyCommunication<int>>(gpuComm);
double outputDune = -1.0;
auto xDune = xGPU.asDuneBlockVector<1>();

View File

@@ -63,7 +63,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(TestFiniteDifference1D, T, NumericTypes)
using M = Dune::FieldMatrix<T, 1, 1>;
using SpMatrix = Dune::BCRSMatrix<M>;
using Vector = Dune::BlockVector<Dune::FieldVector<T, 1>>;
using CuILU0 = Opm::cuistl::CuSeqILU0<SpMatrix, Opm::cuistl::CuVector<T>, Opm::cuistl::CuVector<T>>;
using CuILU0 = Opm::gpuistl::CuSeqILU0<SpMatrix, Opm::gpuistl::CuVector<T>, Opm::gpuistl::CuVector<T>>;
SpMatrix B(N, N, nonZeroes, SpMatrix::row_wise);
for (auto row = B.createbegin(); row != B.createend(); ++row) {
@@ -91,7 +91,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(TestFiniteDifference1D, T, NumericTypes)
auto duneILU = Dune::SeqILU<SpMatrix, Vector, Vector>(B, 1.0);
auto cuILU = Opm::cuistl::PreconditionerAdapter<Vector, Vector, CuILU0>(std::make_shared<CuILU0>(B, 1.0));
auto cuILU = Opm::gpuistl::PreconditionerAdapter<Vector, Vector, CuILU0>(std::make_shared<CuILU0>(B, 1.0));
// check for the standard basis {e_i}
// (e_i=(0,...,0, 1 (i-th place), 0, ..., 0))
@@ -158,7 +158,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(TestFiniteDifferenceBlock2, T, NumericTypes)
using M = Dune::FieldMatrix<T, 2, 2>;
using SpMatrix = Dune::BCRSMatrix<M>;
using Vector = Dune::BlockVector<Dune::FieldVector<T, 2>>;
using CuILU0 = Opm::cuistl::CuSeqILU0<SpMatrix, Opm::cuistl::CuVector<T>, Opm::cuistl::CuVector<T>>;
using CuILU0 = Opm::gpuistl::CuSeqILU0<SpMatrix, Opm::gpuistl::CuVector<T>, Opm::gpuistl::CuVector<T>>;
SpMatrix B(N, N, nonZeroes, SpMatrix::row_wise);
for (auto row = B.createbegin(); row != B.createend(); ++row) {
@@ -181,7 +181,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(TestFiniteDifferenceBlock2, T, NumericTypes)
auto duneILU = Dune::SeqILU<SpMatrix, Vector, Vector>(B, 1.0);
auto cuILU = Opm::cuistl::PreconditionerAdapter<Vector, Vector, CuILU0>(std::make_shared<CuILU0>(B, 1.0));
auto cuILU = Opm::gpuistl::PreconditionerAdapter<Vector, Vector, CuILU0>(std::make_shared<CuILU0>(B, 1.0));
// check for the standard basis {e_i}
// (e_i=(0,...,0, 1 (i-th place), 0, ..., 0))

View File

@@ -26,7 +26,7 @@
BOOST_AUTO_TEST_CASE(TestGetSparseVersion)
{
auto& cuSparseHandle = ::Opm::cuistl::detail::CuSparseHandle::getInstance();
auto& cuSparseHandle = ::Opm::gpuistl::detail::CuSparseHandle::getInstance();
int cuSparseVersion = -1;
OPM_CUSPARSE_SAFE_CALL(cusparseGetVersion(cuSparseHandle.get(), &cuSparseVersion));
BOOST_CHECK_LT(0, cuSparseVersion);

View File

@@ -76,7 +76,7 @@ BOOST_AUTO_TEST_CASE(TestConstruction1D)
}
}
auto cuSparseMatrix = Opm::cuistl::CuSparseMatrix<double>::fromMatrix(B);
auto cuSparseMatrix = Opm::gpuistl::CuSparseMatrix<double>::fromMatrix(B);
const auto& nonZeroValuesCuda = cuSparseMatrix.getNonZeroValues();
std::vector<double> buffer(cuSparseMatrix.nonzeroes(), 0.0);
@@ -143,14 +143,14 @@ BOOST_AUTO_TEST_CASE(RandomSparsityMatrix)
}
}
auto cuSparseMatrix = Opm::cuistl::CuSparseMatrix<double>::fromMatrix(B);
auto cuSparseMatrix = Opm::gpuistl::CuSparseMatrix<double>::fromMatrix(B);
// check each column
for (size_t component = 0; component < N; ++component) {
std::vector<double> inputDataX(N * dim, 0.0);
inputDataX[component] = 1.0;
std::vector<double> inputDataY(N * dim, .25);
auto inputVectorX = Opm::cuistl::CuVector<double>(inputDataX.data(), inputDataX.size());
auto inputVectorY = Opm::cuistl::CuVector<double>(inputDataY.data(), inputDataY.size());
auto inputVectorX = Opm::gpuistl::CuVector<double>(inputDataX.data(), inputDataX.size());
auto inputVectorY = Opm::gpuistl::CuVector<double>(inputDataY.data(), inputDataY.size());
Vector xHost(N), yHost(N);
yHost = inputDataY[0];
inputVectorX.copyToHost(xHost);

View File

@@ -32,7 +32,7 @@ BOOST_AUTO_TEST_CASE(TestDocumentedUsage)
{
auto someDataOnCPU = std::vector<double>({1.0, 2.0, 42.0, 59.9451743, 10.7132692});
auto dataOnGPU = ::Opm::cuistl::CuVector<double>(someDataOnCPU);
auto dataOnGPU = ::Opm::gpuistl::CuVector<double>(someDataOnCPU);
// Multiply by 4.0:
dataOnGPU *= 4.0;
@@ -50,14 +50,14 @@ BOOST_AUTO_TEST_CASE(TestDocumentedUsage)
BOOST_AUTO_TEST_CASE(TestConstructionSize)
{
const int numberOfElements = 1234;
auto vectorOnGPU = Opm::cuistl::CuVector<double>(numberOfElements);
auto vectorOnGPU = Opm::gpuistl::CuVector<double>(numberOfElements);
BOOST_CHECK_EQUAL(numberOfElements, vectorOnGPU.dim());
}
BOOST_AUTO_TEST_CASE(TestCopyFromHostConstructor)
{
std::vector<double> data {{1, 2, 3, 4, 5, 6, 7}};
auto vectorOnGPU = Opm::cuistl::CuVector<double>(data.data(), data.size());
auto vectorOnGPU = Opm::gpuistl::CuVector<double>(data.data(), data.size());
BOOST_CHECK_EQUAL(data.size(), vectorOnGPU.dim());
std::vector<double> buffer(data.size(), 0.0);
vectorOnGPU.copyToHost(buffer.data(), buffer.size());
@@ -68,7 +68,7 @@ BOOST_AUTO_TEST_CASE(TestCopyFromHostConstructor)
BOOST_AUTO_TEST_CASE(TestCopyFromHostFunction)
{
std::vector<double> data {{1, 2, 3, 4, 5, 6, 7}};
auto vectorOnGPU = Opm::cuistl::CuVector<double>(data.size());
auto vectorOnGPU = Opm::gpuistl::CuVector<double>(data.size());
BOOST_CHECK_EQUAL(data.size(), vectorOnGPU.dim());
vectorOnGPU.copyFromHost(data.data(), data.size());
std::vector<double> buffer(data.size(), 0.0);
@@ -80,7 +80,7 @@ BOOST_AUTO_TEST_CASE(TestCopyFromHostFunction)
BOOST_AUTO_TEST_CASE(TestCopyFromBvector)
{
auto blockVector = Dune::BlockVector<Dune::FieldVector<double, 2>> {{{42, 43}, {44, 45}, {46, 47}}};
auto vectorOnGPU = Opm::cuistl::CuVector<double>(blockVector.dim());
auto vectorOnGPU = Opm::gpuistl::CuVector<double>(blockVector.dim());
vectorOnGPU.copyFromHost(blockVector);
std::vector<double> buffer(vectorOnGPU.dim());
vectorOnGPU.copyToHost(buffer.data(), buffer.size());
@@ -93,7 +93,7 @@ BOOST_AUTO_TEST_CASE(TestCopyToBvector)
{
std::vector<double> data {{1, 2, 3, 4, 5, 6, 7, 8, 9}};
auto blockVector = Dune::BlockVector<Dune::FieldVector<double, 3>>(3);
auto vectorOnGPU = Opm::cuistl::CuVector<double>(data.data(), data.size());
auto vectorOnGPU = Opm::gpuistl::CuVector<double>(data.data(), data.size());
vectorOnGPU.copyToHost(blockVector);
@@ -103,7 +103,7 @@ BOOST_AUTO_TEST_CASE(TestCopyToBvector)
BOOST_AUTO_TEST_CASE(TestDataPointer)
{
std::vector<double> data {{1, 2, 3, 4, 5, 6, 7, 8, 9}};
auto vectorOnGPU = Opm::cuistl::CuVector<double>(data.data(), data.size());
auto vectorOnGPU = Opm::gpuistl::CuVector<double>(data.data(), data.size());
std::vector<double> buffer(data.size(), 0.0);
OPM_CUDA_SAFE_CALL(cudaMemcpy(buffer.data(), vectorOnGPU.data(), sizeof(double) * data.size(), cudaMemcpyDeviceToHost));
@@ -113,7 +113,7 @@ BOOST_AUTO_TEST_CASE(TestDataPointer)
BOOST_AUTO_TEST_CASE(TestCopyScalarMultiply)
{
std::vector<double> data {{1, 2, 3, 4, 5, 6, 7}};
auto vectorOnGPU = Opm::cuistl::CuVector<double>(data.data(), data.size());
auto vectorOnGPU = Opm::gpuistl::CuVector<double>(data.data(), data.size());
BOOST_CHECK_EQUAL(data.size(), vectorOnGPU.dim());
const double scalar = 42.25;
vectorOnGPU *= scalar;
@@ -128,7 +128,7 @@ BOOST_AUTO_TEST_CASE(TestCopyScalarMultiply)
BOOST_AUTO_TEST_CASE(TestTwoNorm)
{
std::vector<double> data {{1, 2, 3, 4, 5, 6, 7}};
auto vectorOnGPU = Opm::cuistl::CuVector<double>(data.data(), data.size());
auto vectorOnGPU = Opm::gpuistl::CuVector<double>(data.data(), data.size());
auto twoNorm = vectorOnGPU.two_norm();
double correctAnswer = 0.0;
@@ -143,8 +143,8 @@ BOOST_AUTO_TEST_CASE(TestDot)
{
std::vector<double> dataA {{1, 2, 3, 4, 5, 6, 7}};
std::vector<double> dataB {{8, 9, 10, 11, 12, 13, 14}};
auto vectorOnGPUA = Opm::cuistl::CuVector<double>(dataA.data(), dataA.size());
auto vectorOnGPUB = Opm::cuistl::CuVector<double>(dataB.data(), dataB.size());
auto vectorOnGPUA = Opm::gpuistl::CuVector<double>(dataA.data(), dataA.size());
auto vectorOnGPUB = Opm::gpuistl::CuVector<double>(dataB.data(), dataB.size());
auto dot = vectorOnGPUA.dot(vectorOnGPUB);
double correctAnswer = 0.0;
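The loop fills in the hand-computable answer: (1,...,7) dot (8,...,14) = 8 + 18 + 30 + 44 + 60 + 78 + 98 = 336; likewise, TestTwoNorm above expects sqrt(1 + 4 + 9 + 16 + 25 + 36 + 49) = sqrt(140), roughly 11.83.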
@@ -158,7 +158,7 @@ BOOST_AUTO_TEST_CASE(TestDot)
BOOST_AUTO_TEST_CASE(Assigment)
{
std::vector<double> data {{1, 2, 3, 4, 5, 6, 7}};
auto vectorOnGPU = Opm::cuistl::CuVector<double>(data.data(), data.size());
auto vectorOnGPU = Opm::gpuistl::CuVector<double>(data.data(), data.size());
vectorOnGPU = 10.0;
vectorOnGPU.copyToHost(data.data(), data.size());
@@ -171,9 +171,9 @@ BOOST_AUTO_TEST_CASE(Assigment)
BOOST_AUTO_TEST_CASE(CopyAssignment)
{
std::vector<double> data {{1, 2, 3, 4, 5, 6, 7}};
auto vectorOnGPU = Opm::cuistl::CuVector<double>(data.data(), data.size());
auto vectorOnGPU = Opm::gpuistl::CuVector<double>(data.data(), data.size());
vectorOnGPU.copyToHost(data.data(), data.size());
auto vectorOnGPUB = Opm::cuistl::CuVector<double>(data.size());
auto vectorOnGPUB = Opm::gpuistl::CuVector<double>(data.size());
vectorOnGPUB = 4.0;
vectorOnGPUB = vectorOnGPU;
@@ -185,7 +185,7 @@ BOOST_AUTO_TEST_CASE(CopyAssignment)
BOOST_AUTO_TEST_CASE(RandomVectors)
{
using GVector = Opm::cuistl::CuVector<double>;
using GVector = Opm::gpuistl::CuVector<double>;
std::srand(0);
std::mt19937 generator;
std::uniform_real_distribution<double> distribution(-100.0, 100.0);
@@ -268,7 +268,7 @@ BOOST_AUTO_TEST_CASE(RandomVectors)
indexSet.push_back(i);
}
}
auto indexSetGPU = Opm::cuistl::CuVector<int>(indexSet);
auto indexSetGPU = Opm::gpuistl::CuVector<int>(indexSet);
aGPU.setZeroAtIndexSet(indexSetGPU);
auto projectedA = aGPU.asStdVector();

View File

@@ -32,8 +32,8 @@
#include <algorithm>
#include <type_traits>
using CuViewDouble = ::Opm::cuistl::CuView<double>;
using CuBufferDouble = ::Opm::cuistl::CuBuffer<double>;
using CuViewDouble = ::Opm::gpuistl::CuView<double>;
using CuBufferDouble = ::Opm::gpuistl::CuBuffer<double>;
__global__ void useCuViewOnGPU(CuViewDouble a, CuViewDouble b){
b[0] = a.front();

View File

@@ -25,7 +25,7 @@
BOOST_AUTO_TEST_CASE(TestToIntThrowsOutofRange)
{
BOOST_CHECK_THROW(Opm::cuistl::detail::to_int(size_t(std::numeric_limits<int>::max()) + size_t(1));
BOOST_CHECK_THROW(Opm::gpuistl::detail::to_int(size_t(std::numeric_limits<int>::max()) + size_t(1));
, std::invalid_argument);
}
@@ -33,26 +33,26 @@ BOOST_AUTO_TEST_CASE(TestToIntConvertInRange)
{
// This might seem slow, but it is really fast:
for (size_t i = 0; i <= size_t(1024 * 1024); ++i) {
BOOST_CHECK_EQUAL(int(i), Opm::cuistl::detail::to_int(i));
BOOST_CHECK_EQUAL(int(i), Opm::gpuistl::detail::to_int(i));
}
BOOST_CHECK_EQUAL(std::numeric_limits<int>::max(),
Opm::cuistl::detail::to_int(size_t(std::numeric_limits<int>::max())));
Opm::gpuistl::detail::to_int(size_t(std::numeric_limits<int>::max())));
}
BOOST_AUTO_TEST_CASE(TestToSizeTThrowsOutofRange)
{
BOOST_CHECK_THROW(Opm::cuistl::detail::to_size_t(-1);, std::invalid_argument);
BOOST_CHECK_THROW(Opm::gpuistl::detail::to_size_t(-1);, std::invalid_argument);
}
BOOST_AUTO_TEST_CASE(TestToSizeTConvertInRange)
{
// This might seem slow, but it is really fast:
for (int i = 0; i <= 1024 * 1024; ++i) {
BOOST_CHECK_EQUAL(size_t(i), Opm::cuistl::detail::to_size_t(i));
BOOST_CHECK_EQUAL(size_t(i), Opm::gpuistl::detail::to_size_t(i));
}
BOOST_CHECK_EQUAL(size_t(std::numeric_limits<int>::max()),
Opm::cuistl::detail::to_size_t(std::numeric_limits<int>::max()));
Opm::gpuistl::detail::to_size_t(std::numeric_limits<int>::max()));
}
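A plausible shape for the checked narrowing conversions these tests pin down (a sketch, not the actual Opm::gpuistl::detail implementation):

#include <cstddef>
#include <limits>
#include <stdexcept>

int to_int(std::size_t s)
{
    // throw on values that do not fit in int, as TestToIntThrowsOutofRange expects
    if (s > static_cast<std::size_t>(std::numeric_limits<int>::max())) {
        throw std::invalid_argument("to_int: value too large for int");
    }
    return static_cast<int>(s);
}

std::size_t to_size_t(int i)
{
    // negative values cannot be represented, as TestToSizeTThrowsOutofRange expects
    if (i < 0) {
        throw std::invalid_argument("to_size_t: negative value");
    }
    return static_cast<std::size_t>(i);
}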

View File

@@ -31,7 +31,7 @@ using Matrix = Dune::BCRSMatrix<Dune::FieldMatrix<double, dim, dim>>;
using Vector = Dune::BlockVector<Dune::FieldVector<double, dim>>;
using Moperator = Dune::MatrixAdapter<Matrix, Vector, Vector>;
using PrecondFactory = Opm::PreconditionerFactory<Moperator, Dune::Amg::SequentialInformation>;
using SolverAdapter = Opm::cuistl::SolverAdapter<Moperator, Dune::BiCGSTABSolver, Vector>;
using SolverAdapter = Opm::gpuistl::SolverAdapter<Moperator, Dune::BiCGSTABSolver, Vector>;
namespace
{