Merge pull request #4635 from kjetilly/cuistl_preconditioners

Path to multigpu: Cuistl preconditioners
2025-02-25 18:55:30 -06:00 · 2023-05-31 14:43:49 +02:00 · 2023-05-31 14:43:49 +02:00 · 0923698667
commit 0923698667
parent 03b7d06351 90c9eedc79
12 changed files with 1542 additions and 3 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -551,6 +551,7 @@ if(CUDA_FOUND)
                       cusparse_handle
                       cuvector
                       cusparsematrix
+                       cuseqilu0
                       PROPERTIES LABELS gpu_cuda)
 endif()

--- a/CMakeLists_files.cmake
+++ b/CMakeLists_files.cmake
@ -150,7 +150,7 @@ if(CUDA_FOUND)
  list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/cuistl/CuVector.cpp)
  list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/cuistl/detail/vector_operations.cu)
  list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/cuistl/CuSparseMatrix.cpp)
-
+  list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/cuistl/CuSeqILU0.cpp)

  # CUISTL HEADERS
  list (APPEND PUBLIC_HEADER_FILES opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp)
@ -164,12 +164,17 @@ if(CUDA_FOUND)
  list (APPEND PUBLIC_HEADER_FILES opm/simulators/linalg/cuistl/detail/CuMatrixDescription.hpp)
  list (APPEND PUBLIC_HEADER_FILES opm/simulators/linalg/cuistl/detail/CuSparseResource.hpp)
  list (APPEND PUBLIC_HEADER_FILES opm/simulators/linalg/cuistl/detail/CuSparseResource_impl.hpp)
-  list (APPEND PUBLIC_HEADER_FILES opm/simulators/linalg/cuistl/detail/safe_conversion.hpp)     
+  list (APPEND PUBLIC_HEADER_FILES opm/simulators/linalg/cuistl/detail/safe_conversion.hpp)
  list (APPEND PUBLIC_HEADER_FILES opm/simulators/linalg/cuistl/detail/cublas_wrapper.hpp)
  list (APPEND PUBLIC_HEADER_FILES opm/simulators/linalg/cuistl/detail/cusparse_wrapper.hpp)
  list (APPEND PUBLIC_HEADER_FILES opm/simulators/linalg/cuistl/detail/cusparse_constants.hpp)  
  list (APPEND PUBLIC_HEADER_FILES opm/simulators/linalg/cuistl/detail/vector_operations.hpp)
-
+  list (APPEND PUBLIC_HEADER_FILES opm/simulators/linalg/cuistl/detail/has_function.hpp)
+  list (APPEND PUBLIC_HEADER_FILES opm/simulators/linalg/cuistl/detail/preconditioner_should_call_post_pre.hpp)
+  list (APPEND PUBLIC_HEADER_FILES opm/simulators/linalg/cuistl/PreconditionerAdapter.hpp)
+  list (APPEND PUBLIC_HEADER_FILES opm/simulators/linalg/cuistl/CuSeqILU0.hpp)
+  list (APPEND PUBLIC_HEADER_FILES opm/simulators/linalg/cuistl/detail/fix_zero_diagonal.hpp)
+  list (APPEND PUBLIC_HEADER_FILES opm/simulators/linalg/cuistl/PreconditionerConvertFieldTypeAdapter.hpp)


 endif()
@ -258,6 +263,8 @@ if(CUDA_FOUND)
  list(APPEND TEST_SOURCE_FILES tests/cuistl/test_cuvector.cpp)
  list(APPEND TEST_SOURCE_FILES tests/cuistl/test_cusparsematrix.cpp)
  list(APPEND TEST_SOURCE_FILES tests/cuistl/test_safe_conversion.cpp)
+  list(APPEND TEST_SOURCE_FILES tests/cuistl/test_cuseqilu0.cpp)
+  list(APPEND TEST_SOURCE_FILES tests/cuistl/test_converttofloatadapter.cpp)

 endif()
 if(OPENCL_FOUND)
--- a/opm/simulators/linalg/PreconditionerFactory_impl.hpp
+++ b/opm/simulators/linalg/PreconditionerFactory_impl.hpp
@ -40,6 +40,14 @@
 #include <dune/istl/paamg/kamg.hh>
 #include <dune/istl/paamg/fastamg.hh>

+#include <config.h>
+#if HAVE_CUDA
+#include <opm/simulators/linalg/cuistl/CuSeqILU0.hpp>
+#include <opm/simulators/linalg/cuistl/PreconditionerAdapter.hpp>
+#include <opm/simulators/linalg/cuistl/PreconditionerConvertFieldTypeAdapter.hpp>
+#endif
+
+
 namespace Opm {

 template<class Smoother>
@ -398,6 +406,30 @@ struct StandardPreconditioners<Operator,Dune::Amg::SequentialInformation>
                                using LevelTransferPolicy = Opm::PressureTransferPolicy<O, Dune::Amg::SequentialInformation, true>;
                                return std::make_shared<OwningTwoLevelPreconditioner<O, V, LevelTransferPolicy>>(op, prm, weightsCalculator, pressureIndex);
        });
+
+#if HAVE_CUDA
+        F::addCreator("CUILU0", [](const O& op, const P& prm, const std::function<V()>&, std::size_t) {
+            const double w = prm.get<double>("relaxation", 1.0);
+            using field_type = typename V::field_type;
+            using CuILU0 = typename Opm::cuistl::CuSeqILU0<M, Opm::cuistl::CuVector<field_type>, Opm::cuistl::CuVector<field_type>>;
+            return std::make_shared<Opm::cuistl::PreconditionerAdapter<V, V, CuILU0>>(std::make_shared<CuILU0>(op.getmat(), w));
+        });
+
+        F::addCreator("CUILU0Float", [](const O& op, const P& prm, const std::function<V()>&, std::size_t) {
+            const double w = prm.get<double>("relaxation", 1.0);
+            using block_type = typename V::block_type;
+            using VTo = Dune::BlockVector<Dune::FieldVector<float, block_type::dimension>>;
+            using matrix_type_to = typename Dune::BCRSMatrix<Dune::FieldMatrix<float, block_type::dimension, block_type::dimension>>;
+            using CuILU0 = typename Opm::cuistl::CuSeqILU0<matrix_type_to, Opm::cuistl::CuVector<float>, Opm::cuistl::CuVector<float>>;
+            using Adapter = typename Opm::cuistl::PreconditionerAdapter<VTo, VTo, CuILU0>;
+            using Converter = typename Opm::cuistl::PreconditionerConvertFieldTypeAdapter<Adapter, M, V, V>;
+            auto converted = std::make_shared<Converter>(op.getmat());
+            auto adapted = std::make_shared<Adapter>(std::make_shared<CuILU0>(converted->getConvertedMatrix(), w));
+            converted->setUnderlyingPreconditioner(adapted);
+            return converted;
+
+        });
+#endif
    }
 };

--- a/opm/simulators/linalg/cuistl/CuSeqILU0.cpp
+++ b/opm/simulators/linalg/cuistl/CuSeqILU0.cpp
@ -0,0 +1,362 @@
+/*
+  Copyright 2022-2023 SINTEF AS
+
+  This file is part of the Open Porous Media project (OPM).
+
+  OPM is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+
+  OPM is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with OPM.  If not, see <http://www.gnu.org/licenses/>.
+*/
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <cusparse.h>
+#include <dune/common/fmatrix.hh>
+#include <dune/common/fvector.hh>
+#include <dune/istl/bcrsmatrix.hh>
+#include <dune/istl/bvector.hh>
+#include <fmt/core.h>
+#include <opm/common/ErrorMacros.hpp>
+#include <opm/simulators/linalg/cuistl/CuSeqILU0.hpp>
+#include <opm/simulators/linalg/cuistl/detail/cusparse_constants.hpp>
+#include <opm/simulators/linalg/cuistl/detail/cusparse_safe_call.hpp>
+#include <opm/simulators/linalg/cuistl/detail/cusparse_wrapper.hpp>
+#include <opm/simulators/linalg/cuistl/detail/fix_zero_diagonal.hpp>
+#include <opm/simulators/linalg/cuistl/detail/safe_conversion.hpp>
+#include <opm/simulators/linalg/matrixblock.hh>
+
+// This file is based on the guide at https://docs.nvidia.com/cuda/cusparse/index.html#csrilu02_solve ,
+// it highly recommended to read that before proceeding.
+
+
+namespace Opm::cuistl
+{
+
+template <class M, class X, class Y, int l>
+CuSeqILU0<M, X, Y, l>::CuSeqILU0(const M& A, field_type w)
+    : m_underlyingMatrix(A)
+    , m_w(w)
+    , m_LU(CuSparseMatrix<field_type>::fromMatrix(detail::makeMatrixWithNonzeroDiagonal(A)))
+    , m_temporaryStorage(m_LU.N() * m_LU.blockSize())
+    , m_descriptionL(detail::createLowerDiagonalDescription())
+    , m_descriptionU(detail::createUpperDiagonalDescription())
+    , m_cuSparseHandle(detail::CuSparseHandle::getInstance())
+{
+    // Some sanity check
+    OPM_ERROR_IF(A.N() != m_LU.N(),
+                 fmt::format("CuSparse matrix not same size as DUNE matrix. {} vs {}.", m_LU.N(), A.N()));
+    OPM_ERROR_IF(
+        A[0][0].N() != m_LU.blockSize(),
+        fmt::format("CuSparse matrix not same blocksize as DUNE matrix. {} vs {}.", m_LU.blockSize(), A[0][0].N()));
+    OPM_ERROR_IF(
+        A.N() * A[0][0].N() != m_LU.dim(),
+        fmt::format("CuSparse matrix not same dimension as DUNE matrix. {} vs {}.", m_LU.dim(), A.N() * A[0][0].N()));
+    OPM_ERROR_IF(A.nonzeroes() != m_LU.nonzeroes(),
+                 fmt::format("CuSparse matrix not same number of non zeroes as DUNE matrix. {} vs {}. ",
+                             m_LU.nonzeroes(),
+                             A.nonzeroes()));
+
+
+    updateILUConfiguration();
+}
+
+template <class M, class X, class Y, int l>
+void
+CuSeqILU0<M, X, Y, l>::pre([[maybe_unused]] X& x, [[maybe_unused]] Y& b)
+{
+}
+
+template <class M, class X, class Y, int l>
+void
+CuSeqILU0<M, X, Y, l>::apply(X& v, const Y& d)
+{
+
+    // We need to pass the solve routine a scalar to multiply.
+    // In our case this scalar is 1.0
+    const field_type one = 1.0;
+
+    const auto numberOfRows = detail::to_int(m_LU.N());
+    const auto numberOfNonzeroBlocks = detail::to_int(m_LU.nonzeroes());
+    const auto blockSize = detail::to_int(m_LU.blockSize());
+
+    auto nonZeroValues = m_LU.getNonZeroValues().data();
+    auto rowIndices = m_LU.getRowIndices().data();
+    auto columnIndices = m_LU.getColumnIndices().data();
+
+    // Solve L m_temporaryStorage = d
+    OPM_CUSPARSE_SAFE_CALL(detail::cusparseBsrsv2_solve(m_cuSparseHandle.get(),
+                                                        detail::CUSPARSE_MATRIX_ORDER,
+                                                        CUSPARSE_OPERATION_NON_TRANSPOSE,
+                                                        numberOfRows,
+                                                        numberOfNonzeroBlocks,
+                                                        &one,
+                                                        m_descriptionL->get(),
+                                                        nonZeroValues,
+                                                        rowIndices,
+                                                        columnIndices,
+                                                        blockSize,
+                                                        m_infoL.get(),
+                                                        d.data(),
+                                                        m_temporaryStorage.data(),
+                                                        CUSPARSE_SOLVE_POLICY_USE_LEVEL,
+                                                        m_buffer->data()));
+
+    // Solve U v = m_temporaryStorage
+    OPM_CUSPARSE_SAFE_CALL(detail::cusparseBsrsv2_solve(m_cuSparseHandle.get(),
+                                                        detail::CUSPARSE_MATRIX_ORDER,
+                                                        CUSPARSE_OPERATION_NON_TRANSPOSE,
+                                                        numberOfRows,
+                                                        numberOfNonzeroBlocks,
+                                                        &one,
+                                                        m_descriptionU->get(),
+                                                        nonZeroValues,
+                                                        rowIndices,
+                                                        columnIndices,
+                                                        blockSize,
+                                                        m_infoU.get(),
+                                                        m_temporaryStorage.data(),
+                                                        v.data(),
+                                                        CUSPARSE_SOLVE_POLICY_USE_LEVEL,
+                                                        m_buffer->data()));
+
+
+    v *= m_w;
+}
+
+template <class M, class X, class Y, int l>
+void
+CuSeqILU0<M, X, Y, l>::post([[maybe_unused]] X& x)
+{
+}
+
+template <class M, class X, class Y, int l>
+Dune::SolverCategory::Category
+CuSeqILU0<M, X, Y, l>::category() const
+{
+    return Dune::SolverCategory::sequential;
+}
+
+template <class M, class X, class Y, int l>
+void
+CuSeqILU0<M, X, Y, l>::update()
+{
+    m_LU.updateNonzeroValues(detail::makeMatrixWithNonzeroDiagonal(m_underlyingMatrix));
+    createILU();
+}
+
+template <class M, class X, class Y, int l>
+void
+CuSeqILU0<M, X, Y, l>::analyzeMatrix()
+{
+
+    if (!m_buffer) {
+        OPM_THROW(std::runtime_error,
+                  "Buffer not initialized. Call findBufferSize() then initialize with the appropiate size.");
+    }
+    const auto numberOfRows = detail::to_int(m_LU.N());
+    const auto numberOfNonzeroBlocks = detail::to_int(m_LU.nonzeroes());
+    const auto blockSize = detail::to_int(m_LU.blockSize());
+
+    auto nonZeroValues = m_LU.getNonZeroValues().data();
+    auto rowIndices = m_LU.getRowIndices().data();
+    auto columnIndices = m_LU.getColumnIndices().data();
+    // analysis of ilu LU decomposition
+    OPM_CUSPARSE_SAFE_CALL(detail::cusparseBsrilu02_analysis(m_cuSparseHandle.get(),
+                                                             detail::CUSPARSE_MATRIX_ORDER,
+                                                             numberOfRows,
+                                                             numberOfNonzeroBlocks,
+                                                             m_LU.getDescription().get(),
+                                                             nonZeroValues,
+                                                             rowIndices,
+                                                             columnIndices,
+                                                             blockSize,
+                                                             m_infoM.get(),
+                                                             CUSPARSE_SOLVE_POLICY_USE_LEVEL,
+                                                             m_buffer->data()));
+
+    // Make sure we can decompose the matrix.
+    int structuralZero;
+    auto statusPivot = cusparseXbsrilu02_zeroPivot(m_cuSparseHandle.get(), m_infoM.get(), &structuralZero);
+    OPM_ERROR_IF(statusPivot != CUSPARSE_STATUS_SUCCESS,
+                 fmt::format("Found a structural zero at A({}, {}). Could not decompose LU \\approx A.\n\n A has "
+                             "dimension {}, and has {} nonzeroes.",
+                             structuralZero,
+                             structuralZero,
+                             m_LU.N(),
+                             m_LU.nonzeroes()));
+
+    // analysis of ilu apply
+    OPM_CUSPARSE_SAFE_CALL(detail::cusparseBsrsv2_analysis(m_cuSparseHandle.get(),
+                                                           detail::CUSPARSE_MATRIX_ORDER,
+                                                           CUSPARSE_OPERATION_NON_TRANSPOSE,
+                                                           numberOfRows,
+                                                           numberOfNonzeroBlocks,
+                                                           m_descriptionL->get(),
+                                                           nonZeroValues,
+                                                           rowIndices,
+                                                           columnIndices,
+                                                           blockSize,
+                                                           m_infoL.get(),
+                                                           CUSPARSE_SOLVE_POLICY_USE_LEVEL,
+                                                           m_buffer->data()));
+
+    OPM_CUSPARSE_SAFE_CALL(detail::cusparseBsrsv2_analysis(m_cuSparseHandle.get(),
+                                                           detail::CUSPARSE_MATRIX_ORDER,
+                                                           CUSPARSE_OPERATION_NON_TRANSPOSE,
+                                                           numberOfRows,
+                                                           numberOfNonzeroBlocks,
+                                                           m_descriptionU->get(),
+                                                           nonZeroValues,
+                                                           rowIndices,
+                                                           columnIndices,
+                                                           blockSize,
+                                                           m_infoU.get(),
+                                                           CUSPARSE_SOLVE_POLICY_USE_LEVEL,
+                                                           m_buffer->data()));
+    m_analysisDone = true;
+}
+
+template <class M, class X, class Y, int l>
+size_t
+CuSeqILU0<M, X, Y, l>::findBufferSize()
+{
+    // We have three calls that need buffers:
+    //   1) LU decomposition
+    //   2) solve Lv = y
+    //   3) solve Ux = z
+    // we combine these buffers into one since it is not used across calls,
+    const auto numberOfRows = detail::to_int(m_LU.N());
+    const auto numberOfNonzeroBlocks = detail::to_int(m_LU.nonzeroes());
+    const auto blockSize = detail::to_int(m_LU.blockSize());
+
+    auto nonZeroValues = m_LU.getNonZeroValues().data();
+    auto rowIndices = m_LU.getRowIndices().data();
+    auto columnIndices = m_LU.getColumnIndices().data();
+
+    int bufferSizeM = 0;
+    OPM_CUSPARSE_SAFE_CALL(detail::cusparseBsrilu02_bufferSize(m_cuSparseHandle.get(),
+                                                               detail::CUSPARSE_MATRIX_ORDER,
+                                                               numberOfRows,
+                                                               numberOfNonzeroBlocks,
+                                                               m_LU.getDescription().get(),
+                                                               nonZeroValues,
+                                                               rowIndices,
+                                                               columnIndices,
+                                                               blockSize,
+                                                               m_infoM.get(),
+                                                               &bufferSizeM));
+    int bufferSizeL = 0;
+    OPM_CUSPARSE_SAFE_CALL(detail::cusparseBsrsv2_bufferSize(m_cuSparseHandle.get(),
+                                                             detail::CUSPARSE_MATRIX_ORDER,
+                                                             CUSPARSE_OPERATION_NON_TRANSPOSE,
+                                                             numberOfRows,
+                                                             numberOfNonzeroBlocks,
+                                                             m_descriptionL->get(),
+                                                             nonZeroValues,
+                                                             rowIndices,
+                                                             columnIndices,
+                                                             blockSize,
+                                                             m_infoL.get(),
+                                                             &bufferSizeL));
+
+    int bufferSizeU = 0;
+    OPM_CUSPARSE_SAFE_CALL(detail::cusparseBsrsv2_bufferSize(m_cuSparseHandle.get(),
+                                                             detail::CUSPARSE_MATRIX_ORDER,
+                                                             CUSPARSE_OPERATION_NON_TRANSPOSE,
+                                                             numberOfRows,
+                                                             numberOfNonzeroBlocks,
+                                                             m_descriptionL->get(),
+                                                             nonZeroValues,
+                                                             rowIndices,
+                                                             columnIndices,
+                                                             blockSize,
+                                                             m_infoU.get(),
+                                                             &bufferSizeU));
+
+    OPM_ERROR_IF(bufferSizeL <= 0, fmt::format("bufferSizeL is non-positive. Given value is {}.", bufferSizeL));
+    OPM_ERROR_IF(bufferSizeU <= 0, fmt::format("bufferSizeU is non-positive. Given value is {}.", bufferSizeU));
+    OPM_ERROR_IF(bufferSizeM <= 0, fmt::format("bufferSizeM is non-positive. Given value is {}.", bufferSizeM));
+
+    return size_t(std::max(bufferSizeL, std::max(bufferSizeU, bufferSizeM)));
+}
+
+template <class M, class X, class Y, int l>
+void
+CuSeqILU0<M, X, Y, l>::createILU()
+{
+    OPM_ERROR_IF(!m_buffer, "Buffer not initialized. Call findBufferSize() then initialize with the appropiate size.");
+    OPM_ERROR_IF(!m_analysisDone, "Analyzis of matrix not done. Call analyzeMatrix() first.");
+
+    const auto numberOfRows = detail::to_int(m_LU.N());
+    const auto numberOfNonzeroBlocks = detail::to_int(m_LU.nonzeroes());
+    const auto blockSize = detail::to_int(m_LU.blockSize());
+
+    auto nonZeroValues = m_LU.getNonZeroValues().data();
+    auto rowIndices = m_LU.getRowIndices().data();
+    auto columnIndices = m_LU.getColumnIndices().data();
+    OPM_CUSPARSE_SAFE_CALL(detail::cusparseBsrilu02(m_cuSparseHandle.get(),
+                                                    detail::CUSPARSE_MATRIX_ORDER,
+                                                    numberOfRows,
+                                                    numberOfNonzeroBlocks,
+                                                    m_LU.getDescription().get(),
+                                                    nonZeroValues,
+                                                    rowIndices,
+                                                    columnIndices,
+                                                    blockSize,
+                                                    m_infoM.get(),
+                                                    CUSPARSE_SOLVE_POLICY_USE_LEVEL,
+                                                    m_buffer->data()));
+
+    // We need to do this here as well. The first call was to check that we could decompose the system A=LU
+    // the second call here is to make sure we can solve LUx=y
+    int structuralZero;
+    // cusparseXbsrilu02_zeroPivot() calls cudaDeviceSynchronize()
+    auto statusPivot = cusparseXbsrilu02_zeroPivot(m_cuSparseHandle.get(), m_infoM.get(), &structuralZero);
+
+    OPM_ERROR_IF(
+        statusPivot != CUSPARSE_STATUS_SUCCESS,
+        fmt::format("Found a structucal zero at LU({}, {}). Could not solve LUx = y.", structuralZero, structuralZero));
+}
+
+template <class M, class X, class Y, int l>
+void
+CuSeqILU0<M, X, Y, l>::updateILUConfiguration()
+{
+    auto bufferSize = findBufferSize();
+    if (!m_buffer || m_buffer->dim() < bufferSize) {
+        m_buffer.reset(new CuVector<field_type>((bufferSize + sizeof(field_type) - 1) / sizeof(field_type)));
+    }
+    analyzeMatrix();
+    createILU();
+}
+} // namespace Opm::cuistl
+#define INSTANTIATE_CUSEQILU0_DUNE(realtype, blockdim)                                                                 \
+    template class ::Opm::cuistl::CuSeqILU0<Dune::BCRSMatrix<Dune::FieldMatrix<realtype, blockdim, blockdim>>,         \
+                                            ::Opm::cuistl::CuVector<realtype>,                                         \
+                                            ::Opm::cuistl::CuVector<realtype>>;                                        \
+    template class ::Opm::cuistl::CuSeqILU0<Dune::BCRSMatrix<Opm::MatrixBlock<realtype, blockdim, blockdim>>,          \
+                                            ::Opm::cuistl::CuVector<realtype>,                                         \
+                                            ::Opm::cuistl::CuVector<realtype>>
+
+
+INSTANTIATE_CUSEQILU0_DUNE(double, 1);
+INSTANTIATE_CUSEQILU0_DUNE(double, 2);
+INSTANTIATE_CUSEQILU0_DUNE(double, 3);
+INSTANTIATE_CUSEQILU0_DUNE(double, 4);
+INSTANTIATE_CUSEQILU0_DUNE(double, 5);
+INSTANTIATE_CUSEQILU0_DUNE(double, 6);
+
+INSTANTIATE_CUSEQILU0_DUNE(float, 1);
+INSTANTIATE_CUSEQILU0_DUNE(float, 2);
+INSTANTIATE_CUSEQILU0_DUNE(float, 3);
+INSTANTIATE_CUSEQILU0_DUNE(float, 4);
+INSTANTIATE_CUSEQILU0_DUNE(float, 5);
+INSTANTIATE_CUSEQILU0_DUNE(float, 6);
--- a/opm/simulators/linalg/cuistl/CuSeqILU0.hpp
+++ b/opm/simulators/linalg/cuistl/CuSeqILU0.hpp
@ -0,0 +1,134 @@
+/*
+  Copyright 2022-2023 SINTEF AS
+
+  This file is part of the Open Porous Media project (OPM).
+
+  OPM is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+
+  OPM is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with OPM.  If not, see <http://www.gnu.org/licenses/>.
+*/
+#ifndef OPM_CUSEQILU0_HPP
+#define OPM_CUSEQILU0_HPP
+
+#include <dune/istl/preconditioner.hh>
+#include <opm/simulators/linalg/PreconditionerWithUpdate.hpp>
+#include <opm/simulators/linalg/cuistl/CuSparseMatrix.hpp>
+#include <opm/simulators/linalg/cuistl/detail/CuMatrixDescription.hpp>
+#include <opm/simulators/linalg/cuistl/detail/CuSparseHandle.hpp>
+#include <opm/simulators/linalg/cuistl/detail/CuSparseResource.hpp>
+
+
+
+namespace Opm::cuistl
+{
+//! \brief Sequential ILU0 preconditioner on the GPU through the CuSparse library.
+//!
+//! This implementation calls the CuSparse functions, which in turn essentially
+//! does a level decomposition to get some parallelism.
+//!
+//! \note This is not expected to be a fast preconditioner.
+//!
+//! \tparam M The matrix type to operate on
+//! \tparam X Type of the update
+//! \tparam Y Type of the defect
+//! \tparam l Ignored. Just there to have the same number of template arguments
+//!    as other preconditioners.
+//!
+//! \note We assume X and Y are both CuVector<real_type>, but we leave them as template
+//! arguments in case of future additions.
+template <class M, class X, class Y, int l = 1>
+class CuSeqILU0 : public Dune::PreconditionerWithUpdate<X, Y>
+{
+public:
+    //! \brief The matrix type the preconditioner is for.
+    using matrix_type = typename std::remove_const<M>::type ;
+    //! \brief The domain type of the preconditioner.
+    using domain_type = X;
+    //! \brief The range type of the preconditioner.
+    using range_type = Y;
+    //! \brief The field type of the preconditioner.
+    using field_type = typename X::field_type;
+
+    //! \brief Constructor.
+    //!
+    //!  Constructor gets all parameters to operate the prec.
+    //! \param A The matrix to operate on.
+    //! \param w The relaxation factor.
+    //!
+    CuSeqILU0(const M& A, field_type w);
+
+    //! \brief Prepare the preconditioner.
+    //! \note Does nothing at the time being.
+    virtual void pre(X& x, Y& b) override;
+
+    //! \brief Apply the preconditoner.
+    virtual void apply(X& v, const Y& d) override;
+
+    //! \brief Post processing
+    //! \note Does nothing at the moment
+    virtual void post(X& x) override;
+
+    //! Category of the preconditioner (see SolverCategory::Category)
+    virtual Dune::SolverCategory::Category category() const override;
+
+    //! \brief Updates the matrix data.
+    virtual void update() override;
+
+
+    //! \returns false
+    static constexpr bool shouldCallPre()
+    {
+        return false;
+    }
+
+    //! \returns false
+    static constexpr bool shouldCallPost()
+    {
+        return false;
+    }
+
+
+private:
+    //! \brief Reference to the underlying matrix
+    const M& m_underlyingMatrix;
+    //! \brief The relaxation factor to use.
+    field_type m_w;
+
+    //! This is the storage for the LU composition.
+    //! Initially this will have the values of A, but will be
+    //! modified in the constructor to be the proper LU decomposition.
+    CuSparseMatrix<field_type> m_LU;
+
+    CuVector<field_type> m_temporaryStorage;
+
+
+    detail::CuSparseMatrixDescriptionPtr m_descriptionL;
+    detail::CuSparseMatrixDescriptionPtr m_descriptionU;
+    detail::CuSparseResource<bsrsv2Info_t> m_infoL;
+    detail::CuSparseResource<bsrsv2Info_t> m_infoU;
+    detail::CuSparseResource<bsrilu02Info_t> m_infoM;
+
+    std::unique_ptr<CuVector<field_type>> m_buffer;
+    detail::CuSparseHandle& m_cuSparseHandle;
+
+    bool m_analysisDone = false;
+
+    void analyzeMatrix();
+    size_t findBufferSize();
+
+    void createILU();
+
+    void updateILUConfiguration();
+};
+} // end namespace Opm::cuistl
+
+#endif
--- a/opm/simulators/linalg/cuistl/PreconditionerAdapter.hpp
+++ b/opm/simulators/linalg/cuistl/PreconditionerAdapter.hpp
@ -0,0 +1,126 @@
+/*
+  Copyright 2022-2023 SINTEF AS
+
+  This file is part of the Open Porous Media project (OPM).
+
+  OPM is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+
+  OPM is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with OPM.  If not, see <http://www.gnu.org/licenses/>.
+*/
+#ifndef OPM_PRECONDITIONERADAPTER_HPP
+#define OPM_PRECONDITIONERADAPTER_HPP
+#include <cusparse.h>
+#include <dune/istl/preconditioner.hh>
+#include <opm/simulators/linalg/PreconditionerWithUpdate.hpp>
+#include <opm/simulators/linalg/cuistl/CuVector.hpp>
+#include <opm/simulators/linalg/cuistl/detail/preconditioner_should_call_post_pre.hpp>
+
+
+namespace Opm::cuistl
+{
+//!\brief Makes a CUDA preconditioner available to a CPU simulator.
+//!
+//! The use case for this adapter is to use a CUDA preconditioner during a linear
+//! solver that works on the CPU. The motivation for this is benchmarking new preconditioners on the GPU.
+//!
+//! \tparam X the domain type (should be on the CPU). Typicall a Dune::BlockVector
+//! \tparam Y the range type (should be on the CPU). Typicall a Dune::BlockVector
+//! \tparam CudaPreconditionerType the preconditioner taking CuVector<real_type> as arguments to apply
+template <class X, class Y, class CudaPreconditionerType>
+class PreconditionerAdapter : public Dune::PreconditionerWithUpdate<X, Y>
+{
+public:
+    //! \brief The domain type of the preconditioner.
+    using domain_type = X;
+    //! \brief The range type of the preconditioner.
+    using range_type = Y;
+    //! \brief The field type of the preconditioner.
+    using field_type = typename X::field_type;
+
+    //! \brief Constructor.
+    //!
+    //! Constructor gets all parameters to operate the prec.
+    //!    \param A The matrix to operate on.
+    //!    \param w The relaxation factor.
+    //!
+    explicit PreconditionerAdapter(std::shared_ptr<CudaPreconditionerType> preconditioner)
+        : m_underlyingPreconditioner(preconditioner)
+    {
+    }
+
+
+    //! \brief Prepare the preconditioner.
+    //!
+    //! Currently not supported.
+    virtual void pre([[maybe_unused]] X& x, [[maybe_unused]] Y& b) override
+    {
+        static_assert(!detail::shouldCallPreconditionerPre<CudaPreconditionerType>(),
+                      "We currently do not support Preconditioner::pre().");
+    }
+
+
+    //! \brief Apply the preconditoner.
+    //!
+    //! \copydoc Preconditioner::apply(X&,const Y&)
+    virtual void apply(X& v, const Y& d) override
+    {
+        if (!m_inputBuffer) {
+            m_inputBuffer.reset(new CuVector<field_type>(v.dim()));
+            m_outputBuffer.reset(new CuVector<field_type>(v.dim()));
+        }
+        m_inputBuffer->copyFromHost(d);
+        m_underlyingPreconditioner->apply(*m_outputBuffer, *m_inputBuffer);
+        m_outputBuffer->copyToHost(v);
+    }
+
+
+    //! \brief Clean up.
+    //!
+    //! Currently not supported.
+    virtual void post([[maybe_unused]] X& x) override
+    {
+        static_assert(!detail::shouldCallPreconditionerPost<CudaPreconditionerType>(),
+                      "We currently do not support Preconditioner::post().");
+    }
+
+
+    //! Category of the preconditioner (see SolverCategory::Category)
+    virtual Dune::SolverCategory::Category category() const
+    {
+        return m_underlyingPreconditioner->category();
+    }
+
+    //! Calls update on the underlying CUDA preconditioner
+    virtual void update() override
+    {
+        m_underlyingPreconditioner->update();
+    }
+
+    static constexpr bool shouldCallPre()
+    {
+        return detail::shouldCallPreconditionerPost<CudaPreconditionerType>();
+    }
+    static constexpr bool shouldCallPost()
+    {
+        return detail::shouldCallPreconditionerPre<CudaPreconditionerType>();
+    }
+
+private:
+    //! \brief the underlying preconditioner to use
+    std::shared_ptr<CudaPreconditionerType> m_underlyingPreconditioner;
+
+    std::unique_ptr<CuVector<field_type>> m_inputBuffer;
+    std::unique_ptr<CuVector<field_type>> m_outputBuffer;
+};
+} // end namespace Opm::cuistl
+
+#endif
--- a/opm/simulators/linalg/cuistl/PreconditionerConvertFieldTypeAdapter.hpp
+++ b/opm/simulators/linalg/cuistl/PreconditionerConvertFieldTypeAdapter.hpp
@ -0,0 +1,240 @@
+/*
+  Copyright 2022-2023 SINTEF AS
+
+  This file is part of the Open Porous Media project (OPM).
+
+  OPM is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+
+  OPM is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with OPM.  If not, see <http://www.gnu.org/licenses/>.
+*/
+#ifndef OPM_PRECONDITIONERCONVERTOFLOATADAPTER_HPP
+#define OPM_PRECONDITIONERCONVERTOFLOATADAPTER_HPP
+#include <cusparse.h>
+#include <dune/istl/bcrsmatrix.hh>
+#include <dune/istl/preconditioner.hh>
+#include <opm/simulators/linalg/PreconditionerWithUpdate.hpp>
+#include <opm/simulators/linalg/cuistl/CuSparseMatrix.hpp>
+#include <opm/simulators/linalg/cuistl/detail/CuMatrixDescription.hpp>
+#include <opm/simulators/linalg/cuistl/detail/CuSparseHandle.hpp>
+#include <opm/simulators/linalg/cuistl/detail/CuSparseResource.hpp>
+#include <opm/simulators/linalg/cuistl/detail/cusparse_constants.hpp>
+#include <opm/simulators/linalg/cuistl/detail/cusparse_safe_call.hpp>
+#include <opm/simulators/linalg/cuistl/detail/preconditioner_should_call_post_pre.hpp>
+
+
+namespace Opm::cuistl
+{
+//! \brief Converts the field type (eg. double to float) to benchmark single precision preconditioners
+//!
+//! \note This is not a fast conversion, it is simply meant to benchmark the potential of some
+//!       preconditioners on consumer grade GPUs where the double precision performance is often
+//!       artificially limited.
+//!
+//! \note In theory this can do any field_type conversion that is meaningful, but it is only tested
+//! on double to float conversion.
+//!
+//! \note Remember to set the underlying preconditioner with setUnderlyingPreconditioner (should use the matrix from
+//! getConvertedMatrix())
+//!
+//! \note One could simply change the constructor design by accepting a creator function for the underlying
+//! preconditioner. For the current use cases this is however not needed.
+//!
+//! To use this, use something like the following code:
+//! \code{.cpp}
+//! #include <opm/simulators/linalg/cuistl/PreconditionerConvertFieldTypeAdapter.hpp>
+//! #include <opm/simulators/linalg/ParallelOverlappingILU0.hpp>
+//!
+//! using XDouble = Dune::BlockVector<Dune::FieldVector<double, 2>>;
+//! using MDouble = Dune::FieldMatrix<double, 2, 2>;
+//! using SpMatrixDouble = Dune::BCRSMatrix<MDouble>;
+//! using XFloat = Dune::BlockVector<Dune::FieldVector<float, 2>>;
+//! using MFloat = Dune::FieldMatrix<float, 2, 2>;
+//! using SpMatrixFloat = Dune::BCRSMatrix<MFloat>;
+//!
+//! template<class ParallelInfo>
+//! void applyILU0AsFloat(const MDouble& matrix, const XDouble& x, XDouble& y) {
+//!
+//!     using FloatILU0 = typename Opm::ParallelOverlappingILU0<MFloat, XFloat, XFloat, ParallelInfo>;
+//!     using DoubleToFloatConverter = typename Opm::cuistl::PreconditionerConvertFieldTypeAdapter<FloatILU0, MDouble,
+//!     XDouble, XDouble>;
+//!
+//!     // Note that we do not need to make a new instance for every invocation, this
+//!     // is just done for this example
+//!     auto doubleToFloatConverter = DoubleToFloatConverter(matrix);
+//!     const auto& convertedMatrix = doubleToFloatConverter.getConvertedMatrix();
+//!
+//!     auto floatILU0 = std::make_shared<FloatILU0>(convertedMatrix, 0, 1.0, 0);
+//!
+//!     doubleToFloatConverter.setUnderlyingPreconditioner(floatILU0);
+//!
+//!     // This will convert x and y to float, then call floatILU0.apply on the converted arguments
+//!     doubleToFloatConverter.apply(x, y);
+//! }
+//!
+//! \endcode
+template <class CudaPreconditionerType, class M, class X, class Y, int l = 1>
+class PreconditionerConvertFieldTypeAdapter : public Dune::PreconditionerWithUpdate<X, Y>
+{
+public:
+    //! \brief The matrix type the preconditioner is for.
+    using matrix_type = typename std::remove_const<M>::type;
+
+    //! \brief The domain type of the preconditioner.
+    using domain_type = X;
+    //! \brief The range type of the preconditioner.
+    using range_type = Y;
+    //! \brief The field type of the preconditioner.
+    using field_type = typename X::field_type;
+
+
+    using domain_type_to = typename CudaPreconditionerType::domain_type;
+    //! \brief The range type of the preconditioner.
+    using range_type_to = typename CudaPreconditionerType::range_type;
+    //! \brief The field type of the preconditioner.
+    using field_type_to = typename domain_type_to::field_type;
+
+
+    using block_type = typename domain_type::block_type;
+
+    using XTo = Dune::BlockVector<Dune::FieldVector<field_type_to, block_type::dimension>>;
+    using YTo = Dune::BlockVector<Dune::FieldVector<field_type_to, block_type::dimension>>;
+    using matrix_type_to =
+        typename Dune::BCRSMatrix<Dune::FieldMatrix<field_type_to, block_type::dimension, block_type::dimension>>;
+
+    //! \brief Constructor.
+    //!
+    //! \param A The matrix to operate on.
+    //!
+    //! \note After the PreconditionerConvertFieldTypeAdapter you can get the converted matrix
+    //! by calling getConvertedMatrix(), which in turn can be used to create the underlying preconditioner.
+    //! Once the underlying preconditioner has been called, this must be supplied to setUnderlyingPreconditioner.
+    explicit PreconditionerConvertFieldTypeAdapter(const M& matrix)
+        : m_matrix(matrix)
+        , m_convertedMatrix(createConvertedMatrix())
+    {
+    }
+
+    //! \brief Not used at the moment
+    virtual void pre([[maybe_unused]] X& x, [[maybe_unused]] Y& b) override
+    {
+        static_assert(!detail::shouldCallPreconditionerPre<CudaPreconditionerType>(),
+                      "We currently do not support Preconditioner::pre().");
+    }
+
+    //! \brief Apply the preconditoner.
+    virtual void apply(X& v, const Y& d) override
+    {
+        OPM_ERROR_IF(!m_underlyingPreconditioner,
+                     "You need to set the underlying preconditioner with setUnderlyingPreconditioner.");
+        XTo convertedV(v.N());
+        for (size_t i = 0; i < v.N(); ++i) {
+            for (size_t j = 0; j < block_type::dimension; ++j) {
+                // This is probably unnecessary, but doing it anyway:
+                convertedV[i][j] = field_type_to(v[i][j]);
+            }
+        }
+        YTo convertedD(d.N());
+        for (size_t i = 0; i < d.N(); ++i) {
+            for (size_t j = 0; j < block_type::dimension; ++j) {
+                convertedD[i][j] = field_type_to(d[i][j]);
+            }
+        }
+
+        m_underlyingPreconditioner->apply(convertedV, convertedD);
+
+        for (size_t i = 0; i < v.N(); ++i) {
+            for (size_t j = 0; j < block_type::dimension; ++j) {
+                v[i][j] = field_type(convertedV[i][j]);
+            }
+        }
+    }
+
+    //! \brief Not used at the moment
+    virtual void post([[maybe_unused]] X& x) override
+    {
+        static_assert(!detail::shouldCallPreconditionerPost<CudaPreconditionerType>(),
+                      "We currently do not support Preconditioner::post().");
+    }
+
+    //! Category of the preconditioner (see SolverCategory::Category)
+    virtual Dune::SolverCategory::Category category() const override
+    {
+        return m_underlyingPreconditioner->category();
+    }
+
+    virtual void update() override
+    {
+        OPM_ERROR_IF(!m_underlyingPreconditioner,
+                     "You need to set the underlying preconditioner with setUnderlyingPreconditioner.");
+        updateMatrix();
+        m_underlyingPreconditioner->update();
+    }
+
+    const matrix_type_to& getConvertedMatrix() const
+    {
+        return m_convertedMatrix;
+    }
+
+    void setUnderlyingPreconditioner(const std::shared_ptr<CudaPreconditionerType>& conditioner)
+    {
+        m_underlyingPreconditioner = conditioner;
+    }
+
+
+private:
+    void updateMatrix()
+    {
+        const auto nnz = m_matrix.nonzeroes() * m_matrix[0][0].N() * m_matrix[0][0].N();
+        const auto dataPointerIn = static_cast<const field_type*>(&((m_matrix[0][0][0][0])));
+        auto dataPointerOut = static_cast<field_type_to*>(&((m_convertedMatrix[0][0][0][0])));
+
+        std::vector<field_type_to> buffer(nnz, 0);
+        for (size_t i = 0; i < nnz; ++i) {
+            dataPointerOut[i] = field_type_to(dataPointerIn[i]);
+        }
+    }
+    matrix_type_to createConvertedMatrix()
+    {
+        // TODO: Check if this whole conversion can be done more efficiently.
+        const auto N = m_matrix.N();
+        matrix_type_to matrixBuilder(N, N, m_matrix.nonzeroes(), matrix_type_to::row_wise);
+        {
+            auto rowIn = m_matrix.begin();
+            for (auto rowOut = matrixBuilder.createbegin(); rowOut != matrixBuilder.createend(); ++rowOut) {
+                for (auto column = rowIn->begin(); column != rowIn->end(); ++column) {
+                    rowOut.insert(column.index());
+                }
+                ++rowIn;
+            }
+        }
+
+        for (auto row = m_matrix.begin(); row != m_matrix.end(); ++row) {
+            for (auto column = row->begin(); column != row->end(); ++column) {
+                for (size_t i = 0; i < block_type::dimension; ++i) {
+                    for (size_t j = 0; j < block_type::dimension; ++j) {
+                        matrixBuilder[row.index()][column.index()][i][j]
+                            = field_type_to(m_matrix[row.index()][column.index()][i][j]);
+                    }
+                }
+            }
+        }
+
+        return matrixBuilder;
+    }
+    const M& m_matrix;
+    matrix_type_to m_convertedMatrix;
+    //! \brief the underlying preconditioner to use
+    std::shared_ptr<CudaPreconditionerType> m_underlyingPreconditioner;
+};
+} // end namespace Opm::cuistl
+
+#endif
--- a/opm/simulators/linalg/cuistl/detail/fix_zero_diagonal.hpp
+++ b/opm/simulators/linalg/cuistl/detail/fix_zero_diagonal.hpp
@ -0,0 +1,58 @@
+/*
+  Copyright 2022-2023 SINTEF AS
+
+  This file is part of the Open Porous Media project (OPM).
+
+  OPM is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+
+  OPM is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with OPM.  If not, see <http://www.gnu.org/licenses/>.
+*/
+#ifndef OPM_FIXZERODIAGONAL_HEADER_INCLUDED
+#define OPM_FIXZERODIAGONAL_HEADER_INCLUDED
+
+#include <limits>
+#include <vector>
+
+namespace Opm::cuistl::detail
+{
+
+/**
+ * @brief makeMatrixWithNonzeroDiagonal creates a new matrix with the zero diagonal elements (when viewed as a matrix of
+ * scalrars) set to replacementValue
+ * @param matrix the matrix to replace
+ * @param replacementValue the value to set in the diagonal elements that are zero
+ * @return a new matrix with non non-zero diagonal elements.
+ *
+ * @note This modification is needed for the CuSparse implementation of ILU0. While the the algorithm operates on block
+ * matrices, it still requires that the scalar matrix has no zero diagonal elements.
+ */
+template <class Matrix>
+const Matrix
+makeMatrixWithNonzeroDiagonal(const Matrix& matrix,
+                              const typename Matrix::field_type replacementValue
+                              = std::numeric_limits<typename Matrix::field_type>::epsilon())
+{
+    auto newMatrix = matrix;
+    // TODO: [perf] Is this fast enough?
+    for (size_t row = 0; row < newMatrix.N(); ++row) {
+        for (size_t component = 0; component < Matrix::block_type::cols; ++component) {
+            if (newMatrix[row][row][component][component] == 0) {
+                newMatrix[row][row][component][component] = replacementValue;
+            }
+        }
+    }
+
+    return newMatrix;
+}
+} // namespace Opm::cuistl::detail
+
+#endif
--- a/opm/simulators/linalg/cuistl/detail/has_function.hpp
+++ b/opm/simulators/linalg/cuistl/detail/has_function.hpp
@ -0,0 +1,88 @@
+/*
+  Copyright 2022-2023 SINTEF AS
+
+  This file is part of the Open Porous Media project (OPM).
+
+  OPM is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+
+  OPM is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with OPM.  If not, see <http://www.gnu.org/licenses/>.
+*/
+#ifndef OPM_CUISTL_DETAIL_HAS_FUNCTION_HPP
+#define OPM_CUISTL_DETAIL_HAS_FUNCTION_HPP
+#include <type_traits>
+/**
+ * Simple utility structs to test for the existence of functions in types.
+ *
+ * Note that there are alternatives to this, see for instance
+ * https://stackoverflow.com/questions/257288/templated-check-for-the-existence-of-a-class-member-function , however,
+ * this is by far the cleanest approach for where this is going to be used for now.
+ *
+ * TODO: Use the requires-keyword once C++20 becomes availble (https://en.cppreference.com/w/cpp/language/constraints ).
+ * With C++20 this file can be removed.
+ */
+namespace Opm::cuistl::detail
+{
+
+/**
+ * @brief The has_should_call_pre class detects the presence of the method shouldCallPre
+ *
+ * Usage:
+ *
+ * @code{.cpp}
+ * if constexpr (has_should_call_pre<decltype(preconditioner)>::value) {
+ *     // We know that the function shouldCallPre is present:
+ *     auto shouldCallPre = preconditioner.shouldCallPre();
+ * }
+ * @endcode
+ *
+ * @note This is mainly done in the GPU preconditioner to avoid having to copy data in the pre step.
+ */
+template <typename T>
+class has_should_call_pre
+{
+    template <typename U>
+    static std::true_type test(decltype(&U::shouldCallPre));
+    template <typename U>
+    static std::false_type test(...);
+
+public:
+    static constexpr bool value = std::is_same_v<decltype(test<T>(0)), std::true_type>;
+};
+
+/**
+ * @brief The has_should_call_post class detects the presence of the method shouldCallPost
+ *
+ * Usage:
+ *
+ * @code{.cpp}
+ * if constexpr (has_should_call_post<decltype(preconditioner)>::value) {
+ *     // We know that the function shouldCallPost is present:
+ *     auto shouldCallPost = preconditioner.shouldCallPost();
+ * }
+ * @endcode
+ *
+ * @note This is mainly done in the GPU preconditioner to avoid having to copy data in the post step.
+ */
+template <typename T>
+class has_should_call_post
+{
+    template <typename U>
+    static std::true_type test(decltype(&U::shouldCallPost));
+    template <typename U>
+    static std::false_type test(...);
+
+public:
+    static constexpr bool value = std::is_same_v<decltype(test<T>(0)), std::true_type>;
+};
+
+} // namespace Opm::cuistl::detail
+#endif
--- a/opm/simulators/linalg/cuistl/detail/preconditioner_should_call_post_pre.hpp
+++ b/opm/simulators/linalg/cuistl/detail/preconditioner_should_call_post_pre.hpp
@ -0,0 +1,64 @@
+/*
+  Copyright 2022-2023 SINTEF AS
+
+  This file is part of the Open Porous Media project (OPM).
+
+  OPM is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+
+  OPM is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with OPM.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef OPM_CUISTL_PRECONDIDTIONER_SHOULD_CALL_POST_PRE_HPP
+#define OPM_CUISTL_PRECONDIDTIONER_SHOULD_CALL_POST_PRE_HPP
+
+#include <opm/simulators/linalg/cuistl/detail/has_function.hpp>
+
+namespace Opm::cuistl::detail
+{
+
+//! @brief Tests (compile time) if the preconditioner type needs to call pre() before a call to apply()
+//!
+//! @note This is mostly used to avoid unneeded copying back and front to the GPU, as well
+//! as avoiding communication.
+template <class PreconditionerType>
+constexpr bool
+shouldCallPreconditionerPre()
+{
+    if constexpr (has_should_call_pre<PreconditionerType>::value) {
+        return PreconditionerType::shouldCallPre();
+    } else {
+        // If the preconditioner type does not have a way of signalling the need
+        // to call pre(), we should always call it. This is because it could be an
+        // old design.
+        return true;
+    }
+}
+
+//! @brief Tests (compile time) if the preconditioner type needs to call post() after a call to apply(...)
+//!
+//! @note This is mostly used to avoid unneeded copying back and front to the GPU, as well
+//! as avoiding communication.
+template <class PreconditionerType>
+constexpr bool
+shouldCallPreconditionerPost()
+{
+    if constexpr (has_should_call_post<PreconditionerType>::value) {
+        return PreconditionerType::shouldCallPost();
+    } else {
+        // If the preconditioner type does not have a way of signalling the need
+        // to call post(), we should always call it. This is because it could be an
+        // old design.
+        return true;
+    }
+}
+} // namespace Opm::cuistl::detail
+#endif
--- a/tests/cuistl/test_converttofloatadapter.cpp
+++ b/tests/cuistl/test_converttofloatadapter.cpp
@ -0,0 +1,192 @@
+/*
+  Copyright 2022-2023 SINTEF AS
+
+  This file is part of the Open Porous Media project (OPM).
+
+  OPM is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+
+  OPM is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with OPM.  If not, see <http://www.gnu.org/licenses/>.
+*/
+#include <config.h>
+
+#define BOOST_TEST_MODULE TestConvertToFloatAdapter
+#define BOOST_TEST_NO_MAIN
+
+
+#include <boost/mpl/list.hpp>
+#include <boost/test/unit_test.hpp>
+#include <dune/common/parallel/mpihelper.hh>
+#include <dune/istl/bcrsmatrix.hh>
+#include <dune/istl/preconditioners.hh>
+#include <limits>
+#include <memory>
+#include <opm/simulators/linalg/cuistl/PreconditionerConvertFieldTypeAdapter.hpp>
+
+
+using XDouble = Dune::BlockVector<Dune::FieldVector<double, 2>>;
+using MDouble = Dune::FieldMatrix<double, 2, 2>;
+using SpMatrixDouble = Dune::BCRSMatrix<MDouble>;
+using XFloat = Dune::BlockVector<Dune::FieldVector<float, 2>>;
+using MFloat = Dune::FieldMatrix<float, 2, 2>;
+using SpMatrixFloat = Dune::BCRSMatrix<MFloat>;
+namespace
+{
+class TestPreconditioner : Dune::PreconditionerWithUpdate<XFloat, XFloat>
+{
+public:
+    using range_type = XFloat;
+    using domain_type = XFloat;
+    TestPreconditioner(const SpMatrixFloat& matrix,
+                       const XDouble& expectedInput,
+                       const SpMatrixDouble& expectedMatrix,
+                       const XDouble& expectedOutputVector)
+        : m_matrix(matrix)
+        , m_expectedInput(expectedInput)
+        , m_expectedMatrix(expectedMatrix)
+        , m_expectedOutputVector(expectedOutputVector)
+    {
+    }
+
+    virtual void pre([[maybe_unused]] XFloat& x, [[maybe_unused]] XFloat& b) override
+    {
+    }
+
+
+    virtual void apply([[maybe_unused]] XFloat& v, const XFloat& d) override
+    {
+        // Make sure the correct input is copied
+        for (size_t i = 0; i < d.N(); ++i) {
+            for (size_t j = 0; j < d[i].N(); ++j) {
+                BOOST_CHECK_EQUAL(d[i][j], float(m_expectedInput[i][j]));
+                v[i][j] = float(m_expectedOutputVector[i][j]);
+            }
+        }
+
+        // make sure we get the correct matrix
+        BOOST_CHECK_EQUAL(m_expectedMatrix.N(), m_matrix.N());
+        BOOST_CHECK_EQUAL(m_expectedMatrix.nonzeroes(), m_matrix.nonzeroes());
+
+        for (auto row = m_matrix.begin(); row != m_matrix.end(); ++row) {
+            for (auto column = row->begin(); column != row->end(); ++column) {
+                for (int i = 0; i < MFloat::rows; ++i) {
+                    for (int j = 0; j < MFloat::cols; ++j) {
+                        BOOST_CHECK_EQUAL(float(m_expectedMatrix[i][j][i][j]), m_matrix[i][j][i][j]);
+                    }
+                }
+            }
+        }
+    }
+
+    virtual void post([[maybe_unused]] XFloat& x) override
+    {
+    }
+
+    virtual void update() override
+    {
+    }
+
+
+    //! Category of the preconditioner (see SolverCategory::Category)
+    virtual Dune::SolverCategory::Category category() const override
+    {
+        return Dune::SolverCategory::sequential;
+    }
+
+    static constexpr bool shouldCallPre()
+    {
+        return false;
+    }
+    static constexpr bool shouldCallPost()
+    {
+        return false;
+    }
+
+
+private:
+    const SpMatrixFloat& m_matrix;
+    const XDouble& m_expectedInput;
+    const SpMatrixDouble& m_expectedMatrix;
+    const XDouble& m_expectedOutputVector;
+};
+} // namespace
+
+using NumericTypes = boost::mpl::list<double, float>;
+
+BOOST_AUTO_TEST_CASE(TestFiniteDifference1D)
+{
+
+
+    const int N = 5;
+    const int nonZeroes = N * 3 - 2;
+
+    SpMatrixDouble B(N, N, nonZeroes, SpMatrixDouble::row_wise);
+    for (auto row = B.createbegin(); row != B.createend(); ++row) {
+        // Add nonzeros for left neighbour, diagonal and right neighbour
+        if (row.index() > 0) {
+            row.insert(row.index() - 1);
+        }
+        row.insert(row.index());
+        if (row.index() < B.N() - 1) {
+            row.insert(row.index() + 1);
+        }
+    }
+    // This might not be the most elegant way of filling in a Dune sparse matrix, but it works.
+    for (int i = 0; i < N; ++i) {
+        B[i][i] = -2;
+        if (i < N - 1) {
+            B[i][i + 1] = 1;
+        }
+
+        if (i > 0) {
+            B[i][i - 1] = 1;
+        }
+    }
+
+
+
+    // check for the standard basis {e_i}
+    // (e_i=(0,...,0, 1 (i-th place), 0, ..., 0))
+    for (int i = 0; i < N; ++i) {
+        XDouble inputVector(N);
+        XDouble outputVector(N);
+        XDouble expectedOutputVector(N);
+        expectedOutputVector[i][0] = 42.0;
+        expectedOutputVector[i][1] = 43.0;
+        inputVector[i][0] = 1.0;
+        auto converter
+            = Opm::cuistl::PreconditionerConvertFieldTypeAdapter<TestPreconditioner, SpMatrixDouble, XDouble, XDouble>(
+                B);
+        auto underlyingPreconditioner = std::make_shared<TestPreconditioner>(
+            converter.getConvertedMatrix(), inputVector, B, expectedOutputVector);
+        converter.setUnderlyingPreconditioner(underlyingPreconditioner);
+        converter.apply(outputVector, inputVector);
+
+        for (size_t j = 0; j < expectedOutputVector.N(); ++j) {
+            for (size_t k = 0; k < expectedOutputVector[i].N(); ++k) {
+                BOOST_CHECK_EQUAL(outputVector[j][k], float(expectedOutputVector[j][k]));
+            }
+        }
+    }
+}
+
+bool
+init_unit_test_func()
+{
+    return true;
+}
+
+int
+main(int argc, char** argv)
+{
+    [[maybe_unused]] const auto& helper = Dune::MPIHelper::instance(argc, argv);
+    boost::unit_test::unit_test_main(&init_unit_test_func, argc, argv);
+}
--- a/tests/cuistl/test_cuseqilu0.cpp
+++ b/tests/cuistl/test_cuseqilu0.cpp
@ -0,0 +1,235 @@
+/*
+  Copyright 2022-2023 SINTEF AS
+
+  This file is part of the Open Porous Media project (OPM).
+
+  OPM is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+
+  OPM is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with OPM.  If not, see <http://www.gnu.org/licenses/>.
+*/
+#include <config.h>
+
+#define BOOST_TEST_MODULE TestCuSeqILU0
+#define BOOST_TEST_NO_MAIN
+
+
+#include <boost/mpl/list.hpp>
+#include <boost/test/unit_test.hpp>
+#include <dune/common/parallel/mpihelper.hh>
+#include <dune/istl/bcrsmatrix.hh>
+#include <dune/istl/preconditioners.hh>
+#include <opm/simulators/linalg/cuistl/CuSeqILU0.hpp>
+#include <opm/simulators/linalg/cuistl/CuVector.hpp>
+#include <opm/simulators/linalg/cuistl/PreconditionerAdapter.hpp>
+#include <opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp>
+
+#include <limits>
+#include <memory>
+
+
+using NumericTypes = boost::mpl::list<double, float>;
+
+BOOST_AUTO_TEST_CASE_TEMPLATE(TestFiniteDifference1D, T, NumericTypes)
+{
+    // Here we will test a simple 1D finite difference scheme for
+    // the Laplace equation:
+    //
+    //    -\Delta u = f on [0,1]
+    //
+    // Using a central difference approximation of \Delta u, this can
+    // be approximated by
+    //
+    //    -(u_{i+1}-2u_i+u_{i-1})/Dx^2 = f(x_i)
+    //
+    // giving rise to the matrix
+    //
+    //     -2  1  0  0 ... 0  0
+    //      1 -2  1  0  0 ... 0
+    //      ....
+    //      0  0  0  ...1 -2  1
+    //      0  0  0  ...   1 -2
+
+    const int N = 5;
+    const int nonZeroes = N * 3 - 2;
+    using M = Dune::FieldMatrix<T, 1, 1>;
+    using SpMatrix = Dune::BCRSMatrix<M>;
+    using Vector = Dune::BlockVector<Dune::FieldVector<T, 1>>;
+    using CuILU0 = Opm::cuistl::CuSeqILU0<SpMatrix, Opm::cuistl::CuVector<T>, Opm::cuistl::CuVector<T>>;
+
+    SpMatrix B(N, N, nonZeroes, SpMatrix::row_wise);
+    for (auto row = B.createbegin(); row != B.createend(); ++row) {
+        // Add nonzeros for left neighbour, diagonal and right neighbour
+        if (row.index() > 0) {
+            row.insert(row.index() - 1);
+        }
+        row.insert(row.index());
+        if (row.index() < B.N() - 1) {
+            row.insert(row.index() + 1);
+        }
+    }
+    // This might not be the most elegant way of filling in a Dune sparse matrix, but it works.
+    for (int i = 0; i < N; ++i) {
+        B[i][i] = -2;
+        if (i < N - 1) {
+            B[i][i + 1] = 1;
+        }
+
+        if (i > 0) {
+            B[i][i - 1] = 1;
+        }
+    }
+
+
+    auto duneILU = Dune::SeqILU<SpMatrix, Vector, Vector>(B, 1.0);
+
+    auto cuILU = Opm::cuistl::PreconditionerAdapter<Vector, Vector, CuILU0>(std::make_shared<CuILU0>(B, 1.0));
+
+    // check for the standard basis {e_i}
+    // (e_i=(0,...,0, 1 (i-th place), 0, ..., 0))
+    for (int i = 0; i < N; ++i) {
+        Vector inputVector(N);
+        inputVector[i][0] = 1.0;
+        Vector outputVectorDune(N);
+        Vector outputVectorCuistl(N);
+        duneILU.apply(outputVectorDune, inputVector);
+        cuILU.apply(outputVectorCuistl, inputVector);
+
+        for (int component = 0; component < N; ++component) {
+            BOOST_CHECK_CLOSE(outputVectorDune[component][0],
+                              outputVectorCuistl[component][0],
+                              std::numeric_limits<T>::epsilon() * 1000);
+        }
+    }
+
+    // Now we check that we can update the matrix. We basically just negate B
+    B *= -1.0;
+    auto duneILUNew = Dune::SeqILU<SpMatrix, Vector, Vector>(B, 1.0);
+    cuILU.update();
+    // check for the standard basis {e_i}
+    // (e_i=(0,...,0, 1 (i-th place), 0, ..., 0))
+    for (int i = 0; i < N; ++i) {
+        Vector inputVector(N);
+        inputVector[i][0] = 1.0;
+        Vector outputVectorDune(N);
+        Vector outputVectorCuistl(N);
+        duneILUNew.apply(outputVectorDune, inputVector);
+        cuILU.apply(outputVectorCuistl, inputVector);
+
+        for (int component = 0; component < N; ++component) {
+            BOOST_CHECK_CLOSE(outputVectorDune[component][0],
+                              outputVectorCuistl[component][0],
+                              std::numeric_limits<T>::epsilon() * 1000);
+        }
+    }
+}
+
+
+BOOST_AUTO_TEST_CASE_TEMPLATE(TestFiniteDifferenceBlock2, T, NumericTypes)
+{
+    // Here we will test a simple 1D finite difference scheme for
+    // the Laplace equation:
+    //
+    //    -\Delta u = f on [0,1]
+    //
+    // Using a central difference approximation of \Delta u, this can
+    // be approximated by
+    //
+    //    -(u_{i+1}-2u_i+u_{i-1})/Dx^2 = f(x_i)
+    //
+    // giving rise to the matrix
+    //
+    //     -2  1  0  0 ... 0  0
+    //      1 -2  1  0  0 ... 0
+    //      ....
+    //      0  0  0  ...1 -2  1
+    //      0  0  0  ...   1 -2
+
+    const int N = 5;
+    const int nonZeroes = N * 3 - 2;
+    using M = Dune::FieldMatrix<T, 2, 2>;
+    using SpMatrix = Dune::BCRSMatrix<M>;
+    using Vector = Dune::BlockVector<Dune::FieldVector<T, 2>>;
+    using CuILU0 = Opm::cuistl::CuSeqILU0<SpMatrix, Opm::cuistl::CuVector<T>, Opm::cuistl::CuVector<T>>;
+
+    SpMatrix B(N, N, nonZeroes, SpMatrix::row_wise);
+    for (auto row = B.createbegin(); row != B.createend(); ++row) {
+        row.insert(row.index());
+        if (row.index() < N - 1) {
+            row.insert(row.index() + 1);
+        }
+        if (row.index() > 0) {
+            row.insert(row.index() - 1);
+        }
+    }
+    // This might not be the most elegant way of filling in a Dune sparse matrix, but it works.
+    for (int i = 0; i < N; ++i) {
+        B[i][i][0][0] = -2;
+        B[i][i][1][1] = -2;
+        B[i][i][0][1] = 1;
+        B[i][i][1][0] = 1;
+    }
+
+
+    auto duneILU = Dune::SeqILU<SpMatrix, Vector, Vector>(B, 1.0);
+
+    auto cuILU = Opm::cuistl::PreconditionerAdapter<Vector, Vector, CuILU0>(std::make_shared<CuILU0>(B, 1.0));
+
+    // check for the standard basis {e_i}
+    // (e_i=(0,...,0, 1 (i-th place), 0, ..., 0))
+    for (int i = 0; i < N; ++i) {
+        Vector inputVector(N);
+        inputVector[i][0] = 1.0;
+        Vector outputVectorDune(N);
+        Vector outputVectorCuistl(N);
+        duneILU.apply(outputVectorDune, inputVector);
+        cuILU.apply(outputVectorCuistl, inputVector);
+
+        for (int component = 0; component < N; ++component) {
+            BOOST_CHECK_CLOSE(outputVectorDune[component][0],
+                              outputVectorCuistl[component][0],
+                              std::numeric_limits<T>::epsilon() * 1000);
+        }
+    }
+
+    // Now we check that we can update the matrix. We basically just negate B
+    B *= -1.0;
+    auto duneILUNew = Dune::SeqILU<SpMatrix, Vector, Vector>(B, 1.0);
+    cuILU.update();
+    // check for the standard basis {e_i}
+    // (e_i=(0,...,0, 1 (i-th place), 0, ..., 0))
+    for (int i = 0; i < N; ++i) {
+        Vector inputVector(N);
+        inputVector[i][0] = 1.0;
+        Vector outputVectorDune(N);
+        Vector outputVectorCuistl(N);
+        duneILUNew.apply(outputVectorDune, inputVector);
+        cuILU.apply(outputVectorCuistl, inputVector);
+
+        for (int component = 0; component < N; ++component) {
+            BOOST_CHECK_CLOSE(outputVectorDune[component][0],
+                              outputVectorCuistl[component][0],
+                              std::numeric_limits<T>::epsilon() * 1000);
+        }
+    }
+}
+bool
+init_unit_test_func()
+{
+    return true;
+}
+
+int
+main(int argc, char** argv)
+{
+    [[maybe_unused]] const auto& helper = Dune::MPIHelper::instance(argc, argv);
+    boost::unit_test::unit_test_main(&init_unit_test_func, argc, argv);
+}