refactor cusparsematrix

Tobias Meyer Andersen 2024-08-22 15:14:33 +02:00
parent 0c1ea3ee4d
commit 3aa1767548
20 changed files with 96 additions and 96 deletions

View File

@ -554,7 +554,7 @@ if(CUDA_FOUND)
cuSparse_matrix_operations
cuVector_operations
cuvector
cusparsematrix
GpuSparseMatrix
GpuSeqILU0
GpuOwnerOverlapCopy
solver_adapter

View File

@ -216,7 +216,7 @@ if (HAVE_CUDA)
ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg CuVector.cpp)
ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg CuView.cpp)
ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg detail/vector_operations.cu)
ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg CuSparseMatrix.cpp)
ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg GpuSparseMatrix.cpp)
ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg GpuDILU.cpp)
ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg OpmCuILU0.cpp)
ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg GpuJac.cpp)
@ -242,7 +242,7 @@ if (HAVE_CUDA)
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg GpuJac.hpp)
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg CuVector.hpp)
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg CuView.hpp)
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg CuSparseMatrix.hpp)
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg GpuSparseMatrix.hpp)
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg detail/CuMatrixDescription.hpp)
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg detail/CuSparseResource.hpp)
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg detail/CuSparseResource_impl.hpp)
@ -400,7 +400,7 @@ if (HAVE_CUDA)
ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_GpuSeqILU0.cpp)
ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_cusparse_handle.cpp)
ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_cuSparse_matrix_operations.cpp)
ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_cusparsematrix.cpp)
ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_GpuSparseMatrix.cpp)
ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_cuvector.cpp)
ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_cuVector_operations.cpp)
ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_safe_conversion.cpp)

View File

@ -27,7 +27,7 @@
#include <opm/simulators/linalg/GraphColoring.hpp>
#include <opm/simulators/linalg/cuistl/detail/autotuner.hpp>
#include <opm/simulators/linalg/cuistl/GpuDILU.hpp>
#include <opm/simulators/linalg/cuistl/CuSparseMatrix.hpp>
#include <opm/simulators/linalg/cuistl/GpuSparseMatrix.hpp>
#include <opm/simulators/linalg/cuistl/CuVector.hpp>
#include <opm/simulators/linalg/cuistl/detail/coloringAndReorderingUtils.hpp>
#include <opm/simulators/linalg/cuistl/detail/cusparse_matrix_operations.hpp>
@ -46,7 +46,7 @@ GpuDILU<M, X, Y, l>::GpuDILU(const M& A, bool splitMatrix, bool tuneKernels)
, m_levelSets(Opm::getMatrixRowColoring(m_cpuMatrix, Opm::ColoringType::LOWER))
, m_reorderedToNatural(detail::createReorderedToNatural(m_levelSets))
, m_naturalToReordered(detail::createNaturalToReordered(m_levelSets))
, m_gpuMatrix(CuSparseMatrix<field_type>::fromMatrix(m_cpuMatrix, true))
, m_gpuMatrix(GpuSparseMatrix<field_type>::fromMatrix(m_cpuMatrix, true))
, m_gpuNaturalToReorder(m_naturalToReordered)
, m_gpuReorderToNatural(m_reorderedToNatural)
, m_gpuDInv(m_gpuMatrix.N() * m_gpuMatrix.blockSize() * m_gpuMatrix.blockSize())
@ -73,11 +73,11 @@ GpuDILU<M, X, Y, l>::GpuDILU(const M& A, bool splitMatrix, bool tuneKernels)
if (m_splitMatrix) {
m_gpuMatrixReorderedDiag = std::make_unique<CuVector<field_type>>(blocksize_ * blocksize_ * m_cpuMatrix.N());
std::tie(m_gpuMatrixReorderedLower, m_gpuMatrixReorderedUpper)
= detail::extractLowerAndUpperMatrices<M, field_type, CuSparseMatrix<field_type>>(m_cpuMatrix,
= detail::extractLowerAndUpperMatrices<M, field_type, GpuSparseMatrix<field_type>>(m_cpuMatrix,
m_reorderedToNatural);
}
else {
m_gpuMatrixReordered = detail::createReorderedMatrix<M, field_type, CuSparseMatrix<field_type>>(
m_gpuMatrixReordered = detail::createReorderedMatrix<M, field_type, GpuSparseMatrix<field_type>>(
m_cpuMatrix, m_reorderedToNatural);
}
computeDiagAndMoveReorderedData(m_moveThreadBlockSize, m_DILUFactorizationThreadBlockSize);

View File

@ -22,7 +22,7 @@
#include <memory>
#include <opm/grid/utility/SparseTable.hpp>
#include <opm/simulators/linalg/PreconditionerWithUpdate.hpp>
#include <opm/simulators/linalg/cuistl/CuSparseMatrix.hpp>
#include <opm/simulators/linalg/cuistl/GpuSparseMatrix.hpp>
#include <vector>
@ -52,7 +52,7 @@ public:
//! \brief The field type of the preconditioner.
using field_type = typename X::field_type;
//! \brief The GPU matrix type
using CuMat = CuSparseMatrix<field_type>;
using CuMat = GpuSparseMatrix<field_type>;
//! \brief Constructor.
//!

View File

@ -33,7 +33,7 @@ template <class M, class X, class Y, int l>
GpuJac<M, X, Y, l>::GpuJac(const M& A, field_type w)
: m_cpuMatrix(A)
, m_relaxationFactor(w)
, m_gpuMatrix(CuSparseMatrix<field_type>::fromMatrix(A))
, m_gpuMatrix(GpuSparseMatrix<field_type>::fromMatrix(A))
, m_diagInvFlattened(m_gpuMatrix.N() * m_gpuMatrix.blockSize() * m_gpuMatrix.blockSize())
{
// Some sanity check

View File

@ -21,7 +21,7 @@
#include <dune/istl/preconditioner.hh>
#include <opm/simulators/linalg/PreconditionerWithUpdate.hpp>
#include <opm/simulators/linalg/cuistl/CuSparseMatrix.hpp>
#include <opm/simulators/linalg/cuistl/GpuSparseMatrix.hpp>
#include <opm/simulators/linalg/cuistl/detail/CuMatrixDescription.hpp>
#include <opm/simulators/linalg/cuistl/detail/CuSparseHandle.hpp>
#include <opm/simulators/linalg/cuistl/detail/CuSparseResource.hpp>
@ -104,7 +104,7 @@ private:
//! \brief The relaxation factor to use.
const field_type m_relaxationFactor;
//! \brief The A matrix stored on the gpu
CuSparseMatrix<field_type> m_gpuMatrix;
GpuSparseMatrix<field_type> m_gpuMatrix;
//! \brief the diagonal of cuMatrix inverted, and then flattened to fit in a vector
CuVector<field_type> m_diagInvFlattened;

View File

@ -44,7 +44,7 @@ template <class M, class X, class Y, int l>
GpuSeqILU0<M, X, Y, l>::GpuSeqILU0(const M& A, field_type w)
: m_underlyingMatrix(A)
, m_w(w)
, m_LU(CuSparseMatrix<field_type>::fromMatrix(detail::makeMatrixWithNonzeroDiagonal(A)))
, m_LU(GpuSparseMatrix<field_type>::fromMatrix(detail::makeMatrixWithNonzeroDiagonal(A)))
, m_temporaryStorage(m_LU.N() * m_LU.blockSize())
, m_descriptionL(detail::createLowerDiagonalDescription())
, m_descriptionU(detail::createUpperDiagonalDescription())

View File

@ -21,7 +21,7 @@
#include <dune/istl/preconditioner.hh>
#include <opm/simulators/linalg/PreconditionerWithUpdate.hpp>
#include <opm/simulators/linalg/cuistl/CuSparseMatrix.hpp>
#include <opm/simulators/linalg/cuistl/GpuSparseMatrix.hpp>
#include <opm/simulators/linalg/cuistl/detail/CuMatrixDescription.hpp>
#include <opm/simulators/linalg/cuistl/detail/CuSparseHandle.hpp>
#include <opm/simulators/linalg/cuistl/detail/CuSparseResource.hpp>
@ -110,13 +110,13 @@ private:
//! This is the storage for the LU decomposition.
//! Initially this will have the values of A, but will be
//! modified in the constructor to be the proper LU decomposition.
CuSparseMatrix<field_type> m_LU;
GpuSparseMatrix<field_type> m_LU;
CuVector<field_type> m_temporaryStorage;
detail::CuSparseMatrixDescriptionPtr m_descriptionL;
detail::CuSparseMatrixDescriptionPtr m_descriptionU;
detail::GpuSparseMatrixDescriptionPtr m_descriptionL;
detail::GpuSparseMatrixDescriptionPtr m_descriptionU;
detail::CuSparseResource<bsrsv2Info_t> m_infoL;
detail::CuSparseResource<bsrsv2Info_t> m_infoU;
detail::CuSparseResource<bsrilu02Info_t> m_infoM;

View File

@ -22,7 +22,7 @@
#include <dune/istl/bcrsmatrix.hh>
#include <dune/istl/bvector.hh>
#include <fmt/core.h>
#include <opm/simulators/linalg/cuistl/CuSparseMatrix.hpp>
#include <opm/simulators/linalg/cuistl/GpuSparseMatrix.hpp>
#include <opm/simulators/linalg/cuistl/detail/cusparse_constants.hpp>
#include <opm/simulators/linalg/cuistl/detail/cusparse_safe_call.hpp>
#include <opm/simulators/linalg/cuistl/detail/cusparse_wrapper.hpp>
@ -61,7 +61,7 @@ namespace
template <class T>
CuSparseMatrix<T>::CuSparseMatrix(const T* nonZeroElements,
GpuSparseMatrix<T>::GpuSparseMatrix(const T* nonZeroElements,
const int* rowIndices,
const int* columnIndices,
size_t numberOfNonzeroBlocks,
@ -82,15 +82,15 @@ CuSparseMatrix<T>::CuSparseMatrix(const T* nonZeroElements,
}
template <class T>
CuSparseMatrix<T>::~CuSparseMatrix()
GpuSparseMatrix<T>::~GpuSparseMatrix()
{
// empty
}
template <typename T>
template <typename MatrixType>
CuSparseMatrix<T>
CuSparseMatrix<T>::fromMatrix(const MatrixType& matrix, bool copyNonZeroElementsDirectly)
GpuSparseMatrix<T>
GpuSparseMatrix<T>::fromMatrix(const MatrixType& matrix, bool copyNonZeroElementsDirectly)
{
// TODO: Do we need this intermediate storage? Or this shuffling of data?
std::vector<int> columnIndices;
@ -129,18 +129,18 @@ CuSparseMatrix<T>::fromMatrix(const MatrixType& matrix, bool copyNonZeroElements
// Sanity check
// h_rows and h_cols could be changed to 'unsigned int', but cusparse expects 'int'
OPM_ERROR_IF(rowIndices[matrix.N()] != detail::to_int(matrix.nonzeroes()),
"Error size of rows do not sum to number of nonzeroes in CuSparseMatrix.");
OPM_ERROR_IF(rowIndices.size() != numberOfRows + 1, "Row indices do not match for CuSparseMatrix.");
OPM_ERROR_IF(columnIndices.size() != numberOfNonzeroBlocks, "Column indices do not match for CuSparseMatrix.");
"Error size of rows do not sum to number of nonzeroes in GpuSparseMatrix.");
OPM_ERROR_IF(rowIndices.size() != numberOfRows + 1, "Row indices do not match for GpuSparseMatrix.");
OPM_ERROR_IF(columnIndices.size() != numberOfNonzeroBlocks, "Column indices do not match for GpuSparseMatrix.");
if (copyNonZeroElementsDirectly) {
const T* nonZeroElements = nonZeroElementsTmp;
return CuSparseMatrix<T>(
return GpuSparseMatrix<T>(
nonZeroElements, rowIndices.data(), columnIndices.data(), numberOfNonzeroBlocks, blockSize, numberOfRows);
} else {
auto nonZeroElementData = extractNonzeroValues<T>(matrix);
return CuSparseMatrix<T>(nonZeroElementData.data(),
return GpuSparseMatrix<T>(nonZeroElementData.data(),
rowIndices.data(),
columnIndices.data(),
numberOfNonzeroBlocks,
@ -152,7 +152,7 @@ CuSparseMatrix<T>::fromMatrix(const MatrixType& matrix, bool copyNonZeroElements
template <class T>
template <class MatrixType>
void
CuSparseMatrix<T>::updateNonzeroValues(const MatrixType& matrix, bool copyNonZeroElementsDirectly)
GpuSparseMatrix<T>::updateNonzeroValues(const MatrixType& matrix, bool copyNonZeroElementsDirectly)
{
OPM_ERROR_IF(nonzeroes() != matrix.nonzeroes(), "Matrix does not have the same number of non-zero elements.");
OPM_ERROR_IF(matrix[0][0].N() != blockSize(), "Matrix does not have the same blocksize.");
@ -170,42 +170,42 @@ CuSparseMatrix<T>::updateNonzeroValues(const MatrixType& matrix, bool copyNonZer
template <typename T>
void
CuSparseMatrix<T>::setUpperTriangular()
GpuSparseMatrix<T>::setUpperTriangular()
{
OPM_CUSPARSE_SAFE_CALL(cusparseSetMatFillMode(m_matrixDescription->get(), CUSPARSE_FILL_MODE_UPPER));
}
template <typename T>
void
CuSparseMatrix<T>::setLowerTriangular()
GpuSparseMatrix<T>::setLowerTriangular()
{
OPM_CUSPARSE_SAFE_CALL(cusparseSetMatFillMode(m_matrixDescription->get(), CUSPARSE_FILL_MODE_LOWER));
}
template <typename T>
void
CuSparseMatrix<T>::setUnitDiagonal()
GpuSparseMatrix<T>::setUnitDiagonal()
{
OPM_CUSPARSE_SAFE_CALL(cusparseSetMatDiagType(m_matrixDescription->get(), CUSPARSE_DIAG_TYPE_UNIT));
}
template <typename T>
void
CuSparseMatrix<T>::setNonUnitDiagonal()
GpuSparseMatrix<T>::setNonUnitDiagonal()
{
OPM_CUSPARSE_SAFE_CALL(cusparseSetMatDiagType(m_matrixDescription->get(), CUSPARSE_DIAG_TYPE_NON_UNIT));
}
template <typename T>
void
CuSparseMatrix<T>::mv(const CuVector<T>& x, CuVector<T>& y) const
GpuSparseMatrix<T>::mv(const CuVector<T>& x, CuVector<T>& y) const
{
assertSameSize(x);
assertSameSize(y);
if (blockSize() < 2u) {
OPM_THROW(
std::invalid_argument,
"CuSparseMatrix<T>::usmv and CuSparseMatrix<T>::mv are only implemented for block sizes greater than 1.");
"GpuSparseMatrix<T>::usmv and GpuSparseMatrix<T>::mv are only implemented for block sizes greater than 1.");
}
const auto nonzeroValues = getNonZeroValues().data();
@ -232,14 +232,14 @@ CuSparseMatrix<T>::mv(const CuVector<T>& x, CuVector<T>& y) const
template <typename T>
void
CuSparseMatrix<T>::umv(const CuVector<T>& x, CuVector<T>& y) const
GpuSparseMatrix<T>::umv(const CuVector<T>& x, CuVector<T>& y) const
{
assertSameSize(x);
assertSameSize(y);
if (blockSize() < 2u) {
OPM_THROW(
std::invalid_argument,
"CuSparseMatrix<T>::usmv and CuSparseMatrix<T>::mv are only implemented for block sizes greater than 1.");
"GpuSparseMatrix<T>::usmv and GpuSparseMatrix<T>::mv are only implemented for block sizes greater than 1.");
}
const auto nonzeroValues = getNonZeroValues().data();
@ -267,14 +267,14 @@ CuSparseMatrix<T>::umv(const CuVector<T>& x, CuVector<T>& y) const
template <typename T>
void
CuSparseMatrix<T>::usmv(T alpha, const CuVector<T>& x, CuVector<T>& y) const
GpuSparseMatrix<T>::usmv(T alpha, const CuVector<T>& x, CuVector<T>& y) const
{
assertSameSize(x);
assertSameSize(y);
if (blockSize() < 2) {
OPM_THROW(
std::invalid_argument,
"CuSparseMatrix<T>::usmv and CuSparseMatrix<T>::mv are only implemented for block sizes greater than 1.");
"GpuSparseMatrix<T>::usmv and GpuSparseMatrix<T>::mv are only implemented for block sizes greater than 1.");
}
const auto numberOfRows = N();
const auto numberOfNonzeroBlocks = nonzeroes();
@ -304,7 +304,7 @@ CuSparseMatrix<T>::usmv(T alpha, const CuVector<T>& x, CuVector<T>& y) const
template <class T>
template <class VectorType>
void
CuSparseMatrix<T>::assertSameSize(const VectorType& x) const
GpuSparseMatrix<T>::assertSameSize(const VectorType& x) const
{
if (x.dim() != blockSize() * N()) {
OPM_THROW(std::invalid_argument,
@ -317,17 +317,17 @@ CuSparseMatrix<T>::assertSameSize(const VectorType& x) const
#define INSTANTIATE_CUSPARSE_DUNE_MATRIX_CONSTRUCTION_FUNTIONS(realtype, blockdim) \
template CuSparseMatrix<realtype> CuSparseMatrix<realtype>::fromMatrix( \
template GpuSparseMatrix<realtype> GpuSparseMatrix<realtype>::fromMatrix( \
const Dune::BCRSMatrix<Dune::FieldMatrix<realtype, blockdim, blockdim>>&, bool); \
template CuSparseMatrix<realtype> CuSparseMatrix<realtype>::fromMatrix( \
template GpuSparseMatrix<realtype> GpuSparseMatrix<realtype>::fromMatrix( \
const Dune::BCRSMatrix<Opm::MatrixBlock<realtype, blockdim, blockdim>>&, bool); \
template void CuSparseMatrix<realtype>::updateNonzeroValues( \
template void GpuSparseMatrix<realtype>::updateNonzeroValues( \
const Dune::BCRSMatrix<Dune::FieldMatrix<realtype, blockdim, blockdim>>&, bool); \
template void CuSparseMatrix<realtype>::updateNonzeroValues( \
template void GpuSparseMatrix<realtype>::updateNonzeroValues( \
const Dune::BCRSMatrix<Opm::MatrixBlock<realtype, blockdim, blockdim>>&, bool)
template class CuSparseMatrix<float>;
template class CuSparseMatrix<double>;
template class GpuSparseMatrix<float>;
template class GpuSparseMatrix<double>;
INSTANTIATE_CUSPARSE_DUNE_MATRIX_CONSTRUCTION_FUNTIONS(double, 1);
INSTANTIATE_CUSPARSE_DUNE_MATRIX_CONSTRUCTION_FUNTIONS(double, 2);

View File

@ -16,8 +16,8 @@
You should have received a copy of the GNU General Public License
along with OPM. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef OPM_CUSPARSEMATRIX_HPP
#define OPM_CUSPARSEMATRIX_HPP
#ifndef OPM_GPUSPARSEMATRIX_HPP
#define OPM_GPUSPARSEMATRIX_HPP
#include <cusparse.h>
#include <iostream>
#include <memory>
@ -32,7 +32,7 @@ namespace Opm::gpuistl
{
/**
* @brief The CuSparseMatrix class is a simple wrapper class for a CuSparse matrix.
* @brief The GpuSparseMatrix class is a simple wrapper class for a CuSparse matrix.
*
* @note we currently only support simple raw primitives for T (double and float). Block size is handled through the
* block size parameter
@ -44,7 +44,7 @@ namespace Opm::gpuistl
* @note We only support Block Compressed Sparse Row Format (BSR) for now.
*/
template <typename T>
class CuSparseMatrix
class GpuSparseMatrix
{
public:
//! Create the sparse matrix specified by the raw data.
@ -60,7 +60,7 @@ public:
//!
//! \note We assume numberOfNonzeroBlocks, blockSize and numberOfRows all are representable as int due to
//! restrictions in the current version of cusparse. This might change in future versions.
CuSparseMatrix(const T* nonZeroElements,
GpuSparseMatrix(const T* nonZeroElements,
const int* rowIndices,
const int* columnIndices,
size_t numberOfNonzeroBlocks,
@ -70,14 +70,14 @@ public:
/**
* We don't want to be able to copy this for now (too much hassle in copying the cusparse resources)
*/
CuSparseMatrix(const CuSparseMatrix&) = delete;
GpuSparseMatrix(const GpuSparseMatrix&) = delete;
/**
* We don't want to be able to copy this for now (too much hassle in copying the cusparse resources)
*/
CuSparseMatrix& operator=(const CuSparseMatrix&) = delete;
GpuSparseMatrix& operator=(const GpuSparseMatrix&) = delete;
virtual ~CuSparseMatrix();
virtual ~GpuSparseMatrix();
/**
* @brief fromMatrix creates a new matrix with the same block size and values as the given matrix
@ -89,7 +89,7 @@ public:
* @tparam MatrixType is assumed to be a Dune::BCRSMatrix compatible matrix.
*/
template <class MatrixType>
static CuSparseMatrix<T> fromMatrix(const MatrixType& matrix, bool copyNonZeroElementsDirectly = false);
static GpuSparseMatrix<T> fromMatrix(const MatrixType& matrix, bool copyNonZeroElementsDirectly = false);
/**
* @brief setUpperTriangular sets the CuSparse flag that this is an upper triangular (with unit diagonal) matrix.
@ -233,7 +233,7 @@ public:
*
* This description is needed for most calls to the CuSparse library
*/
detail::CuSparseMatrixDescription& getDescription()
detail::GpuSparseMatrixDescription& getDescription()
{
return *m_matrixDescription;
}
@ -292,7 +292,7 @@ private:
const int m_numberOfRows;
const int m_blockSize;
detail::CuSparseMatrixDescriptionPtr m_matrixDescription;
detail::GpuSparseMatrixDescriptionPtr m_matrixDescription;
detail::CuSparseHandle& m_cusparseHandle;
template <class VectorType>
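
For orientation, the rename does not change the interface documented above. Below is a minimal, hedged sketch of how the class is exercised, mirroring the updated test_GpuSparseMatrix.cpp later in this commit; the helper function and the assembled input matrix are illustrative only, not part of the change.

#include <opm/simulators/linalg/cuistl/GpuSparseMatrix.hpp>
#include <opm/simulators/linalg/cuistl/CuVector.hpp>
#include <dune/istl/bcrsmatrix.hh>
#include <dune/common/fmatrix.hh>
#include <vector>

// Sketch only: 'matrix' stands for an already-assembled 2x2-blocked BCRS matrix,
// as set up in the tests touched by this commit.
using Block = Dune::FieldMatrix<double, 2, 2>;

void sketchGpuSpmv(const Dune::BCRSMatrix<Block>& matrix)
{
    // Upload to the GPU; passing 'true' as the second argument would copy the
    // nonzero elements directly from the Dune matrix storage, as GpuDILU and
    // OpmCuILU0 do elsewhere in this commit.
    auto gpuMatrix = Opm::gpuistl::GpuSparseMatrix<double>::fromMatrix(matrix);

    // Dense GPU vectors sized to the blocked dimension of the matrix
    // (their contents are left unspecified in this sketch).
    Opm::gpuistl::CuVector<double> x(gpuMatrix.N() * gpuMatrix.blockSize());
    Opm::gpuistl::CuVector<double> y(gpuMatrix.N() * gpuMatrix.blockSize());

    gpuMatrix.mv(x, y);         // y  = A x   (only implemented for block size > 1)
    gpuMatrix.usmv(1.42, x, y); // y += alpha * A x

    // Copy the nonzero blocks back to the host for inspection.
    std::vector<double> values(gpuMatrix.getNonZeroValues().dim(), 0.0);
    gpuMatrix.getNonZeroValues().copyToHost(values.data(), values.size());
}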

View File

@ -26,7 +26,7 @@
#include <opm/common/ErrorMacros.hpp>
#include <opm/common/TimingMacros.hpp>
#include <opm/simulators/linalg/GraphColoring.hpp>
#include <opm/simulators/linalg/cuistl/CuSparseMatrix.hpp>
#include <opm/simulators/linalg/cuistl/GpuSparseMatrix.hpp>
#include <opm/simulators/linalg/cuistl/CuVector.hpp>
#include <opm/simulators/linalg/cuistl/OpmCuILU0.hpp>
#include <opm/simulators/linalg/cuistl/detail/autotuner.hpp>
@ -46,7 +46,7 @@ OpmCuILU0<M, X, Y, l>::OpmCuILU0(const M& A, bool splitMatrix, bool tuneKernels)
, m_levelSets(Opm::getMatrixRowColoring(m_cpuMatrix, Opm::ColoringType::LOWER))
, m_reorderedToNatural(detail::createReorderedToNatural(m_levelSets))
, m_naturalToReordered(detail::createNaturalToReordered(m_levelSets))
, m_gpuMatrix(CuSparseMatrix<field_type>::fromMatrix(m_cpuMatrix, true))
, m_gpuMatrix(GpuSparseMatrix<field_type>::fromMatrix(m_cpuMatrix, true))
, m_gpuMatrixReorderedLower(nullptr)
, m_gpuMatrixReorderedUpper(nullptr)
, m_gpuNaturalToReorder(m_naturalToReordered)
@ -74,10 +74,10 @@ OpmCuILU0<M, X, Y, l>::OpmCuILU0(const M& A, bool splitMatrix, bool tuneKernels)
if (m_splitMatrix) {
m_gpuMatrixReorderedDiag.emplace(CuVector<field_type>(blocksize_ * blocksize_ * m_cpuMatrix.N()));
std::tie(m_gpuMatrixReorderedLower, m_gpuMatrixReorderedUpper)
= detail::extractLowerAndUpperMatrices<M, field_type, CuSparseMatrix<field_type>>(m_cpuMatrix,
= detail::extractLowerAndUpperMatrices<M, field_type, GpuSparseMatrix<field_type>>(m_cpuMatrix,
m_reorderedToNatural);
} else {
m_gpuReorderedLU = detail::createReorderedMatrix<M, field_type, CuSparseMatrix<field_type>>(
m_gpuReorderedLU = detail::createReorderedMatrix<M, field_type, GpuSparseMatrix<field_type>>(
m_cpuMatrix, m_reorderedToNatural);
}
LUFactorizeAndMoveData(m_moveThreadBlockSize, m_ILU0FactorizationThreadBlockSize);

View File

@ -22,7 +22,7 @@
#include <memory>
#include <opm/grid/utility/SparseTable.hpp>
#include <opm/simulators/linalg/PreconditionerWithUpdate.hpp>
#include <opm/simulators/linalg/cuistl/CuSparseMatrix.hpp>
#include <opm/simulators/linalg/cuistl/GpuSparseMatrix.hpp>
#include <opm/simulators/linalg/cuistl/CuVector.hpp>
#include <optional>
#include <type_traits>
@ -54,7 +54,7 @@ public:
//! \brief The field type of the preconditioner.
using field_type = typename X::field_type;
//! \brief The GPU matrix type
using CuMat = CuSparseMatrix<field_type>;
using CuMat = GpuSparseMatrix<field_type>;
//! \brief Constructor.
//!

View File

@ -22,7 +22,7 @@
#include <dune/istl/bcrsmatrix.hh>
#include <dune/istl/preconditioner.hh>
#include <opm/simulators/linalg/PreconditionerWithUpdate.hpp>
#include <opm/simulators/linalg/cuistl/CuSparseMatrix.hpp>
#include <opm/simulators/linalg/cuistl/GpuSparseMatrix.hpp>
#include <opm/simulators/linalg/cuistl/detail/CuMatrixDescription.hpp>
#include <opm/simulators/linalg/cuistl/detail/CuSparseHandle.hpp>
#include <opm/simulators/linalg/cuistl/detail/CuSparseResource.hpp>

View File

@ -28,7 +28,7 @@
#include <opm/common/ErrorMacros.hpp>
#include <opm/simulators/linalg/cuistl/GpuBlockPreconditioner.hpp>
#include <opm/simulators/linalg/cuistl/GpuOwnerOverlapCopy.hpp>
#include <opm/simulators/linalg/cuistl/CuSparseMatrix.hpp>
#include <opm/simulators/linalg/cuistl/GpuSparseMatrix.hpp>
#include <opm/simulators/linalg/cuistl/CuVector.hpp>
#include <opm/simulators/linalg/cuistl/PreconditionerAdapter.hpp>
#include <opm/simulators/linalg/cuistl/detail/has_function.hpp>
@ -67,7 +67,7 @@ public:
int verbose)
: Dune::IterativeSolver<X, X>(op, sp, *prec, reduction, maxit, verbose)
, m_opOnCPUWithMatrix(op)
, m_matrix(CuSparseMatrix<real_type>::fromMatrix(op.getmat()))
, m_matrix(GpuSparseMatrix<real_type>::fromMatrix(op.getmat()))
, m_underlyingSolver(constructSolver(prec, reduction, maxit, verbose))
{
}
@ -116,7 +116,7 @@ public:
private:
Operator& m_opOnCPUWithMatrix;
CuSparseMatrix<real_type> m_matrix;
GpuSparseMatrix<real_type> m_matrix;
UnderlyingSolver<XGPU> m_underlyingSolver;
@ -193,7 +193,7 @@ private:
using CudaCommunication = GpuOwnerOverlapCopy<real_type, block_size, typename Operator::communication_type>;
using SchwarzOperator
= Dune::OverlappingSchwarzOperator<CuSparseMatrix<real_type>, XGPU, XGPU, CudaCommunication>;
= Dune::OverlappingSchwarzOperator<GpuSparseMatrix<real_type>, XGPU, XGPU, CudaCommunication>;
auto cudaCommunication = std::make_shared<CudaCommunication>(gpuComm);
auto mpiPreconditioner = std::make_shared<GpuBlockPreconditioner<XGPU, XGPU, CudaCommunication>>(
@ -227,7 +227,7 @@ private:
auto preconditionerOnGPU = precAsHolder->getUnderlyingPreconditioner();
auto matrixOperator
= std::make_shared<Dune::MatrixAdapter<CuSparseMatrix<real_type>, XGPU, XGPU>>(m_matrix);
= std::make_shared<Dune::MatrixAdapter<GpuSparseMatrix<real_type>, XGPU, XGPU>>(m_matrix);
auto scalarProduct = std::make_shared<Dune::SeqScalarProduct<XGPU>>();
return UnderlyingSolver<XGPU>(
matrixOperator, scalarProduct, preconditionerOnGPU, reduction, maxit, verbose);

View File

@ -25,23 +25,23 @@ namespace Opm::gpuistl::detail
{
/**
* CuSparseMatrixDescription holder. This is internal information needed for most calls to the CuSparse API.
* GpuSparseMatrixDescription holder. This is internal information needed for most calls to the CuSparse API.
*/
using CuSparseMatrixDescription = CuSparseResource<cusparseMatDescr_t>;
using GpuSparseMatrixDescription = CuSparseResource<cusparseMatDescr_t>;
/**
* Pointer to CuSparseMatrixDescription holder. This is internal information needed for most calls to the CuSparse API.
* Pointer to GpuSparseMatrixDescription holder. This is internal information needed for most calls to the CuSparse API.
*/
using CuSparseMatrixDescriptionPtr = std::shared_ptr<CuSparseResource<cusparseMatDescr_t>>;
using GpuSparseMatrixDescriptionPtr = std::shared_ptr<CuSparseResource<cusparseMatDescr_t>>;
/**
* @brief createMatrixDescription creates a default matrix description
* @return a matrix description to a general sparse matrix with zero based indexing.
*/
inline CuSparseMatrixDescriptionPtr
inline GpuSparseMatrixDescriptionPtr
createMatrixDescription()
{
auto description = std::make_shared<CuSparseMatrixDescription>();
auto description = std::make_shared<GpuSparseMatrixDescription>();
// Note: We always want to use zero base indexing.
OPM_CUSPARSE_SAFE_CALL(cusparseSetMatType(description->get(), CUSPARSE_MATRIX_TYPE_GENERAL));
@ -56,7 +56,7 @@ createMatrixDescription()
*
* @note This will assume it has a unit diagonal
*/
inline CuSparseMatrixDescriptionPtr
inline GpuSparseMatrixDescriptionPtr
createLowerDiagonalDescription()
{
auto description = createMatrixDescription();
@ -71,7 +71,7 @@ createLowerDiagonalDescription()
*
* @note This will assume it has a non-unit diagonal.
*/
inline CuSparseMatrixDescriptionPtr
inline GpuSparseMatrixDescriptionPtr
createUpperDiagonalDescription()
{
auto description = createMatrixDescription();
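
As a hedged illustration of how the renamed aliases are consumed (mirroring the GpuSeqILU0 members changed above; the function name is illustrative only):

#include <opm/simulators/linalg/cuistl/detail/CuMatrixDescription.hpp>

// Sketch only: obtain the two triangular descriptions the way GpuSeqILU0 does
// and hand the wrapped cusparseMatDescr_t on to cuSPARSE.
void sketchDescriptions()
{
    using namespace Opm::gpuistl::detail;

    // Lower-triangular description, assumed to carry a unit diagonal ...
    GpuSparseMatrixDescriptionPtr descriptionL = createLowerDiagonalDescription();
    // ... and an upper-triangular one with a non-unit diagonal.
    GpuSparseMatrixDescriptionPtr descriptionU = createUpperDiagonalDescription();

    // The raw descriptor is what the cuSPARSE triangular-solve and ILU
    // routines ultimately receive.
    auto rawL = descriptionL->get();
    auto rawU = descriptionU->get();
    (void)rawL;
    (void)rawU;
}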

View File

@ -29,7 +29,7 @@
/*
This file contains a collection of utility functions used in the GPU implementation of ILU and DILU
The functions deal with creating the mappings between reordered and natural indices, as well as
extracting sparsity structures from Dune matrices and creating CuSparseMatrix indices.
extracting sparsity structures from Dune matrices and creating GpuSparseMatrix indices.
*/
namespace Opm::gpuistl::detail
{
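
A hedged sketch of how these helpers fit together, composed only of calls visible in the GpuDILU and OpmCuILU0 diffs above; the concrete matrix type and function name are illustrative.

#include <opm/simulators/linalg/GraphColoring.hpp>
#include <opm/simulators/linalg/cuistl/GpuSparseMatrix.hpp>
#include <opm/simulators/linalg/cuistl/detail/coloringAndReorderingUtils.hpp>
#include <dune/istl/bcrsmatrix.hh>
#include <dune/common/fmatrix.hh>

using M = Dune::BCRSMatrix<Dune::FieldMatrix<double, 2, 2>>;

// Sketch only: derive the reordering maps from a lower-triangular level-set
// coloring and build the reordered GPU matrix, as GpuDILU does in its constructor.
void sketchReordering(const M& cpuMatrix)
{
    const auto levelSets = Opm::getMatrixRowColoring(cpuMatrix, Opm::ColoringType::LOWER);
    const auto reorderedToNatural = Opm::gpuistl::detail::createReorderedToNatural(levelSets);
    const auto naturalToReordered = Opm::gpuistl::detail::createNaturalToReordered(levelSets);

    // GpuDILU/OpmCuILU0 upload both maps into CuVector<int> members; here we
    // only build the reordered matrix itself.
    auto reorderedGpuMatrix
        = Opm::gpuistl::detail::createReorderedMatrix<M, double, Opm::gpuistl::GpuSparseMatrix<double>>(
            cpuMatrix, reorderedToNatural);
    (void)naturalToReordered;
    (void)reorderedGpuMatrix;
}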

View File

@ -26,7 +26,7 @@
#include <memory>
#include <opm/simulators/linalg/DILU.hpp>
#include <opm/simulators/linalg/cuistl/GpuDILU.hpp>
#include <opm/simulators/linalg/cuistl/CuSparseMatrix.hpp>
#include <opm/simulators/linalg/cuistl/GpuSparseMatrix.hpp>
#include <opm/simulators/linalg/cuistl/CuVector.hpp>
#include <opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp>
#include <opm/simulators/linalg/cuistl/detail/cusparse_matrix_operations.hpp>
@ -41,7 +41,7 @@ using B1x1Vec = Dune::BlockVector<Dune::FieldVector<double, 1>>;
using B2x2Vec = Dune::BlockVector<Dune::FieldVector<double, 2>>;
using Sp1x1BlockMatrix = Dune::BCRSMatrix<FM1x1>;
using Sp2x2BlockMatrix = Dune::BCRSMatrix<FM2x2>;
using CuMatrix = Opm::gpuistl::CuSparseMatrix<T>;
using CuMatrix = Opm::gpuistl::GpuSparseMatrix<T>;
using CuIntVec = Opm::gpuistl::CuVector<int>;
using CuFloatingPointVec = Opm::gpuistl::CuVector<T>;
using GpuDilu1x1 = Opm::gpuistl::GpuDILU<Sp1x1BlockMatrix, CuFloatingPointVec, CuFloatingPointVec>;

View File

@ -24,7 +24,7 @@
#include <cuda_runtime.h>
#include <dune/istl/bcrsmatrix.hh>
#include <opm/simulators/linalg/cuistl/GpuJac.hpp>
#include <opm/simulators/linalg/cuistl/CuSparseMatrix.hpp>
#include <opm/simulators/linalg/cuistl/GpuSparseMatrix.hpp>
#include <opm/simulators/linalg/cuistl/CuVector.hpp>
#include <opm/simulators/linalg/cuistl/PreconditionerAdapter.hpp>
#include <opm/simulators/linalg/cuistl/detail/cusparse_matrix_operations.hpp>

View File

@ -18,12 +18,12 @@
*/
#include <config.h>
#define BOOST_TEST_MODULE TestCuSparseMatrix
#define BOOST_TEST_MODULE TestGpuSparseMatrix
#include <boost/test/unit_test.hpp>
#include <dune/istl/bcrsmatrix.hh>
#include <memory>
#include <opm/simulators/linalg/cuistl/CuSparseMatrix.hpp>
#include <opm/simulators/linalg/cuistl/GpuSparseMatrix.hpp>
#include <opm/simulators/linalg/cuistl/CuVector.hpp>
#include <opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp>
#include <random>
@ -76,17 +76,17 @@ BOOST_AUTO_TEST_CASE(TestConstruction1D)
}
}
auto cuSparseMatrix = Opm::gpuistl::CuSparseMatrix<double>::fromMatrix(B);
auto gpuSparseMatrix = Opm::gpuistl::GpuSparseMatrix<double>::fromMatrix(B);
const auto& nonZeroValuesCuda = cuSparseMatrix.getNonZeroValues();
std::vector<double> buffer(cuSparseMatrix.nonzeroes(), 0.0);
const auto& nonZeroValuesCuda = gpuSparseMatrix.getNonZeroValues();
std::vector<double> buffer(gpuSparseMatrix.nonzeroes(), 0.0);
nonZeroValuesCuda.copyToHost(buffer.data(), buffer.size());
const double* nonZeroElements = static_cast<const double*>(&((B[0][0][0][0])));
BOOST_CHECK_EQUAL_COLLECTIONS(buffer.begin(), buffer.end(), nonZeroElements, nonZeroElements + B.nonzeroes());
BOOST_CHECK_EQUAL(N * 3 - 2, cuSparseMatrix.nonzeroes());
BOOST_CHECK_EQUAL(N * 3 - 2, gpuSparseMatrix.nonzeroes());
std::vector<int> rowIndicesFromCUDA(N + 1);
cuSparseMatrix.getRowIndices().copyToHost(rowIndicesFromCUDA.data(), rowIndicesFromCUDA.size());
gpuSparseMatrix.getRowIndices().copyToHost(rowIndicesFromCUDA.data(), rowIndicesFromCUDA.size());
BOOST_CHECK_EQUAL(rowIndicesFromCUDA[0], 0);
BOOST_CHECK_EQUAL(rowIndicesFromCUDA[1], 2);
for (int i = 2; i < N; ++i) {
@ -95,7 +95,7 @@ BOOST_AUTO_TEST_CASE(TestConstruction1D)
std::vector<int> columnIndicesFromCUDA(B.nonzeroes(), 0);
cuSparseMatrix.getColumnIndices().copyToHost(columnIndicesFromCUDA.data(), columnIndicesFromCUDA.size());
gpuSparseMatrix.getColumnIndices().copyToHost(columnIndicesFromCUDA.data(), columnIndicesFromCUDA.size());
BOOST_CHECK_EQUAL(columnIndicesFromCUDA[0], 0);
BOOST_CHECK_EQUAL(columnIndicesFromCUDA[1], 1);
@ -143,7 +143,7 @@ BOOST_AUTO_TEST_CASE(RandomSparsityMatrix)
}
}
auto cuSparseMatrix = Opm::gpuistl::CuSparseMatrix<double>::fromMatrix(B);
auto gpuSparseMatrix = Opm::gpuistl::GpuSparseMatrix<double>::fromMatrix(B);
// check each column
for (size_t component = 0; component < N; ++component) {
std::vector<double> inputDataX(N * dim, 0.0);
@ -155,7 +155,7 @@ BOOST_AUTO_TEST_CASE(RandomSparsityMatrix)
yHost = inputDataY[0];
inputVectorX.copyToHost(xHost);
const double alpha = 1.42;
cuSparseMatrix.usmv(alpha, inputVectorX, inputVectorY);
gpuSparseMatrix.usmv(alpha, inputVectorX, inputVectorY);
inputVectorY.copyToHost(inputDataY);
@ -167,7 +167,7 @@ BOOST_AUTO_TEST_CASE(RandomSparsityMatrix)
}
inputVectorX.copyToHost(xHost);
cuSparseMatrix.mv(inputVectorX, inputVectorY);
gpuSparseMatrix.mv(inputVectorX, inputVectorY);
inputVectorY.copyToHost(inputDataY);

View File

@ -18,12 +18,12 @@
*/
#include <config.h>
#define BOOST_TEST_MODULE TestCuSparseMatrixOperations
#define BOOST_TEST_MODULE TestGpuSparseMatrixOperations
#include <boost/mpl/list.hpp>
#include <boost/test/unit_test.hpp>
#include <cuda_runtime.h>
#include <dune/istl/bcrsmatrix.hh>
#include <opm/simulators/linalg/cuistl/CuSparseMatrix.hpp>
#include <opm/simulators/linalg/cuistl/GpuSparseMatrix.hpp>
#include <opm/simulators/linalg/cuistl/CuVector.hpp>
#include <opm/simulators/linalg/cuistl/PreconditionerAdapter.hpp>
#include <opm/simulators/linalg/cuistl/detail/cusparse_matrix_operations.hpp>
@ -85,7 +85,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(FlattenAndInvertDiagonalWith3By3Blocks, T, Numeric
B[1][1][1][1] = -1.0;
B[1][1][2][2] = -1.0;
Opm::gpuistl::CuSparseMatrix<T> m = Opm::gpuistl::CuSparseMatrix<T>::fromMatrix(B);
Opm::gpuistl::GpuSparseMatrix<T> m = Opm::gpuistl::GpuSparseMatrix<T>::fromMatrix(B);
Opm::gpuistl::CuVector<T> dInvDiag(blocksize * blocksize * N);
Opm::gpuistl::detail::JAC::invertDiagonalAndFlatten<T, 3>(
@ -159,7 +159,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(FlattenAndInvertDiagonalWith2By2Blocks, T, Numeric
B[1][1][0][0] = -1.0;
B[1][1][1][1] = -1.0;
Opm::gpuistl::CuSparseMatrix<T> m = Opm::gpuistl::CuSparseMatrix<T>::fromMatrix(B);
Opm::gpuistl::GpuSparseMatrix<T> m = Opm::gpuistl::GpuSparseMatrix<T>::fromMatrix(B);
Opm::gpuistl::CuVector<T> dInvDiag(blocksize * blocksize * N);
Opm::gpuistl::detail::JAC::invertDiagonalAndFlatten<T, 2>(