Now handling sparsity pattern copying in a more elegant way.

2025-02-25 18:55:30 -06:00 · 2023-03-29 16:12:30 +02:00 · 2023-03-29 16:12:30 +02:00 · 67f94ce8a3
commit 67f94ce8a3
parent e14da5e053
2 changed files with 65 additions and 50 deletions
--- a/opm/simulators/linalg/cuistl/CuSparseMatrix.cpp
+++ b/opm/simulators/linalg/cuistl/CuSparseMatrix.cpp
@ -27,7 +27,6 @@
 #include <opm/simulators/linalg/cuistl/detail/cusparse_safe_call.hpp>
 #include <opm/simulators/linalg/cuistl/detail/cusparse_wrapper.hpp>
 #include <opm/simulators/linalg/matrixblock.hh>
 #define CUSPARSE_ASSUME_UNSAFE_SPARSITY 1
 #define CHECK_SIZE(x)                                                                                                  \
    if (x.dim() != blockSize() * N()) {                                                                                \
@ -40,6 +39,34 @@
 namespace Opm::cuistl
 {
 namespace
 {
    template <class T, class M>
    std::vector<T> extractNonzeroValues(const M& matrix)
    {
        const size_t blockSize = matrix[0][0].N();
        const size_t numberOfNonzeroBlocks = matrix.nonzeroes();
        const size_t numberOfNonzeroElements = blockSize * blockSize * numberOfNonzeroBlocks;
        std::vector<T> nonZeroElementsData;
        // TODO: [perf] Can we avoid building nonZeroElementsData?
        nonZeroElementsData.reserve(numberOfNonzeroElements);
        for (auto& row : matrix) {
            for (auto columnIterator = row.begin(); columnIterator != row.end(); ++columnIterator) {
                for (size_t c = 0; c < blockSize; ++c) {
                    for (size_t d = 0; d < blockSize; ++d) {
                        nonZeroElementsData.push_back((*columnIterator)[c][d]);
                    }
                }
            }
        }
        return nonZeroElementsData;
    }
 } // namespace
 template <class T>
 CuSparseMatrix<T>::CuSparseMatrix(const T* nonZeroElements,
                                  const int* rowIndices,
@ -67,7 +94,7 @@ CuSparseMatrix<T>::~CuSparseMatrix()
 template <typename T>
 template <typename MatrixType>
 CuSparseMatrix<T>
-CuSparseMatrix<T>::fromMatrix(const MatrixType& matrix)
+CuSparseMatrix<T>::fromMatrix(const MatrixType& matrix, bool copyNonZeroElementsDirectly)
 {
    // TODO: Do we need this intermediate storage? Or this shuffling of data?
    std::vector<int> columnIndices;
@ -78,32 +105,16 @@ CuSparseMatrix<T>::fromMatrix(const MatrixType& matrix)
    const size_t blockSize = matrix[0][0].N();
    const size_t numberOfRows = matrix.N();
    const size_t numberOfNonzeroBlocks = matrix.nonzeroes();
-#ifndef CUSPARSE_ASSUME_UNSAFE_SPARSITY
+
    const size_t numberOfNonzeroElements = blockSize * blockSize * numberOfNonzeroBlocks;
    std::vector<T> nonZeroElementsData;
    // TODO: [perf] Can we avoid building nonZeroElementsData?
    nonZeroElementsData.reserve(numberOfNonzeroElements);
 #endif
    columnIndices.reserve(numberOfNonzeroBlocks);
    rowIndices.reserve(numberOfRows + 1);
    for (auto& row : matrix) {
        for (auto columnIterator = row.begin(); columnIterator != row.end(); ++columnIterator) {
            columnIndices.push_back(columnIterator.index());
 #ifndef CUSPARSE_ASSUME_UNSAFE_SPARSITY
            for (size_t c = 0; c < blockSize; ++c) {
                for (size_t d = 0; d < blockSize; ++d) {
                    nonZeroElementsData.push_back((*columnIterator)[c][d]);
                }
            }
 #endif
        }
        rowIndices.push_back(detail::to_int(columnIndices.size()));
    }
-#ifndef CUSPARSE_ASSUME_UNSAFE_SPARSITY
+
    auto nonZeroElements = nonZeroElementsData.data();
 #else
    const T* nonZeroElements = static_cast<const T*>(&((matrix[0][0][0][0])));
 #endif
    // Sanity check
    // h_rows and h_cols could be changed to 'unsigned int', but cusparse expects 'int'
    OPM_ERROR_IF(rowIndices[matrix.N()] != detail::to_int(matrix.nonzeroes()),
@ -112,42 +123,39 @@ CuSparseMatrix<T>::fromMatrix(const MatrixType& matrix)
    OPM_ERROR_IF(columnIndices.size() != numberOfNonzeroBlocks, "Column indices do not match for CuSparseMatrix.");
    if (copyNonZeroElementsDirectly) {
        const T* nonZeroElements = static_cast<const T*>(&((matrix[0][0][0][0])));
        return CuSparseMatrix<T>(
            nonZeroElements, rowIndices.data(), columnIndices.data(), numberOfNonzeroBlocks, blockSize, numberOfRows);
    } else {
        auto nonZeroElementData = extractNonzeroValues<T>(matrix);
        return CuSparseMatrix<T>(nonZeroElementData.data(),
                                 rowIndices.data(),
                                 columnIndices.data(),
                                 numberOfNonzeroBlocks,
                                 blockSize,
                                 numberOfRows);
    }
 }
 template <class T>
 template <class MatrixType>
 void
-CuSparseMatrix<T>::updateNonzeroValues(const MatrixType& matrix)
+CuSparseMatrix<T>::updateNonzeroValues(const MatrixType& matrix, bool copyNonZeroElementsDirectly)
 {
    OPM_ERROR_IF(nonzeroes() != matrix.nonzeroes(), "Matrix does not have the same number of non-zero elements.");
    OPM_ERROR_IF(matrix[0][0].N() != blockSize(), "Matrix does not have the same blocksize.");
    OPM_ERROR_IF(matrix.N() != N(), "Matrix does not have the same number of rows.");
-#ifndef CUSPARSE_ASSUME_UNSAFE_SPARSITY
+    if (!copyNonZeroElementsDirectly) {
-    const size_t numberOfRows = N();
+        auto nonZeroElementsData = extractNonzeroValues<T>(matrix);
-    const size_t numberOfNonzeroBlocks = nonzeroes();
+        m_nonZeroElements.copyFromHost(nonZeroElementsData.data(), nonzeroes() * blockSize() * blockSize());
    const size_t numberOfNonzeroElements = blockSize() * blockSize() * numberOfNonzeroBlocks;
-    std::vector<T> nonZeroElementsData;
+    } else {
    // TODO: [perf] Can we avoid building nonZeroElementsData?
    nonZeroElementsData.reserve(numberOfNonzeroElements);
    for (auto& row : matrix) {
        for (auto columnIterator = row.begin(); columnIterator != row.end(); ++columnIterator) {
            for (size_t c = 0; c < blockSize(); ++c) {
                for (size_t d = 0; d < blockSize(); ++d) {
                    nonZeroElementsData.push_back((*columnIterator)[c][d]);
                }
            }
        }
    }
    auto newNonZeroElements = nonZeroElementsData.data();
 #else
        const T* newNonZeroElements = static_cast<const T*>(&((matrix[0][0][0][0])));
 #endif
        m_nonZeroElements.copyFromHost(newNonZeroElements, nonzeroes() * blockSize() * blockSize());
    }
 }
 template <typename T>
 void
@ -286,13 +294,13 @@ CuSparseMatrix<T>::usmv(T alpha, const CuVector<T>& x, CuVector<T>& y) const
 #define INSTANTIATE_CUSPARSE_DUNE_MATRIX_CONSTRUCTION_FUNTIONS(realtype, blockdim)                                     \
    template CuSparseMatrix<realtype> CuSparseMatrix<realtype>::fromMatrix(                                            \
-        const Dune::BCRSMatrix<Dune::FieldMatrix<realtype, blockdim, blockdim>>&);                                     \
+        const Dune::BCRSMatrix<Dune::FieldMatrix<realtype, blockdim, blockdim>>&, bool);                               \
    template CuSparseMatrix<realtype> CuSparseMatrix<realtype>::fromMatrix(                                            \
-        const Dune::BCRSMatrix<Opm::MatrixBlock<realtype, blockdim, blockdim>>&);                                      \
+        const Dune::BCRSMatrix<Opm::MatrixBlock<realtype, blockdim, blockdim>>&, bool);                                \
    template void CuSparseMatrix<realtype>::updateNonzeroValues(                                                       \
-        const Dune::BCRSMatrix<Dune::FieldMatrix<realtype, blockdim, blockdim>>&);                                     \
+        const Dune::BCRSMatrix<Dune::FieldMatrix<realtype, blockdim, blockdim>>&, bool);                               \
    template void CuSparseMatrix<realtype>::updateNonzeroValues(                                                       \
-        const Dune::BCRSMatrix<Opm::MatrixBlock<realtype, blockdim, blockdim>>&)
+        const Dune::BCRSMatrix<Opm::MatrixBlock<realtype, blockdim, blockdim>>&, bool)
 template class CuSparseMatrix<float>;
 template class CuSparseMatrix<double>;
--- a/opm/simulators/linalg/cuistl/CuSparseMatrix.hpp
+++ b/opm/simulators/linalg/cuistl/CuSparseMatrix.hpp
@ -74,10 +74,14 @@ public:
    /**
     * @brief fromMatrix creates a new matrix with the same block size and values as the given matrix
     * @param matrix the matrix to copy from
     * @param copyNonZeroElementsDirectly if true will do a memcpy from matrix[0][0][0][0], otherwise will build up the
     * non-zero elements by looping over the matrix. Note that setting this to true will yield a performance increase,
     * but might not always yield correct results depending on how the matrix matrix has been initialized. If unsure,
     * leave it as false.
     * @tparam MatrixType is assumed to be a Dune::BCRSMatrix compatible matrix.
     */
    template <class MatrixType>
-    static CuSparseMatrix<T> fromMatrix(const MatrixType& matrix);
+    static CuSparseMatrix<T> fromMatrix(const MatrixType& matrix, bool copyNonZeroElementsDirectly = false);
    /**
     * @brief setUpperTriangular sets the CuSparse flag that this is an upper diagonal (with unit diagonal) matrix.
@ -257,12 +261,15 @@ public:
    /**
     * @brief updateNonzeroValues updates the non-zero values by using the non-zero values of the supplied matrix
     * @param matrix the matrix to extract the non-zero values from
-     *
+     * @param copyNonZeroElementsDirectly if true will do a memcpy from matrix[0][0][0][0], otherwise will build up the
     * non-zero elements by looping over the matrix. Note that setting this to true will yield a performance increase,
     * but might not always yield correct results depending on how the matrix matrix has been initialized. If unsure,
     * leave it as false.
     * @note This assumes the given matrix has the same sparsity pattern.
     * @tparam MatrixType is assumed to be a Dune::BCRSMatrix compatible matrix.
     */
    template <class MatrixType>
-    void updateNonzeroValues(const MatrixType& matrix);
+    void updateNonzeroValues(const MatrixType& matrix, bool copyNonZeroElementsDirectly = false);
 private:
    CuVector<T> m_nonZeroElements;