/* Copyright 2022-2023 SINTEF AS This file is part of the Open Porous Media project (OPM). OPM is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. OPM is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with OPM. If not, see . */ #ifndef OPM_GPUSPARSEMATRIX_HPP #define OPM_GPUSPARSEMATRIX_HPP #include #include #include #include #include #include #include #include #include namespace Opm::gpuistl { /** * @brief The GpuSparseMatrix class simple wrapper class for a CuSparse matrix. * * @note we currently only support simple raw primitives for T (double and float). Block size is handled through the * block size parameter * * @tparam T the type to store. Can be either float, double or int. * * @note we only support square matrices. * * @note We only support Block Compressed Sparse Row Format (BSR) for now. */ template class GpuSparseMatrix { public: //! Create the sparse matrix specified by the raw data. //! //! \note Prefer to use the constructor taking a const reference to a matrix instead. //! //! \param[in] nonZeroElements the non-zero values of the matrix //! \param[in] rowIndices the row indices of the non-zero elements //! \param[in] columnIndices the column indices of the non-zero elements //! \param[in] numberOfNonzeroElements number of nonzero elements //! \param[in] blockSize size of each block matrix (typically 3) //! \param[in] numberOfRows the number of rows //! //! \note We assume numberOfNonzeroBlocks, blockSize and numberOfRows all are representable as int due to //! restrictions in the current version of cusparse. This might change in future versions. GpuSparseMatrix(const T* nonZeroElements, const int* rowIndices, const int* columnIndices, size_t numberOfNonzeroBlocks, size_t blockSize, size_t numberOfRows); //! Create a sparse matrix by copying the sparsity structure of another matrix, not filling in the values //! //! \note Prefer to use the constructor taking a const reference to a matrix instead. //! //! \param[in] rowIndices the row indices of the non-zero elements //! \param[in] columnIndices the column indices of the non-zero elements //! \param[in] blockSize size of each block matrix (typically 3) //! //! \note We assume numberOfNonzeroBlocks, blockSize and numberOfRows all are representable as int due to //! restrictions in the current version of cusparse. This might change in future versions. GpuSparseMatrix(const GpuVector& rowIndices, const GpuVector& columnIndices, size_t blockSize); /** * We don't want to be able to copy this for now (too much hassle in copying the cusparse resources) */ GpuSparseMatrix(const GpuSparseMatrix&) = delete; /** * We don't want to be able to copy this for now (too much hassle in copying the cusparse resources) */ GpuSparseMatrix& operator=(const GpuSparseMatrix&) = delete; virtual ~GpuSparseMatrix(); /** * @brief fromMatrix creates a new matrix with the same block size and values as the given matrix * @param matrix the matrix to copy from * @param copyNonZeroElementsDirectly if true will do a memcpy from matrix[0][0][0][0], otherwise will build up the * non-zero elements by looping over the matrix. Note that setting this to true will yield a performance increase, * but might not always yield correct results depending on how the matrix has been initialized. If unsure, * leave it as false. * @tparam MatrixType is assumed to be a Dune::BCRSMatrix compatible matrix. */ template static GpuSparseMatrix fromMatrix(const MatrixType& matrix, bool copyNonZeroElementsDirectly = false); /** * @brief setUpperTriangular sets the CuSparse flag that this is an upper diagonal (with unit diagonal) matrix. */ void setUpperTriangular(); /** * @brief setLowerTriangular sets the CuSparse flag that this is an lower diagonal (with non-unit diagonal) matrix. */ void setLowerTriangular(); /** * @brief setUnitDiagonal sets the CuSparse flag that this has unit diagional. */ void setUnitDiagonal(); /** * @brief setNonUnitDiagonal sets the CuSparse flag that this has non-unit diagional. */ void setNonUnitDiagonal(); /** * @brief N returns the number of rows (which is equal to the number of columns) */ size_t N() const { // Technically this safe conversion is not needed since we enforce these to be // non-negative in the constructor, but keeping them for added sanity for now. // // We don't believe this will yield any performance penality (it's used too far away from the inner loop), // but should that be false, they can be removed. return detail::to_size_t(m_numberOfRows); } /** * @brief nonzeroes behaves as the Dune::BCRSMatrix::nonzeros() function and returns the number of non zero blocks * @return number of non zero blocks. */ size_t nonzeroes() const { // Technically this safe conversion is not needed since we enforce these to be // non-negative in the constructor, but keeping them for added sanity for now. // // We don't believe this will yield any performance penality (it's used too far away from the inner loop), // but should that be false, they can be removed. return detail::to_size_t(m_numberOfNonzeroBlocks); } /** * @brief getNonZeroValues returns the GPU vector containing the non-zero values (ordered by block) * * @note Read the CuSPARSE documentation on Block Compressed Sparse Row Format (BSR) for the exact ordering. */ GpuVector& getNonZeroValues() { return m_nonZeroElements; } /** * @brief getNonZeroValues returns the GPU vector containing the non-zero values (ordered by block) * * @note Read the CuSPARSE documentation on Block Compressed Sparse Row Format (BSR) for the exact ordering. */ const GpuVector& getNonZeroValues() const { return m_nonZeroElements; } /** * @brief getRowIndices returns the row indices used to represent the BSR structure. * * @note Read the CuSPARSE documentation on Block Compressed Sparse Row Format (BSR) for the exact ordering. */ GpuVector& getRowIndices() { return m_rowIndices; } /** * @brief getRowIndices returns the row indices used to represent the BSR structure. * * @note Read the CuSPARSE documentation on Block Compressed Sparse Row Format (BSR) for the exact ordering. */ const GpuVector& getRowIndices() const { return m_rowIndices; } /** * @brief getColumnIndices returns the column indices used to represent the BSR structure. * * @return Read the CuSPARSE documentation on Block Compressed Sparse Row Format (BSR) for the exact ordering. */ GpuVector& getColumnIndices() { return m_columnIndices; } /** * @brief getColumnIndices returns the column indices used to represent the BSR structure. * * @return Read the CuSPARSE documentation on Block Compressed Sparse Row Format (BSR) for the exact ordering. */ const GpuVector& getColumnIndices() const { return m_columnIndices; } /** * @brief dim returns the dimension of the vector space on which this matrix acts * * This is equivalent to matrix.N() * matrix.blockSize() * @return matrix.N() * matrix.blockSize() */ size_t dim() const { // Technically this safe conversion is not needed since we enforce these to be // non-negative in the constructor, but keeping them for added sanity for now. // // We don't believe this will yield any performance penality (it's used too far away from the inner loop), // but should that be false, they can be removed. return detail::to_size_t(m_blockSize) * detail::to_size_t(m_numberOfRows); } /** * @brief blockSize size of the blocks */ size_t blockSize() const { // Technically this safe conversion is not needed since we enforce these to be // non-negative in the constructor, but keeping them for added sanity for now. // // We don't believe this will yield any performance penality (it's used too far away from the inner loop), // but should that be false, they can be removed. return detail::to_size_t(m_blockSize); } /** * @brief getDescription the cusparse matrix description. * * This description is needed for most calls to the CuSparse library */ detail::GpuSparseMatrixDescription& getDescription() { return *m_matrixDescription; } /** * @brief mv performs matrix vector multiply y = Ax * @param[in] x the vector to multiply the matrix with * @param[out] y the output vector * * @note Due to limitations of CuSparse, this is only supported for block sizes greater than 1. */ virtual void mv(const GpuVector& x, GpuVector& y) const; /** * @brief umv computes y=Ax+y * @param[in] x the vector to multiply with A * @param[inout] y the vector to add and store the output in * * @note Due to limitations of CuSparse, this is only supported for block sizes greater than 1. */ virtual void umv(const GpuVector& x, GpuVector& y) const; /** * @brief umv computes y=alpha * Ax + y * @param[in] x the vector to multiply with A * @param[inout] y the vector to add and store the output in * * @note Due to limitations of CuSparse, this is only supported for block sizes greater than 1. */ virtual void usmv(T alpha, const GpuVector& x, GpuVector& y) const; /** * @brief updateNonzeroValues updates the non-zero values by using the non-zero values of the supplied matrix * @param matrix the matrix to extract the non-zero values from * @param copyNonZeroElementsDirectly if true will do a memcpy from matrix[0][0][0][0], otherwise will build up the * non-zero elements by looping over the matrix. Note that setting this to true will yield a performance increase, * but might not always yield correct results depending on how the matrix matrix has been initialized. If unsure, * leave it as false. * @note This assumes the given matrix has the same sparsity pattern. * @tparam MatrixType is assumed to be a Dune::BCRSMatrix compatible matrix. */ template void updateNonzeroValues(const MatrixType& matrix, bool copyNonZeroElementsDirectly = false); private: GpuVector m_nonZeroElements; GpuVector m_columnIndices; GpuVector m_rowIndices; // Notice that we store these three as int to make sure we are cusparse compatible. // // This gives the added benefit of checking the size constraints at construction of the matrix // rather than in some call to cusparse. const int m_numberOfNonzeroBlocks; const int m_numberOfRows; const int m_blockSize; detail::GpuSparseMatrixDescriptionPtr m_matrixDescription; detail::CuSparseHandle& m_cusparseHandle; template void assertSameSize(const VectorType& vector) const; }; } // namespace Opm::gpuistl #endif