diff --git a/opm/simulators/linalg/cuistl/CuSparseMatrix.cpp b/opm/simulators/linalg/cuistl/CuSparseMatrix.cpp index abe73b07e..b6466e3bb 100644 --- a/opm/simulators/linalg/cuistl/CuSparseMatrix.cpp +++ b/opm/simulators/linalg/cuistl/CuSparseMatrix.cpp @@ -27,7 +27,6 @@ #include #include #include -#define CUSPARSE_ASSUME_UNSAFE_SPARSITY 1 #define CHECK_SIZE(x) \ if (x.dim() != blockSize() * N()) { \ @@ -40,6 +39,34 @@ namespace Opm::cuistl { +namespace +{ + template + std::vector extractNonzeroValues(const M& matrix) + { + const size_t blockSize = matrix[0][0].N(); + const size_t numberOfNonzeroBlocks = matrix.nonzeroes(); + const size_t numberOfNonzeroElements = blockSize * blockSize * numberOfNonzeroBlocks; + + std::vector nonZeroElementsData; + // TODO: [perf] Can we avoid building nonZeroElementsData? + nonZeroElementsData.reserve(numberOfNonzeroElements); + for (auto& row : matrix) { + for (auto columnIterator = row.begin(); columnIterator != row.end(); ++columnIterator) { + for (size_t c = 0; c < blockSize; ++c) { + for (size_t d = 0; d < blockSize; ++d) { + nonZeroElementsData.push_back((*columnIterator)[c][d]); + } + } + } + } + + return nonZeroElementsData; + } +} // namespace + + + template CuSparseMatrix::CuSparseMatrix(const T* nonZeroElements, const int* rowIndices, @@ -67,7 +94,7 @@ CuSparseMatrix::~CuSparseMatrix() template template CuSparseMatrix -CuSparseMatrix::fromMatrix(const MatrixType& matrix) +CuSparseMatrix::fromMatrix(const MatrixType& matrix, bool copyNonZeroElementsDirectly) { // TODO: Do we need this intermediate storage? Or this shuffling of data? std::vector columnIndices; @@ -78,32 +105,16 @@ CuSparseMatrix::fromMatrix(const MatrixType& matrix) const size_t blockSize = matrix[0][0].N(); const size_t numberOfRows = matrix.N(); const size_t numberOfNonzeroBlocks = matrix.nonzeroes(); -#ifndef CUSPARSE_ASSUME_UNSAFE_SPARSITY - const size_t numberOfNonzeroElements = blockSize * blockSize * numberOfNonzeroBlocks; - std::vector nonZeroElementsData; - // TODO: [perf] Can we avoid building nonZeroElementsData? - nonZeroElementsData.reserve(numberOfNonzeroElements); -#endif + columnIndices.reserve(numberOfNonzeroBlocks); rowIndices.reserve(numberOfRows + 1); for (auto& row : matrix) { for (auto columnIterator = row.begin(); columnIterator != row.end(); ++columnIterator) { columnIndices.push_back(columnIterator.index()); -#ifndef CUSPARSE_ASSUME_UNSAFE_SPARSITY - for (size_t c = 0; c < blockSize; ++c) { - for (size_t d = 0; d < blockSize; ++d) { - nonZeroElementsData.push_back((*columnIterator)[c][d]); - } - } -#endif } rowIndices.push_back(detail::to_int(columnIndices.size())); } -#ifndef CUSPARSE_ASSUME_UNSAFE_SPARSITY - auto nonZeroElements = nonZeroElementsData.data(); -#else - const T* nonZeroElements = static_cast(&((matrix[0][0][0][0]))); -#endif + // Sanity check // h_rows and h_cols could be changed to 'unsigned int', but cusparse expects 'int' OPM_ERROR_IF(rowIndices[matrix.N()] != detail::to_int(matrix.nonzeroes()), @@ -112,41 +123,38 @@ CuSparseMatrix::fromMatrix(const MatrixType& matrix) OPM_ERROR_IF(columnIndices.size() != numberOfNonzeroBlocks, "Column indices do not match for CuSparseMatrix."); - return CuSparseMatrix( - nonZeroElements, rowIndices.data(), columnIndices.data(), numberOfNonzeroBlocks, blockSize, numberOfRows); + if (copyNonZeroElementsDirectly) { + const T* nonZeroElements = static_cast(&((matrix[0][0][0][0]))); + return CuSparseMatrix( + nonZeroElements, rowIndices.data(), columnIndices.data(), numberOfNonzeroBlocks, blockSize, numberOfRows); + } else { + auto nonZeroElementData = extractNonzeroValues(matrix); + return CuSparseMatrix(nonZeroElementData.data(), + rowIndices.data(), + columnIndices.data(), + numberOfNonzeroBlocks, + blockSize, + numberOfRows); + } } template template void -CuSparseMatrix::updateNonzeroValues(const MatrixType& matrix) +CuSparseMatrix::updateNonzeroValues(const MatrixType& matrix, bool copyNonZeroElementsDirectly) { OPM_ERROR_IF(nonzeroes() != matrix.nonzeroes(), "Matrix does not have the same number of non-zero elements."); OPM_ERROR_IF(matrix[0][0].N() != blockSize(), "Matrix does not have the same blocksize."); OPM_ERROR_IF(matrix.N() != N(), "Matrix does not have the same number of rows."); -#ifndef CUSPARSE_ASSUME_UNSAFE_SPARSITY - const size_t numberOfRows = N(); - const size_t numberOfNonzeroBlocks = nonzeroes(); - const size_t numberOfNonzeroElements = blockSize() * blockSize() * numberOfNonzeroBlocks; + if (!copyNonZeroElementsDirectly) { + auto nonZeroElementsData = extractNonzeroValues(matrix); + m_nonZeroElements.copyFromHost(nonZeroElementsData.data(), nonzeroes() * blockSize() * blockSize()); - std::vector nonZeroElementsData; - // TODO: [perf] Can we avoid building nonZeroElementsData? - nonZeroElementsData.reserve(numberOfNonzeroElements); - for (auto& row : matrix) { - for (auto columnIterator = row.begin(); columnIterator != row.end(); ++columnIterator) { - for (size_t c = 0; c < blockSize(); ++c) { - for (size_t d = 0; d < blockSize(); ++d) { - nonZeroElementsData.push_back((*columnIterator)[c][d]); - } - } - } + } else { + const T* newNonZeroElements = static_cast(&((matrix[0][0][0][0]))); + m_nonZeroElements.copyFromHost(newNonZeroElements, nonzeroes() * blockSize() * blockSize()); } - auto newNonZeroElements = nonZeroElementsData.data(); -#else - const T* newNonZeroElements = static_cast(&((matrix[0][0][0][0]))); -#endif - m_nonZeroElements.copyFromHost(newNonZeroElements, nonzeroes() * blockSize() * blockSize()); } template @@ -286,13 +294,13 @@ CuSparseMatrix::usmv(T alpha, const CuVector& x, CuVector& y) const #define INSTANTIATE_CUSPARSE_DUNE_MATRIX_CONSTRUCTION_FUNTIONS(realtype, blockdim) \ template CuSparseMatrix CuSparseMatrix::fromMatrix( \ - const Dune::BCRSMatrix>&); \ + const Dune::BCRSMatrix>&, bool); \ template CuSparseMatrix CuSparseMatrix::fromMatrix( \ - const Dune::BCRSMatrix>&); \ + const Dune::BCRSMatrix>&, bool); \ template void CuSparseMatrix::updateNonzeroValues( \ - const Dune::BCRSMatrix>&); \ + const Dune::BCRSMatrix>&, bool); \ template void CuSparseMatrix::updateNonzeroValues( \ - const Dune::BCRSMatrix>&) + const Dune::BCRSMatrix>&, bool) template class CuSparseMatrix; template class CuSparseMatrix; diff --git a/opm/simulators/linalg/cuistl/CuSparseMatrix.hpp b/opm/simulators/linalg/cuistl/CuSparseMatrix.hpp index e45f51f57..0a87d494c 100644 --- a/opm/simulators/linalg/cuistl/CuSparseMatrix.hpp +++ b/opm/simulators/linalg/cuistl/CuSparseMatrix.hpp @@ -74,10 +74,14 @@ public: /** * @brief fromMatrix creates a new matrix with the same block size and values as the given matrix * @param matrix the matrix to copy from + * @param copyNonZeroElementsDirectly if true will do a memcpy from matrix[0][0][0][0], otherwise will build up the + * non-zero elements by looping over the matrix. Note that setting this to true will yield a performance increase, + * but might not always yield correct results depending on how the matrix matrix has been initialized. If unsure, + * leave it as false. * @tparam MatrixType is assumed to be a Dune::BCRSMatrix compatible matrix. */ template - static CuSparseMatrix fromMatrix(const MatrixType& matrix); + static CuSparseMatrix fromMatrix(const MatrixType& matrix, bool copyNonZeroElementsDirectly = false); /** * @brief setUpperTriangular sets the CuSparse flag that this is an upper diagonal (with unit diagonal) matrix. @@ -257,12 +261,15 @@ public: /** * @brief updateNonzeroValues updates the non-zero values by using the non-zero values of the supplied matrix * @param matrix the matrix to extract the non-zero values from - * + * @param copyNonZeroElementsDirectly if true will do a memcpy from matrix[0][0][0][0], otherwise will build up the + * non-zero elements by looping over the matrix. Note that setting this to true will yield a performance increase, + * but might not always yield correct results depending on how the matrix matrix has been initialized. If unsure, + * leave it as false. * @note This assumes the given matrix has the same sparsity pattern. * @tparam MatrixType is assumed to be a Dune::BCRSMatrix compatible matrix. */ template - void updateNonzeroValues(const MatrixType& matrix); + void updateNonzeroValues(const MatrixType& matrix, bool copyNonZeroElementsDirectly = false); private: CuVector m_nonZeroElements;