Now handling sparsity pattern copying in a more elegant way.

This commit is contained in:
Kjetil Olsen Lye 2023-03-29 16:12:30 +02:00
parent e14da5e053
commit 67f94ce8a3
2 changed files with 65 additions and 50 deletions

View File

@ -27,7 +27,6 @@
#include <opm/simulators/linalg/cuistl/detail/cusparse_safe_call.hpp> #include <opm/simulators/linalg/cuistl/detail/cusparse_safe_call.hpp>
#include <opm/simulators/linalg/cuistl/detail/cusparse_wrapper.hpp> #include <opm/simulators/linalg/cuistl/detail/cusparse_wrapper.hpp>
#include <opm/simulators/linalg/matrixblock.hh> #include <opm/simulators/linalg/matrixblock.hh>
#define CUSPARSE_ASSUME_UNSAFE_SPARSITY 1
#define CHECK_SIZE(x) \ #define CHECK_SIZE(x) \
if (x.dim() != blockSize() * N()) { \ if (x.dim() != blockSize() * N()) { \
@ -40,6 +39,34 @@
namespace Opm::cuistl namespace Opm::cuistl
{ {
namespace
{
template <class T, class M>
std::vector<T> extractNonzeroValues(const M& matrix)
{
const size_t blockSize = matrix[0][0].N();
const size_t numberOfNonzeroBlocks = matrix.nonzeroes();
const size_t numberOfNonzeroElements = blockSize * blockSize * numberOfNonzeroBlocks;
std::vector<T> nonZeroElementsData;
// TODO: [perf] Can we avoid building nonZeroElementsData?
nonZeroElementsData.reserve(numberOfNonzeroElements);
for (auto& row : matrix) {
for (auto columnIterator = row.begin(); columnIterator != row.end(); ++columnIterator) {
for (size_t c = 0; c < blockSize; ++c) {
for (size_t d = 0; d < blockSize; ++d) {
nonZeroElementsData.push_back((*columnIterator)[c][d]);
}
}
}
}
return nonZeroElementsData;
}
} // namespace
template <class T> template <class T>
CuSparseMatrix<T>::CuSparseMatrix(const T* nonZeroElements, CuSparseMatrix<T>::CuSparseMatrix(const T* nonZeroElements,
const int* rowIndices, const int* rowIndices,
@ -67,7 +94,7 @@ CuSparseMatrix<T>::~CuSparseMatrix()
template <typename T> template <typename T>
template <typename MatrixType> template <typename MatrixType>
CuSparseMatrix<T> CuSparseMatrix<T>
CuSparseMatrix<T>::fromMatrix(const MatrixType& matrix) CuSparseMatrix<T>::fromMatrix(const MatrixType& matrix, bool copyNonZeroElementsDirectly)
{ {
// TODO: Do we need this intermediate storage? Or this shuffling of data? // TODO: Do we need this intermediate storage? Or this shuffling of data?
std::vector<int> columnIndices; std::vector<int> columnIndices;
@ -78,32 +105,16 @@ CuSparseMatrix<T>::fromMatrix(const MatrixType& matrix)
const size_t blockSize = matrix[0][0].N(); const size_t blockSize = matrix[0][0].N();
const size_t numberOfRows = matrix.N(); const size_t numberOfRows = matrix.N();
const size_t numberOfNonzeroBlocks = matrix.nonzeroes(); const size_t numberOfNonzeroBlocks = matrix.nonzeroes();
#ifndef CUSPARSE_ASSUME_UNSAFE_SPARSITY
const size_t numberOfNonzeroElements = blockSize * blockSize * numberOfNonzeroBlocks;
std::vector<T> nonZeroElementsData;
// TODO: [perf] Can we avoid building nonZeroElementsData?
nonZeroElementsData.reserve(numberOfNonzeroElements);
#endif
columnIndices.reserve(numberOfNonzeroBlocks); columnIndices.reserve(numberOfNonzeroBlocks);
rowIndices.reserve(numberOfRows + 1); rowIndices.reserve(numberOfRows + 1);
for (auto& row : matrix) { for (auto& row : matrix) {
for (auto columnIterator = row.begin(); columnIterator != row.end(); ++columnIterator) { for (auto columnIterator = row.begin(); columnIterator != row.end(); ++columnIterator) {
columnIndices.push_back(columnIterator.index()); columnIndices.push_back(columnIterator.index());
#ifndef CUSPARSE_ASSUME_UNSAFE_SPARSITY
for (size_t c = 0; c < blockSize; ++c) {
for (size_t d = 0; d < blockSize; ++d) {
nonZeroElementsData.push_back((*columnIterator)[c][d]);
}
}
#endif
} }
rowIndices.push_back(detail::to_int(columnIndices.size())); rowIndices.push_back(detail::to_int(columnIndices.size()));
} }
#ifndef CUSPARSE_ASSUME_UNSAFE_SPARSITY
auto nonZeroElements = nonZeroElementsData.data();
#else
const T* nonZeroElements = static_cast<const T*>(&((matrix[0][0][0][0])));
#endif
// Sanity check // Sanity check
// h_rows and h_cols could be changed to 'unsigned int', but cusparse expects 'int' // h_rows and h_cols could be changed to 'unsigned int', but cusparse expects 'int'
OPM_ERROR_IF(rowIndices[matrix.N()] != detail::to_int(matrix.nonzeroes()), OPM_ERROR_IF(rowIndices[matrix.N()] != detail::to_int(matrix.nonzeroes()),
@ -112,41 +123,38 @@ CuSparseMatrix<T>::fromMatrix(const MatrixType& matrix)
OPM_ERROR_IF(columnIndices.size() != numberOfNonzeroBlocks, "Column indices do not match for CuSparseMatrix."); OPM_ERROR_IF(columnIndices.size() != numberOfNonzeroBlocks, "Column indices do not match for CuSparseMatrix.");
return CuSparseMatrix<T>( if (copyNonZeroElementsDirectly) {
nonZeroElements, rowIndices.data(), columnIndices.data(), numberOfNonzeroBlocks, blockSize, numberOfRows); const T* nonZeroElements = static_cast<const T*>(&((matrix[0][0][0][0])));
return CuSparseMatrix<T>(
nonZeroElements, rowIndices.data(), columnIndices.data(), numberOfNonzeroBlocks, blockSize, numberOfRows);
} else {
auto nonZeroElementData = extractNonzeroValues<T>(matrix);
return CuSparseMatrix<T>(nonZeroElementData.data(),
rowIndices.data(),
columnIndices.data(),
numberOfNonzeroBlocks,
blockSize,
numberOfRows);
}
} }
template <class T> template <class T>
template <class MatrixType> template <class MatrixType>
void void
CuSparseMatrix<T>::updateNonzeroValues(const MatrixType& matrix) CuSparseMatrix<T>::updateNonzeroValues(const MatrixType& matrix, bool copyNonZeroElementsDirectly)
{ {
OPM_ERROR_IF(nonzeroes() != matrix.nonzeroes(), "Matrix does not have the same number of non-zero elements."); OPM_ERROR_IF(nonzeroes() != matrix.nonzeroes(), "Matrix does not have the same number of non-zero elements.");
OPM_ERROR_IF(matrix[0][0].N() != blockSize(), "Matrix does not have the same blocksize."); OPM_ERROR_IF(matrix[0][0].N() != blockSize(), "Matrix does not have the same blocksize.");
OPM_ERROR_IF(matrix.N() != N(), "Matrix does not have the same number of rows."); OPM_ERROR_IF(matrix.N() != N(), "Matrix does not have the same number of rows.");
#ifndef CUSPARSE_ASSUME_UNSAFE_SPARSITY if (!copyNonZeroElementsDirectly) {
const size_t numberOfRows = N(); auto nonZeroElementsData = extractNonzeroValues<T>(matrix);
const size_t numberOfNonzeroBlocks = nonzeroes(); m_nonZeroElements.copyFromHost(nonZeroElementsData.data(), nonzeroes() * blockSize() * blockSize());
const size_t numberOfNonzeroElements = blockSize() * blockSize() * numberOfNonzeroBlocks;
std::vector<T> nonZeroElementsData; } else {
// TODO: [perf] Can we avoid building nonZeroElementsData? const T* newNonZeroElements = static_cast<const T*>(&((matrix[0][0][0][0])));
nonZeroElementsData.reserve(numberOfNonzeroElements); m_nonZeroElements.copyFromHost(newNonZeroElements, nonzeroes() * blockSize() * blockSize());
for (auto& row : matrix) {
for (auto columnIterator = row.begin(); columnIterator != row.end(); ++columnIterator) {
for (size_t c = 0; c < blockSize(); ++c) {
for (size_t d = 0; d < blockSize(); ++d) {
nonZeroElementsData.push_back((*columnIterator)[c][d]);
}
}
}
} }
auto newNonZeroElements = nonZeroElementsData.data();
#else
const T* newNonZeroElements = static_cast<const T*>(&((matrix[0][0][0][0])));
#endif
m_nonZeroElements.copyFromHost(newNonZeroElements, nonzeroes() * blockSize() * blockSize());
} }
template <typename T> template <typename T>
@ -286,13 +294,13 @@ CuSparseMatrix<T>::usmv(T alpha, const CuVector<T>& x, CuVector<T>& y) const
#define INSTANTIATE_CUSPARSE_DUNE_MATRIX_CONSTRUCTION_FUNTIONS(realtype, blockdim) \ #define INSTANTIATE_CUSPARSE_DUNE_MATRIX_CONSTRUCTION_FUNTIONS(realtype, blockdim) \
template CuSparseMatrix<realtype> CuSparseMatrix<realtype>::fromMatrix( \ template CuSparseMatrix<realtype> CuSparseMatrix<realtype>::fromMatrix( \
const Dune::BCRSMatrix<Dune::FieldMatrix<realtype, blockdim, blockdim>>&); \ const Dune::BCRSMatrix<Dune::FieldMatrix<realtype, blockdim, blockdim>>&, bool); \
template CuSparseMatrix<realtype> CuSparseMatrix<realtype>::fromMatrix( \ template CuSparseMatrix<realtype> CuSparseMatrix<realtype>::fromMatrix( \
const Dune::BCRSMatrix<Opm::MatrixBlock<realtype, blockdim, blockdim>>&); \ const Dune::BCRSMatrix<Opm::MatrixBlock<realtype, blockdim, blockdim>>&, bool); \
template void CuSparseMatrix<realtype>::updateNonzeroValues( \ template void CuSparseMatrix<realtype>::updateNonzeroValues( \
const Dune::BCRSMatrix<Dune::FieldMatrix<realtype, blockdim, blockdim>>&); \ const Dune::BCRSMatrix<Dune::FieldMatrix<realtype, blockdim, blockdim>>&, bool); \
template void CuSparseMatrix<realtype>::updateNonzeroValues( \ template void CuSparseMatrix<realtype>::updateNonzeroValues( \
const Dune::BCRSMatrix<Opm::MatrixBlock<realtype, blockdim, blockdim>>&) const Dune::BCRSMatrix<Opm::MatrixBlock<realtype, blockdim, blockdim>>&, bool)
template class CuSparseMatrix<float>; template class CuSparseMatrix<float>;
template class CuSparseMatrix<double>; template class CuSparseMatrix<double>;

View File

@ -74,10 +74,14 @@ public:
/** /**
* @brief fromMatrix creates a new matrix with the same block size and values as the given matrix * @brief fromMatrix creates a new matrix with the same block size and values as the given matrix
* @param matrix the matrix to copy from * @param matrix the matrix to copy from
* @param copyNonZeroElementsDirectly if true will do a memcpy from matrix[0][0][0][0], otherwise will build up the
* non-zero elements by looping over the matrix. Note that setting this to true will yield a performance increase,
* but might not always yield correct results depending on how the matrix matrix has been initialized. If unsure,
* leave it as false.
* @tparam MatrixType is assumed to be a Dune::BCRSMatrix compatible matrix. * @tparam MatrixType is assumed to be a Dune::BCRSMatrix compatible matrix.
*/ */
template <class MatrixType> template <class MatrixType>
static CuSparseMatrix<T> fromMatrix(const MatrixType& matrix); static CuSparseMatrix<T> fromMatrix(const MatrixType& matrix, bool copyNonZeroElementsDirectly = false);
/** /**
* @brief setUpperTriangular sets the CuSparse flag that this is an upper diagonal (with unit diagonal) matrix. * @brief setUpperTriangular sets the CuSparse flag that this is an upper diagonal (with unit diagonal) matrix.
@ -257,12 +261,15 @@ public:
/** /**
* @brief updateNonzeroValues updates the non-zero values by using the non-zero values of the supplied matrix * @brief updateNonzeroValues updates the non-zero values by using the non-zero values of the supplied matrix
* @param matrix the matrix to extract the non-zero values from * @param matrix the matrix to extract the non-zero values from
* * @param copyNonZeroElementsDirectly if true will do a memcpy from matrix[0][0][0][0], otherwise will build up the
* non-zero elements by looping over the matrix. Note that setting this to true will yield a performance increase,
* but might not always yield correct results depending on how the matrix matrix has been initialized. If unsure,
* leave it as false.
* @note This assumes the given matrix has the same sparsity pattern. * @note This assumes the given matrix has the same sparsity pattern.
* @tparam MatrixType is assumed to be a Dune::BCRSMatrix compatible matrix. * @tparam MatrixType is assumed to be a Dune::BCRSMatrix compatible matrix.
*/ */
template <class MatrixType> template <class MatrixType>
void updateNonzeroValues(const MatrixType& matrix); void updateNonzeroValues(const MatrixType& matrix, bool copyNonZeroElementsDirectly = false);
private: private:
CuVector<T> m_nonZeroElements; CuVector<T> m_nonZeroElements;