/*
Copyright 2022-2023 SINTEF AS
This file is part of the Open Porous Media project (OPM).
OPM is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
OPM is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with OPM. If not, see .
*/
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
namespace Opm::cuistl
{
namespace
{
template
std::vector extractNonzeroValues(const M& matrix)
{
const size_t blockSize = matrix[0][0].N();
const size_t numberOfNonzeroBlocks = matrix.nonzeroes();
const size_t numberOfNonzeroElements = blockSize * blockSize * numberOfNonzeroBlocks;
std::vector nonZeroElementsData;
// TODO: [perf] Can we avoid building nonZeroElementsData?
nonZeroElementsData.reserve(numberOfNonzeroElements);
for (auto& row : matrix) {
for (auto columnIterator = row.begin(); columnIterator != row.end(); ++columnIterator) {
for (size_t c = 0; c < blockSize; ++c) {
for (size_t d = 0; d < blockSize; ++d) {
nonZeroElementsData.push_back((*columnIterator)[c][d]);
}
}
}
}
return nonZeroElementsData;
}
} // namespace
template
CuSparseMatrix::CuSparseMatrix(const T* nonZeroElements,
const int* rowIndices,
const int* columnIndices,
size_t numberOfNonzeroBlocks,
size_t blockSize,
size_t numberOfRows)
: m_nonZeroElements(nonZeroElements, numberOfNonzeroBlocks * blockSize * blockSize)
, m_columnIndices(columnIndices, numberOfNonzeroBlocks)
, m_rowIndices(rowIndices, numberOfRows + 1)
, m_numberOfNonzeroBlocks(detail::to_int(numberOfNonzeroBlocks))
, m_numberOfRows(detail::to_int(numberOfRows))
, m_blockSize(detail::to_int(blockSize))
, m_matrixDescription(detail::createMatrixDescription())
, m_cusparseHandle(detail::CuSparseHandle::getInstance())
{
}
template
CuSparseMatrix::~CuSparseMatrix()
{
// empty
}
template
template
CuSparseMatrix
CuSparseMatrix::fromMatrix(const MatrixType& matrix, bool copyNonZeroElementsDirectly)
{
// TODO: Do we need this intermediate storage? Or this shuffling of data?
std::vector columnIndices;
std::vector rowIndices;
rowIndices.push_back(0);
const size_t blockSize = matrix[0][0].N();
const size_t numberOfRows = matrix.N();
const size_t numberOfNonzeroBlocks = matrix.nonzeroes();
columnIndices.reserve(numberOfNonzeroBlocks);
rowIndices.reserve(numberOfRows + 1);
for (auto& row : matrix) {
for (auto columnIterator = row.begin(); columnIterator != row.end(); ++columnIterator) {
columnIndices.push_back(columnIterator.index());
}
rowIndices.push_back(detail::to_int(columnIndices.size()));
}
// Sanity check
// h_rows and h_cols could be changed to 'unsigned int', but cusparse expects 'int'
OPM_ERROR_IF(rowIndices[matrix.N()] != detail::to_int(matrix.nonzeroes()),
"Error size of rows do not sum to number of nonzeroes in CuSparseMatrix.");
OPM_ERROR_IF(rowIndices.size() != numberOfRows + 1, "Row indices do not match for CuSparseMatrix.");
OPM_ERROR_IF(columnIndices.size() != numberOfNonzeroBlocks, "Column indices do not match for CuSparseMatrix.");
if (copyNonZeroElementsDirectly) {
const T* nonZeroElements = static_cast(&((matrix[0][0][0][0])));
return CuSparseMatrix(
nonZeroElements, rowIndices.data(), columnIndices.data(), numberOfNonzeroBlocks, blockSize, numberOfRows);
} else {
auto nonZeroElementData = extractNonzeroValues(matrix);
return CuSparseMatrix(nonZeroElementData.data(),
rowIndices.data(),
columnIndices.data(),
numberOfNonzeroBlocks,
blockSize,
numberOfRows);
}
}
template
template
void
CuSparseMatrix::updateNonzeroValues(const MatrixType& matrix, bool copyNonZeroElementsDirectly)
{
OPM_ERROR_IF(nonzeroes() != matrix.nonzeroes(), "Matrix does not have the same number of non-zero elements.");
OPM_ERROR_IF(matrix[0][0].N() != blockSize(), "Matrix does not have the same blocksize.");
OPM_ERROR_IF(matrix.N() != N(), "Matrix does not have the same number of rows.");
if (!copyNonZeroElementsDirectly) {
auto nonZeroElementsData = extractNonzeroValues(matrix);
m_nonZeroElements.copyFromHost(nonZeroElementsData.data(), nonzeroes() * blockSize() * blockSize());
} else {
const T* newNonZeroElements = static_cast(&((matrix[0][0][0][0])));
m_nonZeroElements.copyFromHost(newNonZeroElements, nonzeroes() * blockSize() * blockSize());
}
}
template
void
CuSparseMatrix::setUpperTriangular()
{
OPM_CUSPARSE_SAFE_CALL(cusparseSetMatFillMode(m_matrixDescription->get(), CUSPARSE_FILL_MODE_UPPER));
}
template
void
CuSparseMatrix::setLowerTriangular()
{
OPM_CUSPARSE_SAFE_CALL(cusparseSetMatFillMode(m_matrixDescription->get(), CUSPARSE_FILL_MODE_LOWER));
}
template
void
CuSparseMatrix::setUnitDiagonal()
{
OPM_CUSPARSE_SAFE_CALL(cusparseSetMatDiagType(m_matrixDescription->get(), CUSPARSE_DIAG_TYPE_UNIT));
}
template
void
CuSparseMatrix::setNonUnitDiagonal()
{
OPM_CUSPARSE_SAFE_CALL(cusparseSetMatDiagType(m_matrixDescription->get(), CUSPARSE_DIAG_TYPE_NON_UNIT));
}
template
void
CuSparseMatrix::mv(const CuVector& x, CuVector& y) const
{
assertSameSize(x);
assertSameSize(y);
if (blockSize() < 2u) {
OPM_THROW(
std::invalid_argument,
"CuSparseMatrix::usmv and CuSparseMatrix::mv are only implemented for block sizes greater than 1.");
}
const auto nonzeroValues = getNonZeroValues().data();
auto rowIndices = getRowIndices().data();
auto columnIndices = getColumnIndices().data();
T alpha = 1.0;
T beta = 0.0;
OPM_CUSPARSE_SAFE_CALL(detail::cusparseBsrmv(m_cusparseHandle.get(),
detail::CUSPARSE_MATRIX_ORDER,
CUSPARSE_OPERATION_NON_TRANSPOSE,
m_numberOfRows,
m_numberOfRows,
m_numberOfNonzeroBlocks,
&alpha,
m_matrixDescription->get(),
nonzeroValues,
rowIndices,
columnIndices,
blockSize(),
x.data(),
&beta,
y.data()));
}
template
void
CuSparseMatrix::umv(const CuVector& x, CuVector& y) const
{
assertSameSize(x);
assertSameSize(y);
if (blockSize() < 2u) {
OPM_THROW(
std::invalid_argument,
"CuSparseMatrix::usmv and CuSparseMatrix::mv are only implemented for block sizes greater than 1.");
}
const auto nonzeroValues = getNonZeroValues().data();
auto rowIndices = getRowIndices().data();
auto columnIndices = getColumnIndices().data();
T alpha = 1.0;
T beta = 1.0;
OPM_CUSPARSE_SAFE_CALL(detail::cusparseBsrmv(m_cusparseHandle.get(),
detail::CUSPARSE_MATRIX_ORDER,
CUSPARSE_OPERATION_NON_TRANSPOSE,
m_numberOfRows,
m_numberOfRows,
m_numberOfNonzeroBlocks,
&alpha,
m_matrixDescription->get(),
nonzeroValues,
rowIndices,
columnIndices,
m_blockSize,
x.data(),
&beta,
y.data()));
}
template
void
CuSparseMatrix::usmv(T alpha, const CuVector& x, CuVector& y) const
{
assertSameSize(x);
assertSameSize(y);
if (blockSize() < 2) {
OPM_THROW(
std::invalid_argument,
"CuSparseMatrix::usmv and CuSparseMatrix::mv are only implemented for block sizes greater than 1.");
}
const auto numberOfRows = N();
const auto numberOfNonzeroBlocks = nonzeroes();
const auto nonzeroValues = getNonZeroValues().data();
auto rowIndices = getRowIndices().data();
auto columnIndices = getColumnIndices().data();
T beta = 1.0;
OPM_CUSPARSE_SAFE_CALL(detail::cusparseBsrmv(m_cusparseHandle.get(),
detail::CUSPARSE_MATRIX_ORDER,
CUSPARSE_OPERATION_NON_TRANSPOSE,
numberOfRows,
numberOfRows,
numberOfNonzeroBlocks,
&alpha,
m_matrixDescription->get(),
nonzeroValues,
rowIndices,
columnIndices,
blockSize(),
x.data(),
&beta,
y.data()));
}
template
template
void
CuSparseMatrix::assertSameSize(const M& x) const
{
if (x.dim() != blockSize() * N()) {
OPM_THROW(std::invalid_argument,
fmt::format("Size mismatch. Input vector has {} elements, while we have {} elements.",
x.dim(),
blockSize() * N()));
}
}
#define INSTANTIATE_CUSPARSE_DUNE_MATRIX_CONSTRUCTION_FUNTIONS(realtype, blockdim) \
template CuSparseMatrix CuSparseMatrix::fromMatrix( \
const Dune::BCRSMatrix>&, bool); \
template CuSparseMatrix CuSparseMatrix::fromMatrix( \
const Dune::BCRSMatrix>&, bool); \
template void CuSparseMatrix::updateNonzeroValues( \
const Dune::BCRSMatrix>&, bool); \
template void CuSparseMatrix::updateNonzeroValues( \
const Dune::BCRSMatrix>&, bool)
template class CuSparseMatrix;
template class CuSparseMatrix;
INSTANTIATE_CUSPARSE_DUNE_MATRIX_CONSTRUCTION_FUNTIONS(double, 1);
INSTANTIATE_CUSPARSE_DUNE_MATRIX_CONSTRUCTION_FUNTIONS(double, 2);
INSTANTIATE_CUSPARSE_DUNE_MATRIX_CONSTRUCTION_FUNTIONS(double, 3);
INSTANTIATE_CUSPARSE_DUNE_MATRIX_CONSTRUCTION_FUNTIONS(double, 4);
INSTANTIATE_CUSPARSE_DUNE_MATRIX_CONSTRUCTION_FUNTIONS(double, 5);
INSTANTIATE_CUSPARSE_DUNE_MATRIX_CONSTRUCTION_FUNTIONS(double, 6);
INSTANTIATE_CUSPARSE_DUNE_MATRIX_CONSTRUCTION_FUNTIONS(float, 1);
INSTANTIATE_CUSPARSE_DUNE_MATRIX_CONSTRUCTION_FUNTIONS(float, 2);
INSTANTIATE_CUSPARSE_DUNE_MATRIX_CONSTRUCTION_FUNTIONS(float, 3);
INSTANTIATE_CUSPARSE_DUNE_MATRIX_CONSTRUCTION_FUNTIONS(float, 4);
INSTANTIATE_CUSPARSE_DUNE_MATRIX_CONSTRUCTION_FUNTIONS(float, 5);
INSTANTIATE_CUSPARSE_DUNE_MATRIX_CONSTRUCTION_FUNTIONS(float, 6);
} // namespace Opm::cuistl