Added a CuSparse matrix wrapper.

This commit is contained in:
Kjetil Olsen Lye 2023-03-28 15:55:13 +02:00
parent 858d8b189b
commit 31e7ef04ba
9 changed files with 1529 additions and 0 deletions

View File

@ -149,6 +149,7 @@ if(CUDA_FOUND)
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/cuistl/detail/CuSparseHandle.cpp)
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/cuistl/CuVector.cpp)
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/cuistl/detail/vector_operations.cu)
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/cuistl/CuSparseMatrix.cpp)
# CUISTL HEADERS
@ -159,6 +160,12 @@ if(CUDA_FOUND)
list (APPEND PUBLIC_HEADER_FILES opm/simulators/linalg/cuistl/detail/CuBlasHandle.hpp)
list (APPEND PUBLIC_HEADER_FILES opm/simulators/linalg/cuistl/detail/CuSparseHandle.hpp)
list (APPEND PUBLIC_HEADER_FILES opm/simulators/linalg/cuistl/CuVector.hpp)
list (APPEND PUBLIC_HEADER_FILES opm/simulators/linalg/cuistl/CuSparseMatrix.hpp)
list (APPEND PUBLIC_HEADER_FILES opm/simulators/linalg/cuistl/detail/CuMatrixDescription.hpp)
list (APPEND PUBLIC_HEADER_FILES opm/simulators/linalg/cuistl/detail/CuSparseResource.hpp)
list (APPEND PUBLIC_HEADER_FILES opm/simulators/linalg/cuistl/detail/CuSparseResource_impl.hpp)
endif()
if(OPENCL_FOUND)
@ -244,6 +251,7 @@ if(CUDA_FOUND)
list(APPEND TEST_SOURCE_FILES tests/cuistl/test_cublas_handle.cpp)
list(APPEND TEST_SOURCE_FILES tests/cuistl/test_cusparse_handle.cpp)
list(APPEND TEST_SOURCE_FILES tests/cuistl/test_cuvector.cpp)
list(APPEND TEST_SOURCE_FILES tests/cuistl/test_cusparsematrix.cpp)
endif()
if(OPENCL_FOUND)
list(APPEND TEST_SOURCE_FILES tests/test_openclSolver.cpp)

View File

@ -0,0 +1,315 @@
/*
Copyright 2022-2023 SINTEF AS
This file is part of the Open Porous Media project (OPM).
OPM is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
OPM is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with OPM. If not, see <http://www.gnu.org/licenses/>.
*/
#include <cuda.h>
#include <dune/common/fmatrix.hh>
#include <dune/common/fvector.hh>
#include <dune/istl/bcrsmatrix.hh>
#include <dune/istl/bvector.hh>
#include <fmt/core.h>
#include <opm/simulators/linalg/cuistl/CuSparseMatrix.hpp>
#include <opm/simulators/linalg/cuistl/detail/cusparse_constants.hpp>
#include <opm/simulators/linalg/cuistl/detail/cusparse_safe_call.hpp>
#include <opm/simulators/linalg/cuistl/detail/cusparse_wrapper.hpp>
#include <opm/simulators/linalg/matrixblock.hh>
#define CUSPARSE_ASSUME_UNSAFE_SPARSITY 1
#define CHECK_SIZE(x) \
if (x.dim() != blockSize() * N()) { \
OPM_THROW( \
std::invalid_argument, \
fmt::format( \
"Size mismatch. {} has {} elements, while we have {} elements.", #x, x.dim(), blockSize() * N())); \
}
namespace Opm::cuistl
{
template <class T>
CuSparseMatrix<T>::CuSparseMatrix(const T* nonZeroElements,
const int* rowIndices,
const int* columnIndices,
int numberOfNonzeroBlocks,
int blockSize,
int numberOfRows)
: m_nonZeroElements(nonZeroElements, numberOfNonzeroBlocks * blockSize * blockSize)
, m_columnIndices(columnIndices, numberOfNonzeroBlocks)
, m_rowIndices(rowIndices, numberOfRows + 1)
, m_numberOfNonzeroBlocks(numberOfNonzeroBlocks)
, m_numberOfRows(numberOfRows)
, m_blockSize(blockSize)
, m_matrixDescription(detail::createMatrixDescription())
, m_cusparseHandle(detail::CuSparseHandle::getInstance())
{
}
template <class T>
CuSparseMatrix<T>::~CuSparseMatrix()
{
// empty
}
template <typename T>
template <typename MatrixType>
CuSparseMatrix<T>
CuSparseMatrix<T>::fromMatrix(const MatrixType& matrix)
{
// TODO: Do we need this intermediate storage? Or this shuffling of data?
std::vector<int> columnIndices;
std::vector<int> rowIndices;
rowIndices.push_back(0);
const int blockSize = matrix[0][0].N();
const int numberOfRows = matrix.N();
const int numberOfNonzeroBlocks = matrix.nonzeroes();
const int numberOfNonzeroElements = blockSize * blockSize * numberOfNonzeroBlocks;
#ifndef CUSPARSE_ASSUME_UNSAFE_SPARSITY
std::vector<T> nonZeroElementsData;
// TODO: [perf] Can we avoid building nonZeroElementsData?
nonZeroElementsData.reserve(numberOfNonzeroElements);
#endif
columnIndices.reserve(numberOfNonzeroBlocks);
rowIndices.reserve(numberOfRows + 1);
for (auto& row : matrix) {
for (auto columnIterator = row.begin(); columnIterator != row.end(); ++columnIterator) {
columnIndices.push_back(columnIterator.index());
#ifndef CUSPARSE_ASSUME_UNSAFE_SPARSITY
for (int c = 0; c < blockSize; ++c) {
for (int d = 0; d < blockSize; ++d) {
nonZeroElementsData.push_back((*columnIterator)[c][d]);
}
}
#endif
}
rowIndices.push_back(columnIndices.size());
}
#ifndef CUSPARSE_ASSUME_UNSAFE_SPARSITY
auto nonZeroElements = nonZeroElementsData.data();
#else
const T* nonZeroElements = static_cast<const T*>(&((matrix[0][0][0][0])));
#endif
// Sanity check
// h_rows and h_cols could be changed to 'unsigned int', but cusparse expects 'int'
OPM_ERROR_IF(size_t(rowIndices[matrix.N()]) != matrix.nonzeroes(),
"Error size of rows do not sum to number of nonzeroes in CuSparseMatrix.");
OPM_ERROR_IF(rowIndices.size() != numberOfRows + 1, "Row indices do not match for CuSparseMatrix.");
OPM_ERROR_IF(columnIndices.size() != numberOfNonzeroBlocks, "Column indices do not match for CuSparseMatrix.");
return CuSparseMatrix<T>(
nonZeroElements, rowIndices.data(), columnIndices.data(), numberOfNonzeroBlocks, blockSize, numberOfRows);
}
template <class T>
template <class MatrixType>
void
CuSparseMatrix<T>::updateNonzeroValues(const MatrixType& matrix)
{
OPM_ERROR_IF(nonzeroes() != matrix.nonzeroes(), "Matrix does not have the same number of non-zero elements.");
OPM_ERROR_IF(matrix[0][0].N() != blockSize(), "Matrix does not have the same blocksize.");
OPM_ERROR_IF(matrix.N() != N(), "Matrix does not have the same number of rows.");
const int numberOfRows = N();
const int numberOfNonzeroBlocks = nonzeroes();
const int numberOfNonzeroElements = blockSize() * blockSize() * numberOfNonzeroBlocks;
#ifndef CUSPARSE_ASSUME_UNSAFE_SPARSITY
std::vector<T> nonZeroElementsData;
// TODO: [perf] Can we avoid building nonZeroElementsData?
nonZeroElementsData.reserve(numberOfNonzeroElements);
for (auto& row : matrix) {
for (auto columnIterator = row.begin(); columnIterator != row.end(); ++columnIterator) {
for (int c = 0; c < blockSize(); ++c) {
for (int d = 0; d < blockSize(); ++d) {
nonZeroElementsData.push_back((*columnIterator)[c][d]);
}
}
}
}
auto newNonZeroElements = nonZeroElementsData.data();
#else
const T* newNonZeroElements = static_cast<const T*>(&((matrix[0][0][0][0])));
#endif
m_nonZeroElements.copyFromHost(newNonZeroElements, nonzeroes() * blockSize() * blockSize());
}
template <typename T>
void
CuSparseMatrix<T>::setUpperTriangular()
{
OPM_CUSPARSE_SAFE_CALL(cusparseSetMatFillMode(m_matrixDescription->get(), CUSPARSE_FILL_MODE_UPPER));
}
template <typename T>
void
CuSparseMatrix<T>::setLowerTriangular()
{
OPM_CUSPARSE_SAFE_CALL(cusparseSetMatFillMode(m_matrixDescription->get(), CUSPARSE_FILL_MODE_LOWER));
}
template <typename T>
void
CuSparseMatrix<T>::setUnitDiagonal()
{
OPM_CUSPARSE_SAFE_CALL(cusparseSetMatDiagType(m_matrixDescription->get(), CUSPARSE_DIAG_TYPE_UNIT));
}
template <typename T>
void
CuSparseMatrix<T>::setNonUnitDiagonal()
{
OPM_CUSPARSE_SAFE_CALL(cusparseSetMatDiagType(m_matrixDescription->get(), CUSPARSE_DIAG_TYPE_NON_UNIT));
}
template <typename T>
void
CuSparseMatrix<T>::mv(const CuVector<T>& x, CuVector<T>& y) const
{
CHECK_SIZE(x)
CHECK_SIZE(y)
if (blockSize() < 2) {
OPM_THROW(
std::invalid_argument,
"CuSparseMatrix<T>::usmv and CuSparseMatrix<T>::mv are only implemented for block sizes greater than 1.");
}
const auto numberOfRows = N();
const auto numberOfNonzeroBlocks = nonzeroes();
const auto nonzeroValues = getNonZeroValues().data();
auto rowIndices = getRowIndices().data();
auto columnIndices = getColumnIndices().data();
T alpha = 1.0;
T beta = 0.0;
OPM_CUSPARSE_SAFE_CALL(detail::cusparseBsrmv(m_cusparseHandle.get(),
detail::CUSPARSE_MATRIX_ORDER,
CUSPARSE_OPERATION_NON_TRANSPOSE,
numberOfRows,
numberOfRows,
numberOfNonzeroBlocks,
&alpha,
m_matrixDescription->get(),
nonzeroValues,
rowIndices,
columnIndices,
blockSize(),
x.data(),
&beta,
y.data()));
}
template <typename T>
void
CuSparseMatrix<T>::umv(const CuVector<T>& x, CuVector<T>& y) const
{
CHECK_SIZE(x)
CHECK_SIZE(y)
if (blockSize() < 2) {
OPM_THROW(
std::invalid_argument,
"CuSparseMatrix<T>::usmv and CuSparseMatrix<T>::mv are only implemented for block sizes greater than 1.");
}
const auto numberOfRows = N();
const auto numberOfNonzeroBlocks = nonzeroes();
const auto nonzeroValues = getNonZeroValues().data();
auto rowIndices = getRowIndices().data();
auto columnIndices = getColumnIndices().data();
T alpha = 1.0;
T beta = 1.0;
OPM_CUSPARSE_SAFE_CALL(detail::cusparseBsrmv(m_cusparseHandle.get(),
detail::CUSPARSE_MATRIX_ORDER,
CUSPARSE_OPERATION_NON_TRANSPOSE,
numberOfRows,
numberOfRows,
numberOfNonzeroBlocks,
&alpha,
m_matrixDescription->get(),
nonzeroValues,
rowIndices,
columnIndices,
blockSize(),
x.data(),
&beta,
y.data()));
}
template <typename T>
void
CuSparseMatrix<T>::usmv(T alpha, const CuVector<T>& x, CuVector<T>& y) const
{
CHECK_SIZE(x)
CHECK_SIZE(y)
if (blockSize() < 2) {
OPM_THROW(
std::invalid_argument,
"CuSparseMatrix<T>::usmv and CuSparseMatrix<T>::mv are only implemented for block sizes greater than 1.");
}
const auto numberOfRows = N();
const auto numberOfNonzeroBlocks = nonzeroes();
const auto nonzeroValues = getNonZeroValues().data();
auto rowIndices = getRowIndices().data();
auto columnIndices = getColumnIndices().data();
T beta = 1.0;
OPM_CUSPARSE_SAFE_CALL(detail::cusparseBsrmv(m_cusparseHandle.get(),
detail::CUSPARSE_MATRIX_ORDER,
CUSPARSE_OPERATION_NON_TRANSPOSE,
numberOfRows,
numberOfRows,
numberOfNonzeroBlocks,
&alpha,
m_matrixDescription->get(),
nonzeroValues,
rowIndices,
columnIndices,
blockSize(),
x.data(),
&beta,
y.data()));
}
#define INSTANTIATE_CUSPARSE_DUNE_MATRIX_CONSTRUCTION_FUNTIONS(realtype, blockdim) \
template CuSparseMatrix<realtype> CuSparseMatrix<realtype>::fromMatrix( \
const Dune::BCRSMatrix<Dune::FieldMatrix<realtype, blockdim, blockdim>>&); \
template CuSparseMatrix<realtype> CuSparseMatrix<realtype>::fromMatrix( \
const Dune::BCRSMatrix<Opm::MatrixBlock<realtype, blockdim, blockdim>>&); \
template void CuSparseMatrix<realtype>::updateNonzeroValues( \
const Dune::BCRSMatrix<Dune::FieldMatrix<realtype, blockdim, blockdim>>&); \
template void CuSparseMatrix<realtype>::updateNonzeroValues( \
const Dune::BCRSMatrix<Opm::MatrixBlock<realtype, blockdim, blockdim>>&)
template class CuSparseMatrix<float>;
template class CuSparseMatrix<double>;
INSTANTIATE_CUSPARSE_DUNE_MATRIX_CONSTRUCTION_FUNTIONS(double, 1);
INSTANTIATE_CUSPARSE_DUNE_MATRIX_CONSTRUCTION_FUNTIONS(double, 2);
INSTANTIATE_CUSPARSE_DUNE_MATRIX_CONSTRUCTION_FUNTIONS(double, 3);
INSTANTIATE_CUSPARSE_DUNE_MATRIX_CONSTRUCTION_FUNTIONS(double, 4);
INSTANTIATE_CUSPARSE_DUNE_MATRIX_CONSTRUCTION_FUNTIONS(double, 5);
INSTANTIATE_CUSPARSE_DUNE_MATRIX_CONSTRUCTION_FUNTIONS(double, 6);
INSTANTIATE_CUSPARSE_DUNE_MATRIX_CONSTRUCTION_FUNTIONS(float, 1);
INSTANTIATE_CUSPARSE_DUNE_MATRIX_CONSTRUCTION_FUNTIONS(float, 2);
INSTANTIATE_CUSPARSE_DUNE_MATRIX_CONSTRUCTION_FUNTIONS(float, 3);
INSTANTIATE_CUSPARSE_DUNE_MATRIX_CONSTRUCTION_FUNTIONS(float, 4);
INSTANTIATE_CUSPARSE_DUNE_MATRIX_CONSTRUCTION_FUNTIONS(float, 5);
INSTANTIATE_CUSPARSE_DUNE_MATRIX_CONSTRUCTION_FUNTIONS(float, 6);
} // namespace Opm::cuistl

View File

@ -0,0 +1,255 @@
/*
Copyright 2022-2023 SINTEF AS
This file is part of the Open Porous Media project (OPM).
OPM is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
OPM is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with OPM. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef OPM_CUSPARSEMATRIX_HPP
#define OPM_CUSPARSEMATRIX_HPP
#include <cusparse.h>
#include <iostream>
#include <memory>
#include <opm/common/ErrorMacros.hpp>
#include <opm/simulators/linalg/cuistl/CuVector.hpp>
#include <opm/simulators/linalg/cuistl/detail/CuMatrixDescription.hpp>
#include <opm/simulators/linalg/cuistl/detail/CuSparseHandle.hpp>
#include <vector>
namespace Opm::cuistl
{
/**
* @brief The CuSparseMatrix class simple wrapper class for a CuSparse matrix.
*
* @note we currently only support simple raw primitives for T (double and float). Block size is handled through the
* block size parameter
*
* @tparam T the type to store. Can be either float, double or int.
*
* @note we only support square matrices.
*
* @note We only support Block Compressed Sparse Row Format (BSR) for now.
*/
template <typename T>
class CuSparseMatrix
{
public:
//! Create the sparse matrix specified by the raw data.
//!
//! \note Prefer to use the constructor taking a const reference to a matrix instead.
//!
//! \param[in] nonZeroElements the non-zero values of the matrix
//! \param[in] rowIndices the row indices of the non-zero elements
//! \param[in] columnIndices the column indices of the non-zero elements
//! \param[in] numberOfNonzeroElements number of nonzero elements
//! \param[in] blockSize size of each block matrix (typically 3)
//! \param[in] numberOfRows the number of rows
CuSparseMatrix(const T* nonZeroElements,
const int* rowIndices,
const int* columnIndices,
int numberOfNonzeroBlocks,
int blockSize,
int numberOfRows);
// TODO: Handle copy ctor and operator=
virtual ~CuSparseMatrix();
/**
* @brief fromMatrix creates a new matrix with the same block size and values as the given matrix
* @param matrix the matrix to copy from
* @tparam MatrixType is assumed to be a Dune::BCRSMatrix compatible matrix.
*/
template <class MatrixType>
static CuSparseMatrix<T> fromMatrix(const MatrixType& matrix);
/**
* @brief setUpperTriangular sets the CuSparse flag that this is an upper diagonal (with unit diagonal) matrix.
*/
void setUpperTriangular();
/**
* @brief setLowerTriangular sets the CuSparse flag that this is an lower diagonal (with non-unit diagonal) matrix.
*/
void setLowerTriangular();
/**
* @brief setUnitDiagonal sets the CuSparse flag that this has unit diagional.
*/
void setUnitDiagonal();
/**
* @brief setNonUnitDiagonal sets the CuSparse flag that this has non-unit diagional.
*/
void setNonUnitDiagonal();
/**
* @brief N returns the number of rows (which is equal to the number of columns)
*/
size_t N() const
{
return m_numberOfRows;
}
/**
* @brief nonzeroes behaves as the Dune::BCRSMatrix::nonzeros() function and returns the number of non zero blocks
* @return number of non zero blocks.
*/
size_t nonzeroes() const
{
return m_numberOfNonzeroBlocks;
}
/**
* @brief getNonZeroValues returns the GPU vector containing the non-zero values (ordered by block)
*
* @note Read the CuSPARSE documentation on Block Compressed Sparse Row Format (BSR) for the exact ordering.
*/
CuVector<T>& getNonZeroValues()
{
return m_nonZeroElements;
}
/**
* @brief getNonZeroValues returns the GPU vector containing the non-zero values (ordered by block)
*
* @note Read the CuSPARSE documentation on Block Compressed Sparse Row Format (BSR) for the exact ordering.
*/
const CuVector<T>& getNonZeroValues() const
{
return m_nonZeroElements;
}
/**
* @brief getRowIndices returns the row indices used to represent the BSR structure.
*
* @note Read the CuSPARSE documentation on Block Compressed Sparse Row Format (BSR) for the exact ordering.
*/
CuVector<int>& getRowIndices()
{
return m_rowIndices;
}
/**
* @brief getRowIndices returns the row indices used to represent the BSR structure.
*
* @note Read the CuSPARSE documentation on Block Compressed Sparse Row Format (BSR) for the exact ordering.
*/
const CuVector<int>& getRowIndices() const
{
return m_rowIndices;
}
/**
* @brief getColumnIndices returns the column indices used to represent the BSR structure.
*
* @return Read the CuSPARSE documentation on Block Compressed Sparse Row Format (BSR) for the exact ordering.
*/
CuVector<int>& getColumnIndices()
{
return m_columnIndices;
}
/**
* @brief getColumnIndices returns the column indices used to represent the BSR structure.
*
* @return Read the CuSPARSE documentation on Block Compressed Sparse Row Format (BSR) for the exact ordering.
*/
const CuVector<int>& getColumnIndices() const
{
return m_columnIndices;
}
/**
* @brief dim returns the dimension of the vector space on which this matrix acts
*
* This is equivalent to matrix.N() * matrix.blockSize()
* @return matrix.N() * matrix.blockSize()
*/
int dim() const
{
return m_blockSize * m_numberOfRows;
}
/**
* @brief blockSize size of the blocks
*/
int blockSize() const
{
return m_blockSize;
}
/**
* @brief getDescription the cusparse matrix description.
*
* This description is needed for most calls to the CuSparse library
*/
detail::CuSparseMatrixDescription& getDescription()
{
return *m_matrixDescription;
}
/**
* @brief mv performs matrix vector multiply y = Ax
* @param[in] x the vector to multiply the matrix with
* @param[out] y the output vector
*
* @note Due to limitations of CuSparse, this is only supported for block sizes greater than 1.
*/
virtual void mv(const CuVector<T>& x, CuVector<T>& y) const;
/**
* @brief umv computes y=Ax+y
* @param[in] x the vector to multiply with A
* @param[inout] y the vector to add and store the output in
*
* @note Due to limitations of CuSparse, this is only supported for block sizes greater than 1.
*/
virtual void umv(const CuVector<T>& x, CuVector<T>& y) const;
/**
* @brief umv computes y=alpha * Ax + y
* @param[in] x the vector to multiply with A
* @param[inout] y the vector to add and store the output in
*
* @note Due to limitations of CuSparse, this is only supported for block sizes greater than 1.
*/
virtual void usmv(T alpha, const CuVector<T>& x, CuVector<T>& y) const;
/**
* @brief updateNonzeroValues updates the non-zero values by using the non-zero values of the supplied matrix
* @param matrix the matrix to extract the non-zero values from
*
* @note This assumes the given matrix has the same sparsity pattern.
* @tparam MatrixType is assumed to be a Dune::BCRSMatrix compatible matrix.
*/
template <class MatrixType>
void updateNonzeroValues(const MatrixType& matrix);
private:
CuVector<T> m_nonZeroElements;
CuVector<int> m_columnIndices;
CuVector<int> m_rowIndices;
const int m_numberOfNonzeroBlocks;
const int m_numberOfRows;
const int m_blockSize;
detail::CuSparseMatrixDescriptionPtr m_matrixDescription;
detail::CuSparseHandle& m_cusparseHandle;
};
} // namespace Opm::cuistl
#endif

View File

@ -0,0 +1,86 @@
/*
Copyright 2022-2023 SINTEF AS
This file is part of the Open Porous Media project (OPM).
OPM is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
OPM is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with OPM. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef CU_MATRIX_DESCRIPTION_HPP
#define CU_MATRIX_DESCRIPTION_HPP
#include <opm/simulators/linalg/cuistl/detail/CuSparseResource.hpp>
#include <opm/simulators/linalg/cuistl/detail/cusparse_safe_call.hpp>
namespace Opm::cuistl::detail
{
/**
* CuSparseMatrixDescription holder. This is internal information needed for most calls to the CuSparse API.
*/
using CuSparseMatrixDescription = CuSparseResource<cusparseMatDescr_t>;
/**
* Pointer to CuSparseMatrixDescription holder. This is internal information needed for most calls to the CuSparse API.
*/
using CuSparseMatrixDescriptionPtr = std::shared_ptr<CuSparseResource<cusparseMatDescr_t>>;
/**
* @brief createMatrixDescription creates a default matrix description
* @return a matrix description to a general sparse matrix with zero based indexing.
*/
inline CuSparseMatrixDescriptionPtr
createMatrixDescription()
{
auto description = std::make_shared<CuSparseMatrixDescription>();
// Note: We always want to use zero base indexing.
OPM_CUSPARSE_SAFE_CALL(cusparseSetMatType(description->get(), CUSPARSE_MATRIX_TYPE_GENERAL));
OPM_CUSPARSE_SAFE_CALL(cusparseSetMatIndexBase(description->get(), CUSPARSE_INDEX_BASE_ZERO));
return description;
}
/**
* @brief createLowerDiagonalDescription creates a lower diagonal matrix description
* @return a lower diagonal matrix description overlapped with options from ::Opm::cuistl::detail::createMatrixDescription()
*
* @note This will assume it has a unit diagonal
*/
inline CuSparseMatrixDescriptionPtr
createLowerDiagonalDescription()
{
auto description = createMatrixDescription();
OPM_CUSPARSE_SAFE_CALL(cusparseSetMatFillMode(description->get(), CUSPARSE_FILL_MODE_LOWER));
OPM_CUSPARSE_SAFE_CALL(cusparseSetMatDiagType(description->get(), CUSPARSE_DIAG_TYPE_UNIT));
return description;
}
/**
* @brief createUpperDiagonalDescription creates an upper diagonal matrix description
* @return an upper diagonal matrix description overlapped with options from ::Opm::cuistl::detail::createMatrixDescription()
*
* @note This will assume it has a non-unit diagonal.
*/
inline CuSparseMatrixDescriptionPtr
createUpperDiagonalDescription()
{
auto description = createMatrixDescription();
OPM_CUSPARSE_SAFE_CALL(cusparseSetMatFillMode(description->get(), CUSPARSE_FILL_MODE_UPPER));
OPM_CUSPARSE_SAFE_CALL(cusparseSetMatDiagType(description->get(), CUSPARSE_DIAG_TYPE_NON_UNIT));
return description;
}
} // namespace Opm::cuistl::detail
#endif // CU_MATRIX_DESCRIPTION_HPP

View File

@ -0,0 +1,99 @@
/*
Copyright 2022-2023 SINTEF AS
This file is part of the Open Porous Media project (OPM).
OPM is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
OPM is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with OPM. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef CUSPARSERESOURCE_HPP
#define CUSPARSERESOURCE_HPP
#include <cusparse.h>
#include <functional>
#include <memory>
#include <type_traits>
namespace Opm::cuistl::detail
{
/**
* @brief The CuSparseResource class wraps a CuSparse resource in a proper RAII pattern
*
* Current we support the following types for T:
* - bsrilu02Info_t
* - bsrsv2Info_t
* - cusparseMatDescr_t
*
* More types are in principle supported by supplying a manual Creator and Destructor.
*
* In addition to acting as an easier-to-use smart_ptr specialized for these types, it
* also adds error checking in the construction and deletion of these resources,
* which a plain std::smart_ptr would not support out of the box. It also solves the
* caveat of the pointer types of cuSparse resources not being exposed properly.
*
* Example usage:
* @code{.cpp}
* #include <opm/simulator/linalg/cuistl/detail/CuSparseResource.hpp>
*
* void someFunction() {
* auto resource = CuSparseResource<cuSparseMatDescr_t>();
* }
* @endcode
*/
template <class T>
class CuSparseResource
{
public:
using CreatorType = typename std::function<cusparseStatus_t(T*)>;
using DeleterType = typename std::function<cusparseStatus_t(T)>;
/**
* @brief CuSparseResource creates a new instance by calling creator, and will delete using deleter
* @param creator a functor used to create an instance
* @param deleter a functor used to delete the instance
*
* @note Using this constructor it is possible to add support for new types not already accounted for.
*/
CuSparseResource(CreatorType creator, DeleterType deleter);
/**
* @brief CuSparseResource will automatically select the proper creator and deleter based on the type (and throw an exception if not available)
*/
CuSparseResource();
/**
* Calls the deleter functor
*/
~CuSparseResource();
// This should not be copyable.
CuSparseResource(const CuSparseResource&) = delete;
CuSparseResource& operator=(const CuSparseResource&) = delete;
/**
* @brief get returns the raw pointer to the resource.
*/
T get()
{
return m_resource;
}
private:
T m_resource;
DeleterType m_deleter;
};
} // namespace Opm::cuistl::impl
#include <opm/simulators/linalg/cuistl/detail/CuSparseResource_impl.hpp>
#endif // CUSPARSERESOURCE_HPP

View File

@ -0,0 +1,103 @@
/*
Copyright 2022-2023 SINTEF AS
This file is part of the Open Porous Media project (OPM).
OPM is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
OPM is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with OPM. If not, see <http://www.gnu.org/licenses/>.
*/
#include <exception>
#include <opm/common/ErrorMacros.hpp>
#include <opm/simulators/linalg/cuistl/detail/cusparse_safe_call.hpp>
namespace Opm::cuistl::detail
{
namespace
{
template <class T>
struct CuSparseDeleteAndCreate {
};
template <>
struct CuSparseDeleteAndCreate<bsrilu02Info_t> {
using DeleterType = typename CuSparseResource<bsrilu02Info_t>::DeleterType;
using CreatorType = typename CuSparseResource<bsrilu02Info_t>::CreatorType;
static DeleterType getDeleter()
{
return cusparseDestroyBsrilu02Info;
}
static CreatorType getCreator()
{
return cusparseCreateBsrilu02Info;
}
};
template <>
struct CuSparseDeleteAndCreate<bsrsv2Info_t> {
using DeleterType = typename CuSparseResource<bsrsv2Info_t>::DeleterType;
using CreatorType = typename CuSparseResource<bsrsv2Info_t>::CreatorType;
static DeleterType getDeleter()
{
return cusparseDestroyBsrsv2Info;
}
static CreatorType getCreator()
{
return cusparseCreateBsrsv2Info;
}
};
template <>
struct CuSparseDeleteAndCreate<cusparseMatDescr_t> {
using DeleterType = typename CuSparseResource<cusparseMatDescr_t>::DeleterType;
using CreatorType = typename CuSparseResource<cusparseMatDescr_t>::CreatorType;
static DeleterType getDeleter()
{
return cusparseDestroyMatDescr;
}
static CreatorType getCreator()
{
return cusparseCreateMatDescr;
}
};
} // namespace
template <class T>
CuSparseResource<T>::CuSparseResource(CreatorType creator, DeleterType deleter)
: m_deleter(deleter)
{
// TODO: This should probably not use this macro since it will disguise the
// proper name of the function being called.
OPM_CUSPARSE_SAFE_CALL(creator(&m_resource));
}
template <class T>
CuSparseResource<T>::CuSparseResource()
: CuSparseResource<T>(CuSparseDeleteAndCreate<T>::getCreator(), CuSparseDeleteAndCreate<T>::getDeleter())
{
}
template <class T>
CuSparseResource<T>::~CuSparseResource()
{
// TODO: This should probably not use this macro since it will disguise the
// proper name of the function being called.
OPM_CUSPARSE_SAFE_CALL(m_deleter(m_resource));
}
} // namespace Opm::cuistl::impl

View File

@ -0,0 +1,27 @@
/*
Copyright 2022-2023 SINTEF AS
This file is part of the Open Porous Media project (OPM).
OPM is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
OPM is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with OPM. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef CUSPARSE_CONSTANTS_HPP
#define CUSPARSE_CONSTANTS_HPP
#include <cusparse.h>
namespace Opm::cuistl::detail
{
const constexpr auto CUSPARSE_MATRIX_ORDER = CUSPARSE_DIRECTION_ROW;
}
#endif

View File

@ -0,0 +1,454 @@
/*
Copyright 2022-2023 SINTEF AS
This file is part of the Open Porous Media project (OPM).
OPM is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
OPM is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with OPM. If not, see <http://www.gnu.org/licenses/>.
*/
/**
* Contains wrappers to make the CuSPARSE library behave as a modern C++ library with function overlading.
*
* In simple terms, this allows one to call say cusparseBsrilu02_analysis on both double and single precisision,
* instead of calling cusparseDbsrilu02_analysis and cusparseDbsrilu02_analysis respectively.
*/
#include <cusparse.h>
#include <type_traits>
#ifndef OPM_CUSPARSE_WRAPPER_HPP
#define OPM_CUSPARSE_WRAPPER_HPP
namespace Opm::cuistl::detail
{
inline cusparseStatus_t
cusparseBsrilu02_analysis(cusparseHandle_t handle,
cusparseDirection_t dirA,
int mb,
int nnzb,
const cusparseMatDescr_t descrA,
double* bsrSortedVal,
const int* bsrSortedRowPtr,
const int* bsrSortedColInd,
int blockDim,
bsrilu02Info_t info,
cusparseSolvePolicy_t policy,
void* pBuffer)
{
return cusparseDbsrilu02_analysis(handle,
dirA,
mb,
nnzb,
descrA,
bsrSortedVal,
bsrSortedRowPtr,
bsrSortedColInd,
blockDim,
info,
policy,
pBuffer);
}
inline cusparseStatus_t
cusparseBsrsv2_analysis(cusparseHandle_t handle,
cusparseDirection_t dirA,
cusparseOperation_t transA,
int mb,
int nnzb,
const cusparseMatDescr_t descrA,
const double* bsrSortedValA,
const int* bsrSortedRowPtrA,
const int* bsrSortedColIndA,
int blockDim,
bsrsv2Info_t info,
cusparseSolvePolicy_t policy,
void* pBuffer)
{
return cusparseDbsrsv2_analysis(handle,
dirA,
transA,
mb,
nnzb,
descrA,
bsrSortedValA,
bsrSortedRowPtrA,
bsrSortedColIndA,
blockDim,
info,
policy,
pBuffer);
}
inline cusparseStatus_t
cusparseBsrsv2_analysis(cusparseHandle_t handle,
cusparseDirection_t dirA,
cusparseOperation_t transA,
int mb,
int nnzb,
const cusparseMatDescr_t descrA,
const float* bsrSortedValA,
const int* bsrSortedRowPtrA,
const int* bsrSortedColIndA,
int blockDim,
bsrsv2Info_t info,
cusparseSolvePolicy_t policy,
void* pBuffer)
{
return cusparseSbsrsv2_analysis(handle,
dirA,
transA,
mb,
nnzb,
descrA,
bsrSortedValA,
bsrSortedRowPtrA,
bsrSortedColIndA,
blockDim,
info,
policy,
pBuffer);
}
inline cusparseStatus_t
cusparseBsrilu02_analysis(cusparseHandle_t handle,
cusparseDirection_t dirA,
int mb,
int nnzb,
const cusparseMatDescr_t descrA,
float* bsrSortedVal,
const int* bsrSortedRowPtr,
const int* bsrSortedColInd,
int blockDim,
bsrilu02Info_t info,
cusparseSolvePolicy_t policy,
void* pBuffer)
{
return cusparseSbsrilu02_analysis(handle,
dirA,
mb,
nnzb,
descrA,
bsrSortedVal,
bsrSortedRowPtr,
bsrSortedColInd,
blockDim,
info,
policy,
pBuffer);
}
inline cusparseStatus_t
cusparseBsrsv2_solve(cusparseHandle_t handle,
cusparseDirection_t dirA,
cusparseOperation_t transA,
int mb,
int nnzb,
const double* alpha,
const cusparseMatDescr_t descrA,
const double* bsrSortedValA,
const int* bsrSortedRowPtrA,
const int* bsrSortedColIndA,
int blockDim,
bsrsv2Info_t info,
const double* f,
double* x,
cusparseSolvePolicy_t policy,
void* pBuffer)
{
return cusparseDbsrsv2_solve(handle,
dirA,
transA,
mb,
nnzb,
alpha,
descrA,
bsrSortedValA,
bsrSortedRowPtrA,
bsrSortedColIndA,
blockDim,
info,
f,
x,
policy,
pBuffer);
}
inline cusparseStatus_t
cusparseBsrsv2_solve(cusparseHandle_t handle,
cusparseDirection_t dirA,
cusparseOperation_t transA,
int mb,
int nnzb,
const float* alpha,
const cusparseMatDescr_t descrA,
const float* bsrSortedValA,
const int* bsrSortedRowPtrA,
const int* bsrSortedColIndA,
int blockDim,
bsrsv2Info_t info,
const float* f,
float* x,
cusparseSolvePolicy_t policy,
void* pBuffer)
{
return cusparseSbsrsv2_solve(handle,
dirA,
transA,
mb,
nnzb,
alpha,
descrA,
bsrSortedValA,
bsrSortedRowPtrA,
bsrSortedColIndA,
blockDim,
info,
f,
x,
policy,
pBuffer);
}
inline cusparseStatus_t
cusparseBsrilu02_bufferSize(cusparseHandle_t handle,
cusparseDirection_t dirA,
int mb,
int nnzb,
const cusparseMatDescr_t descrA,
double* bsrSortedVal,
const int* bsrSortedRowPtr,
const int* bsrSortedColInd,
int blockDim,
bsrilu02Info_t info,
int* pBufferSizeInBytes)
{
return cusparseDbsrilu02_bufferSize(handle,
dirA,
mb,
nnzb,
descrA,
bsrSortedVal,
bsrSortedRowPtr,
bsrSortedColInd,
blockDim,
info,
pBufferSizeInBytes);
}
inline cusparseStatus_t
cusparseBsrilu02_bufferSize(cusparseHandle_t handle,
cusparseDirection_t dirA,
int mb,
int nnzb,
const cusparseMatDescr_t descrA,
float* bsrSortedVal,
const int* bsrSortedRowPtr,
const int* bsrSortedColInd,
int blockDim,
bsrilu02Info_t info,
int* pBufferSizeInBytes)
{
return cusparseSbsrilu02_bufferSize(handle,
dirA,
mb,
nnzb,
descrA,
bsrSortedVal,
bsrSortedRowPtr,
bsrSortedColInd,
blockDim,
info,
pBufferSizeInBytes);
}
inline cusparseStatus_t
cusparseBsrsv2_bufferSize(cusparseHandle_t handle,
cusparseDirection_t dirA,
cusparseOperation_t transA,
int mb,
int nnzb,
const cusparseMatDescr_t descrA,
double* bsrSortedValA,
const int* bsrSortedRowPtrA,
const int* bsrSortedColIndA,
int blockDim,
bsrsv2Info_t info,
int* pBufferSizeInBytes)
{
return cusparseDbsrsv2_bufferSize(handle,
dirA,
transA,
mb,
nnzb,
descrA,
bsrSortedValA,
bsrSortedRowPtrA,
bsrSortedColIndA,
blockDim,
info,
pBufferSizeInBytes);
}
inline cusparseStatus_t
cusparseBsrsv2_bufferSize(cusparseHandle_t handle,
cusparseDirection_t dirA,
cusparseOperation_t transA,
int mb,
int nnzb,
const cusparseMatDescr_t descrA,
float* bsrSortedValA,
const int* bsrSortedRowPtrA,
const int* bsrSortedColIndA,
int blockDim,
bsrsv2Info_t info,
int* pBufferSizeInBytes)
{
return cusparseSbsrsv2_bufferSize(handle,
dirA,
transA,
mb,
nnzb,
descrA,
bsrSortedValA,
bsrSortedRowPtrA,
bsrSortedColIndA,
blockDim,
info,
pBufferSizeInBytes);
}
inline cusparseStatus_t
cusparseBsrilu02(cusparseHandle_t handle,
cusparseDirection_t dirA,
int mb,
int nnzb,
const cusparseMatDescr_t descrA,
double* bsrSortedVal,
const int* bsrSortedRowPtr,
const int* bsrSortedColInd,
int blockDim,
bsrilu02Info_t info,
cusparseSolvePolicy_t policy,
void* pBuffer)
{
return cusparseDbsrilu02(handle,
dirA,
mb,
nnzb,
descrA,
bsrSortedVal,
bsrSortedRowPtr,
bsrSortedColInd,
blockDim,
info,
policy,
pBuffer);
}
inline cusparseStatus_t
cusparseBsrilu02(cusparseHandle_t handle,
cusparseDirection_t dirA,
int mb,
int nnzb,
const cusparseMatDescr_t descrA,
float* bsrSortedVal,
const int* bsrSortedRowPtr,
const int* bsrSortedColInd,
int blockDim,
bsrilu02Info_t info,
cusparseSolvePolicy_t policy,
void* pBuffer)
{
return cusparseSbsrilu02(handle,
dirA,
mb,
nnzb,
descrA,
bsrSortedVal,
bsrSortedRowPtr,
bsrSortedColInd,
blockDim,
info,
policy,
pBuffer);
}
inline cusparseStatus_t
cusparseBsrmv(cusparseHandle_t handle,
cusparseDirection_t dirA,
cusparseOperation_t transA,
int mb,
int nb,
int nnzb,
const double* alpha,
const cusparseMatDescr_t descrA,
const double* bsrSortedValA,
const int* bsrSortedRowPtrA,
const int* bsrSortedColIndA,
int blockDim,
const double* x,
const double* beta,
double* y)
{
return cusparseDbsrmv(handle,
dirA,
transA,
mb,
nb,
nnzb,
alpha,
descrA,
bsrSortedValA,
bsrSortedRowPtrA,
bsrSortedColIndA,
blockDim,
x,
beta,
y);
}
inline cusparseStatus_t
cusparseBsrmv(cusparseHandle_t handle,
cusparseDirection_t dirA,
cusparseOperation_t transA,
int mb,
int nb,
int nnzb,
const float* alpha,
const cusparseMatDescr_t descrA,
const float* bsrSortedValA,
const int* bsrSortedRowPtrA,
const int* bsrSortedColIndA,
int blockDim,
const float* x,
const float* beta,
float* y)
{
return cusparseSbsrmv(handle,
dirA,
transA,
mb,
nb,
nnzb,
alpha,
descrA,
bsrSortedValA,
bsrSortedRowPtrA,
bsrSortedColIndA,
blockDim,
x,
beta,
y);
}
} // namespace Opm::cuistl::detail
#endif

View File

@ -0,0 +1,182 @@
/*
Copyright 2022-2023 SINTEF AS
This file is part of the Open Porous Media project (OPM).
OPM is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
OPM is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with OPM. If not, see <http://www.gnu.org/licenses/>.
*/
#include <config.h>
#define BOOST_TEST_MODULE TestCuSparseMatrix
#include <boost/test/unit_test.hpp>
#include <dune/istl/bcrsmatrix.hh>
#include <memory>
#include <opm/simulators/linalg/cuistl/CuSparseMatrix.hpp>
#include <opm/simulators/linalg/cuistl/CuVector.hpp>
#include <opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp>
#include <random>
BOOST_AUTO_TEST_CASE(TestConstruction1D)
{
// Here we will test a simple 1D finite difference scheme for
// the Laplace equation:
//
// -\Delta u = f on [0,1]
//
// Using a central difference approximation of \Delta u, this can
// be approximated by
//
// -(u_{i+1}-2u_i+u_{i-1})/Dx^2 = f(x_i)
//
// giving rise to the matrix
//
// -2 1 0 0 ... 0 0
// 1 -2 1 0 0 ... 0
// ....
// 0 0 0 ...1 -2 1
// 0 0 0 ... 1 -2
const int N = 5;
const int nonZeroes = N * 3 - 2;
using M = Dune::FieldMatrix<double, 1, 1>;
using SpMatrix = Dune::BCRSMatrix<M>;
using Vector = Dune::BlockVector<Dune::FieldVector<double, 1>>;
SpMatrix B(N, N, nonZeroes, SpMatrix::row_wise);
for (auto row = B.createbegin(); row != B.createend(); ++row) {
// Add nonzeros for left neighbour, diagonal and right neighbour
if (row.index() > 0) {
row.insert(row.index() - 1);
}
row.insert(row.index());
if (row.index() < B.N() - 1) {
row.insert(row.index() + 1);
}
}
// This might not be the most elegant way of filling in a Dune sparse matrix, but it works.
for (int i = 0; i < N; ++i) {
B[i][i] = -2;
if (i < N - 1) {
B[i][i + 1] = 1;
}
if (i > 0) {
B[i][i - 1] = 1;
}
}
auto cuSparseMatrix = Opm::cuistl::CuSparseMatrix<double>::fromMatrix(B);
const auto& nonZeroValuesCuda = cuSparseMatrix.getNonZeroValues();
std::vector<double> buffer(cuSparseMatrix.nonzeroes(), 0.0);
nonZeroValuesCuda.copyToHost(buffer.data(), buffer.size());
const double* nonZeroElements = static_cast<const double*>(&((B[0][0][0][0])));
BOOST_CHECK_EQUAL_COLLECTIONS(buffer.begin(), buffer.end(), nonZeroElements, nonZeroElements + B.nonzeroes());
BOOST_CHECK_EQUAL(N * 3 - 2, cuSparseMatrix.nonzeroes());
std::vector<int> rowIndicesFromCUDA(N + 1);
cuSparseMatrix.getRowIndices().copyToHost(rowIndicesFromCUDA.data(), rowIndicesFromCUDA.size());
BOOST_CHECK_EQUAL(rowIndicesFromCUDA[0], 0);
BOOST_CHECK_EQUAL(rowIndicesFromCUDA[1], 2);
for (int i = 2; i < N; ++i) {
BOOST_CHECK_EQUAL(rowIndicesFromCUDA[i], rowIndicesFromCUDA[i - 1] + 3);
}
std::vector<int> columnIndicesFromCUDA(B.nonzeroes(), 0);
cuSparseMatrix.getColumnIndices().copyToHost(columnIndicesFromCUDA.data(), columnIndicesFromCUDA.size());
BOOST_CHECK_EQUAL(columnIndicesFromCUDA[0], 0);
BOOST_CHECK_EQUAL(columnIndicesFromCUDA[1], 1);
// TODO: Check rest
}
BOOST_AUTO_TEST_CASE(RandomSparsityMatrix)
{
std::srand(0);
double nonzeroPercent = 0.2;
std::mt19937 generator;
std::uniform_real_distribution<double> distribution(0.0, 1.0);
constexpr size_t dim = 3;
const int N = 300;
using M = Dune::FieldMatrix<double, dim, dim>;
using SpMatrix = Dune::BCRSMatrix<M>;
using Vector = Dune::BlockVector<Dune::FieldVector<double, dim>>;
std::vector<std::vector<size_t>> nonzerocols(N);
int nonZeroes = 0;
for (auto row = 0; row < N; ++row) {
for (size_t col = 0; col < N; ++col) {
if (distribution(generator) < nonzeroPercent) {
nonzerocols.at(row).push_back(col);
nonZeroes++;
}
}
}
SpMatrix B(N, N, nonZeroes, SpMatrix::row_wise);
for (auto row = B.createbegin(); row != B.createend(); ++row) {
for (size_t j = 0; j < nonzerocols[row.index()].size(); ++j) {
row.insert(nonzerocols[row.index()][j]);
}
}
// This might not be the most elegant way of filling in a Dune sparse matrix, but it works.
for (int i = 0; i < N; ++i) {
for (size_t j = 0; j < nonzerocols[i].size(); ++j) {
for (size_t c1 = 0; c1 < dim; ++c1) {
for (size_t c2 = 0; c2 < dim; ++c2) {
B[i][nonzerocols[i][j]][c1][c2] = distribution(generator);
}
}
}
}
auto cuSparseMatrix = Opm::cuistl::CuSparseMatrix<double>::fromMatrix(B);
// check each column
for (size_t component = 0; component < N; ++component) {
std::vector<double> inputDataX(N * dim, 0.0);
inputDataX[component] = 1.0;
std::vector<double> inputDataY(N * dim, .25);
auto inputVectorX = Opm::cuistl::CuVector<double>(inputDataX.data(), inputDataX.size());
auto inputVectorY = Opm::cuistl::CuVector<double>(inputDataY.data(), inputDataY.size());
Vector xHost(N), yHost(N);
yHost = inputDataY[0];
inputVectorX.copyToHost(xHost);
const double alpha = 1.42;
cuSparseMatrix.usmv(alpha, inputVectorX, inputVectorY);
inputVectorY.copyToHost(inputDataY);
B.usmv(alpha, xHost, yHost);
for (size_t i = 0; i < N; ++i) {
for (size_t c = 0; c < dim; ++c) {
BOOST_CHECK_CLOSE(inputDataY[i * dim + c], yHost[i][c], 1e-7);
}
}
inputVectorX.copyToHost(xHost);
cuSparseMatrix.mv(inputVectorX, inputVectorY);
inputVectorY.copyToHost(inputDataY);
B.mv(xHost, yHost);
for (size_t i = 0; i < N; ++i) {
for (size_t c = 0; c < dim; ++c) {
BOOST_CHECK_CLOSE(inputDataY[i * dim + c], yHost[i][c], 1e-7);
}
}
}
}