refactor cusparsematrix

Tobias Meyer Andersen 2024-08-22 15:14:33 +02:00
parent 0c1ea3ee4d
commit 3aa1767548
20 changed files with 96 additions and 96 deletions

View File

@ -554,7 +554,7 @@ if(CUDA_FOUND)
cuSparse_matrix_operations
cuVector_operations
cuvector
cusparsematrix
GpuSparseMatrix
GpuSeqILU0
GpuOwnerOverlapCopy
solver_adapter

View File

@ -216,7 +216,7 @@ if (HAVE_CUDA)
ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg CuVector.cpp)
ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg CuView.cpp)
ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg detail/vector_operations.cu)
ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg CuSparseMatrix.cpp)
ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg GpuSparseMatrix.cpp)
ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg GpuDILU.cpp)
ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg OpmCuILU0.cpp)
ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg GpuJac.cpp)
@ -242,7 +242,7 @@ if (HAVE_CUDA)
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg GpuJac.hpp)
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg CuVector.hpp)
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg CuView.hpp)
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg CuSparseMatrix.hpp)
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg GpuSparseMatrix.hpp)
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg detail/CuMatrixDescription.hpp)
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg detail/CuSparseResource.hpp)
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg detail/CuSparseResource_impl.hpp)
@ -400,7 +400,7 @@ if (HAVE_CUDA)
ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_GpuSeqILU0.cpp)
ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_cusparse_handle.cpp)
ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_cuSparse_matrix_operations.cpp)
ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_cusparsematrix.cpp)
ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_GpuSparseMatrix.cpp)
ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_cuvector.cpp)
ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_cuVector_operations.cpp)
ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_safe_conversion.cpp)

View File

@ -27,7 +27,7 @@
#include <opm/simulators/linalg/GraphColoring.hpp>
#include <opm/simulators/linalg/cuistl/detail/autotuner.hpp>
#include <opm/simulators/linalg/cuistl/GpuDILU.hpp>
#include <opm/simulators/linalg/cuistl/CuSparseMatrix.hpp>
#include <opm/simulators/linalg/cuistl/GpuSparseMatrix.hpp>
#include <opm/simulators/linalg/cuistl/CuVector.hpp>
#include <opm/simulators/linalg/cuistl/detail/coloringAndReorderingUtils.hpp>
#include <opm/simulators/linalg/cuistl/detail/cusparse_matrix_operations.hpp>
@ -46,7 +46,7 @@ GpuDILU<M, X, Y, l>::GpuDILU(const M& A, bool splitMatrix, bool tuneKernels)
, m_levelSets(Opm::getMatrixRowColoring(m_cpuMatrix, Opm::ColoringType::LOWER))
, m_reorderedToNatural(detail::createReorderedToNatural(m_levelSets))
, m_naturalToReordered(detail::createNaturalToReordered(m_levelSets))
, m_gpuMatrix(CuSparseMatrix<field_type>::fromMatrix(m_cpuMatrix, true))
, m_gpuMatrix(GpuSparseMatrix<field_type>::fromMatrix(m_cpuMatrix, true))
, m_gpuNaturalToReorder(m_naturalToReordered)
, m_gpuReorderToNatural(m_reorderedToNatural)
, m_gpuDInv(m_gpuMatrix.N() * m_gpuMatrix.blockSize() * m_gpuMatrix.blockSize())
@ -73,11 +73,11 @@ GpuDILU<M, X, Y, l>::GpuDILU(const M& A, bool splitMatrix, bool tuneKernels)
if (m_splitMatrix) {
m_gpuMatrixReorderedDiag = std::make_unique<CuVector<field_type>>(blocksize_ * blocksize_ * m_cpuMatrix.N());
std::tie(m_gpuMatrixReorderedLower, m_gpuMatrixReorderedUpper)
= detail::extractLowerAndUpperMatrices<M, field_type, CuSparseMatrix<field_type>>(m_cpuMatrix,
= detail::extractLowerAndUpperMatrices<M, field_type, GpuSparseMatrix<field_type>>(m_cpuMatrix,
m_reorderedToNatural);
}
else {
m_gpuMatrixReordered = detail::createReorderedMatrix<M, field_type, CuSparseMatrix<field_type>>(
m_gpuMatrixReordered = detail::createReorderedMatrix<M, field_type, GpuSparseMatrix<field_type>>(
m_cpuMatrix, m_reorderedToNatural);
}
computeDiagAndMoveReorderedData(m_moveThreadBlockSize, m_DILUFactorizationThreadBlockSize);

View File

@ -22,7 +22,7 @@
#include <memory>
#include <opm/grid/utility/SparseTable.hpp>
#include <opm/simulators/linalg/PreconditionerWithUpdate.hpp>
#include <opm/simulators/linalg/cuistl/CuSparseMatrix.hpp>
#include <opm/simulators/linalg/cuistl/GpuSparseMatrix.hpp>
#include <vector>
@ -52,7 +52,7 @@ public:
//! \brief The field type of the preconditioner.
using field_type = typename X::field_type;
//! \brief The GPU matrix type
using CuMat = CuSparseMatrix<field_type>;
using CuMat = GpuSparseMatrix<field_type>;
//! \brief Constructor.
//!

View File

@ -33,7 +33,7 @@ template <class M, class X, class Y, int l>
GpuJac<M, X, Y, l>::GpuJac(const M& A, field_type w)
: m_cpuMatrix(A)
, m_relaxationFactor(w)
, m_gpuMatrix(CuSparseMatrix<field_type>::fromMatrix(A))
, m_gpuMatrix(GpuSparseMatrix<field_type>::fromMatrix(A))
, m_diagInvFlattened(m_gpuMatrix.N() * m_gpuMatrix.blockSize() * m_gpuMatrix.blockSize())
{
// Some sanity check

View File

@ -21,7 +21,7 @@
#include <dune/istl/preconditioner.hh>
#include <opm/simulators/linalg/PreconditionerWithUpdate.hpp>
#include <opm/simulators/linalg/cuistl/CuSparseMatrix.hpp>
#include <opm/simulators/linalg/cuistl/GpuSparseMatrix.hpp>
#include <opm/simulators/linalg/cuistl/detail/CuMatrixDescription.hpp>
#include <opm/simulators/linalg/cuistl/detail/CuSparseHandle.hpp>
#include <opm/simulators/linalg/cuistl/detail/CuSparseResource.hpp>
@ -104,7 +104,7 @@ private:
//! \brief The relaxation factor to use.
const field_type m_relaxationFactor;
//! \brief The A matrix stored on the gpu
CuSparseMatrix<field_type> m_gpuMatrix;
GpuSparseMatrix<field_type> m_gpuMatrix;
//! \brief the diagonal of cuMatrix inverted, and then flattened to fit in a vector
CuVector<field_type> m_diagInvFlattened;

View File

@ -44,7 +44,7 @@ template <class M, class X, class Y, int l>
GpuSeqILU0<M, X, Y, l>::GpuSeqILU0(const M& A, field_type w)
: m_underlyingMatrix(A)
, m_w(w)
, m_LU(CuSparseMatrix<field_type>::fromMatrix(detail::makeMatrixWithNonzeroDiagonal(A)))
, m_LU(GpuSparseMatrix<field_type>::fromMatrix(detail::makeMatrixWithNonzeroDiagonal(A)))
, m_temporaryStorage(m_LU.N() * m_LU.blockSize())
, m_descriptionL(detail::createLowerDiagonalDescription())
, m_descriptionU(detail::createUpperDiagonalDescription())

View File

@ -21,7 +21,7 @@
#include <dune/istl/preconditioner.hh>
#include <opm/simulators/linalg/PreconditionerWithUpdate.hpp>
#include <opm/simulators/linalg/cuistl/CuSparseMatrix.hpp>
#include <opm/simulators/linalg/cuistl/GpuSparseMatrix.hpp>
#include <opm/simulators/linalg/cuistl/detail/CuMatrixDescription.hpp>
#include <opm/simulators/linalg/cuistl/detail/CuSparseHandle.hpp>
#include <opm/simulators/linalg/cuistl/detail/CuSparseResource.hpp>
@ -110,13 +110,13 @@ private:
//! This is the storage for the LU decomposition.
//! Initially this will have the values of A, but will be
//! modified in the constructor to be the proper LU decomposition.
CuSparseMatrix<field_type> m_LU;
GpuSparseMatrix<field_type> m_LU;
CuVector<field_type> m_temporaryStorage;
detail::CuSparseMatrixDescriptionPtr m_descriptionL;
detail::CuSparseMatrixDescriptionPtr m_descriptionU;
detail::GpuSparseMatrixDescriptionPtr m_descriptionL;
detail::GpuSparseMatrixDescriptionPtr m_descriptionU;
detail::CuSparseResource<bsrsv2Info_t> m_infoL;
detail::CuSparseResource<bsrsv2Info_t> m_infoU;
detail::CuSparseResource<bsrilu02Info_t> m_infoM;

View File

@ -22,7 +22,7 @@
#include <dune/istl/bcrsmatrix.hh>
#include <dune/istl/bvector.hh>
#include <fmt/core.h>
#include <opm/simulators/linalg/cuistl/CuSparseMatrix.hpp>
#include <opm/simulators/linalg/cuistl/GpuSparseMatrix.hpp>
#include <opm/simulators/linalg/cuistl/detail/cusparse_constants.hpp>
#include <opm/simulators/linalg/cuistl/detail/cusparse_safe_call.hpp>
#include <opm/simulators/linalg/cuistl/detail/cusparse_wrapper.hpp>
@ -61,7 +61,7 @@ namespace
template <class T>
CuSparseMatrix<T>::CuSparseMatrix(const T* nonZeroElements,
GpuSparseMatrix<T>::GpuSparseMatrix(const T* nonZeroElements,
const int* rowIndices,
const int* columnIndices,
size_t numberOfNonzeroBlocks,
@ -82,15 +82,15 @@ CuSparseMatrix<T>::CuSparseMatrix(const T* nonZeroElements,
}
template <class T>
CuSparseMatrix<T>::~CuSparseMatrix()
GpuSparseMatrix<T>::~GpuSparseMatrix()
{
// empty
}
template <typename T>
template <typename MatrixType>
CuSparseMatrix<T>
CuSparseMatrix<T>::fromMatrix(const MatrixType& matrix, bool copyNonZeroElementsDirectly)
GpuSparseMatrix<T>
GpuSparseMatrix<T>::fromMatrix(const MatrixType& matrix, bool copyNonZeroElementsDirectly)
{
// TODO: Do we need this intermediate storage? Or this shuffling of data?
std::vector<int> columnIndices;
@ -129,18 +129,18 @@ CuSparseMatrix<T>::fromMatrix(const MatrixType& matrix, bool copyNonZeroElements
// Sanity check
// h_rows and h_cols could be changed to 'unsigned int', but cusparse expects 'int'
OPM_ERROR_IF(rowIndices[matrix.N()] != detail::to_int(matrix.nonzeroes()),
"Error size of rows do not sum to number of nonzeroes in CuSparseMatrix.");
OPM_ERROR_IF(rowIndices.size() != numberOfRows + 1, "Row indices do not match for CuSparseMatrix.");
OPM_ERROR_IF(columnIndices.size() != numberOfNonzeroBlocks, "Column indices do not match for CuSparseMatrix.");
"Error size of rows do not sum to number of nonzeroes in GpuSparseMatrix.");
OPM_ERROR_IF(rowIndices.size() != numberOfRows + 1, "Row indices do not match for GpuSparseMatrix.");
OPM_ERROR_IF(columnIndices.size() != numberOfNonzeroBlocks, "Column indices do not match for GpuSparseMatrix.");
if (copyNonZeroElementsDirectly) {
const T* nonZeroElements = nonZeroElementsTmp;
return CuSparseMatrix<T>(
return GpuSparseMatrix<T>(
nonZeroElements, rowIndices.data(), columnIndices.data(), numberOfNonzeroBlocks, blockSize, numberOfRows);
} else {
auto nonZeroElementData = extractNonzeroValues<T>(matrix);
return CuSparseMatrix<T>(nonZeroElementData.data(),
return GpuSparseMatrix<T>(nonZeroElementData.data(),
rowIndices.data(),
columnIndices.data(),
numberOfNonzeroBlocks,
@ -152,7 +152,7 @@ CuSparseMatrix<T>::fromMatrix(const MatrixType& matrix, bool copyNonZeroElements
template <class T>
template <class MatrixType>
void
CuSparseMatrix<T>::updateNonzeroValues(const MatrixType& matrix, bool copyNonZeroElementsDirectly)
GpuSparseMatrix<T>::updateNonzeroValues(const MatrixType& matrix, bool copyNonZeroElementsDirectly)
{
OPM_ERROR_IF(nonzeroes() != matrix.nonzeroes(), "Matrix does not have the same number of non-zero elements.");
OPM_ERROR_IF(matrix[0][0].N() != blockSize(), "Matrix does not have the same blocksize.");
@ -170,42 +170,42 @@ CuSparseMatrix<T>::updateNonzeroValues(const MatrixType& matrix, bool copyNonZer
template <typename T>
void
CuSparseMatrix<T>::setUpperTriangular()
GpuSparseMatrix<T>::setUpperTriangular()
{
OPM_CUSPARSE_SAFE_CALL(cusparseSetMatFillMode(m_matrixDescription->get(), CUSPARSE_FILL_MODE_UPPER));
}
template <typename T>
void
CuSparseMatrix<T>::setLowerTriangular()
GpuSparseMatrix<T>::setLowerTriangular()
{
OPM_CUSPARSE_SAFE_CALL(cusparseSetMatFillMode(m_matrixDescription->get(), CUSPARSE_FILL_MODE_LOWER));
}
template <typename T>
void
CuSparseMatrix<T>::setUnitDiagonal()
GpuSparseMatrix<T>::setUnitDiagonal()
{
OPM_CUSPARSE_SAFE_CALL(cusparseSetMatDiagType(m_matrixDescription->get(), CUSPARSE_DIAG_TYPE_UNIT));
}
template <typename T>
void
CuSparseMatrix<T>::setNonUnitDiagonal()
GpuSparseMatrix<T>::setNonUnitDiagonal()
{
OPM_CUSPARSE_SAFE_CALL(cusparseSetMatDiagType(m_matrixDescription->get(), CUSPARSE_DIAG_TYPE_NON_UNIT));
}
template <typename T>
void
CuSparseMatrix<T>::mv(const CuVector<T>& x, CuVector<T>& y) const
GpuSparseMatrix<T>::mv(const CuVector<T>& x, CuVector<T>& y) const
{
assertSameSize(x);
assertSameSize(y);
if (blockSize() < 2u) {
OPM_THROW(
std::invalid_argument,
"CuSparseMatrix<T>::usmv and CuSparseMatrix<T>::mv are only implemented for block sizes greater than 1.");
"GpuSparseMatrix<T>::usmv and GpuSparseMatrix<T>::mv are only implemented for block sizes greater than 1.");
}
const auto nonzeroValues = getNonZeroValues().data();
@ -232,14 +232,14 @@ CuSparseMatrix<T>::mv(const CuVector<T>& x, CuVector<T>& y) const
template <typename T>
void
CuSparseMatrix<T>::umv(const CuVector<T>& x, CuVector<T>& y) const
GpuSparseMatrix<T>::umv(const CuVector<T>& x, CuVector<T>& y) const
{
assertSameSize(x);
assertSameSize(y);
if (blockSize() < 2u) {
OPM_THROW(
std::invalid_argument,
"CuSparseMatrix<T>::usmv and CuSparseMatrix<T>::mv are only implemented for block sizes greater than 1.");
"GpuSparseMatrix<T>::usmv and GpuSparseMatrix<T>::mv are only implemented for block sizes greater than 1.");
}
const auto nonzeroValues = getNonZeroValues().data();
@ -267,14 +267,14 @@ CuSparseMatrix<T>::umv(const CuVector<T>& x, CuVector<T>& y) const
template <typename T>
void
CuSparseMatrix<T>::usmv(T alpha, const CuVector<T>& x, CuVector<T>& y) const
GpuSparseMatrix<T>::usmv(T alpha, const CuVector<T>& x, CuVector<T>& y) const
{
assertSameSize(x);
assertSameSize(y);
if (blockSize() < 2) {
OPM_THROW(
std::invalid_argument,
"CuSparseMatrix<T>::usmv and CuSparseMatrix<T>::mv are only implemented for block sizes greater than 1.");
"GpuSparseMatrix<T>::usmv and GpuSparseMatrix<T>::mv are only implemented for block sizes greater than 1.");
}
const auto numberOfRows = N();
const auto numberOfNonzeroBlocks = nonzeroes();
@ -304,7 +304,7 @@ CuSparseMatrix<T>::usmv(T alpha, const CuVector<T>& x, CuVector<T>& y) const
template <class T>
template <class VectorType>
void
CuSparseMatrix<T>::assertSameSize(const VectorType& x) const
GpuSparseMatrix<T>::assertSameSize(const VectorType& x) const
{
if (x.dim() != blockSize() * N()) {
OPM_THROW(std::invalid_argument,
@ -317,17 +317,17 @@ CuSparseMatrix<T>::assertSameSize(const VectorType& x) const
#define INSTANTIATE_CUSPARSE_DUNE_MATRIX_CONSTRUCTION_FUNTIONS(realtype, blockdim) \
template CuSparseMatrix<realtype> CuSparseMatrix<realtype>::fromMatrix( \
template GpuSparseMatrix<realtype> GpuSparseMatrix<realtype>::fromMatrix( \
const Dune::BCRSMatrix<Dune::FieldMatrix<realtype, blockdim, blockdim>>&, bool); \
template CuSparseMatrix<realtype> CuSparseMatrix<realtype>::fromMatrix( \
template GpuSparseMatrix<realtype> GpuSparseMatrix<realtype>::fromMatrix( \
const Dune::BCRSMatrix<Opm::MatrixBlock<realtype, blockdim, blockdim>>&, bool); \
template void CuSparseMatrix<realtype>::updateNonzeroValues( \
template void GpuSparseMatrix<realtype>::updateNonzeroValues( \
const Dune::BCRSMatrix<Dune::FieldMatrix<realtype, blockdim, blockdim>>&, bool); \
template void CuSparseMatrix<realtype>::updateNonzeroValues( \
template void GpuSparseMatrix<realtype>::updateNonzeroValues( \
const Dune::BCRSMatrix<Opm::MatrixBlock<realtype, blockdim, blockdim>>&, bool)
template class CuSparseMatrix<float>;
template class CuSparseMatrix<double>;
template class GpuSparseMatrix<float>;
template class GpuSparseMatrix<double>;
INSTANTIATE_CUSPARSE_DUNE_MATRIX_CONSTRUCTION_FUNTIONS(double, 1);
INSTANTIATE_CUSPARSE_DUNE_MATRIX_CONSTRUCTION_FUNTIONS(double, 2);

View File

@ -16,8 +16,8 @@
You should have received a copy of the GNU General Public License
along with OPM. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef OPM_CUSPARSEMATRIX_HPP
#define OPM_CUSPARSEMATRIX_HPP
#ifndef OPM_GPUSPARSEMATRIX_HPP
#define OPM_GPUSPARSEMATRIX_HPP
#include <cusparse.h>
#include <iostream>
#include <memory>
@ -32,7 +32,7 @@ namespace Opm::gpuistl
{
/**
* @brief The CuSparseMatrix class is a simple wrapper class for a CuSparse matrix.
* @brief The GpuSparseMatrix class is a simple wrapper class for a CuSparse matrix.
*
* @note we currently only support simple raw primitives for T (double and float). Block size is handled through the
* block size parameter
@ -44,7 +44,7 @@ namespace Opm::gpuistl
* @note We only support Block Compressed Sparse Row Format (BSR) for now.
*/
template <typename T>
class CuSparseMatrix
class GpuSparseMatrix
{
public:
//! Create the sparse matrix specified by the raw data.
@ -60,7 +60,7 @@ public:
//!
//! \note We assume numberOfNonzeroBlocks, blockSize and numberOfRows all are representable as int due to
//! restrictions in the current version of cusparse. This might change in future versions.
CuSparseMatrix(const T* nonZeroElements,
GpuSparseMatrix(const T* nonZeroElements,
const int* rowIndices,
const int* columnIndices,
size_t numberOfNonzeroBlocks,
@ -70,14 +70,14 @@ public:
/**
* We don't want to be able to copy this for now (too much hassle in copying the cusparse resources)
*/
CuSparseMatrix(const CuSparseMatrix&) = delete;
GpuSparseMatrix(const GpuSparseMatrix&) = delete;
/**
* We don't want to be able to copy this for now (too much hassle in copying the cusparse resources)
*/
CuSparseMatrix& operator=(const CuSparseMatrix&) = delete;
GpuSparseMatrix& operator=(const GpuSparseMatrix&) = delete;
virtual ~CuSparseMatrix();
virtual ~GpuSparseMatrix();
/**
* @brief fromMatrix creates a new matrix with the same block size and values as the given matrix
@ -89,7 +89,7 @@ public:
* @tparam MatrixType is assumed to be a Dune::BCRSMatrix compatible matrix.
*/
template <class MatrixType>
static CuSparseMatrix<T> fromMatrix(const MatrixType& matrix, bool copyNonZeroElementsDirectly = false);
static GpuSparseMatrix<T> fromMatrix(const MatrixType& matrix, bool copyNonZeroElementsDirectly = false);
/**
* @brief setUpperTriangular sets the CuSparse flag that this is an upper triangular (with unit diagonal) matrix.
@ -233,7 +233,7 @@ public:
*
* This description is needed for most calls to the CuSparse library
*/
detail::CuSparseMatrixDescription& getDescription()
detail::GpuSparseMatrixDescription& getDescription()
{
return *m_matrixDescription;
}
@ -292,7 +292,7 @@ private:
const int m_numberOfRows;
const int m_blockSize;
detail::CuSparseMatrixDescriptionPtr m_matrixDescription;
detail::GpuSparseMatrixDescriptionPtr m_matrixDescription;
detail::CuSparseHandle& m_cusparseHandle;
template <class VectorType>
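
For orientation, the rename does not change the interface documented above. Below is a minimal, hedged sketch of how the class is exercised, mirroring the updated test_GpuSparseMatrix.cpp later in this commit; the helper function and the assembled input matrix are illustrative only, not part of the change.

#include <opm/simulators/linalg/cuistl/GpuSparseMatrix.hpp>
#include <opm/simulators/linalg/cuistl/CuVector.hpp>
#include <dune/istl/bcrsmatrix.hh>
#include <dune/common/fmatrix.hh>
#include <vector>

// Sketch only: 'matrix' stands for an already-assembled 2x2-blocked BCRS matrix,
// as set up in the tests touched by this commit.
using Block = Dune::FieldMatrix<double, 2, 2>;

void sketchGpuSpmv(const Dune::BCRSMatrix<Block>& matrix)
{
    // Upload to the GPU; passing 'true' as the second argument would copy the
    // nonzero elements directly from the Dune matrix storage, as GpuDILU and
    // OpmCuILU0 do elsewhere in this commit.
    auto gpuMatrix = Opm::gpuistl::GpuSparseMatrix<double>::fromMatrix(matrix);

    // Dense GPU vectors sized to the blocked dimension of the matrix
    // (their contents are left unspecified in this sketch).
    Opm::gpuistl::CuVector<double> x(gpuMatrix.N() * gpuMatrix.blockSize());
    Opm::gpuistl::CuVector<double> y(gpuMatrix.N() * gpuMatrix.blockSize());

    gpuMatrix.mv(x, y);         // y  = A x   (only implemented for block size > 1)
    gpuMatrix.usmv(1.42, x, y); // y += alpha * A x

    // Copy the nonzero blocks back to the host for inspection.
    std::vector<double> values(gpuMatrix.getNonZeroValues().dim(), 0.0);
    gpuMatrix.getNonZeroValues().copyToHost(values.data(), values.size());
}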

View File

@ -26,7 +26,7 @@
#include <opm/common/ErrorMacros.hpp>
#include <opm/common/TimingMacros.hpp>
#include <opm/simulators/linalg/GraphColoring.hpp>
#include <opm/simulators/linalg/cuistl/CuSparseMatrix.hpp>
#include <opm/simulators/linalg/cuistl/GpuSparseMatrix.hpp>
#include <opm/simulators/linalg/cuistl/CuVector.hpp>
#include <opm/simulators/linalg/cuistl/OpmCuILU0.hpp>
#include <opm/simulators/linalg/cuistl/detail/autotuner.hpp>
@ -46,7 +46,7 @@ OpmCuILU0<M, X, Y, l>::OpmCuILU0(const M& A, bool splitMatrix, bool tuneKernels)
, m_levelSets(Opm::getMatrixRowColoring(m_cpuMatrix, Opm::ColoringType::LOWER))
, m_reorderedToNatural(detail::createReorderedToNatural(m_levelSets))
, m_naturalToReordered(detail::createNaturalToReordered(m_levelSets))
, m_gpuMatrix(CuSparseMatrix<field_type>::fromMatrix(m_cpuMatrix, true))
, m_gpuMatrix(GpuSparseMatrix<field_type>::fromMatrix(m_cpuMatrix, true))
, m_gpuMatrixReorderedLower(nullptr)
, m_gpuMatrixReorderedUpper(nullptr)
, m_gpuNaturalToReorder(m_naturalToReordered)
@ -74,10 +74,10 @@ OpmCuILU0<M, X, Y, l>::OpmCuILU0(const M& A, bool splitMatrix, bool tuneKernels)
if (m_splitMatrix) {
m_gpuMatrixReorderedDiag.emplace(CuVector<field_type>(blocksize_ * blocksize_ * m_cpuMatrix.N()));
std::tie(m_gpuMatrixReorderedLower, m_gpuMatrixReorderedUpper)
= detail::extractLowerAndUpperMatrices<M, field_type, CuSparseMatrix<field_type>>(m_cpuMatrix,
= detail::extractLowerAndUpperMatrices<M, field_type, GpuSparseMatrix<field_type>>(m_cpuMatrix,
m_reorderedToNatural);
} else {
m_gpuReorderedLU = detail::createReorderedMatrix<M, field_type, CuSparseMatrix<field_type>>(
m_gpuReorderedLU = detail::createReorderedMatrix<M, field_type, GpuSparseMatrix<field_type>>(
m_cpuMatrix, m_reorderedToNatural);
}
LUFactorizeAndMoveData(m_moveThreadBlockSize, m_ILU0FactorizationThreadBlockSize);

View File

@ -22,7 +22,7 @@
#include <memory>
#include <opm/grid/utility/SparseTable.hpp>
#include <opm/simulators/linalg/PreconditionerWithUpdate.hpp>
#include <opm/simulators/linalg/cuistl/CuSparseMatrix.hpp>
#include <opm/simulators/linalg/cuistl/GpuSparseMatrix.hpp>
#include <opm/simulators/linalg/cuistl/CuVector.hpp>
#include <optional>
#include <type_traits>
@ -54,7 +54,7 @@ public:
//! \brief The field type of the preconditioner.
using field_type = typename X::field_type;
//! \brief The GPU matrix type
using CuMat = CuSparseMatrix<field_type>;
using CuMat = GpuSparseMatrix<field_type>;
//! \brief Constructor.
//!

View File

@ -22,7 +22,7 @@
#include <dune/istl/bcrsmatrix.hh>
#include <dune/istl/preconditioner.hh>
#include <opm/simulators/linalg/PreconditionerWithUpdate.hpp>
#include <opm/simulators/linalg/cuistl/CuSparseMatrix.hpp>
#include <opm/simulators/linalg/cuistl/GpuSparseMatrix.hpp>
#include <opm/simulators/linalg/cuistl/detail/CuMatrixDescription.hpp>
#include <opm/simulators/linalg/cuistl/detail/CuSparseHandle.hpp>
#include <opm/simulators/linalg/cuistl/detail/CuSparseResource.hpp>

View File

@ -28,7 +28,7 @@
#include <opm/common/ErrorMacros.hpp>
#include <opm/simulators/linalg/cuistl/GpuBlockPreconditioner.hpp>
#include <opm/simulators/linalg/cuistl/GpuOwnerOverlapCopy.hpp>
#include <opm/simulators/linalg/cuistl/CuSparseMatrix.hpp>
#include <opm/simulators/linalg/cuistl/GpuSparseMatrix.hpp>
#include <opm/simulators/linalg/cuistl/CuVector.hpp>
#include <opm/simulators/linalg/cuistl/PreconditionerAdapter.hpp>
#include <opm/simulators/linalg/cuistl/detail/has_function.hpp>
@ -67,7 +67,7 @@ public:
int verbose)
: Dune::IterativeSolver<X, X>(op, sp, *prec, reduction, maxit, verbose)
, m_opOnCPUWithMatrix(op)
, m_matrix(CuSparseMatrix<real_type>::fromMatrix(op.getmat()))
, m_matrix(GpuSparseMatrix<real_type>::fromMatrix(op.getmat()))
, m_underlyingSolver(constructSolver(prec, reduction, maxit, verbose))
{
}
@ -116,7 +116,7 @@ public:
private:
Operator& m_opOnCPUWithMatrix;
CuSparseMatrix<real_type> m_matrix;
GpuSparseMatrix<real_type> m_matrix;
UnderlyingSolver<XGPU> m_underlyingSolver;
@ -193,7 +193,7 @@ private:
using CudaCommunication = GpuOwnerOverlapCopy<real_type, block_size, typename Operator::communication_type>;
using SchwarzOperator
= Dune::OverlappingSchwarzOperator<CuSparseMatrix<real_type>, XGPU, XGPU, CudaCommunication>;
= Dune::OverlappingSchwarzOperator<GpuSparseMatrix<real_type>, XGPU, XGPU, CudaCommunication>;
auto cudaCommunication = std::make_shared<CudaCommunication>(gpuComm);
auto mpiPreconditioner = std::make_shared<GpuBlockPreconditioner<XGPU, XGPU, CudaCommunication>>(
@ -227,7 +227,7 @@ private:
auto preconditionerOnGPU = precAsHolder->getUnderlyingPreconditioner();
auto matrixOperator
= std::make_shared<Dune::MatrixAdapter<CuSparseMatrix<real_type>, XGPU, XGPU>>(m_matrix);
= std::make_shared<Dune::MatrixAdapter<GpuSparseMatrix<real_type>, XGPU, XGPU>>(m_matrix);
auto scalarProduct = std::make_shared<Dune::SeqScalarProduct<XGPU>>();
return UnderlyingSolver<XGPU>(
matrixOperator, scalarProduct, preconditionerOnGPU, reduction, maxit, verbose);

View File

@ -25,23 +25,23 @@ namespace Opm::gpuistl::detail
{
/**
* CuSparseMatrixDescription holder. This is internal information needed for most calls to the CuSparse API.
* GpuSparseMatrixDescription holder. This is internal information needed for most calls to the CuSparse API.
*/
using CuSparseMatrixDescription = CuSparseResource<cusparseMatDescr_t>;
using GpuSparseMatrixDescription = CuSparseResource<cusparseMatDescr_t>;
/**
* Pointer to CuSparseMatrixDescription holder. This is internal information needed for most calls to the CuSparse API.
* Pointer to GpuSparseMatrixDescription holder. This is internal information needed for most calls to the CuSparse API.
*/
using CuSparseMatrixDescriptionPtr = std::shared_ptr<CuSparseResource<cusparseMatDescr_t>>;
using GpuSparseMatrixDescriptionPtr = std::shared_ptr<CuSparseResource<cusparseMatDescr_t>>;
/**
* @brief createMatrixDescription creates a default matrix description
* @return a matrix description to a general sparse matrix with zero based indexing.
*/
inline CuSparseMatrixDescriptionPtr
inline GpuSparseMatrixDescriptionPtr
createMatrixDescription()
{
auto description = std::make_shared<CuSparseMatrixDescription>();
auto description = std::make_shared<GpuSparseMatrixDescription>();
// Note: We always want to use zero base indexing.
OPM_CUSPARSE_SAFE_CALL(cusparseSetMatType(description->get(), CUSPARSE_MATRIX_TYPE_GENERAL));
@ -56,7 +56,7 @@ createMatrixDescription()
*
* @note This will assume it has a unit diagonal
*/
inline CuSparseMatrixDescriptionPtr
inline GpuSparseMatrixDescriptionPtr
createLowerDiagonalDescription()
{
auto description = createMatrixDescription();
@ -71,7 +71,7 @@ createLowerDiagonalDescription()
*
* @note This will assume it has a non-unit diagonal.
*/
inline CuSparseMatrixDescriptionPtr
inline GpuSparseMatrixDescriptionPtr
createUpperDiagonalDescription()
{
auto description = createMatrixDescription();
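
As a hedged illustration of how the renamed aliases are consumed (mirroring the GpuSeqILU0 members changed above; the function name is illustrative only):

#include <opm/simulators/linalg/cuistl/detail/CuMatrixDescription.hpp>

// Sketch only: obtain the two triangular descriptions the way GpuSeqILU0 does
// and hand the wrapped cusparseMatDescr_t on to cuSPARSE.
void sketchDescriptions()
{
    using namespace Opm::gpuistl::detail;

    // Lower-triangular description, assumed to carry a unit diagonal ...
    GpuSparseMatrixDescriptionPtr descriptionL = createLowerDiagonalDescription();
    // ... and an upper-triangular one with a non-unit diagonal.
    GpuSparseMatrixDescriptionPtr descriptionU = createUpperDiagonalDescription();

    // The raw descriptor is what the cuSPARSE triangular-solve and ILU
    // routines ultimately receive.
    auto rawL = descriptionL->get();
    auto rawU = descriptionU->get();
    (void)rawL;
    (void)rawU;
}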

View File

@ -29,7 +29,7 @@
/*
This file contains a collection of utility functions used in the GPU implementation of ILU and DILU
The functions deal with creating the mappings between reordered and natural indices, as well as
extracting sparsity structures from Dune matrices and creating CuSparseMatrix indices.
extracting sparsity structures from Dune matrices and creating GpuSparseMatrix indices.
*/
namespace Opm::gpuistl::detail
{
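
A hedged sketch of how these helpers fit together, composed only of calls visible in the GpuDILU and OpmCuILU0 diffs above; the concrete matrix type and function name are illustrative.

#include <opm/simulators/linalg/GraphColoring.hpp>
#include <opm/simulators/linalg/cuistl/GpuSparseMatrix.hpp>
#include <opm/simulators/linalg/cuistl/detail/coloringAndReorderingUtils.hpp>
#include <dune/istl/bcrsmatrix.hh>
#include <dune/common/fmatrix.hh>

using M = Dune::BCRSMatrix<Dune::FieldMatrix<double, 2, 2>>;

// Sketch only: derive the reordering maps from a lower-triangular level-set
// coloring and build the reordered GPU matrix, as GpuDILU does in its constructor.
void sketchReordering(const M& cpuMatrix)
{
    const auto levelSets = Opm::getMatrixRowColoring(cpuMatrix, Opm::ColoringType::LOWER);
    const auto reorderedToNatural = Opm::gpuistl::detail::createReorderedToNatural(levelSets);
    const auto naturalToReordered = Opm::gpuistl::detail::createNaturalToReordered(levelSets);

    // GpuDILU/OpmCuILU0 upload both maps into CuVector<int> members; here we
    // only build the reordered matrix itself.
    auto reorderedGpuMatrix
        = Opm::gpuistl::detail::createReorderedMatrix<M, double, Opm::gpuistl::GpuSparseMatrix<double>>(
            cpuMatrix, reorderedToNatural);
    (void)naturalToReordered;
    (void)reorderedGpuMatrix;
}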

View File

@ -26,7 +26,7 @@
#include <memory>
#include <opm/simulators/linalg/DILU.hpp>
#include <opm/simulators/linalg/cuistl/GpuDILU.hpp>
#include <opm/simulators/linalg/cuistl/CuSparseMatrix.hpp>
#include <opm/simulators/linalg/cuistl/GpuSparseMatrix.hpp>
#include <opm/simulators/linalg/cuistl/CuVector.hpp>
#include <opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp>
#include <opm/simulators/linalg/cuistl/detail/cusparse_matrix_operations.hpp>
@ -41,7 +41,7 @@ using B1x1Vec = Dune::BlockVector<Dune::FieldVector<double, 1>>;
using B2x2Vec = Dune::BlockVector<Dune::FieldVector<double, 2>>;
using Sp1x1BlockMatrix = Dune::BCRSMatrix<FM1x1>;
using Sp2x2BlockMatrix = Dune::BCRSMatrix<FM2x2>;
using CuMatrix = Opm::gpuistl::CuSparseMatrix<T>;
using CuMatrix = Opm::gpuistl::GpuSparseMatrix<T>;
using CuIntVec = Opm::gpuistl::CuVector<int>;
using CuFloatingPointVec = Opm::gpuistl::CuVector<T>;
using GpuDilu1x1 = Opm::gpuistl::GpuDILU<Sp1x1BlockMatrix, CuFloatingPointVec, CuFloatingPointVec>;

View File

@ -24,7 +24,7 @@
#include <cuda_runtime.h>
#include <dune/istl/bcrsmatrix.hh>
#include <opm/simulators/linalg/cuistl/GpuJac.hpp>
#include <opm/simulators/linalg/cuistl/CuSparseMatrix.hpp>
#include <opm/simulators/linalg/cuistl/GpuSparseMatrix.hpp>
#include <opm/simulators/linalg/cuistl/CuVector.hpp>
#include <opm/simulators/linalg/cuistl/PreconditionerAdapter.hpp>
#include <opm/simulators/linalg/cuistl/detail/cusparse_matrix_operations.hpp>

View File

@ -18,12 +18,12 @@
*/
#include <config.h>
#define BOOST_TEST_MODULE TestCuSparseMatrix
#define BOOST_TEST_MODULE TestGpuSparseMatrix
#include <boost/test/unit_test.hpp>
#include <dune/istl/bcrsmatrix.hh>
#include <memory>
#include <opm/simulators/linalg/cuistl/CuSparseMatrix.hpp>
#include <opm/simulators/linalg/cuistl/GpuSparseMatrix.hpp>
#include <opm/simulators/linalg/cuistl/CuVector.hpp>
#include <opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp>
#include <random>
@ -76,17 +76,17 @@ BOOST_AUTO_TEST_CASE(TestConstruction1D)
}
}
auto cuSparseMatrix = Opm::gpuistl::CuSparseMatrix<double>::fromMatrix(B);
auto gpuSparseMatrix = Opm::gpuistl::GpuSparseMatrix<double>::fromMatrix(B);
const auto& nonZeroValuesCuda = cuSparseMatrix.getNonZeroValues();
std::vector<double> buffer(cuSparseMatrix.nonzeroes(), 0.0);
const auto& nonZeroValuesCuda = gpuSparseMatrix.getNonZeroValues();
std::vector<double> buffer(gpuSparseMatrix.nonzeroes(), 0.0);
nonZeroValuesCuda.copyToHost(buffer.data(), buffer.size());
const double* nonZeroElements = static_cast<const double*>(&((B[0][0][0][0])));
BOOST_CHECK_EQUAL_COLLECTIONS(buffer.begin(), buffer.end(), nonZeroElements, nonZeroElements + B.nonzeroes());
BOOST_CHECK_EQUAL(N * 3 - 2, cuSparseMatrix.nonzeroes());
BOOST_CHECK_EQUAL(N * 3 - 2, gpuSparseMatrix.nonzeroes());
std::vector<int> rowIndicesFromCUDA(N + 1);
cuSparseMatrix.getRowIndices().copyToHost(rowIndicesFromCUDA.data(), rowIndicesFromCUDA.size());
gpuSparseMatrix.getRowIndices().copyToHost(rowIndicesFromCUDA.data(), rowIndicesFromCUDA.size());
BOOST_CHECK_EQUAL(rowIndicesFromCUDA[0], 0);
BOOST_CHECK_EQUAL(rowIndicesFromCUDA[1], 2);
for (int i = 2; i < N; ++i) {
@ -95,7 +95,7 @@ BOOST_AUTO_TEST_CASE(TestConstruction1D)
std::vector<int> columnIndicesFromCUDA(B.nonzeroes(), 0);
cuSparseMatrix.getColumnIndices().copyToHost(columnIndicesFromCUDA.data(), columnIndicesFromCUDA.size());
gpuSparseMatrix.getColumnIndices().copyToHost(columnIndicesFromCUDA.data(), columnIndicesFromCUDA.size());
BOOST_CHECK_EQUAL(columnIndicesFromCUDA[0], 0);
BOOST_CHECK_EQUAL(columnIndicesFromCUDA[1], 1);
@ -143,7 +143,7 @@ BOOST_AUTO_TEST_CASE(RandomSparsityMatrix)
}
}
auto cuSparseMatrix = Opm::gpuistl::CuSparseMatrix<double>::fromMatrix(B);
auto gpuSparseMatrix = Opm::gpuistl::GpuSparseMatrix<double>::fromMatrix(B);
// check each column
for (size_t component = 0; component < N; ++component) {
std::vector<double> inputDataX(N * dim, 0.0);
@ -155,7 +155,7 @@ BOOST_AUTO_TEST_CASE(RandomSparsityMatrix)
yHost = inputDataY[0];
inputVectorX.copyToHost(xHost);
const double alpha = 1.42;
cuSparseMatrix.usmv(alpha, inputVectorX, inputVectorY);
gpuSparseMatrix.usmv(alpha, inputVectorX, inputVectorY);
inputVectorY.copyToHost(inputDataY);
@ -167,7 +167,7 @@ BOOST_AUTO_TEST_CASE(RandomSparsityMatrix)
}
inputVectorX.copyToHost(xHost);
cuSparseMatrix.mv(inputVectorX, inputVectorY);
gpuSparseMatrix.mv(inputVectorX, inputVectorY);
inputVectorY.copyToHost(inputDataY);

View File

@ -18,12 +18,12 @@
*/
#include <config.h>
#define BOOST_TEST_MODULE TestCuSparseMatrixOperations
#define BOOST_TEST_MODULE TestGpuSparseMatrixOperations
#include <boost/mpl/list.hpp>
#include <boost/test/unit_test.hpp>
#include <cuda_runtime.h>
#include <dune/istl/bcrsmatrix.hh>
#include <opm/simulators/linalg/cuistl/CuSparseMatrix.hpp>
#include <opm/simulators/linalg/cuistl/GpuSparseMatrix.hpp>
#include <opm/simulators/linalg/cuistl/CuVector.hpp>
#include <opm/simulators/linalg/cuistl/PreconditionerAdapter.hpp>
#include <opm/simulators/linalg/cuistl/detail/cusparse_matrix_operations.hpp>
@ -85,7 +85,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(FlattenAndInvertDiagonalWith3By3Blocks, T, Numeric
B[1][1][1][1] = -1.0;
B[1][1][2][2] = -1.0;
Opm::gpuistl::CuSparseMatrix<T> m = Opm::gpuistl::CuSparseMatrix<T>::fromMatrix(B);
Opm::gpuistl::GpuSparseMatrix<T> m = Opm::gpuistl::GpuSparseMatrix<T>::fromMatrix(B);
Opm::gpuistl::CuVector<T> dInvDiag(blocksize * blocksize * N);
Opm::gpuistl::detail::JAC::invertDiagonalAndFlatten<T, 3>(
@ -159,7 +159,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(FlattenAndInvertDiagonalWith2By2Blocks, T, Numeric
B[1][1][0][0] = -1.0;
B[1][1][1][1] = -1.0;
Opm::gpuistl::CuSparseMatrix<T> m = Opm::gpuistl::CuSparseMatrix<T>::fromMatrix(B);
Opm::gpuistl::GpuSparseMatrix<T> m = Opm::gpuistl::GpuSparseMatrix<T>::fromMatrix(B);
Opm::gpuistl::CuVector<T> dInvDiag(blocksize * blocksize * N);
Opm::gpuistl::detail::JAC::invertDiagonalAndFlatten<T, 2>(