mirror of
https://github.com/OPM/opm-simulators.git
synced 2025-02-25 18:55:30 -06:00
Merge pull request #4634 from kjetilly/cuistl_vector_matrix
Path to multigpu: Cuistl vector and matrix classes
This commit is contained in:
commit
e5672ee816
@ -549,6 +549,8 @@ if(CUDA_FOUND)
|
||||
cuda_check_last_error
|
||||
cublas_handle
|
||||
cusparse_handle
|
||||
cuvector
|
||||
cusparsematrix
|
||||
PROPERTIES LABELS gpu_cuda)
|
||||
endif()
|
||||
|
||||
|
@ -147,6 +147,10 @@ if(CUDA_FOUND)
|
||||
# CUISTL SOURCE
|
||||
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/cuistl/detail/CuBlasHandle.cpp)
|
||||
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/cuistl/detail/CuSparseHandle.cpp)
|
||||
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/cuistl/CuVector.cpp)
|
||||
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/cuistl/detail/vector_operations.cu)
|
||||
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/cuistl/CuSparseMatrix.cpp)
|
||||
|
||||
|
||||
# CUISTL HEADERS
|
||||
list (APPEND PUBLIC_HEADER_FILES opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp)
|
||||
@ -155,6 +159,18 @@ if(CUDA_FOUND)
|
||||
list (APPEND PUBLIC_HEADER_FILES opm/simulators/linalg/cuistl/detail/cuda_check_last_error.hpp)
|
||||
list (APPEND PUBLIC_HEADER_FILES opm/simulators/linalg/cuistl/detail/CuBlasHandle.hpp)
|
||||
list (APPEND PUBLIC_HEADER_FILES opm/simulators/linalg/cuistl/detail/CuSparseHandle.hpp)
|
||||
list (APPEND PUBLIC_HEADER_FILES opm/simulators/linalg/cuistl/CuVector.hpp)
|
||||
list (APPEND PUBLIC_HEADER_FILES opm/simulators/linalg/cuistl/CuSparseMatrix.hpp)
|
||||
list (APPEND PUBLIC_HEADER_FILES opm/simulators/linalg/cuistl/detail/CuMatrixDescription.hpp)
|
||||
list (APPEND PUBLIC_HEADER_FILES opm/simulators/linalg/cuistl/detail/CuSparseResource.hpp)
|
||||
list (APPEND PUBLIC_HEADER_FILES opm/simulators/linalg/cuistl/detail/CuSparseResource_impl.hpp)
|
||||
list (APPEND PUBLIC_HEADER_FILES opm/simulators/linalg/cuistl/detail/safe_conversion.hpp)
|
||||
list (APPEND PUBLIC_HEADER_FILES opm/simulators/linalg/cuistl/detail/cublas_wrapper.hpp)
|
||||
list (APPEND PUBLIC_HEADER_FILES opm/simulators/linalg/cuistl/detail/cusparse_wrapper.hpp)
|
||||
list (APPEND PUBLIC_HEADER_FILES opm/simulators/linalg/cuistl/detail/cusparse_constants.hpp)
|
||||
list (APPEND PUBLIC_HEADER_FILES opm/simulators/linalg/cuistl/detail/vector_operations.hpp)
|
||||
|
||||
|
||||
|
||||
endif()
|
||||
if(OPENCL_FOUND)
|
||||
@ -239,7 +255,9 @@ if(CUDA_FOUND)
|
||||
list(APPEND TEST_SOURCE_FILES tests/cuistl/test_cuda_check_last_error.cpp)
|
||||
list(APPEND TEST_SOURCE_FILES tests/cuistl/test_cublas_handle.cpp)
|
||||
list(APPEND TEST_SOURCE_FILES tests/cuistl/test_cusparse_handle.cpp)
|
||||
|
||||
list(APPEND TEST_SOURCE_FILES tests/cuistl/test_cuvector.cpp)
|
||||
list(APPEND TEST_SOURCE_FILES tests/cuistl/test_cusparsematrix.cpp)
|
||||
list(APPEND TEST_SOURCE_FILES tests/cuistl/test_safe_conversion.cpp)
|
||||
|
||||
endif()
|
||||
if(OPENCL_FOUND)
|
||||
|
330
opm/simulators/linalg/cuistl/CuSparseMatrix.cpp
Normal file
330
opm/simulators/linalg/cuistl/CuSparseMatrix.cpp
Normal file
@ -0,0 +1,330 @@
|
||||
/*
|
||||
Copyright 2022-2023 SINTEF AS
|
||||
|
||||
This file is part of the Open Porous Media project (OPM).
|
||||
|
||||
OPM is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
OPM is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with OPM. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
#include <cuda.h>
|
||||
#include <dune/common/fmatrix.hh>
|
||||
#include <dune/common/fvector.hh>
|
||||
#include <dune/istl/bcrsmatrix.hh>
|
||||
#include <dune/istl/bvector.hh>
|
||||
#include <fmt/core.h>
|
||||
#include <opm/simulators/linalg/cuistl/CuSparseMatrix.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/cusparse_constants.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/cusparse_safe_call.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/cusparse_wrapper.hpp>
|
||||
#include <opm/simulators/linalg/matrixblock.hh>
|
||||
|
||||
namespace Opm::cuistl
|
||||
{
|
||||
|
||||
namespace
|
||||
{
|
||||
template <class T, class M>
|
||||
std::vector<T> extractNonzeroValues(const M& matrix)
|
||||
{
|
||||
const size_t blockSize = matrix[0][0].N();
|
||||
const size_t numberOfNonzeroBlocks = matrix.nonzeroes();
|
||||
const size_t numberOfNonzeroElements = blockSize * blockSize * numberOfNonzeroBlocks;
|
||||
|
||||
std::vector<T> nonZeroElementsData;
|
||||
// TODO: [perf] Can we avoid building nonZeroElementsData?
|
||||
nonZeroElementsData.reserve(numberOfNonzeroElements);
|
||||
for (auto& row : matrix) {
|
||||
for (auto columnIterator = row.begin(); columnIterator != row.end(); ++columnIterator) {
|
||||
for (size_t c = 0; c < blockSize; ++c) {
|
||||
for (size_t d = 0; d < blockSize; ++d) {
|
||||
nonZeroElementsData.push_back((*columnIterator)[c][d]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return nonZeroElementsData;
|
||||
}
|
||||
} // namespace
|
||||
|
||||
|
||||
|
||||
template <class T>
|
||||
CuSparseMatrix<T>::CuSparseMatrix(const T* nonZeroElements,
|
||||
const int* rowIndices,
|
||||
const int* columnIndices,
|
||||
size_t numberOfNonzeroBlocks,
|
||||
size_t blockSize,
|
||||
size_t numberOfRows)
|
||||
: m_nonZeroElements(nonZeroElements, numberOfNonzeroBlocks * blockSize * blockSize)
|
||||
, m_columnIndices(columnIndices, numberOfNonzeroBlocks)
|
||||
, m_rowIndices(rowIndices, numberOfRows + 1)
|
||||
, m_numberOfNonzeroBlocks(detail::to_int(numberOfNonzeroBlocks))
|
||||
, m_numberOfRows(detail::to_int(numberOfRows))
|
||||
, m_blockSize(detail::to_int(blockSize))
|
||||
, m_matrixDescription(detail::createMatrixDescription())
|
||||
, m_cusparseHandle(detail::CuSparseHandle::getInstance())
|
||||
{
|
||||
if (detail::to_size_t(rowIndices[numberOfRows]) != numberOfNonzeroBlocks) {
|
||||
OPM_THROW(std::invalid_argument, "Wrong sparsity format. Needs to be CSR compliant. ");
|
||||
}
|
||||
}
|
||||
|
||||
template <class T>
|
||||
CuSparseMatrix<T>::~CuSparseMatrix()
|
||||
{
|
||||
// empty
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
template <typename MatrixType>
|
||||
CuSparseMatrix<T>
|
||||
CuSparseMatrix<T>::fromMatrix(const MatrixType& matrix, bool copyNonZeroElementsDirectly)
|
||||
{
|
||||
// TODO: Do we need this intermediate storage? Or this shuffling of data?
|
||||
std::vector<int> columnIndices;
|
||||
std::vector<int> rowIndices;
|
||||
|
||||
rowIndices.push_back(0);
|
||||
|
||||
const size_t blockSize = matrix[0][0].N();
|
||||
const size_t numberOfRows = matrix.N();
|
||||
const size_t numberOfNonzeroBlocks = matrix.nonzeroes();
|
||||
|
||||
columnIndices.reserve(numberOfNonzeroBlocks);
|
||||
rowIndices.reserve(numberOfRows + 1);
|
||||
for (auto& row : matrix) {
|
||||
for (auto columnIterator = row.begin(); columnIterator != row.end(); ++columnIterator) {
|
||||
columnIndices.push_back(columnIterator.index());
|
||||
}
|
||||
rowIndices.push_back(detail::to_int(columnIndices.size()));
|
||||
}
|
||||
|
||||
// Sanity check
|
||||
// h_rows and h_cols could be changed to 'unsigned int', but cusparse expects 'int'
|
||||
OPM_ERROR_IF(rowIndices[matrix.N()] != detail::to_int(matrix.nonzeroes()),
|
||||
"Error size of rows do not sum to number of nonzeroes in CuSparseMatrix.");
|
||||
OPM_ERROR_IF(rowIndices.size() != numberOfRows + 1, "Row indices do not match for CuSparseMatrix.");
|
||||
OPM_ERROR_IF(columnIndices.size() != numberOfNonzeroBlocks, "Column indices do not match for CuSparseMatrix.");
|
||||
|
||||
|
||||
if (copyNonZeroElementsDirectly) {
|
||||
const T* nonZeroElements = static_cast<const T*>(&((matrix[0][0][0][0])));
|
||||
return CuSparseMatrix<T>(
|
||||
nonZeroElements, rowIndices.data(), columnIndices.data(), numberOfNonzeroBlocks, blockSize, numberOfRows);
|
||||
} else {
|
||||
auto nonZeroElementData = extractNonzeroValues<T>(matrix);
|
||||
return CuSparseMatrix<T>(nonZeroElementData.data(),
|
||||
rowIndices.data(),
|
||||
columnIndices.data(),
|
||||
numberOfNonzeroBlocks,
|
||||
blockSize,
|
||||
numberOfRows);
|
||||
}
|
||||
}
|
||||
|
||||
template <class T>
|
||||
template <class MatrixType>
|
||||
void
|
||||
CuSparseMatrix<T>::updateNonzeroValues(const MatrixType& matrix, bool copyNonZeroElementsDirectly)
|
||||
{
|
||||
OPM_ERROR_IF(nonzeroes() != matrix.nonzeroes(), "Matrix does not have the same number of non-zero elements.");
|
||||
OPM_ERROR_IF(matrix[0][0].N() != blockSize(), "Matrix does not have the same blocksize.");
|
||||
OPM_ERROR_IF(matrix.N() != N(), "Matrix does not have the same number of rows.");
|
||||
|
||||
if (!copyNonZeroElementsDirectly) {
|
||||
auto nonZeroElementsData = extractNonzeroValues<T>(matrix);
|
||||
m_nonZeroElements.copyFromHost(nonZeroElementsData.data(), nonzeroes() * blockSize() * blockSize());
|
||||
|
||||
} else {
|
||||
const T* newNonZeroElements = static_cast<const T*>(&((matrix[0][0][0][0])));
|
||||
m_nonZeroElements.copyFromHost(newNonZeroElements, nonzeroes() * blockSize() * blockSize());
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void
|
||||
CuSparseMatrix<T>::setUpperTriangular()
|
||||
{
|
||||
OPM_CUSPARSE_SAFE_CALL(cusparseSetMatFillMode(m_matrixDescription->get(), CUSPARSE_FILL_MODE_UPPER));
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void
|
||||
CuSparseMatrix<T>::setLowerTriangular()
|
||||
{
|
||||
OPM_CUSPARSE_SAFE_CALL(cusparseSetMatFillMode(m_matrixDescription->get(), CUSPARSE_FILL_MODE_LOWER));
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void
|
||||
CuSparseMatrix<T>::setUnitDiagonal()
|
||||
{
|
||||
OPM_CUSPARSE_SAFE_CALL(cusparseSetMatDiagType(m_matrixDescription->get(), CUSPARSE_DIAG_TYPE_UNIT));
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void
|
||||
CuSparseMatrix<T>::setNonUnitDiagonal()
|
||||
{
|
||||
OPM_CUSPARSE_SAFE_CALL(cusparseSetMatDiagType(m_matrixDescription->get(), CUSPARSE_DIAG_TYPE_NON_UNIT));
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void
|
||||
CuSparseMatrix<T>::mv(const CuVector<T>& x, CuVector<T>& y) const
|
||||
{
|
||||
assertSameSize(x);
|
||||
assertSameSize(y);
|
||||
if (blockSize() < 2u) {
|
||||
OPM_THROW(
|
||||
std::invalid_argument,
|
||||
"CuSparseMatrix<T>::usmv and CuSparseMatrix<T>::mv are only implemented for block sizes greater than 1.");
|
||||
}
|
||||
const auto nonzeroValues = getNonZeroValues().data();
|
||||
|
||||
auto rowIndices = getRowIndices().data();
|
||||
auto columnIndices = getColumnIndices().data();
|
||||
T alpha = 1.0;
|
||||
T beta = 0.0;
|
||||
OPM_CUSPARSE_SAFE_CALL(detail::cusparseBsrmv(m_cusparseHandle.get(),
|
||||
detail::CUSPARSE_MATRIX_ORDER,
|
||||
CUSPARSE_OPERATION_NON_TRANSPOSE,
|
||||
m_numberOfRows,
|
||||
m_numberOfRows,
|
||||
m_numberOfNonzeroBlocks,
|
||||
&alpha,
|
||||
m_matrixDescription->get(),
|
||||
nonzeroValues,
|
||||
rowIndices,
|
||||
columnIndices,
|
||||
blockSize(),
|
||||
x.data(),
|
||||
&beta,
|
||||
y.data()));
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void
|
||||
CuSparseMatrix<T>::umv(const CuVector<T>& x, CuVector<T>& y) const
|
||||
{
|
||||
assertSameSize(x);
|
||||
assertSameSize(y);
|
||||
if (blockSize() < 2u) {
|
||||
OPM_THROW(
|
||||
std::invalid_argument,
|
||||
"CuSparseMatrix<T>::usmv and CuSparseMatrix<T>::mv are only implemented for block sizes greater than 1.");
|
||||
}
|
||||
|
||||
const auto nonzeroValues = getNonZeroValues().data();
|
||||
|
||||
auto rowIndices = getRowIndices().data();
|
||||
auto columnIndices = getColumnIndices().data();
|
||||
T alpha = 1.0;
|
||||
T beta = 1.0;
|
||||
OPM_CUSPARSE_SAFE_CALL(detail::cusparseBsrmv(m_cusparseHandle.get(),
|
||||
detail::CUSPARSE_MATRIX_ORDER,
|
||||
CUSPARSE_OPERATION_NON_TRANSPOSE,
|
||||
m_numberOfRows,
|
||||
m_numberOfRows,
|
||||
m_numberOfNonzeroBlocks,
|
||||
&alpha,
|
||||
m_matrixDescription->get(),
|
||||
nonzeroValues,
|
||||
rowIndices,
|
||||
columnIndices,
|
||||
m_blockSize,
|
||||
x.data(),
|
||||
&beta,
|
||||
y.data()));
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void
|
||||
CuSparseMatrix<T>::usmv(T alpha, const CuVector<T>& x, CuVector<T>& y) const
|
||||
{
|
||||
assertSameSize(x);
|
||||
assertSameSize(y);
|
||||
if (blockSize() < 2) {
|
||||
OPM_THROW(
|
||||
std::invalid_argument,
|
||||
"CuSparseMatrix<T>::usmv and CuSparseMatrix<T>::mv are only implemented for block sizes greater than 1.");
|
||||
}
|
||||
const auto numberOfRows = N();
|
||||
const auto numberOfNonzeroBlocks = nonzeroes();
|
||||
const auto nonzeroValues = getNonZeroValues().data();
|
||||
|
||||
auto rowIndices = getRowIndices().data();
|
||||
auto columnIndices = getColumnIndices().data();
|
||||
|
||||
T beta = 1.0;
|
||||
OPM_CUSPARSE_SAFE_CALL(detail::cusparseBsrmv(m_cusparseHandle.get(),
|
||||
detail::CUSPARSE_MATRIX_ORDER,
|
||||
CUSPARSE_OPERATION_NON_TRANSPOSE,
|
||||
numberOfRows,
|
||||
numberOfRows,
|
||||
numberOfNonzeroBlocks,
|
||||
&alpha,
|
||||
m_matrixDescription->get(),
|
||||
nonzeroValues,
|
||||
rowIndices,
|
||||
columnIndices,
|
||||
blockSize(),
|
||||
x.data(),
|
||||
&beta,
|
||||
y.data()));
|
||||
}
|
||||
|
||||
template <class T>
|
||||
template <class VectorType>
|
||||
void
|
||||
CuSparseMatrix<T>::assertSameSize(const VectorType& x) const
|
||||
{
|
||||
if (x.dim() != blockSize() * N()) {
|
||||
OPM_THROW(std::invalid_argument,
|
||||
fmt::format("Size mismatch. Input vector has {} elements, while we have {} rows.",
|
||||
x.dim(),
|
||||
blockSize() * N()));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
#define INSTANTIATE_CUSPARSE_DUNE_MATRIX_CONSTRUCTION_FUNTIONS(realtype, blockdim) \
|
||||
template CuSparseMatrix<realtype> CuSparseMatrix<realtype>::fromMatrix( \
|
||||
const Dune::BCRSMatrix<Dune::FieldMatrix<realtype, blockdim, blockdim>>&, bool); \
|
||||
template CuSparseMatrix<realtype> CuSparseMatrix<realtype>::fromMatrix( \
|
||||
const Dune::BCRSMatrix<Opm::MatrixBlock<realtype, blockdim, blockdim>>&, bool); \
|
||||
template void CuSparseMatrix<realtype>::updateNonzeroValues( \
|
||||
const Dune::BCRSMatrix<Dune::FieldMatrix<realtype, blockdim, blockdim>>&, bool); \
|
||||
template void CuSparseMatrix<realtype>::updateNonzeroValues( \
|
||||
const Dune::BCRSMatrix<Opm::MatrixBlock<realtype, blockdim, blockdim>>&, bool)
|
||||
|
||||
template class CuSparseMatrix<float>;
|
||||
template class CuSparseMatrix<double>;
|
||||
|
||||
INSTANTIATE_CUSPARSE_DUNE_MATRIX_CONSTRUCTION_FUNTIONS(double, 1);
|
||||
INSTANTIATE_CUSPARSE_DUNE_MATRIX_CONSTRUCTION_FUNTIONS(double, 2);
|
||||
INSTANTIATE_CUSPARSE_DUNE_MATRIX_CONSTRUCTION_FUNTIONS(double, 3);
|
||||
INSTANTIATE_CUSPARSE_DUNE_MATRIX_CONSTRUCTION_FUNTIONS(double, 4);
|
||||
INSTANTIATE_CUSPARSE_DUNE_MATRIX_CONSTRUCTION_FUNTIONS(double, 5);
|
||||
INSTANTIATE_CUSPARSE_DUNE_MATRIX_CONSTRUCTION_FUNTIONS(double, 6);
|
||||
|
||||
INSTANTIATE_CUSPARSE_DUNE_MATRIX_CONSTRUCTION_FUNTIONS(float, 1);
|
||||
INSTANTIATE_CUSPARSE_DUNE_MATRIX_CONSTRUCTION_FUNTIONS(float, 2);
|
||||
INSTANTIATE_CUSPARSE_DUNE_MATRIX_CONSTRUCTION_FUNTIONS(float, 3);
|
||||
INSTANTIATE_CUSPARSE_DUNE_MATRIX_CONSTRUCTION_FUNTIONS(float, 4);
|
||||
INSTANTIATE_CUSPARSE_DUNE_MATRIX_CONSTRUCTION_FUNTIONS(float, 5);
|
||||
INSTANTIATE_CUSPARSE_DUNE_MATRIX_CONSTRUCTION_FUNTIONS(float, 6);
|
||||
|
||||
} // namespace Opm::cuistl
|
302
opm/simulators/linalg/cuistl/CuSparseMatrix.hpp
Normal file
302
opm/simulators/linalg/cuistl/CuSparseMatrix.hpp
Normal file
@ -0,0 +1,302 @@
|
||||
/*
|
||||
Copyright 2022-2023 SINTEF AS
|
||||
|
||||
This file is part of the Open Porous Media project (OPM).
|
||||
|
||||
OPM is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
OPM is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with OPM. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
#ifndef OPM_CUSPARSEMATRIX_HPP
|
||||
#define OPM_CUSPARSEMATRIX_HPP
|
||||
#include <cusparse.h>
|
||||
#include <iostream>
|
||||
#include <memory>
|
||||
#include <opm/common/ErrorMacros.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/CuVector.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/CuMatrixDescription.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/CuSparseHandle.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/safe_conversion.hpp>
|
||||
#include <vector>
|
||||
|
||||
namespace Opm::cuistl
|
||||
{
|
||||
|
||||
/**
|
||||
* @brief The CuSparseMatrix class simple wrapper class for a CuSparse matrix.
|
||||
*
|
||||
* @note we currently only support simple raw primitives for T (double and float). Block size is handled through the
|
||||
* block size parameter
|
||||
*
|
||||
* @tparam T the type to store. Can be either float, double or int.
|
||||
*
|
||||
* @note we only support square matrices.
|
||||
*
|
||||
* @note We only support Block Compressed Sparse Row Format (BSR) for now.
|
||||
*/
|
||||
template <typename T>
|
||||
class CuSparseMatrix
|
||||
{
|
||||
public:
|
||||
//! Create the sparse matrix specified by the raw data.
|
||||
//!
|
||||
//! \note Prefer to use the constructor taking a const reference to a matrix instead.
|
||||
//!
|
||||
//! \param[in] nonZeroElements the non-zero values of the matrix
|
||||
//! \param[in] rowIndices the row indices of the non-zero elements
|
||||
//! \param[in] columnIndices the column indices of the non-zero elements
|
||||
//! \param[in] numberOfNonzeroElements number of nonzero elements
|
||||
//! \param[in] blockSize size of each block matrix (typically 3)
|
||||
//! \param[in] numberOfRows the number of rows
|
||||
//!
|
||||
//! \note We assume numberOfNonzeroBlocks, blockSize and numberOfRows all are representable as int due to
|
||||
//! restrictions in the current version of cusparse. This might change in future versions.
|
||||
CuSparseMatrix(const T* nonZeroElements,
|
||||
const int* rowIndices,
|
||||
const int* columnIndices,
|
||||
size_t numberOfNonzeroBlocks,
|
||||
size_t blockSize,
|
||||
size_t numberOfRows);
|
||||
|
||||
/**
|
||||
* We don't want to be able to copy this for now (too much hassle in copying the cusparse resources)
|
||||
*/
|
||||
CuSparseMatrix(const CuSparseMatrix&) = delete;
|
||||
|
||||
/**
|
||||
* We don't want to be able to copy this for now (too much hassle in copying the cusparse resources)
|
||||
*/
|
||||
CuSparseMatrix& operator=(const CuSparseMatrix&) = delete;
|
||||
|
||||
virtual ~CuSparseMatrix();
|
||||
|
||||
/**
|
||||
* @brief fromMatrix creates a new matrix with the same block size and values as the given matrix
|
||||
* @param matrix the matrix to copy from
|
||||
* @param copyNonZeroElementsDirectly if true will do a memcpy from matrix[0][0][0][0], otherwise will build up the
|
||||
* non-zero elements by looping over the matrix. Note that setting this to true will yield a performance increase,
|
||||
* but might not always yield correct results depending on how the matrix has been initialized. If unsure,
|
||||
* leave it as false.
|
||||
* @tparam MatrixType is assumed to be a Dune::BCRSMatrix compatible matrix.
|
||||
*/
|
||||
template <class MatrixType>
|
||||
static CuSparseMatrix<T> fromMatrix(const MatrixType& matrix, bool copyNonZeroElementsDirectly = false);
|
||||
|
||||
/**
|
||||
* @brief setUpperTriangular sets the CuSparse flag that this is an upper diagonal (with unit diagonal) matrix.
|
||||
*/
|
||||
void setUpperTriangular();
|
||||
|
||||
/**
|
||||
* @brief setLowerTriangular sets the CuSparse flag that this is an lower diagonal (with non-unit diagonal) matrix.
|
||||
*/
|
||||
void setLowerTriangular();
|
||||
|
||||
/**
|
||||
* @brief setUnitDiagonal sets the CuSparse flag that this has unit diagional.
|
||||
*/
|
||||
void setUnitDiagonal();
|
||||
|
||||
|
||||
/**
|
||||
* @brief setNonUnitDiagonal sets the CuSparse flag that this has non-unit diagional.
|
||||
*/
|
||||
void setNonUnitDiagonal();
|
||||
|
||||
/**
|
||||
* @brief N returns the number of rows (which is equal to the number of columns)
|
||||
*/
|
||||
size_t N() const
|
||||
{
|
||||
// Technically this safe conversion is not needed since we enforce these to be
|
||||
// non-negative in the constructor, but keeping them for added sanity for now.
|
||||
//
|
||||
// We don't believe this will yield any performance penality (it's used too far away from the inner loop),
|
||||
// but should that be false, they can be removed.
|
||||
return detail::to_size_t(m_numberOfRows);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief nonzeroes behaves as the Dune::BCRSMatrix::nonzeros() function and returns the number of non zero blocks
|
||||
* @return number of non zero blocks.
|
||||
*/
|
||||
size_t nonzeroes() const
|
||||
{
|
||||
// Technically this safe conversion is not needed since we enforce these to be
|
||||
// non-negative in the constructor, but keeping them for added sanity for now.
|
||||
//
|
||||
// We don't believe this will yield any performance penality (it's used too far away from the inner loop),
|
||||
// but should that be false, they can be removed.
|
||||
return detail::to_size_t(m_numberOfNonzeroBlocks);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief getNonZeroValues returns the GPU vector containing the non-zero values (ordered by block)
|
||||
*
|
||||
* @note Read the CuSPARSE documentation on Block Compressed Sparse Row Format (BSR) for the exact ordering.
|
||||
*/
|
||||
CuVector<T>& getNonZeroValues()
|
||||
{
|
||||
return m_nonZeroElements;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief getNonZeroValues returns the GPU vector containing the non-zero values (ordered by block)
|
||||
*
|
||||
* @note Read the CuSPARSE documentation on Block Compressed Sparse Row Format (BSR) for the exact ordering.
|
||||
*/
|
||||
const CuVector<T>& getNonZeroValues() const
|
||||
{
|
||||
return m_nonZeroElements;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief getRowIndices returns the row indices used to represent the BSR structure.
|
||||
*
|
||||
* @note Read the CuSPARSE documentation on Block Compressed Sparse Row Format (BSR) for the exact ordering.
|
||||
*/
|
||||
CuVector<int>& getRowIndices()
|
||||
{
|
||||
return m_rowIndices;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief getRowIndices returns the row indices used to represent the BSR structure.
|
||||
*
|
||||
* @note Read the CuSPARSE documentation on Block Compressed Sparse Row Format (BSR) for the exact ordering.
|
||||
*/
|
||||
const CuVector<int>& getRowIndices() const
|
||||
{
|
||||
return m_rowIndices;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief getColumnIndices returns the column indices used to represent the BSR structure.
|
||||
*
|
||||
* @return Read the CuSPARSE documentation on Block Compressed Sparse Row Format (BSR) for the exact ordering.
|
||||
*/
|
||||
CuVector<int>& getColumnIndices()
|
||||
{
|
||||
return m_columnIndices;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief getColumnIndices returns the column indices used to represent the BSR structure.
|
||||
*
|
||||
* @return Read the CuSPARSE documentation on Block Compressed Sparse Row Format (BSR) for the exact ordering.
|
||||
*/
|
||||
const CuVector<int>& getColumnIndices() const
|
||||
{
|
||||
return m_columnIndices;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief dim returns the dimension of the vector space on which this matrix acts
|
||||
*
|
||||
* This is equivalent to matrix.N() * matrix.blockSize()
|
||||
* @return matrix.N() * matrix.blockSize()
|
||||
*/
|
||||
size_t dim() const
|
||||
{
|
||||
// Technically this safe conversion is not needed since we enforce these to be
|
||||
// non-negative in the constructor, but keeping them for added sanity for now.
|
||||
//
|
||||
// We don't believe this will yield any performance penality (it's used too far away from the inner loop),
|
||||
// but should that be false, they can be removed.
|
||||
return detail::to_size_t(m_blockSize) * detail::to_size_t(m_numberOfRows);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief blockSize size of the blocks
|
||||
*/
|
||||
size_t blockSize() const
|
||||
{
|
||||
// Technically this safe conversion is not needed since we enforce these to be
|
||||
// non-negative in the constructor, but keeping them for added sanity for now.
|
||||
//
|
||||
// We don't believe this will yield any performance penality (it's used too far away from the inner loop),
|
||||
// but should that be false, they can be removed.
|
||||
return detail::to_size_t(m_blockSize);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief getDescription the cusparse matrix description.
|
||||
*
|
||||
* This description is needed for most calls to the CuSparse library
|
||||
*/
|
||||
detail::CuSparseMatrixDescription& getDescription()
|
||||
{
|
||||
return *m_matrixDescription;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief mv performs matrix vector multiply y = Ax
|
||||
* @param[in] x the vector to multiply the matrix with
|
||||
* @param[out] y the output vector
|
||||
*
|
||||
* @note Due to limitations of CuSparse, this is only supported for block sizes greater than 1.
|
||||
*/
|
||||
virtual void mv(const CuVector<T>& x, CuVector<T>& y) const;
|
||||
|
||||
/**
|
||||
* @brief umv computes y=Ax+y
|
||||
* @param[in] x the vector to multiply with A
|
||||
* @param[inout] y the vector to add and store the output in
|
||||
*
|
||||
* @note Due to limitations of CuSparse, this is only supported for block sizes greater than 1.
|
||||
*/
|
||||
virtual void umv(const CuVector<T>& x, CuVector<T>& y) const;
|
||||
|
||||
|
||||
/**
|
||||
* @brief umv computes y=alpha * Ax + y
|
||||
* @param[in] x the vector to multiply with A
|
||||
* @param[inout] y the vector to add and store the output in
|
||||
*
|
||||
* @note Due to limitations of CuSparse, this is only supported for block sizes greater than 1.
|
||||
*/
|
||||
virtual void usmv(T alpha, const CuVector<T>& x, CuVector<T>& y) const;
|
||||
|
||||
/**
|
||||
* @brief updateNonzeroValues updates the non-zero values by using the non-zero values of the supplied matrix
|
||||
* @param matrix the matrix to extract the non-zero values from
|
||||
* @param copyNonZeroElementsDirectly if true will do a memcpy from matrix[0][0][0][0], otherwise will build up the
|
||||
* non-zero elements by looping over the matrix. Note that setting this to true will yield a performance increase,
|
||||
* but might not always yield correct results depending on how the matrix matrix has been initialized. If unsure,
|
||||
* leave it as false.
|
||||
* @note This assumes the given matrix has the same sparsity pattern.
|
||||
* @tparam MatrixType is assumed to be a Dune::BCRSMatrix compatible matrix.
|
||||
*/
|
||||
template <class MatrixType>
|
||||
void updateNonzeroValues(const MatrixType& matrix, bool copyNonZeroElementsDirectly = false);
|
||||
|
||||
private:
|
||||
CuVector<T> m_nonZeroElements;
|
||||
CuVector<int> m_columnIndices;
|
||||
CuVector<int> m_rowIndices;
|
||||
|
||||
// Notice that we store these three as int to make sure we are cusparse compatible.
|
||||
//
|
||||
// This gives the added benefit of checking the size constraints at construction of the matrix
|
||||
// rather than in some call to cusparse.
|
||||
const int m_numberOfNonzeroBlocks;
|
||||
const int m_numberOfRows;
|
||||
const int m_blockSize;
|
||||
|
||||
detail::CuSparseMatrixDescriptionPtr m_matrixDescription;
|
||||
detail::CuSparseHandle& m_cusparseHandle;
|
||||
|
||||
template <class VectorType>
|
||||
void assertSameSize(const VectorType& vector) const;
|
||||
};
|
||||
} // namespace Opm::cuistl
|
||||
#endif
|
293
opm/simulators/linalg/cuistl/CuVector.cpp
Normal file
293
opm/simulators/linalg/cuistl/CuVector.cpp
Normal file
@ -0,0 +1,293 @@
|
||||
/*
|
||||
Copyright 2022-2023 SINTEF AS
|
||||
|
||||
This file is part of the Open Porous Media project (OPM).
|
||||
|
||||
OPM is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
OPM is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with OPM. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
#include <cublas_v2.h>
|
||||
#include <cuda.h>
|
||||
#include <cuda_runtime.h>
|
||||
#include <fmt/core.h>
|
||||
#include <opm/simulators/linalg/cuistl/CuVector.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/cublas_safe_call.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/cublas_wrapper.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/vector_operations.hpp>
|
||||
|
||||
namespace Opm::cuistl
|
||||
{
|
||||
|
||||
template <class T>
|
||||
CuVector<T>::CuVector(const std::vector<T>& data)
|
||||
: CuVector(data.data(), detail::to_int(data.size()))
|
||||
{
|
||||
}
|
||||
|
||||
template <class T>
|
||||
CuVector<T>::CuVector(const size_t numberOfElements)
|
||||
: m_numberOfElements(detail::to_int(numberOfElements))
|
||||
, m_cuBlasHandle(detail::CuBlasHandle::getInstance())
|
||||
{
|
||||
OPM_CUDA_SAFE_CALL(cudaMalloc(&m_dataOnDevice, sizeof(T) * detail::to_size_t(m_numberOfElements)));
|
||||
}
|
||||
|
||||
template <class T>
|
||||
CuVector<T>::CuVector(const T* dataOnHost, const size_t numberOfElements)
|
||||
: CuVector(numberOfElements)
|
||||
{
|
||||
|
||||
OPM_CUDA_SAFE_CALL(cudaMemcpy(
|
||||
m_dataOnDevice, dataOnHost, detail::to_size_t(m_numberOfElements) * sizeof(T), cudaMemcpyHostToDevice));
|
||||
}
|
||||
|
||||
template <class T>
|
||||
CuVector<T>&
|
||||
CuVector<T>::operator=(T scalar)
|
||||
{
|
||||
assertHasElements();
|
||||
detail::setVectorValue(data(), detail::to_size_t(m_numberOfElements), scalar);
|
||||
return *this;
|
||||
}
|
||||
|
||||
template <class T>
|
||||
CuVector<T>&
|
||||
CuVector<T>::operator=(const CuVector<T>& other)
|
||||
{
|
||||
assertHasElements();
|
||||
assertSameSize(other);
|
||||
|
||||
OPM_CUDA_SAFE_CALL(cudaMemcpy(m_dataOnDevice,
|
||||
other.m_dataOnDevice,
|
||||
detail::to_size_t(m_numberOfElements) * sizeof(T),
|
||||
cudaMemcpyDeviceToDevice));
|
||||
return *this;
|
||||
}
|
||||
|
||||
template <class T>
|
||||
CuVector<T>::CuVector(const CuVector<T>& other)
|
||||
: CuVector(other.m_numberOfElements)
|
||||
{
|
||||
assertHasElements();
|
||||
assertSameSize(other);
|
||||
OPM_CUDA_SAFE_CALL(cudaMemcpy(m_dataOnDevice,
|
||||
other.m_dataOnDevice,
|
||||
detail::to_size_t(m_numberOfElements) * sizeof(T),
|
||||
cudaMemcpyDeviceToDevice));
|
||||
}
|
||||
|
||||
template <class T>
|
||||
CuVector<T>::~CuVector()
|
||||
{
|
||||
OPM_CUDA_WARN_IF_ERROR(cudaFree(m_dataOnDevice));
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
const T*
|
||||
CuVector<T>::data() const
|
||||
{
|
||||
return m_dataOnDevice;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
typename CuVector<T>::size_type
|
||||
CuVector<T>::dim() const
|
||||
{
|
||||
// Note that there is no way for m_numberOfElements to be non-positive,
|
||||
// but for sanity we still use the safe conversion function here.
|
||||
//
|
||||
// We also doubt that this will lead to any performance penality, but should this prove
|
||||
// to be false, this can be replaced by a simple cast to size_t
|
||||
return detail::to_size_t(m_numberOfElements);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
std::vector<T>
|
||||
CuVector<T>::asStdVector() const
|
||||
{
|
||||
std::vector<T> temporary(detail::to_size_t(m_numberOfElements));
|
||||
copyToHost(temporary);
|
||||
return temporary;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void
|
||||
CuVector<T>::setZeroAtIndexSet(const CuVector<int>& indexSet)
|
||||
{
|
||||
detail::setZeroAtIndexSet(m_dataOnDevice, indexSet.dim(), indexSet.data());
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void
|
||||
CuVector<T>::assertSameSize(const CuVector<T>& x) const
|
||||
{
|
||||
assertSameSize(x.m_numberOfElements);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void
|
||||
CuVector<T>::assertSameSize(int size) const
|
||||
{
|
||||
if (size != m_numberOfElements) {
|
||||
OPM_THROW(std::invalid_argument,
|
||||
fmt::format("Given vector has {}, while we have {}.", size, m_numberOfElements));
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void
|
||||
CuVector<T>::assertHasElements() const
|
||||
{
|
||||
if (m_numberOfElements <= 0) {
|
||||
OPM_THROW(std::invalid_argument, "We have 0 elements");
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
T*
|
||||
CuVector<T>::data()
|
||||
{
|
||||
return m_dataOnDevice;
|
||||
}
|
||||
|
||||
template <class T>
|
||||
CuVector<T>&
|
||||
CuVector<T>::operator*=(const T& scalar)
|
||||
{
|
||||
assertHasElements();
|
||||
OPM_CUBLAS_SAFE_CALL(detail::cublasScal(m_cuBlasHandle.get(), m_numberOfElements, &scalar, data(), 1));
|
||||
return *this;
|
||||
}
|
||||
|
||||
template <class T>
|
||||
CuVector<T>&
|
||||
CuVector<T>::axpy(T alpha, const CuVector<T>& y)
|
||||
{
|
||||
assertHasElements();
|
||||
assertSameSize(y);
|
||||
OPM_CUBLAS_SAFE_CALL(detail::cublasAxpy(m_cuBlasHandle.get(), m_numberOfElements, &alpha, y.data(), 1, data(), 1));
|
||||
return *this;
|
||||
}
|
||||
|
||||
template <class T>
|
||||
T
|
||||
CuVector<T>::dot(const CuVector<T>& other) const
|
||||
{
|
||||
assertHasElements();
|
||||
assertSameSize(other);
|
||||
T result = T(0);
|
||||
OPM_CUBLAS_SAFE_CALL(
|
||||
detail::cublasDot(m_cuBlasHandle.get(), m_numberOfElements, data(), 1, other.data(), 1, &result));
|
||||
return result;
|
||||
}
|
||||
template <class T>
|
||||
T
|
||||
CuVector<T>::two_norm() const
|
||||
{
|
||||
assertHasElements();
|
||||
T result = T(0);
|
||||
OPM_CUBLAS_SAFE_CALL(detail::cublasNrm2(m_cuBlasHandle.get(), m_numberOfElements, data(), 1, &result));
|
||||
return result;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
T
|
||||
CuVector<T>::dot(const CuVector<T>& other, const CuVector<int>& indexSet, CuVector<T>& buffer) const
|
||||
{
|
||||
return detail::innerProductAtIndices(m_dataOnDevice, other.data(), buffer.data(), indexSet.dim(), indexSet.data());
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
T
|
||||
CuVector<T>::two_norm(const CuVector<int>& indexSet, CuVector<T>& buffer) const
|
||||
{
|
||||
// TODO: [perf] Optimize this to a single call
|
||||
return std::sqrt(this->dot(*this, indexSet, buffer));
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
T
|
||||
CuVector<T>::dot(const CuVector<T>& other, const CuVector<int>& indexSet) const
|
||||
{
|
||||
CuVector<T> buffer(indexSet.dim());
|
||||
return detail::innerProductAtIndices(m_dataOnDevice, other.data(), buffer.data(), indexSet.dim(), indexSet.data());
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
T
|
||||
CuVector<T>::two_norm(const CuVector<int>& indexSet) const
|
||||
{
|
||||
CuVector<T> buffer(indexSet.dim());
|
||||
// TODO: [perf] Optimize this to a single call
|
||||
return std::sqrt(this->dot(*this, indexSet, buffer));
|
||||
}
|
||||
template <class T>
|
||||
CuVector<T>&
|
||||
CuVector<T>::operator+=(const CuVector<T>& other)
|
||||
{
|
||||
assertHasElements();
|
||||
assertSameSize(other);
|
||||
// TODO: [perf] Make a specialized version of this
|
||||
return axpy(1.0, other);
|
||||
}
|
||||
|
||||
template <class T>
|
||||
CuVector<T>&
|
||||
CuVector<T>::operator-=(const CuVector<T>& other)
|
||||
{
|
||||
assertHasElements();
|
||||
assertSameSize(other);
|
||||
// TODO: [perf] Make a specialized version of this
|
||||
return axpy(-1.0, other);
|
||||
}
|
||||
|
||||
|
||||
template <class T>
|
||||
void
|
||||
CuVector<T>::copyFromHost(const T* dataPointer, size_t numberOfElements)
|
||||
{
|
||||
if (numberOfElements > dim()) {
|
||||
OPM_THROW(std::runtime_error,
|
||||
fmt::format("Requesting to copy too many elements. Vector has {} elements, while {} was requested.",
|
||||
dim(),
|
||||
numberOfElements));
|
||||
}
|
||||
OPM_CUDA_SAFE_CALL(cudaMemcpy(data(), dataPointer, numberOfElements * sizeof(T), cudaMemcpyHostToDevice));
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void
|
||||
CuVector<T>::copyToHost(T* dataPointer, size_t numberOfElements) const
|
||||
{
|
||||
assertSameSize(detail::to_int(numberOfElements));
|
||||
OPM_CUDA_SAFE_CALL(cudaMemcpy(dataPointer, data(), numberOfElements * sizeof(T), cudaMemcpyDeviceToHost));
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void
|
||||
CuVector<T>::copyFromHost(const std::vector<T>& data)
|
||||
{
|
||||
copyFromHost(data.data(), data.size());
|
||||
}
|
||||
template <class T>
|
||||
void
|
||||
CuVector<T>::copyToHost(std::vector<T>& data) const
|
||||
{
|
||||
copyToHost(data.data(), data.size());
|
||||
}
|
||||
template class CuVector<double>;
|
||||
template class CuVector<float>;
|
||||
template class CuVector<int>;
|
||||
|
||||
} // namespace Opm::cuistl
|
378
opm/simulators/linalg/cuistl/CuVector.hpp
Normal file
378
opm/simulators/linalg/cuistl/CuVector.hpp
Normal file
@ -0,0 +1,378 @@
|
||||
/*
|
||||
Copyright 2022-2023 SINTEF AS
|
||||
|
||||
This file is part of the Open Porous Media project (OPM).
|
||||
|
||||
OPM is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
OPM is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with OPM. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
#ifndef OPM_CUVECTOR_HEADER_HPP
|
||||
#define OPM_CUVECTOR_HEADER_HPP
|
||||
#include <dune/common/fvector.hh>
|
||||
#include <dune/istl/bvector.hh>
|
||||
#include <exception>
|
||||
#include <fmt/core.h>
|
||||
#include <opm/common/ErrorMacros.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/CuBlasHandle.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/safe_conversion.hpp>
|
||||
#include <vector>
|
||||
|
||||
|
||||
namespace Opm::cuistl
|
||||
{
|
||||
|
||||
/**
|
||||
* @brief The CuVector class is a simple (arithmetic) vector class for the GPU.
|
||||
*
|
||||
* @note we currently only support simple raw primitives for T (double, float and int)
|
||||
*
|
||||
* @note We currently only support arithmetic operations on double and float.
|
||||
*
|
||||
* @note this vector has no notion of block size. The user is responsible for allocating
|
||||
* the correct number of primitives (double or floats)
|
||||
*
|
||||
* Example usage:
|
||||
*
|
||||
* @code{.cpp}
|
||||
* #include <opm/simulators/linalg/cuistl/CuVector.hpp>
|
||||
*
|
||||
* void someFunction() {
|
||||
* auto someDataOnCPU = std::vector<double>({1.0, 2.0, 42.0, 59.9451743, 10.7132692});
|
||||
*
|
||||
* auto dataOnGPU = CuVector<double>(someDataOnCPU);
|
||||
*
|
||||
* // Multiply by 4.0:
|
||||
* dataOnGPU *= 4.0;
|
||||
*
|
||||
* // Get data back on CPU in another vector:
|
||||
* auto stdVectorOnCPU = dataOnGPU.asStdVector();
|
||||
* }
|
||||
*
|
||||
* @tparam T the type to store. Can be either float, double or int.
|
||||
*/
|
||||
template <typename T>
|
||||
class CuVector
|
||||
{
|
||||
public:
|
||||
using field_type = T;
|
||||
using size_type = size_t;
|
||||
|
||||
|
||||
/**
|
||||
* @brief CuVector allocates new GPU memory of the same size as other and copies the content of the other vector to
|
||||
* this newly allocated memory.
|
||||
*
|
||||
* @note This does synchronous transfer.
|
||||
*
|
||||
* @param other the vector to copy from
|
||||
*/
|
||||
CuVector(const CuVector<T>& other);
|
||||
|
||||
/**
|
||||
* @brief CuVector allocates new GPU memory of the same size as data and copies the content of the data vector to
|
||||
* this newly allocated memory.
|
||||
*
|
||||
* @note This does CPU to GPU transfer.
|
||||
* @note This does synchronous transfer.
|
||||
*
|
||||
* @note For now data.size() needs to be within the limits of int due to restrctions of CuBlas.
|
||||
*
|
||||
* @param data the vector to copy from
|
||||
*/
|
||||
explicit CuVector(const std::vector<T>& data);
|
||||
|
||||
/**
|
||||
* @brief operator= copies the content of the data vector to the memory of this vector.
|
||||
*
|
||||
* @note This requires the two vectors to be of the same size.
|
||||
* @note This does synchronous transfer.
|
||||
*
|
||||
* @param other the vector to copy from
|
||||
*/
|
||||
CuVector& operator=(const CuVector<T>& other);
|
||||
|
||||
/**
|
||||
* @brief operator= sets the whole vector equal to the scalar value.
|
||||
*
|
||||
* @note This does asynchronous operations
|
||||
*
|
||||
* @param scalar the value all elements will be set to.
|
||||
*/
|
||||
CuVector& operator=(T scalar);
|
||||
|
||||
/**
|
||||
* @brief CuVector allocates new GPU memory of size numberOfElements * sizeof(T)
|
||||
*
|
||||
* @note For now numberOfElements needs to be within the limits of int due to restrictions in cublas
|
||||
*
|
||||
* @param numberOfElements number of T elements to allocate
|
||||
*/
|
||||
explicit CuVector(const size_t numberOfElements);
|
||||
|
||||
|
||||
/**
|
||||
* @brief CuVector allocates new GPU memory of size numberOfElements * sizeof(T) and copies numberOfElements from
|
||||
* data
|
||||
*
|
||||
* @note This assumes the data is on the CPU.
|
||||
*
|
||||
* @param numberOfElements number of T elements to allocate
|
||||
* @param dataOnHost data on host/CPU
|
||||
*
|
||||
* @note For now numberOfElements needs to be within the limits of int due to restrictions in cublas
|
||||
*/
|
||||
CuVector(const T* dataOnHost, const size_t numberOfElements);
|
||||
|
||||
/**
|
||||
* @brief ~CuVector calls cudaFree
|
||||
*/
|
||||
virtual ~CuVector();
|
||||
|
||||
/**
|
||||
* @return the raw pointer to the GPU data
|
||||
*/
|
||||
T* data();
|
||||
|
||||
/**
|
||||
* @return the raw pointer to the GPU data
|
||||
*/
|
||||
const T* data() const;
|
||||
|
||||
/**
|
||||
* @brief copyFromHost copies data from a Dune::BlockVector
|
||||
* @param bvector the vector to copy from
|
||||
*
|
||||
* @note This does synchronous transfer.
|
||||
* @note This assumes that the size of this vector is equal to the dim of the input vector.
|
||||
*/
|
||||
template <int BlockDimension>
|
||||
void copyFromHost(const Dune::BlockVector<Dune::FieldVector<T, BlockDimension>>& bvector)
|
||||
{
|
||||
// TODO: [perf] vector.dim() can be replaced by bvector.N() * BlockDimension
|
||||
if (detail::to_size_t(m_numberOfElements) != bvector.dim()) {
|
||||
OPM_THROW(std::runtime_error,
|
||||
fmt::format("Given incompatible vector size. CuVector has size {}, \n"
|
||||
"however, BlockVector has N() = {}, and dim = {}.",
|
||||
m_numberOfElements,
|
||||
bvector.N(),
|
||||
bvector.dim()));
|
||||
}
|
||||
const auto dataPointer = static_cast<const T*>(&(bvector[0][0]));
|
||||
copyFromHost(dataPointer, m_numberOfElements);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief copyToHost copies data to a Dune::BlockVector
|
||||
* @param bvector the vector to copy to
|
||||
*
|
||||
* @note This does synchronous transfer.
|
||||
* @note This assumes that the size of this vector is equal to the dim of the input vector.
|
||||
*/
|
||||
template <int BlockDimension>
|
||||
void copyToHost(Dune::BlockVector<Dune::FieldVector<T, BlockDimension>>& bvector) const
|
||||
{
|
||||
// TODO: [perf] vector.dim() can be replaced by bvector.N() * BlockDimension
|
||||
if (detail::to_size_t(m_numberOfElements) != bvector.dim()) {
|
||||
OPM_THROW(std::runtime_error,
|
||||
fmt::format("Given incompatible vector size. CuVector has size {},\n however, the BlockVector "
|
||||
"has has N() = {}, and dim() = {}.",
|
||||
m_numberOfElements,
|
||||
bvector.N(),
|
||||
bvector.dim()));
|
||||
}
|
||||
const auto dataPointer = static_cast<T*>(&(bvector[0][0]));
|
||||
copyToHost(dataPointer, m_numberOfElements);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief copyFromHost copies numberOfElements from the CPU memory dataPointer
|
||||
* @param dataPointer raw pointer to CPU memory
|
||||
* @param numberOfElements number of elements to copy
|
||||
* @note This does synchronous transfer.
|
||||
* @note assumes that this vector has numberOfElements elements
|
||||
*/
|
||||
void copyFromHost(const T* dataPointer, size_t numberOfElements);
|
||||
|
||||
/**
|
||||
* @brief copyFromHost copies numberOfElements to the CPU memory dataPointer
|
||||
* @param dataPointer raw pointer to CPU memory
|
||||
* @param numberOfElements number of elements to copy
|
||||
* @note This does synchronous transfer.
|
||||
* @note assumes that this vector has numberOfElements elements
|
||||
*/
|
||||
void copyToHost(T* dataPointer, size_t numberOfElements) const;
|
||||
|
||||
/**
|
||||
* @brief copyToHost copies data from an std::vector
|
||||
* @param data the vector to copy from
|
||||
*
|
||||
* @note This does synchronous transfer.
|
||||
* @note This assumes that the size of this vector is equal to the size of the input vector.
|
||||
*/
|
||||
void copyFromHost(const std::vector<T>& data);
|
||||
|
||||
/**
|
||||
* @brief copyToHost copies data to an std::vector
|
||||
* @param data the vector to copy to
|
||||
*
|
||||
* @note This does synchronous transfer.
|
||||
* @note This assumes that the size of this vector is equal to the size of the input vector.
|
||||
*/
|
||||
void copyToHost(std::vector<T>& data) const;
|
||||
|
||||
/**
|
||||
* @brief operator *= multiplies every element by scalar
|
||||
* @param scalar the scalar to with which to multiply every element
|
||||
*
|
||||
* @note This operation is asynchronous.
|
||||
*
|
||||
* @note int is not supported
|
||||
*/
|
||||
CuVector<T>& operator*=(const T& scalar);
|
||||
|
||||
/**
|
||||
* @brief axpy sets this vector equal to this + alha * y
|
||||
* @param alpha the scalar with which to multiply y
|
||||
* @param y input vector of same size as this
|
||||
*
|
||||
* @note this will call CuBlas in the background
|
||||
* @note int is not supported
|
||||
*/
|
||||
CuVector<T>& axpy(T alpha, const CuVector<T>& y);
|
||||
|
||||
/**
|
||||
* @brief operator+= adds the other vector to this vector
|
||||
*
|
||||
* @note this will call CuBlas in the background
|
||||
* @note int is not supported
|
||||
*/
|
||||
CuVector<T>& operator+=(const CuVector<T>& other);
|
||||
|
||||
/**
|
||||
* @brief operator-= subtracts the other vector from this vector
|
||||
*
|
||||
* @note this will call CuBlas in the background
|
||||
* @note int is not supported
|
||||
*/
|
||||
CuVector<T>& operator-=(const CuVector<T>& other);
|
||||
|
||||
/**
|
||||
* @brief dot computes the dot product (standard inner product) against the other vector
|
||||
* @param other vector of same size as this
|
||||
* @note this will call CuBlas in the background
|
||||
* @note int is not supported
|
||||
*
|
||||
* @return the result on the inner product
|
||||
*/
|
||||
T dot(const CuVector<T>& other) const;
|
||||
|
||||
/**
|
||||
* @brief returns the l2 norm of the vector
|
||||
* @note this will call CuBlas in the background
|
||||
* @note int is not supported
|
||||
*
|
||||
* @return the l2 norm
|
||||
*/
|
||||
T two_norm() const;
|
||||
|
||||
/**
|
||||
* Computes the dot product sum_i this[indexSet[i]] * other[indexSet[i]]
|
||||
*
|
||||
* @note int is not supported
|
||||
*/
|
||||
T dot(const CuVector<T>& other, const CuVector<int>& indexSet, CuVector<T>& buffer) const;
|
||||
|
||||
/**
|
||||
* Computes the norm sqrt(sum_i this[indexSet[i]] * this[indexSet[i]])
|
||||
*
|
||||
* @note int is not supported
|
||||
*/
|
||||
T two_norm(const CuVector<int>& indexSet, CuVector<T>& buffer) const;
|
||||
|
||||
|
||||
/**
|
||||
* Computes the dot product sum_i this[indexSet[i]] * other[indexSet[i]]
|
||||
*
|
||||
* @note int is not supported
|
||||
*/
|
||||
T dot(const CuVector<T>& other, const CuVector<int>& indexSet) const;
|
||||
|
||||
/**
|
||||
* Computes the norm sqrt(sum_i this[indexSet[i]] * this[indexSet[i]])
|
||||
*
|
||||
* @note int is not supported
|
||||
*/
|
||||
T two_norm(const CuVector<int>& indexSet) const;
|
||||
|
||||
|
||||
/**
|
||||
* @brief dim returns the dimension (number of T elements) in the vector
|
||||
* @return number of elements
|
||||
*/
|
||||
size_type dim() const;
|
||||
|
||||
|
||||
/**
|
||||
* @brief creates an std::vector of the same size and copies the GPU data to this std::vector
|
||||
* @return an std::vector containing the elements copied from the GPU.
|
||||
*/
|
||||
std::vector<T> asStdVector() const;
|
||||
|
||||
/**
|
||||
* @brief creates an std::vector of the same size and copies the GPU data to this std::vector
|
||||
* @return an std::vector containing the elements copied from the GPU.
|
||||
*/
|
||||
template <int blockSize>
|
||||
Dune::BlockVector<Dune::FieldVector<T, blockSize>> asDuneBlockVector() const
|
||||
{
|
||||
OPM_ERROR_IF(dim() % blockSize != 0,
|
||||
fmt::format("blockSize is not a multiple of dim(). Given blockSize = {}, and dim() = {}",
|
||||
blockSize,
|
||||
dim()));
|
||||
|
||||
Dune::BlockVector<Dune::FieldVector<T, blockSize>> returnValue(dim() / blockSize);
|
||||
copyToHost(returnValue);
|
||||
return returnValue;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* @brief setZeroAtIndexSet for each element in indexSet, sets the index of this vector to be zero
|
||||
* @param indexSet the set of indices to set to zero
|
||||
*
|
||||
* @note Assumes all indices are within range
|
||||
*
|
||||
* This is supposed to do the same as the following code on the CPU:
|
||||
* @code{.cpp}
|
||||
* for (int index : indexSet) {
|
||||
* this->data[index] = T(0.0);
|
||||
* }
|
||||
* @endcode
|
||||
*/
|
||||
void setZeroAtIndexSet(const CuVector<int>& indexSet);
|
||||
|
||||
private:
|
||||
T* m_dataOnDevice = nullptr;
|
||||
|
||||
// Note that we store this as int to make sure we are always cublas compatible.
|
||||
// This gives the added benefit that a size_t to int conversion error occurs during construction.
|
||||
const int m_numberOfElements;
|
||||
detail::CuBlasHandle& m_cuBlasHandle;
|
||||
|
||||
void assertSameSize(const CuVector<T>& other) const;
|
||||
void assertSameSize(int size) const;
|
||||
|
||||
void assertHasElements() const;
|
||||
};
|
||||
} // namespace Opm::cuistl
|
||||
#endif
|
86
opm/simulators/linalg/cuistl/detail/CuMatrixDescription.hpp
Normal file
86
opm/simulators/linalg/cuistl/detail/CuMatrixDescription.hpp
Normal file
@ -0,0 +1,86 @@
|
||||
/*
|
||||
Copyright 2022-2023 SINTEF AS
|
||||
|
||||
This file is part of the Open Porous Media project (OPM).
|
||||
|
||||
OPM is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
OPM is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with OPM. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
#ifndef CU_MATRIX_DESCRIPTION_HPP
|
||||
#define CU_MATRIX_DESCRIPTION_HPP
|
||||
#include <opm/simulators/linalg/cuistl/detail/CuSparseResource.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/cusparse_safe_call.hpp>
|
||||
|
||||
namespace Opm::cuistl::detail
|
||||
{
|
||||
|
||||
/**
|
||||
* CuSparseMatrixDescription holder. This is internal information needed for most calls to the CuSparse API.
|
||||
*/
|
||||
using CuSparseMatrixDescription = CuSparseResource<cusparseMatDescr_t>;
|
||||
|
||||
/**
|
||||
* Pointer to CuSparseMatrixDescription holder. This is internal information needed for most calls to the CuSparse API.
|
||||
*/
|
||||
using CuSparseMatrixDescriptionPtr = std::shared_ptr<CuSparseResource<cusparseMatDescr_t>>;
|
||||
|
||||
/**
|
||||
* @brief createMatrixDescription creates a default matrix description
|
||||
* @return a matrix description to a general sparse matrix with zero based indexing.
|
||||
*/
|
||||
inline CuSparseMatrixDescriptionPtr
|
||||
createMatrixDescription()
|
||||
{
|
||||
auto description = std::make_shared<CuSparseMatrixDescription>();
|
||||
|
||||
// Note: We always want to use zero base indexing.
|
||||
OPM_CUSPARSE_SAFE_CALL(cusparseSetMatType(description->get(), CUSPARSE_MATRIX_TYPE_GENERAL));
|
||||
OPM_CUSPARSE_SAFE_CALL(cusparseSetMatIndexBase(description->get(), CUSPARSE_INDEX_BASE_ZERO));
|
||||
|
||||
return description;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief createLowerDiagonalDescription creates a lower diagonal matrix description
|
||||
* @return a lower diagonal matrix description overlapped with options from ::Opm::cuistl::detail::createMatrixDescription()
|
||||
*
|
||||
* @note This will assume it has a unit diagonal
|
||||
*/
|
||||
inline CuSparseMatrixDescriptionPtr
|
||||
createLowerDiagonalDescription()
|
||||
{
|
||||
auto description = createMatrixDescription();
|
||||
OPM_CUSPARSE_SAFE_CALL(cusparseSetMatFillMode(description->get(), CUSPARSE_FILL_MODE_LOWER));
|
||||
OPM_CUSPARSE_SAFE_CALL(cusparseSetMatDiagType(description->get(), CUSPARSE_DIAG_TYPE_UNIT));
|
||||
return description;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief createUpperDiagonalDescription creates an upper diagonal matrix description
|
||||
* @return an upper diagonal matrix description overlapped with options from ::Opm::cuistl::detail::createMatrixDescription()
|
||||
*
|
||||
* @note This will assume it has a non-unit diagonal.
|
||||
*/
|
||||
inline CuSparseMatrixDescriptionPtr
|
||||
createUpperDiagonalDescription()
|
||||
{
|
||||
auto description = createMatrixDescription();
|
||||
OPM_CUSPARSE_SAFE_CALL(cusparseSetMatFillMode(description->get(), CUSPARSE_FILL_MODE_UPPER));
|
||||
OPM_CUSPARSE_SAFE_CALL(cusparseSetMatDiagType(description->get(), CUSPARSE_DIAG_TYPE_NON_UNIT));
|
||||
|
||||
return description;
|
||||
}
|
||||
|
||||
} // namespace Opm::cuistl::detail
|
||||
|
||||
#endif // CU_MATRIX_DESCRIPTION_HPP
|
99
opm/simulators/linalg/cuistl/detail/CuSparseResource.hpp
Normal file
99
opm/simulators/linalg/cuistl/detail/CuSparseResource.hpp
Normal file
@ -0,0 +1,99 @@
|
||||
/*
|
||||
Copyright 2022-2023 SINTEF AS
|
||||
|
||||
This file is part of the Open Porous Media project (OPM).
|
||||
|
||||
OPM is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
OPM is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with OPM. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
#ifndef CUSPARSERESOURCE_HPP
|
||||
#define CUSPARSERESOURCE_HPP
|
||||
#include <cusparse.h>
|
||||
#include <functional>
|
||||
#include <memory>
|
||||
#include <type_traits>
|
||||
|
||||
namespace Opm::cuistl::detail
|
||||
{
|
||||
|
||||
/**
|
||||
* @brief The CuSparseResource class wraps a CuSparse resource in a proper RAII pattern
|
||||
*
|
||||
* Current we support the following types for T:
|
||||
* - bsrilu02Info_t
|
||||
* - bsrsv2Info_t
|
||||
* - cusparseMatDescr_t
|
||||
*
|
||||
* More types are in principle supported by supplying a manual Creator and Destructor.
|
||||
*
|
||||
* In addition to acting as an easier-to-use smart_ptr specialized for these types, it
|
||||
* also adds error checking in the construction and deletion of these resources,
|
||||
* which a plain std::smart_ptr would not support out of the box. It also solves the
|
||||
* caveat of the pointer types of cuSparse resources not being exposed properly.
|
||||
*
|
||||
* Example usage:
|
||||
* @code{.cpp}
|
||||
* #include <opm/simulator/linalg/cuistl/detail/CuSparseResource.hpp>
|
||||
*
|
||||
* void someFunction() {
|
||||
* auto resource = CuSparseResource<cuSparseMatDescr_t>();
|
||||
* }
|
||||
* @endcode
|
||||
*/
|
||||
template <class T>
|
||||
class CuSparseResource
|
||||
{
|
||||
public:
|
||||
using CreatorType = typename std::function<cusparseStatus_t(T*)>;
|
||||
using DeleterType = typename std::function<cusparseStatus_t(T)>;
|
||||
|
||||
/**
|
||||
* @brief CuSparseResource creates a new instance by calling creator, and will delete using deleter
|
||||
* @param creator a functor used to create an instance
|
||||
* @param deleter a functor used to delete the instance
|
||||
*
|
||||
* @note Using this constructor it is possible to add support for new types not already accounted for.
|
||||
*/
|
||||
CuSparseResource(CreatorType creator, DeleterType deleter);
|
||||
|
||||
/**
|
||||
* @brief CuSparseResource will automatically select the proper creator and deleter based on the type (and throw an exception if not available)
|
||||
*/
|
||||
CuSparseResource();
|
||||
|
||||
/**
|
||||
* Calls the deleter functor
|
||||
*/
|
||||
~CuSparseResource();
|
||||
|
||||
// This should not be copyable.
|
||||
CuSparseResource(const CuSparseResource&) = delete;
|
||||
CuSparseResource& operator=(const CuSparseResource&) = delete;
|
||||
|
||||
/**
|
||||
* @brief get returns the raw pointer to the resource.
|
||||
*/
|
||||
T get()
|
||||
{
|
||||
return m_resource;
|
||||
}
|
||||
|
||||
private:
|
||||
T m_resource;
|
||||
|
||||
DeleterType m_deleter;
|
||||
};
|
||||
|
||||
} // namespace Opm::cuistl::impl
|
||||
#include <opm/simulators/linalg/cuistl/detail/CuSparseResource_impl.hpp>
|
||||
#endif // CUSPARSERESOURCE_HPP
|
103
opm/simulators/linalg/cuistl/detail/CuSparseResource_impl.hpp
Normal file
103
opm/simulators/linalg/cuistl/detail/CuSparseResource_impl.hpp
Normal file
@ -0,0 +1,103 @@
|
||||
/*
|
||||
Copyright 2022-2023 SINTEF AS
|
||||
|
||||
This file is part of the Open Porous Media project (OPM).
|
||||
|
||||
OPM is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
OPM is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with OPM. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
#include <exception>
|
||||
#include <opm/common/ErrorMacros.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/cusparse_safe_call.hpp>
|
||||
|
||||
namespace Opm::cuistl::detail
|
||||
{
|
||||
|
||||
namespace
|
||||
{
|
||||
template <class T>
|
||||
struct CuSparseDeleteAndCreate {
|
||||
};
|
||||
|
||||
template <>
|
||||
struct CuSparseDeleteAndCreate<bsrilu02Info_t> {
|
||||
using DeleterType = typename CuSparseResource<bsrilu02Info_t>::DeleterType;
|
||||
using CreatorType = typename CuSparseResource<bsrilu02Info_t>::CreatorType;
|
||||
|
||||
static DeleterType getDeleter()
|
||||
{
|
||||
return cusparseDestroyBsrilu02Info;
|
||||
}
|
||||
|
||||
static CreatorType getCreator()
|
||||
{
|
||||
return cusparseCreateBsrilu02Info;
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
struct CuSparseDeleteAndCreate<bsrsv2Info_t> {
|
||||
using DeleterType = typename CuSparseResource<bsrsv2Info_t>::DeleterType;
|
||||
using CreatorType = typename CuSparseResource<bsrsv2Info_t>::CreatorType;
|
||||
|
||||
static DeleterType getDeleter()
|
||||
{
|
||||
return cusparseDestroyBsrsv2Info;
|
||||
}
|
||||
|
||||
static CreatorType getCreator()
|
||||
{
|
||||
return cusparseCreateBsrsv2Info;
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
struct CuSparseDeleteAndCreate<cusparseMatDescr_t> {
|
||||
using DeleterType = typename CuSparseResource<cusparseMatDescr_t>::DeleterType;
|
||||
using CreatorType = typename CuSparseResource<cusparseMatDescr_t>::CreatorType;
|
||||
|
||||
static DeleterType getDeleter()
|
||||
{
|
||||
return cusparseDestroyMatDescr;
|
||||
}
|
||||
|
||||
static CreatorType getCreator()
|
||||
{
|
||||
return cusparseCreateMatDescr;
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace
|
||||
template <class T>
|
||||
CuSparseResource<T>::CuSparseResource(CreatorType creator, DeleterType deleter)
|
||||
: m_deleter(deleter)
|
||||
{
|
||||
// TODO: This should probably not use this macro since it will disguise the
|
||||
// proper name of the function being called.
|
||||
OPM_CUSPARSE_SAFE_CALL(creator(&m_resource));
|
||||
}
|
||||
|
||||
template <class T>
|
||||
CuSparseResource<T>::CuSparseResource()
|
||||
: CuSparseResource<T>(CuSparseDeleteAndCreate<T>::getCreator(), CuSparseDeleteAndCreate<T>::getDeleter())
|
||||
{
|
||||
}
|
||||
|
||||
template <class T>
|
||||
CuSparseResource<T>::~CuSparseResource()
|
||||
{
|
||||
// TODO: This should probably not use this macro since it will disguise the
|
||||
// proper name of the function being called.
|
||||
OPM_CUSPARSE_WARN_IF_ERROR(m_deleter(m_resource));
|
||||
}
|
||||
} // namespace Opm::cuistl::detail
|
168
opm/simulators/linalg/cuistl/detail/cublas_wrapper.hpp
Normal file
168
opm/simulators/linalg/cuistl/detail/cublas_wrapper.hpp
Normal file
@ -0,0 +1,168 @@
|
||||
/*
|
||||
Copyright 2022-2023 SINTEF AS
|
||||
|
||||
This file is part of the Open Porous Media project (OPM).
|
||||
|
||||
OPM is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
OPM is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with OPM. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Contains wrappers to make the CuBLAS library behave as a modern C++ library with function overlading.
|
||||
*
|
||||
* In simple terms, this allows one to call say cublasScal on both double and single precisision,
|
||||
* instead of calling cublasDscal and cublasSscal respectively.
|
||||
*/
|
||||
|
||||
#ifndef OPM_CUBLASWRAPPER_HEADER_INCLUDED
|
||||
#define OPM_CUBLASWRAPPER_HEADER_INCLUDED
|
||||
#include <cublas_v2.h>
|
||||
#include <opm/common/ErrorMacros.hpp>
|
||||
|
||||
namespace Opm::cuistl::detail
|
||||
{
|
||||
|
||||
inline cublasStatus_t
|
||||
cublasScal(cublasHandle_t handle,
|
||||
int n,
|
||||
const double* alpha, /* host or device pointer */
|
||||
double* x,
|
||||
int incx)
|
||||
{
|
||||
return cublasDscal(handle,
|
||||
n,
|
||||
alpha, /* host or device pointer */
|
||||
x,
|
||||
incx);
|
||||
}
|
||||
|
||||
inline cublasStatus_t
|
||||
cublasScal(cublasHandle_t handle,
|
||||
int n,
|
||||
const float* alpha, /* host or device pointer */
|
||||
float* x,
|
||||
int incx)
|
||||
{
|
||||
return cublasSscal(handle,
|
||||
n,
|
||||
alpha, /* host or device pointer */
|
||||
x,
|
||||
incx);
|
||||
}
|
||||
|
||||
inline cublasStatus_t
|
||||
cublasScal([[maybe_unused]] cublasHandle_t handle,
|
||||
[[maybe_unused]] int n,
|
||||
[[maybe_unused]] const int* alpha, /* host or device pointer */
|
||||
[[maybe_unused]] int* x,
|
||||
[[maybe_unused]] int incx)
|
||||
{
|
||||
OPM_THROW(std::runtime_error, "cublasScal multiplication for integer vectors is not implemented yet.");
|
||||
}
|
||||
inline cublasStatus_t
|
||||
cublasAxpy(cublasHandle_t handle,
|
||||
int n,
|
||||
const double* alpha, /* host or device pointer */
|
||||
const double* x,
|
||||
int incx,
|
||||
double* y,
|
||||
int incy)
|
||||
{
|
||||
return cublasDaxpy(handle,
|
||||
n,
|
||||
alpha, /* host or device pointer */
|
||||
x,
|
||||
incx,
|
||||
y,
|
||||
incy);
|
||||
}
|
||||
|
||||
inline cublasStatus_t
|
||||
cublasAxpy(cublasHandle_t handle,
|
||||
int n,
|
||||
const float* alpha, /* host or device pointer */
|
||||
const float* x,
|
||||
int incx,
|
||||
float* y,
|
||||
int incy)
|
||||
{
|
||||
return cublasSaxpy(handle,
|
||||
n,
|
||||
alpha, /* host or device pointer */
|
||||
x,
|
||||
incx,
|
||||
y,
|
||||
incy);
|
||||
}
|
||||
|
||||
inline cublasStatus_t
|
||||
cublasAxpy([[maybe_unused]] cublasHandle_t handle,
|
||||
[[maybe_unused]] int n,
|
||||
[[maybe_unused]] const int* alpha, /* host or device pointer */
|
||||
[[maybe_unused]] const int* x,
|
||||
[[maybe_unused]] int incx,
|
||||
[[maybe_unused]] int* y,
|
||||
[[maybe_unused]] int incy)
|
||||
{
|
||||
OPM_THROW(std::runtime_error, "axpy multiplication for integer vectors is not implemented yet.");
|
||||
}
|
||||
|
||||
inline cublasStatus_t
|
||||
cublasDot(cublasHandle_t handle, int n, const double* x, int incx, const double* y, int incy, double* result)
|
||||
{
|
||||
return cublasDdot(handle, n, x, incx, y, incy, result);
|
||||
}
|
||||
|
||||
inline cublasStatus_t
|
||||
cublasDot(cublasHandle_t handle, int n, const float* x, int incx, const float* y, int incy, float* result)
|
||||
{
|
||||
return cublasSdot(handle, n, x, incx, y, incy, result);
|
||||
}
|
||||
|
||||
inline cublasStatus_t
|
||||
cublasDot([[maybe_unused]] cublasHandle_t handle,
|
||||
[[maybe_unused]] int n,
|
||||
[[maybe_unused]] const int* x,
|
||||
[[maybe_unused]] int incx,
|
||||
[[maybe_unused]] const int* y,
|
||||
[[maybe_unused]] int incy,
|
||||
[[maybe_unused]] int* result)
|
||||
{
|
||||
OPM_THROW(std::runtime_error, "inner product for integer vectors is not implemented yet.");
|
||||
}
|
||||
|
||||
inline cublasStatus_t
|
||||
cublasNrm2(cublasHandle_t handle, int n, const double* x, int incx, double* result)
|
||||
{
|
||||
return cublasDnrm2(handle, n, x, incx, result);
|
||||
}
|
||||
|
||||
|
||||
inline cublasStatus_t
|
||||
cublasNrm2(cublasHandle_t handle, int n, const float* x, int incx, float* result)
|
||||
{
|
||||
return cublasSnrm2(handle, n, x, incx, result);
|
||||
}
|
||||
|
||||
inline cublasStatus_t
|
||||
cublasNrm2([[maybe_unused]] cublasHandle_t handle,
|
||||
[[maybe_unused]] int n,
|
||||
[[maybe_unused]] const int* x,
|
||||
[[maybe_unused]] int incx,
|
||||
[[maybe_unused]] int* result)
|
||||
{
|
||||
OPM_THROW(std::runtime_error, "norm2 for integer vectors is not implemented yet.");
|
||||
}
|
||||
|
||||
} // namespace Opm::cuistl::detail
|
||||
#endif
|
27
opm/simulators/linalg/cuistl/detail/cusparse_constants.hpp
Normal file
27
opm/simulators/linalg/cuistl/detail/cusparse_constants.hpp
Normal file
@ -0,0 +1,27 @@
|
||||
/*
|
||||
Copyright 2022-2023 SINTEF AS
|
||||
|
||||
This file is part of the Open Porous Media project (OPM).
|
||||
|
||||
OPM is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
OPM is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with OPM. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
#ifndef CUSPARSE_CONSTANTS_HPP
|
||||
#define CUSPARSE_CONSTANTS_HPP
|
||||
#include <cusparse.h>
|
||||
namespace Opm::cuistl::detail
|
||||
{
|
||||
const constexpr auto CUSPARSE_MATRIX_ORDER = CUSPARSE_DIRECTION_ROW;
|
||||
}
|
||||
|
||||
#endif
|
454
opm/simulators/linalg/cuistl/detail/cusparse_wrapper.hpp
Normal file
454
opm/simulators/linalg/cuistl/detail/cusparse_wrapper.hpp
Normal file
@ -0,0 +1,454 @@
|
||||
/*
|
||||
Copyright 2022-2023 SINTEF AS
|
||||
|
||||
This file is part of the Open Porous Media project (OPM).
|
||||
|
||||
OPM is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
OPM is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with OPM. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Contains wrappers to make the CuSPARSE library behave as a modern C++ library with function overlading.
|
||||
*
|
||||
* In simple terms, this allows one to call say cusparseBsrilu02_analysis on both double and single precisision,
|
||||
* instead of calling cusparseDbsrilu02_analysis and cusparseDbsrilu02_analysis respectively.
|
||||
*/
|
||||
#include <cusparse.h>
|
||||
#include <type_traits>
|
||||
#ifndef OPM_CUSPARSE_WRAPPER_HPP
|
||||
#define OPM_CUSPARSE_WRAPPER_HPP
|
||||
namespace Opm::cuistl::detail
|
||||
{
|
||||
|
||||
inline cusparseStatus_t
|
||||
cusparseBsrilu02_analysis(cusparseHandle_t handle,
|
||||
cusparseDirection_t dirA,
|
||||
int mb,
|
||||
int nnzb,
|
||||
const cusparseMatDescr_t descrA,
|
||||
double* bsrSortedVal,
|
||||
const int* bsrSortedRowPtr,
|
||||
const int* bsrSortedColInd,
|
||||
int blockDim,
|
||||
bsrilu02Info_t info,
|
||||
cusparseSolvePolicy_t policy,
|
||||
void* pBuffer)
|
||||
{
|
||||
return cusparseDbsrilu02_analysis(handle,
|
||||
dirA,
|
||||
mb,
|
||||
nnzb,
|
||||
descrA,
|
||||
bsrSortedVal,
|
||||
bsrSortedRowPtr,
|
||||
bsrSortedColInd,
|
||||
blockDim,
|
||||
info,
|
||||
policy,
|
||||
pBuffer);
|
||||
}
|
||||
|
||||
inline cusparseStatus_t
|
||||
cusparseBsrsv2_analysis(cusparseHandle_t handle,
|
||||
cusparseDirection_t dirA,
|
||||
cusparseOperation_t transA,
|
||||
int mb,
|
||||
int nnzb,
|
||||
const cusparseMatDescr_t descrA,
|
||||
const double* bsrSortedValA,
|
||||
const int* bsrSortedRowPtrA,
|
||||
const int* bsrSortedColIndA,
|
||||
int blockDim,
|
||||
bsrsv2Info_t info,
|
||||
cusparseSolvePolicy_t policy,
|
||||
void* pBuffer)
|
||||
{
|
||||
return cusparseDbsrsv2_analysis(handle,
|
||||
dirA,
|
||||
transA,
|
||||
mb,
|
||||
nnzb,
|
||||
descrA,
|
||||
bsrSortedValA,
|
||||
bsrSortedRowPtrA,
|
||||
bsrSortedColIndA,
|
||||
blockDim,
|
||||
info,
|
||||
policy,
|
||||
pBuffer);
|
||||
}
|
||||
|
||||
inline cusparseStatus_t
|
||||
cusparseBsrsv2_analysis(cusparseHandle_t handle,
|
||||
cusparseDirection_t dirA,
|
||||
cusparseOperation_t transA,
|
||||
int mb,
|
||||
int nnzb,
|
||||
const cusparseMatDescr_t descrA,
|
||||
const float* bsrSortedValA,
|
||||
const int* bsrSortedRowPtrA,
|
||||
const int* bsrSortedColIndA,
|
||||
int blockDim,
|
||||
bsrsv2Info_t info,
|
||||
cusparseSolvePolicy_t policy,
|
||||
void* pBuffer)
|
||||
{
|
||||
return cusparseSbsrsv2_analysis(handle,
|
||||
dirA,
|
||||
transA,
|
||||
mb,
|
||||
nnzb,
|
||||
descrA,
|
||||
bsrSortedValA,
|
||||
bsrSortedRowPtrA,
|
||||
bsrSortedColIndA,
|
||||
blockDim,
|
||||
info,
|
||||
policy,
|
||||
pBuffer);
|
||||
}
|
||||
|
||||
inline cusparseStatus_t
|
||||
cusparseBsrilu02_analysis(cusparseHandle_t handle,
|
||||
cusparseDirection_t dirA,
|
||||
int mb,
|
||||
int nnzb,
|
||||
const cusparseMatDescr_t descrA,
|
||||
float* bsrSortedVal,
|
||||
const int* bsrSortedRowPtr,
|
||||
const int* bsrSortedColInd,
|
||||
int blockDim,
|
||||
bsrilu02Info_t info,
|
||||
cusparseSolvePolicy_t policy,
|
||||
void* pBuffer)
|
||||
{
|
||||
return cusparseSbsrilu02_analysis(handle,
|
||||
dirA,
|
||||
mb,
|
||||
nnzb,
|
||||
descrA,
|
||||
bsrSortedVal,
|
||||
bsrSortedRowPtr,
|
||||
bsrSortedColInd,
|
||||
blockDim,
|
||||
info,
|
||||
policy,
|
||||
pBuffer);
|
||||
}
|
||||
|
||||
inline cusparseStatus_t
|
||||
cusparseBsrsv2_solve(cusparseHandle_t handle,
|
||||
cusparseDirection_t dirA,
|
||||
cusparseOperation_t transA,
|
||||
int mb,
|
||||
int nnzb,
|
||||
const double* alpha,
|
||||
const cusparseMatDescr_t descrA,
|
||||
const double* bsrSortedValA,
|
||||
const int* bsrSortedRowPtrA,
|
||||
const int* bsrSortedColIndA,
|
||||
int blockDim,
|
||||
bsrsv2Info_t info,
|
||||
const double* f,
|
||||
double* x,
|
||||
cusparseSolvePolicy_t policy,
|
||||
void* pBuffer)
|
||||
{
|
||||
return cusparseDbsrsv2_solve(handle,
|
||||
dirA,
|
||||
transA,
|
||||
mb,
|
||||
nnzb,
|
||||
alpha,
|
||||
descrA,
|
||||
bsrSortedValA,
|
||||
bsrSortedRowPtrA,
|
||||
bsrSortedColIndA,
|
||||
blockDim,
|
||||
info,
|
||||
f,
|
||||
x,
|
||||
policy,
|
||||
pBuffer);
|
||||
}
|
||||
|
||||
|
||||
inline cusparseStatus_t
|
||||
cusparseBsrsv2_solve(cusparseHandle_t handle,
|
||||
cusparseDirection_t dirA,
|
||||
cusparseOperation_t transA,
|
||||
int mb,
|
||||
int nnzb,
|
||||
const float* alpha,
|
||||
const cusparseMatDescr_t descrA,
|
||||
const float* bsrSortedValA,
|
||||
const int* bsrSortedRowPtrA,
|
||||
const int* bsrSortedColIndA,
|
||||
int blockDim,
|
||||
bsrsv2Info_t info,
|
||||
const float* f,
|
||||
float* x,
|
||||
cusparseSolvePolicy_t policy,
|
||||
void* pBuffer)
|
||||
{
|
||||
return cusparseSbsrsv2_solve(handle,
|
||||
dirA,
|
||||
transA,
|
||||
mb,
|
||||
nnzb,
|
||||
alpha,
|
||||
descrA,
|
||||
bsrSortedValA,
|
||||
bsrSortedRowPtrA,
|
||||
bsrSortedColIndA,
|
||||
blockDim,
|
||||
info,
|
||||
f,
|
||||
x,
|
||||
policy,
|
||||
pBuffer);
|
||||
}
|
||||
|
||||
|
||||
inline cusparseStatus_t
|
||||
cusparseBsrilu02_bufferSize(cusparseHandle_t handle,
|
||||
cusparseDirection_t dirA,
|
||||
int mb,
|
||||
int nnzb,
|
||||
const cusparseMatDescr_t descrA,
|
||||
double* bsrSortedVal,
|
||||
const int* bsrSortedRowPtr,
|
||||
const int* bsrSortedColInd,
|
||||
int blockDim,
|
||||
bsrilu02Info_t info,
|
||||
int* pBufferSizeInBytes)
|
||||
{
|
||||
return cusparseDbsrilu02_bufferSize(handle,
|
||||
dirA,
|
||||
mb,
|
||||
nnzb,
|
||||
descrA,
|
||||
bsrSortedVal,
|
||||
bsrSortedRowPtr,
|
||||
bsrSortedColInd,
|
||||
blockDim,
|
||||
info,
|
||||
pBufferSizeInBytes);
|
||||
}
|
||||
|
||||
|
||||
inline cusparseStatus_t
|
||||
cusparseBsrilu02_bufferSize(cusparseHandle_t handle,
|
||||
cusparseDirection_t dirA,
|
||||
int mb,
|
||||
int nnzb,
|
||||
const cusparseMatDescr_t descrA,
|
||||
float* bsrSortedVal,
|
||||
const int* bsrSortedRowPtr,
|
||||
const int* bsrSortedColInd,
|
||||
int blockDim,
|
||||
bsrilu02Info_t info,
|
||||
int* pBufferSizeInBytes)
|
||||
{
|
||||
return cusparseSbsrilu02_bufferSize(handle,
|
||||
dirA,
|
||||
mb,
|
||||
nnzb,
|
||||
descrA,
|
||||
bsrSortedVal,
|
||||
bsrSortedRowPtr,
|
||||
bsrSortedColInd,
|
||||
blockDim,
|
||||
info,
|
||||
pBufferSizeInBytes);
|
||||
}
|
||||
|
||||
inline cusparseStatus_t
|
||||
cusparseBsrsv2_bufferSize(cusparseHandle_t handle,
|
||||
cusparseDirection_t dirA,
|
||||
cusparseOperation_t transA,
|
||||
int mb,
|
||||
int nnzb,
|
||||
const cusparseMatDescr_t descrA,
|
||||
double* bsrSortedValA,
|
||||
const int* bsrSortedRowPtrA,
|
||||
const int* bsrSortedColIndA,
|
||||
int blockDim,
|
||||
bsrsv2Info_t info,
|
||||
int* pBufferSizeInBytes)
|
||||
{
|
||||
return cusparseDbsrsv2_bufferSize(handle,
|
||||
dirA,
|
||||
transA,
|
||||
mb,
|
||||
nnzb,
|
||||
descrA,
|
||||
bsrSortedValA,
|
||||
bsrSortedRowPtrA,
|
||||
bsrSortedColIndA,
|
||||
blockDim,
|
||||
info,
|
||||
pBufferSizeInBytes);
|
||||
}
|
||||
inline cusparseStatus_t
|
||||
cusparseBsrsv2_bufferSize(cusparseHandle_t handle,
|
||||
cusparseDirection_t dirA,
|
||||
cusparseOperation_t transA,
|
||||
int mb,
|
||||
int nnzb,
|
||||
const cusparseMatDescr_t descrA,
|
||||
float* bsrSortedValA,
|
||||
const int* bsrSortedRowPtrA,
|
||||
const int* bsrSortedColIndA,
|
||||
int blockDim,
|
||||
bsrsv2Info_t info,
|
||||
int* pBufferSizeInBytes)
|
||||
{
|
||||
return cusparseSbsrsv2_bufferSize(handle,
|
||||
dirA,
|
||||
transA,
|
||||
mb,
|
||||
nnzb,
|
||||
descrA,
|
||||
bsrSortedValA,
|
||||
bsrSortedRowPtrA,
|
||||
bsrSortedColIndA,
|
||||
blockDim,
|
||||
info,
|
||||
pBufferSizeInBytes);
|
||||
}
|
||||
|
||||
inline cusparseStatus_t
|
||||
cusparseBsrilu02(cusparseHandle_t handle,
|
||||
cusparseDirection_t dirA,
|
||||
int mb,
|
||||
int nnzb,
|
||||
const cusparseMatDescr_t descrA,
|
||||
double* bsrSortedVal,
|
||||
const int* bsrSortedRowPtr,
|
||||
const int* bsrSortedColInd,
|
||||
int blockDim,
|
||||
bsrilu02Info_t info,
|
||||
cusparseSolvePolicy_t policy,
|
||||
void* pBuffer)
|
||||
{
|
||||
return cusparseDbsrilu02(handle,
|
||||
dirA,
|
||||
mb,
|
||||
nnzb,
|
||||
descrA,
|
||||
bsrSortedVal,
|
||||
bsrSortedRowPtr,
|
||||
bsrSortedColInd,
|
||||
blockDim,
|
||||
info,
|
||||
policy,
|
||||
pBuffer);
|
||||
}
|
||||
inline cusparseStatus_t
|
||||
cusparseBsrilu02(cusparseHandle_t handle,
|
||||
cusparseDirection_t dirA,
|
||||
int mb,
|
||||
int nnzb,
|
||||
const cusparseMatDescr_t descrA,
|
||||
float* bsrSortedVal,
|
||||
const int* bsrSortedRowPtr,
|
||||
const int* bsrSortedColInd,
|
||||
int blockDim,
|
||||
bsrilu02Info_t info,
|
||||
cusparseSolvePolicy_t policy,
|
||||
void* pBuffer)
|
||||
{
|
||||
return cusparseSbsrilu02(handle,
|
||||
dirA,
|
||||
mb,
|
||||
nnzb,
|
||||
descrA,
|
||||
bsrSortedVal,
|
||||
bsrSortedRowPtr,
|
||||
bsrSortedColInd,
|
||||
blockDim,
|
||||
info,
|
||||
policy,
|
||||
pBuffer);
|
||||
}
|
||||
|
||||
inline cusparseStatus_t
|
||||
cusparseBsrmv(cusparseHandle_t handle,
|
||||
cusparseDirection_t dirA,
|
||||
cusparseOperation_t transA,
|
||||
int mb,
|
||||
int nb,
|
||||
int nnzb,
|
||||
const double* alpha,
|
||||
const cusparseMatDescr_t descrA,
|
||||
const double* bsrSortedValA,
|
||||
const int* bsrSortedRowPtrA,
|
||||
const int* bsrSortedColIndA,
|
||||
int blockDim,
|
||||
const double* x,
|
||||
const double* beta,
|
||||
double* y)
|
||||
{
|
||||
return cusparseDbsrmv(handle,
|
||||
dirA,
|
||||
transA,
|
||||
mb,
|
||||
nb,
|
||||
nnzb,
|
||||
alpha,
|
||||
descrA,
|
||||
bsrSortedValA,
|
||||
bsrSortedRowPtrA,
|
||||
bsrSortedColIndA,
|
||||
blockDim,
|
||||
x,
|
||||
beta,
|
||||
y);
|
||||
}
|
||||
|
||||
inline cusparseStatus_t
|
||||
cusparseBsrmv(cusparseHandle_t handle,
|
||||
cusparseDirection_t dirA,
|
||||
cusparseOperation_t transA,
|
||||
int mb,
|
||||
int nb,
|
||||
int nnzb,
|
||||
const float* alpha,
|
||||
const cusparseMatDescr_t descrA,
|
||||
const float* bsrSortedValA,
|
||||
const int* bsrSortedRowPtrA,
|
||||
const int* bsrSortedColIndA,
|
||||
int blockDim,
|
||||
const float* x,
|
||||
const float* beta,
|
||||
float* y)
|
||||
{
|
||||
return cusparseSbsrmv(handle,
|
||||
dirA,
|
||||
transA,
|
||||
mb,
|
||||
nb,
|
||||
nnzb,
|
||||
alpha,
|
||||
descrA,
|
||||
bsrSortedValA,
|
||||
bsrSortedRowPtrA,
|
||||
bsrSortedColIndA,
|
||||
blockDim,
|
||||
x,
|
||||
beta,
|
||||
y);
|
||||
}
|
||||
} // namespace Opm::cuistl::detail
|
||||
#endif
|
107
opm/simulators/linalg/cuistl/detail/safe_conversion.hpp
Normal file
107
opm/simulators/linalg/cuistl/detail/safe_conversion.hpp
Normal file
@ -0,0 +1,107 @@
|
||||
/*
|
||||
Copyright 2023 SINTEF AS
|
||||
|
||||
This file is part of the Open Porous Media project (OPM).
|
||||
|
||||
OPM is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
OPM is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with OPM. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
#ifndef OPM_CUISTL_SAFE_CONVERSION_HPP
|
||||
#define OPM_CUISTL_SAFE_CONVERSION_HPP
|
||||
|
||||
|
||||
|
||||
#include <cstddef>
|
||||
#include <fmt/format.h>
|
||||
#include <limits>
|
||||
#include <opm/common/ErrorMacros.hpp>
|
||||
#include <type_traits>
|
||||
|
||||
|
||||
/**
|
||||
* Provides various utilities for doing signed to unsigned conversion, unsigned to signed, 32 bits to 64 bits and 64
|
||||
* bits to 32 bits.
|
||||
*
|
||||
* The main use case within cuistl is that the cusparse library requires signed int for all its size parameters,
|
||||
* while Dune::BlockVector (and relatives) use unsigned size_t.
|
||||
*/
|
||||
|
||||
namespace Opm::cuistl::detail
|
||||
{
|
||||
|
||||
/**
|
||||
* @brief to_int converts a (on most relevant platforms) 64 bits unsigned size_t to a signed 32 bits signed int
|
||||
* @param s the unsigned integer
|
||||
* @throw std::invalid_argument exception if s is out of range for an int
|
||||
* @return converted s to int if s is within the range of int
|
||||
*
|
||||
* @todo This can be done for more generic types, but then it is probably wise to wait for C++20's cmp-functions
|
||||
*/
|
||||
inline int
|
||||
to_int(std::size_t s)
|
||||
{
|
||||
static_assert(
|
||||
std::is_signed_v<int>,
|
||||
"Weird architecture or my understanding of the standard is flawed. Better have a look at this function.");
|
||||
static_assert(
|
||||
!std::is_signed_v<std::size_t>,
|
||||
"Weird architecture or my understanding of the standard is flawed. Better have a look at this function.");
|
||||
|
||||
static_assert(
|
||||
sizeof(int) <= sizeof(std::size_t),
|
||||
"Weird architecture or my understanding of the standard is flawed. Better have a look at this function.");
|
||||
|
||||
|
||||
if (s > std::size_t(std::numeric_limits<int>::max())) {
|
||||
OPM_THROW(std::invalid_argument,
|
||||
fmt::format("Trying to convert {} to int, but it is out of range. Maximum possible int: {}. ",
|
||||
s,
|
||||
std::numeric_limits<int>::max()));
|
||||
}
|
||||
|
||||
// We know it will be in range here:
|
||||
return int(s);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief to_size_t converts a (on most relevant platforms) a 32 bit signed int to a 64 bits unsigned int
|
||||
* @param i the signed integer
|
||||
* @return converted i to size_t if it is a non-negative integer.
|
||||
*
|
||||
* @throw std::invalid_argument if i is negative.
|
||||
* @todo This can be done for more generic types, but then it is probably wise to wait for C++20's cmp-functions
|
||||
*/
|
||||
inline std::size_t
|
||||
to_size_t(int i)
|
||||
{
|
||||
static_assert(
|
||||
std::is_signed_v<int>,
|
||||
"Weird architecture or my understanding of the standard is flawed. Better have a look at this function.");
|
||||
static_assert(
|
||||
!std::is_signed_v<std::size_t>,
|
||||
"Weird architecture or my understanding of the standard is flawed. Better have a look at this function.");
|
||||
|
||||
static_assert(
|
||||
sizeof(int) <= sizeof(std::size_t),
|
||||
"Weird architecture or my understanding of the standard is flawed. Better have a look at this function.");
|
||||
|
||||
|
||||
if (i < int(0)) {
|
||||
OPM_THROW(std::invalid_argument, fmt::format("Trying to convert the negative number {} to size_t.", i));
|
||||
}
|
||||
|
||||
return std::size_t(i);
|
||||
}
|
||||
} // namespace Opm::cuistl::detail
|
||||
|
||||
#endif
|
112
opm/simulators/linalg/cuistl/detail/vector_operations.cu
Normal file
112
opm/simulators/linalg/cuistl/detail/vector_operations.cu
Normal file
@ -0,0 +1,112 @@
|
||||
/*
|
||||
Copyright 2022-2023 SINTEF AS
|
||||
|
||||
This file is part of the Open Porous Media project (OPM).
|
||||
|
||||
OPM is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
OPM is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with OPM. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
#include <opm/simulators/linalg/cuistl/detail/vector_operations.hpp>
|
||||
// TODO: [perf] Get rid of thrust.
|
||||
#include <thrust/device_ptr.h>
|
||||
#include <thrust/reduce.h>
|
||||
namespace Opm::cuistl::detail
|
||||
{
|
||||
|
||||
namespace
|
||||
{
|
||||
template <class T>
|
||||
__global__ void setZeroAtIndexSetKernel(T* devicePointer, size_t numberOfElements, const int* indices)
|
||||
{
|
||||
const auto globalIndex = blockDim.x * blockIdx.x + threadIdx.x;
|
||||
|
||||
if (globalIndex < numberOfElements) {
|
||||
devicePointer[indices[globalIndex]] = T(0);
|
||||
}
|
||||
}
|
||||
|
||||
template <class T>
|
||||
__global__ void setVectorValueKernel(T* devicePointer, size_t numberOfElements, T value)
|
||||
{
|
||||
const auto globalIndex = blockDim.x * blockIdx.x + threadIdx.x;
|
||||
|
||||
if (globalIndex < numberOfElements) {
|
||||
devicePointer[globalIndex] = value;
|
||||
}
|
||||
}
|
||||
|
||||
template <class T>
|
||||
__global__ void
|
||||
elementWiseMultiplyKernel(const T* a, const T* b, T* buffer, size_t numberOfElements, const int* indices)
|
||||
{
|
||||
const auto globalIndex = blockDim.x * blockIdx.x + threadIdx.x;
|
||||
|
||||
// TODO: [perf] Is it faster to just use a mask? Probably does not matter either way
|
||||
// This is hopefully not where we will spend most of our time.
|
||||
if (globalIndex < numberOfElements) {
|
||||
buffer[globalIndex] = a[indices[globalIndex]] * b[indices[globalIndex]];
|
||||
}
|
||||
}
|
||||
|
||||
constexpr inline size_t getThreads([[maybe_unused]] size_t numberOfElements)
|
||||
{
|
||||
return 1024;
|
||||
}
|
||||
|
||||
inline size_t getBlocks(size_t numberOfElements)
|
||||
{
|
||||
const auto threads = getThreads(numberOfElements);
|
||||
return (numberOfElements + threads - 1) / threads;
|
||||
}
|
||||
} // namespace
|
||||
|
||||
template <class T>
|
||||
void
|
||||
setVectorValue(T* deviceData, size_t numberOfElements, const T& value)
|
||||
{
|
||||
setVectorValueKernel<<<getBlocks(numberOfElements), getThreads(numberOfElements)>>>(
|
||||
deviceData, numberOfElements, value);
|
||||
}
|
||||
|
||||
template void setVectorValue(double*, size_t, const double&);
|
||||
template void setVectorValue(float*, size_t, const float&);
|
||||
template void setVectorValue(int*, size_t, const int&);
|
||||
|
||||
template <class T>
|
||||
void
|
||||
setZeroAtIndexSet(T* deviceData, size_t numberOfElements, const int* indices)
|
||||
{
|
||||
setZeroAtIndexSetKernel<<<getBlocks(numberOfElements), getThreads(numberOfElements)>>>(
|
||||
deviceData, numberOfElements, indices);
|
||||
}
|
||||
template void setZeroAtIndexSet(double*, size_t, const int*);
|
||||
template void setZeroAtIndexSet(float*, size_t, const int*);
|
||||
template void setZeroAtIndexSet(int*, size_t, const int*);
|
||||
|
||||
template <class T>
|
||||
T
|
||||
innerProductAtIndices(const T* deviceA, const T* deviceB, T* buffer, size_t numberOfElements, const int* indices)
|
||||
{
|
||||
elementWiseMultiplyKernel<<<getBlocks(numberOfElements), getThreads(numberOfElements)>>>(
|
||||
deviceA, deviceB, buffer, numberOfElements, indices);
|
||||
|
||||
// TODO: [perf] Get rid of thrust and use a more direct reduction here.
|
||||
auto bufferAsDevicePointer = thrust::device_pointer_cast(buffer);
|
||||
return thrust::reduce(bufferAsDevicePointer, bufferAsDevicePointer + numberOfElements);
|
||||
}
|
||||
|
||||
template double innerProductAtIndices(const double*, const double*, double* buffer, size_t, const int*);
|
||||
template float innerProductAtIndices(const float*, const float*, float* buffer, size_t, const int*);
|
||||
template int innerProductAtIndices(const int*, const int*, int* buffer, size_t, const int*);
|
||||
|
||||
} // namespace Opm::cuistl::impl
|
58
opm/simulators/linalg/cuistl/detail/vector_operations.hpp
Normal file
58
opm/simulators/linalg/cuistl/detail/vector_operations.hpp
Normal file
@ -0,0 +1,58 @@
|
||||
/*
|
||||
Copyright 2022-2023 SINTEF AS
|
||||
|
||||
This file is part of the Open Porous Media project (OPM).
|
||||
|
||||
OPM is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
OPM is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with OPM. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
#ifndef OPM_CUISTL_VECTOR_OPERATIONS_HPP
|
||||
#define OPM_CUISTL_VECTOR_OPERATIONS_HPP
|
||||
#include <cstddef>
|
||||
namespace Opm::cuistl::detail
|
||||
{
|
||||
|
||||
/**
|
||||
* @brief setVectorValue sets every element of deviceData to value
|
||||
* @param deviceData pointer to GPU memory
|
||||
* @param numberOfElements number of elements to set to value
|
||||
* @param value the value to use
|
||||
*/
|
||||
template <class T>
|
||||
void setVectorValue(T* deviceData, size_t numberOfElements, const T& value);
|
||||
|
||||
/**
|
||||
* @brief setZeroAtIndexSet sets deviceData to zero in the indices of contained in indices
|
||||
* @param deviceData the data to operate on (device memory)
|
||||
* @param numberOfElements number of indices
|
||||
* @param indices the indices to use (device memory)
|
||||
*/
|
||||
template <class T>
|
||||
void setZeroAtIndexSet(T* deviceData, size_t numberOfElements, const int* indices);
|
||||
|
||||
/**
|
||||
* @brief innerProductAtIndices computes the inner product between deviceA[indices] and deviceB[indices]
|
||||
* @param deviceA data A (device memory)
|
||||
* @param deviceB data B (device memory)
|
||||
* @param buffer a buffer with number of elements equal to numberOfElements (device memory)
|
||||
* @param numberOfElements number of indices
|
||||
* @param indices the indices to compute the inner product over (device memory)
|
||||
* @return the result of the inner product
|
||||
*
|
||||
* @note This is equivalent to projecting the vectors to the indices contained in indices, then doing the inner product
|
||||
* of those projected vectors.
|
||||
*/
|
||||
template <class T>
|
||||
T innerProductAtIndices(const T* deviceA, const T* deviceB, T* buffer, size_t numberOfElements, const int* indices);
|
||||
} // namespace Opm::cuistl::detail
|
||||
#endif
|
181
tests/cuistl/test_cusparsematrix.cpp
Normal file
181
tests/cuistl/test_cusparsematrix.cpp
Normal file
@ -0,0 +1,181 @@
|
||||
/*
|
||||
Copyright 2022-2023 SINTEF AS
|
||||
|
||||
This file is part of the Open Porous Media project (OPM).
|
||||
|
||||
OPM is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
OPM is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with OPM. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
#include <config.h>
|
||||
|
||||
#define BOOST_TEST_MODULE TestCuSparseMatrix
|
||||
|
||||
#include <boost/test/unit_test.hpp>
|
||||
#include <dune/istl/bcrsmatrix.hh>
|
||||
#include <memory>
|
||||
#include <opm/simulators/linalg/cuistl/CuSparseMatrix.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/CuVector.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp>
|
||||
#include <random>
|
||||
|
||||
BOOST_AUTO_TEST_CASE(TestConstruction1D)
|
||||
{
|
||||
// Here we will test a simple 1D finite difference scheme for
|
||||
// the Laplace equation:
|
||||
//
|
||||
// -\Delta u = f on [0,1]
|
||||
//
|
||||
// Using a central difference approximation of \Delta u, this can
|
||||
// be approximated by
|
||||
//
|
||||
// -(u_{i+1}-2u_i+u_{i-1})/Dx^2 = f(x_i)
|
||||
//
|
||||
// giving rise to the matrix
|
||||
//
|
||||
// -2 1 0 0 ... 0 0
|
||||
// 1 -2 1 0 0 ... 0
|
||||
// ....
|
||||
// 0 0 0 ...1 -2 1
|
||||
// 0 0 0 ... 1 -2
|
||||
|
||||
const int N = 5;
|
||||
const int nonZeroes = N * 3 - 2;
|
||||
using M = Dune::FieldMatrix<double, 1, 1>;
|
||||
using SpMatrix = Dune::BCRSMatrix<M>;
|
||||
|
||||
SpMatrix B(N, N, nonZeroes, SpMatrix::row_wise);
|
||||
for (auto row = B.createbegin(); row != B.createend(); ++row) {
|
||||
// Add nonzeros for left neighbour, diagonal and right neighbour
|
||||
if (row.index() > 0) {
|
||||
row.insert(row.index() - 1);
|
||||
}
|
||||
row.insert(row.index());
|
||||
if (row.index() < B.N() - 1) {
|
||||
row.insert(row.index() + 1);
|
||||
}
|
||||
}
|
||||
// This might not be the most elegant way of filling in a Dune sparse matrix, but it works.
|
||||
for (int i = 0; i < N; ++i) {
|
||||
B[i][i] = -2;
|
||||
if (i < N - 1) {
|
||||
B[i][i + 1] = 1;
|
||||
}
|
||||
|
||||
if (i > 0) {
|
||||
B[i][i - 1] = 1;
|
||||
}
|
||||
}
|
||||
|
||||
auto cuSparseMatrix = Opm::cuistl::CuSparseMatrix<double>::fromMatrix(B);
|
||||
|
||||
const auto& nonZeroValuesCuda = cuSparseMatrix.getNonZeroValues();
|
||||
std::vector<double> buffer(cuSparseMatrix.nonzeroes(), 0.0);
|
||||
nonZeroValuesCuda.copyToHost(buffer.data(), buffer.size());
|
||||
const double* nonZeroElements = static_cast<const double*>(&((B[0][0][0][0])));
|
||||
BOOST_CHECK_EQUAL_COLLECTIONS(buffer.begin(), buffer.end(), nonZeroElements, nonZeroElements + B.nonzeroes());
|
||||
BOOST_CHECK_EQUAL(N * 3 - 2, cuSparseMatrix.nonzeroes());
|
||||
|
||||
std::vector<int> rowIndicesFromCUDA(N + 1);
|
||||
cuSparseMatrix.getRowIndices().copyToHost(rowIndicesFromCUDA.data(), rowIndicesFromCUDA.size());
|
||||
BOOST_CHECK_EQUAL(rowIndicesFromCUDA[0], 0);
|
||||
BOOST_CHECK_EQUAL(rowIndicesFromCUDA[1], 2);
|
||||
for (int i = 2; i < N; ++i) {
|
||||
BOOST_CHECK_EQUAL(rowIndicesFromCUDA[i], rowIndicesFromCUDA[i - 1] + 3);
|
||||
}
|
||||
|
||||
|
||||
std::vector<int> columnIndicesFromCUDA(B.nonzeroes(), 0);
|
||||
cuSparseMatrix.getColumnIndices().copyToHost(columnIndicesFromCUDA.data(), columnIndicesFromCUDA.size());
|
||||
|
||||
BOOST_CHECK_EQUAL(columnIndicesFromCUDA[0], 0);
|
||||
BOOST_CHECK_EQUAL(columnIndicesFromCUDA[1], 1);
|
||||
// TODO: Check rest
|
||||
}
|
||||
|
||||
|
||||
BOOST_AUTO_TEST_CASE(RandomSparsityMatrix)
|
||||
{
|
||||
std::srand(0);
|
||||
double nonzeroPercent = 0.2;
|
||||
std::mt19937 generator;
|
||||
std::uniform_real_distribution<double> distribution(0.0, 1.0);
|
||||
constexpr size_t dim = 3;
|
||||
const int N = 300;
|
||||
using M = Dune::FieldMatrix<double, dim, dim>;
|
||||
using SpMatrix = Dune::BCRSMatrix<M>;
|
||||
using Vector = Dune::BlockVector<Dune::FieldVector<double, dim>>;
|
||||
|
||||
|
||||
std::vector<std::vector<size_t>> nonzerocols(N);
|
||||
int nonZeroes = 0;
|
||||
for (auto row = 0; row < N; ++row) {
|
||||
for (size_t col = 0; col < N; ++col) {
|
||||
if (distribution(generator) < nonzeroPercent) {
|
||||
nonzerocols.at(row).push_back(col);
|
||||
nonZeroes++;
|
||||
}
|
||||
}
|
||||
}
|
||||
SpMatrix B(N, N, nonZeroes, SpMatrix::row_wise);
|
||||
for (auto row = B.createbegin(); row != B.createend(); ++row) {
|
||||
for (size_t j = 0; j < nonzerocols[row.index()].size(); ++j) {
|
||||
row.insert(nonzerocols[row.index()][j]);
|
||||
}
|
||||
}
|
||||
// This might not be the most elegant way of filling in a Dune sparse matrix, but it works.
|
||||
for (int i = 0; i < N; ++i) {
|
||||
for (size_t j = 0; j < nonzerocols[i].size(); ++j) {
|
||||
for (size_t c1 = 0; c1 < dim; ++c1) {
|
||||
for (size_t c2 = 0; c2 < dim; ++c2) {
|
||||
B[i][nonzerocols[i][j]][c1][c2] = distribution(generator);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
auto cuSparseMatrix = Opm::cuistl::CuSparseMatrix<double>::fromMatrix(B);
|
||||
// check each column
|
||||
for (size_t component = 0; component < N; ++component) {
|
||||
std::vector<double> inputDataX(N * dim, 0.0);
|
||||
inputDataX[component] = 1.0;
|
||||
std::vector<double> inputDataY(N * dim, .25);
|
||||
auto inputVectorX = Opm::cuistl::CuVector<double>(inputDataX.data(), inputDataX.size());
|
||||
auto inputVectorY = Opm::cuistl::CuVector<double>(inputDataY.data(), inputDataY.size());
|
||||
Vector xHost(N), yHost(N);
|
||||
yHost = inputDataY[0];
|
||||
inputVectorX.copyToHost(xHost);
|
||||
const double alpha = 1.42;
|
||||
cuSparseMatrix.usmv(alpha, inputVectorX, inputVectorY);
|
||||
|
||||
inputVectorY.copyToHost(inputDataY);
|
||||
|
||||
B.usmv(alpha, xHost, yHost);
|
||||
for (size_t i = 0; i < N; ++i) {
|
||||
for (size_t c = 0; c < dim; ++c) {
|
||||
BOOST_CHECK_CLOSE(inputDataY[i * dim + c], yHost[i][c], 1e-7);
|
||||
}
|
||||
}
|
||||
inputVectorX.copyToHost(xHost);
|
||||
|
||||
cuSparseMatrix.mv(inputVectorX, inputVectorY);
|
||||
|
||||
inputVectorY.copyToHost(inputDataY);
|
||||
|
||||
B.mv(xHost, yHost);
|
||||
for (size_t i = 0; i < N; ++i) {
|
||||
for (size_t c = 0; c < dim; ++c) {
|
||||
BOOST_CHECK_CLOSE(inputDataY[i * dim + c], yHost[i][c], 1e-7);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
295
tests/cuistl/test_cuvector.cpp
Normal file
295
tests/cuistl/test_cuvector.cpp
Normal file
@ -0,0 +1,295 @@
|
||||
/*
|
||||
Copyright 2022-2023 SINTEF AS
|
||||
|
||||
This file is part of the Open Porous Media project (OPM).
|
||||
|
||||
OPM is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
OPM is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with OPM. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
#include <config.h>
|
||||
|
||||
#define BOOST_TEST_MODULE TestCuVector
|
||||
|
||||
#include <boost/test/unit_test.hpp>
|
||||
#include <cuda_runtime.h>
|
||||
#include <dune/common/fvector.hh>
|
||||
#include <dune/istl/bvector.hh>
|
||||
#include <opm/simulators/linalg/cuistl/CuVector.hpp>
|
||||
#include <random>
|
||||
|
||||
BOOST_AUTO_TEST_CASE(TestDocumentedUsage)
|
||||
{
|
||||
auto someDataOnCPU = std::vector<double>({1.0, 2.0, 42.0, 59.9451743, 10.7132692});
|
||||
|
||||
auto dataOnGPU = ::Opm::cuistl::CuVector<double>(someDataOnCPU);
|
||||
|
||||
// Multiply by 4.0:
|
||||
dataOnGPU *= 4.0;
|
||||
|
||||
// Get data back on CPU in another vector:
|
||||
auto stdVectorOnCPU = dataOnGPU.asStdVector();
|
||||
|
||||
std::vector<double> correctVector(someDataOnCPU.size());
|
||||
|
||||
std::transform(someDataOnCPU.begin(), someDataOnCPU.end(), correctVector.begin(), [](double x) { return 4 * x; });
|
||||
BOOST_CHECK_EQUAL_COLLECTIONS(
|
||||
stdVectorOnCPU.begin(), stdVectorOnCPU.end(), correctVector.begin(), correctVector.end());
|
||||
}
|
||||
|
||||
BOOST_AUTO_TEST_CASE(TestConstructionSize)
|
||||
{
|
||||
const int numberOfElements = 1234;
|
||||
auto vectorOnGPU = Opm::cuistl::CuVector<double>(numberOfElements);
|
||||
BOOST_CHECK_EQUAL(numberOfElements, vectorOnGPU.dim());
|
||||
}
|
||||
|
||||
BOOST_AUTO_TEST_CASE(TestCopyFromHostConstructor)
|
||||
{
|
||||
std::vector<double> data {{1, 2, 3, 4, 5, 6, 7}};
|
||||
auto vectorOnGPU = Opm::cuistl::CuVector<double>(data.data(), data.size());
|
||||
BOOST_CHECK_EQUAL(data.size(), vectorOnGPU.dim());
|
||||
std::vector<double> buffer(data.size(), 0.0);
|
||||
vectorOnGPU.copyToHost(buffer.data(), buffer.size());
|
||||
BOOST_CHECK_EQUAL_COLLECTIONS(buffer.begin(), buffer.end(), data.begin(), data.end());
|
||||
}
|
||||
|
||||
|
||||
BOOST_AUTO_TEST_CASE(TestCopyFromHostFunction)
|
||||
{
|
||||
std::vector<double> data {{1, 2, 3, 4, 5, 6, 7}};
|
||||
auto vectorOnGPU = Opm::cuistl::CuVector<double>(data.size());
|
||||
BOOST_CHECK_EQUAL(data.size(), vectorOnGPU.dim());
|
||||
vectorOnGPU.copyFromHost(data.data(), data.size());
|
||||
std::vector<double> buffer(data.size(), 0.0);
|
||||
vectorOnGPU.copyToHost(buffer.data(), buffer.size());
|
||||
BOOST_CHECK_EQUAL_COLLECTIONS(buffer.begin(), buffer.end(), data.begin(), data.end());
|
||||
}
|
||||
|
||||
|
||||
BOOST_AUTO_TEST_CASE(TestCopyFromBvector)
|
||||
{
|
||||
auto blockVector = Dune::BlockVector<Dune::FieldVector<double, 2>> {{{42, 43}, {44, 45}, {46, 47}}};
|
||||
auto vectorOnGPU = Opm::cuistl::CuVector<double>(blockVector.dim());
|
||||
vectorOnGPU.copyFromHost(blockVector);
|
||||
std::vector<double> buffer(vectorOnGPU.dim());
|
||||
vectorOnGPU.copyToHost(buffer.data(), buffer.size());
|
||||
|
||||
BOOST_CHECK_EQUAL_COLLECTIONS(
|
||||
buffer.begin(), buffer.end(), &blockVector[0][0], &blockVector[0][0] + blockVector.dim());
|
||||
}
|
||||
|
||||
BOOST_AUTO_TEST_CASE(TestCopyToBvector)
|
||||
{
|
||||
std::vector<double> data {{1, 2, 3, 4, 5, 6, 7, 8, 9}};
|
||||
auto blockVector = Dune::BlockVector<Dune::FieldVector<double, 3>>(3);
|
||||
auto vectorOnGPU = Opm::cuistl::CuVector<double>(data.data(), data.size());
|
||||
vectorOnGPU.copyToHost(blockVector);
|
||||
|
||||
|
||||
BOOST_CHECK_EQUAL_COLLECTIONS(data.begin(), data.end(), &blockVector[0][0], &blockVector[0][0] + blockVector.dim());
|
||||
}
|
||||
|
||||
BOOST_AUTO_TEST_CASE(TestDataPointer)
|
||||
{
|
||||
std::vector<double> data {{1, 2, 3, 4, 5, 6, 7, 8, 9}};
|
||||
auto vectorOnGPU = Opm::cuistl::CuVector<double>(data.data(), data.size());
|
||||
|
||||
std::vector<double> buffer(data.size(), 0.0);
|
||||
cudaMemcpy(buffer.data(), vectorOnGPU.data(), sizeof(double) * data.size(), cudaMemcpyDeviceToHost);
|
||||
BOOST_CHECK_EQUAL_COLLECTIONS(data.begin(), data.end(), buffer.begin(), buffer.end());
|
||||
}
|
||||
|
||||
BOOST_AUTO_TEST_CASE(TestCopyScalarMultiply)
|
||||
{
|
||||
std::vector<double> data {{1, 2, 3, 4, 5, 6, 7}};
|
||||
auto vectorOnGPU = Opm::cuistl::CuVector<double>(data.data(), data.size());
|
||||
BOOST_CHECK_EQUAL(data.size(), vectorOnGPU.dim());
|
||||
const double scalar = 42.25;
|
||||
vectorOnGPU *= scalar;
|
||||
std::vector<double> buffer(data.size(), 0.0);
|
||||
vectorOnGPU.copyToHost(buffer.data(), buffer.size());
|
||||
|
||||
for (size_t i = 0; i < buffer.size(); ++i) {
|
||||
BOOST_CHECK_EQUAL(buffer[i], scalar * data[i]);
|
||||
}
|
||||
}
|
||||
|
||||
BOOST_AUTO_TEST_CASE(TestTwoNorm)
|
||||
{
|
||||
std::vector<double> data {{1, 2, 3, 4, 5, 6, 7}};
|
||||
auto vectorOnGPU = Opm::cuistl::CuVector<double>(data.data(), data.size());
|
||||
auto twoNorm = vectorOnGPU.two_norm();
|
||||
|
||||
double correctAnswer = 0.0;
|
||||
for (double d : data) {
|
||||
correctAnswer += d * d;
|
||||
}
|
||||
correctAnswer = std::sqrt(correctAnswer);
|
||||
BOOST_CHECK_EQUAL(correctAnswer, twoNorm);
|
||||
}
|
||||
|
||||
BOOST_AUTO_TEST_CASE(TestDot)
|
||||
{
|
||||
std::vector<double> dataA {{1, 2, 3, 4, 5, 6, 7}};
|
||||
std::vector<double> dataB {{8, 9, 10, 11, 12, 13, 14}};
|
||||
auto vectorOnGPUA = Opm::cuistl::CuVector<double>(dataA.data(), dataA.size());
|
||||
auto vectorOnGPUB = Opm::cuistl::CuVector<double>(dataB.data(), dataB.size());
|
||||
auto dot = vectorOnGPUA.dot(vectorOnGPUB);
|
||||
|
||||
double correctAnswer = 0.0;
|
||||
for (size_t i = 0; i < dataA.size(); ++i) {
|
||||
correctAnswer += dataA[i] * dataB[i];
|
||||
}
|
||||
correctAnswer = correctAnswer;
|
||||
BOOST_CHECK_EQUAL(correctAnswer, dot);
|
||||
}
|
||||
|
||||
BOOST_AUTO_TEST_CASE(Assigment)
|
||||
{
|
||||
std::vector<double> data {{1, 2, 3, 4, 5, 6, 7}};
|
||||
auto vectorOnGPU = Opm::cuistl::CuVector<double>(data.data(), data.size());
|
||||
vectorOnGPU = 10.0;
|
||||
vectorOnGPU.copyToHost(data.data(), data.size());
|
||||
|
||||
for (double x : data) {
|
||||
BOOST_CHECK_EQUAL(10.0, x);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
BOOST_AUTO_TEST_CASE(CopyAssignment)
|
||||
{
|
||||
std::vector<double> data {{1, 2, 3, 4, 5, 6, 7}};
|
||||
auto vectorOnGPU = Opm::cuistl::CuVector<double>(data.data(), data.size());
|
||||
vectorOnGPU.copyToHost(data.data(), data.size());
|
||||
auto vectorOnGPUB = Opm::cuistl::CuVector<double>(data.size());
|
||||
vectorOnGPUB = 4.0;
|
||||
vectorOnGPUB = vectorOnGPU;
|
||||
|
||||
std::vector<double> output(data.size());
|
||||
vectorOnGPUB.copyToHost(output.data(), output.size());
|
||||
BOOST_CHECK_EQUAL_COLLECTIONS(output.begin(), output.end(), data.begin(), data.end());
|
||||
}
|
||||
|
||||
BOOST_AUTO_TEST_CASE(RandomVectors)
|
||||
{
|
||||
|
||||
using GVector = Opm::cuistl::CuVector<double>;
|
||||
std::srand(0);
|
||||
std::mt19937 generator;
|
||||
std::uniform_real_distribution<double> distribution(-100.0, 100.0);
|
||||
std::uniform_real_distribution<double> distribution01(.0, 1.0);
|
||||
|
||||
const size_t N = 1000;
|
||||
|
||||
const size_t retries = 100;
|
||||
|
||||
for (size_t retry = 0; retry < retries; ++retry) {
|
||||
std::vector<double> a(N);
|
||||
std::vector<double> b(N);
|
||||
|
||||
for (size_t i = 0; i < N; ++i) {
|
||||
a[i] = distribution(generator);
|
||||
b[i] = distribution(generator);
|
||||
}
|
||||
|
||||
auto aGPU = GVector(a);
|
||||
auto bGPU = GVector(b);
|
||||
|
||||
aGPU += bGPU;
|
||||
|
||||
auto aOutputPlus = aGPU.asStdVector();
|
||||
|
||||
for (size_t i = 0; i < N; ++i) {
|
||||
BOOST_CHECK_EQUAL(aOutputPlus[i], a[i] + b[i]);
|
||||
}
|
||||
|
||||
aGPU = GVector(a);
|
||||
aGPU -= bGPU;
|
||||
|
||||
auto aOutputMinus = aGPU.asStdVector();
|
||||
|
||||
for (size_t i = 0; i < N; ++i) {
|
||||
BOOST_CHECK_EQUAL(aOutputMinus[i], a[i] - b[i]);
|
||||
}
|
||||
|
||||
|
||||
aGPU = GVector(a);
|
||||
auto scalar = distribution(generator);
|
||||
aGPU *= scalar;
|
||||
auto aOutputScalar = aGPU.asStdVector();
|
||||
for (size_t i = 0; i < N; ++i) {
|
||||
BOOST_CHECK_EQUAL(aOutputScalar[i], scalar * a[i]);
|
||||
}
|
||||
|
||||
aGPU = GVector(a);
|
||||
aGPU.axpy(scalar, bGPU);
|
||||
auto aOutputSaxypy = aGPU.asStdVector();
|
||||
for (size_t i = 0; i < N; ++i) {
|
||||
BOOST_CHECK_CLOSE(aOutputSaxypy[i], a[i] + scalar * b[i], 1e-10);
|
||||
}
|
||||
|
||||
aGPU = GVector(a);
|
||||
auto dotted = aGPU.dot(bGPU);
|
||||
double correct = 0.0;
|
||||
for (size_t i = 0; i < N; ++i) {
|
||||
correct += a[i] * b[i];
|
||||
}
|
||||
|
||||
BOOST_CHECK_CLOSE(dotted, correct, 1e-10);
|
||||
|
||||
aGPU = GVector(a);
|
||||
auto twoNorm = aGPU.two_norm();
|
||||
double correctTwoNorm = 0.0;
|
||||
for (size_t i = 0; i < N; ++i) {
|
||||
correctTwoNorm += a[i] * a[i];
|
||||
}
|
||||
correctTwoNorm = std::sqrt(correctTwoNorm);
|
||||
|
||||
BOOST_CHECK_CLOSE(twoNorm, correctTwoNorm, 1e-12);
|
||||
|
||||
aGPU = GVector(a);
|
||||
std::vector<int> indexSet;
|
||||
const double rejectCriteria = 0.2;
|
||||
for (size_t i = 0; i < N; ++i) {
|
||||
const auto reject = distribution01(generator);
|
||||
if (reject < rejectCriteria) {
|
||||
indexSet.push_back(i);
|
||||
}
|
||||
}
|
||||
auto indexSetGPU = Opm::cuistl::CuVector<int>(indexSet);
|
||||
|
||||
aGPU.setZeroAtIndexSet(indexSetGPU);
|
||||
auto projectedA = aGPU.asStdVector();
|
||||
for (size_t i = 0; i < N; ++i) {
|
||||
// Yeah, O(N^2) so sue me
|
||||
bool found = std::find(indexSet.begin(), indexSet.end(), i) != indexSet.end();
|
||||
if (found) {
|
||||
BOOST_CHECK_EQUAL(projectedA[i], 0);
|
||||
} else {
|
||||
BOOST_CHECK_EQUAL(projectedA[i], a[i]);
|
||||
}
|
||||
}
|
||||
|
||||
aGPU = GVector(a);
|
||||
auto twoNormAtIndices = aGPU.two_norm(indexSetGPU);
|
||||
|
||||
double correctTwoNormAtIndices = 0.0;
|
||||
for (size_t i = 0; i < indexSet.size(); ++i) {
|
||||
correctTwoNormAtIndices += a[indexSet[i]] * a[indexSet[i]];
|
||||
}
|
||||
correctTwoNormAtIndices = std::sqrt(correctTwoNormAtIndices);
|
||||
|
||||
BOOST_CHECK_CLOSE(correctTwoNormAtIndices, twoNormAtIndices, 1e-13);
|
||||
}
|
||||
}
|
58
tests/cuistl/test_safe_conversion.cpp
Normal file
58
tests/cuistl/test_safe_conversion.cpp
Normal file
@ -0,0 +1,58 @@
|
||||
/*
|
||||
Copyright 2023 SINTEF AS
|
||||
|
||||
This file is part of the Open Porous Media project (OPM).
|
||||
|
||||
OPM is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
OPM is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with OPM. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
#include <config.h>
|
||||
|
||||
#define BOOST_TEST_MODULE TestSafeConversion
|
||||
|
||||
#include <boost/test/unit_test.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/safe_conversion.hpp>
|
||||
|
||||
BOOST_AUTO_TEST_CASE(TestToIntThrowsOutofRange)
|
||||
{
|
||||
BOOST_CHECK_THROW(Opm::cuistl::detail::to_int(size_t(std::numeric_limits<int>::max()) + size_t(1));
|
||||
, std::invalid_argument);
|
||||
}
|
||||
|
||||
BOOST_AUTO_TEST_CASE(TestToIntConvertInRange)
|
||||
{
|
||||
// This might seem slow, but it is really fast:
|
||||
for (size_t i = 0; i <= size_t(1024 * 1024); ++i) {
|
||||
BOOST_CHECK_EQUAL(int(i), Opm::cuistl::detail::to_int(i));
|
||||
}
|
||||
|
||||
BOOST_CHECK_EQUAL(std::numeric_limits<int>::max(),
|
||||
Opm::cuistl::detail::to_int(size_t(std::numeric_limits<int>::max())));
|
||||
}
|
||||
|
||||
|
||||
BOOST_AUTO_TEST_CASE(TestToSizeTThrowsOutofRange)
|
||||
{
|
||||
BOOST_CHECK_THROW(Opm::cuistl::detail::to_size_t(-1);, std::invalid_argument);
|
||||
}
|
||||
|
||||
BOOST_AUTO_TEST_CASE(TestToSizeTConvertInRange)
|
||||
{
|
||||
// This might seem slow, but it is really fast:
|
||||
for (int i = 0; i <= 1024 * 1024; ++i) {
|
||||
BOOST_CHECK_EQUAL(size_t(i), Opm::cuistl::detail::to_size_t(i));
|
||||
}
|
||||
|
||||
BOOST_CHECK_EQUAL(size_t(std::numeric_limits<int>::max()),
|
||||
Opm::cuistl::detail::to_size_t(std::numeric_limits<int>::max()));
|
||||
}
|
Loading…
Reference in New Issue
Block a user