refactor cuvector

Tobias Meyer Andersen 2024-08-22 15:20:20 +02:00
parent 3aa1767548
commit fba1858f42
28 changed files with 234 additions and 234 deletions

View File

@ -553,7 +553,7 @@ if(CUDA_FOUND)
cusparse_handle
cuSparse_matrix_operations
cuVector_operations
cuvector
GpuVector
GpuSparseMatrix
GpuSeqILU0
GpuOwnerOverlapCopy

View File

@ -213,7 +213,7 @@ if (HAVE_CUDA)
ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg detail/preconditionerKernels/DILUKernels.cu)
ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg detail/preconditionerKernels/ILU0Kernels.cu)
ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg detail/preconditionerKernels/JacKernels.cu)
ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg CuVector.cpp)
ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg GpuVector.cpp)
ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg CuView.cpp)
ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg detail/vector_operations.cu)
ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg GpuSparseMatrix.cpp)
@ -240,7 +240,7 @@ if (HAVE_CUDA)
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg GpuDILU.hpp)
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg OpmCuILU0.hpp)
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg GpuJac.hpp)
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg CuVector.hpp)
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg GpuVector.hpp)
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg CuView.hpp)
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg GpuSparseMatrix.hpp)
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg detail/CuMatrixDescription.hpp)
@ -401,7 +401,7 @@ if (HAVE_CUDA)
ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_cusparse_handle.cpp)
ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_cuSparse_matrix_operations.cpp)
ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_GpuSparseMatrix.cpp)
ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_cuvector.cpp)
ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_GpuVector.cpp)
ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_cuVector_operations.cpp)
ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_safe_conversion.cpp)
ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_solver_adapter.cpp)

View File

@ -326,7 +326,7 @@ struct StandardPreconditioners {
const double w = prm.get<double>("relaxation", 1.0);
using field_type = typename V::field_type;
using GpuILU0 = typename gpuistl::
GpuSeqILU0<M, gpuistl::CuVector<field_type>, gpuistl::CuVector<field_type>>;
GpuSeqILU0<M, gpuistl::GpuVector<field_type>, gpuistl::GpuVector<field_type>>;
auto gpuILU0 = std::make_shared<GpuILU0>(op.getmat(), w);
auto adapted = std::make_shared<gpuistl::PreconditionerAdapter<V, V, GpuILU0>>(gpuILU0);
@ -338,7 +338,7 @@ struct StandardPreconditioners {
const double w = prm.get<double>("relaxation", 1.0);
using field_type = typename V::field_type;
using GpuJac =
typename gpuistl::GpuJac<M, gpuistl::CuVector<field_type>, gpuistl::CuVector<field_type>>;
typename gpuistl::GpuJac<M, gpuistl::GpuVector<field_type>, gpuistl::GpuVector<field_type>>;
auto gpuJac = std::make_shared<GpuJac>(op.getmat(), w);
auto adapted = std::make_shared<gpuistl::PreconditionerAdapter<V, V, GpuJac>>(gpuJac);
@ -350,7 +350,7 @@ struct StandardPreconditioners {
const bool split_matrix = prm.get<bool>("split_matrix", true);
const bool tune_gpu_kernels = prm.get<bool>("tune_gpu_kernels", true);
using field_type = typename V::field_type;
using GpuDILU = typename gpuistl::GpuDILU<M, gpuistl::CuVector<field_type>, gpuistl::CuVector<field_type>>;
using GpuDILU = typename gpuistl::GpuDILU<M, gpuistl::GpuVector<field_type>, gpuistl::GpuVector<field_type>>;
auto gpuDILU = std::make_shared<GpuDILU>(op.getmat(), split_matrix, tune_gpu_kernels);
auto adapted = std::make_shared<gpuistl::PreconditionerAdapter<V, V, GpuDILU>>(gpuDILU);
@ -362,7 +362,7 @@ struct StandardPreconditioners {
const bool split_matrix = prm.get<bool>("split_matrix", true);
const bool tune_gpu_kernels = prm.get<bool>("tune_gpu_kernels", true);
using field_type = typename V::field_type;
using OpmCuILU0 = typename gpuistl::OpmCuILU0<M, gpuistl::CuVector<field_type>, gpuistl::CuVector<field_type>>;
using OpmCuILU0 = typename gpuistl::OpmCuILU0<M, gpuistl::GpuVector<field_type>, gpuistl::GpuVector<field_type>>;
auto cuilu0 = std::make_shared<OpmCuILU0>(op.getmat(), split_matrix, tune_gpu_kernels);
auto adapted = std::make_shared<gpuistl::PreconditionerAdapter<V, V, OpmCuILU0>>(cuilu0);
@ -586,7 +586,7 @@ struct StandardPreconditioners<Operator, Dune::Amg::SequentialInformation> {
const double w = prm.get<double>("relaxation", 1.0);
using field_type = typename V::field_type;
using GpuuILU0 = typename gpuistl::
GpuSeqILU0<M, gpuistl::CuVector<field_type>, gpuistl::CuVector<field_type>>;
GpuSeqILU0<M, gpuistl::GpuVector<field_type>, gpuistl::GpuVector<field_type>>;
return std::make_shared<gpuistl::PreconditionerAdapter<V, V, GpuuILU0>>(
std::make_shared<GpuuILU0>(op.getmat(), w));
});
@ -598,7 +598,7 @@ struct StandardPreconditioners<Operator, Dune::Amg::SequentialInformation> {
using matrix_type_to =
typename Dune::BCRSMatrix<Dune::FieldMatrix<float, block_type::dimension, block_type::dimension>>;
using GpuuILU0 = typename gpuistl::
GpuSeqILU0<matrix_type_to, gpuistl::CuVector<float>, gpuistl::CuVector<float>>;
GpuSeqILU0<matrix_type_to, gpuistl::GpuVector<float>, gpuistl::GpuVector<float>>;
using Adapter = typename gpuistl::PreconditionerAdapter<VTo, VTo, GpuuILU0>;
using Converter = typename gpuistl::PreconditionerConvertFieldTypeAdapter<Adapter, M, V, V>;
auto converted = std::make_shared<Converter>(op.getmat());
@ -611,7 +611,7 @@ struct StandardPreconditioners<Operator, Dune::Amg::SequentialInformation> {
const double w = prm.get<double>("relaxation", 1.0);
using field_type = typename V::field_type;
using GPUJac =
typename gpuistl::GpuJac<M, gpuistl::CuVector<field_type>, gpuistl::CuVector<field_type>>;
typename gpuistl::GpuJac<M, gpuistl::GpuVector<field_type>, gpuistl::GpuVector<field_type>>;
return std::make_shared<gpuistl::PreconditionerAdapter<V, V, GPUJac>>(
std::make_shared<GPUJac>(op.getmat(), w));
});
@ -620,7 +620,7 @@ struct StandardPreconditioners<Operator, Dune::Amg::SequentialInformation> {
const bool split_matrix = prm.get<bool>("split_matrix", true);
const bool tune_gpu_kernels = prm.get<bool>("tune_gpu_kernels", true);
using field_type = typename V::field_type;
using CUILU0 = typename gpuistl::OpmCuILU0<M, gpuistl::CuVector<field_type>, gpuistl::CuVector<field_type>>;
using CUILU0 = typename gpuistl::OpmCuILU0<M, gpuistl::GpuVector<field_type>, gpuistl::GpuVector<field_type>>;
return std::make_shared<gpuistl::PreconditionerAdapter<V, V, CUILU0>>(std::make_shared<CUILU0>(op.getmat(), split_matrix, tune_gpu_kernels));
});
@ -629,7 +629,7 @@ struct StandardPreconditioners<Operator, Dune::Amg::SequentialInformation> {
const bool split_matrix = prm.get<bool>("split_matrix", true);
const bool tune_gpu_kernels = prm.get<bool>("tune_gpu_kernels", true);
using field_type = typename V::field_type;
using GPUDILU = typename gpuistl::GpuDILU<M, gpuistl::CuVector<field_type>, gpuistl::CuVector<field_type>>;
using GPUDILU = typename gpuistl::GpuDILU<M, gpuistl::GpuVector<field_type>, gpuistl::GpuVector<field_type>>;
return std::make_shared<gpuistl::PreconditionerAdapter<V, V, GPUDILU>>(std::make_shared<GPUDILU>(op.getmat(), split_matrix, tune_gpu_kernels));
});
@ -639,7 +639,7 @@ struct StandardPreconditioners<Operator, Dune::Amg::SequentialInformation> {
using block_type = typename V::block_type;
using VTo = Dune::BlockVector<Dune::FieldVector<float, block_type::dimension>>;
using matrix_type_to = typename Dune::BCRSMatrix<Dune::FieldMatrix<float, block_type::dimension, block_type::dimension>>;
using GpuDILU = typename gpuistl::GpuDILU<matrix_type_to, gpuistl::CuVector<float>, gpuistl::CuVector<float>>;
using GpuDILU = typename gpuistl::GpuDILU<matrix_type_to, gpuistl::GpuVector<float>, gpuistl::GpuVector<float>>;
using Adapter = typename gpuistl::PreconditionerAdapter<VTo, VTo, GpuDILU>;
using Converter = typename gpuistl::PreconditionerConvertFieldTypeAdapter<Adapter, M, V, V>;
auto converted = std::make_shared<Converter>(op.getmat());
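Every hunk in this file makes the same substitution: gpuistl::CuVector becomes gpuistl::GpuVector in the preconditioner template arguments. A condensed sketch of the resulting factory pattern, assuming M, V, op, and prm as in the surrounding code:

    const bool split_matrix = prm.get<bool>("split_matrix", true);
    const bool tune_gpu_kernels = prm.get<bool>("tune_gpu_kernels", true);
    using field_type = typename V::field_type;
    // Only the vector template arguments change; the construction logic stays identical.
    using GpuDILU = typename gpuistl::GpuDILU<M, gpuistl::GpuVector<field_type>, gpuistl::GpuVector<field_type>>;
    auto gpuDILU = std::make_shared<GpuDILU>(op.getmat(), split_matrix, tune_gpu_kernels);
    return std::make_shared<gpuistl::PreconditionerAdapter<V, V, GpuDILU>>(gpuDILU);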

View File

@ -28,7 +28,7 @@
#include <opm/simulators/linalg/cuistl/detail/autotuner.hpp>
#include <opm/simulators/linalg/cuistl/GpuDILU.hpp>
#include <opm/simulators/linalg/cuistl/GpuSparseMatrix.hpp>
#include <opm/simulators/linalg/cuistl/CuVector.hpp>
#include <opm/simulators/linalg/cuistl/GpuVector.hpp>
#include <opm/simulators/linalg/cuistl/detail/coloringAndReorderingUtils.hpp>
#include <opm/simulators/linalg/cuistl/detail/cusparse_matrix_operations.hpp>
#include <opm/simulators/linalg/cuistl/detail/preconditionerKernels/DILUKernels.hpp>
@ -71,7 +71,7 @@ GpuDILU<M, X, Y, l>::GpuDILU(const M& A, bool splitMatrix, bool tuneKernels)
m_gpuMatrix.nonzeroes(),
A.nonzeroes()));
if (m_splitMatrix) {
m_gpuMatrixReorderedDiag = std::make_unique<CuVector<field_type>>(blocksize_ * blocksize_ * m_cpuMatrix.N());
m_gpuMatrixReorderedDiag = std::make_unique<GpuVector<field_type>>(blocksize_ * blocksize_ * m_cpuMatrix.N());
std::tie(m_gpuMatrixReorderedLower, m_gpuMatrixReorderedUpper)
= detail::extractLowerAndUpperMatrices<M, field_type, GpuSparseMatrix<field_type>>(m_cpuMatrix,
m_reorderedToNatural);
@ -278,8 +278,8 @@ GpuDILU<M, X, Y, l>::tuneThreadBlockSizes()
m_DILUFactorizationThreadBlockSize = detail::tuneThreadBlockSize(tuneFactorizationThreadBlockSizeInUpdate, "Kernel computing DILU factorization");
// tune the thread-block size of the apply
CuVector<field_type> tmpV(m_gpuMatrix.N() * m_gpuMatrix.blockSize());
CuVector<field_type> tmpD(m_gpuMatrix.N() * m_gpuMatrix.blockSize());
GpuVector<field_type> tmpV(m_gpuMatrix.N() * m_gpuMatrix.blockSize());
GpuVector<field_type> tmpD(m_gpuMatrix.N() * m_gpuMatrix.blockSize());
tmpD = 1;
auto tuneLowerSolveThreadBlockSizeInApply = [this, &tmpV, &tmpD](int lowerSolveThreadBlockSize){
@ -296,11 +296,11 @@ GpuDILU<M, X, Y, l>::tuneThreadBlockSizes()
} // namespace Opm::gpuistl
#define INSTANTIATE_CUDILU_DUNE(realtype, blockdim) \
template class ::Opm::gpuistl::GpuDILU<Dune::BCRSMatrix<Dune::FieldMatrix<realtype, blockdim, blockdim>>, \
::Opm::gpuistl::CuVector<realtype>, \
::Opm::gpuistl::CuVector<realtype>>; \
::Opm::gpuistl::GpuVector<realtype>, \
::Opm::gpuistl::GpuVector<realtype>>; \
template class ::Opm::gpuistl::GpuDILU<Dune::BCRSMatrix<Opm::MatrixBlock<realtype, blockdim, blockdim>>, \
::Opm::gpuistl::CuVector<realtype>, \
::Opm::gpuistl::CuVector<realtype>>
::Opm::gpuistl::GpuVector<realtype>, \
::Opm::gpuistl::GpuVector<realtype>>
INSTANTIATE_CUDILU_DUNE(double, 1);
INSTANTIATE_CUDILU_DUNE(double, 2);

View File

@ -37,7 +37,7 @@ namespace Opm::gpuistl
//! \tparam l Ignored. Just there to have the same number of template arguments
//! as other preconditioners.
//!
//! \note We assume X and Y are both CuVector<real_type>, but we leave them as template
//! \note We assume X and Y are both GpuVector<real_type>, but we leave them as template
//! arguments in case of future additions.
template <class M, class X, class Y, int l = 1>
class GpuDILU : public Dune::PreconditionerWithUpdate<X, Y>
@ -126,13 +126,13 @@ private:
std::unique_ptr<CuMat> m_gpuMatrixReorderedLower;
std::unique_ptr<CuMat> m_gpuMatrixReorderedUpper;
//! \brief If matrix splitting is enabled, we also store the diagonal separately
std::unique_ptr<CuVector<field_type>> m_gpuMatrixReorderedDiag;
std::unique_ptr<GpuVector<field_type>> m_gpuMatrixReorderedDiag;
//! row conversion from natural to reordered matrix indices stored on the GPU
CuVector<int> m_gpuNaturalToReorder;
GpuVector<int> m_gpuNaturalToReorder;
//! row conversion from reordered to natural matrix indices stored on the GPU
CuVector<int> m_gpuReorderToNatural;
GpuVector<int> m_gpuReorderToNatural;
//! \brief Stores the inverted diagonal that we use in DILU
CuVector<field_type> m_gpuDInv;
GpuVector<field_type> m_gpuDInv;
//! \brief Bool storing whether or not we should store matrices in a split format
bool m_splitMatrix;
//! \brief Bool storing whether or not we will tune the threadblock sizes. Only used for AMD cards

View File

@ -21,7 +21,7 @@
#include <fmt/core.h>
#include <opm/common/ErrorMacros.hpp>
#include <opm/simulators/linalg/cuistl/GpuJac.hpp>
#include <opm/simulators/linalg/cuistl/CuVector.hpp>
#include <opm/simulators/linalg/cuistl/GpuVector.hpp>
#include <opm/simulators/linalg/cuistl/detail/preconditionerKernels/JacKernels.hpp>
#include <opm/simulators/linalg/cuistl/detail/vector_operations.hpp>
#include <opm/simulators/linalg/matrixblock.hh>
@ -111,11 +111,11 @@ GpuJac<M, X, Y, l>::invertDiagonalAndFlatten()
} // namespace Opm::gpuistl
#define INSTANTIATE_CUJAC_DUNE(realtype, blockdim) \
template class ::Opm::gpuistl::GpuJac<Dune::BCRSMatrix<Dune::FieldMatrix<realtype, blockdim, blockdim>>, \
::Opm::gpuistl::CuVector<realtype>, \
::Opm::gpuistl::CuVector<realtype>>; \
::Opm::gpuistl::GpuVector<realtype>, \
::Opm::gpuistl::GpuVector<realtype>>; \
template class ::Opm::gpuistl::GpuJac<Dune::BCRSMatrix<Opm::MatrixBlock<realtype, blockdim, blockdim>>, \
::Opm::gpuistl::CuVector<realtype>, \
::Opm::gpuistl::CuVector<realtype>>
::Opm::gpuistl::GpuVector<realtype>, \
::Opm::gpuistl::GpuVector<realtype>>
INSTANTIATE_CUJAC_DUNE(double, 1);
INSTANTIATE_CUJAC_DUNE(double, 2);

View File

@ -40,7 +40,7 @@ namespace Opm::gpuistl
//! \tparam l Ignored. Just there to have the same number of template arguments
//! as other preconditioners.
//!
//! \note We assume X and Y are both CuVector<real_type>, but we leave them as template
//! \note We assume X and Y are both GpuVector<real_type>, but we leave them as template
//! arguments in case of future additions.
template <class M, class X, class Y, int l = 1>
class GpuJac : public Dune::PreconditionerWithUpdate<X, Y>
@ -106,7 +106,7 @@ private:
//! \brief The A matrix stored on the gpu
GpuSparseMatrix<field_type> m_gpuMatrix;
//! \brief the diagonal of cuMatrix inverted, and then flattened to fit in a vector
CuVector<field_type> m_diagInvFlattened;
GpuVector<field_type> m_diagInvFlattened;
void invertDiagonalAndFlatten();
};

View File

@ -21,7 +21,7 @@
#include <dune/istl/owneroverlapcopy.hh>
#include <memory>
#include <mutex>
#include <opm/simulators/linalg/cuistl/CuVector.hpp>
#include <opm/simulators/linalg/cuistl/GpuVector.hpp>
#include <vector>
namespace Opm::gpuistl
@ -36,7 +36,7 @@ namespace Opm::gpuistl
template<class field_type, class OwnerOverlapCopyCommunicationType>
class GPUSender {
public:
using X = CuVector<field_type>;
using X = GpuVector<field_type>;
GPUSender(const OwnerOverlapCopyCommunicationType& cpuOwnerOverlapCopy) : m_cpuOwnerOverlapCopy(cpuOwnerOverlapCopy){}
@ -97,8 +97,8 @@ protected:
// premature optimization, in the sense that we could just initialize these indices
// always, but they are not always used.
mutable std::once_flag m_initializedIndices;
mutable std::unique_ptr<CuVector<int>> m_indicesOwner;
mutable std::unique_ptr<CuVector<int>> m_indicesCopy;
mutable std::unique_ptr<GpuVector<int>> m_indicesOwner;
mutable std::unique_ptr<GpuVector<int>> m_indicesCopy;
const OwnerOverlapCopyCommunicationType& m_cpuOwnerOverlapCopy;
};
@ -113,7 +113,7 @@ template <class field_type, int block_size, class OwnerOverlapCopyCommunicationT
class GPUObliviousMPISender : public GPUSender<field_type, OwnerOverlapCopyCommunicationType>
{
public:
using X = CuVector<field_type>;
using X = GpuVector<field_type>;
GPUObliviousMPISender(const OwnerOverlapCopyCommunicationType& cpuOwnerOverlapCopy)
: GPUSender<field_type, OwnerOverlapCopyCommunicationType>(cpuOwnerOverlapCopy)
@ -151,8 +151,8 @@ private:
}
}
this->m_indicesCopy = std::make_unique<CuVector<int>>(indicesCopyOnCPU);
this->m_indicesOwner = std::make_unique<CuVector<int>>(indicesOwnerCPU);
this->m_indicesCopy = std::make_unique<GpuVector<int>>(indicesCopyOnCPU);
this->m_indicesOwner = std::make_unique<GpuVector<int>>(indicesOwnerCPU);
}
};
@ -168,7 +168,7 @@ template <class field_type, int block_size, class OwnerOverlapCopyCommunicationT
class GPUAwareMPISender : public GPUSender<field_type, OwnerOverlapCopyCommunicationType>
{
public:
using X = CuVector<field_type>;
using X = GpuVector<field_type>;
GPUAwareMPISender(const OwnerOverlapCopyCommunicationType& cpuOwnerOverlapCopy)
: GPUSender<field_type, OwnerOverlapCopyCommunicationType>(cpuOwnerOverlapCopy)
@ -178,7 +178,7 @@ public:
void copyOwnerToAll(const X& source, X& dest) const override
{
OPM_ERROR_IF(&source != &dest, "The provided CuVectors' address did not match"); // In this context, source == dest!!!
OPM_ERROR_IF(&source != &dest, "The provided GpuVectors' address did not match"); // In this context, source == dest!!!
std::call_once(this->m_initializedIndices, [&]() { initIndexSet(); });
int rank = this->m_cpuOwnerOverlapCopy.communicator().rank();
@ -251,10 +251,10 @@ public:
}
private:
mutable std::unique_ptr<CuVector<int>> m_commpairIndicesCopy;
mutable std::unique_ptr<CuVector<int>> m_commpairIndicesOwner;
mutable std::unique_ptr<CuVector<field_type>> m_GPUSendBuf;
mutable std::unique_ptr<CuVector<field_type>> m_GPURecvBuf;
mutable std::unique_ptr<GpuVector<int>> m_commpairIndicesCopy;
mutable std::unique_ptr<GpuVector<int>> m_commpairIndicesOwner;
mutable std::unique_ptr<GpuVector<field_type>> m_GPUSendBuf;
mutable std::unique_ptr<GpuVector<field_type>> m_GPURecvBuf;
struct MessageInformation
{
@ -332,11 +332,11 @@ private:
}
}
m_commpairIndicesCopy = std::make_unique<CuVector<int>>(commpairIndicesCopyOnCPU);
m_commpairIndicesOwner = std::make_unique<CuVector<int>>(commpairIndicesOwnerCPU);
m_commpairIndicesCopy = std::make_unique<GpuVector<int>>(commpairIndicesCopyOnCPU);
m_commpairIndicesOwner = std::make_unique<GpuVector<int>>(commpairIndicesOwnerCPU);
m_GPUSendBuf = std::make_unique<CuVector<field_type>>(sendBufIdx * block_size);
m_GPURecvBuf = std::make_unique<CuVector<field_type>>(recvBufIdx * block_size);
m_GPUSendBuf = std::make_unique<GpuVector<field_type>>(sendBufIdx * block_size);
m_GPURecvBuf = std::make_unique<GpuVector<field_type>>(recvBufIdx * block_size);
}
void initIndexSet() const override
@ -360,8 +360,8 @@ private:
}
}
this->m_indicesCopy = std::make_unique<CuVector<int>>(indicesCopyOnCPU);
this->m_indicesOwner = std::make_unique<CuVector<int>>(indicesOwnerCPU);
this->m_indicesCopy = std::make_unique<GpuVector<int>>(indicesCopyOnCPU);
this->m_indicesOwner = std::make_unique<GpuVector<int>>(indicesOwnerCPU);
buildCommPairIdxs();
}
@ -371,11 +371,11 @@ private:
* @brief CUDA compatible variant of Dune::OwnerOverlapCopyCommunication
*
* This class can essentially be seen as an adapter around Dune::OwnerOverlapCopyCommunication, and should work as
* a Dune::OwnerOverlapCopyCommunication on CuVectors
* a Dune::OwnerOverlapCopyCommunication on GpuVectors
*
* @note This currently only has the functionality to parallelize the linear solve.
*
* @tparam field_type should be a field_type supported by CuVector (double, float)
* @tparam field_type should be a field_type supported by GpuVector (double, float)
* @tparam block_size the block size used (this is relevant for say figuring out the correct indices)
* @tparam OwnerOverlapCopyCommunicationType should mimic Dune::OwnerOverlapCopyCommunication.
*/
@ -383,7 +383,7 @@ template <class field_type, int block_size, class OwnerOverlapCopyCommunicationT
class GpuOwnerOverlapCopy
{
public:
using X = CuVector<field_type>;
using X = GpuVector<field_type>;
GpuOwnerOverlapCopy(std::shared_ptr<GPUSender<field_type, OwnerOverlapCopyCommunicationType>> sender) : m_sender(sender){}
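For orientation, a minimal sketch of how the classes in this file compose, mirroring the unit test further down; ownerOverlapCopy is an existing Dune::OwnerOverlapCopyCommunication<int>, and we assume GpuOwnerOverlapCopy forwards copyOwnerToAll to its sender:

    using Comm = Dune::OwnerOverlapCopyCommunication<int>;
    // GPUObliviousMPISender stages data through the host; GPUAwareMPISender
    // hands device pointers directly to MPI instead.
    auto sender = std::make_shared<Opm::gpuistl::GPUObliviousMPISender<double, 1, Comm>>(ownerOverlapCopy);
    auto gpuComm = Opm::gpuistl::GpuOwnerOverlapCopy<double, 1, Comm>(sender);

    auto x = Opm::gpuistl::GpuVector<double>(std::vector<double>{1.0, 2.0, 3.0});
    gpuComm.copyOwnerToAll(x, x); // halo exchange performed in place on the GPU vector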

View File

@ -332,7 +332,7 @@ GpuSeqILU0<M, X, Y, l>::updateILUConfiguration()
{
auto bufferSize = findBufferSize();
if (!m_buffer || m_buffer->dim() < bufferSize) {
m_buffer.reset(new CuVector<field_type>((bufferSize + sizeof(field_type) - 1) / sizeof(field_type)));
m_buffer.reset(new GpuVector<field_type>((bufferSize + sizeof(field_type) - 1) / sizeof(field_type)));
}
analyzeMatrix();
createILU();
@ -340,11 +340,11 @@ GpuSeqILU0<M, X, Y, l>::updateILUConfiguration()
} // namespace Opm::gpuistl
#define INSTANTIATE_GPUSEQILU0_DUNE(realtype, blockdim) \
template class ::Opm::gpuistl::GpuSeqILU0<Dune::BCRSMatrix<Dune::FieldMatrix<realtype, blockdim, blockdim>>, \
::Opm::gpuistl::CuVector<realtype>, \
::Opm::gpuistl::CuVector<realtype>>; \
::Opm::gpuistl::GpuVector<realtype>, \
::Opm::gpuistl::GpuVector<realtype>>; \
template class ::Opm::gpuistl::GpuSeqILU0<Dune::BCRSMatrix<Opm::MatrixBlock<realtype, blockdim, blockdim>>, \
::Opm::gpuistl::CuVector<realtype>, \
::Opm::gpuistl::CuVector<realtype>>
::Opm::gpuistl::GpuVector<realtype>, \
::Opm::gpuistl::GpuVector<realtype>>
INSTANTIATE_GPUSEQILU0_DUNE(double, 1);

View File

@ -43,7 +43,7 @@ namespace Opm::gpuistl
//! \tparam l Ignored. Just there to have the same number of template arguments
//! as other preconditioners.
//!
//! \note We assume X and Y are both CuVector<real_type>, but we leave them as template
//! \note We assume X and Y are both GpuVector<real_type>, but we leave them as template
//! arguments in case of future additions.
template <class M, class X, class Y, int l = 1>
class GpuSeqILU0 : public Dune::PreconditionerWithUpdate<X, Y>
@ -112,7 +112,7 @@ private:
//! modified in the constructor to be the proper LU decomposition.
GpuSparseMatrix<field_type> m_LU;
CuVector<field_type> m_temporaryStorage;
GpuVector<field_type> m_temporaryStorage;
detail::GpuSparseMatrixDescriptionPtr m_descriptionL;
@ -121,7 +121,7 @@ private:
detail::CuSparseResource<bsrsv2Info_t> m_infoU;
detail::CuSparseResource<bsrilu02Info_t> m_infoM;
std::unique_ptr<CuVector<field_type>> m_buffer;
std::unique_ptr<GpuVector<field_type>> m_buffer;
detail::CuSparseHandle& m_cuSparseHandle;
bool m_analysisDone = false;

View File

@ -198,7 +198,7 @@ GpuSparseMatrix<T>::setNonUnitDiagonal()
template <typename T>
void
GpuSparseMatrix<T>::mv(const CuVector<T>& x, CuVector<T>& y) const
GpuSparseMatrix<T>::mv(const GpuVector<T>& x, GpuVector<T>& y) const
{
assertSameSize(x);
assertSameSize(y);
@ -232,7 +232,7 @@ GpuSparseMatrix<T>::mv(const CuVector<T>& x, CuVector<T>& y) const
template <typename T>
void
GpuSparseMatrix<T>::umv(const CuVector<T>& x, CuVector<T>& y) const
GpuSparseMatrix<T>::umv(const GpuVector<T>& x, GpuVector<T>& y) const
{
assertSameSize(x);
assertSameSize(y);
@ -267,7 +267,7 @@ GpuSparseMatrix<T>::umv(const CuVector<T>& x, CuVector<T>& y) const
template <typename T>
void
GpuSparseMatrix<T>::usmv(T alpha, const CuVector<T>& x, CuVector<T>& y) const
GpuSparseMatrix<T>::usmv(T alpha, const GpuVector<T>& x, GpuVector<T>& y) const
{
assertSameSize(x);
assertSameSize(y);

View File

@ -22,7 +22,7 @@
#include <iostream>
#include <memory>
#include <opm/common/ErrorMacros.hpp>
#include <opm/simulators/linalg/cuistl/CuVector.hpp>
#include <opm/simulators/linalg/cuistl/GpuVector.hpp>
#include <opm/simulators/linalg/cuistl/detail/CuMatrixDescription.hpp>
#include <opm/simulators/linalg/cuistl/detail/CuSparseHandle.hpp>
#include <opm/simulators/linalg/cuistl/detail/safe_conversion.hpp>
@ -144,7 +144,7 @@ public:
*
* @note Read the CuSPARSE documentation on Block Compressed Sparse Row Format (BSR) for the exact ordering.
*/
CuVector<T>& getNonZeroValues()
GpuVector<T>& getNonZeroValues()
{
return m_nonZeroElements;
}
@ -154,7 +154,7 @@ public:
*
* @note Read the CuSPARSE documentation on Block Compressed Sparse Row Format (BSR) for the exact ordering.
*/
const CuVector<T>& getNonZeroValues() const
const GpuVector<T>& getNonZeroValues() const
{
return m_nonZeroElements;
}
@ -164,7 +164,7 @@ public:
*
* @note Read the CuSPARSE documentation on Block Compressed Sparse Row Format (BSR) for the exact ordering.
*/
CuVector<int>& getRowIndices()
GpuVector<int>& getRowIndices()
{
return m_rowIndices;
}
@ -174,7 +174,7 @@ public:
*
* @note Read the CuSPARSE documentation on Block Compressed Sparse Row Format (BSR) for the exact ordering.
*/
const CuVector<int>& getRowIndices() const
const GpuVector<int>& getRowIndices() const
{
return m_rowIndices;
}
@ -184,7 +184,7 @@ public:
*
* @return Read the CuSPARSE documentation on Block Compressed Sparse Row Format (BSR) for the exact ordering.
*/
CuVector<int>& getColumnIndices()
GpuVector<int>& getColumnIndices()
{
return m_columnIndices;
}
@ -194,7 +194,7 @@ public:
*
* @return Read the CuSPARSE documentation on Block Compressed Sparse Row Format (BSR) for the exact ordering.
*/
const CuVector<int>& getColumnIndices() const
const GpuVector<int>& getColumnIndices() const
{
return m_columnIndices;
}
@ -245,7 +245,7 @@ public:
*
* @note Due to limitations of CuSparse, this is only supported for block sizes greater than 1.
*/
virtual void mv(const CuVector<T>& x, CuVector<T>& y) const;
virtual void mv(const GpuVector<T>& x, GpuVector<T>& y) const;
/**
* @brief umv computes y=Ax+y
@ -254,7 +254,7 @@ public:
*
* @note Due to limitations of CuSparse, this is only supported for block sizes greater than 1.
*/
virtual void umv(const CuVector<T>& x, CuVector<T>& y) const;
virtual void umv(const GpuVector<T>& x, GpuVector<T>& y) const;
/**
@ -264,7 +264,7 @@ public:
*
* @note Due to limitations of CuSparse, this is only supported for block sizes greater than 1.
*/
virtual void usmv(T alpha, const CuVector<T>& x, CuVector<T>& y) const;
virtual void usmv(T alpha, const GpuVector<T>& x, GpuVector<T>& y) const;
/**
* @brief updateNonzeroValues updates the non-zero values by using the non-zero values of the supplied matrix
@ -280,9 +280,9 @@ public:
void updateNonzeroValues(const MatrixType& matrix, bool copyNonZeroElementsDirectly = false);
private:
CuVector<T> m_nonZeroElements;
CuVector<int> m_columnIndices;
CuVector<int> m_rowIndices;
GpuVector<T> m_nonZeroElements;
GpuVector<int> m_columnIndices;
GpuVector<int> m_rowIndices;
// Notice that we store these three as int to make sure we are cusparse compatible.
//
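A small sketch of the renamed SpMV interface, assuming A is an existing GpuSparseMatrix<double> with block size greater than 1 (the notes above state that CuSparse only supports mv/umv/usmv in that case):

    // Vectors sized to the matrix: N() block rows of blockSize() entries each.
    auto x = Opm::gpuistl::GpuVector<double>(A.N() * A.blockSize());
    auto y = Opm::gpuistl::GpuVector<double>(A.N() * A.blockSize());
    x = 1.0;            // operator=(T scalar) sets every element
    A.mv(x, y);         // y  = Ax
    A.umv(x, y);        // y += Ax
    A.usmv(0.5, x, y);  // y += 0.5 * Ax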

View File

@ -20,7 +20,7 @@
#include <cuda.h>
#include <cuda_runtime.h>
#include <fmt/core.h>
#include <opm/simulators/linalg/cuistl/CuVector.hpp>
#include <opm/simulators/linalg/cuistl/GpuVector.hpp>
#include <opm/simulators/linalg/cuistl/detail/cublas_safe_call.hpp>
#include <opm/simulators/linalg/cuistl/detail/cublas_wrapper.hpp>
#include <opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp>
@ -30,13 +30,13 @@ namespace Opm::gpuistl
{
template <class T>
CuVector<T>::CuVector(const std::vector<T>& data)
: CuVector(data.data(), detail::to_int(data.size()))
GpuVector<T>::GpuVector(const std::vector<T>& data)
: GpuVector(data.data(), detail::to_int(data.size()))
{
}
template <class T>
CuVector<T>::CuVector(const size_t numberOfElements)
GpuVector<T>::GpuVector(const size_t numberOfElements)
: m_numberOfElements(detail::to_int(numberOfElements))
, m_cuBlasHandle(detail::CuBlasHandle::getInstance())
{
@ -44,8 +44,8 @@ CuVector<T>::CuVector(const size_t numberOfElements)
}
template <class T>
CuVector<T>::CuVector(const T* dataOnHost, const size_t numberOfElements)
: CuVector(numberOfElements)
GpuVector<T>::GpuVector(const T* dataOnHost, const size_t numberOfElements)
: GpuVector(numberOfElements)
{
OPM_CUDA_SAFE_CALL(cudaMemcpy(
@ -53,8 +53,8 @@ CuVector<T>::CuVector(const T* dataOnHost, const size_t numberOfElements)
}
template <class T>
CuVector<T>&
CuVector<T>::operator=(T scalar)
GpuVector<T>&
GpuVector<T>::operator=(T scalar)
{
assertHasElements();
detail::setVectorValue(data(), detail::to_size_t(m_numberOfElements), scalar);
@ -62,8 +62,8 @@ CuVector<T>::operator=(T scalar)
}
template <class T>
CuVector<T>&
CuVector<T>::operator=(const CuVector<T>& other)
GpuVector<T>&
GpuVector<T>::operator=(const GpuVector<T>& other)
{
assertHasElements();
assertSameSize(other);
@ -76,8 +76,8 @@ CuVector<T>::operator=(const CuVector<T>& other)
}
template <class T>
CuVector<T>::CuVector(const CuVector<T>& other)
: CuVector(other.m_numberOfElements)
GpuVector<T>::GpuVector(const GpuVector<T>& other)
: GpuVector(other.m_numberOfElements)
{
assertHasElements();
assertSameSize(other);
@ -88,21 +88,21 @@ CuVector<T>::CuVector(const CuVector<T>& other)
}
template <class T>
CuVector<T>::~CuVector()
GpuVector<T>::~GpuVector()
{
OPM_CUDA_WARN_IF_ERROR(cudaFree(m_dataOnDevice));
}
template <typename T>
const T*
CuVector<T>::data() const
GpuVector<T>::data() const
{
return m_dataOnDevice;
}
template <typename T>
typename CuVector<T>::size_type
CuVector<T>::dim() const
typename GpuVector<T>::size_type
GpuVector<T>::dim() const
{
// Note that there is no way for m_numberOfElements to be non-positive,
// but for sanity we still use the safe conversion function here.
@ -114,7 +114,7 @@ CuVector<T>::dim() const
template <typename T>
std::vector<T>
CuVector<T>::asStdVector() const
GpuVector<T>::asStdVector() const
{
std::vector<T> temporary(detail::to_size_t(m_numberOfElements));
copyToHost(temporary);
@ -123,21 +123,21 @@ CuVector<T>::asStdVector() const
template <typename T>
void
CuVector<T>::setZeroAtIndexSet(const CuVector<int>& indexSet)
GpuVector<T>::setZeroAtIndexSet(const GpuVector<int>& indexSet)
{
detail::setZeroAtIndexSet(m_dataOnDevice, indexSet.dim(), indexSet.data());
}
template <typename T>
void
CuVector<T>::assertSameSize(const CuVector<T>& x) const
GpuVector<T>::assertSameSize(const GpuVector<T>& x) const
{
assertSameSize(x.m_numberOfElements);
}
template <typename T>
void
CuVector<T>::assertSameSize(int size) const
GpuVector<T>::assertSameSize(int size) const
{
if (size != m_numberOfElements) {
OPM_THROW(std::invalid_argument,
@ -147,7 +147,7 @@ CuVector<T>::assertSameSize(int size) const
template <typename T>
void
CuVector<T>::assertHasElements() const
GpuVector<T>::assertHasElements() const
{
if (m_numberOfElements <= 0) {
OPM_THROW(std::invalid_argument, "We have 0 elements");
@ -156,14 +156,14 @@ CuVector<T>::assertHasElements() const
template <typename T>
T*
CuVector<T>::data()
GpuVector<T>::data()
{
return m_dataOnDevice;
}
template <class T>
CuVector<T>&
CuVector<T>::operator*=(const T& scalar)
GpuVector<T>&
GpuVector<T>::operator*=(const T& scalar)
{
assertHasElements();
OPM_CUBLAS_SAFE_CALL(detail::cublasScal(m_cuBlasHandle.get(), m_numberOfElements, &scalar, data(), 1));
@ -171,8 +171,8 @@ CuVector<T>::operator*=(const T& scalar)
}
template <class T>
CuVector<T>&
CuVector<T>::axpy(T alpha, const CuVector<T>& y)
GpuVector<T>&
GpuVector<T>::axpy(T alpha, const GpuVector<T>& y)
{
assertHasElements();
assertSameSize(y);
@ -182,7 +182,7 @@ CuVector<T>::axpy(T alpha, const CuVector<T>& y)
template <class T>
T
CuVector<T>::dot(const CuVector<T>& other) const
GpuVector<T>::dot(const GpuVector<T>& other) const
{
assertHasElements();
assertSameSize(other);
@ -193,7 +193,7 @@ CuVector<T>::dot(const CuVector<T>& other) const
}
template <class T>
T
CuVector<T>::two_norm() const
GpuVector<T>::two_norm() const
{
assertHasElements();
T result = T(0);
@ -203,14 +203,14 @@ CuVector<T>::two_norm() const
template <typename T>
T
CuVector<T>::dot(const CuVector<T>& other, const CuVector<int>& indexSet, CuVector<T>& buffer) const
GpuVector<T>::dot(const GpuVector<T>& other, const GpuVector<int>& indexSet, GpuVector<T>& buffer) const
{
return detail::innerProductAtIndices(m_cuBlasHandle.get(), m_dataOnDevice, other.data(), buffer.data(), indexSet.dim(), indexSet.data());
}
template <typename T>
T
CuVector<T>::two_norm(const CuVector<int>& indexSet, CuVector<T>& buffer) const
GpuVector<T>::two_norm(const GpuVector<int>& indexSet, GpuVector<T>& buffer) const
{
// TODO: [perf] Optimize this to a single call
return std::sqrt(this->dot(*this, indexSet, buffer));
@ -218,23 +218,23 @@ CuVector<T>::two_norm(const CuVector<int>& indexSet, CuVector<T>& buffer) const
template <typename T>
T
CuVector<T>::dot(const CuVector<T>& other, const CuVector<int>& indexSet) const
GpuVector<T>::dot(const GpuVector<T>& other, const GpuVector<int>& indexSet) const
{
CuVector<T> buffer(indexSet.dim());
GpuVector<T> buffer(indexSet.dim());
return detail::innerProductAtIndices(m_cuBlasHandle.get(), m_dataOnDevice, other.data(), buffer.data(), indexSet.dim(), indexSet.data());
}
template <typename T>
T
CuVector<T>::two_norm(const CuVector<int>& indexSet) const
GpuVector<T>::two_norm(const GpuVector<int>& indexSet) const
{
CuVector<T> buffer(indexSet.dim());
GpuVector<T> buffer(indexSet.dim());
// TODO: [perf] Optimize this to a single call
return std::sqrt(this->dot(*this, indexSet, buffer));
}
template <class T>
CuVector<T>&
CuVector<T>::operator+=(const CuVector<T>& other)
GpuVector<T>&
GpuVector<T>::operator+=(const GpuVector<T>& other)
{
assertHasElements();
assertSameSize(other);
@ -243,8 +243,8 @@ CuVector<T>::operator+=(const CuVector<T>& other)
}
template <class T>
CuVector<T>&
CuVector<T>::operator-=(const CuVector<T>& other)
GpuVector<T>&
GpuVector<T>::operator-=(const GpuVector<T>& other)
{
assertHasElements();
assertSameSize(other);
@ -255,7 +255,7 @@ CuVector<T>::operator-=(const CuVector<T>& other)
template <class T>
void
CuVector<T>::copyFromHost(const T* dataPointer, size_t numberOfElements)
GpuVector<T>::copyFromHost(const T* dataPointer, size_t numberOfElements)
{
if (numberOfElements > dim()) {
OPM_THROW(std::runtime_error,
@ -268,7 +268,7 @@ CuVector<T>::copyFromHost(const T* dataPointer, size_t numberOfElements)
template <class T>
void
CuVector<T>::copyToHost(T* dataPointer, size_t numberOfElements) const
GpuVector<T>::copyToHost(T* dataPointer, size_t numberOfElements) const
{
assertSameSize(detail::to_int(numberOfElements));
OPM_CUDA_SAFE_CALL(cudaMemcpy(dataPointer, data(), numberOfElements * sizeof(T), cudaMemcpyDeviceToHost));
@ -276,32 +276,32 @@ CuVector<T>::copyToHost(T* dataPointer, size_t numberOfElements) const
template <class T>
void
CuVector<T>::copyFromHost(const std::vector<T>& data)
GpuVector<T>::copyFromHost(const std::vector<T>& data)
{
copyFromHost(data.data(), data.size());
}
template <class T>
void
CuVector<T>::copyToHost(std::vector<T>& data) const
GpuVector<T>::copyToHost(std::vector<T>& data) const
{
copyToHost(data.data(), data.size());
}
template <typename T>
void
CuVector<T>::prepareSendBuf(CuVector<T>& buffer, const CuVector<int>& indexSet) const
GpuVector<T>::prepareSendBuf(GpuVector<T>& buffer, const GpuVector<int>& indexSet) const
{
return detail::prepareSendBuf(m_dataOnDevice, buffer.data(), indexSet.dim(), indexSet.data());
}
template <typename T>
void
CuVector<T>::syncFromRecvBuf(CuVector<T>& buffer, const CuVector<int>& indexSet) const
GpuVector<T>::syncFromRecvBuf(GpuVector<T>& buffer, const GpuVector<int>& indexSet) const
{
return detail::syncFromRecvBuf(m_dataOnDevice, buffer.data(), indexSet.dim(), indexSet.data());
}
template class CuVector<double>;
template class CuVector<float>;
template class CuVector<int>;
template class GpuVector<double>;
template class GpuVector<float>;
template class GpuVector<int>;
} // namespace Opm::gpuistl

View File

@ -16,8 +16,8 @@
You should have received a copy of the GNU General Public License
along with OPM. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef OPM_CUVECTOR_HEADER_HPP
#define OPM_CUVECTOR_HEADER_HPP
#ifndef OPM_GPUVECTOR_HEADER_HPP
#define OPM_GPUVECTOR_HEADER_HPP
#include <dune/common/fvector.hh>
#include <dune/istl/bvector.hh>
#include <exception>
@ -33,7 +33,7 @@ namespace Opm::gpuistl
{
/**
* @brief The CuVector class is a simple (arithmetic) vector class for the GPU.
* @brief The GpuVector class is a simple (arithmetic) vector class for the GPU.
*
* @note we currently only support simple raw primitives for T (double, float and int)
*
@ -45,12 +45,12 @@ namespace Opm::gpuistl
* Example usage:
*
* @code{.cpp}
* #include <opm/simulators/linalg/cuistl/CuVector.hpp>
* #include <opm/simulators/linalg/cuistl/GpuVector.hpp>
*
* void someFunction() {
* auto someDataOnCPU = std::vector<double>({1.0, 2.0, 42.0, 59.9451743, 10.7132692});
*
* auto dataOnGPU = CuVector<double>(someDataOnCPU);
* auto dataOnGPU = GpuVector<double>(someDataOnCPU);
*
* // Multiply by 4.0:
* dataOnGPU *= 4.0;
@ -62,7 +62,7 @@ namespace Opm::gpuistl
* @tparam T the type to store. Can be either float, double or int.
*/
template <typename T>
class CuVector
class GpuVector
{
public:
using field_type = T;
@ -70,17 +70,17 @@ public:
/**
* @brief CuVector allocates new GPU memory of the same size as other and copies the content of the other vector to
* @brief GpuVector allocates new GPU memory of the same size as other and copies the content of the other vector to
* this newly allocated memory.
*
* @note This does synchronous transfer.
*
* @param other the vector to copy from
*/
CuVector(const CuVector<T>& other);
GpuVector(const GpuVector<T>& other);
/**
* @brief CuVector allocates new GPU memory of the same size as data and copies the content of the data vector to
* @brief GpuVector allocates new GPU memory of the same size as data and copies the content of the data vector to
* this newly allocated memory.
*
* @note This does CPU to GPU transfer.
@ -90,7 +90,7 @@ public:
*
* @param data the vector to copy from
*/
explicit CuVector(const std::vector<T>& data);
explicit GpuVector(const std::vector<T>& data);
/**
* @brief operator= copies the content of the data vector to the memory of this vector.
@ -100,7 +100,7 @@ public:
*
* @param other the vector to copy from
*/
CuVector& operator=(const CuVector<T>& other);
GpuVector& operator=(const GpuVector<T>& other);
/**
* @brief operator= sets the whole vector equal to the scalar value.
@ -109,20 +109,20 @@ public:
*
* @param scalar the value all elements will be set to.
*/
CuVector& operator=(T scalar);
GpuVector& operator=(T scalar);
/**
* @brief CuVector allocates new GPU memory of size numberOfElements * sizeof(T)
* @brief GpuVector allocates new GPU memory of size numberOfElements * sizeof(T)
*
* @note For now numberOfElements needs to be within the limits of int due to restrictions in cublas
*
* @param numberOfElements number of T elements to allocate
*/
explicit CuVector(const size_t numberOfElements);
explicit GpuVector(const size_t numberOfElements);
/**
* @brief CuVector allocates new GPU memory of size numberOfElements * sizeof(T) and copies numberOfElements from
* @brief GpuVector allocates new GPU memory of size numberOfElements * sizeof(T) and copies numberOfElements from
* data
*
* @note This assumes the data is on the CPU.
@ -132,12 +132,12 @@ public:
*
* @note For now numberOfElements needs to be within the limits of int due to restrictions in cublas
*/
CuVector(const T* dataOnHost, const size_t numberOfElements);
GpuVector(const T* dataOnHost, const size_t numberOfElements);
/**
* @brief ~CuVector calls cudaFree
* @brief ~GpuVector calls cudaFree
*/
virtual ~CuVector();
virtual ~GpuVector();
/**
* @return the raw pointer to the GPU data
@ -162,7 +162,7 @@ public:
// TODO: [perf] vector.dim() can be replaced by bvector.N() * BlockDimension
if (detail::to_size_t(m_numberOfElements) != bvector.dim()) {
OPM_THROW(std::runtime_error,
fmt::format("Given incompatible vector size. CuVector has size {}, \n"
fmt::format("Given incompatible vector size. GpuVector has size {}, \n"
"however, BlockVector has N() = {}, and dim = {}.",
m_numberOfElements,
bvector.N(),
@ -185,7 +185,7 @@ public:
// TODO: [perf] vector.dim() can be replaced by bvector.N() * BlockDimension
if (detail::to_size_t(m_numberOfElements) != bvector.dim()) {
OPM_THROW(std::runtime_error,
fmt::format("Given incompatible vector size. CuVector has size {},\n however, the BlockVector "
fmt::format("Given incompatible vector size. GpuVector has size {},\n however, the BlockVector "
"has has N() = {}, and dim() = {}.",
m_numberOfElements,
bvector.N(),
@ -231,8 +231,8 @@ public:
*/
void copyToHost(std::vector<T>& data) const;
void prepareSendBuf(CuVector<T>& buffer, const CuVector<int>& indexSet) const;
void syncFromRecvBuf(CuVector<T>& buffer, const CuVector<int>& indexSet) const;
void prepareSendBuf(GpuVector<T>& buffer, const GpuVector<int>& indexSet) const;
void syncFromRecvBuf(GpuVector<T>& buffer, const GpuVector<int>& indexSet) const;
/**
* @brief operator *= multiplies every element by scalar
@ -242,7 +242,7 @@ public:
*
* @note int is not supported
*/
CuVector<T>& operator*=(const T& scalar);
GpuVector<T>& operator*=(const T& scalar);
/**
* @brief axpy sets this vector equal to this + alpha * y
@ -252,7 +252,7 @@ public:
* @note this will call CuBlas in the background
* @note int is not supported
*/
CuVector<T>& axpy(T alpha, const CuVector<T>& y);
GpuVector<T>& axpy(T alpha, const GpuVector<T>& y);
/**
* @brief operator+= adds the other vector to this vector
@ -260,7 +260,7 @@ public:
* @note this will call CuBlas in the background
* @note int is not supported
*/
CuVector<T>& operator+=(const CuVector<T>& other);
GpuVector<T>& operator+=(const GpuVector<T>& other);
/**
* @brief operator-= subtracts the other vector from this vector
@ -268,7 +268,7 @@ public:
* @note this will call CuBlas in the background
* @note int is not supported
*/
CuVector<T>& operator-=(const CuVector<T>& other);
GpuVector<T>& operator-=(const GpuVector<T>& other);
/**
* @brief dot computes the dot product (standard inner product) against the other vector
@ -278,7 +278,7 @@ public:
*
* @return the result on the inner product
*/
T dot(const CuVector<T>& other) const;
T dot(const GpuVector<T>& other) const;
/**
* @brief returns the l2 norm of the vector
@ -294,14 +294,14 @@ public:
*
* @note int is not supported
*/
T dot(const CuVector<T>& other, const CuVector<int>& indexSet, CuVector<T>& buffer) const;
T dot(const GpuVector<T>& other, const GpuVector<int>& indexSet, GpuVector<T>& buffer) const;
/**
* Computes the norm sqrt(sum_i this[indexSet[i]] * this[indexSet[i]])
*
* @note int is not supported
*/
T two_norm(const CuVector<int>& indexSet, CuVector<T>& buffer) const;
T two_norm(const GpuVector<int>& indexSet, GpuVector<T>& buffer) const;
/**
@ -309,14 +309,14 @@ public:
*
* @note int is not supported
*/
T dot(const CuVector<T>& other, const CuVector<int>& indexSet) const;
T dot(const GpuVector<T>& other, const GpuVector<int>& indexSet) const;
/**
* Computes the norm sqrt(sum_i this[indexSet[i]] * this[indexSet[i]])
*
* @note int is not supported
*/
T two_norm(const CuVector<int>& indexSet) const;
T two_norm(const GpuVector<int>& indexSet) const;
/**
@ -363,9 +363,9 @@ public:
* }
* @endcode
*/
void setZeroAtIndexSet(const CuVector<int>& indexSet);
void setZeroAtIndexSet(const GpuVector<int>& indexSet);
// Slow method that creates a string representation of a CuVector for debug purposes
// Slow method that creates a string representation of a GpuVector for debug purposes
std::string toDebugString()
{
std::vector<T> v = asStdVector();
@ -385,7 +385,7 @@ private:
const int m_numberOfElements;
detail::CuBlasHandle& m_cuBlasHandle;
void assertSameSize(const CuVector<T>& other) const;
void assertSameSize(const GpuVector<T>& other) const;
void assertSameSize(int size) const;
void assertHasElements() const;
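The indexed operations above have no example in the header; a minimal sketch, assuming the index sets hold plain element indices as they do in GpuOwnerOverlapCopy:

    auto v = Opm::gpuistl::GpuVector<double>(std::vector<double>{1.0, 2.0, 3.0, 4.0});
    auto w = Opm::gpuistl::GpuVector<double>(std::vector<double>{2.0, 2.0, 2.0, 2.0});
    auto indexSet = Opm::gpuistl::GpuVector<int>(std::vector<int>{0, 2});

    double d = v.dot(w, indexSet);   // 1*2 + 3*2 = 8
    double n = v.two_norm(indexSet); // sqrt(1*1 + 3*3)
    v.setZeroAtIndexSet(indexSet);   // v is now {0, 2, 0, 4}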

View File

@ -27,7 +27,7 @@
#include <opm/common/TimingMacros.hpp>
#include <opm/simulators/linalg/GraphColoring.hpp>
#include <opm/simulators/linalg/cuistl/GpuSparseMatrix.hpp>
#include <opm/simulators/linalg/cuistl/CuVector.hpp>
#include <opm/simulators/linalg/cuistl/GpuVector.hpp>
#include <opm/simulators/linalg/cuistl/OpmCuILU0.hpp>
#include <opm/simulators/linalg/cuistl/detail/autotuner.hpp>
#include <opm/simulators/linalg/cuistl/detail/coloringAndReorderingUtils.hpp>
@ -72,7 +72,7 @@ OpmCuILU0<M, X, Y, l>::OpmCuILU0(const M& A, bool splitMatrix, bool tuneKernels)
m_gpuMatrix.nonzeroes(),
A.nonzeroes()));
if (m_splitMatrix) {
m_gpuMatrixReorderedDiag.emplace(CuVector<field_type>(blocksize_ * blocksize_ * m_cpuMatrix.N()));
m_gpuMatrixReorderedDiag.emplace(GpuVector<field_type>(blocksize_ * blocksize_ * m_cpuMatrix.N()));
std::tie(m_gpuMatrixReorderedLower, m_gpuMatrixReorderedUpper)
= detail::extractLowerAndUpperMatrices<M, field_type, GpuSparseMatrix<field_type>>(m_cpuMatrix,
m_reorderedToNatural);
@ -272,8 +272,8 @@ OpmCuILU0<M, X, Y, l>::tuneThreadBlockSizes()
= detail::tuneThreadBlockSize(tuneFactorizationThreadBlockSizeInUpdate, "Kernel computing ILU0 factorization");
// tune the thread-block size of the apply
CuVector<field_type> tmpV(m_gpuMatrix.N() * m_gpuMatrix.blockSize());
CuVector<field_type> tmpD(m_gpuMatrix.N() * m_gpuMatrix.blockSize());
GpuVector<field_type> tmpV(m_gpuMatrix.N() * m_gpuMatrix.blockSize());
GpuVector<field_type> tmpD(m_gpuMatrix.N() * m_gpuMatrix.blockSize());
tmpD = 1;
auto tuneLowerSolveThreadBlockSizeInApply = [this, &tmpV, &tmpD](int lowerSolveThreadBlockSize) {
@ -292,11 +292,11 @@ OpmCuILU0<M, X, Y, l>::tuneThreadBlockSizes()
} // namespace Opm::gpuistl
#define INSTANTIATE_CUDILU_DUNE(realtype, blockdim) \
template class ::Opm::gpuistl::OpmCuILU0<Dune::BCRSMatrix<Dune::FieldMatrix<realtype, blockdim, blockdim>>, \
::Opm::gpuistl::CuVector<realtype>, \
::Opm::gpuistl::CuVector<realtype>>; \
::Opm::gpuistl::GpuVector<realtype>, \
::Opm::gpuistl::GpuVector<realtype>>; \
template class ::Opm::gpuistl::OpmCuILU0<Dune::BCRSMatrix<Opm::MatrixBlock<realtype, blockdim, blockdim>>, \
::Opm::gpuistl::CuVector<realtype>, \
::Opm::gpuistl::CuVector<realtype>>
::Opm::gpuistl::GpuVector<realtype>, \
::Opm::gpuistl::GpuVector<realtype>>
INSTANTIATE_CUDILU_DUNE(double, 1);
INSTANTIATE_CUDILU_DUNE(double, 2);

View File

@ -23,7 +23,7 @@
#include <opm/grid/utility/SparseTable.hpp>
#include <opm/simulators/linalg/PreconditionerWithUpdate.hpp>
#include <opm/simulators/linalg/cuistl/GpuSparseMatrix.hpp>
#include <opm/simulators/linalg/cuistl/CuVector.hpp>
#include <opm/simulators/linalg/cuistl/GpuVector.hpp>
#include <optional>
#include <type_traits>
#include <vector>
@ -39,7 +39,7 @@ namespace Opm::gpuistl
//! \tparam l Ignored. Just there to have the same number of template arguments
//! as other preconditioners.
//!
//! \note We assume X and Y are both CuVector<real_type>, but we leave them as template
//! \note We assume X and Y are both GpuVector<real_type>, but we leave them as template
//! arguments in case of future additions.
template <class M, class X, class Y, int l = 1>
class OpmCuILU0 : public Dune::PreconditionerWithUpdate<X, Y>
@ -126,13 +126,13 @@ private:
std::unique_ptr<CuMat> m_gpuMatrixReorderedLower;
std::unique_ptr<CuMat> m_gpuMatrixReorderedUpper;
//! \brief If matrix splitting is enabled, we also store the diagonal separately
std::optional<CuVector<field_type>> m_gpuMatrixReorderedDiag;
std::optional<GpuVector<field_type>> m_gpuMatrixReorderedDiag;
//! row conversion from natural to reordered matrix indices stored on the GPU
CuVector<int> m_gpuNaturalToReorder;
GpuVector<int> m_gpuNaturalToReorder;
//! row conversion from reordered to natural matrix indices stored on the GPU
CuVector<int> m_gpuReorderToNatural;
GpuVector<int> m_gpuReorderToNatural;
//! \brief Stores the inverted diagonal that we use in ILU0
CuVector<field_type> m_gpuDInv;
GpuVector<field_type> m_gpuDInv;
//! \brief Bool storing whether or not we should store matrices in a split format
bool m_splitMatrix;
//! \brief Bool storing whether or not we will tune the threadblock sizes. Only used for AMD cards

View File

@ -21,7 +21,7 @@
#include <cusparse.h>
#include <dune/istl/preconditioner.hh>
#include <opm/simulators/linalg/PreconditionerWithUpdate.hpp>
#include <opm/simulators/linalg/cuistl/CuVector.hpp>
#include <opm/simulators/linalg/cuistl/GpuVector.hpp>
#include <opm/simulators/linalg/cuistl/PreconditionerHolder.hpp>
#include <opm/simulators/linalg/cuistl/detail/preconditioner_should_call_post_pre.hpp>
@ -35,11 +35,11 @@ namespace Opm::gpuistl
//!
//! \tparam X the domain type (should be on the CPU). Typically a Dune::BlockVector
//! \tparam Y the range type (should be on the CPU). Typically a Dune::BlockVector
//! \tparam CudaPreconditionerType the preconditioner taking CuVector<real_type> as arguments to apply
//! \tparam CudaPreconditionerType the preconditioner taking GpuVector<real_type> as arguments to apply
template <class X, class Y, class CudaPreconditionerType>
class PreconditionerAdapter
: public Dune::PreconditionerWithUpdate<X, Y>,
public PreconditionerHolder<CuVector<typename X::field_type>, CuVector<typename Y::field_type>>
public PreconditionerHolder<GpuVector<typename X::field_type>, GpuVector<typename Y::field_type>>
{
public:
//! \brief The domain type of the preconditioner.
@ -77,8 +77,8 @@ public:
virtual void apply(X& v, const Y& d) override
{
if (!m_inputBuffer) {
m_inputBuffer.reset(new CuVector<field_type>(v.dim()));
m_outputBuffer.reset(new CuVector<field_type>(v.dim()));
m_inputBuffer.reset(new GpuVector<field_type>(v.dim()));
m_outputBuffer.reset(new GpuVector<field_type>(v.dim()));
}
m_inputBuffer->copyFromHost(d);
m_underlyingPreconditioner->apply(*m_outputBuffer, *m_inputBuffer);
@ -117,7 +117,7 @@ public:
return detail::shouldCallPreconditionerPre<CudaPreconditionerType>();
}
virtual std::shared_ptr<Dune::PreconditionerWithUpdate<CuVector<field_type>, CuVector<field_type>>>
virtual std::shared_ptr<Dune::PreconditionerWithUpdate<GpuVector<field_type>, GpuVector<field_type>>>
getUnderlyingPreconditioner() override
{
return m_underlyingPreconditioner;
@ -131,8 +131,8 @@ private:
//! \brief the underlying preconditioner to use
std::shared_ptr<CudaPreconditionerType> m_underlyingPreconditioner;
std::unique_ptr<CuVector<field_type>> m_inputBuffer;
std::unique_ptr<CuVector<field_type>> m_outputBuffer;
std::unique_ptr<GpuVector<field_type>> m_inputBuffer;
std::unique_ptr<GpuVector<field_type>> m_outputBuffer;
};
} // end namespace Opm::gpuistl
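A usage sketch of the adapter, with SpMatrix and matrix standing in for a concrete Dune::BCRSMatrix type and instance as in the tests below:

    using X = Dune::BlockVector<Dune::FieldVector<double, 2>>;
    using GpuVec = Opm::gpuistl::GpuVector<double>;
    using GpuJacT = Opm::gpuistl::GpuJac<SpMatrix, GpuVec, GpuVec>;

    auto gpuJac = std::make_shared<GpuJacT>(matrix, 0.5); // matrix and relaxation factor
    // apply() lazily allocates the GPU buffers, copies d to the device, runs the
    // underlying preconditioner there, and copies the result back into v.
    Opm::gpuistl::PreconditionerAdapter<X, X, GpuJacT> adapted(gpuJac);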

View File

@ -29,7 +29,7 @@
#include <opm/simulators/linalg/cuistl/GpuBlockPreconditioner.hpp>
#include <opm/simulators/linalg/cuistl/GpuOwnerOverlapCopy.hpp>
#include <opm/simulators/linalg/cuistl/GpuSparseMatrix.hpp>
#include <opm/simulators/linalg/cuistl/CuVector.hpp>
#include <opm/simulators/linalg/cuistl/GpuVector.hpp>
#include <opm/simulators/linalg/cuistl/PreconditionerAdapter.hpp>
#include <opm/simulators/linalg/cuistl/detail/has_function.hpp>
@ -56,7 +56,7 @@ public:
using typename Dune::IterativeSolver<X, X>::real_type;
using typename Dune::IterativeSolver<X, X>::scalar_real_type;
static constexpr auto block_size = domain_type::block_type::dimension;
using XGPU = Opm::gpuistl::CuVector<real_type>;
using XGPU = Opm::gpuistl::GpuVector<real_type>;
// TODO: Use a std::forward
SolverAdapter(Operator& op,

View File

@ -18,7 +18,7 @@
*/
#include <config.h>
#include <opm/common/ErrorMacros.hpp>
#include <opm/simulators/linalg/cuistl/CuVector.hpp>
#include <opm/simulators/linalg/cuistl/GpuVector.hpp>
#include <opm/simulators/linalg/cuistl/detail/cublas_safe_call.hpp>
#include <opm/simulators/linalg/cuistl/detail/cublas_wrapper.hpp>
#include <opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp>
@ -143,7 +143,7 @@ innerProductAtIndices(cublasHandle_t cublasHandle,
elementWiseMultiplyKernel<<<nThreadBlocks, threadBlockSize>>>(deviceA, deviceB, buffer, numberOfElements, indices);
// TODO: [perf] Get rid of the allocation here.
CuVector<T> oneVector(numberOfElements);
GpuVector<T> oneVector(numberOfElements);
oneVector = 1.0;
T result = 0.0;
OPM_CUBLAS_SAFE_CALL(cublasDot(cublasHandle, numberOfElements, oneVector.data(), 1, buffer, 1, &result));

View File

@ -65,11 +65,11 @@ void syncFromRecvBuf(T* deviceA, T* buffer, size_t numberOfElements, const int*
/**
* @brief Compute the weighted matrix-vector product where the matrix is diagonal and the diagonal is stored as a vector, meaning we
* compute the Hadamard product.
* @param squareBlockVector A CuVector whose elements are NxN matrix blocks
* @param squareBlockVector A GpuVector whose elements are NxN matrix blocks
* @param numberOfRows The number of rows in the vector
* @param blocksize The side length of the square block elements in the vector
* @param src_vec A pointer to the data of the CuVector we multiply the blockvector with
* @param[out] dst_vec A pointer to the data of the CuVector we store the result in
* @param src_vec A pointer to the data of the GpuVector we multiply the blockvector with
* @param[out] dst_vec A pointer to the data of the GpuVector we store the result in
*
* @note This is implemented as a faster way to multiply a diagonal matrix with a blockvector. We need only store the
* diagonal of the matrix and use this product.
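As a plain-CPU reference for what this kernel computes (the function name is hypothetical; the real implementation runs on the device):

    // dst[row] = diagBlock(row) * src[row], block by block.
    void blockDiagonalMv(const double* squareBlockVector, size_t numberOfRows,
                         size_t blocksize, const double* srcVec, double* dstVec)
    {
        for (size_t row = 0; row < numberOfRows; ++row) {
            const double* block = squareBlockVector + row * blocksize * blocksize;
            for (size_t i = 0; i < blocksize; ++i) {
                double sum = 0.0;
                for (size_t j = 0; j < blocksize; ++j) {
                    sum += block[i * blocksize + j] * srcVec[row * blocksize + j];
                }
                dstVec[row * blocksize + i] = sum;
            }
        }
    }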

View File

@ -27,7 +27,7 @@
#include <opm/simulators/linalg/DILU.hpp>
#include <opm/simulators/linalg/cuistl/GpuDILU.hpp>
#include <opm/simulators/linalg/cuistl/GpuSparseMatrix.hpp>
#include <opm/simulators/linalg/cuistl/CuVector.hpp>
#include <opm/simulators/linalg/cuistl/GpuVector.hpp>
#include <opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp>
#include <opm/simulators/linalg/cuistl/detail/cusparse_matrix_operations.hpp>
#include <random>
@ -42,8 +42,8 @@ using B2x2Vec = Dune::BlockVector<Dune::FieldVector<double, 2>>;
using Sp1x1BlockMatrix = Dune::BCRSMatrix<FM1x1>;
using Sp2x2BlockMatrix = Dune::BCRSMatrix<FM2x2>;
using CuMatrix = Opm::gpuistl::GpuSparseMatrix<T>;
using CuIntVec = Opm::gpuistl::CuVector<int>;
using CuFloatingPointVec = Opm::gpuistl::CuVector<T>;
using CuIntVec = Opm::gpuistl::GpuVector<int>;
using CuFloatingPointVec = Opm::gpuistl::GpuVector<T>;
using GpuDilu1x1 = Opm::gpuistl::GpuDILU<Sp1x1BlockMatrix, CuFloatingPointVec, CuFloatingPointVec>;
using GpuDilu2x2 = Opm::gpuistl::GpuDILU<Sp2x2BlockMatrix, CuFloatingPointVec, CuFloatingPointVec>;

View File

@ -25,7 +25,7 @@
#include <dune/istl/bcrsmatrix.hh>
#include <opm/simulators/linalg/cuistl/GpuJac.hpp>
#include <opm/simulators/linalg/cuistl/GpuSparseMatrix.hpp>
#include <opm/simulators/linalg/cuistl/CuVector.hpp>
#include <opm/simulators/linalg/cuistl/GpuVector.hpp>
#include <opm/simulators/linalg/cuistl/PreconditionerAdapter.hpp>
#include <opm/simulators/linalg/cuistl/detail/cusparse_matrix_operations.hpp>
#include <opm/simulators/linalg/cuistl/detail/fix_zero_diagonal.hpp>
@ -49,7 +49,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(GPUJACApplyBlocksize2, T, NumericTypes)
using M = Dune::FieldMatrix<T, blocksize, blocksize>;
using SpMatrix = Dune::BCRSMatrix<M>;
using Vector = Dune::BlockVector<Dune::FieldVector<T, blocksize>>;
using GpuJac = Opm::gpuistl::GpuJac<SpMatrix, Opm::gpuistl::CuVector<T>, Opm::gpuistl::CuVector<T>>;
using GpuJac = Opm::gpuistl::GpuJac<SpMatrix, Opm::gpuistl::GpuVector<T>, Opm::gpuistl::GpuVector<T>>;
SpMatrix B(N, N, nonZeroes, SpMatrix::row_wise);
for (auto row = B.createbegin(); row != B.createend(); ++row) {
@ -103,7 +103,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(GPUJACApplyBlocksize1, T, NumericTypes)
using M = Dune::FieldMatrix<T, blocksize, blocksize>;
using SpMatrix = Dune::BCRSMatrix<M>;
using Vector = Dune::BlockVector<Dune::FieldVector<T, blocksize>>;
using GpuJac = Opm::gpuistl::GpuJac<SpMatrix, Opm::gpuistl::CuVector<T>, Opm::gpuistl::CuVector<T>>;
using GpuJac = Opm::gpuistl::GpuJac<SpMatrix, Opm::gpuistl::GpuVector<T>, Opm::gpuistl::GpuVector<T>>;
SpMatrix B(N, N, nonZeroes, SpMatrix::row_wise);
for (auto row = B.createbegin(); row != B.createend(); ++row) {

View File

@ -27,7 +27,7 @@
#include <dune/istl/owneroverlapcopy.hh>
#include <memory>
#include <opm/simulators/linalg/cuistl/GpuOwnerOverlapCopy.hpp>
#include <opm/simulators/linalg/cuistl/CuVector.hpp>
#include <opm/simulators/linalg/cuistl/GpuVector.hpp>
#include <opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp>
#include <opm/simulators/linalg/cuistl/set_device.hpp>
#include <random>
@ -62,7 +62,7 @@ BOOST_AUTO_TEST_CASE(TestProject)
auto ownerOverlapCopy = Dune::OwnerOverlapCopyCommunication<int>(indexInfo, MPI_COMM_WORLD);
auto xCPU = std::vector<double> {{1.0, 2.0, 3.0}};
auto xGPU = Opm::gpuistl::CuVector<double>(xCPU);
auto xGPU = Opm::gpuistl::GpuVector<double>(xCPU);
auto gpuComm = std::make_shared<Opm::gpuistl::GPUObliviousMPISender<double, 1, Dune::OwnerOverlapCopyCommunication<int>>>(ownerOverlapCopy);
@ -94,7 +94,7 @@ BOOST_AUTO_TEST_CASE(TestDot)
indexInfo.addRemoteIndex(std::make_tuple(0, 2, Dune::OwnerOverlapCopyAttributeSet::copy));
auto ownerOverlapCopy = Dune::OwnerOverlapCopyCommunication<int>(indexInfo, MPI_COMM_WORLD);
auto xCPU = std::vector<double> {{1.0, 2.0, 3.0}};
auto xGPU = Opm::gpuistl::CuVector<double>(xCPU);
auto xGPU = Opm::gpuistl::GpuVector<double>(xCPU);
auto gpuComm = std::make_shared<Opm::gpuistl::GPUObliviousMPISender<double, 1, Dune::OwnerOverlapCopyCommunication<int>>>(ownerOverlapCopy);

View File

@ -28,7 +28,7 @@
#include <dune/istl/bcrsmatrix.hh>
#include <dune/istl/preconditioners.hh>
#include <opm/simulators/linalg/cuistl/GpuSeqILU0.hpp>
#include <opm/simulators/linalg/cuistl/CuVector.hpp>
#include <opm/simulators/linalg/cuistl/GpuVector.hpp>
#include <opm/simulators/linalg/cuistl/PreconditionerAdapter.hpp>
#include <opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp>
@ -63,7 +63,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(TestFiniteDifference1D, T, NumericTypes)
using M = Dune::FieldMatrix<T, 1, 1>;
using SpMatrix = Dune::BCRSMatrix<M>;
using Vector = Dune::BlockVector<Dune::FieldVector<T, 1>>;
using GpuILU0 = Opm::gpuistl::GpuSeqILU0<SpMatrix, Opm::gpuistl::CuVector<T>, Opm::gpuistl::CuVector<T>>;
using GpuILU0 = Opm::gpuistl::GpuSeqILU0<SpMatrix, Opm::gpuistl::GpuVector<T>, Opm::gpuistl::GpuVector<T>>;
SpMatrix B(N, N, nonZeroes, SpMatrix::row_wise);
for (auto row = B.createbegin(); row != B.createend(); ++row) {
@@ -158,7 +158,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(TestFiniteDifferenceBlock2, T, NumericTypes)
using M = Dune::FieldMatrix<T, 2, 2>;
using SpMatrix = Dune::BCRSMatrix<M>;
using Vector = Dune::BlockVector<Dune::FieldVector<T, 2>>;
using GpuILU0 = Opm::gpuistl::GpuSeqILU0<SpMatrix, Opm::gpuistl::CuVector<T>, Opm::gpuistl::CuVector<T>>;
using GpuILU0 = Opm::gpuistl::GpuSeqILU0<SpMatrix, Opm::gpuistl::GpuVector<T>, Opm::gpuistl::GpuVector<T>>;
SpMatrix B(N, N, nonZeroes, SpMatrix::row_wise);
for (auto row = B.createbegin(); row != B.createend(); ++row) {
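
After the rename, the ILU0 preconditioner type used by these finite-difference tests reads as below. A sketch only: the construction from an assembled matrix B and a relaxation weight is an assumption based on how this commit instantiates the class in the factory code.

using MIlu = Dune::FieldMatrix<double, 1, 1>;
using SpMatrixIlu = Dune::BCRSMatrix<MIlu>;
using GpuILU0OfDouble =
    Opm::gpuistl::GpuSeqILU0<SpMatrixIlu, Opm::gpuistl::GpuVector<double>, Opm::gpuistl::GpuVector<double>>;
// auto ilu = std::make_shared<GpuILU0OfDouble>(B, 1.0); // B assembled as in the hunks above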

View File

@@ -24,7 +24,7 @@
#include <dune/istl/bcrsmatrix.hh>
#include <memory>
#include <opm/simulators/linalg/cuistl/GpuSparseMatrix.hpp>
#include <opm/simulators/linalg/cuistl/CuVector.hpp>
#include <opm/simulators/linalg/cuistl/GpuVector.hpp>
#include <opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp>
#include <random>
@@ -149,8 +149,8 @@ BOOST_AUTO_TEST_CASE(RandomSparsityMatrix)
std::vector<double> inputDataX(N * dim, 0.0);
inputDataX[component] = 1.0;
std::vector<double> inputDataY(N * dim, .25);
auto inputVectorX = Opm::gpuistl::CuVector<double>(inputDataX.data(), inputDataX.size());
auto inputVectorY = Opm::gpuistl::CuVector<double>(inputDataY.data(), inputDataY.size());
auto inputVectorX = Opm::gpuistl::GpuVector<double>(inputDataX.data(), inputDataX.size());
auto inputVectorY = Opm::gpuistl::GpuVector<double>(inputDataY.data(), inputDataY.size());
Vector xHost(N), yHost(N);
yHost = inputDataY[0];
inputVectorX.copyToHost(xHost);
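
In isolation, the pattern this test exercises looks roughly like the sketch below (includes as in the file above). Note that mv() is an assumption here: the hunks only show the vector transfers, and the product itself is implied by the yHost comparison in the test.

const std::size_t N = 10;  // number of block rows, stands in for the test's N
const std::size_t dim = 3; // block dimension, stands in for the test's dim
std::vector<double> inputDataX(N * dim, 0.0);
std::vector<double> inputDataY(N * dim, 0.25);
auto inputVectorX = Opm::gpuistl::GpuVector<double>(inputDataX.data(), inputDataX.size());
auto inputVectorY = Opm::gpuistl::GpuVector<double>(inputDataY.data(), inputDataY.size());
// Given a Dune::BCRSMatrix B assembled as in the hunks above:
//   auto m = Opm::gpuistl::GpuSparseMatrix<double>::fromMatrix(B);
//   m.mv(inputVectorX, inputVectorY);  // y = B * x on the device (mv() assumed)
//   inputVectorY.copyToHost(inputDataY.data(), inputDataY.size());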

View File

@@ -18,13 +18,13 @@
*/
#include <config.h>
#define BOOST_TEST_MODULE TestCuVector
#define BOOST_TEST_MODULE TestGpuVector
#include <boost/test/unit_test.hpp>
#include <cuda_runtime.h>
#include <dune/common/fvector.hh>
#include <dune/istl/bvector.hh>
#include <opm/simulators/linalg/cuistl/CuVector.hpp>
#include <opm/simulators/linalg/cuistl/GpuVector.hpp>
#include <opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp>
#include <random>
@@ -32,7 +32,7 @@ BOOST_AUTO_TEST_CASE(TestDocumentedUsage)
{
auto someDataOnCPU = std::vector<double>({1.0, 2.0, 42.0, 59.9451743, 10.7132692});
auto dataOnGPU = ::Opm::gpuistl::CuVector<double>(someDataOnCPU);
auto dataOnGPU = ::Opm::gpuistl::GpuVector<double>(someDataOnCPU);
// Multiply by 4.0:
dataOnGPU *= 4.0;
@@ -50,14 +50,14 @@ BOOST_AUTO_TEST_CASE(TestDocumentedUsage)
BOOST_AUTO_TEST_CASE(TestConstructionSize)
{
const int numberOfElements = 1234;
auto vectorOnGPU = Opm::gpuistl::CuVector<double>(numberOfElements);
auto vectorOnGPU = Opm::gpuistl::GpuVector<double>(numberOfElements);
BOOST_CHECK_EQUAL(numberOfElements, vectorOnGPU.dim());
}
BOOST_AUTO_TEST_CASE(TestCopyFromHostConstructor)
{
std::vector<double> data {{1, 2, 3, 4, 5, 6, 7}};
auto vectorOnGPU = Opm::gpuistl::CuVector<double>(data.data(), data.size());
auto vectorOnGPU = Opm::gpuistl::GpuVector<double>(data.data(), data.size());
BOOST_CHECK_EQUAL(data.size(), vectorOnGPU.dim());
std::vector<double> buffer(data.size(), 0.0);
vectorOnGPU.copyToHost(buffer.data(), buffer.size());
@@ -68,7 +68,7 @@ BOOST_AUTO_TEST_CASE(TestCopyFromHostConstructor)
BOOST_AUTO_TEST_CASE(TestCopyFromHostFunction)
{
std::vector<double> data {{1, 2, 3, 4, 5, 6, 7}};
auto vectorOnGPU = Opm::gpuistl::CuVector<double>(data.size());
auto vectorOnGPU = Opm::gpuistl::GpuVector<double>(data.size());
BOOST_CHECK_EQUAL(data.size(), vectorOnGPU.dim());
vectorOnGPU.copyFromHost(data.data(), data.size());
std::vector<double> buffer(data.size(), 0.0);
@@ -80,7 +80,7 @@ BOOST_AUTO_TEST_CASE(TestCopyFromHostFunction)
BOOST_AUTO_TEST_CASE(TestCopyFromBvector)
{
auto blockVector = Dune::BlockVector<Dune::FieldVector<double, 2>> {{{42, 43}, {44, 45}, {46, 47}}};
auto vectorOnGPU = Opm::gpuistl::CuVector<double>(blockVector.dim());
auto vectorOnGPU = Opm::gpuistl::GpuVector<double>(blockVector.dim());
vectorOnGPU.copyFromHost(blockVector);
std::vector<double> buffer(vectorOnGPU.dim());
vectorOnGPU.copyToHost(buffer.data(), buffer.size());
@@ -93,7 +93,7 @@ BOOST_AUTO_TEST_CASE(TestCopyToBvector)
{
std::vector<double> data {{1, 2, 3, 4, 5, 6, 7, 8, 9}};
auto blockVector = Dune::BlockVector<Dune::FieldVector<double, 3>>(3);
auto vectorOnGPU = Opm::gpuistl::CuVector<double>(data.data(), data.size());
auto vectorOnGPU = Opm::gpuistl::GpuVector<double>(data.data(), data.size());
vectorOnGPU.copyToHost(blockVector);
@@ -103,7 +103,7 @@ BOOST_AUTO_TEST_CASE(TestCopyToBvector)
BOOST_AUTO_TEST_CASE(TestDataPointer)
{
std::vector<double> data {{1, 2, 3, 4, 5, 6, 7, 8, 9}};
auto vectorOnGPU = Opm::gpuistl::CuVector<double>(data.data(), data.size());
auto vectorOnGPU = Opm::gpuistl::GpuVector<double>(data.data(), data.size());
std::vector<double> buffer(data.size(), 0.0);
OPM_CUDA_SAFE_CALL(cudaMemcpy(buffer.data(), vectorOnGPU.data(), sizeof(double) * data.size(), cudaMemcpyDeviceToHost));
@@ -113,7 +113,7 @@ BOOST_AUTO_TEST_CASE(TestDataPointer)
BOOST_AUTO_TEST_CASE(TestCopyScalarMultiply)
{
std::vector<double> data {{1, 2, 3, 4, 5, 6, 7}};
auto vectorOnGPU = Opm::gpuistl::CuVector<double>(data.data(), data.size());
auto vectorOnGPU = Opm::gpuistl::GpuVector<double>(data.data(), data.size());
BOOST_CHECK_EQUAL(data.size(), vectorOnGPU.dim());
const double scalar = 42.25;
vectorOnGPU *= scalar;
@@ -128,7 +128,7 @@ BOOST_AUTO_TEST_CASE(TestCopyScalarMultiply)
BOOST_AUTO_TEST_CASE(TestTwoNorm)
{
std::vector<double> data {{1, 2, 3, 4, 5, 6, 7}};
auto vectorOnGPU = Opm::gpuistl::CuVector<double>(data.data(), data.size());
auto vectorOnGPU = Opm::gpuistl::GpuVector<double>(data.data(), data.size());
auto twoNorm = vectorOnGPU.two_norm();
double correctAnswer = 0.0;
@@ -143,8 +143,8 @@ BOOST_AUTO_TEST_CASE(TestDot)
{
std::vector<double> dataA {{1, 2, 3, 4, 5, 6, 7}};
std::vector<double> dataB {{8, 9, 10, 11, 12, 13, 14}};
auto vectorOnGPUA = Opm::gpuistl::CuVector<double>(dataA.data(), dataA.size());
auto vectorOnGPUB = Opm::gpuistl::CuVector<double>(dataB.data(), dataB.size());
auto vectorOnGPUA = Opm::gpuistl::GpuVector<double>(dataA.data(), dataA.size());
auto vectorOnGPUB = Opm::gpuistl::GpuVector<double>(dataB.data(), dataB.size());
auto dot = vectorOnGPUA.dot(vectorOnGPUB);
double correctAnswer = 0.0;
@@ -158,7 +158,7 @@ BOOST_AUTO_TEST_CASE(TestDot)
BOOST_AUTO_TEST_CASE(Assigment)
{
std::vector<double> data {{1, 2, 3, 4, 5, 6, 7}};
auto vectorOnGPU = Opm::gpuistl::CuVector<double>(data.data(), data.size());
auto vectorOnGPU = Opm::gpuistl::GpuVector<double>(data.data(), data.size());
vectorOnGPU = 10.0;
vectorOnGPU.copyToHost(data.data(), data.size());
@@ -171,9 +171,9 @@ BOOST_AUTO_TEST_CASE(Assigment)
BOOST_AUTO_TEST_CASE(CopyAssignment)
{
std::vector<double> data {{1, 2, 3, 4, 5, 6, 7}};
auto vectorOnGPU = Opm::gpuistl::CuVector<double>(data.data(), data.size());
auto vectorOnGPU = Opm::gpuistl::GpuVector<double>(data.data(), data.size());
vectorOnGPU.copyToHost(data.data(), data.size());
auto vectorOnGPUB = Opm::gpuistl::CuVector<double>(data.size());
auto vectorOnGPUB = Opm::gpuistl::GpuVector<double>(data.size());
vectorOnGPUB = 4.0;
vectorOnGPUB = vectorOnGPU;
@@ -185,7 +185,7 @@ BOOST_AUTO_TEST_CASE(CopyAssignment)
BOOST_AUTO_TEST_CASE(RandomVectors)
{
using GVector = Opm::gpuistl::CuVector<double>;
using GVector = Opm::gpuistl::GpuVector<double>;
std::srand(0);
std::mt19937 generator;
std::uniform_real_distribution<double> distribution(-100.0, 100.0);
@@ -268,7 +268,7 @@ BOOST_AUTO_TEST_CASE(RandomVectors)
indexSet.push_back(i);
}
}
auto indexSetGPU = Opm::gpuistl::CuVector<int>(indexSet);
auto indexSetGPU = Opm::gpuistl::GpuVector<int>(indexSet);
aGPU.setZeroAtIndexSet(indexSetGPU);
auto projectedA = aGPU.asStdVector();
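
Taken together, this test file walks the entire renamed GpuVector surface. As a compact reference, every call in the sketch below appears in the hunks above:

#include <opm/simulators/linalg/cuistl/GpuVector.hpp>
#include <vector>

inline void gpuVectorTour()
{
    auto host = std::vector<double> {1.0, 2.0, 42.0};
    auto v = Opm::gpuistl::GpuVector<double>(host);                     // construct from a std::vector
    auto w = Opm::gpuistl::GpuVector<double>(host.data(), host.size()); // construct from pointer + size
    auto z = Opm::gpuistl::GpuVector<double>(host.size());              // construct by size, then fill:
    z.copyFromHost(host.data(), host.size());

    v *= 4.0;               // scalar multiplication on the device
    v = 10.0;               // scalar assignment
    z = v;                  // device-to-device copy assignment
    auto n2 = v.two_norm(); // Euclidean norm
    auto d = v.dot(w);      // inner product
    (void)n2;
    (void)d;

    auto indexSet = Opm::gpuistl::GpuVector<int>(std::vector<int> {0, 2});
    v.setZeroAtIndexSet(indexSet); // zero the listed components
    auto back = v.asStdVector();   // copy the result back to the host
    (void)back;
}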

View File

@@ -24,7 +24,7 @@
#include <cuda_runtime.h>
#include <dune/istl/bcrsmatrix.hh>
#include <opm/simulators/linalg/cuistl/GpuSparseMatrix.hpp>
#include <opm/simulators/linalg/cuistl/CuVector.hpp>
#include <opm/simulators/linalg/cuistl/GpuVector.hpp>
#include <opm/simulators/linalg/cuistl/PreconditionerAdapter.hpp>
#include <opm/simulators/linalg/cuistl/detail/cusparse_matrix_operations.hpp>
#include <opm/simulators/linalg/cuistl/detail/fix_zero_diagonal.hpp>
@@ -86,7 +86,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(FlattenAndInvertDiagonalWith3By3Blocks, T, NumericTypes)
B[1][1][2][2] = -1.0;
Opm::gpuistl::GpuSparseMatrix<T> m = Opm::gpuistl::GpuSparseMatrix<T>::fromMatrix(B);
Opm::gpuistl::CuVector<T> dInvDiag(blocksize * blocksize * N);
Opm::gpuistl::GpuVector<T> dInvDiag(blocksize * blocksize * N);
Opm::gpuistl::detail::JAC::invertDiagonalAndFlatten<T, 3>(
m.getNonZeroValues().data(), m.getRowIndices().data(), m.getColumnIndices().data(), N, dInvDiag.data());
@@ -160,7 +160,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(FlattenAndInvertDiagonalWith2By2Blocks, T, NumericTypes)
B[1][1][1][1] = -1.0;
Opm::gpuistl::GpuSparseMatrix<T> m = Opm::gpuistl::GpuSparseMatrix<T>::fromMatrix(B);
Opm::gpuistl::CuVector<T> dInvDiag(blocksize * blocksize * N);
Opm::gpuistl::GpuVector<T> dInvDiag(blocksize * blocksize * N);
Opm::gpuistl::detail::JAC::invertDiagonalAndFlatten<T, 2>(
m.getNonZeroValues().data(), m.getRowIndices().data(), m.getColumnIndices().data(), N, dInvDiag.data());
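
For orientation: the kernel takes the matrix's CSR-style arrays plus an output buffer of blocksize * blocksize * N entries, one inverted diagonal block per block row. In the sketch below, T, m and N are as in the test above; the per-argument notes are inferred from the accessor names, and the flattening order inside each block is not visible in this diff.

constexpr int blocksizeSketch = 3;
Opm::gpuistl::GpuVector<T> dInvDiagSketch(blocksizeSketch * blocksizeSketch * N);
Opm::gpuistl::detail::JAC::invertDiagonalAndFlatten<T, blocksizeSketch>(
    m.getNonZeroValues().data(),  // nonzero block values (device)
    m.getRowIndices().data(),     // row offsets into the nonzeros (device)
    m.getColumnIndices().data(),  // column index per nonzero block (device)
    N,                            // number of block rows
    dInvDiagSketch.data());       // out: N inverted diagonal blocks, flattened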

View File

@@ -18,13 +18,13 @@
*/
#include <config.h>
#define BOOST_TEST_MODULE TestCuVectorOperations
#define BOOST_TEST_MODULE TestGpuVectorOperations
#include <boost/mpl/list.hpp>
#include <boost/test/unit_test.hpp>
#include <cuda_runtime.h>
#include <dune/istl/bcrsmatrix.hh>
#include <opm/simulators/linalg/cuistl/GpuJac.hpp>
#include <opm/simulators/linalg/cuistl/CuVector.hpp>
#include <opm/simulators/linalg/cuistl/GpuVector.hpp>
#include <opm/simulators/linalg/cuistl/PreconditionerAdapter.hpp>
#include <opm/simulators/linalg/cuistl/detail/cusparse_matrix_operations.hpp>
#include <opm/simulators/linalg/cuistl/detail/vector_operations.hpp>
@@ -47,9 +47,9 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(ElementWiseMultiplicationOf3By3BlockVectorAndVecto
std::vector<T> hostBlockVector({1.0, 2.0, 3.0, 5.0, 2.0, 3.0, 2.0, 1.0, 2.0});
std::vector<T> hostVecVector({3.0, 2.0, 1.0});
std::vector<T> hostDstVector({0, 0, 0});
Opm::gpuistl::CuVector<T> deviceBlockVector(hostBlockVector);
Opm::gpuistl::CuVector<T> deviceVecVector(hostVecVector);
Opm::gpuistl::CuVector<T> deviceDstVector(hostDstVector);
Opm::gpuistl::GpuVector<T> deviceBlockVector(hostBlockVector);
Opm::gpuistl::GpuVector<T> deviceVecVector(hostVecVector);
Opm::gpuistl::GpuVector<T> deviceDstVector(hostDstVector);
Opm::gpuistl::detail::weightedDiagMV(
deviceBlockVector.data(), N, blocksize, weight, deviceVecVector.data(), deviceDstVector.data());
@@ -81,9 +81,9 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(ElementWiseMultiplicationOf2By2BlockVectorAndVecto
std::vector<T> hostBlockVector({1.0, 2.0, 3.0, 4.0, 4.0, 3.0, 2.0, 1.0});
std::vector<T> hostVecVector({1.0, 3.0, 2.0, 4.0});
std::vector<T> hostDstVector({0, 0, 0, 0});
Opm::gpuistl::CuVector<T> deviceBlockVector(hostBlockVector);
Opm::gpuistl::CuVector<T> deviceVecVector(hostVecVector);
Opm::gpuistl::CuVector<T> deviceDstVector(hostDstVector);
Opm::gpuistl::GpuVector<T> deviceBlockVector(hostBlockVector);
Opm::gpuistl::GpuVector<T> deviceVecVector(hostVecVector);
Opm::gpuistl::GpuVector<T> deviceDstVector(hostDstVector);
Opm::gpuistl::detail::weightedDiagMV(
deviceBlockVector.data(), N, blocksize, weight, deviceVecVector.data(), deviceDstVector.data());
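
The dimensions in these two cases pin down what weightedDiagMV computes: with N diagonal blocks of size b, the block data holds N*b*b scalars and both vectors hold N*b, so the kernel forms dst = weight * D * v for the block-diagonal matrix D assembled from the flattened blocks. A standalone restatement of the 2x2 case, with row-major block storage assumed:

#include <opm/simulators/linalg/cuistl/GpuVector.hpp>
#include <opm/simulators/linalg/cuistl/detail/vector_operations.hpp>
#include <vector>

inline void weightedDiagMVSketch()
{
    const double weight = 0.5;
    std::vector<double> blocks {1.0, 2.0, 3.0, 4.0,  // block 0 (row-major assumed)
                                4.0, 3.0, 2.0, 1.0}; // block 1
    Opm::gpuistl::GpuVector<double> dBlocks(blocks);
    Opm::gpuistl::GpuVector<double> dV(std::vector<double> {1.0, 3.0, 2.0, 4.0});
    Opm::gpuistl::GpuVector<double> dDst(std::vector<double>(4, 0.0));
    Opm::gpuistl::detail::weightedDiagMV(dBlocks.data(), // N*b*b flattened block entries
                                         2,              // N: number of diagonal blocks
                                         2,              // b: block dimension
                                         weight,         // scalar weight
                                         dV.data(),      // input vector, length N*b
                                         dDst.data());   // output: weight * D * v
}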