Mirror of https://github.com/OPM/opm-simulators.git (synced 2025-02-25 18:55:30 -06:00)
commit fba1858f42 (parent 3aa1767548)

    refactor cuvector
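The change is a mechanical rename of the GPU vector type: the class Opm::gpuistl::CuVector becomes Opm::gpuistl::GpuVector, CuVector.hpp/CuVector.cpp become GpuVector.hpp/GpuVector.cpp, and the header guard, explicit template instantiations, and all call sites are updated to match. Other Cu-prefixed names that do not denote the vector itself (CuView, CuMat, CuBlasHandle, the cuistl/ include paths) are left untouched by this commit. A minimal sketch of the renamed API, adapted from the doxygen example updated in the diff below (illustrative only, not part of the commit):

    // Sketch of GpuVector usage after this rename; mirrors the doxygen
    // example in GpuVector.hpp. The function name is illustrative.
    #include <opm/simulators/linalg/cuistl/GpuVector.hpp> // formerly CuVector.hpp

    #include <vector>

    void someFunction()
    {
        auto someDataOnCPU = std::vector<double>({1.0, 2.0, 42.0, 59.9451743, 10.7132692});

        // Allocates GPU memory and does a synchronous CPU-to-GPU copy;
        // this was spelled CuVector<double> before the rename.
        auto dataOnGPU = Opm::gpuistl::GpuVector<double>(someDataOnCPU);

        // Arithmetic runs on the GPU (cuBLAS in the background):
        dataOnGPU *= 4.0;

        // Copy the result back to the host:
        auto result = dataOnGPU.asStdVector();
    }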
@ -553,7 +553,7 @@ if(CUDA_FOUND)
|
||||
cusparse_handle
|
||||
cuSparse_matrix_operations
|
||||
cuVector_operations
|
||||
cuvector
|
||||
GpuVector
|
||||
GpuSparseMatrix
|
||||
GpuSeqILU0
|
||||
GpuOwnerOverlapCopy
|
||||
|
@ -213,7 +213,7 @@ if (HAVE_CUDA)
|
||||
ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg detail/preconditionerKernels/DILUKernels.cu)
|
||||
ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg detail/preconditionerKernels/ILU0Kernels.cu)
|
||||
ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg detail/preconditionerKernels/JacKernels.cu)
|
||||
ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg CuVector.cpp)
|
||||
ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg GpuVector.cpp)
|
||||
ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg CuView.cpp)
|
||||
ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg detail/vector_operations.cu)
|
||||
ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg GpuSparseMatrix.cpp)
|
||||
@ -240,7 +240,7 @@ if (HAVE_CUDA)
|
||||
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg GpuDILU.hpp)
|
||||
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg OpmCuILU0.hpp)
|
||||
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg GpuJac.hpp)
|
||||
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg CuVector.hpp)
|
||||
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg GpuVector.hpp)
|
||||
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg CuView.hpp)
|
||||
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg GpuSparseMatrix.hpp)
|
||||
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg detail/CuMatrixDescription.hpp)
|
||||
@ -401,7 +401,7 @@ if (HAVE_CUDA)
|
||||
ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_cusparse_handle.cpp)
|
||||
ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_cuSparse_matrix_operations.cpp)
|
||||
ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_GpuSparseMatrix.cpp)
|
||||
ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_cuvector.cpp)
|
||||
ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_GpuVector.cpp)
|
||||
ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_cuVector_operations.cpp)
|
||||
ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_safe_conversion.cpp)
|
||||
ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_solver_adapter.cpp)
|
||||
|
@ -326,7 +326,7 @@ struct StandardPreconditioners {
|
||||
const double w = prm.get<double>("relaxation", 1.0);
|
||||
using field_type = typename V::field_type;
|
||||
using GpuILU0 = typename gpuistl::
|
||||
GpuSeqILU0<M, gpuistl::CuVector<field_type>, gpuistl::CuVector<field_type>>;
|
||||
GpuSeqILU0<M, gpuistl::GpuVector<field_type>, gpuistl::GpuVector<field_type>>;
|
||||
auto gpuILU0 = std::make_shared<GpuILU0>(op.getmat(), w);
|
||||
|
||||
auto adapted = std::make_shared<gpuistl::PreconditionerAdapter<V, V, GpuILU0>>(gpuILU0);
|
||||
@ -338,7 +338,7 @@ struct StandardPreconditioners {
|
||||
const double w = prm.get<double>("relaxation", 1.0);
|
||||
using field_type = typename V::field_type;
|
||||
using GpuJac =
|
||||
typename gpuistl::GpuJac<M, gpuistl::CuVector<field_type>, gpuistl::CuVector<field_type>>;
|
||||
typename gpuistl::GpuJac<M, gpuistl::GpuVector<field_type>, gpuistl::GpuVector<field_type>>;
|
||||
auto gpuJac = std::make_shared<GpuJac>(op.getmat(), w);
|
||||
|
||||
auto adapted = std::make_shared<gpuistl::PreconditionerAdapter<V, V, GpuJac>>(gpuJac);
|
||||
@ -350,7 +350,7 @@ struct StandardPreconditioners {
|
||||
const bool split_matrix = prm.get<bool>("split_matrix", true);
|
||||
const bool tune_gpu_kernels = prm.get<bool>("tune_gpu_kernels", true);
|
||||
using field_type = typename V::field_type;
|
||||
using GpuDILU = typename gpuistl::GpuDILU<M, gpuistl::CuVector<field_type>, gpuistl::CuVector<field_type>>;
|
||||
using GpuDILU = typename gpuistl::GpuDILU<M, gpuistl::GpuVector<field_type>, gpuistl::GpuVector<field_type>>;
|
||||
auto gpuDILU = std::make_shared<GpuDILU>(op.getmat(), split_matrix, tune_gpu_kernels);
|
||||
|
||||
auto adapted = std::make_shared<gpuistl::PreconditionerAdapter<V, V, GpuDILU>>(gpuDILU);
|
||||
@ -362,7 +362,7 @@ struct StandardPreconditioners {
|
||||
const bool split_matrix = prm.get<bool>("split_matrix", true);
|
||||
const bool tune_gpu_kernels = prm.get<bool>("tune_gpu_kernels", true);
|
||||
using field_type = typename V::field_type;
|
||||
using OpmCuILU0 = typename gpuistl::OpmCuILU0<M, gpuistl::CuVector<field_type>, gpuistl::CuVector<field_type>>;
|
||||
using OpmCuILU0 = typename gpuistl::OpmCuILU0<M, gpuistl::GpuVector<field_type>, gpuistl::GpuVector<field_type>>;
|
||||
auto cuilu0 = std::make_shared<OpmCuILU0>(op.getmat(), split_matrix, tune_gpu_kernels);
|
||||
|
||||
auto adapted = std::make_shared<gpuistl::PreconditionerAdapter<V, V, OpmCuILU0>>(cuilu0);
|
||||
@ -586,7 +586,7 @@ struct StandardPreconditioners<Operator, Dune::Amg::SequentialInformation> {
|
||||
const double w = prm.get<double>("relaxation", 1.0);
|
||||
using field_type = typename V::field_type;
|
||||
using GpuuILU0 = typename gpuistl::
|
||||
GpuSeqILU0<M, gpuistl::CuVector<field_type>, gpuistl::CuVector<field_type>>;
|
||||
GpuSeqILU0<M, gpuistl::GpuVector<field_type>, gpuistl::GpuVector<field_type>>;
|
||||
return std::make_shared<gpuistl::PreconditionerAdapter<V, V, GpuuILU0>>(
|
||||
std::make_shared<GpuuILU0>(op.getmat(), w));
|
||||
});
|
||||
@ -598,7 +598,7 @@ struct StandardPreconditioners<Operator, Dune::Amg::SequentialInformation> {
|
||||
using matrix_type_to =
|
||||
typename Dune::BCRSMatrix<Dune::FieldMatrix<float, block_type::dimension, block_type::dimension>>;
|
||||
using GpuuILU0 = typename gpuistl::
|
||||
GpuSeqILU0<matrix_type_to, gpuistl::CuVector<float>, gpuistl::CuVector<float>>;
|
||||
GpuSeqILU0<matrix_type_to, gpuistl::GpuVector<float>, gpuistl::GpuVector<float>>;
|
||||
using Adapter = typename gpuistl::PreconditionerAdapter<VTo, VTo, GpuuILU0>;
|
||||
using Converter = typename gpuistl::PreconditionerConvertFieldTypeAdapter<Adapter, M, V, V>;
|
||||
auto converted = std::make_shared<Converter>(op.getmat());
|
||||
@ -611,7 +611,7 @@ struct StandardPreconditioners<Operator, Dune::Amg::SequentialInformation> {
|
||||
const double w = prm.get<double>("relaxation", 1.0);
|
||||
using field_type = typename V::field_type;
|
||||
using GPUJac =
|
||||
typename gpuistl::GpuJac<M, gpuistl::CuVector<field_type>, gpuistl::CuVector<field_type>>;
|
||||
typename gpuistl::GpuJac<M, gpuistl::GpuVector<field_type>, gpuistl::GpuVector<field_type>>;
|
||||
return std::make_shared<gpuistl::PreconditionerAdapter<V, V, GPUJac>>(
|
||||
std::make_shared<GPUJac>(op.getmat(), w));
|
||||
});
|
||||
@ -620,7 +620,7 @@ struct StandardPreconditioners<Operator, Dune::Amg::SequentialInformation> {
|
||||
const bool split_matrix = prm.get<bool>("split_matrix", true);
|
||||
const bool tune_gpu_kernels = prm.get<bool>("tune_gpu_kernels", true);
|
||||
using field_type = typename V::field_type;
|
||||
using CUILU0 = typename gpuistl::OpmCuILU0<M, gpuistl::CuVector<field_type>, gpuistl::CuVector<field_type>>;
|
||||
using CUILU0 = typename gpuistl::OpmCuILU0<M, gpuistl::GpuVector<field_type>, gpuistl::GpuVector<field_type>>;
|
||||
|
||||
return std::make_shared<gpuistl::PreconditionerAdapter<V, V, CUILU0>>(std::make_shared<CUILU0>(op.getmat(), split_matrix, tune_gpu_kernels));
|
||||
});
|
||||
@ -629,7 +629,7 @@ struct StandardPreconditioners<Operator, Dune::Amg::SequentialInformation> {
|
||||
const bool split_matrix = prm.get<bool>("split_matrix", true);
|
||||
const bool tune_gpu_kernels = prm.get<bool>("tune_gpu_kernels", true);
|
||||
using field_type = typename V::field_type;
|
||||
using GPUDILU = typename gpuistl::GpuDILU<M, gpuistl::CuVector<field_type>, gpuistl::CuVector<field_type>>;
|
||||
using GPUDILU = typename gpuistl::GpuDILU<M, gpuistl::GpuVector<field_type>, gpuistl::GpuVector<field_type>>;
|
||||
return std::make_shared<gpuistl::PreconditionerAdapter<V, V, GPUDILU>>(std::make_shared<GPUDILU>(op.getmat(), split_matrix, tune_gpu_kernels));
|
||||
});
|
||||
|
||||
@ -639,7 +639,7 @@ struct StandardPreconditioners<Operator, Dune::Amg::SequentialInformation> {
|
||||
using block_type = typename V::block_type;
|
||||
using VTo = Dune::BlockVector<Dune::FieldVector<float, block_type::dimension>>;
|
||||
using matrix_type_to = typename Dune::BCRSMatrix<Dune::FieldMatrix<float, block_type::dimension, block_type::dimension>>;
|
||||
using GpuDILU = typename gpuistl::GpuDILU<matrix_type_to, gpuistl::CuVector<float>, gpuistl::CuVector<float>>;
|
||||
using GpuDILU = typename gpuistl::GpuDILU<matrix_type_to, gpuistl::GpuVector<float>, gpuistl::GpuVector<float>>;
|
||||
using Adapter = typename gpuistl::PreconditionerAdapter<VTo, VTo, GpuDILU>;
|
||||
using Converter = typename gpuistl::PreconditionerConvertFieldTypeAdapter<Adapter, M, V, V>;
|
||||
auto converted = std::make_shared<Converter>(op.getmat());
|
||||
|
@ -28,7 +28,7 @@
|
||||
#include <opm/simulators/linalg/cuistl/detail/autotuner.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/GpuDILU.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/GpuSparseMatrix.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/CuVector.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/GpuVector.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/coloringAndReorderingUtils.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/cusparse_matrix_operations.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/preconditionerKernels/DILUKernels.hpp>
|
||||
@ -71,7 +71,7 @@ GpuDILU<M, X, Y, l>::GpuDILU(const M& A, bool splitMatrix, bool tuneKernels)
|
||||
m_gpuMatrix.nonzeroes(),
|
||||
A.nonzeroes()));
|
||||
if (m_splitMatrix) {
|
||||
m_gpuMatrixReorderedDiag = std::make_unique<CuVector<field_type>>(blocksize_ * blocksize_ * m_cpuMatrix.N());
|
||||
m_gpuMatrixReorderedDiag = std::make_unique<GpuVector<field_type>>(blocksize_ * blocksize_ * m_cpuMatrix.N());
|
||||
std::tie(m_gpuMatrixReorderedLower, m_gpuMatrixReorderedUpper)
|
||||
= detail::extractLowerAndUpperMatrices<M, field_type, GpuSparseMatrix<field_type>>(m_cpuMatrix,
|
||||
m_reorderedToNatural);
|
||||
@ -278,8 +278,8 @@ GpuDILU<M, X, Y, l>::tuneThreadBlockSizes()
|
||||
m_DILUFactorizationThreadBlockSize = detail::tuneThreadBlockSize(tuneFactorizationThreadBlockSizeInUpdate, "Kernel computing DILU factorization");
|
||||
|
||||
// tune the thread-block size of the apply
|
||||
CuVector<field_type> tmpV(m_gpuMatrix.N() * m_gpuMatrix.blockSize());
|
||||
CuVector<field_type> tmpD(m_gpuMatrix.N() * m_gpuMatrix.blockSize());
|
||||
GpuVector<field_type> tmpV(m_gpuMatrix.N() * m_gpuMatrix.blockSize());
|
||||
GpuVector<field_type> tmpD(m_gpuMatrix.N() * m_gpuMatrix.blockSize());
|
||||
tmpD = 1;
|
||||
|
||||
auto tuneLowerSolveThreadBlockSizeInApply = [this, &tmpV, &tmpD](int lowerSolveThreadBlockSize){
|
||||
@ -296,11 +296,11 @@ GpuDILU<M, X, Y, l>::tuneThreadBlockSizes()
|
||||
} // namespace Opm::gpuistl
|
||||
#define INSTANTIATE_CUDILU_DUNE(realtype, blockdim) \
|
||||
template class ::Opm::gpuistl::GpuDILU<Dune::BCRSMatrix<Dune::FieldMatrix<realtype, blockdim, blockdim>>, \
|
||||
::Opm::gpuistl::CuVector<realtype>, \
|
||||
::Opm::gpuistl::CuVector<realtype>>; \
|
||||
::Opm::gpuistl::GpuVector<realtype>, \
|
||||
::Opm::gpuistl::GpuVector<realtype>>; \
|
||||
template class ::Opm::gpuistl::GpuDILU<Dune::BCRSMatrix<Opm::MatrixBlock<realtype, blockdim, blockdim>>, \
|
||||
::Opm::gpuistl::CuVector<realtype>, \
|
||||
::Opm::gpuistl::CuVector<realtype>>
|
||||
::Opm::gpuistl::GpuVector<realtype>, \
|
||||
::Opm::gpuistl::GpuVector<realtype>>
|
||||
|
||||
INSTANTIATE_CUDILU_DUNE(double, 1);
|
||||
INSTANTIATE_CUDILU_DUNE(double, 2);
|
||||
|
@ -37,7 +37,7 @@ namespace Opm::gpuistl
|
||||
//! \tparam l Ignored. Just there to have the same number of template arguments
|
||||
//! as other preconditioners.
|
||||
//!
|
||||
//! \note We assume X and Y are both CuVector<real_type>, but we leave them as template
|
||||
//! \note We assume X and Y are both GpuVector<real_type>, but we leave them as template
|
||||
//! arguments in case of future additions.
|
||||
template <class M, class X, class Y, int l = 1>
|
||||
class GpuDILU : public Dune::PreconditionerWithUpdate<X, Y>
|
||||
@ -126,13 +126,13 @@ private:
|
||||
std::unique_ptr<CuMat> m_gpuMatrixReorderedLower;
|
||||
std::unique_ptr<CuMat> m_gpuMatrixReorderedUpper;
|
||||
//! \brief If matrix splitting is enabled, we also store the diagonal separately
|
||||
std::unique_ptr<CuVector<field_type>> m_gpuMatrixReorderedDiag;
|
||||
std::unique_ptr<GpuVector<field_type>> m_gpuMatrixReorderedDiag;
|
||||
//! row conversion from natural to reordered matrix indices stored on the GPU
|
||||
CuVector<int> m_gpuNaturalToReorder;
|
||||
GpuVector<int> m_gpuNaturalToReorder;
|
||||
//! row conversion from reordered to natural matrix indices stored on the GPU
|
||||
CuVector<int> m_gpuReorderToNatural;
|
||||
GpuVector<int> m_gpuReorderToNatural;
|
||||
//! \brief Stores the inverted diagonal that we use in DILU
|
||||
CuVector<field_type> m_gpuDInv;
|
||||
GpuVector<field_type> m_gpuDInv;
|
||||
//! \brief Bool storing whether or not we should store matrices in a split format
|
||||
bool m_splitMatrix;
|
||||
//! \brief Bool storing whether or not we will tune the threadblock sizes. Only used for AMD cards
|
||||
|
@ -21,7 +21,7 @@
|
||||
#include <fmt/core.h>
|
||||
#include <opm/common/ErrorMacros.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/GpuJac.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/CuVector.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/GpuVector.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/preconditionerKernels/JacKernels.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/vector_operations.hpp>
|
||||
#include <opm/simulators/linalg/matrixblock.hh>
|
||||
@ -111,11 +111,11 @@ GpuJac<M, X, Y, l>::invertDiagonalAndFlatten()
|
||||
} // namespace Opm::gpuistl
|
||||
#define INSTANTIATE_CUJAC_DUNE(realtype, blockdim) \
|
||||
template class ::Opm::gpuistl::GpuJac<Dune::BCRSMatrix<Dune::FieldMatrix<realtype, blockdim, blockdim>>, \
|
||||
::Opm::gpuistl::CuVector<realtype>, \
|
||||
::Opm::gpuistl::CuVector<realtype>>; \
|
||||
::Opm::gpuistl::GpuVector<realtype>, \
|
||||
::Opm::gpuistl::GpuVector<realtype>>; \
|
||||
template class ::Opm::gpuistl::GpuJac<Dune::BCRSMatrix<Opm::MatrixBlock<realtype, blockdim, blockdim>>, \
|
||||
::Opm::gpuistl::CuVector<realtype>, \
|
||||
::Opm::gpuistl::CuVector<realtype>>
|
||||
::Opm::gpuistl::GpuVector<realtype>, \
|
||||
::Opm::gpuistl::GpuVector<realtype>>
|
||||
|
||||
INSTANTIATE_CUJAC_DUNE(double, 1);
|
||||
INSTANTIATE_CUJAC_DUNE(double, 2);
|
||||
|
@ -40,7 +40,7 @@ namespace Opm::gpuistl
|
||||
//! \tparam l Ignored. Just there to have the same number of template arguments
|
||||
//! as other preconditioners.
|
||||
//!
|
||||
//! \note We assume X and Y are both CuVector<real_type>, but we leave them as template
|
||||
//! \note We assume X and Y are both GpuVector<real_type>, but we leave them as template
|
||||
//! arguments in case of future additions.
|
||||
template <class M, class X, class Y, int l = 1>
|
||||
class GpuJac : public Dune::PreconditionerWithUpdate<X, Y>
|
||||
@ -106,7 +106,7 @@ private:
|
||||
//! \brief The A matrix stored on the gpu
|
||||
GpuSparseMatrix<field_type> m_gpuMatrix;
|
||||
//! \brief the diagonal of cuMatrix inverted, and then flattened to fit in a vector
|
||||
CuVector<field_type> m_diagInvFlattened;
|
||||
GpuVector<field_type> m_diagInvFlattened;
|
||||
|
||||
void invertDiagonalAndFlatten();
|
||||
};
|
||||
|
@ -21,7 +21,7 @@
|
||||
#include <dune/istl/owneroverlapcopy.hh>
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
#include <opm/simulators/linalg/cuistl/CuVector.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/GpuVector.hpp>
|
||||
#include <vector>
|
||||
|
||||
namespace Opm::gpuistl
|
||||
@ -36,7 +36,7 @@ namespace Opm::gpuistl
|
||||
template<class field_type, class OwnerOverlapCopyCommunicationType>
|
||||
class GPUSender {
|
||||
public:
|
||||
using X = CuVector<field_type>;
|
||||
using X = GpuVector<field_type>;
|
||||
|
||||
GPUSender(const OwnerOverlapCopyCommunicationType& cpuOwnerOverlapCopy) : m_cpuOwnerOverlapCopy(cpuOwnerOverlapCopy){}
|
||||
|
||||
@ -97,8 +97,8 @@ protected:
|
||||
// premature optimization, in the sense that we could just initialize these indices
|
||||
// always, but they are not always used.
|
||||
mutable std::once_flag m_initializedIndices;
|
||||
mutable std::unique_ptr<CuVector<int>> m_indicesOwner;
|
||||
mutable std::unique_ptr<CuVector<int>> m_indicesCopy;
|
||||
mutable std::unique_ptr<GpuVector<int>> m_indicesOwner;
|
||||
mutable std::unique_ptr<GpuVector<int>> m_indicesCopy;
|
||||
const OwnerOverlapCopyCommunicationType& m_cpuOwnerOverlapCopy;
|
||||
};
|
||||
|
||||
@ -113,7 +113,7 @@ template <class field_type, int block_size, class OwnerOverlapCopyCommunicationT
|
||||
class GPUObliviousMPISender : public GPUSender<field_type, OwnerOverlapCopyCommunicationType>
|
||||
{
|
||||
public:
|
||||
using X = CuVector<field_type>;
|
||||
using X = GpuVector<field_type>;
|
||||
|
||||
GPUObliviousMPISender(const OwnerOverlapCopyCommunicationType& cpuOwnerOverlapCopy)
|
||||
: GPUSender<field_type, OwnerOverlapCopyCommunicationType>(cpuOwnerOverlapCopy)
|
||||
@ -151,8 +151,8 @@ private:
|
||||
}
|
||||
}
|
||||
|
||||
this->m_indicesCopy = std::make_unique<CuVector<int>>(indicesCopyOnCPU);
|
||||
this->m_indicesOwner = std::make_unique<CuVector<int>>(indicesOwnerCPU);
|
||||
this->m_indicesCopy = std::make_unique<GpuVector<int>>(indicesCopyOnCPU);
|
||||
this->m_indicesOwner = std::make_unique<GpuVector<int>>(indicesOwnerCPU);
|
||||
}
|
||||
};
|
||||
|
||||
@ -168,7 +168,7 @@ template <class field_type, int block_size, class OwnerOverlapCopyCommunicationT
|
||||
class GPUAwareMPISender : public GPUSender<field_type, OwnerOverlapCopyCommunicationType>
|
||||
{
|
||||
public:
|
||||
using X = CuVector<field_type>;
|
||||
using X = GpuVector<field_type>;
|
||||
|
||||
GPUAwareMPISender(const OwnerOverlapCopyCommunicationType& cpuOwnerOverlapCopy)
|
||||
: GPUSender<field_type, OwnerOverlapCopyCommunicationType>(cpuOwnerOverlapCopy)
|
||||
@ -178,7 +178,7 @@ public:
|
||||
void copyOwnerToAll(const X& source, X& dest) const override
|
||||
{
|
||||
|
||||
OPM_ERROR_IF(&source != &dest, "The provided CuVectors' address did not match"); // In this context, source == dest!!!
|
||||
OPM_ERROR_IF(&source != &dest, "The provided GpuVectors' address did not match"); // In this context, source == dest!!!
|
||||
std::call_once(this->m_initializedIndices, [&]() { initIndexSet(); });
|
||||
|
||||
int rank = this->m_cpuOwnerOverlapCopy.communicator().rank();
|
||||
@ -251,10 +251,10 @@ public:
|
||||
}
|
||||
|
||||
private:
|
||||
mutable std::unique_ptr<CuVector<int>> m_commpairIndicesCopy;
|
||||
mutable std::unique_ptr<CuVector<int>> m_commpairIndicesOwner;
|
||||
mutable std::unique_ptr<CuVector<field_type>> m_GPUSendBuf;
|
||||
mutable std::unique_ptr<CuVector<field_type>> m_GPURecvBuf;
|
||||
mutable std::unique_ptr<GpuVector<int>> m_commpairIndicesCopy;
|
||||
mutable std::unique_ptr<GpuVector<int>> m_commpairIndicesOwner;
|
||||
mutable std::unique_ptr<GpuVector<field_type>> m_GPUSendBuf;
|
||||
mutable std::unique_ptr<GpuVector<field_type>> m_GPURecvBuf;
|
||||
|
||||
struct MessageInformation
|
||||
{
|
||||
@ -332,11 +332,11 @@ private:
|
||||
}
|
||||
}
|
||||
|
||||
m_commpairIndicesCopy = std::make_unique<CuVector<int>>(commpairIndicesCopyOnCPU);
|
||||
m_commpairIndicesOwner = std::make_unique<CuVector<int>>(commpairIndicesOwnerCPU);
|
||||
m_commpairIndicesCopy = std::make_unique<GpuVector<int>>(commpairIndicesCopyOnCPU);
|
||||
m_commpairIndicesOwner = std::make_unique<GpuVector<int>>(commpairIndicesOwnerCPU);
|
||||
|
||||
m_GPUSendBuf = std::make_unique<CuVector<field_type>>(sendBufIdx * block_size);
|
||||
m_GPURecvBuf = std::make_unique<CuVector<field_type>>(recvBufIdx * block_size);
|
||||
m_GPUSendBuf = std::make_unique<GpuVector<field_type>>(sendBufIdx * block_size);
|
||||
m_GPURecvBuf = std::make_unique<GpuVector<field_type>>(recvBufIdx * block_size);
|
||||
}
|
||||
|
||||
void initIndexSet() const override
|
||||
@ -360,8 +360,8 @@ private:
|
||||
}
|
||||
}
|
||||
|
||||
this->m_indicesCopy = std::make_unique<CuVector<int>>(indicesCopyOnCPU);
|
||||
this->m_indicesOwner = std::make_unique<CuVector<int>>(indicesOwnerCPU);
|
||||
this->m_indicesCopy = std::make_unique<GpuVector<int>>(indicesCopyOnCPU);
|
||||
this->m_indicesOwner = std::make_unique<GpuVector<int>>(indicesOwnerCPU);
|
||||
|
||||
buildCommPairIdxs();
|
||||
}
|
||||
@ -371,11 +371,11 @@ private:
|
||||
* @brief CUDA compatiable variant of Dune::OwnerOverlapCopyCommunication
|
||||
*
|
||||
* This class can essentially be seen as an adapter around Dune::OwnerOverlapCopyCommunication, and should work as
|
||||
* a Dune::OwnerOverlapCopyCommunication on CuVectors
|
||||
* a Dune::OwnerOverlapCopyCommunication on GpuVectors
|
||||
*
|
||||
* @note This currently only has the functionality to parallelize the linear solve.
|
||||
*
|
||||
* @tparam field_type should be a field_type supported by CuVector (double, float)
|
||||
* @tparam field_type should be a field_type supported by GpuVector (double, float)
|
||||
* @tparam block_size the block size used (this is relevant for say figuring out the correct indices)
|
||||
* @tparam OwnerOverlapCopyCommunicationType should mimic Dune::OwnerOverlapCopyCommunication.
|
||||
*/
|
||||
@ -383,7 +383,7 @@ template <class field_type, int block_size, class OwnerOverlapCopyCommunicationT
|
||||
class GpuOwnerOverlapCopy
|
||||
{
|
||||
public:
|
||||
using X = CuVector<field_type>;
|
||||
using X = GpuVector<field_type>;
|
||||
|
||||
GpuOwnerOverlapCopy(std::shared_ptr<GPUSender<field_type, OwnerOverlapCopyCommunicationType>> sender) : m_sender(sender){}
|
||||
|
||||
|
@ -332,7 +332,7 @@ GpuSeqILU0<M, X, Y, l>::updateILUConfiguration()
|
||||
{
|
||||
auto bufferSize = findBufferSize();
|
||||
if (!m_buffer || m_buffer->dim() < bufferSize) {
|
||||
m_buffer.reset(new CuVector<field_type>((bufferSize + sizeof(field_type) - 1) / sizeof(field_type)));
|
||||
m_buffer.reset(new GpuVector<field_type>((bufferSize + sizeof(field_type) - 1) / sizeof(field_type)));
|
||||
}
|
||||
analyzeMatrix();
|
||||
createILU();
|
||||
@ -340,11 +340,11 @@ GpuSeqILU0<M, X, Y, l>::updateILUConfiguration()
|
||||
} // namespace Opm::gpuistl
|
||||
#define INSTANTIATE_GPUSEQILU0_DUNE(realtype, blockdim) \
|
||||
template class ::Opm::gpuistl::GpuSeqILU0<Dune::BCRSMatrix<Dune::FieldMatrix<realtype, blockdim, blockdim>>, \
|
||||
::Opm::gpuistl::CuVector<realtype>, \
|
||||
::Opm::gpuistl::CuVector<realtype>>; \
|
||||
::Opm::gpuistl::GpuVector<realtype>, \
|
||||
::Opm::gpuistl::GpuVector<realtype>>; \
|
||||
template class ::Opm::gpuistl::GpuSeqILU0<Dune::BCRSMatrix<Opm::MatrixBlock<realtype, blockdim, blockdim>>, \
|
||||
::Opm::gpuistl::CuVector<realtype>, \
|
||||
::Opm::gpuistl::CuVector<realtype>>
|
||||
::Opm::gpuistl::GpuVector<realtype>, \
|
||||
::Opm::gpuistl::GpuVector<realtype>>
|
||||
|
||||
|
||||
INSTANTIATE_GPUSEQILU0_DUNE(double, 1);
|
||||
|
@ -43,7 +43,7 @@ namespace Opm::gpuistl
|
||||
//! \tparam l Ignored. Just there to have the same number of template arguments
|
||||
//! as other preconditioners.
|
||||
//!
|
||||
//! \note We assume X and Y are both CuVector<real_type>, but we leave them as template
|
||||
//! \note We assume X and Y are both GpuVector<real_type>, but we leave them as template
|
||||
//! arguments in case of future additions.
|
||||
template <class M, class X, class Y, int l = 1>
|
||||
class GpuSeqILU0 : public Dune::PreconditionerWithUpdate<X, Y>
|
||||
@ -112,7 +112,7 @@ private:
|
||||
//! modified in the constructor to be the proper LU decomposition.
|
||||
GpuSparseMatrix<field_type> m_LU;
|
||||
|
||||
CuVector<field_type> m_temporaryStorage;
|
||||
GpuVector<field_type> m_temporaryStorage;
|
||||
|
||||
|
||||
detail::GpuSparseMatrixDescriptionPtr m_descriptionL;
|
||||
@ -121,7 +121,7 @@ private:
|
||||
detail::CuSparseResource<bsrsv2Info_t> m_infoU;
|
||||
detail::CuSparseResource<bsrilu02Info_t> m_infoM;
|
||||
|
||||
std::unique_ptr<CuVector<field_type>> m_buffer;
|
||||
std::unique_ptr<GpuVector<field_type>> m_buffer;
|
||||
detail::CuSparseHandle& m_cuSparseHandle;
|
||||
|
||||
bool m_analysisDone = false;
|
||||
|
@ -198,7 +198,7 @@ GpuSparseMatrix<T>::setNonUnitDiagonal()
|
||||
|
||||
template <typename T>
|
||||
void
|
||||
GpuSparseMatrix<T>::mv(const CuVector<T>& x, CuVector<T>& y) const
|
||||
GpuSparseMatrix<T>::mv(const GpuVector<T>& x, GpuVector<T>& y) const
|
||||
{
|
||||
assertSameSize(x);
|
||||
assertSameSize(y);
|
||||
@ -232,7 +232,7 @@ GpuSparseMatrix<T>::mv(const CuVector<T>& x, CuVector<T>& y) const
|
||||
|
||||
template <typename T>
|
||||
void
|
||||
GpuSparseMatrix<T>::umv(const CuVector<T>& x, CuVector<T>& y) const
|
||||
GpuSparseMatrix<T>::umv(const GpuVector<T>& x, GpuVector<T>& y) const
|
||||
{
|
||||
assertSameSize(x);
|
||||
assertSameSize(y);
|
||||
@ -267,7 +267,7 @@ GpuSparseMatrix<T>::umv(const CuVector<T>& x, CuVector<T>& y) const
|
||||
|
||||
template <typename T>
|
||||
void
|
||||
GpuSparseMatrix<T>::usmv(T alpha, const CuVector<T>& x, CuVector<T>& y) const
|
||||
GpuSparseMatrix<T>::usmv(T alpha, const GpuVector<T>& x, GpuVector<T>& y) const
|
||||
{
|
||||
assertSameSize(x);
|
||||
assertSameSize(y);
|
||||
|
@ -22,7 +22,7 @@
|
||||
#include <iostream>
|
||||
#include <memory>
|
||||
#include <opm/common/ErrorMacros.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/CuVector.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/GpuVector.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/CuMatrixDescription.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/CuSparseHandle.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/safe_conversion.hpp>
|
||||
@ -144,7 +144,7 @@ public:
|
||||
*
|
||||
* @note Read the CuSPARSE documentation on Block Compressed Sparse Row Format (BSR) for the exact ordering.
|
||||
*/
|
||||
CuVector<T>& getNonZeroValues()
|
||||
GpuVector<T>& getNonZeroValues()
|
||||
{
|
||||
return m_nonZeroElements;
|
||||
}
|
||||
@ -154,7 +154,7 @@ public:
|
||||
*
|
||||
* @note Read the CuSPARSE documentation on Block Compressed Sparse Row Format (BSR) for the exact ordering.
|
||||
*/
|
||||
const CuVector<T>& getNonZeroValues() const
|
||||
const GpuVector<T>& getNonZeroValues() const
|
||||
{
|
||||
return m_nonZeroElements;
|
||||
}
|
||||
@ -164,7 +164,7 @@ public:
|
||||
*
|
||||
* @note Read the CuSPARSE documentation on Block Compressed Sparse Row Format (BSR) for the exact ordering.
|
||||
*/
|
||||
CuVector<int>& getRowIndices()
|
||||
GpuVector<int>& getRowIndices()
|
||||
{
|
||||
return m_rowIndices;
|
||||
}
|
||||
@ -174,7 +174,7 @@ public:
|
||||
*
|
||||
* @note Read the CuSPARSE documentation on Block Compressed Sparse Row Format (BSR) for the exact ordering.
|
||||
*/
|
||||
const CuVector<int>& getRowIndices() const
|
||||
const GpuVector<int>& getRowIndices() const
|
||||
{
|
||||
return m_rowIndices;
|
||||
}
|
||||
@ -184,7 +184,7 @@ public:
|
||||
*
|
||||
* @return Read the CuSPARSE documentation on Block Compressed Sparse Row Format (BSR) for the exact ordering.
|
||||
*/
|
||||
CuVector<int>& getColumnIndices()
|
||||
GpuVector<int>& getColumnIndices()
|
||||
{
|
||||
return m_columnIndices;
|
||||
}
|
||||
@ -194,7 +194,7 @@ public:
|
||||
*
|
||||
* @return Read the CuSPARSE documentation on Block Compressed Sparse Row Format (BSR) for the exact ordering.
|
||||
*/
|
||||
const CuVector<int>& getColumnIndices() const
|
||||
const GpuVector<int>& getColumnIndices() const
|
||||
{
|
||||
return m_columnIndices;
|
||||
}
|
||||
@ -245,7 +245,7 @@ public:
|
||||
*
|
||||
* @note Due to limitations of CuSparse, this is only supported for block sizes greater than 1.
|
||||
*/
|
||||
virtual void mv(const CuVector<T>& x, CuVector<T>& y) const;
|
||||
virtual void mv(const GpuVector<T>& x, GpuVector<T>& y) const;
|
||||
|
||||
/**
|
||||
* @brief umv computes y=Ax+y
|
||||
@ -254,7 +254,7 @@ public:
|
||||
*
|
||||
* @note Due to limitations of CuSparse, this is only supported for block sizes greater than 1.
|
||||
*/
|
||||
virtual void umv(const CuVector<T>& x, CuVector<T>& y) const;
|
||||
virtual void umv(const GpuVector<T>& x, GpuVector<T>& y) const;
|
||||
|
||||
|
||||
/**
|
||||
@ -264,7 +264,7 @@ public:
|
||||
*
|
||||
* @note Due to limitations of CuSparse, this is only supported for block sizes greater than 1.
|
||||
*/
|
||||
virtual void usmv(T alpha, const CuVector<T>& x, CuVector<T>& y) const;
|
||||
virtual void usmv(T alpha, const GpuVector<T>& x, GpuVector<T>& y) const;
|
||||
|
||||
/**
|
||||
* @brief updateNonzeroValues updates the non-zero values by using the non-zero values of the supplied matrix
|
||||
@ -280,9 +280,9 @@ public:
|
||||
void updateNonzeroValues(const MatrixType& matrix, bool copyNonZeroElementsDirectly = false);
|
||||
|
||||
private:
|
||||
CuVector<T> m_nonZeroElements;
|
||||
CuVector<int> m_columnIndices;
|
||||
CuVector<int> m_rowIndices;
|
||||
GpuVector<T> m_nonZeroElements;
|
||||
GpuVector<int> m_columnIndices;
|
||||
GpuVector<int> m_rowIndices;
|
||||
|
||||
// Notice that we store these three as int to make sure we are cusparse compatible.
|
||||
//
|
||||
|
@ -20,7 +20,7 @@
|
||||
#include <cuda.h>
|
||||
#include <cuda_runtime.h>
|
||||
#include <fmt/core.h>
|
||||
#include <opm/simulators/linalg/cuistl/CuVector.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/GpuVector.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/cublas_safe_call.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/cublas_wrapper.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp>
|
||||
@ -30,13 +30,13 @@ namespace Opm::gpuistl
|
||||
{
|
||||
|
||||
template <class T>
|
||||
CuVector<T>::CuVector(const std::vector<T>& data)
|
||||
: CuVector(data.data(), detail::to_int(data.size()))
|
||||
GpuVector<T>::GpuVector(const std::vector<T>& data)
|
||||
: GpuVector(data.data(), detail::to_int(data.size()))
|
||||
{
|
||||
}
|
||||
|
||||
template <class T>
|
||||
CuVector<T>::CuVector(const size_t numberOfElements)
|
||||
GpuVector<T>::GpuVector(const size_t numberOfElements)
|
||||
: m_numberOfElements(detail::to_int(numberOfElements))
|
||||
, m_cuBlasHandle(detail::CuBlasHandle::getInstance())
|
||||
{
|
||||
@ -44,8 +44,8 @@ CuVector<T>::CuVector(const size_t numberOfElements)
|
||||
}
|
||||
|
||||
template <class T>
|
||||
CuVector<T>::CuVector(const T* dataOnHost, const size_t numberOfElements)
|
||||
: CuVector(numberOfElements)
|
||||
GpuVector<T>::GpuVector(const T* dataOnHost, const size_t numberOfElements)
|
||||
: GpuVector(numberOfElements)
|
||||
{
|
||||
|
||||
OPM_CUDA_SAFE_CALL(cudaMemcpy(
|
||||
@ -53,8 +53,8 @@ CuVector<T>::CuVector(const T* dataOnHost, const size_t numberOfElements)
|
||||
}
|
||||
|
||||
template <class T>
|
||||
CuVector<T>&
|
||||
CuVector<T>::operator=(T scalar)
|
||||
GpuVector<T>&
|
||||
GpuVector<T>::operator=(T scalar)
|
||||
{
|
||||
assertHasElements();
|
||||
detail::setVectorValue(data(), detail::to_size_t(m_numberOfElements), scalar);
|
||||
@ -62,8 +62,8 @@ CuVector<T>::operator=(T scalar)
|
||||
}
|
||||
|
||||
template <class T>
|
||||
CuVector<T>&
|
||||
CuVector<T>::operator=(const CuVector<T>& other)
|
||||
GpuVector<T>&
|
||||
GpuVector<T>::operator=(const GpuVector<T>& other)
|
||||
{
|
||||
assertHasElements();
|
||||
assertSameSize(other);
|
||||
@ -76,8 +76,8 @@ CuVector<T>::operator=(const CuVector<T>& other)
|
||||
}
|
||||
|
||||
template <class T>
|
||||
CuVector<T>::CuVector(const CuVector<T>& other)
|
||||
: CuVector(other.m_numberOfElements)
|
||||
GpuVector<T>::GpuVector(const GpuVector<T>& other)
|
||||
: GpuVector(other.m_numberOfElements)
|
||||
{
|
||||
assertHasElements();
|
||||
assertSameSize(other);
|
||||
@ -88,21 +88,21 @@ CuVector<T>::CuVector(const CuVector<T>& other)
|
||||
}
|
||||
|
||||
template <class T>
|
||||
CuVector<T>::~CuVector()
|
||||
GpuVector<T>::~GpuVector()
|
||||
{
|
||||
OPM_CUDA_WARN_IF_ERROR(cudaFree(m_dataOnDevice));
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
const T*
|
||||
CuVector<T>::data() const
|
||||
GpuVector<T>::data() const
|
||||
{
|
||||
return m_dataOnDevice;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
typename CuVector<T>::size_type
|
||||
CuVector<T>::dim() const
|
||||
typename GpuVector<T>::size_type
|
||||
GpuVector<T>::dim() const
|
||||
{
|
||||
// Note that there is no way for m_numberOfElements to be non-positive,
|
||||
// but for sanity we still use the safe conversion function here.
|
||||
@ -114,7 +114,7 @@ CuVector<T>::dim() const
|
||||
|
||||
template <typename T>
|
||||
std::vector<T>
|
||||
CuVector<T>::asStdVector() const
|
||||
GpuVector<T>::asStdVector() const
|
||||
{
|
||||
std::vector<T> temporary(detail::to_size_t(m_numberOfElements));
|
||||
copyToHost(temporary);
|
||||
@ -123,21 +123,21 @@ CuVector<T>::asStdVector() const
|
||||
|
||||
template <typename T>
|
||||
void
|
||||
CuVector<T>::setZeroAtIndexSet(const CuVector<int>& indexSet)
|
||||
GpuVector<T>::setZeroAtIndexSet(const GpuVector<int>& indexSet)
|
||||
{
|
||||
detail::setZeroAtIndexSet(m_dataOnDevice, indexSet.dim(), indexSet.data());
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void
|
||||
CuVector<T>::assertSameSize(const CuVector<T>& x) const
|
||||
GpuVector<T>::assertSameSize(const GpuVector<T>& x) const
|
||||
{
|
||||
assertSameSize(x.m_numberOfElements);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void
|
||||
CuVector<T>::assertSameSize(int size) const
|
||||
GpuVector<T>::assertSameSize(int size) const
|
||||
{
|
||||
if (size != m_numberOfElements) {
|
||||
OPM_THROW(std::invalid_argument,
|
||||
@ -147,7 +147,7 @@ CuVector<T>::assertSameSize(int size) const
|
||||
|
||||
template <typename T>
|
||||
void
|
||||
CuVector<T>::assertHasElements() const
|
||||
GpuVector<T>::assertHasElements() const
|
||||
{
|
||||
if (m_numberOfElements <= 0) {
|
||||
OPM_THROW(std::invalid_argument, "We have 0 elements");
|
||||
@ -156,14 +156,14 @@ CuVector<T>::assertHasElements() const
|
||||
|
||||
template <typename T>
|
||||
T*
|
||||
CuVector<T>::data()
|
||||
GpuVector<T>::data()
|
||||
{
|
||||
return m_dataOnDevice;
|
||||
}
|
||||
|
||||
template <class T>
|
||||
CuVector<T>&
|
||||
CuVector<T>::operator*=(const T& scalar)
|
||||
GpuVector<T>&
|
||||
GpuVector<T>::operator*=(const T& scalar)
|
||||
{
|
||||
assertHasElements();
|
||||
OPM_CUBLAS_SAFE_CALL(detail::cublasScal(m_cuBlasHandle.get(), m_numberOfElements, &scalar, data(), 1));
|
||||
@ -171,8 +171,8 @@ CuVector<T>::operator*=(const T& scalar)
|
||||
}
|
||||
|
||||
template <class T>
|
||||
CuVector<T>&
|
||||
CuVector<T>::axpy(T alpha, const CuVector<T>& y)
|
||||
GpuVector<T>&
|
||||
GpuVector<T>::axpy(T alpha, const GpuVector<T>& y)
|
||||
{
|
||||
assertHasElements();
|
||||
assertSameSize(y);
|
||||
@ -182,7 +182,7 @@ CuVector<T>::axpy(T alpha, const CuVector<T>& y)
|
||||
|
||||
template <class T>
|
||||
T
|
||||
CuVector<T>::dot(const CuVector<T>& other) const
|
||||
GpuVector<T>::dot(const GpuVector<T>& other) const
|
||||
{
|
||||
assertHasElements();
|
||||
assertSameSize(other);
|
||||
@ -193,7 +193,7 @@ CuVector<T>::dot(const CuVector<T>& other) const
|
||||
}
|
||||
template <class T>
|
||||
T
|
||||
CuVector<T>::two_norm() const
|
||||
GpuVector<T>::two_norm() const
|
||||
{
|
||||
assertHasElements();
|
||||
T result = T(0);
|
||||
@ -203,14 +203,14 @@ CuVector<T>::two_norm() const
|
||||
|
||||
template <typename T>
|
||||
T
|
||||
CuVector<T>::dot(const CuVector<T>& other, const CuVector<int>& indexSet, CuVector<T>& buffer) const
|
||||
GpuVector<T>::dot(const GpuVector<T>& other, const GpuVector<int>& indexSet, GpuVector<T>& buffer) const
|
||||
{
|
||||
return detail::innerProductAtIndices(m_cuBlasHandle.get(), m_dataOnDevice, other.data(), buffer.data(), indexSet.dim(), indexSet.data());
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
T
|
||||
CuVector<T>::two_norm(const CuVector<int>& indexSet, CuVector<T>& buffer) const
|
||||
GpuVector<T>::two_norm(const GpuVector<int>& indexSet, GpuVector<T>& buffer) const
|
||||
{
|
||||
// TODO: [perf] Optimize this to a single call
|
||||
return std::sqrt(this->dot(*this, indexSet, buffer));
|
||||
@ -218,23 +218,23 @@ CuVector<T>::two_norm(const CuVector<int>& indexSet, CuVector<T>& buffer) const
|
||||
|
||||
template <typename T>
|
||||
T
|
||||
CuVector<T>::dot(const CuVector<T>& other, const CuVector<int>& indexSet) const
|
||||
GpuVector<T>::dot(const GpuVector<T>& other, const GpuVector<int>& indexSet) const
|
||||
{
|
||||
CuVector<T> buffer(indexSet.dim());
|
||||
GpuVector<T> buffer(indexSet.dim());
|
||||
return detail::innerProductAtIndices(m_cuBlasHandle.get(), m_dataOnDevice, other.data(), buffer.data(), indexSet.dim(), indexSet.data());
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
T
|
||||
CuVector<T>::two_norm(const CuVector<int>& indexSet) const
|
||||
GpuVector<T>::two_norm(const GpuVector<int>& indexSet) const
|
||||
{
|
||||
CuVector<T> buffer(indexSet.dim());
|
||||
GpuVector<T> buffer(indexSet.dim());
|
||||
// TODO: [perf] Optimize this to a single call
|
||||
return std::sqrt(this->dot(*this, indexSet, buffer));
|
||||
}
|
||||
template <class T>
|
||||
CuVector<T>&
|
||||
CuVector<T>::operator+=(const CuVector<T>& other)
|
||||
GpuVector<T>&
|
||||
GpuVector<T>::operator+=(const GpuVector<T>& other)
|
||||
{
|
||||
assertHasElements();
|
||||
assertSameSize(other);
|
||||
@ -243,8 +243,8 @@ CuVector<T>::operator+=(const CuVector<T>& other)
|
||||
}
|
||||
|
||||
template <class T>
|
||||
CuVector<T>&
|
||||
CuVector<T>::operator-=(const CuVector<T>& other)
|
||||
GpuVector<T>&
|
||||
GpuVector<T>::operator-=(const GpuVector<T>& other)
|
||||
{
|
||||
assertHasElements();
|
||||
assertSameSize(other);
|
||||
@ -255,7 +255,7 @@ CuVector<T>::operator-=(const CuVector<T>& other)
|
||||
|
||||
template <class T>
|
||||
void
|
||||
CuVector<T>::copyFromHost(const T* dataPointer, size_t numberOfElements)
|
||||
GpuVector<T>::copyFromHost(const T* dataPointer, size_t numberOfElements)
|
||||
{
|
||||
if (numberOfElements > dim()) {
|
||||
OPM_THROW(std::runtime_error,
|
||||
@ -268,7 +268,7 @@ CuVector<T>::copyFromHost(const T* dataPointer, size_t numberOfElements)
|
||||
|
||||
template <class T>
|
||||
void
|
||||
CuVector<T>::copyToHost(T* dataPointer, size_t numberOfElements) const
|
||||
GpuVector<T>::copyToHost(T* dataPointer, size_t numberOfElements) const
|
||||
{
|
||||
assertSameSize(detail::to_int(numberOfElements));
|
||||
OPM_CUDA_SAFE_CALL(cudaMemcpy(dataPointer, data(), numberOfElements * sizeof(T), cudaMemcpyDeviceToHost));
|
||||
@ -276,32 +276,32 @@ CuVector<T>::copyToHost(T* dataPointer, size_t numberOfElements) const
|
||||
|
||||
template <class T>
|
||||
void
|
||||
CuVector<T>::copyFromHost(const std::vector<T>& data)
|
||||
GpuVector<T>::copyFromHost(const std::vector<T>& data)
|
||||
{
|
||||
copyFromHost(data.data(), data.size());
|
||||
}
|
||||
template <class T>
|
||||
void
|
||||
CuVector<T>::copyToHost(std::vector<T>& data) const
|
||||
GpuVector<T>::copyToHost(std::vector<T>& data) const
|
||||
{
|
||||
copyToHost(data.data(), data.size());
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void
|
||||
CuVector<T>::prepareSendBuf(CuVector<T>& buffer, const CuVector<int>& indexSet) const
|
||||
GpuVector<T>::prepareSendBuf(GpuVector<T>& buffer, const GpuVector<int>& indexSet) const
|
||||
{
|
||||
return detail::prepareSendBuf(m_dataOnDevice, buffer.data(), indexSet.dim(), indexSet.data());
|
||||
}
|
||||
template <typename T>
|
||||
void
|
||||
CuVector<T>::syncFromRecvBuf(CuVector<T>& buffer, const CuVector<int>& indexSet) const
|
||||
GpuVector<T>::syncFromRecvBuf(GpuVector<T>& buffer, const GpuVector<int>& indexSet) const
|
||||
{
|
||||
return detail::syncFromRecvBuf(m_dataOnDevice, buffer.data(), indexSet.dim(), indexSet.data());
|
||||
}
|
||||
|
||||
template class CuVector<double>;
|
||||
template class CuVector<float>;
|
||||
template class CuVector<int>;
|
||||
template class GpuVector<double>;
|
||||
template class GpuVector<float>;
|
||||
template class GpuVector<int>;
|
||||
|
||||
} // namespace Opm::gpuistl
|
@ -16,8 +16,8 @@
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with OPM. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
#ifndef OPM_CUVECTOR_HEADER_HPP
|
||||
#define OPM_CUVECTOR_HEADER_HPP
|
||||
#ifndef OPM_GPUVECTOR_HEADER_HPP
|
||||
#define OPM_GPUVECTOR_HEADER_HPP
|
||||
#include <dune/common/fvector.hh>
|
||||
#include <dune/istl/bvector.hh>
|
||||
#include <exception>
|
||||
@ -33,7 +33,7 @@ namespace Opm::gpuistl
|
||||
{
|
||||
|
||||
/**
|
||||
* @brief The CuVector class is a simple (arithmetic) vector class for the GPU.
|
||||
* @brief The GpuVector class is a simple (arithmetic) vector class for the GPU.
|
||||
*
|
||||
* @note we currently only support simple raw primitives for T (double, float and int)
|
||||
*
|
||||
@ -45,12 +45,12 @@ namespace Opm::gpuistl
|
||||
* Example usage:
|
||||
*
|
||||
* @code{.cpp}
|
||||
* #include <opm/simulators/linalg/cuistl/CuVector.hpp>
|
||||
* #include <opm/simulators/linalg/cuistl/GpuVector.hpp>
|
||||
*
|
||||
* void someFunction() {
|
||||
* auto someDataOnCPU = std::vector<double>({1.0, 2.0, 42.0, 59.9451743, 10.7132692});
|
||||
*
|
||||
* auto dataOnGPU = CuVector<double>(someDataOnCPU);
|
||||
* auto dataOnGPU = GpuVector<double>(someDataOnCPU);
|
||||
*
|
||||
* // Multiply by 4.0:
|
||||
* dataOnGPU *= 4.0;
|
||||
@ -62,7 +62,7 @@ namespace Opm::gpuistl
|
||||
* @tparam T the type to store. Can be either float, double or int.
|
||||
*/
|
||||
template <typename T>
|
||||
class CuVector
|
||||
class GpuVector
|
||||
{
|
||||
public:
|
||||
using field_type = T;
|
||||
@ -70,17 +70,17 @@ public:
|
||||
|
||||
|
||||
/**
|
||||
* @brief CuVector allocates new GPU memory of the same size as other and copies the content of the other vector to
|
||||
* @brief GpuVector allocates new GPU memory of the same size as other and copies the content of the other vector to
|
||||
* this newly allocated memory.
|
||||
*
|
||||
* @note This does synchronous transfer.
|
||||
*
|
||||
* @param other the vector to copy from
|
||||
*/
|
||||
CuVector(const CuVector<T>& other);
|
||||
GpuVector(const GpuVector<T>& other);
|
||||
|
||||
/**
|
||||
* @brief CuVector allocates new GPU memory of the same size as data and copies the content of the data vector to
|
||||
* @brief GpuVector allocates new GPU memory of the same size as data and copies the content of the data vector to
|
||||
* this newly allocated memory.
|
||||
*
|
||||
* @note This does CPU to GPU transfer.
|
||||
@ -90,7 +90,7 @@ public:
|
||||
*
|
||||
* @param data the vector to copy from
|
||||
*/
|
||||
explicit CuVector(const std::vector<T>& data);
|
||||
explicit GpuVector(const std::vector<T>& data);
|
||||
|
||||
/**
|
||||
* @brief operator= copies the content of the data vector to the memory of this vector.
|
||||
@ -100,7 +100,7 @@ public:
|
||||
*
|
||||
* @param other the vector to copy from
|
||||
*/
|
||||
CuVector& operator=(const CuVector<T>& other);
|
||||
GpuVector& operator=(const GpuVector<T>& other);
|
||||
|
||||
/**
|
||||
* @brief operator= sets the whole vector equal to the scalar value.
|
||||
@ -109,20 +109,20 @@ public:
|
||||
*
|
||||
* @param scalar the value all elements will be set to.
|
||||
*/
|
||||
CuVector& operator=(T scalar);
|
||||
GpuVector& operator=(T scalar);
|
||||
|
||||
/**
|
||||
* @brief CuVector allocates new GPU memory of size numberOfElements * sizeof(T)
|
||||
* @brief GpuVector allocates new GPU memory of size numberOfElements * sizeof(T)
|
||||
*
|
||||
* @note For now numberOfElements needs to be within the limits of int due to restrictions in cublas
|
||||
*
|
||||
* @param numberOfElements number of T elements to allocate
|
||||
*/
|
||||
explicit CuVector(const size_t numberOfElements);
|
||||
explicit GpuVector(const size_t numberOfElements);
|
||||
|
||||
|
||||
/**
|
||||
* @brief CuVector allocates new GPU memory of size numberOfElements * sizeof(T) and copies numberOfElements from
|
||||
* @brief GpuVector allocates new GPU memory of size numberOfElements * sizeof(T) and copies numberOfElements from
|
||||
* data
|
||||
*
|
||||
* @note This assumes the data is on the CPU.
|
||||
@ -132,12 +132,12 @@ public:
|
||||
*
|
||||
* @note For now numberOfElements needs to be within the limits of int due to restrictions in cublas
|
||||
*/
|
||||
CuVector(const T* dataOnHost, const size_t numberOfElements);
|
||||
GpuVector(const T* dataOnHost, const size_t numberOfElements);
|
||||
|
||||
/**
|
||||
* @brief ~CuVector calls cudaFree
|
||||
* @brief ~GpuVector calls cudaFree
|
||||
*/
|
||||
virtual ~CuVector();
|
||||
virtual ~GpuVector();
|
||||
|
||||
/**
|
||||
* @return the raw pointer to the GPU data
|
||||
@ -162,7 +162,7 @@ public:
|
||||
// TODO: [perf] vector.dim() can be replaced by bvector.N() * BlockDimension
|
||||
if (detail::to_size_t(m_numberOfElements) != bvector.dim()) {
|
||||
OPM_THROW(std::runtime_error,
|
||||
fmt::format("Given incompatible vector size. CuVector has size {}, \n"
|
||||
fmt::format("Given incompatible vector size. GpuVector has size {}, \n"
|
||||
"however, BlockVector has N() = {}, and dim = {}.",
|
||||
m_numberOfElements,
|
||||
bvector.N(),
|
||||
@ -185,7 +185,7 @@ public:
|
||||
// TODO: [perf] vector.dim() can be replaced by bvector.N() * BlockDimension
|
||||
if (detail::to_size_t(m_numberOfElements) != bvector.dim()) {
|
||||
OPM_THROW(std::runtime_error,
|
||||
fmt::format("Given incompatible vector size. CuVector has size {},\n however, the BlockVector "
|
||||
fmt::format("Given incompatible vector size. GpuVector has size {},\n however, the BlockVector "
|
||||
"has has N() = {}, and dim() = {}.",
|
||||
m_numberOfElements,
|
||||
bvector.N(),
|
||||
@ -231,8 +231,8 @@ public:
|
||||
*/
|
||||
void copyToHost(std::vector<T>& data) const;
|
||||
|
||||
void prepareSendBuf(CuVector<T>& buffer, const CuVector<int>& indexSet) const;
|
||||
void syncFromRecvBuf(CuVector<T>& buffer, const CuVector<int>& indexSet) const;
|
||||
void prepareSendBuf(GpuVector<T>& buffer, const GpuVector<int>& indexSet) const;
|
||||
void syncFromRecvBuf(GpuVector<T>& buffer, const GpuVector<int>& indexSet) const;
|
||||
|
||||
/**
|
||||
* @brief operator *= multiplies every element by scalar
|
||||
@ -242,7 +242,7 @@ public:
|
||||
*
|
||||
* @note int is not supported
|
||||
*/
|
||||
CuVector<T>& operator*=(const T& scalar);
|
||||
GpuVector<T>& operator*=(const T& scalar);
|
||||
|
||||
/**
|
||||
* @brief axpy sets this vector equal to this + alha * y
|
||||
@ -252,7 +252,7 @@ public:
|
||||
* @note this will call CuBlas in the background
|
||||
* @note int is not supported
|
||||
*/
|
||||
CuVector<T>& axpy(T alpha, const CuVector<T>& y);
|
||||
GpuVector<T>& axpy(T alpha, const GpuVector<T>& y);
|
||||
|
||||
/**
|
||||
* @brief operator+= adds the other vector to this vector
|
||||
@ -260,7 +260,7 @@ public:
|
||||
* @note this will call CuBlas in the background
|
||||
* @note int is not supported
|
||||
*/
|
||||
CuVector<T>& operator+=(const CuVector<T>& other);
|
||||
GpuVector<T>& operator+=(const GpuVector<T>& other);
|
||||
|
||||
/**
|
||||
* @brief operator-= subtracts the other vector from this vector
|
||||
@ -268,7 +268,7 @@ public:
|
||||
* @note this will call CuBlas in the background
|
||||
* @note int is not supported
|
||||
*/
|
||||
CuVector<T>& operator-=(const CuVector<T>& other);
|
||||
GpuVector<T>& operator-=(const GpuVector<T>& other);
|
||||
|
||||
/**
|
||||
* @brief dot computes the dot product (standard inner product) against the other vector
|
||||
@ -278,7 +278,7 @@ public:
|
||||
*
|
||||
* @return the result on the inner product
|
||||
*/
|
||||
T dot(const CuVector<T>& other) const;
|
||||
T dot(const GpuVector<T>& other) const;
|
||||
|
||||
/**
|
||||
* @brief returns the l2 norm of the vector
|
||||
@ -294,14 +294,14 @@ public:
|
||||
*
|
||||
* @note int is not supported
|
||||
*/
|
||||
T dot(const CuVector<T>& other, const CuVector<int>& indexSet, CuVector<T>& buffer) const;
|
||||
T dot(const GpuVector<T>& other, const GpuVector<int>& indexSet, GpuVector<T>& buffer) const;
|
||||
|
||||
/**
|
||||
* Computes the norm sqrt(sum_i this[indexSet[i]] * this[indexSet[i]])
|
||||
*
|
||||
* @note int is not supported
|
||||
*/
|
||||
T two_norm(const CuVector<int>& indexSet, CuVector<T>& buffer) const;
|
||||
T two_norm(const GpuVector<int>& indexSet, GpuVector<T>& buffer) const;
|
||||
|
||||
|
||||
/**
|
||||
@ -309,14 +309,14 @@ public:
|
||||
*
|
||||
* @note int is not supported
|
||||
*/
|
||||
T dot(const CuVector<T>& other, const CuVector<int>& indexSet) const;
|
||||
T dot(const GpuVector<T>& other, const GpuVector<int>& indexSet) const;
|
||||
|
||||
/**
|
||||
* Computes the norm sqrt(sum_i this[indexSet[i]] * this[indexSet[i]])
|
||||
*
|
||||
* @note int is not supported
|
||||
*/
|
||||
T two_norm(const CuVector<int>& indexSet) const;
|
||||
T two_norm(const GpuVector<int>& indexSet) const;
|
||||
|
||||
|
||||
/**
|
||||
@ -363,9 +363,9 @@ public:
|
||||
* }
|
||||
* @endcode
|
||||
*/
|
||||
void setZeroAtIndexSet(const CuVector<int>& indexSet);
|
||||
void setZeroAtIndexSet(const GpuVector<int>& indexSet);
|
||||
|
||||
// Slow method that creates a string representation of a CuVector for debug purposes
|
||||
// Slow method that creates a string representation of a GpuVector for debug purposes
|
||||
std::string toDebugString()
|
||||
{
|
||||
std::vector<T> v = asStdVector();
|
||||
@ -385,7 +385,7 @@ private:
|
||||
const int m_numberOfElements;
|
||||
detail::CuBlasHandle& m_cuBlasHandle;
|
||||
|
||||
void assertSameSize(const CuVector<T>& other) const;
|
||||
void assertSameSize(const GpuVector<T>& other) const;
|
||||
void assertSameSize(int size) const;
|
||||
|
||||
void assertHasElements() const;
|
@ -27,7 +27,7 @@
|
||||
#include <opm/common/TimingMacros.hpp>
|
||||
#include <opm/simulators/linalg/GraphColoring.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/GpuSparseMatrix.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/CuVector.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/GpuVector.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/OpmCuILU0.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/autotuner.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/coloringAndReorderingUtils.hpp>
|
||||
@ -72,7 +72,7 @@ OpmCuILU0<M, X, Y, l>::OpmCuILU0(const M& A, bool splitMatrix, bool tuneKernels)
|
||||
m_gpuMatrix.nonzeroes(),
|
||||
A.nonzeroes()));
|
||||
if (m_splitMatrix) {
|
||||
m_gpuMatrixReorderedDiag.emplace(CuVector<field_type>(blocksize_ * blocksize_ * m_cpuMatrix.N()));
|
||||
m_gpuMatrixReorderedDiag.emplace(GpuVector<field_type>(blocksize_ * blocksize_ * m_cpuMatrix.N()));
|
||||
std::tie(m_gpuMatrixReorderedLower, m_gpuMatrixReorderedUpper)
|
||||
= detail::extractLowerAndUpperMatrices<M, field_type, GpuSparseMatrix<field_type>>(m_cpuMatrix,
|
||||
m_reorderedToNatural);
|
||||
@ -272,8 +272,8 @@ OpmCuILU0<M, X, Y, l>::tuneThreadBlockSizes()
|
||||
= detail::tuneThreadBlockSize(tuneFactorizationThreadBlockSizeInUpdate, "Kernel computing ILU0 factorization");
|
||||
|
||||
// tune the thread-block size of the apply
|
||||
CuVector<field_type> tmpV(m_gpuMatrix.N() * m_gpuMatrix.blockSize());
|
||||
CuVector<field_type> tmpD(m_gpuMatrix.N() * m_gpuMatrix.blockSize());
|
||||
GpuVector<field_type> tmpV(m_gpuMatrix.N() * m_gpuMatrix.blockSize());
|
||||
GpuVector<field_type> tmpD(m_gpuMatrix.N() * m_gpuMatrix.blockSize());
|
||||
tmpD = 1;
|
||||
|
||||
auto tuneLowerSolveThreadBlockSizeInApply = [this, &tmpV, &tmpD](int lowerSolveThreadBlockSize) {
|
||||
@ -292,11 +292,11 @@ OpmCuILU0<M, X, Y, l>::tuneThreadBlockSizes()
|
||||
} // namespace Opm::gpuistl
|
||||
#define INSTANTIATE_CUDILU_DUNE(realtype, blockdim) \
|
||||
template class ::Opm::gpuistl::OpmCuILU0<Dune::BCRSMatrix<Dune::FieldMatrix<realtype, blockdim, blockdim>>, \
|
||||
::Opm::gpuistl::CuVector<realtype>, \
|
||||
::Opm::gpuistl::CuVector<realtype>>; \
|
||||
::Opm::gpuistl::GpuVector<realtype>, \
|
||||
::Opm::gpuistl::GpuVector<realtype>>; \
|
||||
template class ::Opm::gpuistl::OpmCuILU0<Dune::BCRSMatrix<Opm::MatrixBlock<realtype, blockdim, blockdim>>, \
|
||||
::Opm::gpuistl::CuVector<realtype>, \
|
||||
::Opm::gpuistl::CuVector<realtype>>
|
||||
::Opm::gpuistl::GpuVector<realtype>, \
|
||||
::Opm::gpuistl::GpuVector<realtype>>
|
||||
|
||||
INSTANTIATE_CUDILU_DUNE(double, 1);
|
||||
INSTANTIATE_CUDILU_DUNE(double, 2);
|
||||
|
@ -23,7 +23,7 @@
|
||||
#include <opm/grid/utility/SparseTable.hpp>
|
||||
#include <opm/simulators/linalg/PreconditionerWithUpdate.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/GpuSparseMatrix.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/CuVector.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/GpuVector.hpp>
|
||||
#include <optional>
|
||||
#include <type_traits>
|
||||
#include <vector>
|
||||
@ -39,7 +39,7 @@ namespace Opm::gpuistl
|
||||
//! \tparam l Ignored. Just there to have the same number of template arguments
|
||||
//! as other preconditioners.
|
||||
//!
|
||||
//! \note We assume X and Y are both CuVector<real_type>, but we leave them as template
|
||||
//! \note We assume X and Y are both GpuVector<real_type>, but we leave them as template
|
||||
//! arguments in case of future additions.
|
||||
template <class M, class X, class Y, int l = 1>
|
||||
class OpmCuILU0 : public Dune::PreconditionerWithUpdate<X, Y>
|
||||
@ -126,13 +126,13 @@ private:
|
||||
std::unique_ptr<CuMat> m_gpuMatrixReorderedLower;
|
||||
std::unique_ptr<CuMat> m_gpuMatrixReorderedUpper;
|
||||
//! \brief If matrix splitting is enabled, we also store the diagonal separately
|
||||
std::optional<CuVector<field_type>> m_gpuMatrixReorderedDiag;
|
||||
std::optional<GpuVector<field_type>> m_gpuMatrixReorderedDiag;
|
||||
//! row conversion from natural to reordered matrix indices stored on the GPU
|
||||
CuVector<int> m_gpuNaturalToReorder;
|
||||
GpuVector<int> m_gpuNaturalToReorder;
|
||||
//! row conversion from reordered to natural matrix indices stored on the GPU
|
||||
CuVector<int> m_gpuReorderToNatural;
|
||||
GpuVector<int> m_gpuReorderToNatural;
|
||||
//! \brief Stores the inverted diagonal that we use in ILU0
|
||||
CuVector<field_type> m_gpuDInv;
|
||||
GpuVector<field_type> m_gpuDInv;
|
||||
//! \brief Bool storing whether or not we should store matrices in a split format
|
||||
bool m_splitMatrix;
|
||||
//! \brief Bool storing whether or not we will tune the threadblock sizes. Only used for AMD cards
|
||||
|
@ -21,7 +21,7 @@
|
||||
#include <cusparse.h>
|
||||
#include <dune/istl/preconditioner.hh>
|
||||
#include <opm/simulators/linalg/PreconditionerWithUpdate.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/CuVector.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/GpuVector.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/PreconditionerHolder.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/preconditioner_should_call_post_pre.hpp>
|
||||
|
||||
@ -35,11 +35,11 @@ namespace Opm::gpuistl
|
||||
//!
|
||||
//! \tparam X the domain type (should be on the CPU). Typicall a Dune::BlockVector
|
||||
//! \tparam Y the range type (should be on the CPU). Typicall a Dune::BlockVector
|
||||
//! \tparam CudaPreconditionerType the preconditioner taking CuVector<real_type> as arguments to apply
|
||||
//! \tparam CudaPreconditionerType the preconditioner taking GpuVector<real_type> as arguments to apply
|
||||
template <class X, class Y, class CudaPreconditionerType>
|
||||
class PreconditionerAdapter
|
||||
: public Dune::PreconditionerWithUpdate<X, Y>,
|
||||
public PreconditionerHolder<CuVector<typename X::field_type>, CuVector<typename Y::field_type>>
|
||||
public PreconditionerHolder<GpuVector<typename X::field_type>, GpuVector<typename Y::field_type>>
|
||||
{
|
||||
public:
|
||||
//! \brief The domain type of the preconditioner.
|
||||
@ -77,8 +77,8 @@ public:
virtual void apply(X& v, const Y& d) override
{
if (!m_inputBuffer) {
m_inputBuffer.reset(new CuVector<field_type>(v.dim()));
m_outputBuffer.reset(new CuVector<field_type>(v.dim()));
m_inputBuffer.reset(new GpuVector<field_type>(v.dim()));
m_outputBuffer.reset(new GpuVector<field_type>(v.dim()));
}
m_inputBuffer->copyFromHost(d);
m_underlyingPreconditioner->apply(*m_outputBuffer, *m_inputBuffer);
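apply() makes the adapter's round trip explicit: the defect d is copied host-to-device, the GPU preconditioner runs, and the result is copied back into v. A hedged usage sketch; SpMatrix, Vector, and the assembled matrix A are assumed from the surrounding test context:

    // Wrap a GPU preconditioner so a CPU-side Dune solver can drive it.
    using GpuV = Opm::gpuistl::GpuVector<double>;
    using GpuILU0 = Opm::gpuistl::GpuSeqILU0<SpMatrix, GpuV, GpuV>;
    auto gpuILU0 = std::make_shared<GpuILU0>(A, 1.0); // relaxation w = 1.0
    auto adapted = std::make_shared<Opm::gpuistl::PreconditionerAdapter<Vector, Vector, GpuILU0>>(gpuILU0);
    // adapted->apply(v, d) now runs the GPU preconditioner transparently.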
@ -117,7 +117,7 @@ public:
return detail::shouldCallPreconditionerPre<CudaPreconditionerType>();
}

virtual std::shared_ptr<Dune::PreconditionerWithUpdate<CuVector<field_type>, CuVector<field_type>>>
virtual std::shared_ptr<Dune::PreconditionerWithUpdate<GpuVector<field_type>, GpuVector<field_type>>>
getUnderlyingPreconditioner() override
{
return m_underlyingPreconditioner;
@ -131,8 +131,8 @@ private:
//! \brief the underlying preconditioner to use
std::shared_ptr<CudaPreconditionerType> m_underlyingPreconditioner;

std::unique_ptr<CuVector<field_type>> m_inputBuffer;
std::unique_ptr<CuVector<field_type>> m_outputBuffer;
std::unique_ptr<GpuVector<field_type>> m_inputBuffer;
std::unique_ptr<GpuVector<field_type>> m_outputBuffer;
};
} // end namespace Opm::gpuistl

@ -29,7 +29,7 @@
#include <opm/simulators/linalg/cuistl/GpuBlockPreconditioner.hpp>
#include <opm/simulators/linalg/cuistl/GpuOwnerOverlapCopy.hpp>
#include <opm/simulators/linalg/cuistl/GpuSparseMatrix.hpp>
#include <opm/simulators/linalg/cuistl/CuVector.hpp>
#include <opm/simulators/linalg/cuistl/GpuVector.hpp>
#include <opm/simulators/linalg/cuistl/PreconditionerAdapter.hpp>
#include <opm/simulators/linalg/cuistl/detail/has_function.hpp>

@ -56,7 +56,7 @@ public:
using typename Dune::IterativeSolver<X, X>::real_type;
using typename Dune::IterativeSolver<X, X>::scalar_real_type;
static constexpr auto block_size = domain_type::block_type::dimension;
using XGPU = Opm::gpuistl::CuVector<real_type>;
using XGPU = Opm::gpuistl::GpuVector<real_type>;

// TODO: Use a std::forward
SolverAdapter(Operator& op,
@ -18,7 +18,7 @@
*/
#include <config.h>
#include <opm/common/ErrorMacros.hpp>
#include <opm/simulators/linalg/cuistl/CuVector.hpp>
#include <opm/simulators/linalg/cuistl/GpuVector.hpp>
#include <opm/simulators/linalg/cuistl/detail/cublas_safe_call.hpp>
#include <opm/simulators/linalg/cuistl/detail/cublas_wrapper.hpp>
#include <opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp>
@ -143,7 +143,7 @@ innerProductAtIndices(cublasHandle_t cublasHandle,
elementWiseMultiplyKernel<<<nThreadBlocks, threadBlockSize>>>(deviceA, deviceB, buffer, numberOfElements, indices);

// TODO: [perf] Get rid of the allocation here.
CuVector<T> oneVector(numberOfElements);
GpuVector<T> oneVector(numberOfElements);
oneVector = 1.0;
T result = 0.0;
OPM_CUBLAS_SAFE_CALL(cublasDot(cublasHandle, numberOfElements, oneVector.data(), 1, buffer, 1, &result));
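The kernel writes the masked elementwise products into buffer, and the dot product against a vector of ones reduces them, via the identity <1, a∘b> = Σ_i a_i·b_i. A hedged host-side model of the whole routine, for reference only:

    // Model of innerProductAtIndices: sum a[idx]*b[idx] over the index set.
    template <class T>
    T innerProductAtIndicesModel(const std::vector<T>& a, const std::vector<T>& b,
                                 const std::vector<int>& indices)
    {
        T result = 0;
        for (const int idx : indices) {
            result += a[idx] * b[idx]; // buffer[i] = a[idx]*b[idx]; dot with ones sums
        }
        return result;
    }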
@ -65,11 +65,11 @@ void syncFromRecvBuf(T* deviceA, T* buffer, size_t numberOfElements, const int*
/**
* @brief Compute the weighted matrix vector product where the matrix is diagonal and the diagonal is stored as a
* vector, i.e. we compute a (block) Hadamard product.
* @param squareBlockVector A CuVector whose elements are NxN matrix blocks
* @param squareBlockVector A GpuVector whose elements are NxN matrix blocks
* @param numberOfRows The number of rows in the vector
* @param blocksize The side length of the square block elements in the vector
* @param src_vec A pointer to the data of the CuVector we multiply the blockvector with
* @param[out] dst_vec A pointer to the data of the CuVector we store the result in
* @param src_vec A pointer to the data of the GpuVector we multiply the blockvector with
* @param[out] dst_vec A pointer to the data of the GpuVector we store the result in
*
* @note This is implemented as a faster way to multiply a diagonal matrix with a blockvector. We need only store the
* diagonal of the matrix and use this product.
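A hedged host-side model of weightedDiagMV matching the parameter list above: dst = weight · D · src, where D is block diagonal with blocksize×blocksize blocks stored contiguously per row:

    // Reference model of dst_vec = weight * blockdiag(squareBlockVector) * src_vec.
    template <class T>
    void weightedDiagMVModel(const std::vector<T>& blocks, int numberOfRows, int blocksize,
                             T weight, const std::vector<T>& src, std::vector<T>& dst)
    {
        for (int row = 0; row < numberOfRows; ++row) {
            for (int i = 0; i < blocksize; ++i) {
                T sum = 0;
                for (int j = 0; j < blocksize; ++j) {
                    sum += blocks[(row * blocksize + i) * blocksize + j] * src[row * blocksize + j];
                }
                dst[row * blocksize + i] = weight * sum;
            }
        }
    }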
@ -27,7 +27,7 @@
#include <opm/simulators/linalg/DILU.hpp>
#include <opm/simulators/linalg/cuistl/GpuDILU.hpp>
#include <opm/simulators/linalg/cuistl/GpuSparseMatrix.hpp>
#include <opm/simulators/linalg/cuistl/CuVector.hpp>
#include <opm/simulators/linalg/cuistl/GpuVector.hpp>
#include <opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp>
#include <opm/simulators/linalg/cuistl/detail/cusparse_matrix_operations.hpp>
#include <random>
@ -42,8 +42,8 @@ using B2x2Vec = Dune::BlockVector<Dune::FieldVector<double, 2>>;
using Sp1x1BlockMatrix = Dune::BCRSMatrix<FM1x1>;
using Sp2x2BlockMatrix = Dune::BCRSMatrix<FM2x2>;
using CuMatrix = Opm::gpuistl::GpuSparseMatrix<T>;
using CuIntVec = Opm::gpuistl::CuVector<int>;
using CuFloatingPointVec = Opm::gpuistl::CuVector<T>;
using CuIntVec = Opm::gpuistl::GpuVector<int>;
using CuFloatingPointVec = Opm::gpuistl::GpuVector<T>;
using GpuDilu1x1 = Opm::gpuistl::GpuDILU<Sp1x1BlockMatrix, CuFloatingPointVec, CuFloatingPointVec>;
using GpuDilu2x2 = Opm::gpuistl::GpuDILU<Sp2x2BlockMatrix, CuFloatingPointVec, CuFloatingPointVec>;

@ -25,7 +25,7 @@
#include <dune/istl/bcrsmatrix.hh>
#include <opm/simulators/linalg/cuistl/GpuJac.hpp>
#include <opm/simulators/linalg/cuistl/GpuSparseMatrix.hpp>
#include <opm/simulators/linalg/cuistl/CuVector.hpp>
#include <opm/simulators/linalg/cuistl/GpuVector.hpp>
#include <opm/simulators/linalg/cuistl/PreconditionerAdapter.hpp>
#include <opm/simulators/linalg/cuistl/detail/cusparse_matrix_operations.hpp>
#include <opm/simulators/linalg/cuistl/detail/fix_zero_diagonal.hpp>
@ -49,7 +49,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(GPUJACApplyBlocksize2, T, NumericTypes)
using M = Dune::FieldMatrix<T, blocksize, blocksize>;
using SpMatrix = Dune::BCRSMatrix<M>;
using Vector = Dune::BlockVector<Dune::FieldVector<T, blocksize>>;
using GpuJac = Opm::gpuistl::GpuJac<SpMatrix, Opm::gpuistl::CuVector<T>, Opm::gpuistl::CuVector<T>>;
using GpuJac = Opm::gpuistl::GpuJac<SpMatrix, Opm::gpuistl::GpuVector<T>, Opm::gpuistl::GpuVector<T>>;

SpMatrix B(N, N, nonZeroes, SpMatrix::row_wise);
for (auto row = B.createbegin(); row != B.createend(); ++row) {
@ -103,7 +103,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(GPUJACApplyBlocksize1, T, NumericTypes)
using M = Dune::FieldMatrix<T, blocksize, blocksize>;
using SpMatrix = Dune::BCRSMatrix<M>;
using Vector = Dune::BlockVector<Dune::FieldVector<T, blocksize>>;
using GpuJac = Opm::gpuistl::GpuJac<SpMatrix, Opm::gpuistl::CuVector<T>, Opm::gpuistl::CuVector<T>>;
using GpuJac = Opm::gpuistl::GpuJac<SpMatrix, Opm::gpuistl::GpuVector<T>, Opm::gpuistl::GpuVector<T>>;

SpMatrix B(N, N, nonZeroes, SpMatrix::row_wise);
for (auto row = B.createbegin(); row != B.createend(); ++row) {
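Both tests use Dune's two-phase BCRS construction: declare the sparsity pattern first, then assign values. A hedged sketch of that pattern with an illustrative layout (the actual pattern used by the tests is outside this excerpt):

    // row_wise build: insert column indices per row, then set values.
    SpMatrix B(N, N, nonZeroes, SpMatrix::row_wise);
    for (auto row = B.createbegin(); row != B.createend(); ++row) {
        row.insert(row.index());             // diagonal entry
        if (row.index() + 1 < N) {
            row.insert(row.index() + 1);     // illustrative off-diagonal
        }
    }
    B[0][0] = 1.0; // values may only be assigned once the pattern is complete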
@ -27,7 +27,7 @@
#include <dune/istl/owneroverlapcopy.hh>
#include <memory>
#include <opm/simulators/linalg/cuistl/GpuOwnerOverlapCopy.hpp>
#include <opm/simulators/linalg/cuistl/CuVector.hpp>
#include <opm/simulators/linalg/cuistl/GpuVector.hpp>
#include <opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp>
#include <opm/simulators/linalg/cuistl/set_device.hpp>
#include <random>
@ -62,7 +62,7 @@ BOOST_AUTO_TEST_CASE(TestProject)

auto ownerOverlapCopy = Dune::OwnerOverlapCopyCommunication<int>(indexInfo, MPI_COMM_WORLD);
auto xCPU = std::vector<double> {{1.0, 2.0, 3.0}};
auto xGPU = Opm::gpuistl::CuVector<double>(xCPU);
auto xGPU = Opm::gpuistl::GpuVector<double>(xCPU);

auto gpuComm = std::make_shared<Opm::gpuistl::GPUObliviousMPISender<double, 1, Dune::OwnerOverlapCopyCommunication<int>>>(ownerOverlapCopy);

@ -94,7 +94,7 @@ BOOST_AUTO_TEST_CASE(TestDot)
indexInfo.addRemoteIndex(std::make_tuple(0, 2, Dune::OwnerOverlapCopyAttributeSet::copy));
auto ownerOverlapCopy = Dune::OwnerOverlapCopyCommunication<int>(indexInfo, MPI_COMM_WORLD);
auto xCPU = std::vector<double> {{1.0, 2.0, 3.0}};
auto xGPU = Opm::gpuistl::CuVector<double>(xCPU);
auto xGPU = Opm::gpuistl::GpuVector<double>(xCPU);

auto gpuComm = std::make_shared<Opm::gpuistl::GPUObliviousMPISender<double, 1, Dune::OwnerOverlapCopyCommunication<int>>>(ownerOverlapCopy);

@ -28,7 +28,7 @@
#include <dune/istl/bcrsmatrix.hh>
#include <dune/istl/preconditioners.hh>
#include <opm/simulators/linalg/cuistl/GpuSeqILU0.hpp>
#include <opm/simulators/linalg/cuistl/CuVector.hpp>
#include <opm/simulators/linalg/cuistl/GpuVector.hpp>
#include <opm/simulators/linalg/cuistl/PreconditionerAdapter.hpp>
#include <opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp>

@ -63,7 +63,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(TestFiniteDifference1D, T, NumericTypes)
using M = Dune::FieldMatrix<T, 1, 1>;
using SpMatrix = Dune::BCRSMatrix<M>;
using Vector = Dune::BlockVector<Dune::FieldVector<T, 1>>;
using GpuILU0 = Opm::gpuistl::GpuSeqILU0<SpMatrix, Opm::gpuistl::CuVector<T>, Opm::gpuistl::CuVector<T>>;
using GpuILU0 = Opm::gpuistl::GpuSeqILU0<SpMatrix, Opm::gpuistl::GpuVector<T>, Opm::gpuistl::GpuVector<T>>;

SpMatrix B(N, N, nonZeroes, SpMatrix::row_wise);
for (auto row = B.createbegin(); row != B.createend(); ++row) {
@ -158,7 +158,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(TestFiniteDifferenceBlock2, T, NumericTypes)
using M = Dune::FieldMatrix<T, 2, 2>;
using SpMatrix = Dune::BCRSMatrix<M>;
using Vector = Dune::BlockVector<Dune::FieldVector<T, 2>>;
using GpuILU0 = Opm::gpuistl::GpuSeqILU0<SpMatrix, Opm::gpuistl::CuVector<T>, Opm::gpuistl::CuVector<T>>;
using GpuILU0 = Opm::gpuistl::GpuSeqILU0<SpMatrix, Opm::gpuistl::GpuVector<T>, Opm::gpuistl::GpuVector<T>>;

SpMatrix B(N, N, nonZeroes, SpMatrix::row_wise);
for (auto row = B.createbegin(); row != B.createend(); ++row) {
@ -24,7 +24,7 @@
#include <dune/istl/bcrsmatrix.hh>
#include <memory>
#include <opm/simulators/linalg/cuistl/GpuSparseMatrix.hpp>
#include <opm/simulators/linalg/cuistl/CuVector.hpp>
#include <opm/simulators/linalg/cuistl/GpuVector.hpp>
#include <opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp>
#include <random>

@ -149,8 +149,8 @@ BOOST_AUTO_TEST_CASE(RandomSparsityMatrix)
std::vector<double> inputDataX(N * dim, 0.0);
inputDataX[component] = 1.0;
std::vector<double> inputDataY(N * dim, .25);
auto inputVectorX = Opm::gpuistl::CuVector<double>(inputDataX.data(), inputDataX.size());
auto inputVectorY = Opm::gpuistl::CuVector<double>(inputDataY.data(), inputDataY.size());
auto inputVectorX = Opm::gpuistl::GpuVector<double>(inputDataX.data(), inputDataX.size());
auto inputVectorY = Opm::gpuistl::GpuVector<double>(inputDataY.data(), inputDataY.size());
Vector xHost(N), yHost(N);
yHost = inputDataY[0];
inputVectorX.copyToHost(xHost);
@ -18,13 +18,13 @@
*/
#include <config.h>

#define BOOST_TEST_MODULE TestCuVector
#define BOOST_TEST_MODULE TestGpuVector

#include <boost/test/unit_test.hpp>
#include <cuda_runtime.h>
#include <dune/common/fvector.hh>
#include <dune/istl/bvector.hh>
#include <opm/simulators/linalg/cuistl/CuVector.hpp>
#include <opm/simulators/linalg/cuistl/GpuVector.hpp>
#include <opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp>
#include <random>

@ -32,7 +32,7 @@ BOOST_AUTO_TEST_CASE(TestDocumentedUsage)
{
auto someDataOnCPU = std::vector<double>({1.0, 2.0, 42.0, 59.9451743, 10.7132692});

auto dataOnGPU = ::Opm::gpuistl::CuVector<double>(someDataOnCPU);
auto dataOnGPU = ::Opm::gpuistl::GpuVector<double>(someDataOnCPU);

// Multiply by 4.0:
dataOnGPU *= 4.0;
@ -50,14 +50,14 @@ BOOST_AUTO_TEST_CASE(TestDocumentedUsage)
BOOST_AUTO_TEST_CASE(TestConstructionSize)
{
const int numberOfElements = 1234;
auto vectorOnGPU = Opm::gpuistl::CuVector<double>(numberOfElements);
auto vectorOnGPU = Opm::gpuistl::GpuVector<double>(numberOfElements);
BOOST_CHECK_EQUAL(numberOfElements, vectorOnGPU.dim());
}

BOOST_AUTO_TEST_CASE(TestCopyFromHostConstructor)
{
std::vector<double> data {{1, 2, 3, 4, 5, 6, 7}};
auto vectorOnGPU = Opm::gpuistl::CuVector<double>(data.data(), data.size());
auto vectorOnGPU = Opm::gpuistl::GpuVector<double>(data.data(), data.size());
BOOST_CHECK_EQUAL(data.size(), vectorOnGPU.dim());
std::vector<double> buffer(data.size(), 0.0);
vectorOnGPU.copyToHost(buffer.data(), buffer.size());
@ -68,7 +68,7 @@ BOOST_AUTO_TEST_CASE(TestCopyFromHostConstructor)
BOOST_AUTO_TEST_CASE(TestCopyFromHostFunction)
{
std::vector<double> data {{1, 2, 3, 4, 5, 6, 7}};
auto vectorOnGPU = Opm::gpuistl::CuVector<double>(data.size());
auto vectorOnGPU = Opm::gpuistl::GpuVector<double>(data.size());
BOOST_CHECK_EQUAL(data.size(), vectorOnGPU.dim());
vectorOnGPU.copyFromHost(data.data(), data.size());
std::vector<double> buffer(data.size(), 0.0);
@ -80,7 +80,7 @@ BOOST_AUTO_TEST_CASE(TestCopyFromHostFunction)
BOOST_AUTO_TEST_CASE(TestCopyFromBvector)
{
auto blockVector = Dune::BlockVector<Dune::FieldVector<double, 2>> {{{42, 43}, {44, 45}, {46, 47}}};
auto vectorOnGPU = Opm::gpuistl::CuVector<double>(blockVector.dim());
auto vectorOnGPU = Opm::gpuistl::GpuVector<double>(blockVector.dim());
vectorOnGPU.copyFromHost(blockVector);
std::vector<double> buffer(vectorOnGPU.dim());
vectorOnGPU.copyToHost(buffer.data(), buffer.size());
@ -93,7 +93,7 @@ BOOST_AUTO_TEST_CASE(TestCopyToBvector)
{
std::vector<double> data {{1, 2, 3, 4, 5, 6, 7, 8, 9}};
auto blockVector = Dune::BlockVector<Dune::FieldVector<double, 3>>(3);
auto vectorOnGPU = Opm::gpuistl::CuVector<double>(data.data(), data.size());
auto vectorOnGPU = Opm::gpuistl::GpuVector<double>(data.data(), data.size());
vectorOnGPU.copyToHost(blockVector);

@ -103,7 +103,7 @@ BOOST_AUTO_TEST_CASE(TestCopyToBvector)
BOOST_AUTO_TEST_CASE(TestDataPointer)
{
std::vector<double> data {{1, 2, 3, 4, 5, 6, 7, 8, 9}};
auto vectorOnGPU = Opm::gpuistl::CuVector<double>(data.data(), data.size());
auto vectorOnGPU = Opm::gpuistl::GpuVector<double>(data.data(), data.size());

std::vector<double> buffer(data.size(), 0.0);
OPM_CUDA_SAFE_CALL(cudaMemcpy(buffer.data(), vectorOnGPU.data(), sizeof(double) * data.size(), cudaMemcpyDeviceToHost));
@ -113,7 +113,7 @@ BOOST_AUTO_TEST_CASE(TestDataPointer)
BOOST_AUTO_TEST_CASE(TestCopyScalarMultiply)
{
std::vector<double> data {{1, 2, 3, 4, 5, 6, 7}};
auto vectorOnGPU = Opm::gpuistl::CuVector<double>(data.data(), data.size());
auto vectorOnGPU = Opm::gpuistl::GpuVector<double>(data.data(), data.size());
BOOST_CHECK_EQUAL(data.size(), vectorOnGPU.dim());
const double scalar = 42.25;
vectorOnGPU *= scalar;
@ -128,7 +128,7 @@ BOOST_AUTO_TEST_CASE(TestCopyScalarMultiply)
BOOST_AUTO_TEST_CASE(TestTwoNorm)
{
std::vector<double> data {{1, 2, 3, 4, 5, 6, 7}};
auto vectorOnGPU = Opm::gpuistl::CuVector<double>(data.data(), data.size());
auto vectorOnGPU = Opm::gpuistl::GpuVector<double>(data.data(), data.size());
auto twoNorm = vectorOnGPU.two_norm();

double correctAnswer = 0.0;
@ -143,8 +143,8 @@ BOOST_AUTO_TEST_CASE(TestDot)
{
std::vector<double> dataA {{1, 2, 3, 4, 5, 6, 7}};
std::vector<double> dataB {{8, 9, 10, 11, 12, 13, 14}};
auto vectorOnGPUA = Opm::gpuistl::CuVector<double>(dataA.data(), dataA.size());
auto vectorOnGPUB = Opm::gpuistl::CuVector<double>(dataB.data(), dataB.size());
auto vectorOnGPUA = Opm::gpuistl::GpuVector<double>(dataA.data(), dataA.size());
auto vectorOnGPUB = Opm::gpuistl::GpuVector<double>(dataB.data(), dataB.size());
auto dot = vectorOnGPUA.dot(vectorOnGPUB);

double correctAnswer = 0.0;
@ -158,7 +158,7 @@ BOOST_AUTO_TEST_CASE(TestDot)
BOOST_AUTO_TEST_CASE(Assigment)
{
std::vector<double> data {{1, 2, 3, 4, 5, 6, 7}};
auto vectorOnGPU = Opm::gpuistl::CuVector<double>(data.data(), data.size());
auto vectorOnGPU = Opm::gpuistl::GpuVector<double>(data.data(), data.size());
vectorOnGPU = 10.0;
vectorOnGPU.copyToHost(data.data(), data.size());

@ -171,9 +171,9 @@ BOOST_AUTO_TEST_CASE(Assigment)
BOOST_AUTO_TEST_CASE(CopyAssignment)
{
std::vector<double> data {{1, 2, 3, 4, 5, 6, 7}};
auto vectorOnGPU = Opm::gpuistl::CuVector<double>(data.data(), data.size());
auto vectorOnGPU = Opm::gpuistl::GpuVector<double>(data.data(), data.size());
vectorOnGPU.copyToHost(data.data(), data.size());
auto vectorOnGPUB = Opm::gpuistl::CuVector<double>(data.size());
auto vectorOnGPUB = Opm::gpuistl::GpuVector<double>(data.size());
vectorOnGPUB = 4.0;
vectorOnGPUB = vectorOnGPU;

@ -185,7 +185,7 @@ BOOST_AUTO_TEST_CASE(CopyAssignment)
BOOST_AUTO_TEST_CASE(RandomVectors)
{

using GVector = Opm::gpuistl::CuVector<double>;
using GVector = Opm::gpuistl::GpuVector<double>;
std::srand(0);
std::mt19937 generator;
std::uniform_real_distribution<double> distribution(-100.0, 100.0);
@ -268,7 +268,7 @@ BOOST_AUTO_TEST_CASE(RandomVectors)
indexSet.push_back(i);
}
}
auto indexSetGPU = Opm::gpuistl::CuVector<int>(indexSet);
auto indexSetGPU = Opm::gpuistl::GpuVector<int>(indexSet);

aGPU.setZeroAtIndexSet(indexSetGPU);
auto projectedA = aGPU.asStdVector();
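A hedged reading of the last few lines above: setZeroAtIndexSet zeroes the entries whose indices appear in the uploaded index set, and asStdVector copies the result back to the host. A host-side model, with the behavior inferred from the test rather than from the implementation:

    // Model of aGPU.setZeroAtIndexSet(indexSetGPU) followed by aGPU.asStdVector().
    std::vector<double> setZeroAtIndexSetModel(std::vector<double> a,
                                               const std::vector<int>& indexSet)
    {
        for (const int i : indexSet) {
            a[i] = 0.0; // zero out the selected components
        }
        return a;
    }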
@ -24,7 +24,7 @@
#include <cuda_runtime.h>
#include <dune/istl/bcrsmatrix.hh>
#include <opm/simulators/linalg/cuistl/GpuSparseMatrix.hpp>
#include <opm/simulators/linalg/cuistl/CuVector.hpp>
#include <opm/simulators/linalg/cuistl/GpuVector.hpp>
#include <opm/simulators/linalg/cuistl/PreconditionerAdapter.hpp>
#include <opm/simulators/linalg/cuistl/detail/cusparse_matrix_operations.hpp>
#include <opm/simulators/linalg/cuistl/detail/fix_zero_diagonal.hpp>
@ -86,7 +86,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(FlattenAndInvertDiagonalWith3By3Blocks, T, Numeric
B[1][1][2][2] = -1.0;

Opm::gpuistl::GpuSparseMatrix<T> m = Opm::gpuistl::GpuSparseMatrix<T>::fromMatrix(B);
Opm::gpuistl::CuVector<T> dInvDiag(blocksize * blocksize * N);
Opm::gpuistl::GpuVector<T> dInvDiag(blocksize * blocksize * N);

Opm::gpuistl::detail::JAC::invertDiagonalAndFlatten<T, 3>(
m.getNonZeroValues().data(), m.getRowIndices().data(), m.getColumnIndices().data(), N, dInvDiag.data());
@ -160,7 +160,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(FlattenAndInvertDiagonalWith2By2Blocks, T, Numeric
B[1][1][1][1] = -1.0;

Opm::gpuistl::GpuSparseMatrix<T> m = Opm::gpuistl::GpuSparseMatrix<T>::fromMatrix(B);
Opm::gpuistl::CuVector<T> dInvDiag(blocksize * blocksize * N);
Opm::gpuistl::GpuVector<T> dInvDiag(blocksize * blocksize * N);

Opm::gpuistl::detail::JAC::invertDiagonalAndFlatten<T, 2>(
m.getNonZeroValues().data(), m.getRowIndices().data(), m.getColumnIndices().data(), N, dInvDiag.data());
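For reference, inverting one 2x2 diagonal block is what invertDiagonalAndFlatten<T, 2> does per row before writing the result out flat; a hedged model of that per-block step:

    // [a b; c d]^-1 = 1/(ad - bc) * [d -b; -c a], block assumed nonsingular.
    template <class T>
    std::array<T, 4> invert2x2(T a, T b, T c, T d)
    {
        const T det = a * d - b * c;
        return {d / det, -b / det, -c / det, a / det};
    }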
@ -18,13 +18,13 @@
*/
#include <config.h>

#define BOOST_TEST_MODULE TestCuVectorOperations
#define BOOST_TEST_MODULE TestGpuVectorOperations
#include <boost/mpl/list.hpp>
#include <boost/test/unit_test.hpp>
#include <cuda_runtime.h>
#include <dune/istl/bcrsmatrix.hh>
#include <opm/simulators/linalg/cuistl/GpuJac.hpp>
#include <opm/simulators/linalg/cuistl/CuVector.hpp>
#include <opm/simulators/linalg/cuistl/GpuVector.hpp>
#include <opm/simulators/linalg/cuistl/PreconditionerAdapter.hpp>
#include <opm/simulators/linalg/cuistl/detail/cusparse_matrix_operations.hpp>
#include <opm/simulators/linalg/cuistl/detail/vector_operations.hpp>
@ -47,9 +47,9 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(ElementWiseMultiplicationOf3By3BlockVectorAndVecto
std::vector<T> hostBlockVector({1.0, 2.0, 3.0, 5.0, 2.0, 3.0, 2.0, 1.0, 2.0});
std::vector<T> hostVecVector({3.0, 2.0, 1.0});
std::vector<T> hostDstVector({0, 0, 0});
Opm::gpuistl::CuVector<T> deviceBlockVector(hostBlockVector);
Opm::gpuistl::CuVector<T> deviceVecVector(hostVecVector);
Opm::gpuistl::CuVector<T> deviceDstVector(hostDstVector);
Opm::gpuistl::GpuVector<T> deviceBlockVector(hostBlockVector);
Opm::gpuistl::GpuVector<T> deviceVecVector(hostVecVector);
Opm::gpuistl::GpuVector<T> deviceDstVector(hostDstVector);

Opm::gpuistl::detail::weightedDiagMV(
deviceBlockVector.data(), N, blocksize, weight, deviceVecVector.data(), deviceDstVector.data());
@ -81,9 +81,9 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(ElementWiseMultiplicationOf2By2BlockVectorAndVecto
std::vector<T> hostBlockVector({1.0, 2.0, 3.0, 4.0, 4.0, 3.0, 2.0, 1.0});
std::vector<T> hostVecVector({1.0, 3.0, 2.0, 4.0});
std::vector<T> hostDstVector({0, 0, 0, 0});
Opm::gpuistl::CuVector<T> deviceBlockVector(hostBlockVector);
Opm::gpuistl::CuVector<T> deviceVecVector(hostVecVector);
Opm::gpuistl::CuVector<T> deviceDstVector(hostDstVector);
Opm::gpuistl::GpuVector<T> deviceBlockVector(hostBlockVector);
Opm::gpuistl::GpuVector<T> deviceVecVector(hostVecVector);
Opm::gpuistl::GpuVector<T> deviceDstVector(hostDstVector);

Opm::gpuistl::detail::weightedDiagMV(
deviceBlockVector.data(), N, blocksize, weight, deviceVecVector.data(), deviceDstVector.data());
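Working the 2x2 case by hand as a sanity check (hedged: the asserted expectations live outside this excerpt, so the values below are inferred from the inputs alone). The blocks are [1 2; 3 4] and [4 3; 2 1], and the input rows are (1, 3) and (2, 4):

    // Row 0: [1 2; 3 4] * (1, 3) = (7, 15)
    // Row 1: [4 3; 2 1] * (2, 4) = (20, 8)
    // Hence dst = weight * (7, 15, 20, 8).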