refactor cuistl namespace: rename Opm::cuistl (and its detail sub-namespaces) to Opm::gpuistl
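The change is a mechanical rename of the Opm::cuistl namespace to Opm::gpuistl; no behavior changes. A minimal before/after sketch of what call sites see — the call site and the OPM_HAS_GPUISTL guard below are hypothetical, not part of this commit:

    // Before: auto ilu = std::make_shared<Opm::cuistl::CuSeqILU0<M, V, V>>(mat, w);
    // After:  auto ilu = std::make_shared<Opm::gpuistl::CuSeqILU0<M, V, V>>(mat, w);
    //
    // Downstream code that must build against both revisions could bridge the
    // rename with a namespace alias (hypothetical guard macro):
    #if defined(OPM_HAS_GPUISTL)
    namespace gpu = Opm::gpuistl;
    #else
    namespace gpu = Opm::cuistl;
    #endif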
@@ -163,7 +163,7 @@ void Main::initMPI()
 }

 #if HAVE_CUDA
-Opm::cuistl::setDevice(FlowGenericVanguard::comm().rank(), FlowGenericVanguard::comm().size());
+Opm::gpuistl::setDevice(FlowGenericVanguard::comm().rank(), FlowGenericVanguard::comm().size());
 #endif

 #endif // HAVE_MPI

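setDevice(rank, size) binds each MPI rank to a GPU before any CUDA work starts. A minimal sketch of such a binding, assuming a round-robin rank-to-device policy (the actual policy inside setDevice is not shown in this diff):

    #include <cuda_runtime.h>

    void setDeviceSketch(int mpiRank)
    {
        int deviceCount = 0;
        cudaGetDeviceCount(&deviceCount);         // GPUs visible on this node
        if (deviceCount > 0) {
            cudaSetDevice(mpiRank % deviceCount); // round-robin rank -> device
        }
    }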
@@ -201,7 +201,7 @@ namespace Dune
 #endif
 #if HAVE_CUDA
 } else if (solver_type == "cubicgstab") {
-linsolver_.reset(new Opm::cuistl::SolverAdapter<Operator, Dune::BiCGSTABSolver, VectorType>(
+linsolver_.reset(new Opm::gpuistl::SolverAdapter<Operator, Dune::BiCGSTABSolver, VectorType>(
 *linearoperator_for_solver_,
 *scalarproduct_,
 preconditioner_,

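SolverAdapter presents the GPU BiCGSTAB through the standard Dune::IterativeSolver interface, so the code above can treat it like any other linear solver. A hedged usage sketch (x, b and the surrounding types are placeholders):

    Dune::InverseOperatorResult stat;
    linsolver_->apply(x, b, stat); // moves data to the GPU, solves, moves the result back
    if (!stat.converged) {
        // stat.iterations and stat.reduction are filled in for diagnostics
    }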
@@ -325,12 +325,12 @@ struct StandardPreconditioners {
 F::addCreator("CUILU0", [](const O& op, const P& prm, const std::function<V()>&, std::size_t, const C& comm) {
 const double w = prm.get<double>("relaxation", 1.0);
 using field_type = typename V::field_type;
-using CuILU0 = typename cuistl::
-    CuSeqILU0<M, cuistl::CuVector<field_type>, cuistl::CuVector<field_type>>;
+using CuILU0 = typename gpuistl::
+    CuSeqILU0<M, gpuistl::CuVector<field_type>, gpuistl::CuVector<field_type>>;
 auto cuILU0 = std::make_shared<CuILU0>(op.getmat(), w);

-auto adapted = std::make_shared<cuistl::PreconditionerAdapter<V, V, CuILU0>>(cuILU0);
-auto wrapped = std::make_shared<cuistl::CuBlockPreconditioner<V, V, Comm>>(adapted, comm);
+auto adapted = std::make_shared<gpuistl::PreconditionerAdapter<V, V, CuILU0>>(cuILU0);
+auto wrapped = std::make_shared<gpuistl::CuBlockPreconditioner<V, V, Comm>>(adapted, comm);
 return wrapped;
 });

@@ -338,11 +338,11 @@ struct StandardPreconditioners {
 const double w = prm.get<double>("relaxation", 1.0);
 using field_type = typename V::field_type;
 using CuJac =
-    typename cuistl::CuJac<M, cuistl::CuVector<field_type>, cuistl::CuVector<field_type>>;
+    typename gpuistl::CuJac<M, gpuistl::CuVector<field_type>, gpuistl::CuVector<field_type>>;
 auto cuJac = std::make_shared<CuJac>(op.getmat(), w);

-auto adapted = std::make_shared<cuistl::PreconditionerAdapter<V, V, CuJac>>(cuJac);
-auto wrapped = std::make_shared<cuistl::CuBlockPreconditioner<V, V, Comm>>(adapted, comm);
+auto adapted = std::make_shared<gpuistl::PreconditionerAdapter<V, V, CuJac>>(cuJac);
+auto wrapped = std::make_shared<gpuistl::CuBlockPreconditioner<V, V, Comm>>(adapted, comm);
 return wrapped;
 });

@@ -350,11 +350,11 @@ struct StandardPreconditioners {
 const bool split_matrix = prm.get<bool>("split_matrix", true);
 const bool tune_gpu_kernels = prm.get<bool>("tune_gpu_kernels", true);
 using field_type = typename V::field_type;
-using CuDILU = typename cuistl::CuDILU<M, cuistl::CuVector<field_type>, cuistl::CuVector<field_type>>;
+using CuDILU = typename gpuistl::CuDILU<M, gpuistl::CuVector<field_type>, gpuistl::CuVector<field_type>>;
 auto cuDILU = std::make_shared<CuDILU>(op.getmat(), split_matrix, tune_gpu_kernels);

-auto adapted = std::make_shared<cuistl::PreconditionerAdapter<V, V, CuDILU>>(cuDILU);
-auto wrapped = std::make_shared<cuistl::CuBlockPreconditioner<V, V, Comm>>(adapted, comm);
+auto adapted = std::make_shared<gpuistl::PreconditionerAdapter<V, V, CuDILU>>(cuDILU);
+auto wrapped = std::make_shared<gpuistl::CuBlockPreconditioner<V, V, Comm>>(adapted, comm);
 return wrapped;
 });

@@ -362,11 +362,11 @@ struct StandardPreconditioners {
 const bool split_matrix = prm.get<bool>("split_matrix", true);
 const bool tune_gpu_kernels = prm.get<bool>("tune_gpu_kernels", true);
 using field_type = typename V::field_type;
-using OpmCuILU0 = typename cuistl::OpmCuILU0<M, cuistl::CuVector<field_type>, cuistl::CuVector<field_type>>;
+using OpmCuILU0 = typename gpuistl::OpmCuILU0<M, gpuistl::CuVector<field_type>, gpuistl::CuVector<field_type>>;
 auto cuilu0 = std::make_shared<OpmCuILU0>(op.getmat(), split_matrix, tune_gpu_kernels);

-auto adapted = std::make_shared<cuistl::PreconditionerAdapter<V, V, OpmCuILU0>>(cuilu0);
-auto wrapped = std::make_shared<cuistl::CuBlockPreconditioner<V, V, Comm>>(adapted, comm);
+auto adapted = std::make_shared<gpuistl::PreconditionerAdapter<V, V, OpmCuILU0>>(cuilu0);
+auto wrapped = std::make_shared<gpuistl::CuBlockPreconditioner<V, V, Comm>>(adapted, comm);
 return wrapped;
 });
 #endif
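Every GPU preconditioner above goes through the same factory hook: F::addCreator registers a name plus a lambda that reads its options from the property tree, builds the preconditioner on the GPU, and wraps it for the parallel solver. A stripped-down sketch of the pattern — "MYPREC" and GpuPrecType are placeholders, not creators from this commit:

    F::addCreator("MYPREC", [](const O& op, const P& prm, const std::function<V()>&, std::size_t, const C& comm) {
        const double w = prm.get<double>("relaxation", 1.0);       // read options first
        auto prec = std::make_shared<GpuPrecType>(op.getmat(), w); // build on the GPU
        auto adapted = std::make_shared<gpuistl::PreconditionerAdapter<V, V, GpuPrecType>>(prec);
        return std::make_shared<gpuistl::CuBlockPreconditioner<V, V, Comm>>(adapted, comm); // MPI-aware wrapper
    });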
@@ -585,9 +585,9 @@ struct StandardPreconditioners<Operator, Dune::Amg::SequentialInformation> {
 F::addCreator("CUILU0", [](const O& op, const P& prm, const std::function<V()>&, std::size_t) {
 const double w = prm.get<double>("relaxation", 1.0);
 using field_type = typename V::field_type;
-using CuILU0 = typename cuistl::
-    CuSeqILU0<M, cuistl::CuVector<field_type>, cuistl::CuVector<field_type>>;
-return std::make_shared<cuistl::PreconditionerAdapter<V, V, CuILU0>>(
+using CuILU0 = typename gpuistl::
+    CuSeqILU0<M, gpuistl::CuVector<field_type>, gpuistl::CuVector<field_type>>;
+return std::make_shared<gpuistl::PreconditionerAdapter<V, V, CuILU0>>(
 std::make_shared<CuILU0>(op.getmat(), w));
 });

@@ -597,10 +597,10 @@ struct StandardPreconditioners<Operator, Dune::Amg::SequentialInformation> {
 using VTo = Dune::BlockVector<Dune::FieldVector<float, block_type::dimension>>;
 using matrix_type_to =
 typename Dune::BCRSMatrix<Dune::FieldMatrix<float, block_type::dimension, block_type::dimension>>;
-using CuILU0 = typename cuistl::
-    CuSeqILU0<matrix_type_to, cuistl::CuVector<float>, cuistl::CuVector<float>>;
-using Adapter = typename cuistl::PreconditionerAdapter<VTo, VTo, CuILU0>;
-using Converter = typename cuistl::PreconditionerConvertFieldTypeAdapter<Adapter, M, V, V>;
+using CuILU0 = typename gpuistl::
+    CuSeqILU0<matrix_type_to, gpuistl::CuVector<float>, gpuistl::CuVector<float>>;
+using Adapter = typename gpuistl::PreconditionerAdapter<VTo, VTo, CuILU0>;
+using Converter = typename gpuistl::PreconditionerConvertFieldTypeAdapter<Adapter, M, V, V>;
 auto converted = std::make_shared<Converter>(op.getmat());
 auto adapted = std::make_shared<Adapter>(std::make_shared<CuILU0>(converted->getConvertedMatrix(), w));
 converted->setUnderlyingPreconditioner(adapted);
@@ -611,8 +611,8 @@ struct StandardPreconditioners<Operator, Dune::Amg::SequentialInformation> {
 const double w = prm.get<double>("relaxation", 1.0);
 using field_type = typename V::field_type;
 using CUJac =
-    typename cuistl::CuJac<M, cuistl::CuVector<field_type>, cuistl::CuVector<field_type>>;
-return std::make_shared<cuistl::PreconditionerAdapter<V, V, CUJac>>(
+    typename gpuistl::CuJac<M, gpuistl::CuVector<field_type>, gpuistl::CuVector<field_type>>;
+return std::make_shared<gpuistl::PreconditionerAdapter<V, V, CUJac>>(
 std::make_shared<CUJac>(op.getmat(), w));
 });

@@ -620,17 +620,17 @@ struct StandardPreconditioners<Operator, Dune::Amg::SequentialInformation> {
 const bool split_matrix = prm.get<bool>("split_matrix", true);
 const bool tune_gpu_kernels = prm.get<bool>("tune_gpu_kernels", true);
 using field_type = typename V::field_type;
-using CUILU0 = typename cuistl::OpmCuILU0<M, cuistl::CuVector<field_type>, cuistl::CuVector<field_type>>;
+using CUILU0 = typename gpuistl::OpmCuILU0<M, gpuistl::CuVector<field_type>, gpuistl::CuVector<field_type>>;

-return std::make_shared<cuistl::PreconditionerAdapter<V, V, CUILU0>>(std::make_shared<CUILU0>(op.getmat(), split_matrix, tune_gpu_kernels));
+return std::make_shared<gpuistl::PreconditionerAdapter<V, V, CUILU0>>(std::make_shared<CUILU0>(op.getmat(), split_matrix, tune_gpu_kernels));
 });

 F::addCreator("CUDILU", [](const O& op, [[maybe_unused]] const P& prm, const std::function<V()>&, std::size_t) {
 const bool split_matrix = prm.get<bool>("split_matrix", true);
 const bool tune_gpu_kernels = prm.get<bool>("tune_gpu_kernels", true);
 using field_type = typename V::field_type;
-using CUDILU = typename cuistl::CuDILU<M, cuistl::CuVector<field_type>, cuistl::CuVector<field_type>>;
-return std::make_shared<cuistl::PreconditionerAdapter<V, V, CUDILU>>(std::make_shared<CUDILU>(op.getmat(), split_matrix, tune_gpu_kernels));
+using CUDILU = typename gpuistl::CuDILU<M, gpuistl::CuVector<field_type>, gpuistl::CuVector<field_type>>;
+return std::make_shared<gpuistl::PreconditionerAdapter<V, V, CUDILU>>(std::make_shared<CUDILU>(op.getmat(), split_matrix, tune_gpu_kernels));
 });

 F::addCreator("CUDILUFloat", [](const O& op, [[maybe_unused]] const P& prm, const std::function<V()>&, std::size_t) {
@@ -639,9 +639,9 @@ struct StandardPreconditioners<Operator, Dune::Amg::SequentialInformation> {
 using block_type = typename V::block_type;
 using VTo = Dune::BlockVector<Dune::FieldVector<float, block_type::dimension>>;
 using matrix_type_to = typename Dune::BCRSMatrix<Dune::FieldMatrix<float, block_type::dimension, block_type::dimension>>;
-using CuDILU = typename cuistl::CuDILU<matrix_type_to, cuistl::CuVector<float>, cuistl::CuVector<float>>;
-using Adapter = typename cuistl::PreconditionerAdapter<VTo, VTo, CuDILU>;
-using Converter = typename cuistl::PreconditionerConvertFieldTypeAdapter<Adapter, M, V, V>;
+using CuDILU = typename gpuistl::CuDILU<matrix_type_to, gpuistl::CuVector<float>, gpuistl::CuVector<float>>;
+using Adapter = typename gpuistl::PreconditionerAdapter<VTo, VTo, CuDILU>;
+using Converter = typename gpuistl::PreconditionerConvertFieldTypeAdapter<Adapter, M, V, V>;
 auto converted = std::make_shared<Converter>(op.getmat());
 auto adapted = std::make_shared<Adapter>(std::make_shared<CuDILU>(converted->getConvertedMatrix(), split_matrix, tune_gpu_kernels));
 converted->setUnderlyingPreconditioner(adapted);
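The *Float creators all share a three-step wrap: convert the double matrix to float, build the float GPU preconditioner on the converted matrix, then hand it back to the converter so vectors are translated on every apply. The flow, condensed from the CUDILUFloat creator above (the final return is assumed; the hunk is truncated before it):

    auto converted = std::make_shared<Converter>(op.getmat());   // owns the float copy of the matrix
    auto adapted = std::make_shared<Adapter>(
        std::make_shared<CuDILU>(converted->getConvertedMatrix(), split_matrix, tune_gpu_kernels));
    converted->setUnderlyingPreconditioner(adapted);             // translates double <-> float on each apply
    return converted;                                            // looks like a double preconditioner to the solver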
@@ -25,7 +25,7 @@
 #include <opm/simulators/linalg/cuistl/PreconditionerHolder.hpp>
 #include <opm/simulators/linalg/cuistl/detail/preconditioner_should_call_post_pre.hpp>

-namespace Opm::cuistl
+namespace Opm::gpuistl
 {
 //! @brief Is an adaptation of Dune::BlockPreconditioner that works within the CuISTL framework.
 //!
@@ -125,5 +125,5 @@ private:
 //! \brief the communication object
 std::shared_ptr<const communication_type> m_communication;
 };
-} // namespace Opm::cuistl
+} // namespace Opm::gpuistl
 #endif

@@ -24,7 +24,7 @@
 #include <opm/simulators/linalg/cuistl/CuView.hpp>
 #include <opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp>

-namespace Opm::cuistl
+namespace Opm::gpuistl
 {

 template <class T>
@@ -201,4 +201,4 @@ template CuView<const double> make_view<double>(const CuBuffer<double>&);
 template CuView<const float> make_view<float>(const CuBuffer<float>&);
 template CuView<const int> make_view<int>(const CuBuffer<int>&);

-} // namespace Opm::cuistl
+} // namespace Opm::gpuistl

@@ -29,7 +29,7 @@
 #include <string>


-namespace Opm::cuistl
+namespace Opm::gpuistl
 {

 /**
@@ -276,5 +276,5 @@ private:
 template <class T>
 CuView<const T> make_view(const CuBuffer<T>&);

-} // namespace Opm::cuistl
+} // namespace Opm::gpuistl
 #endif

@@ -37,7 +37,7 @@
 #include <functional>
 #include <utility>
 #include <string>
-namespace Opm::cuistl
+namespace Opm::gpuistl
 {

 template <class M, class X, class Y, int l>
@@ -293,14 +293,14 @@ CuDILU<M, X, Y, l>::tuneThreadBlockSizes()
 m_upperSolveThreadBlockSize = detail::tuneThreadBlockSize(tuneUpperSolveThreadBlockSizeInApply, "Kernel computing an upper triangular solve for a level set");
 }

-} // namespace Opm::cuistl
+} // namespace Opm::gpuistl
 #define INSTANTIATE_CUDILU_DUNE(realtype, blockdim) \
-template class ::Opm::cuistl::CuDILU<Dune::BCRSMatrix<Dune::FieldMatrix<realtype, blockdim, blockdim>>, \
-    ::Opm::cuistl::CuVector<realtype>, \
-    ::Opm::cuistl::CuVector<realtype>>; \
-template class ::Opm::cuistl::CuDILU<Dune::BCRSMatrix<Opm::MatrixBlock<realtype, blockdim, blockdim>>, \
-    ::Opm::cuistl::CuVector<realtype>, \
-    ::Opm::cuistl::CuVector<realtype>>
+template class ::Opm::gpuistl::CuDILU<Dune::BCRSMatrix<Dune::FieldMatrix<realtype, blockdim, blockdim>>, \
+    ::Opm::gpuistl::CuVector<realtype>, \
+    ::Opm::gpuistl::CuVector<realtype>>; \
+template class ::Opm::gpuistl::CuDILU<Dune::BCRSMatrix<Opm::MatrixBlock<realtype, blockdim, blockdim>>, \
+    ::Opm::gpuistl::CuVector<realtype>, \
+    ::Opm::gpuistl::CuVector<realtype>>

 INSTANTIATE_CUDILU_DUNE(double, 1);
 INSTANTIATE_CUDILU_DUNE(double, 2);
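INSTANTIATE_CUDILU_DUNE pre-compiles the template for the block sizes the simulator uses, so the CuDILU definition can stay in the .cu file. For example, INSTANTIATE_CUDILU_DUNE(double, 2); expands (after this commit) to:

    template class ::Opm::gpuistl::CuDILU<Dune::BCRSMatrix<Dune::FieldMatrix<double, 2, 2>>,
                                          ::Opm::gpuistl::CuVector<double>,
                                          ::Opm::gpuistl::CuVector<double>>;
    template class ::Opm::gpuistl::CuDILU<Dune::BCRSMatrix<Opm::MatrixBlock<double, 2, 2>>,
                                          ::Opm::gpuistl::CuVector<double>,
                                          ::Opm::gpuistl::CuVector<double>>;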
@@ -27,7 +27,7 @@



-namespace Opm::cuistl
+namespace Opm::gpuistl
 {
 //! \brief DILU preconditioner on the GPU.
 //!
@@ -144,6 +144,6 @@ private:
 int m_moveThreadBlockSize = -1;
 int m_DILUFactorizationThreadBlockSize = -1;
 };
-} // end namespace Opm::cuistl
+} // end namespace Opm::gpuistl

 #endif

@@ -26,7 +26,7 @@
 #include <opm/simulators/linalg/cuistl/detail/vector_operations.hpp>
 #include <opm/simulators/linalg/matrixblock.hh>

-namespace Opm::cuistl
+namespace Opm::gpuistl
 {

 template <class M, class X, class Y, int l>
@@ -108,14 +108,14 @@ CuJac<M, X, Y, l>::invertDiagonalAndFlatten()
 m_diagInvFlattened.data());
 }

-} // namespace Opm::cuistl
+} // namespace Opm::gpuistl
 #define INSTANTIATE_CUJAC_DUNE(realtype, blockdim) \
-template class ::Opm::cuistl::CuJac<Dune::BCRSMatrix<Dune::FieldMatrix<realtype, blockdim, blockdim>>, \
-    ::Opm::cuistl::CuVector<realtype>, \
-    ::Opm::cuistl::CuVector<realtype>>; \
-template class ::Opm::cuistl::CuJac<Dune::BCRSMatrix<Opm::MatrixBlock<realtype, blockdim, blockdim>>, \
-    ::Opm::cuistl::CuVector<realtype>, \
-    ::Opm::cuistl::CuVector<realtype>>
+template class ::Opm::gpuistl::CuJac<Dune::BCRSMatrix<Dune::FieldMatrix<realtype, blockdim, blockdim>>, \
+    ::Opm::gpuistl::CuVector<realtype>, \
+    ::Opm::gpuistl::CuVector<realtype>>; \
+template class ::Opm::gpuistl::CuJac<Dune::BCRSMatrix<Opm::MatrixBlock<realtype, blockdim, blockdim>>, \
+    ::Opm::gpuistl::CuVector<realtype>, \
+    ::Opm::gpuistl::CuVector<realtype>>

 INSTANTIATE_CUJAC_DUNE(double, 1);
 INSTANTIATE_CUJAC_DUNE(double, 2);

@@ -28,7 +28,7 @@



-namespace Opm::cuistl
+namespace Opm::gpuistl
 {
 //! \brief Jacobi preconditioner on the GPU.
 //!
@@ -110,6 +110,6 @@ private:

 void invertDiagonalAndFlatten();
 };
-} // end namespace Opm::cuistl
+} // end namespace Opm::gpuistl

 #endif

@@ -24,7 +24,7 @@
 #include <opm/simulators/linalg/cuistl/CuVector.hpp>
 #include <vector>

-namespace Opm::cuistl
+namespace Opm::gpuistl
 {
 /**
 * @brief GPUSender is a wrapper class for classes which will implement copOwnerToAll
@@ -409,5 +409,5 @@ public:
 private:
 std::shared_ptr<GPUSender<field_type, OwnerOverlapCopyCommunicationType>> m_sender;
 };
-} // namespace Opm::cuistl
+} // namespace Opm::gpuistl
 #endif

@@ -37,7 +37,7 @@
 // it highly recommended to read that before proceeding.


-namespace Opm::cuistl
+namespace Opm::gpuistl
 {

 template <class M, class X, class Y, int l>
@@ -337,14 +337,14 @@ CuSeqILU0<M, X, Y, l>::updateILUConfiguration()
 analyzeMatrix();
 createILU();
 }
-} // namespace Opm::cuistl
+} // namespace Opm::gpuistl
 #define INSTANTIATE_CUSEQILU0_DUNE(realtype, blockdim) \
-template class ::Opm::cuistl::CuSeqILU0<Dune::BCRSMatrix<Dune::FieldMatrix<realtype, blockdim, blockdim>>, \
-    ::Opm::cuistl::CuVector<realtype>, \
-    ::Opm::cuistl::CuVector<realtype>>; \
-template class ::Opm::cuistl::CuSeqILU0<Dune::BCRSMatrix<Opm::MatrixBlock<realtype, blockdim, blockdim>>, \
-    ::Opm::cuistl::CuVector<realtype>, \
-    ::Opm::cuistl::CuVector<realtype>>
+template class ::Opm::gpuistl::CuSeqILU0<Dune::BCRSMatrix<Dune::FieldMatrix<realtype, blockdim, blockdim>>, \
+    ::Opm::gpuistl::CuVector<realtype>, \
+    ::Opm::gpuistl::CuVector<realtype>>; \
+template class ::Opm::gpuistl::CuSeqILU0<Dune::BCRSMatrix<Opm::MatrixBlock<realtype, blockdim, blockdim>>, \
+    ::Opm::gpuistl::CuVector<realtype>, \
+    ::Opm::gpuistl::CuVector<realtype>>


 INSTANTIATE_CUSEQILU0_DUNE(double, 1);

@@ -28,7 +28,7 @@



-namespace Opm::cuistl
+namespace Opm::gpuistl
 {
 //! \brief Sequential ILU0 preconditioner on the GPU through the CuSparse library.
 //!
@@ -133,6 +133,6 @@ private:

 void updateILUConfiguration();
 };
-} // end namespace Opm::cuistl
+} // end namespace Opm::gpuistl

 #endif

@@ -29,7 +29,7 @@
 #include <opm/simulators/linalg/matrixblock.hh>
 #include <type_traits>

-namespace Opm::cuistl
+namespace Opm::gpuistl
 {

 namespace
@@ -343,4 +343,4 @@ INSTANTIATE_CUSPARSE_DUNE_MATRIX_CONSTRUCTION_FUNTIONS(float, 4);
 INSTANTIATE_CUSPARSE_DUNE_MATRIX_CONSTRUCTION_FUNTIONS(float, 5);
 INSTANTIATE_CUSPARSE_DUNE_MATRIX_CONSTRUCTION_FUNTIONS(float, 6);

-} // namespace Opm::cuistl
+} // namespace Opm::gpuistl

@@ -28,7 +28,7 @@
 #include <opm/simulators/linalg/cuistl/detail/safe_conversion.hpp>
 #include <vector>

-namespace Opm::cuistl
+namespace Opm::gpuistl
 {

 /**
@@ -298,5 +298,5 @@ private:
 template <class VectorType>
 void assertSameSize(const VectorType& vector) const;
 };
-} // namespace Opm::cuistl
+} // namespace Opm::gpuistl
 #endif

@@ -26,7 +26,7 @@
 #include <opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp>
 #include <opm/simulators/linalg/cuistl/detail/vector_operations.hpp>

-namespace Opm::cuistl
+namespace Opm::gpuistl
 {

 template <class T>
@@ -304,4 +304,4 @@ template class CuVector<double>;
 template class CuVector<float>;
 template class CuVector<int>;

-} // namespace Opm::cuistl
+} // namespace Opm::gpuistl

@@ -29,7 +29,7 @@
 #include <string>


-namespace Opm::cuistl
+namespace Opm::gpuistl
 {

 /**
@@ -391,5 +391,5 @@ private:
 void assertHasElements() const;
 };

-} // namespace Opm::cuistl
+} // namespace Opm::gpuistl
 #endif

@@ -23,7 +23,7 @@
 #include <opm/simulators/linalg/cuistl/CuView.hpp>
 #include <opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp>

-namespace Opm::cuistl
+namespace Opm::gpuistl
 {

 template <class T>
@@ -79,4 +79,4 @@ template class CuView<double>;
 template class CuView<float>;
 template class CuView<int>;

-} // namespace Opm::cuistl
+} // namespace Opm::gpuistl

@@ -37,7 +37,7 @@
 #define OPM_IS_INSIDE_DEVICE_FUNCTION_TEMPORARY 0
 #endif

-namespace Opm::cuistl
+namespace Opm::gpuistl
 {

 /**
@@ -410,6 +410,6 @@ private:
 }
 };

-} // namespace Opm::cuistl
+} // namespace Opm::gpuistl

 #endif

@@ -37,7 +37,7 @@
 #include <string>
 #include <tuple>
 #include <utility>
-namespace Opm::cuistl
+namespace Opm::gpuistl
 {

 template <class M, class X, class Y, int l>
@@ -289,14 +289,14 @@ OpmCuILU0<M, X, Y, l>::tuneThreadBlockSizes()
 tuneUpperSolveThreadBlockSizeInApply, "Kernel computing an upper triangular solve for a level set");
 }

-} // namespace Opm::cuistl
+} // namespace Opm::gpuistl
 #define INSTANTIATE_CUDILU_DUNE(realtype, blockdim) \
-template class ::Opm::cuistl::OpmCuILU0<Dune::BCRSMatrix<Dune::FieldMatrix<realtype, blockdim, blockdim>>, \
-    ::Opm::cuistl::CuVector<realtype>, \
-    ::Opm::cuistl::CuVector<realtype>>; \
-template class ::Opm::cuistl::OpmCuILU0<Dune::BCRSMatrix<Opm::MatrixBlock<realtype, blockdim, blockdim>>, \
-    ::Opm::cuistl::CuVector<realtype>, \
-    ::Opm::cuistl::CuVector<realtype>>
+template class ::Opm::gpuistl::OpmCuILU0<Dune::BCRSMatrix<Dune::FieldMatrix<realtype, blockdim, blockdim>>, \
+    ::Opm::gpuistl::CuVector<realtype>, \
+    ::Opm::gpuistl::CuVector<realtype>>; \
+template class ::Opm::gpuistl::OpmCuILU0<Dune::BCRSMatrix<Opm::MatrixBlock<realtype, blockdim, blockdim>>, \
+    ::Opm::gpuistl::CuVector<realtype>, \
+    ::Opm::gpuistl::CuVector<realtype>>

 INSTANTIATE_CUDILU_DUNE(double, 1);
 INSTANTIATE_CUDILU_DUNE(double, 2);

@@ -29,7 +29,7 @@
 #include <vector>


-namespace Opm::cuistl
+namespace Opm::gpuistl
 {
 //! \brief ILU0 preconditioner on the GPU.
 //!
@@ -144,6 +144,6 @@ private:
 int m_moveThreadBlockSize = -1;
 int m_ILU0FactorizationThreadBlockSize = -1;
 };
-} // end namespace Opm::cuistl
+} // end namespace Opm::gpuistl

 #endif

@@ -26,7 +26,7 @@
 #include <opm/simulators/linalg/cuistl/detail/preconditioner_should_call_post_pre.hpp>


-namespace Opm::cuistl
+namespace Opm::gpuistl
 {
 //!\brief Makes a CUDA preconditioner available to a CPU simulator.
 //!
@@ -134,6 +134,6 @@ private:
 std::unique_ptr<CuVector<field_type>> m_inputBuffer;
 std::unique_ptr<CuVector<field_type>> m_outputBuffer;
 };
-} // end namespace Opm::cuistl
+} // end namespace Opm::gpuistl

 #endif

@@ -31,7 +31,7 @@
 #include <opm/simulators/linalg/cuistl/detail/preconditioner_should_call_post_pre.hpp>


-namespace Opm::cuistl
+namespace Opm::gpuistl
 {
 //! \brief Converts the field type (eg. double to float) to benchmark single precision preconditioners
 //!
@@ -64,7 +64,7 @@ namespace Opm::cuistl
 //! void applyILU0AsFloat(const MDouble& matrix, const XDouble& x, XDouble& y) {
 //!
 //!     using FloatILU0 = typename Opm::ParallelOverlappingILU0<MFloat, XFloat, XFloat, ParallelInfo>;
-//!     using DoubleToFloatConverter = typename Opm::cuistl::PreconditionerConvertFieldTypeAdapter<FloatILU0, MDouble,
+//!     using DoubleToFloatConverter = typename Opm::gpuistl::PreconditionerConvertFieldTypeAdapter<FloatILU0, MDouble,
 //!     XDouble, XDouble>;
 //!
 //!     // Note that we do not need to make a new instance for every invocation, this
@@ -239,6 +239,6 @@ private:
 //! \brief the underlying preconditioner to use
 std::shared_ptr<CudaPreconditionerType> m_underlyingPreconditioner;
 };
-} // end namespace Opm::cuistl
+} // end namespace Opm::gpuistl

 #endif

@@ -21,7 +21,7 @@
 #include <opm/simulators/linalg/PreconditionerWithUpdate.hpp>


-namespace Opm::cuistl
+namespace Opm::gpuistl
 {
 //! \brief Common interface for adapters that hold preconditioners.
 //!
@@ -38,6 +38,6 @@ public:
 */
 virtual std::shared_ptr<Dune::PreconditionerWithUpdate<X, Y>> getUnderlyingPreconditioner() = 0;
 };
-} // end namespace Opm::cuistl
+} // end namespace Opm::gpuistl

 #endif

@@ -39,7 +39,7 @@
 #endif
 #endif

-namespace Opm::cuistl
+namespace Opm::gpuistl
 {
 //! @brief Wraps a CUDA solver to work with CPU data.
 //!
@@ -56,7 +56,7 @@ public:
 using typename Dune::IterativeSolver<X, X>::real_type;
 using typename Dune::IterativeSolver<X, X>::scalar_real_type;
 static constexpr auto block_size = domain_type::block_type::dimension;
-using XGPU = Opm::cuistl::CuVector<real_type>;
+using XGPU = Opm::gpuistl::CuVector<real_type>;

 // TODO: Use a std::forward
 SolverAdapter(Operator& op,
@@ -148,8 +148,8 @@ private:
 if (!precAsHolder) {
 OPM_THROW(std::invalid_argument,
 "The preconditioner needs to be a CUDA preconditioner (eg. CuILU0) wrapped in a "
-"Opm::cuistl::PreconditionerAdapter wrapped in a "
-"Opm::cuistl::CuBlockPreconditioner. If you are unsure what this means, set "
+"Opm::gpuistl::PreconditionerAdapter wrapped in a "
+"Opm::gpuistl::CuBlockPreconditioner. If you are unsure what this means, set "
 "preconditioner to 'CUILU0'"); // TODO: Suggest a better preconditioner
 }

@@ -159,8 +159,8 @@ private:
 if (!preconditionerAdapterAsHolder) {
 OPM_THROW(std::invalid_argument,
 "The preconditioner needs to be a CUDA preconditioner (eg. CuILU0) wrapped in a "
-"Opm::cuistl::PreconditionerAdapter wrapped in a "
-"Opm::cuistl::CuBlockPreconditioner. If you are unsure what this means, set "
+"Opm::gpuistl::PreconditionerAdapter wrapped in a "
+"Opm::gpuistl::CuBlockPreconditioner. If you are unsure what this means, set "
 "preconditioner to 'CUILU0'"); // TODO: Suggest a better preconditioner
 }
 // We need to get the underlying preconditioner:
@@ -183,12 +183,12 @@ private:


 // TODO add typename Operator communication type as a named type with using
-std::shared_ptr<Opm::cuistl::GPUSender<real_type, typename Operator::communication_type>> gpuComm;
+std::shared_ptr<Opm::gpuistl::GPUSender<real_type, typename Operator::communication_type>> gpuComm;
 if (mpiSupportsCudaAwareAtCompileTime && mpiSupportsCudaAwareAtRunTime){
-gpuComm = std::make_shared<Opm::cuistl::GPUAwareMPISender<real_type, block_size, typename Operator::communication_type>>(communication);
+gpuComm = std::make_shared<Opm::gpuistl::GPUAwareMPISender<real_type, block_size, typename Operator::communication_type>>(communication);
 }
 else{
-gpuComm = std::make_shared<Opm::cuistl::GPUObliviousMPISender<real_type, block_size, typename Operator::communication_type>>(communication);
+gpuComm = std::make_shared<Opm::gpuistl::GPUObliviousMPISender<real_type, block_size, typename Operator::communication_type>>(communication);
 }

 using CudaCommunication = CuOwnerOverlapCopy<real_type, block_size, typename Operator::communication_type>;
@@ -222,7 +222,7 @@ private:
 if (!precAsHolder) {
 OPM_THROW(std::invalid_argument,
 "The preconditioner needs to be a CUDA preconditioner wrapped in a "
-"Opm::cuistl::PreconditionerHolder (eg. CuILU0).");
+"Opm::gpuistl::PreconditionerHolder (eg. CuILU0).");
 }
 auto preconditionerOnGPU = precAsHolder->getUnderlyingPreconditioner();

@@ -237,6 +237,6 @@ private:
 std::unique_ptr<XGPU> m_inputBuffer;
 std::unique_ptr<XGPU> m_outputBuffer;
 };
-} // namespace Opm::cuistl
+} // namespace Opm::gpuistl

 #endif
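The sender selection in SolverAdapter is a two-level capability check: MPI is only handed raw device pointers when CUDA-awareness holds both at compile time and at run time. Condensed, with explanatory comments (the host-staging behavior of GPUObliviousMPISender is inferred from its name, not shown in this diff; template arguments elided):

    if (mpiSupportsCudaAwareAtCompileTime && mpiSupportsCudaAwareAtRunTime) {
        // MPI can read GPU memory directly
        gpuComm = std::make_shared<Opm::gpuistl::GPUAwareMPISender<...>>(communication);
    } else {
        // presumably staged through host buffers
        gpuComm = std::make_shared<Opm::gpuistl::GPUObliviousMPISender<...>>(communication);
    }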
@@ -19,7 +19,7 @@
 #include <cublas_v2.h>
 #include <opm/simulators/linalg/cuistl/detail/CuBlasHandle.hpp>
 #include <opm/simulators/linalg/cuistl/detail/cublas_safe_call.hpp>
-namespace Opm::cuistl::detail
+namespace Opm::gpuistl::detail
 {


@@ -46,4 +46,4 @@ CuBlasHandle::getInstance()
 return instance;
 }

-} // namespace Opm::cuistl::detail
+} // namespace Opm::gpuistl::detail

@@ -21,7 +21,7 @@
 #include <cublas_v2.h>
 #include <memory>

-namespace Opm::cuistl::detail
+namespace Opm::gpuistl::detail
 {

 /**
@@ -31,7 +31,7 @@ namespace Opm::cuistl::detail
 * @code{.cpp}
 * #include <opm/simulators/linalg/cuistl/detail/CuBlasHandle.hpp>
 * void someFunction() {
-*     auto& cublasHandle = ::Opm::cuistl::detail::CuBlasHandle::getInstance();
+*     auto& cublasHandle = ::Opm::gpuistl::detail::CuBlasHandle::getInstance();
 *     int cuBlasVersion = -1;
 *     OPM_CUBLAS_SAFE_CALL(cublasGetVersion(cublasHandle.get(), &cuBlasVersion));
 * }
@@ -64,5 +64,5 @@ private:
 CuBlasHandle();
 cublasHandle_t m_handle;
 };
-} // namespace Opm::cuistl::detail
+} // namespace Opm::gpuistl::detail
 #endif // OPM_CUBLASHANDLE_HPP

@@ -21,7 +21,7 @@
 #include <opm/simulators/linalg/cuistl/detail/CuSparseResource.hpp>
 #include <opm/simulators/linalg/cuistl/detail/cusparse_safe_call.hpp>

-namespace Opm::cuistl::detail
+namespace Opm::gpuistl::detail
 {

 /**
@@ -52,7 +52,7 @@ createMatrixDescription()

 /**
 * @brief createLowerDiagonalDescription creates a lower diagonal matrix description
-* @return a lower diagonal matrix description overlapped with options from ::Opm::cuistl::detail::createMatrixDescription()
+* @return a lower diagonal matrix description overlapped with options from ::Opm::gpuistl::detail::createMatrixDescription()
 *
 * @note This will assume it has a unit diagonal
 */
@@ -67,7 +67,7 @@ createLowerDiagonalDescription()

 /**
 * @brief createUpperDiagonalDescription creates an upper diagonal matrix description
-* @return an upper diagonal matrix description overlapped with options from ::Opm::cuistl::detail::createMatrixDescription()
+* @return an upper diagonal matrix description overlapped with options from ::Opm::gpuistl::detail::createMatrixDescription()
 *
 * @note This will assume it has a non-unit diagonal.
 */
@@ -81,6 +81,6 @@ createUpperDiagonalDescription()
 return description;
 }

-} // namespace Opm::cuistl::detail
+} // namespace Opm::gpuistl::detail

 #endif // CU_MATRIX_DESCRIPTION_HPP

@@ -18,7 +18,7 @@
 */
 #include <opm/simulators/linalg/cuistl/detail/CuSparseHandle.hpp>
 #include <opm/simulators/linalg/cuistl/detail/cusparse_safe_call.hpp>
-namespace Opm::cuistl::detail
+namespace Opm::gpuistl::detail
 {


@@ -46,4 +46,4 @@ CuSparseHandle::getInstance()
 return instance;
 }

-} // namespace Opm::cuistl::detail
+} // namespace Opm::gpuistl::detail

@@ -21,7 +21,7 @@
 #include <cusparse.h>
 #include <memory>

-namespace Opm::cuistl::detail
+namespace Opm::gpuistl::detail
 {

 /**
@@ -31,7 +31,7 @@ namespace Opm::cuistl::detail
 * @code{.cpp}
 * #include <opm/simulators/linalg/cuistl/detail/CuSparseHandle.hpp>
 * void someFunction() {
-*     auto& cuSparseHandle = ::Opm::cuistl::detail::CuSparseHandle::getInstance();
+*     auto& cuSparseHandle = ::Opm::gpuistl::detail::CuSparseHandle::getInstance();
 *     int cuSparseVersion = -1;
 *     OPM_CUSPARSE_SAFE_CALL(cusparseGetVersion(cuSparseHandle.get(), &cuSparseVersion));
 * }
@@ -63,5 +63,5 @@ private:
 CuSparseHandle();
 cusparseHandle_t m_handle;
 };
-} // namespace Opm::cuistl::detail
+} // namespace Opm::gpuistl::detail
 #endif // OPM_CUSPARSEHANDLE_HPP

@@ -23,7 +23,7 @@
 #include <memory>
 #include <type_traits>

-namespace Opm::cuistl::detail
+namespace Opm::gpuistl::detail
 {

 /**
@@ -94,6 +94,6 @@ private:
 DeleterType m_deleter;
 };

-} // namespace Opm::cuistl::impl
+} // namespace Opm::gpuistl::impl
 #include <opm/simulators/linalg/cuistl/detail/CuSparseResource_impl.hpp>
 #endif // CUSPARSERESOURCE_HPP

@@ -20,7 +20,7 @@
 #include <opm/common/ErrorMacros.hpp>
 #include <opm/simulators/linalg/cuistl/detail/cusparse_safe_call.hpp>

-namespace Opm::cuistl::detail
+namespace Opm::gpuistl::detail
 {

 namespace
@@ -100,4 +100,4 @@ CuSparseResource<T>::~CuSparseResource()
 // proper name of the function being called.
 OPM_CUSPARSE_WARN_IF_ERROR(m_deleter(m_resource));
 }
-} // namespace Opm::cuistl::detail
+} // namespace Opm::gpuistl::detail

@@ -25,7 +25,7 @@
 #include <string>
 #include <utility>

-namespace Opm::cuistl::detail
+namespace Opm::gpuistl::detail
 {

 /// @brief Function that tests the best thread block size, assumes the provided function depends on threadblock-size
@@ -88,6 +88,6 @@ tuneThreadBlockSize(func& f, std::string descriptionOfFunction)
 return bestBlockSize;
 }

-} // end namespace Opm::cuistl::detail
+} // end namespace Opm::gpuistl::detail

 #endif
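tuneThreadBlockSize searches for the fastest launch configuration by timing the supplied callable at different block sizes. A minimal sketch of that idea using CUDA events — the candidate range and timing method here are assumptions, not the actual implementation:

    #include <cuda_runtime.h>
    #include <functional>

    int tuneSketch(const std::function<void(int)>& runWithBlockSize)
    {
        int bestSize = 64;
        float bestMs = 3.4e38f;
        for (int bs = 64; bs <= 1024; bs *= 2) {   // candidate block sizes
            cudaEvent_t start, stop;
            cudaEventCreate(&start);
            cudaEventCreate(&stop);
            cudaEventRecord(start);
            runWithBlockSize(bs);                  // launch the kernel under test
            cudaEventRecord(stop);
            cudaEventSynchronize(stop);
            float ms = 0.0f;
            cudaEventElapsedTime(&ms, start, stop);
            if (ms < bestMs) { bestMs = ms; bestSize = bs; }
            cudaEventDestroy(start);
            cudaEventDestroy(stop);
        }
        return bestSize;
    }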
@@ -31,16 +31,16 @@ This file contains a collection of utility functions used in the GPU implementat
 The functions deal with creating the mappings between reordered and natural indices, as well as
 extracting sparsity structures from dune matrices and creating cusparsematrix indices
 */
-namespace Opm::cuistl::detail
+namespace Opm::gpuistl::detail
 {
 inline std::vector<int>
 createReorderedToNatural(const Opm::SparseTable<size_t>& levelSets)
 {
-auto res = std::vector<int>(Opm::cuistl::detail::to_size_t(levelSets.dataSize()));
+auto res = std::vector<int>(Opm::gpuistl::detail::to_size_t(levelSets.dataSize()));
 int globCnt = 0;
 for (auto row : levelSets) {
 for (auto col : row) {
-OPM_ERROR_IF(Opm::cuistl::detail::to_size_t(globCnt) >= res.size(),
+OPM_ERROR_IF(Opm::gpuistl::detail::to_size_t(globCnt) >= res.size(),
 fmt::format("Internal error. globCnt = {}, res.size() = {}", globCnt, res.size()));
 res[globCnt++] = static_cast<int>(col);
 }
@@ -51,11 +51,11 @@ createReorderedToNatural(const Opm::SparseTable<size_t>& levelSets)
 inline std::vector<int>
 createNaturalToReordered(const Opm::SparseTable<size_t>& levelSets)
 {
-auto res = std::vector<int>(Opm::cuistl::detail::to_size_t(levelSets.dataSize()));
+auto res = std::vector<int>(Opm::gpuistl::detail::to_size_t(levelSets.dataSize()));
 int globCnt = 0;
 for (auto row : levelSets) {
 for (auto col : row) {
-OPM_ERROR_IF(Opm::cuistl::detail::to_size_t(globCnt) >= res.size(),
+OPM_ERROR_IF(Opm::gpuistl::detail::to_size_t(globCnt) >= res.size(),
 fmt::format("Internal error. globCnt = {}, res.size() = {}", globCnt, res.size()));
 res[col] = globCnt++;
 }
@@ -105,6 +105,6 @@ extractLowerAndUpperMatrices(const M& naturalMatrix, const std::vector<int>& reo
 return {std::unique_ptr<GPUM>(new auto(GPUM::fromMatrix(reorderedLower, true))),
 std::unique_ptr<GPUM>(new auto(GPUM::fromMatrix(reorderedUpper, true)))};
 }
-} // namespace Opm::cuistl::detail
+} // namespace Opm::gpuistl::detail

 #endif
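The two mappings above are inverse permutations of each other: createReorderedToNatural flattens the level sets in processing order, and createNaturalToReordered inverts that. A tiny worked example, assuming level sets {0, 2} followed by {1}:

    // levelSets = [ [0, 2], [1] ]    (natural row indices, grouped by level)
    // reorderedToNatural = [0, 2, 1] // processing position -> natural row
    // naturalToReordered = [0, 2, 1] // natural row -> processing position
    // (this particular permutation is its own inverse; that is not true in general)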
@@ -28,7 +28,7 @@



-namespace Opm::cuistl::detail
+namespace Opm::gpuistl::detail
 {

 #define CHECK_CUBLAS_ERROR_TYPE(code, x) \
@@ -174,7 +174,7 @@ cublasWarnIfError(cublasStatus_t error,

 return error;
 }
-} // namespace Opm::cuistl::detail
+} // namespace Opm::gpuistl::detail

 /**
 * @brief OPM_CUBLAS_SAFE_CALL checks the return type of the cublas expression (function call) and throws an exception
@@ -194,7 +194,7 @@ cublasWarnIfError(cublasStatus_t error,
 * @note This should be used for any call to cuBlas unless you have a good reason not to.
 */
 #define OPM_CUBLAS_SAFE_CALL(expression) \
-::Opm::cuistl::detail::cublasSafeCall(expression, #expression, __FILE__, __func__, __LINE__)
+::Opm::gpuistl::detail::cublasSafeCall(expression, #expression, __FILE__, __func__, __LINE__)

 /**
 * @brief OPM_CUBLAS_WARN_IF_ERROR checks the return type of the cublas expression (function call) and issues a warning
@@ -214,6 +214,6 @@ cublasWarnIfError(cublasStatus_t error,
 * @note Prefer the cublasSafeCall/OPM_CUBLAS_SAFE_CALL counterpart unless you really don't want to throw an exception.
 */
 #define OPM_CUBLAS_WARN_IF_ERROR(expression) \
-::Opm::cuistl::detail::cublasWarnIfError(expression, #expression, __FILE__, __func__, __LINE__)
+::Opm::gpuistl::detail::cublasWarnIfError(expression, #expression, __FILE__, __func__, __LINE__)

 #endif // OPM_CUBLAS_SAFE_CALL_HPP

@@ -29,7 +29,7 @@
 #include <cublas_v2.h>
 #include <opm/common/ErrorMacros.hpp>

-namespace Opm::cuistl::detail
+namespace Opm::gpuistl::detail
 {

 inline cublasStatus_t
@@ -164,5 +164,5 @@ cublasNrm2([[maybe_unused]] cublasHandle_t handle,
 OPM_THROW(std::runtime_error, "norm2 for integer vectors is not implemented yet.");
 }

-} // namespace Opm::cuistl::detail
+} // namespace Opm::gpuistl::detail
 #endif

@@ -24,7 +24,7 @@
 #include <opm/common/OpmLog/OpmLog.hpp>
 #include <string_view>

-namespace Opm::cuistl::detail
+namespace Opm::gpuistl::detail
 {
 /**
 * @brief getCudaErrorMessage generates the error message to display for a given error.
@@ -128,7 +128,7 @@ cudaWarnIfError(cudaError_t error,
 OpmLog::warning(getCudaErrorMessage(error, expression, filename, functionName, lineNumber));
 }
 }
-} // namespace Opm::cuistl::detail
+} // namespace Opm::gpuistl::detail

 /**
 * @brief OPM_CUDA_SAFE_CALL checks the return type of the CUDA expression (function call) and throws an exception if it
@@ -148,7 +148,7 @@ cudaWarnIfError(cudaError_t error,
 * @note This should be used for any call to the CUDA runtime API unless you have a good reason not to.
 */
 #define OPM_CUDA_SAFE_CALL(expression) \
-::Opm::cuistl::detail::cudaSafeCall(expression, #expression, __FILE__, __func__, __LINE__)
+::Opm::gpuistl::detail::cudaSafeCall(expression, #expression, __FILE__, __func__, __LINE__)


 /**
@@ -169,6 +169,6 @@ cudaWarnIfError(cudaError_t error,
 * @note Prefer the cudaSafeCall/OPM_CUDA_SAFE_CALL counterpart unless you really don't want to throw an exception.
 */
 #define OPM_CUDA_WARN_IF_ERROR(expression) \
-::Opm::cuistl::detail::cudaWarnIfError(expression, #expression, __FILE__, __func__, __LINE__)
+::Opm::gpuistl::detail::cudaWarnIfError(expression, #expression, __FILE__, __func__, __LINE__)

 #endif
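Typical use wraps every raw CUDA runtime call so that the file, function and line of a failure end up in the exception text. A short usage sketch (the buffer is hypothetical):

    double* deviceBuffer = nullptr;
    OPM_CUDA_SAFE_CALL(cudaMalloc(&deviceBuffer, 1024 * sizeof(double)));   // throws on failure
    OPM_CUDA_SAFE_CALL(cudaMemset(deviceBuffer, 0, 1024 * sizeof(double)));
    OPM_CUDA_WARN_IF_ERROR(cudaFree(deviceBuffer)); // only warns; suitable for destructors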
@@ -19,7 +19,7 @@
 #ifndef CUSPARSE_CONSTANTS_HPP
 #define CUSPARSE_CONSTANTS_HPP
 #include <cusparse.h>
-namespace Opm::cuistl::detail
+namespace Opm::gpuistl::detail
 {
 const constexpr auto CUSPARSE_MATRIX_ORDER = CUSPARSE_DIRECTION_ROW;
 }

@@ -23,7 +23,7 @@
 #include <opm/simulators/linalg/cuistl/detail/gpuThreadUtils.hpp>
 #include <stdexcept>

-namespace Opm::cuistl::detail
+namespace Opm::gpuistl::detail
 {
 namespace
 {
@@ -110,8 +110,8 @@ copyMatDataToReordered(T* srcMatrix,
 int thrBlockSize)
 {
 int threadBlockSize
-    = ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(cuMoveDataToReordered<T, blocksize>, thrBlockSize);
-int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(numberOfRows, threadBlockSize);
+    = ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(cuMoveDataToReordered<T, blocksize>, thrBlockSize);
+int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(numberOfRows, threadBlockSize);
 cuMoveDataToReordered<T, blocksize><<<nThreadBlocks, threadBlockSize>>>(
 srcMatrix, srcRowIndices, dstMatrix, dstRowIndices, naturalToReordered, numberOfRows);
 }
@@ -130,9 +130,9 @@ copyMatDataToReorderedSplit(T* srcMatrix,
 size_t numberOfRows,
 int thrBlockSize)
 {
-int threadBlockSize = ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(
+int threadBlockSize = ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(
 cuMoveDataToReorderedSplit<T, blocksize>, thrBlockSize);
-int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(numberOfRows, threadBlockSize);
+int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(numberOfRows, threadBlockSize);
 cuMoveDataToReorderedSplit<T, blocksize><<<nThreadBlocks, threadBlockSize>>>(srcMatrix,
 srcRowIndices,
 srcColumnIndices,
@@ -161,4 +161,4 @@ INSTANTIATE_KERNEL_WRAPPERS(double, 3);
 INSTANTIATE_KERNEL_WRAPPERS(double, 4);
 INSTANTIATE_KERNEL_WRAPPERS(double, 5);
 INSTANTIATE_KERNEL_WRAPPERS(double, 6);
-} // namespace Opm::cuistl::detail
+} // namespace Opm::gpuistl::detail

@@ -20,7 +20,7 @@
 #define OPM_CUISTL_CUSPARSE_MATRIX_OPERATIONS_HPP
 #include <cstddef>
 #include <vector>
-namespace Opm::cuistl::detail
+namespace Opm::gpuistl::detail
 {
 /**
 * @brief Reorders the elements of a matrix by copying them from one matrix to another using a permutation list
@@ -68,5 +68,5 @@ void copyMatDataToReorderedSplit(T* srcMatrix,
 size_t numberOfRows,
 int threadBlockSize);

-} // namespace Opm::cuistl::detail
+} // namespace Opm::gpuistl::detail
 #endif

@@ -24,7 +24,7 @@
 #include <opm/common/ErrorMacros.hpp>
 #include <opm/common/OpmLog/OpmLog.hpp>

-namespace Opm::cuistl::detail
+namespace Opm::gpuistl::detail
 {

 #define CHECK_CUSPARSE_ERROR_TYPE(code, x) \
@@ -161,7 +161,7 @@ cusparseWarnIfError(cusparseStatus_t error,

 return error;
 }
-} // namespace Opm::cuistl::detail
+} // namespace Opm::gpuistl::detail



@@ -183,7 +183,7 @@ cusparseWarnIfError(cusparseStatus_t error,
 * @note This should be used for any call to cuSparse unless you have a good reason not to.
 */
 #define OPM_CUSPARSE_SAFE_CALL(expression) \
-::Opm::cuistl::detail::cusparseSafeCall(expression, #expression, __FILE__, __func__, __LINE__)
+::Opm::gpuistl::detail::cusparseSafeCall(expression, #expression, __FILE__, __func__, __LINE__)

 /**
 * @brief OPM_CUSPARSE_WARN_IF_ERROR checks the return type of the cusparse expression (function call) and issues a
@@ -204,5 +204,5 @@ cusparseWarnIfError(cusparseStatus_t error,
 * exception.
 */
 #define OPM_CUSPARSE_WARN_IF_ERROR(expression) \
-::Opm::cuistl::detail::cusparseWarnIfError(expression, #expression, __FILE__, __func__, __LINE__)
+::Opm::gpuistl::detail::cusparseWarnIfError(expression, #expression, __FILE__, __func__, __LINE__)
 #endif // OPM_CUSPARSE_SAFE_CALL_HPP

@@ -27,7 +27,7 @@
 #include <type_traits>
 #ifndef OPM_CUSPARSE_WRAPPER_HPP
 #define OPM_CUSPARSE_WRAPPER_HPP
-namespace Opm::cuistl::detail
+namespace Opm::gpuistl::detail
 {

 inline cusparseStatus_t
@@ -450,5 +450,5 @@ cusparseBsrmv(cusparseHandle_t handle,
 beta,
 y);
 }
-} // namespace Opm::cuistl::detail
+} // namespace Opm::gpuistl::detail
 #endif

@@ -22,7 +22,7 @@
 #include <limits>
 #include <vector>

-namespace Opm::cuistl::detail
+namespace Opm::gpuistl::detail
 {

 /**
@@ -53,6 +53,6 @@ makeMatrixWithNonzeroDiagonal(const Matrix& matrix,

 return newMatrix;
 }
-} // namespace Opm::cuistl::detail
+} // namespace Opm::gpuistl::detail

 #endif

@@ -26,7 +26,7 @@
 /*
 This file provides some logic for handling how to choose the correct thread-block size
 */
-namespace Opm::cuistl::detail
+namespace Opm::gpuistl::detail
 {
 constexpr inline size_t
 getThreads([[maybe_unused]] size_t numberOfRows)
@@ -61,6 +61,6 @@ getNumberOfBlocks(int wantedThreads, int threadBlockSize)
 return (wantedThreads + threadBlockSize - 1) / threadBlockSize;
 }

-} // end namespace Opm::cuistl::detail
+} // end namespace Opm::gpuistl::detail

 #endif
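getNumberOfBlocks is the usual ceiling division: launch enough blocks to cover wantedThreads at threadBlockSize threads per block. A worked example:

    // (wantedThreads + threadBlockSize - 1) / threadBlockSize
    // wantedThreads = 1000, threadBlockSize = 256
    // -> (1000 + 255) / 256 = 1255 / 256 = 4 blocks (4 * 256 = 1024 >= 1000)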
@@ -29,7 +29,7 @@
|
||||
* TODO: Use the requires-keyword once C++20 becomes availble (https://en.cppreference.com/w/cpp/language/constraints ).
|
||||
* With C++20 this file can be removed.
|
||||
*/
|
||||
namespace Opm::cuistl::detail
|
||||
namespace Opm::gpuistl::detail
|
||||
{
|
||||
|
||||
/**
|
||||
@@ -121,5 +121,5 @@ public:
|
||||
static constexpr bool value = std::is_same_v<decltype(test<T>(0)), std::true_type>;
|
||||
};
|
||||
|
||||
} // namespace Opm::cuistl::detail
|
||||
} // namespace Opm::gpuistl::detail
|
||||
#endif
|
||||
|
||||
@@ -23,7 +23,7 @@
|
||||
#include <opm/simulators/linalg/cuistl/detail/preconditionerKernels/DILUKernels.hpp>
|
||||
#include <stdexcept>
|
||||
|
||||
namespace Opm::cuistl::detail::DILU
|
||||
namespace Opm::gpuistl::detail::DILU
|
||||
{
|
||||
namespace
|
||||
{
|
||||
@@ -282,8 +282,8 @@ solveLowerLevelSet(T* reorderedMat,
|
||||
int thrBlockSize)
|
||||
{
|
||||
int threadBlockSize
|
||||
= ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(cuSolveLowerLevelSet<T, blocksize>, thrBlockSize);
|
||||
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
|
||||
= ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(cuSolveLowerLevelSet<T, blocksize>, thrBlockSize);
|
||||
int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
|
||||
cuSolveLowerLevelSet<T, blocksize><<<nThreadBlocks, threadBlockSize>>>(
|
||||
reorderedMat, rowIndices, colIndices, indexConversion, startIdx, rowsInLevelSet, dInv, d, v);
|
||||
}
|
||||
@@ -302,9 +302,9 @@ solveLowerLevelSetSplit(T* reorderedMat,
|
||||
T* v,
|
||||
int thrBlockSize)
|
||||
{
|
||||
int threadBlockSize = ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(
|
||||
int threadBlockSize = ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(
|
||||
cuSolveLowerLevelSetSplit<T, blocksize>, thrBlockSize);
|
||||
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
|
||||
int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
|
||||
cuSolveLowerLevelSetSplit<T, blocksize><<<nThreadBlocks, threadBlockSize>>>(
|
||||
reorderedMat, rowIndices, colIndices, indexConversion, startIdx, rowsInLevelSet, dInv, d, v);
|
||||
}
|
||||
@@ -322,8 +322,8 @@ solveUpperLevelSet(T* reorderedMat,
|
||||
int thrBlockSize)
|
||||
{
|
||||
int threadBlockSize
|
||||
= ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(cuSolveUpperLevelSet<T, blocksize>, thrBlockSize);
|
||||
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
|
||||
= ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(cuSolveUpperLevelSet<T, blocksize>, thrBlockSize);
|
||||
int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
|
||||
cuSolveUpperLevelSet<T, blocksize><<<nThreadBlocks, threadBlockSize>>>(
|
||||
reorderedMat, rowIndices, colIndices, indexConversion, startIdx, rowsInLevelSet, dInv, v);
|
||||
}
|
||||
@@ -340,9 +340,9 @@ solveUpperLevelSetSplit(T* reorderedMat,
|
||||
T* v,
|
||||
int thrBlockSize)
|
||||
{
|
||||
int threadBlockSize = ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(
|
||||
int threadBlockSize = ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(
|
||||
cuSolveUpperLevelSetSplit<T, blocksize>, thrBlockSize);
|
||||
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
|
||||
int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
|
||||
cuSolveUpperLevelSetSplit<T, blocksize><<<nThreadBlocks, threadBlockSize>>>(
|
||||
reorderedMat, rowIndices, colIndices, indexConversion, startIdx, rowsInLevelSet, dInv, v);
|
||||
}
|
||||
@@ -360,9 +360,9 @@ computeDiluDiagonal(T* reorderedMat,
|
||||
int thrBlockSize)
|
||||
{
|
||||
if (blocksize <= 3) {
|
||||
int threadBlockSize = ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(
|
||||
int threadBlockSize = ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(
|
||||
cuComputeDiluDiagonal<T, blocksize>, thrBlockSize);
|
||||
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
|
||||
int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
|
||||
cuComputeDiluDiagonal<T, blocksize><<<nThreadBlocks, threadBlockSize>>>(reorderedMat,
|
||||
rowIndices,
|
||||
colIndices,
|
||||
@@ -393,9 +393,9 @@ computeDiluDiagonalSplit(T* reorderedLowerMat,
|
||||
int thrBlockSize)
|
||||
{
|
||||
if (blocksize <= 3) {
|
||||
int threadBlockSize = ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(
|
||||
int threadBlockSize = ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(
|
||||
cuComputeDiluDiagonalSplit<T, blocksize>, thrBlockSize);
|
||||
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
|
||||
int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
|
||||
cuComputeDiluDiagonalSplit<T, blocksize><<<nThreadBlocks, threadBlockSize>>>(reorderedLowerMat,
|
||||
lowerRowIndices,
|
||||
lowerColIndices,
|
||||
@@ -434,4 +434,4 @@ INSTANTIATE_KERNEL_WRAPPERS(double, 3);
|
||||
INSTANTIATE_KERNEL_WRAPPERS(double, 4);
|
||||
INSTANTIATE_KERNEL_WRAPPERS(double, 5);
|
||||
INSTANTIATE_KERNEL_WRAPPERS(double, 6);
|
||||
} // namespace Opm::cuistl::detail::DILU
|
||||
} // namespace Opm::gpuistl::detail::DILU
|
||||
|
||||
@@ -24,7 +24,7 @@
|
||||
#include <cuda_runtime.h>
|
||||
#include <vector>
|
||||
|
||||
namespace Opm::cuistl::detail::DILU
|
||||
namespace Opm::gpuistl::detail::DILU
|
||||
{
|
||||
|
||||
/**
|
||||
@@ -198,5 +198,5 @@ void computeDiluDiagonalSplit(T* reorderedLowerMat,
|
||||
T* dInv,
|
||||
int threadBlockSize);
|
||||
|
||||
} // namespace Opm::cuistl::detail::DILU
|
||||
} // namespace Opm::gpuistl::detail::DILU
|
||||
#endif
|
||||
|
||||
@@ -27,7 +27,7 @@
|
||||
The LU factorization and apply step is written based on the Dune implementations
|
||||
*/
|
||||
|
||||
namespace Opm::cuistl::detail::ILU0
|
||||
namespace Opm::gpuistl::detail::ILU0
|
||||
{
|
||||
namespace
|
||||
{
|
||||
@@ -341,8 +341,8 @@ solveLowerLevelSet(T* reorderedMat,
|
||||
int thrBlockSize)
|
||||
{
|
||||
int threadBlockSize
|
||||
= ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(cuSolveLowerLevelSet<T, blocksize>, thrBlockSize);
|
||||
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
|
||||
= ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(cuSolveLowerLevelSet<T, blocksize>, thrBlockSize);
|
||||
int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
|
||||
cuSolveLowerLevelSet<T, blocksize><<<nThreadBlocks, threadBlockSize>>>(
|
||||
reorderedMat, rowIndices, colIndices, indexConversion, startIdx, rowsInLevelSet, d, v);
|
||||
}
|
||||
@@ -359,8 +359,8 @@ solveUpperLevelSet(T* reorderedMat,
|
||||
int thrBlockSize)
|
||||
{
|
||||
int threadBlockSize
|
||||
= ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(cuSolveUpperLevelSet<T, blocksize>, thrBlockSize);
|
||||
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
|
||||
= ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(cuSolveUpperLevelSet<T, blocksize>, thrBlockSize);
|
||||
int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
|
||||
cuSolveUpperLevelSet<T, blocksize><<<nThreadBlocks, threadBlockSize>>>(
|
||||
reorderedMat, rowIndices, colIndices, indexConversion, startIdx, rowsInLevelSet, v);
|
||||
}
|
||||
@@ -377,9 +377,9 @@ solveLowerLevelSetSplit(T* reorderedMat,
|
||||
T* v,
|
||||
int thrBlockSize)
|
||||
{
|
||||
int threadBlockSize = ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(
|
||||
int threadBlockSize = ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(
|
||||
cuSolveLowerLevelSetSplit<T, blocksize>, thrBlockSize);
|
||||
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
|
||||
int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
|
||||
cuSolveLowerLevelSetSplit<T, blocksize><<<nThreadBlocks, threadBlockSize>>>(
|
||||
reorderedMat, rowIndices, colIndices, indexConversion, startIdx, rowsInLevelSet, d, v);
|
||||
}
|
||||
@@ -396,9 +396,9 @@ solveUpperLevelSetSplit(T* reorderedMat,
|
||||
T* v,
|
||||
int thrBlockSize)
|
||||
{
|
||||
int threadBlockSize = ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(
|
||||
int threadBlockSize = ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(
|
||||
cuSolveUpperLevelSetSplit<T, blocksize>, thrBlockSize);
|
||||
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
|
||||
int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
|
||||
cuSolveUpperLevelSetSplit<T, blocksize><<<nThreadBlocks, threadBlockSize>>>(
|
||||
reorderedMat, rowIndices, colIndices, indexConversion, startIdx, rowsInLevelSet, dInv, v);
|
||||
}
|
||||
@@ -415,8 +415,8 @@ LUFactorization(T* srcMatrix,
|
||||
int thrBlockSize)
|
||||
{
|
||||
int threadBlockSize
|
||||
= ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(cuLUFactorization<T, blocksize>, thrBlockSize);
|
||||
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
|
||||
= ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(cuLUFactorization<T, blocksize>, thrBlockSize);
|
||||
int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
|
||||
cuLUFactorization<T, blocksize><<<nThreadBlocks, threadBlockSize>>>(
|
||||
srcMatrix, srcRowIndices, srcColumnIndices, naturalToReordered, reorderedToNatual, rowsInLevelSet, startIdx);
|
||||
}
|
||||
@@ -437,8 +437,8 @@ LUFactorizationSplit(T* reorderedLowerMat,
                      int thrBlockSize)
 {
     int threadBlockSize
-        = ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(cuLUFactorizationSplit<T, blocksize>, thrBlockSize);
-    int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
+        = ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(cuLUFactorizationSplit<T, blocksize>, thrBlockSize);
+    int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
     cuLUFactorizationSplit<T, blocksize><<<nThreadBlocks, threadBlockSize>>>(reorderedLowerMat,
                                                                              lowerRowIndices,
                                                                              lowerColIndices,
@@ -473,4 +473,4 @@ INSTANTIATE_KERNEL_WRAPPERS(double, 3);
 INSTANTIATE_KERNEL_WRAPPERS(double, 4);
 INSTANTIATE_KERNEL_WRAPPERS(double, 5);
 INSTANTIATE_KERNEL_WRAPPERS(double, 6);
-} // namespace Opm::cuistl::detail::ILU0
+} // namespace Opm::gpuistl::detail::ILU0

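All of the kernel wrappers in this file share one launch recipe: ask the CUDA runtime for a block size that suits the kernel, then cover the level set with enough blocks. A minimal sketch of that recipe, assuming the getCudaRecomendedThreadBlockSize/getNumberOfBlocks helpers wrap the occupancy API (names and fallback behaviour below are illustrative, not the repository's code):

#include <cuda_runtime.h>

template <class Kernel>
int recommendedThreadBlockSizeSketch(Kernel kernel, int fallback = 1024)
{
    int minGridSize = 0;
    int blockSize = fallback;
    // Ask the runtime which block size maximises occupancy for this kernel.
    if (cudaOccupancyMaxPotentialBlockSize(&minGridSize, &blockSize, kernel) != cudaSuccess) {
        blockSize = fallback; // fall back to the caller-provided size on failure
    }
    return blockSize;
}

int numberOfBlocksSketch(int wantedThreads, int threadBlockSize)
{
    // Ceiling division: enough blocks so every row in the level set gets a thread.
    return (wantedThreads + threadBlockSize - 1) / threadBlockSize;
}
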
@@ -20,7 +20,7 @@
 #define OPM_ILU0_KERNELS_HPP
 #include <cstddef>
 #include <vector>
-namespace Opm::cuistl::detail::ILU0
+namespace Opm::gpuistl::detail::ILU0
 {
 
 /**
@@ -189,5 +189,5 @@ void LUFactorizationSplit(T* reorderedLowerMat,
                           int rowsInLevelSet,
                           int threadBlockSize);
 
-} // namespace Opm::cuistl::detail::ILU0
+} // namespace Opm::gpuistl::detail::ILU0
 #endif

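A header rename like this breaks any downstream code that still spells the old namespace. One transitional option (a hedged sketch, not necessarily what this commit does) is to alias the old name to the new one for a deprecation period:

namespace Opm::gpuistl::detail::ILU0 {
// renamed kernel wrappers live here
}

// Deprecated spelling forwards to the new namespace:
namespace Opm::cuistl::detail {
namespace ILU0 = Opm::gpuistl::detail::ILU0;
}
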
@@ -23,7 +23,7 @@
 #include <opm/simulators/linalg/cuistl/detail/preconditionerKernels/JacKernels.hpp>
 #include <stdexcept>
 
-namespace Opm::cuistl::detail::JAC
+namespace Opm::gpuistl::detail::JAC
 {
 namespace
 {
@@ -60,8 +60,8 @@ invertDiagonalAndFlatten(T* mat, int* rowIndices, int* colIndices, size_t number
 {
     if (blocksize <= 3) {
         int threadBlockSize
-            = ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(cuInvertDiagonalAndFlatten<T, blocksize>);
-        int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(numberOfRows, threadBlockSize);
+            = ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(cuInvertDiagonalAndFlatten<T, blocksize>);
+        int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(numberOfRows, threadBlockSize);
         cuInvertDiagonalAndFlatten<T, blocksize>
             <<<nThreadBlocks, threadBlockSize>>>(mat, rowIndices, colIndices, numberOfRows, vec);
     } else {
@@ -85,4 +85,4 @@ INSTANTIATE_KERNEL_WRAPPERS(double, 4);
 INSTANTIATE_KERNEL_WRAPPERS(double, 5);
 INSTANTIATE_KERNEL_WRAPPERS(double, 6);
 
-} // namespace Opm::cuistl::detail::JAC
+} // namespace Opm::gpuistl::detail::JAC

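invertDiagonalAndFlatten walks each row, finds the diagonal block, inverts it, and writes the inverse into a flat output array. For orientation, a host-side sketch of the 2x2 case under the usual CSR conventions (illustrative only; the GPU version is cuInvertDiagonalAndFlatten above):

#include <cstddef>

void invertDiagonalAndFlattenHost2x2(const double* mat, const int* rowIndices, const int* colIndices,
                                     std::size_t numberOfRows, double* vec)
{
    constexpr int bs = 2;
    for (std::size_t row = 0; row < numberOfRows; ++row) {
        for (int nz = rowIndices[row]; nz < rowIndices[row + 1]; ++nz) {
            if (colIndices[nz] == static_cast<int>(row)) { // found the diagonal block
                const double* b = mat + nz * bs * bs;      // row-major 2x2 block
                const double det = b[0] * b[3] - b[1] * b[2];
                double* out = vec + row * bs * bs;         // flattened inverse
                out[0] =  b[3] / det;
                out[1] = -b[1] / det;
                out[2] = -b[2] / det;
                out[3] =  b[0] / det;
                break;
            }
        }
    }
}
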
@@ -20,7 +20,7 @@
 #define OPM_JAC_KERNELS_HPP
 #include <cstddef>
 #include <vector>
-namespace Opm::cuistl::detail::JAC
+namespace Opm::gpuistl::detail::JAC
 {
 
 /**
@@ -34,5 +34,5 @@ namespace Opm::cuistl::detail::JAC
 template <class T, int blocksize>
 void invertDiagonalAndFlatten(T* mat, int* rowIndices, int* colIndices, size_t numberOfRows, T* vec);
 
-} // namespace Opm::cuistl::detail::JAC
+} // namespace Opm::gpuistl::detail::JAC
 #endif

@@ -22,7 +22,7 @@
 
 #include <opm/simulators/linalg/cuistl/detail/has_function.hpp>
 
-namespace Opm::cuistl::detail
+namespace Opm::gpuistl::detail
 {
 
 //! @brief Tests (compile time) if the preconditioner type needs to call pre() before a call to apply()
@@ -60,5 +60,5 @@ shouldCallPreconditionerPost()
     return true;
 }
 }
-} // namespace Opm::cuistl::detail
+} // namespace Opm::gpuistl::detail
 #endif

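The pre()/post() queries this header renames are compile-time detection, so a preconditioner without a pre() never has the call instantiated. A hedged sketch of the underlying idiom, probing for P::pre(X&, Y&) with std::void_t (the trait name is illustrative; the real traits live in has_function.hpp):

#include <type_traits>
#include <utility>

template <class P, class X, class Y, class = void>
struct has_pre_sketch : std::false_type {};

template <class P, class X, class Y>
struct has_pre_sketch<P, X, Y,
    std::void_t<decltype(std::declval<P&>().pre(std::declval<X&>(), std::declval<Y&>()))>>
    : std::true_type {};

// Usage: only call pre() when it exists.
// if constexpr (has_pre_sketch<Precond, Domain, Range>::value) { precond.pre(x, b); }
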
@@ -37,7 +37,7 @@
  * while Dune::BlockVector (and relatives) use unsigned size_t.
  */
 
-namespace Opm::cuistl::detail
+namespace Opm::gpuistl::detail
 {
 
 /**
@@ -106,6 +106,6 @@ to_size_t(int i)
 
     return std::size_t(i);
 }
-} // namespace Opm::cuistl::detail
+} // namespace Opm::gpuistl::detail
 
 #endif

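to_int and to_size_t are checked narrowing conversions between the signed ints the CUDA sparse APIs want and the unsigned size_t Dune uses, as the comment above explains. A sketch of their likely shape, consistent with the std::invalid_argument the conversion tests further down expect (illustrative, not the header's exact code):

#include <cstddef>
#include <limits>
#include <stdexcept>

int to_int_sketch(std::size_t i)
{
    if (i > static_cast<std::size_t>(std::numeric_limits<int>::max())) {
        throw std::invalid_argument("Trying to convert a size_t too large for int.");
    }
    return static_cast<int>(i);
}

std::size_t to_size_t_sketch(int i)
{
    if (i < 0) {
        throw std::invalid_argument("Trying to convert a negative int to size_t.");
    }
    return static_cast<std::size_t>(i);
}
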
@@ -25,7 +25,7 @@
 #include <opm/simulators/linalg/cuistl/detail/gpuThreadUtils.hpp>
 #include <opm/simulators/linalg/cuistl/detail/vector_operations.hpp>
 #include <stdexcept>
-namespace Opm::cuistl::detail
+namespace Opm::gpuistl::detail
 {
 
 namespace
@@ -108,8 +108,8 @@ template <class T>
 void
 setVectorValue(T* deviceData, size_t numberOfElements, const T& value)
 {
-    int threadBlockSize = ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(setVectorValueKernel<T>);
-    int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(numberOfElements, threadBlockSize);
+    int threadBlockSize = ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(setVectorValueKernel<T>);
+    int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(numberOfElements, threadBlockSize);
     setVectorValueKernel<<<nThreadBlocks, threadBlockSize>>>(deviceData, numberOfElements, value);
 }

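setVectorValueKernel is a plain one-thread-per-element fill; the launch above sizes the grid so each element gets a thread. A plausible shape, sketched for orientation (the real kernel is defined earlier in this file and may differ):

#include <cstddef>

template <class T>
__global__ void setVectorValueKernelSketch(T* data, std::size_t numberOfElements, T value)
{
    const std::size_t i = blockIdx.x * static_cast<std::size_t>(blockDim.x) + threadIdx.x;
    if (i < numberOfElements) {
        data[i] = value; // each thread writes exactly one element
    }
}
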
@@ -121,8 +121,8 @@ template <class T>
 void
 setZeroAtIndexSet(T* deviceData, size_t numberOfElements, const int* indices)
 {
-    int threadBlockSize = ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(setZeroAtIndexSetKernel<T>);
-    int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(numberOfElements, threadBlockSize);
+    int threadBlockSize = ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(setZeroAtIndexSetKernel<T>);
+    int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(numberOfElements, threadBlockSize);
     setZeroAtIndexSetKernel<<<nThreadBlocks, threadBlockSize>>>(deviceData, numberOfElements, indices);
 }
 template void setZeroAtIndexSet(double*, size_t, const int*);
@@ -138,8 +138,8 @@ innerProductAtIndices(cublasHandle_t cublasHandle,
                       size_t numberOfElements,
                       const int* indices)
 {
-    int threadBlockSize = ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(elementWiseMultiplyKernel<T>);
-    int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(numberOfElements, threadBlockSize);
+    int threadBlockSize = ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(elementWiseMultiplyKernel<T>);
+    int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(numberOfElements, threadBlockSize);
     elementWiseMultiplyKernel<<<nThreadBlocks, threadBlockSize>>>(deviceA, deviceB, buffer, numberOfElements, indices);
 
     // TODO: [perf] Get rid of the allocation here.
@@ -158,8 +158,8 @@ template <class T>
 void
 prepareSendBuf(const T* deviceA, T* buffer, size_t numberOfElements, const int* indices)
 {
-    int threadBlockSize = ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(prepareSendBufKernel<T>);
-    int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(numberOfElements, threadBlockSize);
+    int threadBlockSize = ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(prepareSendBufKernel<T>);
+    int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(numberOfElements, threadBlockSize);
     prepareSendBufKernel<<<nThreadBlocks, threadBlockSize>>>(deviceA, buffer, numberOfElements, indices);
     OPM_CUDA_SAFE_CALL(cudaDeviceSynchronize()); // The buffers are prepared for MPI. Wait for them to finish.
 }
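prepareSendBuf gathers the entries named by indices into a contiguous buffer so MPI can send a single chunk, and the cudaDeviceSynchronize() guarantees the buffer is complete before MPI touches it; syncFromRecvBuf below is the inverse scatter (deviceA[indices[i]] = buffer[i]). A hedged sketch of the gather kernel:

#include <cstddef>

template <class T>
__global__ void prepareSendBufKernelSketch(const T* deviceA, T* buffer,
                                           std::size_t numberOfElements, const int* indices)
{
    const std::size_t i = blockIdx.x * static_cast<std::size_t>(blockDim.x) + threadIdx.x;
    if (i < numberOfElements) {
        buffer[i] = deviceA[indices[i]]; // pack scattered entries contiguously
    }
}
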
@@ -171,8 +171,8 @@ template <class T>
 void
 syncFromRecvBuf(T* deviceA, T* buffer, size_t numberOfElements, const int* indices)
 {
-    int threadBlockSize = ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(syncFromRecvBufKernel<T>);
-    int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(numberOfElements, threadBlockSize);
+    int threadBlockSize = ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(syncFromRecvBufKernel<T>);
+    int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(numberOfElements, threadBlockSize);
     syncFromRecvBufKernel<<<nThreadBlocks, threadBlockSize>>>(deviceA, buffer, numberOfElements, indices);
     // cudaDeviceSynchronize(); // Not needed, I guess...
 }
@@ -191,20 +191,20 @@ weightedDiagMV(const T* squareBlockVector,
 {
     switch (blocksize) {
     case 1: {
-        int threadBlockSize = ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(weightedDiagMV<T, 1>);
-        int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(numberOfElements, threadBlockSize);
+        int threadBlockSize = ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(weightedDiagMV<T, 1>);
+        int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(numberOfElements, threadBlockSize);
         weightedDiagMV<T, 1>
             <<<nThreadBlocks, threadBlockSize>>>(squareBlockVector, numberOfElements, relaxationFactor, srcVec, dstVec);
     } break;
     case 2: {
-        int threadBlockSize = ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(weightedDiagMV<T, 2>);
-        int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(numberOfElements, threadBlockSize);
+        int threadBlockSize = ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(weightedDiagMV<T, 2>);
+        int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(numberOfElements, threadBlockSize);
         weightedDiagMV<T, 2>
             <<<nThreadBlocks, threadBlockSize>>>(squareBlockVector, numberOfElements, relaxationFactor, srcVec, dstVec);
     } break;
     case 3: {
-        int threadBlockSize = ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(weightedDiagMV<T, 3>);
-        int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(numberOfElements, threadBlockSize);
+        int threadBlockSize = ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(weightedDiagMV<T, 3>);
+        int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(numberOfElements, threadBlockSize);
         weightedDiagMV<T, 3>
             <<<nThreadBlocks, threadBlockSize>>>(squareBlockVector, numberOfElements, relaxationFactor, srcVec, dstVec);
     } break;
@@ -217,4 +217,4 @@ weightedDiagMV(const T* squareBlockVector,
 template void weightedDiagMV(const double*, const size_t, const size_t, double, const double*, double*);
 template void weightedDiagMV(const float*, const size_t, const size_t, float, const float*, float*);
 
-} // namespace Opm::cuistl::detail
+} // namespace Opm::gpuistl::detail

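weightedDiagMV computes dstVec = relaxationFactor * D * srcVec, where squareBlockVector holds numberOfElements row-major blocksize-by-blocksize blocks of D. A host-side reference for that semantics (a sketch for orientation, not the CUDA kernel):

#include <cstddef>

template <class T, int blocksize>
void weightedDiagMVHost(const T* squareBlockVector, std::size_t numberOfElements,
                        T relaxationFactor, const T* srcVec, T* dstVec)
{
    for (std::size_t b = 0; b < numberOfElements; ++b) {
        const T* blk = squareBlockVector + b * blocksize * blocksize;
        for (int i = 0; i < blocksize; ++i) {
            T sum = 0;
            for (int j = 0; j < blocksize; ++j) {
                sum += blk[i * blocksize + j] * srcVec[b * blocksize + j];
            }
            dstVec[b * blocksize + i] = relaxationFactor * sum; // dst = w * D * src
        }
    }
}
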
@@ -20,7 +20,7 @@
 #define OPM_CUISTL_VECTOR_OPERATIONS_HPP
 #include <cstddef>
 #include <cublas_v2.h>
-namespace Opm::cuistl::detail
+namespace Opm::gpuistl::detail
 {
 
 /**
@@ -81,5 +81,5 @@ void weightedDiagMV(const T* squareBlockVector,
                     T relaxationFactor,
                     const T* srcVec,
                     T* dstVec);
-} // namespace Opm::cuistl::detail
+} // namespace Opm::gpuistl::detail
 #endif

@@ -21,7 +21,7 @@
 #include <opm/common/OpmLog/OpmLog.hpp>
 #include <opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp>
 #include <opm/simulators/linalg/cuistl/set_device.hpp>
-namespace Opm::cuistl
+namespace Opm::gpuistl
 {
 void
 setDevice(int mpiRank, [[maybe_unused]] int numberOfMpiRanks)
@@ -46,4 +46,4 @@ setDevice(int mpiRank, [[maybe_unused]] int numberOfMpiRanks)
     OpmLog::info("Set CUDA device to " + std::to_string(deviceId) + " (out of " + std::to_string(deviceCount)
                  + " devices).");
 }
-} // namespace Opm::cuistl
+} // namespace Opm::gpuistl

@@ -20,7 +20,7 @@
 #ifndef OPM_CUISTL_SET_DEVICE_HEADER
 #define OPM_CUISTL_SET_DEVICE_HEADER
 
-namespace Opm::cuistl
+namespace Opm::gpuistl
 {
 //! @brief Sets the correct CUDA device in the setting of MPI
 //!
@@ -32,5 +32,5 @@ namespace Opm::cuistl
 //!
 //! @note If no CUDA device is present, this does nothing.
 void setDevice(int mpiRank, int numberOfMpiRanks);
-} // namespace Opm::cuistl
+} // namespace Opm::gpuistl
 #endif

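setDevice picks one of the visible CUDA devices for each MPI rank and, per the note above, does nothing when no device is present. A hedged sketch of that mapping; the round-robin modulo choice is an assumption, not a quote of the implementation:

#include <cuda_runtime.h>

void setDeviceSketch(int mpiRank, int /*numberOfMpiRanks*/)
{
    int deviceCount = 0;
    if (cudaGetDeviceCount(&deviceCount) != cudaSuccess || deviceCount == 0) {
        return; // no CUDA device present: do nothing
    }
    const int deviceId = mpiRank % deviceCount; // spread ranks over local devices
    cudaSetDevice(deviceId);
}
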
@@ -167,7 +167,7 @@ BOOST_AUTO_TEST_CASE(TestFiniteDifference1D)
         expectedOutputVector[i][1] = 43.0;
         inputVector[i][0] = 1.0;
     auto converter
-        = Opm::cuistl::PreconditionerConvertFieldTypeAdapter<TestPreconditioner, SpMatrixDouble, XDouble, XDouble>(
+        = Opm::gpuistl::PreconditionerConvertFieldTypeAdapter<TestPreconditioner, SpMatrixDouble, XDouble, XDouble>(
             B);
     auto underlyingPreconditioner = std::make_shared<TestPreconditioner>(
         converter.getConvertedMatrix(), inputVector, B, expectedOutputVector);

@@ -85,10 +85,10 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(FlattenAndInvertDiagonalWith3By3Blocks, T, Numeric
     B[1][1][1][1] = -1.0;
     B[1][1][2][2] = -1.0;
 
-    Opm::cuistl::CuSparseMatrix<T> m = Opm::cuistl::CuSparseMatrix<T>::fromMatrix(B);
-    Opm::cuistl::CuVector<T> dInvDiag(blocksize * blocksize * N);
+    Opm::gpuistl::CuSparseMatrix<T> m = Opm::gpuistl::CuSparseMatrix<T>::fromMatrix(B);
+    Opm::gpuistl::CuVector<T> dInvDiag(blocksize * blocksize * N);
 
-    Opm::cuistl::detail::JAC::invertDiagonalAndFlatten<T, 3>(
+    Opm::gpuistl::detail::JAC::invertDiagonalAndFlatten<T, 3>(
         m.getNonZeroValues().data(), m.getRowIndices().data(), m.getColumnIndices().data(), N, dInvDiag.data());
 
     std::vector<T> expectedInvDiag {-1.0 / 4.0,
@@ -159,10 +159,10 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(FlattenAndInvertDiagonalWith2By2Blocks, T, Numeric
     B[1][1][0][0] = -1.0;
     B[1][1][1][1] = -1.0;
 
-    Opm::cuistl::CuSparseMatrix<T> m = Opm::cuistl::CuSparseMatrix<T>::fromMatrix(B);
-    Opm::cuistl::CuVector<T> dInvDiag(blocksize * blocksize * N);
+    Opm::gpuistl::CuSparseMatrix<T> m = Opm::gpuistl::CuSparseMatrix<T>::fromMatrix(B);
+    Opm::gpuistl::CuVector<T> dInvDiag(blocksize * blocksize * N);
 
-    Opm::cuistl::detail::JAC::invertDiagonalAndFlatten<T, 2>(
+    Opm::gpuistl::detail::JAC::invertDiagonalAndFlatten<T, 2>(
         m.getNonZeroValues().data(), m.getRowIndices().data(), m.getColumnIndices().data(), N, dInvDiag.data());
 
     std::vector<T> expectedInvDiag {2.0, -2.0, -1.0 / 2.0, 1.0, -1.0, 0.0, 0.0, -1.0};

@@ -47,11 +47,11 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(ElementWiseMultiplicationOf3By3BlockVectorAndVecto
     std::vector<T> hostBlockVector({1.0, 2.0, 3.0, 5.0, 2.0, 3.0, 2.0, 1.0, 2.0});
     std::vector<T> hostVecVector({3.0, 2.0, 1.0});
     std::vector<T> hostDstVector({0, 0, 0});
-    Opm::cuistl::CuVector<T> deviceBlockVector(hostBlockVector);
-    Opm::cuistl::CuVector<T> deviceVecVector(hostVecVector);
-    Opm::cuistl::CuVector<T> deviceDstVector(hostDstVector);
+    Opm::gpuistl::CuVector<T> deviceBlockVector(hostBlockVector);
+    Opm::gpuistl::CuVector<T> deviceVecVector(hostVecVector);
+    Opm::gpuistl::CuVector<T> deviceDstVector(hostDstVector);
 
-    Opm::cuistl::detail::weightedDiagMV(
+    Opm::gpuistl::detail::weightedDiagMV(
         deviceBlockVector.data(), N, blocksize, weight, deviceVecVector.data(), deviceDstVector.data());
 
     std::vector<T> expectedVec {10.0, 22.0, 10.0};
@@ -81,11 +81,11 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(ElementWiseMultiplicationOf2By2BlockVectorAndVecto
     std::vector<T> hostBlockVector({1.0, 2.0, 3.0, 4.0, 4.0, 3.0, 2.0, 1.0});
     std::vector<T> hostVecVector({1.0, 3.0, 2.0, 4.0});
    std::vector<T> hostDstVector({0, 0, 0, 0});
-    Opm::cuistl::CuVector<T> deviceBlockVector(hostBlockVector);
-    Opm::cuistl::CuVector<T> deviceVecVector(hostVecVector);
-    Opm::cuistl::CuVector<T> deviceDstVector(hostDstVector);
+    Opm::gpuistl::CuVector<T> deviceBlockVector(hostBlockVector);
+    Opm::gpuistl::CuVector<T> deviceVecVector(hostVecVector);
+    Opm::gpuistl::CuVector<T> deviceDstVector(hostDstVector);
 
-    Opm::cuistl::detail::weightedDiagMV(
+    Opm::gpuistl::detail::weightedDiagMV(
         deviceBlockVector.data(), N, blocksize, weight, deviceVecVector.data(), deviceDstVector.data());
 
     std::vector<T> expectedVec {3.5, 7.5, 10.0, 4.0};
@@ -95,4 +95,4 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(ElementWiseMultiplicationOf2By2BlockVectorAndVecto
     for (size_t i = 0; i < expectedVec.size(); i++) {
         BOOST_CHECK_CLOSE(expectedVec[i], computedVec[i], 1e-7);
     }
 }
 }

@@ -32,7 +32,7 @@ BOOST_AUTO_TEST_CASE(TestGetCublasVersion)
     // that checks the version of blas programatically. Let the test pass for now.
     BOOST_CHECK(true);
 #else
-    auto& cublasHandle = ::Opm::cuistl::detail::CuBlasHandle::getInstance();
+    auto& cublasHandle = ::Opm::gpuistl::detail::CuBlasHandle::getInstance();
     int cuBlasVersion = -1;
     OPM_CUBLAS_SAFE_CALL(cublasGetVersion(cublasHandle.get(), &cuBlasVersion));

@@ -35,15 +35,15 @@ BOOST_AUTO_TEST_CASE(TestMakeView)
 {
     // test that we can create buffers and make views of the buffers using the pointer constructor
     auto buf = std::vector<int>({1, 2, 3, 4, 5, 6});
-    const auto gpubuf = ::Opm::cuistl::CuBuffer<int>(buf);
-    auto gpuview = ::Opm::cuistl::CuView<int>(buf.data(), buf.size());
-    bool gpuBufCreatedView = std::is_same<::Opm::cuistl::CuView<int>, decltype(gpuview)>::value;
+    const auto gpubuf = ::Opm::gpuistl::CuBuffer<int>(buf);
+    auto gpuview = ::Opm::gpuistl::CuView<int>(buf.data(), buf.size());
+    bool gpuBufCreatedView = std::is_same<::Opm::gpuistl::CuView<int>, decltype(gpuview)>::value;
 
     BOOST_CHECK(gpuBufCreatedView);
 
     // test that we can make views of buffers by using the cubuffer constructor
-    auto gpuview2 = ::Opm::cuistl::make_view(gpubuf);
-    bool gpuBufCreatedView2 = std::is_same<::Opm::cuistl::CuView<const int>, decltype(gpuview2)>::value;
+    auto gpuview2 = ::Opm::gpuistl::make_view(gpubuf);
+    bool gpuBufCreatedView2 = std::is_same<::Opm::gpuistl::CuView<const int>, decltype(gpuview2)>::value;
 
     BOOST_CHECK(gpuBufCreatedView2);

@@ -41,11 +41,11 @@ using B1x1Vec = Dune::BlockVector<Dune::FieldVector<double, 1>>;
 using B2x2Vec = Dune::BlockVector<Dune::FieldVector<double, 2>>;
 using Sp1x1BlockMatrix = Dune::BCRSMatrix<FM1x1>;
 using Sp2x2BlockMatrix = Dune::BCRSMatrix<FM2x2>;
-using CuMatrix = Opm::cuistl::CuSparseMatrix<T>;
-using CuIntVec = Opm::cuistl::CuVector<int>;
-using CuFloatingPointVec = Opm::cuistl::CuVector<T>;
-using CuDilu1x1 = Opm::cuistl::CuDILU<Sp1x1BlockMatrix, CuFloatingPointVec, CuFloatingPointVec>;
-using CuDilu2x2 = Opm::cuistl::CuDILU<Sp2x2BlockMatrix, CuFloatingPointVec, CuFloatingPointVec>;
+using CuMatrix = Opm::gpuistl::CuSparseMatrix<T>;
+using CuIntVec = Opm::gpuistl::CuVector<int>;
+using CuFloatingPointVec = Opm::gpuistl::CuVector<T>;
+using CuDilu1x1 = Opm::gpuistl::CuDILU<Sp1x1BlockMatrix, CuFloatingPointVec, CuFloatingPointVec>;
+using CuDilu2x2 = Opm::gpuistl::CuDILU<Sp2x2BlockMatrix, CuFloatingPointVec, CuFloatingPointVec>;
 
 Sp1x1BlockMatrix
 get1x1BlockTestMatrix()

@@ -49,7 +49,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(CUJACApplyBlocksize2, T, NumericTypes)
     using M = Dune::FieldMatrix<T, blocksize, blocksize>;
     using SpMatrix = Dune::BCRSMatrix<M>;
     using Vector = Dune::BlockVector<Dune::FieldVector<T, blocksize>>;
-    using CuJac = Opm::cuistl::CuJac<SpMatrix, Opm::cuistl::CuVector<T>, Opm::cuistl::CuVector<T>>;
+    using CuJac = Opm::gpuistl::CuJac<SpMatrix, Opm::gpuistl::CuVector<T>, Opm::gpuistl::CuVector<T>>;
 
     SpMatrix B(N, N, nonZeroes, SpMatrix::row_wise);
     for (auto row = B.createbegin(); row != B.createend(); ++row) {
@@ -70,7 +70,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(CUJACApplyBlocksize2, T, NumericTypes)
     B[1][1][0][0] = -1.0;
     B[1][1][1][1] = -1.0;
 
-    auto cujac = Opm::cuistl::PreconditionerAdapter<Vector, Vector, CuJac>(std::make_shared<CuJac>(B, 0.5));
+    auto cujac = Opm::gpuistl::PreconditionerAdapter<Vector, Vector, CuJac>(std::make_shared<CuJac>(B, 0.5));
 
     Vector vVector(2);
     Vector dVector(2);
@@ -103,7 +103,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(CUJACApplyBlocksize1, T, NumericTypes)
     using M = Dune::FieldMatrix<T, blocksize, blocksize>;
     using SpMatrix = Dune::BCRSMatrix<M>;
     using Vector = Dune::BlockVector<Dune::FieldVector<T, blocksize>>;
-    using CuJac = Opm::cuistl::CuJac<SpMatrix, Opm::cuistl::CuVector<T>, Opm::cuistl::CuVector<T>>;
+    using CuJac = Opm::gpuistl::CuJac<SpMatrix, Opm::gpuistl::CuVector<T>, Opm::gpuistl::CuVector<T>>;
 
     SpMatrix B(N, N, nonZeroes, SpMatrix::row_wise);
     for (auto row = B.createbegin(); row != B.createend(); ++row) {
@@ -129,7 +129,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(CUJACApplyBlocksize1, T, NumericTypes)
     B[2][2][0][0] = -1.0;
     B[3][3][0][0] = -1.0;
 
-    auto cujac = Opm::cuistl::PreconditionerAdapter<Vector, Vector, CuJac>(std::make_shared<CuJac>(B, 0.5));
+    auto cujac = Opm::gpuistl::PreconditionerAdapter<Vector, Vector, CuJac>(std::make_shared<CuJac>(B, 0.5));
 
     Vector vVector(4);
     Vector dVector(4);

@@ -46,7 +46,7 @@ main(int argc, char** argv)
     int rank, totalRanks;
     MPI_Comm_rank(MPI_COMM_WORLD, &rank);
     MPI_Comm_size(MPI_COMM_WORLD, &totalRanks);
-    Opm::cuistl::setDevice(rank, totalRanks);
+    Opm::gpuistl::setDevice(rank, totalRanks);
     boost::unit_test::unit_test_main(&init_unit_test_func, argc, argv);
 }

@@ -62,12 +62,12 @@ BOOST_AUTO_TEST_CASE(TestProject)
 
     auto ownerOverlapCopy = Dune::OwnerOverlapCopyCommunication<int>(indexInfo, MPI_COMM_WORLD);
     auto xCPU = std::vector<double> {{1.0, 2.0, 3.0}};
-    auto xGPU = Opm::cuistl::CuVector<double>(xCPU);
+    auto xGPU = Opm::gpuistl::CuVector<double>(xCPU);
 
-    auto gpuComm = std::make_shared<Opm::cuistl::GPUObliviousMPISender<double, 1, Dune::OwnerOverlapCopyCommunication<int>>>(ownerOverlapCopy);
+    auto gpuComm = std::make_shared<Opm::gpuistl::GPUObliviousMPISender<double, 1, Dune::OwnerOverlapCopyCommunication<int>>>(ownerOverlapCopy);
 
     auto cuOwnerOverlapCopy
-        = Opm::cuistl::CuOwnerOverlapCopy<double, 1, Dune::OwnerOverlapCopyCommunication<int>>(gpuComm);
+        = Opm::gpuistl::CuOwnerOverlapCopy<double, 1, Dune::OwnerOverlapCopyCommunication<int>>(gpuComm);
 
     cuOwnerOverlapCopy.project(xGPU);
 
@@ -94,12 +94,12 @@ BOOST_AUTO_TEST_CASE(TestDot)
     indexInfo.addRemoteIndex(std::make_tuple(0, 2, Dune::OwnerOverlapCopyAttributeSet::copy));
     auto ownerOverlapCopy = Dune::OwnerOverlapCopyCommunication<int>(indexInfo, MPI_COMM_WORLD);
     auto xCPU = std::vector<double> {{1.0, 2.0, 3.0}};
-    auto xGPU = Opm::cuistl::CuVector<double>(xCPU);
+    auto xGPU = Opm::gpuistl::CuVector<double>(xCPU);
 
-    auto gpuComm = std::make_shared<Opm::cuistl::GPUObliviousMPISender<double, 1, Dune::OwnerOverlapCopyCommunication<int>>>(ownerOverlapCopy);
+    auto gpuComm = std::make_shared<Opm::gpuistl::GPUObliviousMPISender<double, 1, Dune::OwnerOverlapCopyCommunication<int>>>(ownerOverlapCopy);
 
     auto cuOwnerOverlapCopy
-        = Opm::cuistl::CuOwnerOverlapCopy<double, 1, Dune::OwnerOverlapCopyCommunication<int>>(gpuComm);
+        = Opm::gpuistl::CuOwnerOverlapCopy<double, 1, Dune::OwnerOverlapCopyCommunication<int>>(gpuComm);
 
     double outputDune = -1.0;
     auto xDune = xGPU.asDuneBlockVector<1>();

@@ -63,7 +63,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(TestFiniteDifference1D, T, NumericTypes)
     using M = Dune::FieldMatrix<T, 1, 1>;
     using SpMatrix = Dune::BCRSMatrix<M>;
     using Vector = Dune::BlockVector<Dune::FieldVector<T, 1>>;
-    using CuILU0 = Opm::cuistl::CuSeqILU0<SpMatrix, Opm::cuistl::CuVector<T>, Opm::cuistl::CuVector<T>>;
+    using CuILU0 = Opm::gpuistl::CuSeqILU0<SpMatrix, Opm::gpuistl::CuVector<T>, Opm::gpuistl::CuVector<T>>;
 
     SpMatrix B(N, N, nonZeroes, SpMatrix::row_wise);
     for (auto row = B.createbegin(); row != B.createend(); ++row) {
@@ -91,7 +91,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(TestFiniteDifference1D, T, NumericTypes)
 
     auto duneILU = Dune::SeqILU<SpMatrix, Vector, Vector>(B, 1.0);
 
-    auto cuILU = Opm::cuistl::PreconditionerAdapter<Vector, Vector, CuILU0>(std::make_shared<CuILU0>(B, 1.0));
+    auto cuILU = Opm::gpuistl::PreconditionerAdapter<Vector, Vector, CuILU0>(std::make_shared<CuILU0>(B, 1.0));
 
     // check for the standard basis {e_i}
     // (e_i=(0,...,0, 1 (i-th place), 0, ..., 0))
@@ -158,7 +158,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(TestFiniteDifferenceBlock2, T, NumericTypes)
     using M = Dune::FieldMatrix<T, 2, 2>;
     using SpMatrix = Dune::BCRSMatrix<M>;
     using Vector = Dune::BlockVector<Dune::FieldVector<T, 2>>;
-    using CuILU0 = Opm::cuistl::CuSeqILU0<SpMatrix, Opm::cuistl::CuVector<T>, Opm::cuistl::CuVector<T>>;
+    using CuILU0 = Opm::gpuistl::CuSeqILU0<SpMatrix, Opm::gpuistl::CuVector<T>, Opm::gpuistl::CuVector<T>>;
 
     SpMatrix B(N, N, nonZeroes, SpMatrix::row_wise);
     for (auto row = B.createbegin(); row != B.createend(); ++row) {
@@ -181,7 +181,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(TestFiniteDifferenceBlock2, T, NumericTypes)
 
     auto duneILU = Dune::SeqILU<SpMatrix, Vector, Vector>(B, 1.0);
 
-    auto cuILU = Opm::cuistl::PreconditionerAdapter<Vector, Vector, CuILU0>(std::make_shared<CuILU0>(B, 1.0));
+    auto cuILU = Opm::gpuistl::PreconditionerAdapter<Vector, Vector, CuILU0>(std::make_shared<CuILU0>(B, 1.0));
 
     // check for the standard basis {e_i}
     // (e_i=(0,...,0, 1 (i-th place), 0, ..., 0))

@@ -26,7 +26,7 @@
 
 BOOST_AUTO_TEST_CASE(TestGetSparseVersion)
 {
-    auto& cuSparseHandle = ::Opm::cuistl::detail::CuSparseHandle::getInstance();
+    auto& cuSparseHandle = ::Opm::gpuistl::detail::CuSparseHandle::getInstance();
     int cuSparseVersion = -1;
     OPM_CUSPARSE_SAFE_CALL(cusparseGetVersion(cuSparseHandle.get(), &cuSparseVersion));
     BOOST_CHECK_LT(0, cuSparseVersion);

@@ -76,7 +76,7 @@ BOOST_AUTO_TEST_CASE(TestConstruction1D)
         }
     }
 
-    auto cuSparseMatrix = Opm::cuistl::CuSparseMatrix<double>::fromMatrix(B);
+    auto cuSparseMatrix = Opm::gpuistl::CuSparseMatrix<double>::fromMatrix(B);
 
     const auto& nonZeroValuesCuda = cuSparseMatrix.getNonZeroValues();
     std::vector<double> buffer(cuSparseMatrix.nonzeroes(), 0.0);
@@ -143,14 +143,14 @@ BOOST_AUTO_TEST_CASE(RandomSparsityMatrix)
         }
     }
 
-    auto cuSparseMatrix = Opm::cuistl::CuSparseMatrix<double>::fromMatrix(B);
+    auto cuSparseMatrix = Opm::gpuistl::CuSparseMatrix<double>::fromMatrix(B);
     // check each column
     for (size_t component = 0; component < N; ++component) {
         std::vector<double> inputDataX(N * dim, 0.0);
         inputDataX[component] = 1.0;
         std::vector<double> inputDataY(N * dim, .25);
-        auto inputVectorX = Opm::cuistl::CuVector<double>(inputDataX.data(), inputDataX.size());
-        auto inputVectorY = Opm::cuistl::CuVector<double>(inputDataY.data(), inputDataY.size());
+        auto inputVectorX = Opm::gpuistl::CuVector<double>(inputDataX.data(), inputDataX.size());
+        auto inputVectorY = Opm::gpuistl::CuVector<double>(inputDataY.data(), inputDataY.size());
         Vector xHost(N), yHost(N);
         yHost = inputDataY[0];
         inputVectorX.copyToHost(xHost);

@@ -32,7 +32,7 @@ BOOST_AUTO_TEST_CASE(TestDocumentedUsage)
 {
     auto someDataOnCPU = std::vector<double>({1.0, 2.0, 42.0, 59.9451743, 10.7132692});
 
-    auto dataOnGPU = ::Opm::cuistl::CuVector<double>(someDataOnCPU);
+    auto dataOnGPU = ::Opm::gpuistl::CuVector<double>(someDataOnCPU);
 
     // Multiply by 4.0:
     dataOnGPU *= 4.0;
@@ -50,14 +50,14 @@ BOOST_AUTO_TEST_CASE(TestDocumentedUsage)
 BOOST_AUTO_TEST_CASE(TestConstructionSize)
 {
     const int numberOfElements = 1234;
-    auto vectorOnGPU = Opm::cuistl::CuVector<double>(numberOfElements);
+    auto vectorOnGPU = Opm::gpuistl::CuVector<double>(numberOfElements);
     BOOST_CHECK_EQUAL(numberOfElements, vectorOnGPU.dim());
 }
 
 BOOST_AUTO_TEST_CASE(TestCopyFromHostConstructor)
 {
     std::vector<double> data {{1, 2, 3, 4, 5, 6, 7}};
-    auto vectorOnGPU = Opm::cuistl::CuVector<double>(data.data(), data.size());
+    auto vectorOnGPU = Opm::gpuistl::CuVector<double>(data.data(), data.size());
     BOOST_CHECK_EQUAL(data.size(), vectorOnGPU.dim());
     std::vector<double> buffer(data.size(), 0.0);
     vectorOnGPU.copyToHost(buffer.data(), buffer.size());
@@ -68,7 +68,7 @@ BOOST_AUTO_TEST_CASE(TestCopyFromHostConstructor)
 BOOST_AUTO_TEST_CASE(TestCopyFromHostFunction)
 {
     std::vector<double> data {{1, 2, 3, 4, 5, 6, 7}};
-    auto vectorOnGPU = Opm::cuistl::CuVector<double>(data.size());
+    auto vectorOnGPU = Opm::gpuistl::CuVector<double>(data.size());
     BOOST_CHECK_EQUAL(data.size(), vectorOnGPU.dim());
     vectorOnGPU.copyFromHost(data.data(), data.size());
     std::vector<double> buffer(data.size(), 0.0);
@@ -80,7 +80,7 @@ BOOST_AUTO_TEST_CASE(TestCopyFromHostFunction)
 BOOST_AUTO_TEST_CASE(TestCopyFromBvector)
 {
     auto blockVector = Dune::BlockVector<Dune::FieldVector<double, 2>> {{{42, 43}, {44, 45}, {46, 47}}};
-    auto vectorOnGPU = Opm::cuistl::CuVector<double>(blockVector.dim());
+    auto vectorOnGPU = Opm::gpuistl::CuVector<double>(blockVector.dim());
     vectorOnGPU.copyFromHost(blockVector);
     std::vector<double> buffer(vectorOnGPU.dim());
     vectorOnGPU.copyToHost(buffer.data(), buffer.size());
@@ -93,7 +93,7 @@ BOOST_AUTO_TEST_CASE(TestCopyToBvector)
 {
     std::vector<double> data {{1, 2, 3, 4, 5, 6, 7, 8, 9}};
     auto blockVector = Dune::BlockVector<Dune::FieldVector<double, 3>>(3);
-    auto vectorOnGPU = Opm::cuistl::CuVector<double>(data.data(), data.size());
+    auto vectorOnGPU = Opm::gpuistl::CuVector<double>(data.data(), data.size());
     vectorOnGPU.copyToHost(blockVector);
 
 
@@ -103,7 +103,7 @@ BOOST_AUTO_TEST_CASE(TestCopyToBvector)
 BOOST_AUTO_TEST_CASE(TestDataPointer)
 {
     std::vector<double> data {{1, 2, 3, 4, 5, 6, 7, 8, 9}};
-    auto vectorOnGPU = Opm::cuistl::CuVector<double>(data.data(), data.size());
+    auto vectorOnGPU = Opm::gpuistl::CuVector<double>(data.data(), data.size());
 
     std::vector<double> buffer(data.size(), 0.0);
     OPM_CUDA_SAFE_CALL(cudaMemcpy(buffer.data(), vectorOnGPU.data(), sizeof(double) * data.size(), cudaMemcpyDeviceToHost));
@@ -113,7 +113,7 @@ BOOST_AUTO_TEST_CASE(TestDataPointer)
 BOOST_AUTO_TEST_CASE(TestCopyScalarMultiply)
 {
     std::vector<double> data {{1, 2, 3, 4, 5, 6, 7}};
-    auto vectorOnGPU = Opm::cuistl::CuVector<double>(data.data(), data.size());
+    auto vectorOnGPU = Opm::gpuistl::CuVector<double>(data.data(), data.size());
     BOOST_CHECK_EQUAL(data.size(), vectorOnGPU.dim());
     const double scalar = 42.25;
     vectorOnGPU *= scalar;
@@ -128,7 +128,7 @@ BOOST_AUTO_TEST_CASE(TestCopyScalarMultiply)
 BOOST_AUTO_TEST_CASE(TestTwoNorm)
 {
     std::vector<double> data {{1, 2, 3, 4, 5, 6, 7}};
-    auto vectorOnGPU = Opm::cuistl::CuVector<double>(data.data(), data.size());
+    auto vectorOnGPU = Opm::gpuistl::CuVector<double>(data.data(), data.size());
     auto twoNorm = vectorOnGPU.two_norm();
 
     double correctAnswer = 0.0;
@@ -143,8 +143,8 @@ BOOST_AUTO_TEST_CASE(TestDot)
 {
     std::vector<double> dataA {{1, 2, 3, 4, 5, 6, 7}};
     std::vector<double> dataB {{8, 9, 10, 11, 12, 13, 14}};
-    auto vectorOnGPUA = Opm::cuistl::CuVector<double>(dataA.data(), dataA.size());
-    auto vectorOnGPUB = Opm::cuistl::CuVector<double>(dataB.data(), dataB.size());
+    auto vectorOnGPUA = Opm::gpuistl::CuVector<double>(dataA.data(), dataA.size());
+    auto vectorOnGPUB = Opm::gpuistl::CuVector<double>(dataB.data(), dataB.size());
     auto dot = vectorOnGPUA.dot(vectorOnGPUB);
 
     double correctAnswer = 0.0;
@@ -158,7 +158,7 @@ BOOST_AUTO_TEST_CASE(TestDot)
 BOOST_AUTO_TEST_CASE(Assigment)
 {
     std::vector<double> data {{1, 2, 3, 4, 5, 6, 7}};
-    auto vectorOnGPU = Opm::cuistl::CuVector<double>(data.data(), data.size());
+    auto vectorOnGPU = Opm::gpuistl::CuVector<double>(data.data(), data.size());
     vectorOnGPU = 10.0;
     vectorOnGPU.copyToHost(data.data(), data.size());
 
@@ -171,9 +171,9 @@ BOOST_AUTO_TEST_CASE(Assigment)
 BOOST_AUTO_TEST_CASE(CopyAssignment)
 {
     std::vector<double> data {{1, 2, 3, 4, 5, 6, 7}};
-    auto vectorOnGPU = Opm::cuistl::CuVector<double>(data.data(), data.size());
+    auto vectorOnGPU = Opm::gpuistl::CuVector<double>(data.data(), data.size());
     vectorOnGPU.copyToHost(data.data(), data.size());
-    auto vectorOnGPUB = Opm::cuistl::CuVector<double>(data.size());
+    auto vectorOnGPUB = Opm::gpuistl::CuVector<double>(data.size());
     vectorOnGPUB = 4.0;
     vectorOnGPUB = vectorOnGPU;
 
@@ -185,7 +185,7 @@ BOOST_AUTO_TEST_CASE(CopyAssignment)
 BOOST_AUTO_TEST_CASE(RandomVectors)
 {
 
-    using GVector = Opm::cuistl::CuVector<double>;
+    using GVector = Opm::gpuistl::CuVector<double>;
     std::srand(0);
     std::mt19937 generator;
     std::uniform_real_distribution<double> distribution(-100.0, 100.0);
@@ -268,7 +268,7 @@ BOOST_AUTO_TEST_CASE(RandomVectors)
             indexSet.push_back(i);
         }
     }
-    auto indexSetGPU = Opm::cuistl::CuVector<int>(indexSet);
+    auto indexSetGPU = Opm::gpuistl::CuVector<int>(indexSet);
 
     aGPU.setZeroAtIndexSet(indexSetGPU);
     auto projectedA = aGPU.asStdVector();

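The RandomVectors test above ends by checking setZeroAtIndexSet, which zeroes exactly the entries listed in the index set. A host-side reference for that semantics (illustrative only):

#include <vector>

void setZeroAtIndexSetHost(std::vector<double>& a, const std::vector<int>& indexSet)
{
    for (int idx : indexSet) {
        a[idx] = 0.0; // the GPU kernel does the same, one thread per listed index
    }
}
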
@@ -32,8 +32,8 @@
 #include <algorithm>
 #include <type_traits>
 
-using CuViewDouble = ::Opm::cuistl::CuView<double>;
-using CuBufferDouble = ::Opm::cuistl::CuBuffer<double>;
+using CuViewDouble = ::Opm::gpuistl::CuView<double>;
+using CuBufferDouble = ::Opm::gpuistl::CuBuffer<double>;
 
 __global__ void useCuViewOnGPU(CuViewDouble a, CuViewDouble b){
     b[0] = a.front();

@@ -25,7 +25,7 @@
 
 BOOST_AUTO_TEST_CASE(TestToIntThrowsOutofRange)
 {
-    BOOST_CHECK_THROW(Opm::cuistl::detail::to_int(size_t(std::numeric_limits<int>::max()) + size_t(1));
+    BOOST_CHECK_THROW(Opm::gpuistl::detail::to_int(size_t(std::numeric_limits<int>::max()) + size_t(1));
                       , std::invalid_argument);
 }
 
@@ -33,26 +33,26 @@ BOOST_AUTO_TEST_CASE(TestToIntConvertInRange)
 {
     // This might seem slow, but it is really fast:
     for (size_t i = 0; i <= size_t(1024 * 1024); ++i) {
-        BOOST_CHECK_EQUAL(int(i), Opm::cuistl::detail::to_int(i));
+        BOOST_CHECK_EQUAL(int(i), Opm::gpuistl::detail::to_int(i));
     }
 
     BOOST_CHECK_EQUAL(std::numeric_limits<int>::max(),
-                      Opm::cuistl::detail::to_int(size_t(std::numeric_limits<int>::max())));
+                      Opm::gpuistl::detail::to_int(size_t(std::numeric_limits<int>::max())));
 }
 
 
 BOOST_AUTO_TEST_CASE(TestToSizeTThrowsOutofRange)
 {
-    BOOST_CHECK_THROW(Opm::cuistl::detail::to_size_t(-1);, std::invalid_argument);
+    BOOST_CHECK_THROW(Opm::gpuistl::detail::to_size_t(-1);, std::invalid_argument);
 }
 
 BOOST_AUTO_TEST_CASE(TestToSizeTConvertInRange)
 {
     // This might seem slow, but it is really fast:
     for (int i = 0; i <= 1024 * 1024; ++i) {
-        BOOST_CHECK_EQUAL(size_t(i), Opm::cuistl::detail::to_size_t(i));
+        BOOST_CHECK_EQUAL(size_t(i), Opm::gpuistl::detail::to_size_t(i));
     }
 
     BOOST_CHECK_EQUAL(size_t(std::numeric_limits<int>::max()),
-                      Opm::cuistl::detail::to_size_t(std::numeric_limits<int>::max()));
+                      Opm::gpuistl::detail::to_size_t(std::numeric_limits<int>::max()));
 }

@@ -31,7 +31,7 @@ using Matrix = Dune::BCRSMatrix<Dune::FieldMatrix<double, dim, dim>>;
 using Vector = Dune::BlockVector<Dune::FieldVector<double, dim>>;
 using Moperator = Dune::MatrixAdapter<Matrix, Vector, Vector>;
 using PrecondFactory = Opm::PreconditionerFactory<Moperator, Dune::Amg::SequentialInformation>;
-using SolverAdapter = Opm::cuistl::SolverAdapter<Moperator, Dune::BiCGSTABSolver, Vector>;
+using SolverAdapter = Opm::gpuistl::SolverAdapter<Moperator, Dune::BiCGSTABSolver, Vector>;
 
 namespace
 {