refactor cuistl namespace

Author: Tobias Meyer Andersen
Date:   2024-08-22 13:52:50 +02:00
Parent: c4f686227b
Commit: 3f4ae4ddf4

72 changed files with 304 additions and 304 deletions
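
The change is mechanical: every occurrence of the Opm::cuistl namespace (including its detail sub-namespaces) becomes Opm::gpuistl. Only the namespace is renamed; the include paths (opm/simulators/linalg/cuistl/...) and the Cu-prefixed class names are untouched by this commit. A minimal sketch of what a hypothetical downstream call site sees, using the element-count constructor that appears in the tests at the end of this diff:

    // Hypothetical caller; illustration only.
    #include <opm/simulators/linalg/cuistl/CuVector.hpp>

    void makeDeviceVector()
    {
        // Before this commit: Opm::cuistl::CuVector<double> v(100);
        Opm::gpuistl::CuVector<double> v(100); // after the rename
    }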

View File

@@ -163,7 +163,7 @@ void Main::initMPI()
}
#if HAVE_CUDA
Opm::cuistl::setDevice(FlowGenericVanguard::comm().rank(), FlowGenericVanguard::comm().size());
Opm::gpuistl::setDevice(FlowGenericVanguard::comm().rank(), FlowGenericVanguard::comm().size());
#endif
#endif // HAVE_MPI
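
setDevice binds each MPI rank to a CUDA device before the simulator starts, and is documented further down in this diff as a no-op when no device is present. A sketch of the kind of mapping it performs, assuming a simple round-robin scheme (the actual implementation in set_device.cu also logs the chosen device and may differ in detail):

    #include <cuda_runtime.h>

    // Sketch only: round-robin rank-to-device mapping is an assumption.
    void setDeviceSketch(int mpiRank)
    {
        int deviceCount = 0;
        cudaGetDeviceCount(&deviceCount);
        if (deviceCount == 0) {
            return; // no CUDA device present: do nothing
        }
        cudaSetDevice(mpiRank % deviceCount);
    }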

View File

@@ -201,7 +201,7 @@ namespace Dune
#endif
#if HAVE_CUDA
} else if (solver_type == "cubicgstab") {
linsolver_.reset(new Opm::cuistl::SolverAdapter<Operator, Dune::BiCGSTABSolver, VectorType>(
linsolver_.reset(new Opm::gpuistl::SolverAdapter<Operator, Dune::BiCGSTABSolver, VectorType>(
*linearoperator_for_solver_,
*scalarproduct_,
preconditioner_,

View File

@@ -325,12 +325,12 @@ struct StandardPreconditioners {
F::addCreator("CUILU0", [](const O& op, const P& prm, const std::function<V()>&, std::size_t, const C& comm) {
const double w = prm.get<double>("relaxation", 1.0);
using field_type = typename V::field_type;
using CuILU0 = typename cuistl::
CuSeqILU0<M, cuistl::CuVector<field_type>, cuistl::CuVector<field_type>>;
using CuILU0 = typename gpuistl::
CuSeqILU0<M, gpuistl::CuVector<field_type>, gpuistl::CuVector<field_type>>;
auto cuILU0 = std::make_shared<CuILU0>(op.getmat(), w);
auto adapted = std::make_shared<cuistl::PreconditionerAdapter<V, V, CuILU0>>(cuILU0);
auto wrapped = std::make_shared<cuistl::CuBlockPreconditioner<V, V, Comm>>(adapted, comm);
auto adapted = std::make_shared<gpuistl::PreconditionerAdapter<V, V, CuILU0>>(cuILU0);
auto wrapped = std::make_shared<gpuistl::CuBlockPreconditioner<V, V, Comm>>(adapted, comm);
return wrapped;
});
@@ -338,11 +338,11 @@ struct StandardPreconditioners {
const double w = prm.get<double>("relaxation", 1.0);
using field_type = typename V::field_type;
using CuJac =
typename cuistl::CuJac<M, cuistl::CuVector<field_type>, cuistl::CuVector<field_type>>;
typename gpuistl::CuJac<M, gpuistl::CuVector<field_type>, gpuistl::CuVector<field_type>>;
auto cuJac = std::make_shared<CuJac>(op.getmat(), w);
auto adapted = std::make_shared<cuistl::PreconditionerAdapter<V, V, CuJac>>(cuJac);
auto wrapped = std::make_shared<cuistl::CuBlockPreconditioner<V, V, Comm>>(adapted, comm);
auto adapted = std::make_shared<gpuistl::PreconditionerAdapter<V, V, CuJac>>(cuJac);
auto wrapped = std::make_shared<gpuistl::CuBlockPreconditioner<V, V, Comm>>(adapted, comm);
return wrapped;
});
@@ -350,11 +350,11 @@ struct StandardPreconditioners {
const bool split_matrix = prm.get<bool>("split_matrix", true);
const bool tune_gpu_kernels = prm.get<bool>("tune_gpu_kernels", true);
using field_type = typename V::field_type;
using CuDILU = typename cuistl::CuDILU<M, cuistl::CuVector<field_type>, cuistl::CuVector<field_type>>;
using CuDILU = typename gpuistl::CuDILU<M, gpuistl::CuVector<field_type>, gpuistl::CuVector<field_type>>;
auto cuDILU = std::make_shared<CuDILU>(op.getmat(), split_matrix, tune_gpu_kernels);
auto adapted = std::make_shared<cuistl::PreconditionerAdapter<V, V, CuDILU>>(cuDILU);
auto wrapped = std::make_shared<cuistl::CuBlockPreconditioner<V, V, Comm>>(adapted, comm);
auto adapted = std::make_shared<gpuistl::PreconditionerAdapter<V, V, CuDILU>>(cuDILU);
auto wrapped = std::make_shared<gpuistl::CuBlockPreconditioner<V, V, Comm>>(adapted, comm);
return wrapped;
});
@@ -362,11 +362,11 @@ struct StandardPreconditioners {
const bool split_matrix = prm.get<bool>("split_matrix", true);
const bool tune_gpu_kernels = prm.get<bool>("tune_gpu_kernels", true);
using field_type = typename V::field_type;
using OpmCuILU0 = typename cuistl::OpmCuILU0<M, cuistl::CuVector<field_type>, cuistl::CuVector<field_type>>;
using OpmCuILU0 = typename gpuistl::OpmCuILU0<M, gpuistl::CuVector<field_type>, gpuistl::CuVector<field_type>>;
auto cuilu0 = std::make_shared<OpmCuILU0>(op.getmat(), split_matrix, tune_gpu_kernels);
auto adapted = std::make_shared<cuistl::PreconditionerAdapter<V, V, OpmCuILU0>>(cuilu0);
auto wrapped = std::make_shared<cuistl::CuBlockPreconditioner<V, V, Comm>>(adapted, comm);
auto adapted = std::make_shared<gpuistl::PreconditionerAdapter<V, V, OpmCuILU0>>(cuilu0);
auto wrapped = std::make_shared<gpuistl::CuBlockPreconditioner<V, V, Comm>>(adapted, comm);
return wrapped;
});
#endif
@@ -585,9 +585,9 @@ struct StandardPreconditioners<Operator, Dune::Amg::SequentialInformation> {
F::addCreator("CUILU0", [](const O& op, const P& prm, const std::function<V()>&, std::size_t) {
const double w = prm.get<double>("relaxation", 1.0);
using field_type = typename V::field_type;
using CuILU0 = typename cuistl::
CuSeqILU0<M, cuistl::CuVector<field_type>, cuistl::CuVector<field_type>>;
return std::make_shared<cuistl::PreconditionerAdapter<V, V, CuILU0>>(
using CuILU0 = typename gpuistl::
CuSeqILU0<M, gpuistl::CuVector<field_type>, gpuistl::CuVector<field_type>>;
return std::make_shared<gpuistl::PreconditionerAdapter<V, V, CuILU0>>(
std::make_shared<CuILU0>(op.getmat(), w));
});
@@ -597,10 +597,10 @@ struct StandardPreconditioners<Operator, Dune::Amg::SequentialInformation> {
using VTo = Dune::BlockVector<Dune::FieldVector<float, block_type::dimension>>;
using matrix_type_to =
typename Dune::BCRSMatrix<Dune::FieldMatrix<float, block_type::dimension, block_type::dimension>>;
using CuILU0 = typename cuistl::
CuSeqILU0<matrix_type_to, cuistl::CuVector<float>, cuistl::CuVector<float>>;
using Adapter = typename cuistl::PreconditionerAdapter<VTo, VTo, CuILU0>;
using Converter = typename cuistl::PreconditionerConvertFieldTypeAdapter<Adapter, M, V, V>;
using CuILU0 = typename gpuistl::
CuSeqILU0<matrix_type_to, gpuistl::CuVector<float>, gpuistl::CuVector<float>>;
using Adapter = typename gpuistl::PreconditionerAdapter<VTo, VTo, CuILU0>;
using Converter = typename gpuistl::PreconditionerConvertFieldTypeAdapter<Adapter, M, V, V>;
auto converted = std::make_shared<Converter>(op.getmat());
auto adapted = std::make_shared<Adapter>(std::make_shared<CuILU0>(converted->getConvertedMatrix(), w));
converted->setUnderlyingPreconditioner(adapted);
@@ -611,8 +611,8 @@ struct StandardPreconditioners<Operator, Dune::Amg::SequentialInformation> {
const double w = prm.get<double>("relaxation", 1.0);
using field_type = typename V::field_type;
using CUJac =
typename cuistl::CuJac<M, cuistl::CuVector<field_type>, cuistl::CuVector<field_type>>;
return std::make_shared<cuistl::PreconditionerAdapter<V, V, CUJac>>(
typename gpuistl::CuJac<M, gpuistl::CuVector<field_type>, gpuistl::CuVector<field_type>>;
return std::make_shared<gpuistl::PreconditionerAdapter<V, V, CUJac>>(
std::make_shared<CUJac>(op.getmat(), w));
});
@@ -620,17 +620,17 @@ struct StandardPreconditioners<Operator, Dune::Amg::SequentialInformation> {
const bool split_matrix = prm.get<bool>("split_matrix", true);
const bool tune_gpu_kernels = prm.get<bool>("tune_gpu_kernels", true);
using field_type = typename V::field_type;
using CUILU0 = typename cuistl::OpmCuILU0<M, cuistl::CuVector<field_type>, cuistl::CuVector<field_type>>;
using CUILU0 = typename gpuistl::OpmCuILU0<M, gpuistl::CuVector<field_type>, gpuistl::CuVector<field_type>>;
return std::make_shared<cuistl::PreconditionerAdapter<V, V, CUILU0>>(std::make_shared<CUILU0>(op.getmat(), split_matrix, tune_gpu_kernels));
return std::make_shared<gpuistl::PreconditionerAdapter<V, V, CUILU0>>(std::make_shared<CUILU0>(op.getmat(), split_matrix, tune_gpu_kernels));
});
F::addCreator("CUDILU", [](const O& op, [[maybe_unused]] const P& prm, const std::function<V()>&, std::size_t) {
const bool split_matrix = prm.get<bool>("split_matrix", true);
const bool tune_gpu_kernels = prm.get<bool>("tune_gpu_kernels", true);
using field_type = typename V::field_type;
using CUDILU = typename cuistl::CuDILU<M, cuistl::CuVector<field_type>, cuistl::CuVector<field_type>>;
return std::make_shared<cuistl::PreconditionerAdapter<V, V, CUDILU>>(std::make_shared<CUDILU>(op.getmat(), split_matrix, tune_gpu_kernels));
using CUDILU = typename gpuistl::CuDILU<M, gpuistl::CuVector<field_type>, gpuistl::CuVector<field_type>>;
return std::make_shared<gpuistl::PreconditionerAdapter<V, V, CUDILU>>(std::make_shared<CUDILU>(op.getmat(), split_matrix, tune_gpu_kernels));
});
F::addCreator("CUDILUFloat", [](const O& op, [[maybe_unused]] const P& prm, const std::function<V()>&, std::size_t) {
@@ -639,9 +639,9 @@ struct StandardPreconditioners<Operator, Dune::Amg::SequentialInformation> {
using block_type = typename V::block_type;
using VTo = Dune::BlockVector<Dune::FieldVector<float, block_type::dimension>>;
using matrix_type_to = typename Dune::BCRSMatrix<Dune::FieldMatrix<float, block_type::dimension, block_type::dimension>>;
using CuDILU = typename cuistl::CuDILU<matrix_type_to, cuistl::CuVector<float>, cuistl::CuVector<float>>;
using Adapter = typename cuistl::PreconditionerAdapter<VTo, VTo, CuDILU>;
using Converter = typename cuistl::PreconditionerConvertFieldTypeAdapter<Adapter, M, V, V>;
using CuDILU = typename gpuistl::CuDILU<matrix_type_to, gpuistl::CuVector<float>, gpuistl::CuVector<float>>;
using Adapter = typename gpuistl::PreconditionerAdapter<VTo, VTo, CuDILU>;
using Converter = typename gpuistl::PreconditionerConvertFieldTypeAdapter<Adapter, M, V, V>;
auto converted = std::make_shared<Converter>(op.getmat());
auto adapted = std::make_shared<Adapter>(std::make_shared<CuDILU>(converted->getConvertedMatrix(), split_matrix, tune_gpu_kernels));
converted->setUnderlyingPreconditioner(adapted);
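
Every creator above follows the same wrapping pattern: construct the GPU preconditioner, adapt it to CPU vector types with PreconditionerAdapter, and, in the parallel case, wrap it in CuBlockPreconditioner. A sketch of that pattern with concrete types, assuming a 3x3 blocked double-precision system; the cuistl header names are assumed from the include layout seen elsewhere in this diff:

    #include <dune/common/fmatrix.hh>
    #include <dune/istl/bcrsmatrix.hh>
    #include <dune/istl/bvector.hh>
    // Header names below are assumed from the cuistl file layout.
    #include <opm/simulators/linalg/cuistl/CuBlockPreconditioner.hpp>
    #include <opm/simulators/linalg/cuistl/CuDILU.hpp>
    #include <opm/simulators/linalg/cuistl/CuVector.hpp>
    #include <opm/simulators/linalg/cuistl/PreconditionerAdapter.hpp>
    #include <memory>

    using M = Dune::BCRSMatrix<Dune::FieldMatrix<double, 3, 3>>;
    using V = Dune::BlockVector<Dune::FieldVector<double, 3>>;

    template <class Comm>
    auto makeGpuDilu(const M& matrix, const Comm& comm,
                     bool split_matrix = true, bool tune_gpu_kernels = true)
    {
        using field_type = typename V::field_type;
        using CuDILU = Opm::gpuistl::CuDILU<M,
                                            Opm::gpuistl::CuVector<field_type>,
                                            Opm::gpuistl::CuVector<field_type>>;
        auto cuDILU = std::make_shared<CuDILU>(matrix, split_matrix, tune_gpu_kernels);
        auto adapted = std::make_shared<Opm::gpuistl::PreconditionerAdapter<V, V, CuDILU>>(cuDILU);
        return std::make_shared<Opm::gpuistl::CuBlockPreconditioner<V, V, Comm>>(adapted, comm);
    }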

View File

@@ -25,7 +25,7 @@
#include <opm/simulators/linalg/cuistl/PreconditionerHolder.hpp>
#include <opm/simulators/linalg/cuistl/detail/preconditioner_should_call_post_pre.hpp>
namespace Opm::cuistl
namespace Opm::gpuistl
{
//! @brief Is an adaptation of Dune::BlockPreconditioner that works within the CuISTL framework.
//!
@@ -125,5 +125,5 @@ private:
//! \brief the communication object
std::shared_ptr<const communication_type> m_communication;
};
} // namespace Opm::cuistl
} // namespace Opm::gpuistl
#endif

View File

@@ -24,7 +24,7 @@
#include <opm/simulators/linalg/cuistl/CuView.hpp>
#include <opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp>
namespace Opm::cuistl
namespace Opm::gpuistl
{
template <class T>
@@ -201,4 +201,4 @@ template CuView<const double> make_view<double>(const CuBuffer<double>&);
template CuView<const float> make_view<float>(const CuBuffer<float>&);
template CuView<const int> make_view<int>(const CuBuffer<int>&);
} // namespace Opm::cuistl
} // namespace Opm::gpuistl

View File

@@ -29,7 +29,7 @@
#include <string>
namespace Opm::cuistl
namespace Opm::gpuistl
{
/**
@@ -276,5 +276,5 @@ private:
template <class T>
CuView<const T> make_view(const CuBuffer<T>&);
} // namespace Opm::cuistl
} // namespace Opm::gpuistl
#endif
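
make_view exposes a CuBuffer's device memory as a read-only CuView without copying. A minimal sketch that takes an existing buffer as input, using the signature instantiated above (the CuBuffer header name is assumed from the file layout):

    #include <opm/simulators/linalg/cuistl/CuBuffer.hpp>
    #include <opm/simulators/linalg/cuistl/CuView.hpp>

    // Borrow a non-owning const view of an existing device buffer.
    Opm::gpuistl::CuView<const double>
    borrowView(const Opm::gpuistl::CuBuffer<double>& buf)
    {
        return Opm::gpuistl::make_view(buf);
    }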

View File

@@ -37,7 +37,7 @@
#include <functional>
#include <utility>
#include <string>
namespace Opm::cuistl
namespace Opm::gpuistl
{
template <class M, class X, class Y, int l>
@@ -293,14 +293,14 @@ CuDILU<M, X, Y, l>::tuneThreadBlockSizes()
m_upperSolveThreadBlockSize = detail::tuneThreadBlockSize(tuneUpperSolveThreadBlockSizeInApply, "Kernel computing an upper triangular solve for a level set");
}
} // namespace Opm::cuistl
} // namespace Opm::gpuistl
#define INSTANTIATE_CUDILU_DUNE(realtype, blockdim) \
template class ::Opm::cuistl::CuDILU<Dune::BCRSMatrix<Dune::FieldMatrix<realtype, blockdim, blockdim>>, \
::Opm::cuistl::CuVector<realtype>, \
::Opm::cuistl::CuVector<realtype>>; \
template class ::Opm::cuistl::CuDILU<Dune::BCRSMatrix<Opm::MatrixBlock<realtype, blockdim, blockdim>>, \
::Opm::cuistl::CuVector<realtype>, \
::Opm::cuistl::CuVector<realtype>>
template class ::Opm::gpuistl::CuDILU<Dune::BCRSMatrix<Dune::FieldMatrix<realtype, blockdim, blockdim>>, \
::Opm::gpuistl::CuVector<realtype>, \
::Opm::gpuistl::CuVector<realtype>>; \
template class ::Opm::gpuistl::CuDILU<Dune::BCRSMatrix<Opm::MatrixBlock<realtype, blockdim, blockdim>>, \
::Opm::gpuistl::CuVector<realtype>, \
::Opm::gpuistl::CuVector<realtype>>
INSTANTIATE_CUDILU_DUNE(double, 1);
INSTANTIATE_CUDILU_DUNE(double, 2);

View File

@@ -27,7 +27,7 @@
namespace Opm::cuistl
namespace Opm::gpuistl
{
//! \brief DILU preconditioner on the GPU.
//!
@@ -144,6 +144,6 @@ private:
int m_moveThreadBlockSize = -1;
int m_DILUFactorizationThreadBlockSize = -1;
};
} // end namespace Opm::cuistl
} // end namespace Opm::gpuistl
#endif

View File

@@ -26,7 +26,7 @@
#include <opm/simulators/linalg/cuistl/detail/vector_operations.hpp>
#include <opm/simulators/linalg/matrixblock.hh>
namespace Opm::cuistl
namespace Opm::gpuistl
{
template <class M, class X, class Y, int l>
@@ -108,14 +108,14 @@ CuJac<M, X, Y, l>::invertDiagonalAndFlatten()
m_diagInvFlattened.data());
}
} // namespace Opm::cuistl
} // namespace Opm::gpuistl
#define INSTANTIATE_CUJAC_DUNE(realtype, blockdim) \
template class ::Opm::cuistl::CuJac<Dune::BCRSMatrix<Dune::FieldMatrix<realtype, blockdim, blockdim>>, \
::Opm::cuistl::CuVector<realtype>, \
::Opm::cuistl::CuVector<realtype>>; \
template class ::Opm::cuistl::CuJac<Dune::BCRSMatrix<Opm::MatrixBlock<realtype, blockdim, blockdim>>, \
::Opm::cuistl::CuVector<realtype>, \
::Opm::cuistl::CuVector<realtype>>
template class ::Opm::gpuistl::CuJac<Dune::BCRSMatrix<Dune::FieldMatrix<realtype, blockdim, blockdim>>, \
::Opm::gpuistl::CuVector<realtype>, \
::Opm::gpuistl::CuVector<realtype>>; \
template class ::Opm::gpuistl::CuJac<Dune::BCRSMatrix<Opm::MatrixBlock<realtype, blockdim, blockdim>>, \
::Opm::gpuistl::CuVector<realtype>, \
::Opm::gpuistl::CuVector<realtype>>
INSTANTIATE_CUJAC_DUNE(double, 1);
INSTANTIATE_CUJAC_DUNE(double, 2);

View File

@@ -28,7 +28,7 @@
namespace Opm::cuistl
namespace Opm::gpuistl
{
//! \brief Jacobi preconditioner on the GPU.
//!
@@ -110,6 +110,6 @@ private:
void invertDiagonalAndFlatten();
};
} // end namespace Opm::cuistl
} // end namespace Opm::gpuistl
#endif

View File

@@ -24,7 +24,7 @@
#include <opm/simulators/linalg/cuistl/CuVector.hpp>
#include <vector>
namespace Opm::cuistl
namespace Opm::gpuistl
{
/**
* @brief GPUSender is a wrapper class for classes which will implement copyOwnerToAll

@@ -409,5 +409,5 @@ public:
private:
std::shared_ptr<GPUSender<field_type, OwnerOverlapCopyCommunicationType>> m_sender;
};
} // namespace Opm::cuistl
} // namespace Opm::gpuistl
#endif

View File

@@ -37,7 +37,7 @@
// it highly recommended to read that before proceeding.
namespace Opm::cuistl
namespace Opm::gpuistl
{
template <class M, class X, class Y, int l>
@@ -337,14 +337,14 @@ CuSeqILU0<M, X, Y, l>::updateILUConfiguration()
analyzeMatrix();
createILU();
}
} // namespace Opm::cuistl
} // namespace Opm::gpuistl
#define INSTANTIATE_CUSEQILU0_DUNE(realtype, blockdim) \
template class ::Opm::cuistl::CuSeqILU0<Dune::BCRSMatrix<Dune::FieldMatrix<realtype, blockdim, blockdim>>, \
::Opm::cuistl::CuVector<realtype>, \
::Opm::cuistl::CuVector<realtype>>; \
template class ::Opm::cuistl::CuSeqILU0<Dune::BCRSMatrix<Opm::MatrixBlock<realtype, blockdim, blockdim>>, \
::Opm::cuistl::CuVector<realtype>, \
::Opm::cuistl::CuVector<realtype>>
template class ::Opm::gpuistl::CuSeqILU0<Dune::BCRSMatrix<Dune::FieldMatrix<realtype, blockdim, blockdim>>, \
::Opm::gpuistl::CuVector<realtype>, \
::Opm::gpuistl::CuVector<realtype>>; \
template class ::Opm::gpuistl::CuSeqILU0<Dune::BCRSMatrix<Opm::MatrixBlock<realtype, blockdim, blockdim>>, \
::Opm::gpuistl::CuVector<realtype>, \
::Opm::gpuistl::CuVector<realtype>>
INSTANTIATE_CUSEQILU0_DUNE(double, 1);

View File

@@ -28,7 +28,7 @@
namespace Opm::cuistl
namespace Opm::gpuistl
{
//! \brief Sequential ILU0 preconditioner on the GPU through the CuSparse library.
//!
@@ -133,6 +133,6 @@ private:
void updateILUConfiguration();
};
} // end namespace Opm::cuistl
} // end namespace Opm::gpuistl
#endif

View File

@@ -29,7 +29,7 @@
#include <opm/simulators/linalg/matrixblock.hh>
#include <type_traits>
namespace Opm::cuistl
namespace Opm::gpuistl
{
namespace
@@ -343,4 +343,4 @@ INSTANTIATE_CUSPARSE_DUNE_MATRIX_CONSTRUCTION_FUNTIONS(float, 4);
INSTANTIATE_CUSPARSE_DUNE_MATRIX_CONSTRUCTION_FUNTIONS(float, 5);
INSTANTIATE_CUSPARSE_DUNE_MATRIX_CONSTRUCTION_FUNTIONS(float, 6);
} // namespace Opm::cuistl
} // namespace Opm::gpuistl

View File

@@ -28,7 +28,7 @@
#include <opm/simulators/linalg/cuistl/detail/safe_conversion.hpp>
#include <vector>
namespace Opm::cuistl
namespace Opm::gpuistl
{
/**
@@ -298,5 +298,5 @@ private:
template <class VectorType>
void assertSameSize(const VectorType& vector) const;
};
} // namespace Opm::cuistl
} // namespace Opm::gpuistl
#endif

View File

@@ -26,7 +26,7 @@
#include <opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp>
#include <opm/simulators/linalg/cuistl/detail/vector_operations.hpp>
namespace Opm::cuistl
namespace Opm::gpuistl
{
template <class T>
@@ -304,4 +304,4 @@ template class CuVector<double>;
template class CuVector<float>;
template class CuVector<int>;
} // namespace Opm::cuistl
} // namespace Opm::gpuistl

View File

@@ -29,7 +29,7 @@
#include <string>
namespace Opm::cuistl
namespace Opm::gpuistl
{
/**
@@ -391,5 +391,5 @@ private:
void assertHasElements() const;
};
} // namespace Opm::cuistl
} // namespace Opm::gpuistl
#endif
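
CuVector owns device memory; the tests at the end of this diff use its element-count constructor and raw data() accessor. A sketch built on just those members plus the setVectorValue helper declared in detail/vector_operations.hpp further down:

    #include <opm/simulators/linalg/cuistl/CuVector.hpp>
    #include <opm/simulators/linalg/cuistl/detail/vector_operations.hpp>
    #include <cstddef>

    void fillDeviceVector(std::size_t n)
    {
        Opm::gpuistl::CuVector<double> v(n);                     // device allocation
        Opm::gpuistl::detail::setVectorValue(v.data(), n, 42.0); // fill with a constant
    }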

View File

@@ -23,7 +23,7 @@
#include <opm/simulators/linalg/cuistl/CuView.hpp>
#include <opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp>
namespace Opm::cuistl
namespace Opm::gpuistl
{
template <class T>
@@ -79,4 +79,4 @@ template class CuView<double>;
template class CuView<float>;
template class CuView<int>;
} // namespace Opm::cuistl
} // namespace Opm::gpuistl

View File

@@ -37,7 +37,7 @@
#define OPM_IS_INSIDE_DEVICE_FUNCTION_TEMPORARY 0
#endif
namespace Opm::cuistl
namespace Opm::gpuistl
{
/**
@@ -410,6 +410,6 @@ private:
}
};
} // namespace Opm::cuistl
} // namespace Opm::gpuistl
#endif

View File

@@ -37,7 +37,7 @@
#include <string>
#include <tuple>
#include <utility>
namespace Opm::cuistl
namespace Opm::gpuistl
{
template <class M, class X, class Y, int l>
@@ -289,14 +289,14 @@ OpmCuILU0<M, X, Y, l>::tuneThreadBlockSizes()
tuneUpperSolveThreadBlockSizeInApply, "Kernel computing an upper triangular solve for a level set");
}
} // namespace Opm::cuistl
} // namespace Opm::gpuistl
#define INSTANTIATE_CUDILU_DUNE(realtype, blockdim) \
template class ::Opm::cuistl::OpmCuILU0<Dune::BCRSMatrix<Dune::FieldMatrix<realtype, blockdim, blockdim>>, \
::Opm::cuistl::CuVector<realtype>, \
::Opm::cuistl::CuVector<realtype>>; \
template class ::Opm::cuistl::OpmCuILU0<Dune::BCRSMatrix<Opm::MatrixBlock<realtype, blockdim, blockdim>>, \
::Opm::cuistl::CuVector<realtype>, \
::Opm::cuistl::CuVector<realtype>>
template class ::Opm::gpuistl::OpmCuILU0<Dune::BCRSMatrix<Dune::FieldMatrix<realtype, blockdim, blockdim>>, \
::Opm::gpuistl::CuVector<realtype>, \
::Opm::gpuistl::CuVector<realtype>>; \
template class ::Opm::gpuistl::OpmCuILU0<Dune::BCRSMatrix<Opm::MatrixBlock<realtype, blockdim, blockdim>>, \
::Opm::gpuistl::CuVector<realtype>, \
::Opm::gpuistl::CuVector<realtype>>
INSTANTIATE_CUDILU_DUNE(double, 1);
INSTANTIATE_CUDILU_DUNE(double, 2);

View File

@@ -29,7 +29,7 @@
#include <vector>
namespace Opm::cuistl
namespace Opm::gpuistl
{
//! \brief ILU0 preconditioner on the GPU.
//!
@@ -144,6 +144,6 @@ private:
int m_moveThreadBlockSize = -1;
int m_ILU0FactorizationThreadBlockSize = -1;
};
} // end namespace Opm::cuistl
} // end namespace Opm::gpuistl
#endif

View File

@@ -26,7 +26,7 @@
#include <opm/simulators/linalg/cuistl/detail/preconditioner_should_call_post_pre.hpp>
namespace Opm::cuistl
namespace Opm::gpuistl
{
//!\brief Makes a CUDA preconditioner available to a CPU simulator.
//!
@@ -134,6 +134,6 @@ private:
std::unique_ptr<CuVector<field_type>> m_inputBuffer;
std::unique_ptr<CuVector<field_type>> m_outputBuffer;
};
} // end namespace Opm::cuistl
} // end namespace Opm::gpuistl
#endif

View File

@@ -31,7 +31,7 @@
#include <opm/simulators/linalg/cuistl/detail/preconditioner_should_call_post_pre.hpp>
namespace Opm::cuistl
namespace Opm::gpuistl
{
//! \brief Converts the field type (eg. double to float) to benchmark single precision preconditioners
//!
@@ -64,7 +64,7 @@ namespace Opm::cuistl
//! void applyILU0AsFloat(const MDouble& matrix, const XDouble& x, XDouble& y) {
//!
//! using FloatILU0 = typename Opm::ParallelOverlappingILU0<MFloat, XFloat, XFloat, ParallelInfo>;
//! using DoubleToFloatConverter = typename Opm::cuistl::PreconditionerConvertFieldTypeAdapter<FloatILU0, MDouble,
//! using DoubleToFloatConverter = typename Opm::gpuistl::PreconditionerConvertFieldTypeAdapter<FloatILU0, MDouble,
//! XDouble, XDouble>;
//!
//! // Note that we do not need to make a new instance for every invocation, this
@@ -239,6 +239,6 @@ private:
//! \brief the underlying preconditioner to use
std::shared_ptr<CudaPreconditionerType> m_underlyingPreconditioner;
};
} // end namespace Opm::cuistl
} // end namespace Opm::gpuistl
#endif

View File

@@ -21,7 +21,7 @@
#include <opm/simulators/linalg/PreconditionerWithUpdate.hpp>
namespace Opm::cuistl
namespace Opm::gpuistl
{
//! \brief Common interface for adapters that hold preconditioners.
//!
@@ -38,6 +38,6 @@ public:
*/
virtual std::shared_ptr<Dune::PreconditionerWithUpdate<X, Y>> getUnderlyingPreconditioner() = 0;
};
} // end namespace Opm::cuistl
} // end namespace Opm::gpuistl
#endif

View File

@@ -39,7 +39,7 @@
#endif
#endif
namespace Opm::cuistl
namespace Opm::gpuistl
{
//! @brief Wraps a CUDA solver to work with CPU data.
//!
@@ -56,7 +56,7 @@ public:
using typename Dune::IterativeSolver<X, X>::real_type;
using typename Dune::IterativeSolver<X, X>::scalar_real_type;
static constexpr auto block_size = domain_type::block_type::dimension;
using XGPU = Opm::cuistl::CuVector<real_type>;
using XGPU = Opm::gpuistl::CuVector<real_type>;
// TODO: Use a std::forward
SolverAdapter(Operator& op,
@@ -148,8 +148,8 @@ private:
if (!precAsHolder) {
OPM_THROW(std::invalid_argument,
"The preconditioner needs to be a CUDA preconditioner (eg. CuILU0) wrapped in a "
"Opm::cuistl::PreconditionerAdapter wrapped in a "
"Opm::cuistl::CuBlockPreconditioner. If you are unsure what this means, set "
"Opm::gpuistl::PreconditionerAdapter wrapped in a "
"Opm::gpuistl::CuBlockPreconditioner. If you are unsure what this means, set "
"preconditioner to 'CUILU0'"); // TODO: Suggest a better preconditioner
}
@@ -159,8 +159,8 @@ private:
if (!preconditionerAdapterAsHolder) {
OPM_THROW(std::invalid_argument,
"The preconditioner needs to be a CUDA preconditioner (eg. CuILU0) wrapped in a "
"Opm::cuistl::PreconditionerAdapter wrapped in a "
"Opm::cuistl::CuBlockPreconditioner. If you are unsure what this means, set "
"Opm::gpuistl::PreconditionerAdapter wrapped in a "
"Opm::gpuistl::CuBlockPreconditioner. If you are unsure what this means, set "
"preconditioner to 'CUILU0'"); // TODO: Suggest a better preconditioner
}
// We need to get the underlying preconditioner:
@@ -183,12 +183,12 @@ private:
// TODO add typename Operator communication type as a named type with using
std::shared_ptr<Opm::cuistl::GPUSender<real_type, typename Operator::communication_type>> gpuComm;
std::shared_ptr<Opm::gpuistl::GPUSender<real_type, typename Operator::communication_type>> gpuComm;
if (mpiSupportsCudaAwareAtCompileTime && mpiSupportsCudaAwareAtRunTime){
gpuComm = std::make_shared<Opm::cuistl::GPUAwareMPISender<real_type, block_size, typename Operator::communication_type>>(communication);
gpuComm = std::make_shared<Opm::gpuistl::GPUAwareMPISender<real_type, block_size, typename Operator::communication_type>>(communication);
}
else{
gpuComm = std::make_shared<Opm::cuistl::GPUObliviousMPISender<real_type, block_size, typename Operator::communication_type>>(communication);
gpuComm = std::make_shared<Opm::gpuistl::GPUObliviousMPISender<real_type, block_size, typename Operator::communication_type>>(communication);
}
using CudaCommunication = CuOwnerOverlapCopy<real_type, block_size, typename Operator::communication_type>;
@@ -222,7 +222,7 @@ private:
if (!precAsHolder) {
OPM_THROW(std::invalid_argument,
"The preconditioner needs to be a CUDA preconditioner wrapped in a "
"Opm::cuistl::PreconditionerHolder (eg. CuILU0).");
"Opm::gpuistl::PreconditionerHolder (eg. CuILU0).");
}
auto preconditionerOnGPU = precAsHolder->getUnderlyingPreconditioner();
@@ -237,6 +237,6 @@ private:
std::unique_ptr<XGPU> m_inputBuffer;
std::unique_ptr<XGPU> m_outputBuffer;
};
} // namespace Opm::cuistl
} // namespace Opm::gpuistl
#endif

View File

@@ -19,7 +19,7 @@
#include <cublas_v2.h>
#include <opm/simulators/linalg/cuistl/detail/CuBlasHandle.hpp>
#include <opm/simulators/linalg/cuistl/detail/cublas_safe_call.hpp>
namespace Opm::cuistl::detail
namespace Opm::gpuistl::detail
{
@@ -46,4 +46,4 @@ CuBlasHandle::getInstance()
return instance;
}
} // namespace Opm::cuistl::detail
} // namespace Opm::gpuistl::detail

View File

@@ -21,7 +21,7 @@
#include <cublas_v2.h>
#include <memory>
namespace Opm::cuistl::detail
namespace Opm::gpuistl::detail
{
/**
@@ -31,7 +31,7 @@ namespace Opm::cuistl::detail
* @code{.cpp}
* #include <opm/simulators/linalg/cuistl/detail/CuBlasHandle.hpp>
* void someFunction() {
* auto& cublasHandle = ::Opm::cuistl::detail::CuBlasHandle::getInstance();
* auto& cublasHandle = ::Opm::gpuistl::detail::CuBlasHandle::getInstance();
* int cuBlasVersion = -1;
* OPM_CUBLAS_SAFE_CALL(cublasGetVersion(cublasHandle.get(), &cuBlasVersion));
* }
@@ -64,5 +64,5 @@ private:
CuBlasHandle();
cublasHandle_t m_handle;
};
} // namespace Opm::cuistl::detail
} // namespace Opm::gpuistl::detail
#endif // OPM_CUBLASHANDLE_HPP

View File

@@ -21,7 +21,7 @@
#include <opm/simulators/linalg/cuistl/detail/CuSparseResource.hpp>
#include <opm/simulators/linalg/cuistl/detail/cusparse_safe_call.hpp>
namespace Opm::cuistl::detail
namespace Opm::gpuistl::detail
{
/**
@@ -52,7 +52,7 @@ createMatrixDescription()
/**
* @brief createLowerDiagonalDescription creates a lower diagonal matrix description
* @return a lower diagonal matrix description overlapped with options from ::Opm::cuistl::detail::createMatrixDescription()
* @return a lower diagonal matrix description overlapped with options from ::Opm::gpuistl::detail::createMatrixDescription()
*
* @note This will assume it has a unit diagonal
*/
@@ -67,7 +67,7 @@ createLowerDiagonalDescription()
/**
* @brief createUpperDiagonalDescription creates an upper diagonal matrix description
* @return an upper diagonal matrix description overlapped with options from ::Opm::cuistl::detail::createMatrixDescription()
* @return an upper diagonal matrix description overlapped with options from ::Opm::gpuistl::detail::createMatrixDescription()
*
* @note This will assume it has a non-unit diagonal.
*/
@@ -81,6 +81,6 @@ createUpperDiagonalDescription()
return description;
}
} // namespace Opm::cuistl::detail
} // namespace Opm::gpuistl::detail
#endif // CU_MATRIX_DESCRIPTION_HPP

View File

@@ -18,7 +18,7 @@
*/
#include <opm/simulators/linalg/cuistl/detail/CuSparseHandle.hpp>
#include <opm/simulators/linalg/cuistl/detail/cusparse_safe_call.hpp>
namespace Opm::cuistl::detail
namespace Opm::gpuistl::detail
{
@@ -46,4 +46,4 @@ CuSparseHandle::getInstance()
return instance;
}
} // namespace Opm::cuistl::detail
} // namespace Opm::gpuistl::detail

View File

@@ -21,7 +21,7 @@
#include <cusparse.h>
#include <memory>
namespace Opm::cuistl::detail
namespace Opm::gpuistl::detail
{
/**
@@ -31,7 +31,7 @@ namespace Opm::cuistl::detail
* @code{.cpp}
* #include <opm/simulators/linalg/cuistl/detail/CuSparseHandle.hpp>
* void someFunction() {
* auto& cuSparseHandle = ::Opm::cuistl::detail::CuSparseHandle::getInstance();
* auto& cuSparseHandle = ::Opm::gpuistl::detail::CuSparseHandle::getInstance();
* int cuSparseVersion = -1;
* OPM_CUSPARSE_SAFE_CALL(cusparseGetVersion(cuSparseHandle.get(), &cuSparseVersion));
* }
@@ -63,5 +63,5 @@ private:
CuSparseHandle();
cusparseHandle_t m_handle;
};
} // namespace Opm::cuistl::detail
} // namespace Opm::gpuistl::detail
#endif // OPM_CUSPARSEHANDLE_HPP

View File

@@ -23,7 +23,7 @@
#include <memory>
#include <type_traits>
namespace Opm::cuistl::detail
namespace Opm::gpuistl::detail
{
/**
@@ -94,6 +94,6 @@ private:
DeleterType m_deleter;
};
} // namespace Opm::cuistl::impl
} // namespace Opm::gpuistl::impl
#include <opm/simulators/linalg/cuistl/detail/CuSparseResource_impl.hpp>
#endif // CUSPARSERESOURCE_HPP

View File

@@ -20,7 +20,7 @@
#include <opm/common/ErrorMacros.hpp>
#include <opm/simulators/linalg/cuistl/detail/cusparse_safe_call.hpp>
namespace Opm::cuistl::detail
namespace Opm::gpuistl::detail
{
namespace
@@ -100,4 +100,4 @@ CuSparseResource<T>::~CuSparseResource()
// proper name of the function being called.
OPM_CUSPARSE_WARN_IF_ERROR(m_deleter(m_resource));
}
} // namespace Opm::cuistl::detail
} // namespace Opm::gpuistl::detail

View File

@@ -25,7 +25,7 @@
#include <string>
#include <utility>
namespace Opm::cuistl::detail
namespace Opm::gpuistl::detail
{
/// @brief Function that tests the best thread block size, assumes the provided function depends on threadblock-size
@@ -88,6 +88,6 @@ tuneThreadBlockSize(func& f, std::string descriptionOfFunction)
return bestBlockSize;
}
} // end namespace Opm::cuistl::detail
} // end namespace Opm::gpuistl::detail
#endif
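
tuneThreadBlockSize benchmarks a kernel-launching callable across candidate block sizes and returns the fastest one, which is how the m_*ThreadBlockSize members of CuDILU and OpmCuILU0 are set elsewhere in this diff. A sketch of a caller, assuming the callable is invoked with each candidate block size (the autotuner header name and the kernel wrapper are hypothetical):

    #include <opm/simulators/linalg/cuistl/detail/autotuner.hpp> // assumed header name
    #include <string>

    // Stand-in for a real kernel wrapper; hypothetical.
    void myKernelWrapper(int /*blockSize*/) { /* launch kernel here */ }

    int tuneMyKernel()
    {
        auto runWithBlockSize = [](int blockSize) { myKernelWrapper(blockSize); };
        return Opm::gpuistl::detail::tuneThreadBlockSize(
            runWithBlockSize, "my kernel (hypothetical)");
    }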

View File

@@ -31,16 +31,16 @@ This file contains a collection of utility functions used in the GPU implementation
The functions deal with creating the mappings between reordered and natural indices, as well as
extracting sparsity structures from dune matrices and creating cusparsematrix indices
*/
namespace Opm::cuistl::detail
namespace Opm::gpuistl::detail
{
inline std::vector<int>
createReorderedToNatural(const Opm::SparseTable<size_t>& levelSets)
{
auto res = std::vector<int>(Opm::cuistl::detail::to_size_t(levelSets.dataSize()));
auto res = std::vector<int>(Opm::gpuistl::detail::to_size_t(levelSets.dataSize()));
int globCnt = 0;
for (auto row : levelSets) {
for (auto col : row) {
OPM_ERROR_IF(Opm::cuistl::detail::to_size_t(globCnt) >= res.size(),
OPM_ERROR_IF(Opm::gpuistl::detail::to_size_t(globCnt) >= res.size(),
fmt::format("Internal error. globCnt = {}, res.size() = {}", globCnt, res.size()));
res[globCnt++] = static_cast<int>(col);
}
@@ -51,11 +51,11 @@ createReorderedToNatural(const Opm::SparseTable<size_t>& levelSets)
inline std::vector<int>
createNaturalToReordered(const Opm::SparseTable<size_t>& levelSets)
{
auto res = std::vector<int>(Opm::cuistl::detail::to_size_t(levelSets.dataSize()));
auto res = std::vector<int>(Opm::gpuistl::detail::to_size_t(levelSets.dataSize()));
int globCnt = 0;
for (auto row : levelSets) {
for (auto col : row) {
OPM_ERROR_IF(Opm::cuistl::detail::to_size_t(globCnt) >= res.size(),
OPM_ERROR_IF(Opm::gpuistl::detail::to_size_t(globCnt) >= res.size(),
fmt::format("Internal error. globCnt = {}, res.size() = {}", globCnt, res.size()));
res[col] = globCnt++;
}
@@ -105,6 +105,6 @@ extractLowerAndUpperMatrices(const M& naturalMatrix, const std::vector<int>& reo
return {std::unique_ptr<GPUM>(new auto(GPUM::fromMatrix(reorderedLower, true))),
std::unique_ptr<GPUM>(new auto(GPUM::fromMatrix(reorderedUpper, true)))};
}
} // namespace Opm::cuistl::detail
} // namespace Opm::gpuistl::detail
#endif
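
The two mappings are inverses over the level-set ordering: createReorderedToNatural lists the natural row index of each reordered row, and createNaturalToReordered inverts that. A standalone illustration of the same loop logic with plain vectors instead of the Opm::SparseTable used above:

    #include <cassert>
    #include <vector>

    int main()
    {
        // Level sets {0, 2} and {1} reorder the rows as 0, 2, 1.
        const std::vector<std::vector<int>> levelSets = {{0, 2}, {1}};

        std::vector<int> reorderedToNatural(3), naturalToReordered(3);
        int globCnt = 0;
        for (const auto& row : levelSets) {
            for (const int col : row) {
                reorderedToNatural[globCnt] = col; // same assignment as above
                naturalToReordered[col] = globCnt;
                ++globCnt;
            }
        }
        assert((reorderedToNatural == std::vector<int> {0, 2, 1}));
        assert((naturalToReordered == std::vector<int> {0, 2, 1}));
        return 0;
    }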

View File

@@ -28,7 +28,7 @@
namespace Opm::cuistl::detail
namespace Opm::gpuistl::detail
{
#define CHECK_CUBLAS_ERROR_TYPE(code, x) \
@@ -174,7 +174,7 @@ cublasWarnIfError(cublasStatus_t error,
return error;
}
} // namespace Opm::cuistl::detail
} // namespace Opm::gpuistl::detail
/**
* @brief OPM_CUBLAS_SAFE_CALL checks the return type of the cublas expression (function call) and throws an exception
@@ -194,7 +194,7 @@ cublasWarnIfError(cublasStatus_t error,
* @note This should be used for any call to cuBlas unless you have a good reason not to.
*/
#define OPM_CUBLAS_SAFE_CALL(expression) \
::Opm::cuistl::detail::cublasSafeCall(expression, #expression, __FILE__, __func__, __LINE__)
::Opm::gpuistl::detail::cublasSafeCall(expression, #expression, __FILE__, __func__, __LINE__)
/**
* @brief OPM_CUBLAS_WARN_IF_ERROR checks the return type of the cublas expression (function call) and issues a warning
@@ -214,6 +214,6 @@ cublasWarnIfError(cublasStatus_t error,
* @note Prefer the cublasSafeCall/OPM_CUBLAS_SAFE_CALL counterpart unless you really don't want to throw an exception.
*/
#define OPM_CUBLAS_WARN_IF_ERROR(expression) \
::Opm::cuistl::detail::cublasWarnIfError(expression, #expression, __FILE__, __func__, __LINE__)
::Opm::gpuistl::detail::cublasWarnIfError(expression, #expression, __FILE__, __func__, __LINE__)
#endif // OPM_CUBLAS_SAFE_CALL_HPP

View File

@@ -29,7 +29,7 @@
#include <cublas_v2.h>
#include <opm/common/ErrorMacros.hpp>
namespace Opm::cuistl::detail
namespace Opm::gpuistl::detail
{
inline cublasStatus_t
@@ -164,5 +164,5 @@ cublasNrm2([[maybe_unused]] cublasHandle_t handle,
OPM_THROW(std::runtime_error, "norm2 for integer vectors is not implemented yet.");
}
} // namespace Opm::cuistl::detail
} // namespace Opm::gpuistl::detail
#endif

View File

@@ -24,7 +24,7 @@
#include <opm/common/OpmLog/OpmLog.hpp>
#include <string_view>
namespace Opm::cuistl::detail
namespace Opm::gpuistl::detail
{
/**
* @brief getCudaErrorMessage generates the error message to display for a given error.
@@ -128,7 +128,7 @@ cudaWarnIfError(cudaError_t error,
OpmLog::warning(getCudaErrorMessage(error, expression, filename, functionName, lineNumber));
}
}
} // namespace Opm::cuistl::detail
} // namespace Opm::gpuistl::detail
/**
* @brief OPM_CUDA_SAFE_CALL checks the return type of the CUDA expression (function call) and throws an exception if it
@@ -148,7 +148,7 @@ cudaWarnIfError(cudaError_t error,
* @note This should be used for any call to the CUDA runtime API unless you have a good reason not to.
*/
#define OPM_CUDA_SAFE_CALL(expression) \
::Opm::cuistl::detail::cudaSafeCall(expression, #expression, __FILE__, __func__, __LINE__)
::Opm::gpuistl::detail::cudaSafeCall(expression, #expression, __FILE__, __func__, __LINE__)
/**
@@ -169,6 +169,6 @@ cudaWarnIfError(cudaError_t error,
* @note Prefer the cudaSafeCall/OPM_CUDA_SAFE_CALL counterpart unless you really don't want to throw an exception.
*/
#define OPM_CUDA_WARN_IF_ERROR(expression) \
::Opm::cuistl::detail::cudaWarnIfError(expression, #expression, __FILE__, __func__, __LINE__)
::Opm::gpuistl::detail::cudaWarnIfError(expression, #expression, __FILE__, __func__, __LINE__)
#endif
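
Both macros stringify the expression and forward file, function, and line, so failures are reported at the call site. A minimal usage sketch with real CUDA runtime calls:

    #include <opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp>
    #include <cuda_runtime.h>
    #include <cstddef>

    double* allocateOnDevice(std::size_t n)
    {
        double* devicePtr = nullptr;
        OPM_CUDA_SAFE_CALL(cudaMalloc(&devicePtr, n * sizeof(double)));
        return devicePtr;
    }

    void freeOnDevice(double* devicePtr)
    {
        // Warn instead of throw: suitable for cleanup paths.
        OPM_CUDA_WARN_IF_ERROR(cudaFree(devicePtr));
    }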

View File

@@ -19,7 +19,7 @@
#ifndef CUSPARSE_CONSTANTS_HPP
#define CUSPARSE_CONSTANTS_HPP
#include <cusparse.h>
namespace Opm::cuistl::detail
namespace Opm::gpuistl::detail
{
const constexpr auto CUSPARSE_MATRIX_ORDER = CUSPARSE_DIRECTION_ROW;
}

View File

@@ -23,7 +23,7 @@
#include <opm/simulators/linalg/cuistl/detail/gpuThreadUtils.hpp>
#include <stdexcept>
namespace Opm::cuistl::detail
namespace Opm::gpuistl::detail
{
namespace
{
@@ -110,8 +110,8 @@ copyMatDataToReordered(T* srcMatrix,
int thrBlockSize)
{
int threadBlockSize
= ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(cuMoveDataToReordered<T, blocksize>, thrBlockSize);
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(numberOfRows, threadBlockSize);
= ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(cuMoveDataToReordered<T, blocksize>, thrBlockSize);
int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(numberOfRows, threadBlockSize);
cuMoveDataToReordered<T, blocksize><<<nThreadBlocks, threadBlockSize>>>(
srcMatrix, srcRowIndices, dstMatrix, dstRowIndices, naturalToReordered, numberOfRows);
}
@@ -130,9 +130,9 @@ copyMatDataToReorderedSplit(T* srcMatrix,
size_t numberOfRows,
int thrBlockSize)
{
int threadBlockSize = ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(
int threadBlockSize = ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(
cuMoveDataToReorderedSplit<T, blocksize>, thrBlockSize);
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(numberOfRows, threadBlockSize);
int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(numberOfRows, threadBlockSize);
cuMoveDataToReorderedSplit<T, blocksize><<<nThreadBlocks, threadBlockSize>>>(srcMatrix,
srcRowIndices,
srcColumnIndices,
@@ -161,4 +161,4 @@ INSTANTIATE_KERNEL_WRAPPERS(double, 3);
INSTANTIATE_KERNEL_WRAPPERS(double, 4);
INSTANTIATE_KERNEL_WRAPPERS(double, 5);
INSTANTIATE_KERNEL_WRAPPERS(double, 6);
} // namespace Opm::cuistl::detail
} // namespace Opm::gpuistl::detail
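
Every wrapper in this file sizes its launch the same way: take a recommended thread-block size (or the tuned override), round the row count up to whole blocks, and launch. A sketch of the pattern, assuming getCudaRecomendedThreadBlockSize is backed by the CUDA occupancy API (an implementation detail not shown in this diff):

    #include <cuda_runtime.h>
    #include <cstddef>

    __global__ void scaleKernel(double* x, double a, std::size_t n)
    {
        const std::size_t i = blockDim.x * blockIdx.x + threadIdx.x;
        if (i < n) {
            x[i] *= a;
        }
    }

    void scale(double* x, double a, std::size_t n)
    {
        int minGridSize = 0;
        int blockSize = 0;
        cudaOccupancyMaxPotentialBlockSize(&minGridSize, &blockSize, scaleKernel);
        const int nBlocks = (static_cast<int>(n) + blockSize - 1) / blockSize; // round up
        scaleKernel<<<nBlocks, blockSize>>>(x, a, n);
    }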

View File

@@ -20,7 +20,7 @@
#define OPM_CUISTL_CUSPARSE_MATRIX_OPERATIONS_HPP
#include <cstddef>
#include <vector>
namespace Opm::cuistl::detail
namespace Opm::gpuistl::detail
{
/**
* @brief Reorders the elements of a matrix by copying them from one matrix to another using a permutation list
@@ -68,5 +68,5 @@ void copyMatDataToReorderedSplit(T* srcMatrix,
size_t numberOfRows,
int threadBlockSize);
} // namespace Opm::cuistl::detail
} // namespace Opm::gpuistl::detail
#endif

View File

@@ -24,7 +24,7 @@
#include <opm/common/ErrorMacros.hpp>
#include <opm/common/OpmLog/OpmLog.hpp>
namespace Opm::cuistl::detail
namespace Opm::gpuistl::detail
{
#define CHECK_CUSPARSE_ERROR_TYPE(code, x) \
@@ -161,7 +161,7 @@ cusparseWarnIfError(cusparseStatus_t error,
return error;
}
} // namespace Opm::cuistl::detail
} // namespace Opm::gpuistl::detail
@@ -183,7 +183,7 @@ cusparseWarnIfError(cusparseStatus_t error,
* @note This should be used for any call to cuSparse unless you have a good reason not to.
*/
#define OPM_CUSPARSE_SAFE_CALL(expression) \
::Opm::cuistl::detail::cusparseSafeCall(expression, #expression, __FILE__, __func__, __LINE__)
::Opm::gpuistl::detail::cusparseSafeCall(expression, #expression, __FILE__, __func__, __LINE__)
/**
* @brief OPM_CUSPARSE_WARN_IF_ERROR checks the return type of the cusparse expression (function call) and issues a
@@ -204,5 +204,5 @@ cusparseWarnIfError(cusparseStatus_t error,
* exception.
*/
#define OPM_CUSPARSE_WARN_IF_ERROR(expression) \
::Opm::cuistl::detail::cusparseWarnIfError(expression, #expression, __FILE__, __func__, __LINE__)
::Opm::gpuistl::detail::cusparseWarnIfError(expression, #expression, __FILE__, __func__, __LINE__)
#endif // OPM_CUSPARSE_SAFE_CALL_HPP

View File

@@ -27,7 +27,7 @@
#include <type_traits>
#ifndef OPM_CUSPARSE_WRAPPER_HPP
#define OPM_CUSPARSE_WRAPPER_HPP
namespace Opm::cuistl::detail
namespace Opm::gpuistl::detail
{
inline cusparseStatus_t
@@ -450,5 +450,5 @@ cusparseBsrmv(cusparseHandle_t handle,
beta,
y);
}
} // namespace Opm::cuistl::detail
} // namespace Opm::gpuistl::detail
#endif

View File

@@ -22,7 +22,7 @@
#include <limits>
#include <vector>
namespace Opm::cuistl::detail
namespace Opm::gpuistl::detail
{
/**
@@ -53,6 +53,6 @@ makeMatrixWithNonzeroDiagonal(const Matrix& matrix,
return newMatrix;
}
} // namespace Opm::cuistl::detail
} // namespace Opm::gpuistl::detail
#endif

View File

@@ -26,7 +26,7 @@
/*
This file provides some logic for handling how to choose the correct thread-block size
*/
namespace Opm::cuistl::detail
namespace Opm::gpuistl::detail
{
constexpr inline size_t
getThreads([[maybe_unused]] size_t numberOfRows)
@@ -61,6 +61,6 @@ getNumberOfBlocks(int wantedThreads, int threadBlockSize)
return (wantedThreads + threadBlockSize - 1) / threadBlockSize;
}
} // namespace Opm::cuistl::detail
} // namespace Opm::gpuistl::detail
#endif

View File

@@ -29,7 +29,7 @@
* TODO: Use the requires-keyword once C++20 becomes available (https://en.cppreference.com/w/cpp/language/constraints ).
* With C++20 this file can be removed.
*/
namespace Opm::cuistl::detail
namespace Opm::gpuistl::detail
{
/**
@@ -121,5 +121,5 @@ public:
static constexpr bool value = std::is_same_v<decltype(test<T>(0)), std::true_type>;
};
} // namespace Opm::cuistl::detail
} // namespace Opm::gpuistl::detail
#endif
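
The detection machinery in this file is the classic pre-C++20 member-function test: an overloaded test() whose preferred overload is only well-formed when the probed expression compiles. A self-contained sketch of the same idiom for a made-up member shouldCallPre(), mirroring the 'value' line above:

    #include <type_traits>
    #include <utility>

    // Hypothetical detector; illustration of the idiom only.
    template <class T>
    class has_should_call_pre
    {
        template <class U>
        static auto test(int)
            -> decltype(std::declval<U>().shouldCallPre(), std::true_type {});
        template <class U>
        static std::false_type test(...);

    public:
        static constexpr bool value = std::is_same_v<decltype(test<T>(0)), std::true_type>;
    };

    struct WithHook { bool shouldCallPre() { return true; } };
    struct WithoutHook { };

    static_assert(has_should_call_pre<WithHook>::value);
    static_assert(!has_should_call_pre<WithoutHook>::value);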

View File

@@ -23,7 +23,7 @@
#include <opm/simulators/linalg/cuistl/detail/preconditionerKernels/DILUKernels.hpp>
#include <stdexcept>
namespace Opm::cuistl::detail::DILU
namespace Opm::gpuistl::detail::DILU
{
namespace
{
@@ -282,8 +282,8 @@ solveLowerLevelSet(T* reorderedMat,
int thrBlockSize)
{
int threadBlockSize
= ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(cuSolveLowerLevelSet<T, blocksize>, thrBlockSize);
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
= ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(cuSolveLowerLevelSet<T, blocksize>, thrBlockSize);
int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
cuSolveLowerLevelSet<T, blocksize><<<nThreadBlocks, threadBlockSize>>>(
reorderedMat, rowIndices, colIndices, indexConversion, startIdx, rowsInLevelSet, dInv, d, v);
}
@@ -302,9 +302,9 @@ solveLowerLevelSetSplit(T* reorderedMat,
T* v,
int thrBlockSize)
{
int threadBlockSize = ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(
int threadBlockSize = ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(
cuSolveLowerLevelSetSplit<T, blocksize>, thrBlockSize);
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
cuSolveLowerLevelSetSplit<T, blocksize><<<nThreadBlocks, threadBlockSize>>>(
reorderedMat, rowIndices, colIndices, indexConversion, startIdx, rowsInLevelSet, dInv, d, v);
}
@@ -322,8 +322,8 @@ solveUpperLevelSet(T* reorderedMat,
int thrBlockSize)
{
int threadBlockSize
= ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(cuSolveUpperLevelSet<T, blocksize>, thrBlockSize);
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
= ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(cuSolveUpperLevelSet<T, blocksize>, thrBlockSize);
int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
cuSolveUpperLevelSet<T, blocksize><<<nThreadBlocks, threadBlockSize>>>(
reorderedMat, rowIndices, colIndices, indexConversion, startIdx, rowsInLevelSet, dInv, v);
}
@@ -340,9 +340,9 @@ solveUpperLevelSetSplit(T* reorderedMat,
T* v,
int thrBlockSize)
{
int threadBlockSize = ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(
int threadBlockSize = ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(
cuSolveUpperLevelSetSplit<T, blocksize>, thrBlockSize);
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
cuSolveUpperLevelSetSplit<T, blocksize><<<nThreadBlocks, threadBlockSize>>>(
reorderedMat, rowIndices, colIndices, indexConversion, startIdx, rowsInLevelSet, dInv, v);
}
@@ -360,9 +360,9 @@ computeDiluDiagonal(T* reorderedMat,
int thrBlockSize)
{
if (blocksize <= 3) {
int threadBlockSize = ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(
int threadBlockSize = ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(
cuComputeDiluDiagonal<T, blocksize>, thrBlockSize);
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
cuComputeDiluDiagonal<T, blocksize><<<nThreadBlocks, threadBlockSize>>>(reorderedMat,
rowIndices,
colIndices,
@@ -393,9 +393,9 @@ computeDiluDiagonalSplit(T* reorderedLowerMat,
int thrBlockSize)
{
if (blocksize <= 3) {
int threadBlockSize = ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(
int threadBlockSize = ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(
cuComputeDiluDiagonalSplit<T, blocksize>, thrBlockSize);
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
cuComputeDiluDiagonalSplit<T, blocksize><<<nThreadBlocks, threadBlockSize>>>(reorderedLowerMat,
lowerRowIndices,
lowerColIndices,
@@ -434,4 +434,4 @@ INSTANTIATE_KERNEL_WRAPPERS(double, 3);
INSTANTIATE_KERNEL_WRAPPERS(double, 4);
INSTANTIATE_KERNEL_WRAPPERS(double, 5);
INSTANTIATE_KERNEL_WRAPPERS(double, 6);
} // namespace Opm::cuistl::detail::DILU
} // namespace Opm::gpuistl::detail::DILU

View File

@@ -24,7 +24,7 @@
#include <cuda_runtime.h>
#include <vector>
namespace Opm::cuistl::detail::DILU
namespace Opm::gpuistl::detail::DILU
{
/**
@@ -198,5 +198,5 @@ void computeDiluDiagonalSplit(T* reorderedLowerMat,
T* dInv,
int threadBlockSize);
} // namespace Opm::cuistl::detail::DILU
} // namespace Opm::gpuistl::detail::DILU
#endif

View File

@@ -27,7 +27,7 @@
The LU factorization and apply step is written based on the Dune implementations
*/
namespace Opm::cuistl::detail::ILU0
namespace Opm::gpuistl::detail::ILU0
{
namespace
{
@@ -341,8 +341,8 @@ solveLowerLevelSet(T* reorderedMat,
int thrBlockSize)
{
int threadBlockSize
= ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(cuSolveLowerLevelSet<T, blocksize>, thrBlockSize);
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
= ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(cuSolveLowerLevelSet<T, blocksize>, thrBlockSize);
int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
cuSolveLowerLevelSet<T, blocksize><<<nThreadBlocks, threadBlockSize>>>(
reorderedMat, rowIndices, colIndices, indexConversion, startIdx, rowsInLevelSet, d, v);
}
@@ -359,8 +359,8 @@ solveUpperLevelSet(T* reorderedMat,
int thrBlockSize)
{
int threadBlockSize
= ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(cuSolveUpperLevelSet<T, blocksize>, thrBlockSize);
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
= ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(cuSolveUpperLevelSet<T, blocksize>, thrBlockSize);
int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
cuSolveUpperLevelSet<T, blocksize><<<nThreadBlocks, threadBlockSize>>>(
reorderedMat, rowIndices, colIndices, indexConversion, startIdx, rowsInLevelSet, v);
}
@@ -377,9 +377,9 @@ solveLowerLevelSetSplit(T* reorderedMat,
T* v,
int thrBlockSize)
{
int threadBlockSize = ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(
int threadBlockSize = ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(
cuSolveLowerLevelSetSplit<T, blocksize>, thrBlockSize);
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
cuSolveLowerLevelSetSplit<T, blocksize><<<nThreadBlocks, threadBlockSize>>>(
reorderedMat, rowIndices, colIndices, indexConversion, startIdx, rowsInLevelSet, d, v);
}
@@ -396,9 +396,9 @@ solveUpperLevelSetSplit(T* reorderedMat,
T* v,
int thrBlockSize)
{
int threadBlockSize = ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(
int threadBlockSize = ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(
cuSolveUpperLevelSetSplit<T, blocksize>, thrBlockSize);
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
cuSolveUpperLevelSetSplit<T, blocksize><<<nThreadBlocks, threadBlockSize>>>(
reorderedMat, rowIndices, colIndices, indexConversion, startIdx, rowsInLevelSet, dInv, v);
}
@@ -415,8 +415,8 @@ LUFactorization(T* srcMatrix,
int thrBlockSize)
{
int threadBlockSize
= ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(cuLUFactorization<T, blocksize>, thrBlockSize);
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
= ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(cuLUFactorization<T, blocksize>, thrBlockSize);
int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
cuLUFactorization<T, blocksize><<<nThreadBlocks, threadBlockSize>>>(
srcMatrix, srcRowIndices, srcColumnIndices, naturalToReordered, reorderedToNatual, rowsInLevelSet, startIdx);
}
@@ -437,8 +437,8 @@ LUFactorizationSplit(T* reorderedLowerMat,
int thrBlockSize)
{
int threadBlockSize
= ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(cuLUFactorizationSplit<T, blocksize>, thrBlockSize);
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
= ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(cuLUFactorizationSplit<T, blocksize>, thrBlockSize);
int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
cuLUFactorizationSplit<T, blocksize><<<nThreadBlocks, threadBlockSize>>>(reorderedLowerMat,
lowerRowIndices,
lowerColIndices,
@@ -473,4 +473,4 @@ INSTANTIATE_KERNEL_WRAPPERS(double, 3);
INSTANTIATE_KERNEL_WRAPPERS(double, 4);
INSTANTIATE_KERNEL_WRAPPERS(double, 5);
INSTANTIATE_KERNEL_WRAPPERS(double, 6);
} // namespace Opm::cuistl::detail::ILU0
} // namespace Opm::gpuistl::detail::ILU0

View File

@@ -20,7 +20,7 @@
#define OPM_ILU0_KERNELS_HPP
#include <cstddef>
#include <vector>
namespace Opm::cuistl::detail::ILU0
namespace Opm::gpuistl::detail::ILU0
{
/**
@@ -189,5 +189,5 @@ void LUFactorizationSplit(T* reorderedLowerMat,
int rowsInLevelSet,
int threadBlockSize);
} // namespace Opm::cuistl::detail::ILU0
} // namespace Opm::gpuistl::detail::ILU0
#endif

View File

@@ -23,7 +23,7 @@
#include <opm/simulators/linalg/cuistl/detail/preconditionerKernels/JacKernels.hpp>
#include <stdexcept>
namespace Opm::cuistl::detail::JAC
namespace Opm::gpuistl::detail::JAC
{
namespace
{
@@ -60,8 +60,8 @@ invertDiagonalAndFlatten(T* mat, int* rowIndices, int* colIndices, size_t number
{
if (blocksize <= 3) {
int threadBlockSize
= ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(cuInvertDiagonalAndFlatten<T, blocksize>);
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(numberOfRows, threadBlockSize);
= ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(cuInvertDiagonalAndFlatten<T, blocksize>);
int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(numberOfRows, threadBlockSize);
cuInvertDiagonalAndFlatten<T, blocksize>
<<<nThreadBlocks, threadBlockSize>>>(mat, rowIndices, colIndices, numberOfRows, vec);
} else {
@@ -85,4 +85,4 @@ INSTANTIATE_KERNEL_WRAPPERS(double, 4);
INSTANTIATE_KERNEL_WRAPPERS(double, 5);
INSTANTIATE_KERNEL_WRAPPERS(double, 6);
} // namespace Opm::cuistl::detail::JAC
} // namespace Opm::gpuistl::detail::JAC

View File

@@ -20,7 +20,7 @@
#define OPM_JAC_KERNELS_HPP
#include <cstddef>
#include <vector>
namespace Opm::cuistl::detail::JAC
namespace Opm::gpuistl::detail::JAC
{
/**
@@ -34,5 +34,5 @@ namespace Opm::cuistl::detail::JAC
template <class T, int blocksize>
void invertDiagonalAndFlatten(T* mat, int* rowIndices, int* colIndices, size_t numberOfRows, T* vec);
} // namespace Opm::cuistl::detail::JAC
} // namespace Opm::gpuistl::detail::JAC
#endif

View File

@@ -22,7 +22,7 @@
#include <opm/simulators/linalg/cuistl/detail/has_function.hpp>
namespace Opm::cuistl::detail
namespace Opm::gpuistl::detail
{
//! @brief Tests (compile time) if the preconditioner type needs to call pre() before a call to apply()
@@ -60,5 +60,5 @@ shouldCallPreconditionerPost()
return true;
}
}
} // namespace Opm::cuistl::detail
} // namespace Opm::gpuistl::detail
#endif

View File

@@ -37,7 +37,7 @@
* while Dune::BlockVector (and relatives) use unsigned size_t.
*/
namespace Opm::cuistl::detail
namespace Opm::gpuistl::detail
{
/**
@@ -106,6 +106,6 @@ to_size_t(int i)
return std::size_t(i);
}
} // namespace Opm::cuistl::detail
} // namespace Opm::gpuistl::detail
#endif

View File

@@ -25,7 +25,7 @@
#include <opm/simulators/linalg/cuistl/detail/gpuThreadUtils.hpp>
#include <opm/simulators/linalg/cuistl/detail/vector_operations.hpp>
#include <stdexcept>
namespace Opm::cuistl::detail
namespace Opm::gpuistl::detail
{
namespace
@@ -108,8 +108,8 @@ template <class T>
void
setVectorValue(T* deviceData, size_t numberOfElements, const T& value)
{
int threadBlockSize = ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(setVectorValueKernel<T>);
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(numberOfElements, threadBlockSize);
int threadBlockSize = ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(setVectorValueKernel<T>);
int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(numberOfElements, threadBlockSize);
setVectorValueKernel<<<nThreadBlocks, threadBlockSize>>>(deviceData, numberOfElements, value);
}
@@ -121,8 +121,8 @@ template <class T>
void
setZeroAtIndexSet(T* deviceData, size_t numberOfElements, const int* indices)
{
int threadBlockSize = ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(setZeroAtIndexSetKernel<T>);
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(numberOfElements, threadBlockSize);
int threadBlockSize = ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(setZeroAtIndexSetKernel<T>);
int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(numberOfElements, threadBlockSize);
setZeroAtIndexSetKernel<<<nThreadBlocks, threadBlockSize>>>(deviceData, numberOfElements, indices);
}
template void setZeroAtIndexSet(double*, size_t, const int*);
@@ -138,8 +138,8 @@ innerProductAtIndices(cublasHandle_t cublasHandle,
size_t numberOfElements,
const int* indices)
{
int threadBlockSize = ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(elementWiseMultiplyKernel<T>);
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(numberOfElements, threadBlockSize);
int threadBlockSize = ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(elementWiseMultiplyKernel<T>);
int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(numberOfElements, threadBlockSize);
elementWiseMultiplyKernel<<<nThreadBlocks, threadBlockSize>>>(deviceA, deviceB, buffer, numberOfElements, indices);
// TODO: [perf] Get rid of the allocation here.
@@ -158,8 +158,8 @@ template <class T>
void
prepareSendBuf(const T* deviceA, T* buffer, size_t numberOfElements, const int* indices)
{
int threadBlockSize = ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(prepareSendBufKernel<T>);
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(numberOfElements, threadBlockSize);
int threadBlockSize = ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(prepareSendBufKernel<T>);
int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(numberOfElements, threadBlockSize);
prepareSendBufKernel<<<nThreadBlocks, threadBlockSize>>>(deviceA, buffer, numberOfElements, indices);
OPM_CUDA_SAFE_CALL(cudaDeviceSynchronize()); // The buffers are prepared for MPI. Wait for them to finish.
}
@@ -171,8 +171,8 @@ template <class T>
void
syncFromRecvBuf(T* deviceA, T* buffer, size_t numberOfElements, const int* indices)
{
int threadBlockSize = ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(syncFromRecvBufKernel<T>);
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(numberOfElements, threadBlockSize);
int threadBlockSize = ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(syncFromRecvBufKernel<T>);
int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(numberOfElements, threadBlockSize);
syncFromRecvBufKernel<<<nThreadBlocks, threadBlockSize>>>(deviceA, buffer, numberOfElements, indices);
// cudaDeviceSynchronize(); // Not needed, I guess...
}
@@ -191,20 +191,20 @@ weightedDiagMV(const T* squareBlockVector,
{
switch (blocksize) {
case 1: {
int threadBlockSize = ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(weightedDiagMV<T, 1>);
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(numberOfElements, threadBlockSize);
int threadBlockSize = ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(weightedDiagMV<T, 1>);
int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(numberOfElements, threadBlockSize);
weightedDiagMV<T, 1>
<<<nThreadBlocks, threadBlockSize>>>(squareBlockVector, numberOfElements, relaxationFactor, srcVec, dstVec);
} break;
case 2: {
int threadBlockSize = ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(weightedDiagMV<T, 2>);
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(numberOfElements, threadBlockSize);
int threadBlockSize = ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(weightedDiagMV<T, 2>);
int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(numberOfElements, threadBlockSize);
weightedDiagMV<T, 2>
<<<nThreadBlocks, threadBlockSize>>>(squareBlockVector, numberOfElements, relaxationFactor, srcVec, dstVec);
} break;
case 3: {
int threadBlockSize = ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(weightedDiagMV<T, 3>);
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(numberOfElements, threadBlockSize);
int threadBlockSize = ::Opm::gpuistl::detail::getCudaRecomendedThreadBlockSize(weightedDiagMV<T, 3>);
int nThreadBlocks = ::Opm::gpuistl::detail::getNumberOfBlocks(numberOfElements, threadBlockSize);
weightedDiagMV<T, 3>
<<<nThreadBlocks, threadBlockSize>>>(squareBlockVector, numberOfElements, relaxationFactor, srcVec, dstVec);
} break;
@@ -217,4 +217,4 @@ weightedDiagMV(const T* squareBlockVector,
template void weightedDiagMV(const double*, const size_t, const size_t, double, const double*, double*);
template void weightedDiagMV(const float*, const size_t, const size_t, float, const float*, float*);
} // namespace Opm::cuistl::detail
} // namespace Opm::gpuistl::detail
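For reference, the semantics the tests further down pin on weightedDiagMV are dst = w * D * src, with D the block-diagonal matrix stored as numberOfElements flat, row-major blocksize-by-blocksize blocks. A host-side sketch of that contract (not the production kernel):

template <class T>
void weightedDiagMVReference(const T* blocks, size_t n, size_t bs, T w, const T* src, T* dst)
{
    for (size_t block = 0; block < n; ++block) {
        for (size_t row = 0; row < bs; ++row) {
            T sum = 0;
            for (size_t col = 0; col < bs; ++col) {
                // row-major block layout: block * bs * bs is the block offset
                sum += blocks[block * bs * bs + row * bs + col] * src[block * bs + col];
            }
            dst[block * bs + row] = w * sum;
        }
    }
}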

View File

@@ -20,7 +20,7 @@
#define OPM_CUISTL_VECTOR_OPERATIONS_HPP
#include <cstddef>
#include <cublas_v2.h>
namespace Opm::cuistl::detail
namespace Opm::gpuistl::detail
{
/**
@@ -81,5 +81,5 @@ void weightedDiagMV(const T* squareBlockVector,
T relaxationFactor,
const T* srcVec,
T* dstVec);
} // namespace Opm::cuistl::detail
} // namespace Opm::gpuistl::detail
#endif

View File

@@ -21,7 +21,7 @@
#include <opm/common/OpmLog/OpmLog.hpp>
#include <opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp>
#include <opm/simulators/linalg/cuistl/set_device.hpp>
namespace Opm::cuistl
namespace Opm::gpuistl
{
void
setDevice(int mpiRank, [[maybe_unused]] int numberOfMpiRanks)
@@ -46,4 +46,4 @@ setDevice(int mpiRank, [[maybe_unused]] int numberOfMpiRanks)
OpmLog::info("Set CUDA device to " + std::to_string(deviceId) + " (out of " + std::to_string(deviceCount)
+ " devices).");
}
} // namespace Opm::cuistl
} // namespace Opm::gpuistl
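The body of setDevice lies mostly outside this hunk; judging by the log message, the mapping is presumably a round-robin assignment of MPI ranks to the visible devices, along the lines of:

int deviceCount = 0;
OPM_CUDA_SAFE_CALL(cudaGetDeviceCount(&deviceCount));
if (deviceCount > 0) {
    const int deviceId = mpiRank % deviceCount; // ranks sharing a node spread over its GPUs
    OPM_CUDA_SAFE_CALL(cudaSetDevice(deviceId));
}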

View File

@@ -20,7 +20,7 @@
#ifndef OPM_CUISTL_SET_DEVICE_HEADER
#define OPM_CUISTL_SET_DEVICE_HEADER
namespace Opm::cuistl
namespace Opm::gpuistl
{
//! @brief Sets the correct CUDA device in the setting of MPI
//!
@@ -32,5 +32,5 @@ namespace Opm::cuistl
//!
//! @note If no CUDA device is present, this does nothing.
void setDevice(int mpiRank, int numberOfMpiRanks);
} // namespace Opm::cuistl
} // namespace Opm::gpuistl
#endif

View File

@@ -167,7 +167,7 @@ BOOST_AUTO_TEST_CASE(TestFiniteDifference1D)
expectedOutputVector[i][1] = 43.0;
inputVector[i][0] = 1.0;
auto converter
= Opm::cuistl::PreconditionerConvertFieldTypeAdapter<TestPreconditioner, SpMatrixDouble, XDouble, XDouble>(
= Opm::gpuistl::PreconditionerConvertFieldTypeAdapter<TestPreconditioner, SpMatrixDouble, XDouble, XDouble>(
B);
auto underlyingPreconditioner = std::make_shared<TestPreconditioner>(
converter.getConvertedMatrix(), inputVector, B, expectedOutputVector);

View File

@@ -85,10 +85,10 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(FlattenAndInvertDiagonalWith3By3Blocks, T, Numeric
B[1][1][1][1] = -1.0;
B[1][1][2][2] = -1.0;
Opm::cuistl::CuSparseMatrix<T> m = Opm::cuistl::CuSparseMatrix<T>::fromMatrix(B);
Opm::cuistl::CuVector<T> dInvDiag(blocksize * blocksize * N);
Opm::gpuistl::CuSparseMatrix<T> m = Opm::gpuistl::CuSparseMatrix<T>::fromMatrix(B);
Opm::gpuistl::CuVector<T> dInvDiag(blocksize * blocksize * N);
Opm::cuistl::detail::JAC::invertDiagonalAndFlatten<T, 3>(
Opm::gpuistl::detail::JAC::invertDiagonalAndFlatten<T, 3>(
m.getNonZeroValues().data(), m.getRowIndices().data(), m.getColumnIndices().data(), N, dInvDiag.data());
std::vector<T> expectedInvDiag {-1.0 / 4.0,
@@ -159,10 +159,10 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(FlattenAndInvertDiagonalWith2By2Blocks, T, Numeric
B[1][1][0][0] = -1.0;
B[1][1][1][1] = -1.0;
Opm::cuistl::CuSparseMatrix<T> m = Opm::cuistl::CuSparseMatrix<T>::fromMatrix(B);
Opm::cuistl::CuVector<T> dInvDiag(blocksize * blocksize * N);
Opm::gpuistl::CuSparseMatrix<T> m = Opm::gpuistl::CuSparseMatrix<T>::fromMatrix(B);
Opm::gpuistl::CuVector<T> dInvDiag(blocksize * blocksize * N);
Opm::cuistl::detail::JAC::invertDiagonalAndFlatten<T, 2>(
Opm::gpuistl::detail::JAC::invertDiagonalAndFlatten<T, 2>(
m.getNonZeroValues().data(), m.getRowIndices().data(), m.getColumnIndices().data(), N, dInvDiag.data());
std::vector<T> expectedInvDiag {2.0, -2.0, -1.0 / 2.0, 1.0, -1.0, 0.0, 0.0, -1.0};
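The expected values above are per-block 2x2 inversions laid out row-major. A host reference for a single block (a sketch; invertDiagonalAndFlatten is assumed to apply this to every diagonal block of the BCRS matrix and write the results contiguously):

template <class T>
void invert2x2(const T* a, T* inv) // a and inv are row-major 2x2 blocks
{
    const T det = a[0] * a[3] - a[1] * a[2]; // adjugate over determinant
    inv[0] =  a[3] / det;
    inv[1] = -a[1] / det;
    inv[2] = -a[2] / det;
    inv[3] =  a[0] / det;
}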

View File

@@ -47,11 +47,11 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(ElementWiseMultiplicationOf3By3BlockVectorAndVecto
std::vector<T> hostBlockVector({1.0, 2.0, 3.0, 5.0, 2.0, 3.0, 2.0, 1.0, 2.0});
std::vector<T> hostVecVector({3.0, 2.0, 1.0});
std::vector<T> hostDstVector({0, 0, 0});
Opm::cuistl::CuVector<T> deviceBlockVector(hostBlockVector);
Opm::cuistl::CuVector<T> deviceVecVector(hostVecVector);
Opm::cuistl::CuVector<T> deviceDstVector(hostDstVector);
Opm::gpuistl::CuVector<T> deviceBlockVector(hostBlockVector);
Opm::gpuistl::CuVector<T> deviceVecVector(hostVecVector);
Opm::gpuistl::CuVector<T> deviceDstVector(hostDstVector);
Opm::cuistl::detail::weightedDiagMV(
Opm::gpuistl::detail::weightedDiagMV(
deviceBlockVector.data(), N, blocksize, weight, deviceVecVector.data(), deviceDstVector.data());
std::vector<T> expectedVec {10.0, 22.0, 10.0};
@@ -81,11 +81,11 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(ElementWiseMultiplicationOf2By2BlockVectorAndVecto
std::vector<T> hostBlockVector({1.0, 2.0, 3.0, 4.0, 4.0, 3.0, 2.0, 1.0});
std::vector<T> hostVecVector({1.0, 3.0, 2.0, 4.0});
std::vector<T> hostDstVector({0, 0, 0, 0});
Opm::cuistl::CuVector<T> deviceBlockVector(hostBlockVector);
Opm::cuistl::CuVector<T> deviceVecVector(hostVecVector);
Opm::cuistl::CuVector<T> deviceDstVector(hostDstVector);
Opm::gpuistl::CuVector<T> deviceBlockVector(hostBlockVector);
Opm::gpuistl::CuVector<T> deviceVecVector(hostVecVector);
Opm::gpuistl::CuVector<T> deviceDstVector(hostDstVector);
Opm::cuistl::detail::weightedDiagMV(
Opm::gpuistl::detail::weightedDiagMV(
deviceBlockVector.data(), N, blocksize, weight, deviceVecVector.data(), deviceDstVector.data());
std::vector<T> expectedVec {3.5, 7.5, 10.0, 4.0};
@@ -95,4 +95,4 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(ElementWiseMultiplicationOf2By2BlockVectorAndVecto
for (size_t i = 0; i < expectedVec.size(); i++) {
BOOST_CHECK_CLOSE(expectedVec[i], computedVec[i], 1e-7);
}
}
}
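Both expected vectors can be checked by hand. In the 3x3 case, the block with rows (1,2,3), (5,2,3), (2,1,2) applied to (3,2,1) gives (10, 22, 10), so the test's weight is evidently 1.0; in the 2x2 case, [[1,2],[3,4]] times (1,3) is (7,15) and [[4,3],[2,1]] times (2,4) is (20,8), which match (3.5, 7.5, 10.0, 4.0) with a weight of 0.5. The weight declarations themselves sit outside these hunks.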

View File

@@ -32,7 +32,7 @@ BOOST_AUTO_TEST_CASE(TestGetCublasVersion)
// that checks the version of blas programmatically. Let the test pass for now.
BOOST_CHECK(true);
#else
auto& cublasHandle = ::Opm::cuistl::detail::CuBlasHandle::getInstance();
auto& cublasHandle = ::Opm::gpuistl::detail::CuBlasHandle::getInstance();
int cuBlasVersion = -1;
OPM_CUBLAS_SAFE_CALL(cublasGetVersion(cublasHandle.get(), &cuBlasVersion));

View File

@@ -35,15 +35,15 @@ BOOST_AUTO_TEST_CASE(TestMakeView)
{
// test that we can create buffers and make views of the buffers using the pointer constructor
auto buf = std::vector<int>({1, 2, 3, 4, 5, 6});
const auto gpubuf = ::Opm::cuistl::CuBuffer<int>(buf);
auto gpuview = ::Opm::cuistl::CuView<int>(buf.data(), buf.size());
bool gpuBufCreatedView = std::is_same<::Opm::cuistl::CuView<int>, decltype(gpuview)>::value;
const auto gpubuf = ::Opm::gpuistl::CuBuffer<int>(buf);
auto gpuview = ::Opm::gpuistl::CuView<int>(buf.data(), buf.size());
bool gpuBufCreatedView = std::is_same<::Opm::gpuistl::CuView<int>, decltype(gpuview)>::value;
BOOST_CHECK(gpuBufCreatedView);
// test that we can make views of buffers by using the CuBuffer constructor
auto gpuview2 = ::Opm::cuistl::make_view(gpubuf);
bool gpuBufCreatedView2 = std::is_same<::Opm::cuistl::CuView<const int>, decltype(gpuview2)>::value;
auto gpuview2 = ::Opm::gpuistl::make_view(gpubuf);
bool gpuBufCreatedView2 = std::is_same<::Opm::gpuistl::CuView<const int>, decltype(gpuview2)>::value;
BOOST_CHECK(gpuBufCreatedView2);
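The distinction this test leans on: CuBuffer owns its device allocation (RAII), while CuView is assumed to be a trivially copyable, non-owning (pointer, size) pair, which is what makes it safe to pass by value straight into a __global__ kernel. Schematically (not the actual class layout):

template <class T>
struct ViewSketch {
    T* ptr;           // borrowed device pointer; not freed on destruction
    std::size_t len;
    __host__ __device__ T& operator[](std::size_t i) { return ptr[i]; }
};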

View File

@@ -41,11 +41,11 @@ using B1x1Vec = Dune::BlockVector<Dune::FieldVector<double, 1>>;
using B2x2Vec = Dune::BlockVector<Dune::FieldVector<double, 2>>;
using Sp1x1BlockMatrix = Dune::BCRSMatrix<FM1x1>;
using Sp2x2BlockMatrix = Dune::BCRSMatrix<FM2x2>;
using CuMatrix = Opm::cuistl::CuSparseMatrix<T>;
using CuIntVec = Opm::cuistl::CuVector<int>;
using CuFloatingPointVec = Opm::cuistl::CuVector<T>;
using CuDilu1x1 = Opm::cuistl::CuDILU<Sp1x1BlockMatrix, CuFloatingPointVec, CuFloatingPointVec>;
using CuDilu2x2 = Opm::cuistl::CuDILU<Sp2x2BlockMatrix, CuFloatingPointVec, CuFloatingPointVec>;
using CuMatrix = Opm::gpuistl::CuSparseMatrix<T>;
using CuIntVec = Opm::gpuistl::CuVector<int>;
using CuFloatingPointVec = Opm::gpuistl::CuVector<T>;
using CuDilu1x1 = Opm::gpuistl::CuDILU<Sp1x1BlockMatrix, CuFloatingPointVec, CuFloatingPointVec>;
using CuDilu2x2 = Opm::gpuistl::CuDILU<Sp2x2BlockMatrix, CuFloatingPointVec, CuFloatingPointVec>;
Sp1x1BlockMatrix
get1x1BlockTestMatrix()

View File

@@ -49,7 +49,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(CUJACApplyBlocksize2, T, NumericTypes)
using M = Dune::FieldMatrix<T, blocksize, blocksize>;
using SpMatrix = Dune::BCRSMatrix<M>;
using Vector = Dune::BlockVector<Dune::FieldVector<T, blocksize>>;
using CuJac = Opm::cuistl::CuJac<SpMatrix, Opm::cuistl::CuVector<T>, Opm::cuistl::CuVector<T>>;
using CuJac = Opm::gpuistl::CuJac<SpMatrix, Opm::gpuistl::CuVector<T>, Opm::gpuistl::CuVector<T>>;
SpMatrix B(N, N, nonZeroes, SpMatrix::row_wise);
for (auto row = B.createbegin(); row != B.createend(); ++row) {
@@ -70,7 +70,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(CUJACApplyBlocksize2, T, NumericTypes)
B[1][1][0][0] = -1.0;
B[1][1][1][1] = -1.0;
auto cujac = Opm::cuistl::PreconditionerAdapter<Vector, Vector, CuJac>(std::make_shared<CuJac>(B, 0.5));
auto cujac = Opm::gpuistl::PreconditionerAdapter<Vector, Vector, CuJac>(std::make_shared<CuJac>(B, 0.5));
Vector vVector(2);
Vector dVector(2);
@@ -103,7 +103,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(CUJACApplyBlocksize1, T, NumericTypes)
using M = Dune::FieldMatrix<T, blocksize, blocksize>;
using SpMatrix = Dune::BCRSMatrix<M>;
using Vector = Dune::BlockVector<Dune::FieldVector<T, blocksize>>;
using CuJac = Opm::cuistl::CuJac<SpMatrix, Opm::cuistl::CuVector<T>, Opm::cuistl::CuVector<T>>;
using CuJac = Opm::gpuistl::CuJac<SpMatrix, Opm::gpuistl::CuVector<T>, Opm::gpuistl::CuVector<T>>;
SpMatrix B(N, N, nonZeroes, SpMatrix::row_wise);
for (auto row = B.createbegin(); row != B.createend(); ++row) {
@@ -129,7 +129,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(CUJACApplyBlocksize1, T, NumericTypes)
B[2][2][0][0] = -1.0;
B[3][3][0][0] = -1.0;
auto cujac = Opm::cuistl::PreconditionerAdapter<Vector, Vector, CuJac>(std::make_shared<CuJac>(B, 0.5));
auto cujac = Opm::gpuistl::PreconditionerAdapter<Vector, Vector, CuJac>(std::make_shared<CuJac>(B, 0.5));
Vector vVector(4);
Vector dVector(4);

View File

@@ -46,7 +46,7 @@ main(int argc, char** argv)
int rank, totalRanks;
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &totalRanks);
Opm::cuistl::setDevice(rank, totalRanks);
Opm::gpuistl::setDevice(rank, totalRanks);
boost::unit_test::unit_test_main(&init_unit_test_func, argc, argv);
}
@@ -62,12 +62,12 @@ BOOST_AUTO_TEST_CASE(TestProject)
auto ownerOverlapCopy = Dune::OwnerOverlapCopyCommunication<int>(indexInfo, MPI_COMM_WORLD);
auto xCPU = std::vector<double> {{1.0, 2.0, 3.0}};
auto xGPU = Opm::cuistl::CuVector<double>(xCPU);
auto xGPU = Opm::gpuistl::CuVector<double>(xCPU);
auto gpuComm = std::make_shared<Opm::cuistl::GPUObliviousMPISender<double, 1, Dune::OwnerOverlapCopyCommunication<int>>>(ownerOverlapCopy);
auto gpuComm = std::make_shared<Opm::gpuistl::GPUObliviousMPISender<double, 1, Dune::OwnerOverlapCopyCommunication<int>>>(ownerOverlapCopy);
auto cuOwnerOverlapCopy
= Opm::cuistl::CuOwnerOverlapCopy<double, 1, Dune::OwnerOverlapCopyCommunication<int>>(gpuComm);
= Opm::gpuistl::CuOwnerOverlapCopy<double, 1, Dune::OwnerOverlapCopyCommunication<int>>(gpuComm);
cuOwnerOverlapCopy.project(xGPU);
@@ -94,12 +94,12 @@ BOOST_AUTO_TEST_CASE(TestDot)
indexInfo.addRemoteIndex(std::make_tuple(0, 2, Dune::OwnerOverlapCopyAttributeSet::copy));
auto ownerOverlapCopy = Dune::OwnerOverlapCopyCommunication<int>(indexInfo, MPI_COMM_WORLD);
auto xCPU = std::vector<double> {{1.0, 2.0, 3.0}};
auto xGPU = Opm::cuistl::CuVector<double>(xCPU);
auto xGPU = Opm::gpuistl::CuVector<double>(xCPU);
auto gpuComm = std::make_shared<Opm::cuistl::GPUObliviousMPISender<double, 1, Dune::OwnerOverlapCopyCommunication<int>>>(ownerOverlapCopy);
auto gpuComm = std::make_shared<Opm::gpuistl::GPUObliviousMPISender<double, 1, Dune::OwnerOverlapCopyCommunication<int>>>(ownerOverlapCopy);
auto cuOwnerOverlapCopy
= Opm::cuistl::CuOwnerOverlapCopy<double, 1, Dune::OwnerOverlapCopyCommunication<int>>(gpuComm);
= Opm::gpuistl::CuOwnerOverlapCopy<double, 1, Dune::OwnerOverlapCopyCommunication<int>>(gpuComm);
double outputDune = -1.0;
auto xDune = xGPU.asDuneBlockVector<1>();

View File

@@ -63,7 +63,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(TestFiniteDifference1D, T, NumericTypes)
using M = Dune::FieldMatrix<T, 1, 1>;
using SpMatrix = Dune::BCRSMatrix<M>;
using Vector = Dune::BlockVector<Dune::FieldVector<T, 1>>;
using CuILU0 = Opm::cuistl::CuSeqILU0<SpMatrix, Opm::cuistl::CuVector<T>, Opm::cuistl::CuVector<T>>;
using CuILU0 = Opm::gpuistl::CuSeqILU0<SpMatrix, Opm::gpuistl::CuVector<T>, Opm::gpuistl::CuVector<T>>;
SpMatrix B(N, N, nonZeroes, SpMatrix::row_wise);
for (auto row = B.createbegin(); row != B.createend(); ++row) {
@@ -91,7 +91,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(TestFiniteDifference1D, T, NumericTypes)
auto duneILU = Dune::SeqILU<SpMatrix, Vector, Vector>(B, 1.0);
auto cuILU = Opm::cuistl::PreconditionerAdapter<Vector, Vector, CuILU0>(std::make_shared<CuILU0>(B, 1.0));
auto cuILU = Opm::gpuistl::PreconditionerAdapter<Vector, Vector, CuILU0>(std::make_shared<CuILU0>(B, 1.0));
// check for the standard basis {e_i}
// (e_i=(0,...,0, 1 (i-th place), 0, ..., 0))
@@ -158,7 +158,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(TestFiniteDifferenceBlock2, T, NumericTypes)
using M = Dune::FieldMatrix<T, 2, 2>;
using SpMatrix = Dune::BCRSMatrix<M>;
using Vector = Dune::BlockVector<Dune::FieldVector<T, 2>>;
using CuILU0 = Opm::cuistl::CuSeqILU0<SpMatrix, Opm::cuistl::CuVector<T>, Opm::cuistl::CuVector<T>>;
using CuILU0 = Opm::gpuistl::CuSeqILU0<SpMatrix, Opm::gpuistl::CuVector<T>, Opm::gpuistl::CuVector<T>>;
SpMatrix B(N, N, nonZeroes, SpMatrix::row_wise);
for (auto row = B.createbegin(); row != B.createend(); ++row) {
@@ -181,7 +181,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(TestFiniteDifferenceBlock2, T, NumericTypes)
auto duneILU = Dune::SeqILU<SpMatrix, Vector, Vector>(B, 1.0);
auto cuILU = Opm::cuistl::PreconditionerAdapter<Vector, Vector, CuILU0>(std::make_shared<CuILU0>(B, 1.0));
auto cuILU = Opm::gpuistl::PreconditionerAdapter<Vector, Vector, CuILU0>(std::make_shared<CuILU0>(B, 1.0));
// check for the standard basis {e_i}
// (e_i=(0,...,0, 1 (i-th place), 0, ..., 0))

View File

@@ -26,7 +26,7 @@
BOOST_AUTO_TEST_CASE(TestGetSparseVersion)
{
auto& cuSparseHandle = ::Opm::cuistl::detail::CuSparseHandle::getInstance();
auto& cuSparseHandle = ::Opm::gpuistl::detail::CuSparseHandle::getInstance();
int cuSparseVersion = -1;
OPM_CUSPARSE_SAFE_CALL(cusparseGetVersion(cuSparseHandle.get(), &cuSparseVersion));
BOOST_CHECK_LT(0, cuSparseVersion);

View File

@@ -76,7 +76,7 @@ BOOST_AUTO_TEST_CASE(TestConstruction1D)
}
}
auto cuSparseMatrix = Opm::cuistl::CuSparseMatrix<double>::fromMatrix(B);
auto cuSparseMatrix = Opm::gpuistl::CuSparseMatrix<double>::fromMatrix(B);
const auto& nonZeroValuesCuda = cuSparseMatrix.getNonZeroValues();
std::vector<double> buffer(cuSparseMatrix.nonzeroes(), 0.0);
@@ -143,14 +143,14 @@ BOOST_AUTO_TEST_CASE(RandomSparsityMatrix)
}
}
auto cuSparseMatrix = Opm::cuistl::CuSparseMatrix<double>::fromMatrix(B);
auto cuSparseMatrix = Opm::gpuistl::CuSparseMatrix<double>::fromMatrix(B);
// check each column
for (size_t component = 0; component < N; ++component) {
std::vector<double> inputDataX(N * dim, 0.0);
inputDataX[component] = 1.0;
std::vector<double> inputDataY(N * dim, .25);
auto inputVectorX = Opm::cuistl::CuVector<double>(inputDataX.data(), inputDataX.size());
auto inputVectorY = Opm::cuistl::CuVector<double>(inputDataY.data(), inputDataY.size());
auto inputVectorX = Opm::gpuistl::CuVector<double>(inputDataX.data(), inputDataX.size());
auto inputVectorY = Opm::gpuistl::CuVector<double>(inputDataY.data(), inputDataY.size());
Vector xHost(N), yHost(N);
yHost = inputDataY[0];
inputVectorX.copyToHost(xHost);

View File

@@ -32,7 +32,7 @@ BOOST_AUTO_TEST_CASE(TestDocumentedUsage)
{
auto someDataOnCPU = std::vector<double>({1.0, 2.0, 42.0, 59.9451743, 10.7132692});
auto dataOnGPU = ::Opm::cuistl::CuVector<double>(someDataOnCPU);
auto dataOnGPU = ::Opm::gpuistl::CuVector<double>(someDataOnCPU);
// Multiply by 4.0:
dataOnGPU *= 4.0;
@@ -50,14 +50,14 @@ BOOST_AUTO_TEST_CASE(TestDocumentedUsage)
BOOST_AUTO_TEST_CASE(TestConstructionSize)
{
const int numberOfElements = 1234;
auto vectorOnGPU = Opm::cuistl::CuVector<double>(numberOfElements);
auto vectorOnGPU = Opm::gpuistl::CuVector<double>(numberOfElements);
BOOST_CHECK_EQUAL(numberOfElements, vectorOnGPU.dim());
}
BOOST_AUTO_TEST_CASE(TestCopyFromHostConstructor)
{
std::vector<double> data {{1, 2, 3, 4, 5, 6, 7}};
auto vectorOnGPU = Opm::cuistl::CuVector<double>(data.data(), data.size());
auto vectorOnGPU = Opm::gpuistl::CuVector<double>(data.data(), data.size());
BOOST_CHECK_EQUAL(data.size(), vectorOnGPU.dim());
std::vector<double> buffer(data.size(), 0.0);
vectorOnGPU.copyToHost(buffer.data(), buffer.size());
@@ -68,7 +68,7 @@ BOOST_AUTO_TEST_CASE(TestCopyFromHostConstructor)
BOOST_AUTO_TEST_CASE(TestCopyFromHostFunction)
{
std::vector<double> data {{1, 2, 3, 4, 5, 6, 7}};
auto vectorOnGPU = Opm::cuistl::CuVector<double>(data.size());
auto vectorOnGPU = Opm::gpuistl::CuVector<double>(data.size());
BOOST_CHECK_EQUAL(data.size(), vectorOnGPU.dim());
vectorOnGPU.copyFromHost(data.data(), data.size());
std::vector<double> buffer(data.size(), 0.0);
@@ -80,7 +80,7 @@ BOOST_AUTO_TEST_CASE(TestCopyFromHostFunction)
BOOST_AUTO_TEST_CASE(TestCopyFromBvector)
{
auto blockVector = Dune::BlockVector<Dune::FieldVector<double, 2>> {{{42, 43}, {44, 45}, {46, 47}}};
auto vectorOnGPU = Opm::cuistl::CuVector<double>(blockVector.dim());
auto vectorOnGPU = Opm::gpuistl::CuVector<double>(blockVector.dim());
vectorOnGPU.copyFromHost(blockVector);
std::vector<double> buffer(vectorOnGPU.dim());
vectorOnGPU.copyToHost(buffer.data(), buffer.size());
@@ -93,7 +93,7 @@ BOOST_AUTO_TEST_CASE(TestCopyToBvector)
{
std::vector<double> data {{1, 2, 3, 4, 5, 6, 7, 8, 9}};
auto blockVector = Dune::BlockVector<Dune::FieldVector<double, 3>>(3);
auto vectorOnGPU = Opm::cuistl::CuVector<double>(data.data(), data.size());
auto vectorOnGPU = Opm::gpuistl::CuVector<double>(data.data(), data.size());
vectorOnGPU.copyToHost(blockVector);
@@ -103,7 +103,7 @@ BOOST_AUTO_TEST_CASE(TestCopyToBvector)
BOOST_AUTO_TEST_CASE(TestDataPointer)
{
std::vector<double> data {{1, 2, 3, 4, 5, 6, 7, 8, 9}};
auto vectorOnGPU = Opm::cuistl::CuVector<double>(data.data(), data.size());
auto vectorOnGPU = Opm::gpuistl::CuVector<double>(data.data(), data.size());
std::vector<double> buffer(data.size(), 0.0);
OPM_CUDA_SAFE_CALL(cudaMemcpy(buffer.data(), vectorOnGPU.data(), sizeof(double) * data.size(), cudaMemcpyDeviceToHost));
@@ -113,7 +113,7 @@ BOOST_AUTO_TEST_CASE(TestDataPointer)
BOOST_AUTO_TEST_CASE(TestCopyScalarMultiply)
{
std::vector<double> data {{1, 2, 3, 4, 5, 6, 7}};
auto vectorOnGPU = Opm::cuistl::CuVector<double>(data.data(), data.size());
auto vectorOnGPU = Opm::gpuistl::CuVector<double>(data.data(), data.size());
BOOST_CHECK_EQUAL(data.size(), vectorOnGPU.dim());
const double scalar = 42.25;
vectorOnGPU *= scalar;
@@ -128,7 +128,7 @@ BOOST_AUTO_TEST_CASE(TestCopyScalarMultiply)
BOOST_AUTO_TEST_CASE(TestTwoNorm)
{
std::vector<double> data {{1, 2, 3, 4, 5, 6, 7}};
auto vectorOnGPU = Opm::cuistl::CuVector<double>(data.data(), data.size());
auto vectorOnGPU = Opm::gpuistl::CuVector<double>(data.data(), data.size());
auto twoNorm = vectorOnGPU.two_norm();
double correctAnswer = 0.0;
@@ -143,8 +143,8 @@ BOOST_AUTO_TEST_CASE(TestDot)
{
std::vector<double> dataA {{1, 2, 3, 4, 5, 6, 7}};
std::vector<double> dataB {{8, 9, 10, 11, 12, 13, 14}};
auto vectorOnGPUA = Opm::cuistl::CuVector<double>(dataA.data(), dataA.size());
auto vectorOnGPUB = Opm::cuistl::CuVector<double>(dataB.data(), dataB.size());
auto vectorOnGPUA = Opm::gpuistl::CuVector<double>(dataA.data(), dataA.size());
auto vectorOnGPUB = Opm::gpuistl::CuVector<double>(dataB.data(), dataB.size());
auto dot = vectorOnGPUA.dot(vectorOnGPUB);
double correctAnswer = 0.0;
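The loop fills in the hand-computable answer: (1,...,7) dot (8,...,14) = 8 + 18 + 30 + 44 + 60 + 78 + 98 = 336; likewise, TestTwoNorm above expects sqrt(1 + 4 + 9 + 16 + 25 + 36 + 49) = sqrt(140), roughly 11.83.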
@@ -158,7 +158,7 @@ BOOST_AUTO_TEST_CASE(TestDot)
BOOST_AUTO_TEST_CASE(Assigment)
{
std::vector<double> data {{1, 2, 3, 4, 5, 6, 7}};
auto vectorOnGPU = Opm::cuistl::CuVector<double>(data.data(), data.size());
auto vectorOnGPU = Opm::gpuistl::CuVector<double>(data.data(), data.size());
vectorOnGPU = 10.0;
vectorOnGPU.copyToHost(data.data(), data.size());
@@ -171,9 +171,9 @@ BOOST_AUTO_TEST_CASE(Assigment)
BOOST_AUTO_TEST_CASE(CopyAssignment)
{
std::vector<double> data {{1, 2, 3, 4, 5, 6, 7}};
auto vectorOnGPU = Opm::cuistl::CuVector<double>(data.data(), data.size());
auto vectorOnGPU = Opm::gpuistl::CuVector<double>(data.data(), data.size());
vectorOnGPU.copyToHost(data.data(), data.size());
auto vectorOnGPUB = Opm::cuistl::CuVector<double>(data.size());
auto vectorOnGPUB = Opm::gpuistl::CuVector<double>(data.size());
vectorOnGPUB = 4.0;
vectorOnGPUB = vectorOnGPU;
@@ -185,7 +185,7 @@ BOOST_AUTO_TEST_CASE(CopyAssignment)
BOOST_AUTO_TEST_CASE(RandomVectors)
{
using GVector = Opm::cuistl::CuVector<double>;
using GVector = Opm::gpuistl::CuVector<double>;
std::srand(0);
std::mt19937 generator;
std::uniform_real_distribution<double> distribution(-100.0, 100.0);
@@ -268,7 +268,7 @@ BOOST_AUTO_TEST_CASE(RandomVectors)
indexSet.push_back(i);
}
}
auto indexSetGPU = Opm::cuistl::CuVector<int>(indexSet);
auto indexSetGPU = Opm::gpuistl::CuVector<int>(indexSet);
aGPU.setZeroAtIndexSet(indexSetGPU);
auto projectedA = aGPU.asStdVector();

View File

@@ -32,8 +32,8 @@
#include <algorithm>
#include <type_traits>
using CuViewDouble = ::Opm::cuistl::CuView<double>;
using CuBufferDouble = ::Opm::cuistl::CuBuffer<double>;
using CuViewDouble = ::Opm::gpuistl::CuView<double>;
using CuBufferDouble = ::Opm::gpuistl::CuBuffer<double>;
__global__ void useCuViewOnGPU(CuViewDouble a, CuViewDouble b){
b[0] = a.front();

View File

@@ -25,7 +25,7 @@
BOOST_AUTO_TEST_CASE(TestToIntThrowsOutofRange)
{
BOOST_CHECK_THROW(Opm::cuistl::detail::to_int(size_t(std::numeric_limits<int>::max()) + size_t(1));
BOOST_CHECK_THROW(Opm::gpuistl::detail::to_int(size_t(std::numeric_limits<int>::max()) + size_t(1));
, std::invalid_argument);
}
@@ -33,26 +33,26 @@ BOOST_AUTO_TEST_CASE(TestToIntConvertInRange)
{
// This might seem slow, but it is really fast:
for (size_t i = 0; i <= size_t(1024 * 1024); ++i) {
BOOST_CHECK_EQUAL(int(i), Opm::cuistl::detail::to_int(i));
BOOST_CHECK_EQUAL(int(i), Opm::gpuistl::detail::to_int(i));
}
BOOST_CHECK_EQUAL(std::numeric_limits<int>::max(),
Opm::cuistl::detail::to_int(size_t(std::numeric_limits<int>::max())));
Opm::gpuistl::detail::to_int(size_t(std::numeric_limits<int>::max())));
}
BOOST_AUTO_TEST_CASE(TestToSizeTThrowsOutofRange)
{
BOOST_CHECK_THROW(Opm::cuistl::detail::to_size_t(-1);, std::invalid_argument);
BOOST_CHECK_THROW(Opm::gpuistl::detail::to_size_t(-1);, std::invalid_argument);
}
BOOST_AUTO_TEST_CASE(TestToSizeTConvertInRange)
{
// This might seem slow, but it is really fast:
for (int i = 0; i <= 1024 * 1024; ++i) {
BOOST_CHECK_EQUAL(size_t(i), Opm::cuistl::detail::to_size_t(i));
BOOST_CHECK_EQUAL(size_t(i), Opm::gpuistl::detail::to_size_t(i));
}
BOOST_CHECK_EQUAL(size_t(std::numeric_limits<int>::max()),
Opm::cuistl::detail::to_size_t(std::numeric_limits<int>::max()));
Opm::gpuistl::detail::to_size_t(std::numeric_limits<int>::max()));
}
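A plausible shape for the checked narrowing conversions these tests pin down (a sketch, not the actual Opm::gpuistl::detail implementation):

#include <cstddef>
#include <limits>
#include <stdexcept>

int to_int(std::size_t s)
{
    // throw on values that do not fit in int, as TestToIntThrowsOutofRange expects
    if (s > static_cast<std::size_t>(std::numeric_limits<int>::max())) {
        throw std::invalid_argument("to_int: value too large for int");
    }
    return static_cast<int>(s);
}

std::size_t to_size_t(int i)
{
    // negative values cannot be represented, as TestToSizeTThrowsOutofRange expects
    if (i < 0) {
        throw std::invalid_argument("to_size_t: negative value");
    }
    return static_cast<std::size_t>(i);
}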

View File

@@ -31,7 +31,7 @@ using Matrix = Dune::BCRSMatrix<Dune::FieldMatrix<double, dim, dim>>;
using Vector = Dune::BlockVector<Dune::FieldVector<double, dim>>;
using Moperator = Dune::MatrixAdapter<Matrix, Vector, Vector>;
using PrecondFactory = Opm::PreconditionerFactory<Moperator, Dune::Amg::SequentialInformation>;
using SolverAdapter = Opm::cuistl::SolverAdapter<Moperator, Dune::BiCGSTABSolver, Vector>;
using SolverAdapter = Opm::gpuistl::SolverAdapter<Moperator, Dune::BiCGSTABSolver, Vector>;
namespace
{