mirror of
https://github.com/OPM/opm-simulators.git
synced 2025-02-25 18:55:30 -06:00
Merge pull request #4636 from kjetilly/cuistl_multigpu
Path to multigpu: Cuistl multigpu
This commit is contained in:
commit
ee6edd83a0
@ -552,6 +552,8 @@ if(CUDA_FOUND)
|
|||||||
cuvector
|
cuvector
|
||||||
cusparsematrix
|
cusparsematrix
|
||||||
cuseqilu0
|
cuseqilu0
|
||||||
|
cuowneroverlapcopy
|
||||||
|
solver_adapter
|
||||||
PROPERTIES LABELS gpu_cuda)
|
PROPERTIES LABELS gpu_cuda)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
@ -175,7 +175,10 @@ if(CUDA_FOUND)
|
|||||||
list (APPEND PUBLIC_HEADER_FILES opm/simulators/linalg/cuistl/CuSeqILU0.hpp)
|
list (APPEND PUBLIC_HEADER_FILES opm/simulators/linalg/cuistl/CuSeqILU0.hpp)
|
||||||
list (APPEND PUBLIC_HEADER_FILES opm/simulators/linalg/cuistl/detail/fix_zero_diagonal.hpp)
|
list (APPEND PUBLIC_HEADER_FILES opm/simulators/linalg/cuistl/detail/fix_zero_diagonal.hpp)
|
||||||
list (APPEND PUBLIC_HEADER_FILES opm/simulators/linalg/cuistl/PreconditionerConvertFieldTypeAdapter.hpp)
|
list (APPEND PUBLIC_HEADER_FILES opm/simulators/linalg/cuistl/PreconditionerConvertFieldTypeAdapter.hpp)
|
||||||
|
list (APPEND PUBLIC_HEADER_FILES opm/simulators/linalg/cuistl/CuOwnerOverlapCopy.hpp)
|
||||||
|
list (APPEND PUBLIC_HEADER_FILES opm/simulators/linalg/cuistl/SolverAdapter.hpp)
|
||||||
|
list (APPEND PUBLIC_HEADER_FILES opm/simulators/linalg/cuistl/CuBlockPreconditioner.hpp)
|
||||||
|
list (APPEND PUBLIC_HEADER_FILES opm/simulators/linalg/cuistl/PreconditionerHolder.hpp)
|
||||||
|
|
||||||
endif()
|
endif()
|
||||||
if(OPENCL_FOUND)
|
if(OPENCL_FOUND)
|
||||||
@ -265,6 +268,9 @@ if(CUDA_FOUND)
|
|||||||
list(APPEND TEST_SOURCE_FILES tests/cuistl/test_safe_conversion.cpp)
|
list(APPEND TEST_SOURCE_FILES tests/cuistl/test_safe_conversion.cpp)
|
||||||
list(APPEND TEST_SOURCE_FILES tests/cuistl/test_cuseqilu0.cpp)
|
list(APPEND TEST_SOURCE_FILES tests/cuistl/test_cuseqilu0.cpp)
|
||||||
list(APPEND TEST_SOURCE_FILES tests/cuistl/test_converttofloatadapter.cpp)
|
list(APPEND TEST_SOURCE_FILES tests/cuistl/test_converttofloatadapter.cpp)
|
||||||
|
list(APPEND TEST_SOURCE_FILES tests/cuistl/test_cuowneroverlapcopy.cpp)
|
||||||
|
list(APPEND TEST_SOURCE_FILES tests/cuistl/test_solver_adapter.cpp)
|
||||||
|
|
||||||
|
|
||||||
endif()
|
endif()
|
||||||
if(OPENCL_FOUND)
|
if(OPENCL_FOUND)
|
||||||
|
@ -37,6 +37,10 @@
|
|||||||
#include <dune/istl/owneroverlapcopy.hh>
|
#include <dune/istl/owneroverlapcopy.hh>
|
||||||
#include <dune/istl/paamg/pinfo.hh>
|
#include <dune/istl/paamg/pinfo.hh>
|
||||||
|
|
||||||
|
#if HAVE_CUDA
|
||||||
|
#include <opm/simulators/linalg/cuistl/SolverAdapter.hpp>
|
||||||
|
#endif
|
||||||
|
|
||||||
namespace Dune
|
namespace Dune
|
||||||
{
|
{
|
||||||
/// Create a sequential solver.
|
/// Create a sequential solver.
|
||||||
@ -167,7 +171,7 @@ namespace Dune
|
|||||||
*scalarproduct_,
|
*scalarproduct_,
|
||||||
*preconditioner_,
|
*preconditioner_,
|
||||||
tol,// desired residual reduction factor
|
tol,// desired residual reduction factor
|
||||||
restart,
|
restart,
|
||||||
maxiter, // maximum number of iterations
|
maxiter, // maximum number of iterations
|
||||||
verbosity);
|
verbosity);
|
||||||
} else if (solver_type == "flexgmres") {
|
} else if (solver_type == "flexgmres") {
|
||||||
@ -176,7 +180,7 @@ namespace Dune
|
|||||||
*scalarproduct_,
|
*scalarproduct_,
|
||||||
*preconditioner_,
|
*preconditioner_,
|
||||||
tol,// desired residual reduction factor
|
tol,// desired residual reduction factor
|
||||||
restart,
|
restart,
|
||||||
maxiter, // maximum number of iterations
|
maxiter, // maximum number of iterations
|
||||||
verbosity);
|
verbosity);
|
||||||
#if HAVE_SUITESPARSE_UMFPACK
|
#if HAVE_SUITESPARSE_UMFPACK
|
||||||
@ -184,6 +188,16 @@ namespace Dune
|
|||||||
bool dummy = false;
|
bool dummy = false;
|
||||||
using MatrixType = std::remove_const_t<std::remove_reference_t<decltype(linearoperator_for_solver_->getmat())>>;
|
using MatrixType = std::remove_const_t<std::remove_reference_t<decltype(linearoperator_for_solver_->getmat())>>;
|
||||||
linsolver_ = std::make_shared<Dune::UMFPack<MatrixType>>(linearoperator_for_solver_->getmat(), verbosity, dummy);
|
linsolver_ = std::make_shared<Dune::UMFPack<MatrixType>>(linearoperator_for_solver_->getmat(), verbosity, dummy);
|
||||||
|
#endif
|
||||||
|
#if HAVE_CUDA
|
||||||
|
} else if (solver_type == "cubicgstab") {
|
||||||
|
linsolver_.reset(new Opm::cuistl::SolverAdapter<Operator, Dune::BiCGSTABSolver, VectorType>(
|
||||||
|
*linearoperator_for_solver_,
|
||||||
|
*scalarproduct_,
|
||||||
|
preconditioner_,
|
||||||
|
tol, // desired residual reduction factor
|
||||||
|
maxiter, // maximum number of iterations
|
||||||
|
verbosity));
|
||||||
#endif
|
#endif
|
||||||
} else {
|
} else {
|
||||||
OPM_THROW(std::invalid_argument,
|
OPM_THROW(std::invalid_argument,
|
||||||
|
@ -45,6 +45,8 @@
|
|||||||
#include <opm/simulators/linalg/cuistl/CuSeqILU0.hpp>
|
#include <opm/simulators/linalg/cuistl/CuSeqILU0.hpp>
|
||||||
#include <opm/simulators/linalg/cuistl/PreconditionerAdapter.hpp>
|
#include <opm/simulators/linalg/cuistl/PreconditionerAdapter.hpp>
|
||||||
#include <opm/simulators/linalg/cuistl/PreconditionerConvertFieldTypeAdapter.hpp>
|
#include <opm/simulators/linalg/cuistl/PreconditionerConvertFieldTypeAdapter.hpp>
|
||||||
|
#include <opm/simulators/linalg/cuistl/CuBlockPreconditioner.hpp>
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
@ -224,6 +226,19 @@ struct StandardPreconditioners
|
|||||||
return std::make_shared<OwningTwoLevelPreconditioner<O, V, LevelTransferPolicy, Comm>>(op, prm, weightsCalculator, pressureIndex, comm);
|
return std::make_shared<OwningTwoLevelPreconditioner<O, V, LevelTransferPolicy, Comm>>(op, prm, weightsCalculator, pressureIndex, comm);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if HAVE_CUDA
|
||||||
|
F::addCreator("CUILU0", [](const O& op, const P& prm, const std::function<V()>&, std::size_t, const C& comm) {
|
||||||
|
const double w = prm.get<double>("relaxation", 1.0);
|
||||||
|
using field_type = typename V::field_type;
|
||||||
|
using CuILU0 = typename Opm::cuistl::CuSeqILU0<M, Opm::cuistl::CuVector<field_type>, Opm::cuistl::CuVector<field_type>>;
|
||||||
|
auto cuILU0 = std::make_shared<CuILU0>(op.getmat(), w);
|
||||||
|
|
||||||
|
auto adapted = std::make_shared<Opm::cuistl::PreconditionerAdapter<V, V, CuILU0>>(cuILU0);
|
||||||
|
auto wrapped = std::make_shared<Opm::cuistl::CuBlockPreconditioner<V, V, Comm>>(adapted, comm);
|
||||||
|
return wrapped;
|
||||||
|
});
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
125
opm/simulators/linalg/cuistl/CuBlockPreconditioner.hpp
Normal file
125
opm/simulators/linalg/cuistl/CuBlockPreconditioner.hpp
Normal file
@ -0,0 +1,125 @@
|
|||||||
|
/*
|
||||||
|
Copyright 2022-2023 SINTEF AS
|
||||||
|
|
||||||
|
This file is part of the Open Porous Media project (OPM).
|
||||||
|
|
||||||
|
OPM is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation, either version 3 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
OPM is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with OPM. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
#ifndef OPM_CUISTL_CUBLOCKPRECONDITIONER_HPP
|
||||||
|
#define OPM_CUISTL_CUBLOCKPRECONDITIONER_HPP
|
||||||
|
|
||||||
|
#include <dune/common/shared_ptr.hh>
|
||||||
|
#include <memory>
|
||||||
|
#include <opm/simulators/linalg/PreconditionerWithUpdate.hpp>
|
||||||
|
#include <opm/simulators/linalg/cuistl/PreconditionerHolder.hpp>
|
||||||
|
#include <opm/simulators/linalg/cuistl/detail/preconditioner_should_call_post_pre.hpp>
|
||||||
|
|
||||||
|
namespace Opm::cuistl
|
||||||
|
{
|
||||||
|
//! @brief Is an adaptation of Dune::BlockPreconditioner that works within the CuISTL framework.
|
||||||
|
//!
|
||||||
|
//! @note We aim to intgrate this into OwningBlockPreconditioner (or a relative thereof).
|
||||||
|
template <class X, class Y, class C, class P = Dune::PreconditionerWithUpdate<X, Y>>
|
||||||
|
class CuBlockPreconditioner : public Dune::PreconditionerWithUpdate<X, Y>, public PreconditionerHolder<X, Y>
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
using domain_type = X;
|
||||||
|
using range_type = Y;
|
||||||
|
using field_type = typename X::field_type;
|
||||||
|
using communication_type = C;
|
||||||
|
|
||||||
|
|
||||||
|
//! @brief Constructor.
|
||||||
|
//!
|
||||||
|
//! constructor gets all parameters to operate the prec.
|
||||||
|
//! @param p The sequential preconditioner.
|
||||||
|
//! @param c The communication object for syncing overlap and copy
|
||||||
|
//! data points. (E.~g. OwnerOverlapCopyCommunication )
|
||||||
|
//!
|
||||||
|
CuBlockPreconditioner(const std::shared_ptr<P>& p, const std::shared_ptr<const communication_type>& c)
|
||||||
|
: m_preconditioner(p)
|
||||||
|
, m_communication(c)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
CuBlockPreconditioner(const std::shared_ptr<P>& p, const communication_type& c)
|
||||||
|
: m_preconditioner(p)
|
||||||
|
, m_communication(Dune::stackobject_to_shared_ptr(c))
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
//! \brief Prepare the preconditioner.
|
||||||
|
//!
|
||||||
|
//! \copydoc Preconditioner::pre(X&,Y&)
|
||||||
|
virtual void pre(X& x, Y& b) override
|
||||||
|
{
|
||||||
|
// TODO: [perf] Do we always need to copy, or only when we need the pre step?
|
||||||
|
m_communication->copyOwnerToAll(x, x); // make Dirichlet values consistent
|
||||||
|
if constexpr (detail::shouldCallPreconditionerPre<P>()) {
|
||||||
|
m_preconditioner->pre(x, b);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
//! \brief Apply the preconditioner
|
||||||
|
//!
|
||||||
|
//! \copydoc Preconditioner::apply(X&,const Y&)
|
||||||
|
virtual void apply(X& v, const Y& d) override
|
||||||
|
{
|
||||||
|
m_preconditioner->apply(v, d);
|
||||||
|
m_communication->copyOwnerToAll(v, v);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
virtual void update() override
|
||||||
|
{
|
||||||
|
m_preconditioner->update();
|
||||||
|
}
|
||||||
|
|
||||||
|
virtual void post(X& x) override
|
||||||
|
{
|
||||||
|
if constexpr (detail::shouldCallPreconditionerPost<P>()) {
|
||||||
|
m_preconditioner->post(x);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
//! Category of the preconditioner (see SolverCategory::Category)
|
||||||
|
virtual Dune::SolverCategory::Category category() const override
|
||||||
|
{
|
||||||
|
return Dune::SolverCategory::overlapping;
|
||||||
|
}
|
||||||
|
|
||||||
|
static constexpr bool shouldCallPre()
|
||||||
|
{
|
||||||
|
return detail::shouldCallPreconditionerPost<P>();
|
||||||
|
}
|
||||||
|
static constexpr bool shouldCallPost()
|
||||||
|
{
|
||||||
|
return detail::shouldCallPreconditionerPre<P>();
|
||||||
|
}
|
||||||
|
|
||||||
|
virtual std::shared_ptr<Dune::PreconditionerWithUpdate<X, Y>> getUnderlyingPreconditioner() override
|
||||||
|
{
|
||||||
|
return m_preconditioner;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
private:
|
||||||
|
//! \brief a sequential preconditioner
|
||||||
|
std::shared_ptr<P> m_preconditioner;
|
||||||
|
|
||||||
|
//! \brief the communication object
|
||||||
|
std::shared_ptr<const communication_type> m_communication;
|
||||||
|
};
|
||||||
|
} // namespace Opm::cuistl
|
||||||
|
#endif
|
149
opm/simulators/linalg/cuistl/CuOwnerOverlapCopy.hpp
Normal file
149
opm/simulators/linalg/cuistl/CuOwnerOverlapCopy.hpp
Normal file
@ -0,0 +1,149 @@
|
|||||||
|
/*
|
||||||
|
Copyright 2022-2023 SINTEF AS
|
||||||
|
|
||||||
|
This file is part of the Open Porous Media project (OPM).
|
||||||
|
|
||||||
|
OPM is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation, either version 3 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
OPM is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with OPM. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
#ifndef OPM_CUISTL_CUOWNEROVERLAPCOPY_HPP
|
||||||
|
#define OPM_CUISTL_CUOWNEROVERLAPCOPY_HPP
|
||||||
|
#include <dune/istl/owneroverlapcopy.hh>
|
||||||
|
#include <memory>
|
||||||
|
#include <mutex>
|
||||||
|
#include <opm/simulators/linalg/cuistl/CuVector.hpp>
|
||||||
|
|
||||||
|
namespace Opm::cuistl
|
||||||
|
{
|
||||||
|
/**
|
||||||
|
* @brief CUDA compatiable variant of Dune::OwnerOverlapCopyCommunication
|
||||||
|
*
|
||||||
|
* This class can essentially be seen as an adapter around Dune::OwnerOverlapCopyCommunication, and should work as
|
||||||
|
* a Dune::OwnerOverlapCopyCommunication on CuVectors
|
||||||
|
*
|
||||||
|
*
|
||||||
|
* @note This currently only has the functionality to parallelize the linear solve.
|
||||||
|
*
|
||||||
|
* @tparam field_type should be a field_type supported by CuVector (double, float)
|
||||||
|
* @tparam block_size the block size used (this is relevant for say figuring out the correct indices)
|
||||||
|
* @tparam OwnerOverlapCopyCommunicationType should mimic Dune::OwnerOverlapCopyCommunication.
|
||||||
|
*/
|
||||||
|
template <class field_type, int block_size, class OwnerOverlapCopyCommunicationType>
|
||||||
|
class CuOwnerOverlapCopy
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
using X = CuVector<field_type>;
|
||||||
|
|
||||||
|
CuOwnerOverlapCopy(const OwnerOverlapCopyCommunicationType& cpuOwnerOverlapCopy)
|
||||||
|
: m_cpuOwnerOverlapCopy(cpuOwnerOverlapCopy)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
/**
|
||||||
|
* @brief dot will carry out the dot product between x and y on the owned indices, then sum up the result across MPI
|
||||||
|
* processes.
|
||||||
|
* @param[out] output result will be stored here
|
||||||
|
*
|
||||||
|
* @note This uses the same interface as its DUNE equivalent.
|
||||||
|
*/
|
||||||
|
void dot(const X& x, const X& y, field_type& output) const
|
||||||
|
{
|
||||||
|
std::call_once(m_initializedIndices, [&]() { initIndexSet(); });
|
||||||
|
|
||||||
|
const auto dotAtRank = x.dot(y, *m_indicesOwner);
|
||||||
|
output = m_cpuOwnerOverlapCopy.communicator().sum(dotAtRank);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief norm computes the l^2-norm of x across processes.
|
||||||
|
*
|
||||||
|
* This will compute the dot product of x with itself on owned indices, then
|
||||||
|
* sum the result across process and return the square root of the sum.
|
||||||
|
*/
|
||||||
|
field_type norm(const X& x) const
|
||||||
|
{
|
||||||
|
auto xDotX = field_type(0);
|
||||||
|
this->dot(x, x, xDotX);
|
||||||
|
|
||||||
|
using std::sqrt;
|
||||||
|
return sqrt(xDotX);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief project will project x to the owned subspace
|
||||||
|
*
|
||||||
|
* For each component i which is not owned, x_i will be set to 0
|
||||||
|
*
|
||||||
|
* @param[inout] x the vector to project
|
||||||
|
*/
|
||||||
|
void project(X& x) const
|
||||||
|
{
|
||||||
|
std::call_once(m_initializedIndices, [&]() { initIndexSet(); });
|
||||||
|
x.setZeroAtIndexSet(*m_indicesCopy);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief copyOwnerToAll will copy source to the CPU, then call OwnerOverlapCopyCommunicationType::copyOwnerToAll on
|
||||||
|
* the copied data, and copy the result back to the GPU
|
||||||
|
* @param[in] source
|
||||||
|
* @param[out] dest
|
||||||
|
*/
|
||||||
|
void copyOwnerToAll(const X& source, X& dest) const
|
||||||
|
{
|
||||||
|
// TODO: [perf] Can we reduce copying from the GPU here?
|
||||||
|
// TODO: [perf] Maybe create a global buffer instead?
|
||||||
|
auto sourceAsDuneVector = source.template asDuneBlockVector<block_size>();
|
||||||
|
auto destAsDuneVector = dest.template asDuneBlockVector<block_size>();
|
||||||
|
m_cpuOwnerOverlapCopy.copyOwnerToAll(sourceAsDuneVector, destAsDuneVector);
|
||||||
|
dest.copyFromHost(destAsDuneVector);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
private:
|
||||||
|
const OwnerOverlapCopyCommunicationType& m_cpuOwnerOverlapCopy;
|
||||||
|
|
||||||
|
// Used to call the initIndexSet. Note that this is kind of a
|
||||||
|
// premature optimization, in the sense that we could just initialize these indices
|
||||||
|
// always, but they are not always used.
|
||||||
|
mutable std::once_flag m_initializedIndices;
|
||||||
|
mutable std::unique_ptr<CuVector<int>> m_indicesCopy;
|
||||||
|
mutable std::unique_ptr<CuVector<int>> m_indicesOwner;
|
||||||
|
|
||||||
|
|
||||||
|
void initIndexSet() const
|
||||||
|
{
|
||||||
|
// We need indices that we we will use in the project, dot and norm calls.
|
||||||
|
// TODO: [premature perf] Can this be run once per instance? Or do we need to rebuild every time?
|
||||||
|
const auto& pis = m_cpuOwnerOverlapCopy.indexSet();
|
||||||
|
std::vector<int> indicesCopyOnCPU;
|
||||||
|
std::vector<int> indicesOwnerCPU;
|
||||||
|
for (const auto& index : pis) {
|
||||||
|
if (index.local().attribute() == Dune::OwnerOverlapCopyAttributeSet::copy) {
|
||||||
|
for (int component = 0; component < block_size; ++component) {
|
||||||
|
indicesCopyOnCPU.push_back(index.local().local() * block_size + component);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (index.local().attribute() == Dune::OwnerOverlapCopyAttributeSet::owner) {
|
||||||
|
for (int component = 0; component < block_size; ++component) {
|
||||||
|
indicesOwnerCPU.push_back(index.local().local() * block_size + component);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
m_indicesCopy = std::make_unique<CuVector<int>>(indicesCopyOnCPU);
|
||||||
|
m_indicesOwner = std::make_unique<CuVector<int>>(indicesOwnerCPU);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
} // namespace Opm::cuistl
|
||||||
|
#endif
|
@ -22,6 +22,7 @@
|
|||||||
#include <dune/istl/preconditioner.hh>
|
#include <dune/istl/preconditioner.hh>
|
||||||
#include <opm/simulators/linalg/PreconditionerWithUpdate.hpp>
|
#include <opm/simulators/linalg/PreconditionerWithUpdate.hpp>
|
||||||
#include <opm/simulators/linalg/cuistl/CuVector.hpp>
|
#include <opm/simulators/linalg/cuistl/CuVector.hpp>
|
||||||
|
#include <opm/simulators/linalg/cuistl/PreconditionerHolder.hpp>
|
||||||
#include <opm/simulators/linalg/cuistl/detail/preconditioner_should_call_post_pre.hpp>
|
#include <opm/simulators/linalg/cuistl/detail/preconditioner_should_call_post_pre.hpp>
|
||||||
|
|
||||||
|
|
||||||
@ -36,7 +37,9 @@ namespace Opm::cuistl
|
|||||||
//! \tparam Y the range type (should be on the CPU). Typicall a Dune::BlockVector
|
//! \tparam Y the range type (should be on the CPU). Typicall a Dune::BlockVector
|
||||||
//! \tparam CudaPreconditionerType the preconditioner taking CuVector<real_type> as arguments to apply
|
//! \tparam CudaPreconditionerType the preconditioner taking CuVector<real_type> as arguments to apply
|
||||||
template <class X, class Y, class CudaPreconditionerType>
|
template <class X, class Y, class CudaPreconditionerType>
|
||||||
class PreconditionerAdapter : public Dune::PreconditionerWithUpdate<X, Y>
|
class PreconditionerAdapter
|
||||||
|
: public Dune::PreconditionerWithUpdate<X, Y>,
|
||||||
|
public PreconditionerHolder<CuVector<typename X::field_type>, CuVector<typename Y::field_type>>
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
//! \brief The domain type of the preconditioner.
|
//! \brief The domain type of the preconditioner.
|
||||||
@ -114,6 +117,12 @@ public:
|
|||||||
return detail::shouldCallPreconditionerPre<CudaPreconditionerType>();
|
return detail::shouldCallPreconditionerPre<CudaPreconditionerType>();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
virtual std::shared_ptr<Dune::PreconditionerWithUpdate<CuVector<field_type>, CuVector<field_type>>>
|
||||||
|
getUnderlyingPreconditioner() override
|
||||||
|
{
|
||||||
|
return m_underlyingPreconditioner;
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
//! \brief the underlying preconditioner to use
|
//! \brief the underlying preconditioner to use
|
||||||
std::shared_ptr<CudaPreconditionerType> m_underlyingPreconditioner;
|
std::shared_ptr<CudaPreconditionerType> m_underlyingPreconditioner;
|
||||||
|
43
opm/simulators/linalg/cuistl/PreconditionerHolder.hpp
Normal file
43
opm/simulators/linalg/cuistl/PreconditionerHolder.hpp
Normal file
@ -0,0 +1,43 @@
|
|||||||
|
/*
|
||||||
|
Copyright 2022-2023 SINTEF AS
|
||||||
|
|
||||||
|
This file is part of the Open Porous Media project (OPM).
|
||||||
|
|
||||||
|
OPM is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation, either version 3 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
OPM is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with OPM. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
#ifndef OPM_CUISTL_PRECONDITIONERHOLDER_HPP
|
||||||
|
#define OPM_CUISTL_PRECONDITIONERHOLDER_HPP
|
||||||
|
#include <opm/simulators/linalg/PreconditionerWithUpdate.hpp>
|
||||||
|
|
||||||
|
|
||||||
|
namespace Opm::cuistl
|
||||||
|
{
|
||||||
|
//! \brief Common interface for adapters that hold preconditioners.
|
||||||
|
//!
|
||||||
|
//! \note The goal is that this class will be made useless after further
|
||||||
|
//! restructuring of the solver interfaces (FlexibleSolver and friends), but
|
||||||
|
//! for this is needed as an intermediate layer. See specifically SolverAdapter.hpp
|
||||||
|
//! for how this is used.
|
||||||
|
template <class X, class Y>
|
||||||
|
class PreconditionerHolder
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
/**
|
||||||
|
* @brief getUnderlyingPreconditioner gets the underlying preconditioner (preconditioner being held)
|
||||||
|
*/
|
||||||
|
virtual std::shared_ptr<Dune::PreconditionerWithUpdate<X, Y>> getUnderlyingPreconditioner() = 0;
|
||||||
|
};
|
||||||
|
} // end namespace Opm::cuistl
|
||||||
|
|
||||||
|
#endif
|
214
opm/simulators/linalg/cuistl/SolverAdapter.hpp
Normal file
214
opm/simulators/linalg/cuistl/SolverAdapter.hpp
Normal file
@ -0,0 +1,214 @@
|
|||||||
|
/*
|
||||||
|
Copyright 2022-2023 SINTEF AS
|
||||||
|
|
||||||
|
This file is part of the Open Porous Media project (OPM).
|
||||||
|
|
||||||
|
OPM is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation, either version 3 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
OPM is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with OPM. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
#ifndef OPM_SOLVERADAPTER_HPP
|
||||||
|
#define OPM_SOLVERADAPTER_HPP
|
||||||
|
|
||||||
|
#include <memory>
|
||||||
|
|
||||||
|
#include <dune/istl/bcrsmatrix.hh>
|
||||||
|
#include <dune/istl/operators.hh>
|
||||||
|
#include <dune/istl/schwarz.hh>
|
||||||
|
#include <dune/istl/solver.hh>
|
||||||
|
#include <opm/common/ErrorMacros.hpp>
|
||||||
|
#include <opm/simulators/linalg/cuistl/CuBlockPreconditioner.hpp>
|
||||||
|
#include <opm/simulators/linalg/cuistl/CuOwnerOverlapCopy.hpp>
|
||||||
|
#include <opm/simulators/linalg/cuistl/CuSparseMatrix.hpp>
|
||||||
|
#include <opm/simulators/linalg/cuistl/CuVector.hpp>
|
||||||
|
#include <opm/simulators/linalg/cuistl/PreconditionerAdapter.hpp>
|
||||||
|
#include <opm/simulators/linalg/cuistl/detail/has_function.hpp>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
namespace Opm::cuistl
|
||||||
|
{
|
||||||
|
//! @brief Wraps a CUDA solver to work with CPU data.
|
||||||
|
//!
|
||||||
|
//! @tparam Operator the Dune::LinearOperator to use
|
||||||
|
//! @tparam UnderlyingSolver a Dune solver like class, eg Dune::BiCGSTABSolver
|
||||||
|
//! @tparam X the outer type to use (eg. Dune::BlockVector<Dune::FieldVector<...>>)
|
||||||
|
template <class Operator, template <class> class UnderlyingSolver, class X>
|
||||||
|
class SolverAdapter : public Dune::IterativeSolver<X, X>
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
using typename Dune::IterativeSolver<X, X>::domain_type;
|
||||||
|
using typename Dune::IterativeSolver<X, X>::range_type;
|
||||||
|
using typename Dune::IterativeSolver<X, X>::field_type;
|
||||||
|
using typename Dune::IterativeSolver<X, X>::real_type;
|
||||||
|
using typename Dune::IterativeSolver<X, X>::scalar_real_type;
|
||||||
|
static constexpr auto block_size = domain_type::block_type::dimension;
|
||||||
|
using XGPU = Opm::cuistl::CuVector<real_type>;
|
||||||
|
|
||||||
|
// TODO: Use a std::forward
|
||||||
|
SolverAdapter(Operator& op,
|
||||||
|
Dune::ScalarProduct<X>& sp,
|
||||||
|
std::shared_ptr<Dune::Preconditioner<X, X>> prec,
|
||||||
|
scalar_real_type reduction,
|
||||||
|
int maxit,
|
||||||
|
int verbose)
|
||||||
|
: Dune::IterativeSolver<X, X>(op, sp, *prec, reduction, maxit, verbose)
|
||||||
|
, m_opOnCPUWithMatrix(op)
|
||||||
|
, m_matrix(CuSparseMatrix<real_type>::fromMatrix(op.getmat()))
|
||||||
|
, m_underlyingSolver(constructSolver(prec, reduction, maxit, verbose))
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
virtual void apply(X& x, X& b, double reduction, Dune::InverseOperatorResult& res) override
|
||||||
|
{
|
||||||
|
// TODO: Can we do this without reimplementing the other function?
|
||||||
|
// TODO: [perf] Do we need to update the matrix every time? Probably yes
|
||||||
|
m_matrix.updateNonzeroValues(m_opOnCPUWithMatrix.getmat());
|
||||||
|
|
||||||
|
if (!m_inputBuffer) {
|
||||||
|
m_inputBuffer.reset(new XGPU(b.dim()));
|
||||||
|
m_outputBuffer.reset(new XGPU(x.dim()));
|
||||||
|
}
|
||||||
|
|
||||||
|
m_inputBuffer->copyFromHost(b);
|
||||||
|
// TODO: [perf] do we need to copy x here?
|
||||||
|
m_outputBuffer->copyFromHost(x);
|
||||||
|
|
||||||
|
m_underlyingSolver.apply(*m_outputBuffer, *m_inputBuffer, reduction, res);
|
||||||
|
|
||||||
|
// TODO: [perf] do we need to copy b here?
|
||||||
|
m_inputBuffer->copyToHost(b);
|
||||||
|
m_outputBuffer->copyToHost(x);
|
||||||
|
}
|
||||||
|
virtual void apply(X& x, X& b, Dune::InverseOperatorResult& res) override
|
||||||
|
{
|
||||||
|
// TODO: [perf] Do we need to update the matrix every time? Probably yes
|
||||||
|
m_matrix.updateNonzeroValues(m_opOnCPUWithMatrix.getmat());
|
||||||
|
|
||||||
|
if (!m_inputBuffer) {
|
||||||
|
m_inputBuffer.reset(new XGPU(b.dim()));
|
||||||
|
m_outputBuffer.reset(new XGPU(x.dim()));
|
||||||
|
}
|
||||||
|
|
||||||
|
m_inputBuffer->copyFromHost(b);
|
||||||
|
// TODO: [perf] do we need to copy x here?
|
||||||
|
m_outputBuffer->copyFromHost(x);
|
||||||
|
|
||||||
|
m_underlyingSolver.apply(*m_outputBuffer, *m_inputBuffer, res);
|
||||||
|
|
||||||
|
// TODO: [perf] do we need to copy b here?
|
||||||
|
m_inputBuffer->copyToHost(b);
|
||||||
|
m_outputBuffer->copyToHost(x);
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
Operator& m_opOnCPUWithMatrix;
|
||||||
|
CuSparseMatrix<real_type> m_matrix;
|
||||||
|
|
||||||
|
UnderlyingSolver<XGPU> m_underlyingSolver;
|
||||||
|
|
||||||
|
|
||||||
|
// TODO: Use a std::forward
|
||||||
|
UnderlyingSolver<XGPU> constructSolver(std::shared_ptr<Dune::Preconditioner<X, X>> prec,
|
||||||
|
scalar_real_type reduction,
|
||||||
|
int maxit,
|
||||||
|
int verbose)
|
||||||
|
{
|
||||||
|
|
||||||
|
OPM_ERROR_IF(
|
||||||
|
detail::is_a_well_operator<Operator>::value,
|
||||||
|
"Currently we only support operators of type MatrixAdapter in the CUDA solver. "
|
||||||
|
"Use --matrix-add-well-contributions=true. "
|
||||||
|
"Using WellModelMatrixAdapter with SolverAdapter is not well-defined so it will not work well, or at all.");
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
if constexpr (detail::has_communication<Operator>::value) {
|
||||||
|
// TODO: See the below TODO over the definition of precHolder in the other branch
|
||||||
|
// TODO: We are currently double wrapping preconditioners in the preconditioner factory to be extra
|
||||||
|
// compatible
|
||||||
|
// with CPU. Probably a cleaner way eventually would be to do more modifications to the flexible
|
||||||
|
// solver to accomodate the pure GPU better.
|
||||||
|
auto precAsHolder = std::dynamic_pointer_cast<PreconditionerHolder<X, X>>(prec);
|
||||||
|
if (!precAsHolder) {
|
||||||
|
OPM_THROW(std::invalid_argument,
|
||||||
|
"The preconditioner needs to be a CUDA preconditioner (eg. CuILU0) wrapped in a "
|
||||||
|
"Opm::cuistl::PreconditionerAdapter wrapped in a "
|
||||||
|
"Opm::cuistl::CuBlockPreconditioner. If you are unsure what this means, set "
|
||||||
|
"preconditioner to 'CUILU0'"); // TODO: Suggest a better preconditioner
|
||||||
|
}
|
||||||
|
|
||||||
|
auto preconditionerAdapter = precAsHolder->getUnderlyingPreconditioner();
|
||||||
|
auto preconditionerAdapterAsHolder
|
||||||
|
= std::dynamic_pointer_cast<PreconditionerHolder<XGPU, XGPU>>(preconditionerAdapter);
|
||||||
|
if (!preconditionerAdapterAsHolder) {
|
||||||
|
OPM_THROW(std::invalid_argument,
|
||||||
|
"The preconditioner needs to be a CUDA preconditioner (eg. CuILU0) wrapped in a "
|
||||||
|
"Opm::cuistl::PreconditionerAdapter wrapped in a "
|
||||||
|
"Opm::cuistl::CuBlockPreconditioner. If you are unsure what this means, set "
|
||||||
|
"preconditioner to 'CUILU0'"); // TODO: Suggest a better preconditioner
|
||||||
|
}
|
||||||
|
// We need to get the underlying preconditioner:
|
||||||
|
auto preconditionerReallyOnGPU = preconditionerAdapterAsHolder->getUnderlyingPreconditioner();
|
||||||
|
const auto& communication = m_opOnCPUWithMatrix.getCommunication();
|
||||||
|
|
||||||
|
using CudaCommunication = CuOwnerOverlapCopy<real_type, block_size, typename Operator::communication_type>;
|
||||||
|
using SchwarzOperator
|
||||||
|
= Dune::OverlappingSchwarzOperator<CuSparseMatrix<real_type>, XGPU, XGPU, CudaCommunication>;
|
||||||
|
auto cudaCommunication = std::make_shared<CudaCommunication>(communication);
|
||||||
|
|
||||||
|
auto mpiPreconditioner = std::make_shared<CuBlockPreconditioner<XGPU, XGPU, CudaCommunication>>(
|
||||||
|
preconditionerReallyOnGPU, cudaCommunication);
|
||||||
|
|
||||||
|
auto scalarProduct = std::make_shared<Dune::ParallelScalarProduct<XGPU, CudaCommunication>>(
|
||||||
|
cudaCommunication, m_opOnCPUWithMatrix.category());
|
||||||
|
|
||||||
|
|
||||||
|
// NOTE: Ownsership of cudaCommunication is handled by mpiPreconditioner. However, just to make sure we
|
||||||
|
// remember
|
||||||
|
// this, we add this check. So remember that we hold one count in this scope, and one in the
|
||||||
|
// CuBlockPreconditioner instance. We accomedate for the fact that it could be passed around in
|
||||||
|
// CuBlockPreconditioner, hence we do not test for != 2
|
||||||
|
OPM_ERROR_IF(cudaCommunication.use_count() < 2, "Internal error. Shared pointer not owned properly.");
|
||||||
|
auto overlappingCudaOperator = std::make_shared<SchwarzOperator>(m_matrix, *cudaCommunication);
|
||||||
|
|
||||||
|
return UnderlyingSolver<XGPU>(
|
||||||
|
overlappingCudaOperator, scalarProduct, mpiPreconditioner, reduction, maxit, verbose);
|
||||||
|
} else {
|
||||||
|
// TODO: Fix the reliance on casting here. This is a code smell to a certain degree, and assumes
|
||||||
|
// a certain setup beforehand. The only reason we do it this way is to minimize edits to the
|
||||||
|
// flexible solver. We could design it differently, but keep this for the time being until
|
||||||
|
// we figure out how we want to GPU-ify the rest of the system.
|
||||||
|
auto precAsHolder = std::dynamic_pointer_cast<PreconditionerHolder<XGPU, XGPU>>(prec);
|
||||||
|
if (!precAsHolder) {
|
||||||
|
OPM_THROW(std::invalid_argument,
|
||||||
|
"The preconditioner needs to be a CUDA preconditioner wrapped in a "
|
||||||
|
"Opm::cuistl::PreconditionerHolder (eg. CuILU0).");
|
||||||
|
}
|
||||||
|
auto preconditionerOnGPU = precAsHolder->getUnderlyingPreconditioner();
|
||||||
|
|
||||||
|
auto matrixOperator
|
||||||
|
= std::make_shared<Dune::MatrixAdapter<CuSparseMatrix<real_type>, XGPU, XGPU>>(m_matrix);
|
||||||
|
auto scalarProduct = std::make_shared<Dune::SeqScalarProduct<XGPU>>();
|
||||||
|
return UnderlyingSolver<XGPU>(
|
||||||
|
matrixOperator, scalarProduct, preconditionerOnGPU, reduction, maxit, verbose);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
std::unique_ptr<XGPU> m_inputBuffer;
|
||||||
|
std::unique_ptr<XGPU> m_outputBuffer;
|
||||||
|
};
|
||||||
|
} // namespace Opm::cuistl
|
||||||
|
|
||||||
|
#endif
|
@ -84,5 +84,42 @@ public:
|
|||||||
static constexpr bool value = std::is_same_v<decltype(test<T>(0)), std::true_type>;
|
static constexpr bool value = std::is_same_v<decltype(test<T>(0)), std::true_type>;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief The has_communication class checks if the type has the member function getCommunication.
|
||||||
|
*
|
||||||
|
* This is used in the SolverAdapter to check if the operator has a communication, and it will then select a different
|
||||||
|
* path accordingly.
|
||||||
|
*/
|
||||||
|
template <typename T>
|
||||||
|
class has_communication
|
||||||
|
{
|
||||||
|
template <typename U>
|
||||||
|
static std::true_type test(decltype(&U::getCommunication));
|
||||||
|
template <typename U>
|
||||||
|
static std::false_type test(...);
|
||||||
|
|
||||||
|
public:
|
||||||
|
static constexpr bool value = std::is_same_v<decltype(test<T>(0)), std::true_type>;
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief The is_a_well_operator class tries to guess if the operator is a well type operator
|
||||||
|
*
|
||||||
|
* @note This is mainly done in the solver adapter to detect incompatible runtime arguments. When the GPU linear solve
|
||||||
|
* paths supports wells, this class can be removed.
|
||||||
|
*/
|
||||||
|
template <typename T>
|
||||||
|
class is_a_well_operator
|
||||||
|
{
|
||||||
|
template <typename U>
|
||||||
|
static std::true_type test(decltype(&U::addWellPressureEquations));
|
||||||
|
template <typename U>
|
||||||
|
static std::false_type test(...);
|
||||||
|
|
||||||
|
public:
|
||||||
|
static constexpr bool value = std::is_same_v<decltype(test<T>(0)), std::true_type>;
|
||||||
|
};
|
||||||
|
|
||||||
} // namespace Opm::cuistl::detail
|
} // namespace Opm::cuistl::detail
|
||||||
#endif
|
#endif
|
||||||
|
104
tests/cuistl/test_cuowneroverlapcopy.cpp
Normal file
104
tests/cuistl/test_cuowneroverlapcopy.cpp
Normal file
@ -0,0 +1,104 @@
|
|||||||
|
/*
|
||||||
|
Copyright 2023 SINTEF AS
|
||||||
|
|
||||||
|
This file is part of the Open Porous Media project (OPM).
|
||||||
|
|
||||||
|
OPM is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation, either version 3 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
OPM is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with OPM. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
#include <config.h>
|
||||||
|
|
||||||
|
#define BOOST_TEST_MODULE TestCuOwnerOverlapCopy
|
||||||
|
#define BOOST_TEST_NO_MAIN
|
||||||
|
|
||||||
|
#include <boost/test/unit_test.hpp>
|
||||||
|
#include <dune/common/parallel/mpihelper.hh>
|
||||||
|
#include <dune/istl/bcrsmatrix.hh>
|
||||||
|
#include <dune/istl/owneroverlapcopy.hh>
|
||||||
|
#include <memory>
|
||||||
|
#include <opm/simulators/linalg/cuistl/CuOwnerOverlapCopy.hpp>
|
||||||
|
#include <opm/simulators/linalg/cuistl/CuVector.hpp>
|
||||||
|
#include <opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp>
|
||||||
|
#include <random>
|
||||||
|
|
||||||
|
bool
|
||||||
|
init_unit_test_func()
|
||||||
|
{
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
main(int argc, char** argv)
|
||||||
|
{
|
||||||
|
[[maybe_unused]] const auto& helper = Dune::MPIHelper::instance(argc, argv);
|
||||||
|
boost::unit_test::unit_test_main(&init_unit_test_func, argc, argv);
|
||||||
|
}
|
||||||
|
|
||||||
|
BOOST_AUTO_TEST_CASE(TestProject)
|
||||||
|
{
|
||||||
|
|
||||||
|
// We're going to have three points: Centered is owned, left and right is copied. We assume a periodic domain
|
||||||
|
// ([0,1]/0~1)
|
||||||
|
auto indexInfo = Dune::IndexInfoFromGrid<int, int>();
|
||||||
|
indexInfo.addLocalIndex(std::make_tuple(0, 0, Dune::OwnerOverlapCopyAttributeSet::copy));
|
||||||
|
indexInfo.addLocalIndex(std::make_tuple(1, 1, Dune::OwnerOverlapCopyAttributeSet::owner));
|
||||||
|
indexInfo.addLocalIndex(std::make_tuple(2, 2, Dune::OwnerOverlapCopyAttributeSet::copy));
|
||||||
|
|
||||||
|
auto ownerOverlapCopy = Dune::OwnerOverlapCopyCommunication<int>(indexInfo, MPI_COMM_WORLD);
|
||||||
|
auto xCPU = std::vector<double> {{1.0, 2.0, 3.0}};
|
||||||
|
auto xGPU = Opm::cuistl::CuVector<double>(xCPU);
|
||||||
|
|
||||||
|
auto cuOwnerOverlapCopy
|
||||||
|
= Opm::cuistl::CuOwnerOverlapCopy<double, 1, Dune::OwnerOverlapCopyCommunication<int>>(ownerOverlapCopy);
|
||||||
|
|
||||||
|
cuOwnerOverlapCopy.project(xGPU);
|
||||||
|
|
||||||
|
auto resultOfProject = xGPU.asStdVector();
|
||||||
|
|
||||||
|
BOOST_CHECK_EQUAL(0.0, resultOfProject[0]);
|
||||||
|
BOOST_CHECK_EQUAL(2.0, resultOfProject[1]);
|
||||||
|
BOOST_CHECK_EQUAL(0.0, resultOfProject[2]);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
BOOST_AUTO_TEST_CASE(TestDot)
|
||||||
|
{
|
||||||
|
|
||||||
|
// We're going to have three points: Centered is owned, left and right is copied. We assume a periodic domain
|
||||||
|
// ([0,1]/0~1)
|
||||||
|
auto indexInfo = Dune::IndexInfoFromGrid<int, int>();
|
||||||
|
indexInfo.addLocalIndex(std::make_tuple(0, 0, Dune::OwnerOverlapCopyAttributeSet::copy));
|
||||||
|
indexInfo.addLocalIndex(std::make_tuple(1, 1, Dune::OwnerOverlapCopyAttributeSet::owner));
|
||||||
|
indexInfo.addLocalIndex(std::make_tuple(2, 2, Dune::OwnerOverlapCopyAttributeSet::copy));
|
||||||
|
|
||||||
|
indexInfo.addRemoteIndex(std::make_tuple(0, 0, Dune::OwnerOverlapCopyAttributeSet::copy));
|
||||||
|
indexInfo.addRemoteIndex(std::make_tuple(0, 1, Dune::OwnerOverlapCopyAttributeSet::owner));
|
||||||
|
indexInfo.addRemoteIndex(std::make_tuple(0, 2, Dune::OwnerOverlapCopyAttributeSet::copy));
|
||||||
|
auto ownerOverlapCopy = Dune::OwnerOverlapCopyCommunication<int>(indexInfo, MPI_COMM_WORLD);
|
||||||
|
auto xCPU = std::vector<double> {{1.0, 2.0, 3.0}};
|
||||||
|
auto xGPU = Opm::cuistl::CuVector<double>(xCPU);
|
||||||
|
|
||||||
|
auto cuOwnerOverlapCopy
|
||||||
|
= Opm::cuistl::CuOwnerOverlapCopy<double, 1, Dune::OwnerOverlapCopyCommunication<int>>(ownerOverlapCopy);
|
||||||
|
|
||||||
|
double outputDune = -1.0;
|
||||||
|
auto xDune = xGPU.asDuneBlockVector<1>();
|
||||||
|
ownerOverlapCopy.dot(xDune, xDune, outputDune);
|
||||||
|
|
||||||
|
double output = -1.0;
|
||||||
|
cuOwnerOverlapCopy.dot(xGPU, xGPU, output);
|
||||||
|
|
||||||
|
|
||||||
|
BOOST_CHECK_EQUAL(outputDune, output);
|
||||||
|
BOOST_CHECK_EQUAL(4.0, output);
|
||||||
|
}
|
117
tests/cuistl/test_solver_adapter.cpp
Normal file
117
tests/cuistl/test_solver_adapter.cpp
Normal file
@ -0,0 +1,117 @@
|
|||||||
|
/*
|
||||||
|
Copyright 2022-2023 SINTEF AS
|
||||||
|
|
||||||
|
This file is part of the Open Porous Media project (OPM).
|
||||||
|
|
||||||
|
OPM is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation, either version 3 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
OPM is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with OPM. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
#include <config.h>
|
||||||
|
|
||||||
|
#define BOOST_TEST_MODULE TestSolverAdapter
|
||||||
|
|
||||||
|
#include <boost/test/unit_test.hpp>
|
||||||
|
#include <dune/istl/solvers.hh>
|
||||||
|
#include <opm/simulators/linalg/PreconditionerFactory.hpp>
|
||||||
|
#include <opm/simulators/linalg/PropertyTree.hpp>
|
||||||
|
#include <opm/simulators/linalg/cuistl/SolverAdapter.hpp>
|
||||||
|
|
||||||
|
static const constexpr int dim = 3;
|
||||||
|
using Matrix = Dune::BCRSMatrix<Dune::FieldMatrix<double, dim, dim>>;
|
||||||
|
using Vector = Dune::BlockVector<Dune::FieldVector<double, dim>>;
|
||||||
|
using Moperator = Dune::MatrixAdapter<Matrix, Vector, Vector>;
|
||||||
|
using PrecondFactory = Opm::PreconditionerFactory<Moperator, Dune::Amg::SequentialInformation>;
|
||||||
|
using SolverAdapter = Opm::cuistl::SolverAdapter<Moperator, Dune::BiCGSTABSolver, Vector>;
|
||||||
|
|
||||||
|
namespace
|
||||||
|
{
|
||||||
|
auto
|
||||||
|
createSolverAdapterWithMatrix(const size_t N = 10)
|
||||||
|
{
|
||||||
|
|
||||||
|
|
||||||
|
const int nonZeroes = N * 3 - 2;
|
||||||
|
|
||||||
|
// We need to hold the matrix in memory somehow, but we don't want to deference
|
||||||
|
// a pointer all the time (quality of life...):
|
||||||
|
auto matrixPtr = std::make_shared<Matrix>(N, N, nonZeroes, Matrix::row_wise);
|
||||||
|
auto& matrix = *matrixPtr;
|
||||||
|
for (auto row = matrix.createbegin(); row != matrix.createend(); ++row) {
|
||||||
|
// Add nonzeros for left neighbour, diagonal and right neighbour
|
||||||
|
if (row.index() > 0) {
|
||||||
|
row.insert(row.index() - 1);
|
||||||
|
}
|
||||||
|
row.insert(row.index());
|
||||||
|
if (row.index() < matrix.N() - 1) {
|
||||||
|
row.insert(row.index() + 1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// This might not be the most elegant way of filling in a Dune sparse matrix, but it works.
|
||||||
|
for (size_t i = 0; i < N; ++i) {
|
||||||
|
for (int k = 0; k < dim; ++k) {
|
||||||
|
matrix[i][i][k][k] = -2;
|
||||||
|
}
|
||||||
|
if (i < N - 1) {
|
||||||
|
for (int k = 0; k < dim; ++k) {
|
||||||
|
matrix[i][i + 1][k][k] = 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (i > 0) {
|
||||||
|
for (int k = 0; k < dim; ++k) {
|
||||||
|
matrix[i][i - 1][k][k] = 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
auto op = std::make_shared<Moperator>(matrix);
|
||||||
|
auto sp = std::make_shared<Dune::ScalarProduct<Vector>>();
|
||||||
|
auto prm = Opm::PropertyTree();
|
||||||
|
prm.put<double>("relaxation", 1.0);
|
||||||
|
prm.put<std::string>("type", "CUILU0");
|
||||||
|
auto prec = PrecondFactory::create(*op, prm);
|
||||||
|
auto solverAdapter = std::make_shared<SolverAdapter>(*op, *sp, prec, 1.0, 10, 0);
|
||||||
|
|
||||||
|
return std::make_tuple(matrixPtr, solverAdapter, op, sp);
|
||||||
|
}
|
||||||
|
} // namespace
|
||||||
|
|
||||||
|
BOOST_AUTO_TEST_CASE(TestCreation)
|
||||||
|
{
|
||||||
|
BOOST_CHECK_NO_THROW(createSolverAdapterWithMatrix(););
|
||||||
|
}
|
||||||
|
|
||||||
|
BOOST_AUTO_TEST_CASE(TestSolve)
|
||||||
|
{
|
||||||
|
const size_t N = 10;
|
||||||
|
auto [matrix, solverAdapter, op, sp] = createSolverAdapterWithMatrix(N);
|
||||||
|
|
||||||
|
Vector xActual(N), xInitial(N), b(N);
|
||||||
|
for (size_t i = 0; i < N; ++i) {
|
||||||
|
for (size_t j = 0; j < dim; ++j) {
|
||||||
|
xActual[i][j] = 1.0;
|
||||||
|
xInitial[i][j] = 0.1 * i;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
matrix->mv(xActual, b);
|
||||||
|
Dune::InverseOperatorResult res;
|
||||||
|
solverAdapter->apply(xInitial, b, res);
|
||||||
|
|
||||||
|
for (size_t i = 0; i < N; ++i) {
|
||||||
|
for (size_t j = 0; j < dim; ++j) {
|
||||||
|
// This should actually be up to rounding exact since ILU is just the inverse
|
||||||
|
// for this matrix.
|
||||||
|
BOOST_CHECK_CLOSE(xActual[i][j], xInitial[i][j], 1e-13);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user