mirror of
https://github.com/OPM/opm-simulators.git
synced 2025-02-25 18:55:30 -06:00
Expose CuISTL solver in FlexibleSolver.
This commit is contained in:
parent
0269f7215c
commit
ceb15e22e3
@ -552,6 +552,8 @@ if(CUDA_FOUND)
|
||||
cuvector
|
||||
cusparsematrix
|
||||
cuseqilu0
|
||||
cuowneroverlapcopy
|
||||
solver_adapter
|
||||
PROPERTIES LABELS gpu_cuda)
|
||||
endif()
|
||||
|
||||
|
@ -172,16 +172,13 @@ if(CUDA_FOUND)
|
||||
list (APPEND PUBLIC_HEADER_FILES opm/simulators/linalg/cuistl/detail/has_function.hpp)
|
||||
list (APPEND PUBLIC_HEADER_FILES opm/simulators/linalg/cuistl/detail/preconditioner_should_call_post_pre.hpp)
|
||||
list (APPEND PUBLIC_HEADER_FILES opm/simulators/linalg/cuistl/PreconditionerAdapter.hpp)
|
||||
<<<<<<< HEAD
|
||||
list (APPEND PUBLIC_HEADER_FILES opm/simulators/linalg/cuistl/CuSeqILU0.hpp)
|
||||
list (APPEND PUBLIC_HEADER_FILES opm/simulators/linalg/cuistl/detail/fix_zero_diagonal.hpp)
|
||||
list (APPEND PUBLIC_HEADER_FILES opm/simulators/linalg/cuistl/PreconditionerConvertFieldTypeAdapter.hpp)
|
||||
=======
|
||||
list (APPEND PUBLIC_HEADER_FILES opm/simulators/linalg/cuistl/PreconditionerConvertFieldTypeAdapter.hpp)
|
||||
|
||||
|
||||
>>>>>>> 05ee1a855 (Added conversion preconditioner.)
|
||||
|
||||
list (APPEND PUBLIC_HEADER_FILES opm/simulators/linalg/cuistl/CuOwnerOverlapCopy.hpp)
|
||||
list (APPEND PUBLIC_HEADER_FILES opm/simulators/linalg/cuistl/SolverAdapter.hpp)
|
||||
list (APPEND PUBLIC_HEADER_FILES opm/simulators/linalg/cuistl/CuBlockPreconditioner.hpp)
|
||||
list (APPEND PUBLIC_HEADER_FILES opm/simulators/linalg/cuistl/PreconditionerHolder.hpp)
|
||||
|
||||
endif()
|
||||
if(OPENCL_FOUND)
|
||||
@ -271,6 +268,9 @@ if(CUDA_FOUND)
|
||||
list(APPEND TEST_SOURCE_FILES tests/cuistl/test_safe_conversion.cpp)
|
||||
list(APPEND TEST_SOURCE_FILES tests/cuistl/test_cuseqilu0.cpp)
|
||||
list(APPEND TEST_SOURCE_FILES tests/cuistl/test_converttofloatadapter.cpp)
|
||||
list(APPEND TEST_SOURCE_FILES tests/cuistl/test_cuowneroverlapcopy.cpp)
|
||||
list(APPEND TEST_SOURCE_FILES tests/cuistl/test_solver_adapter.cpp)
|
||||
|
||||
|
||||
endif()
|
||||
if(OPENCL_FOUND)
|
||||
|
@ -37,6 +37,10 @@
|
||||
#include <dune/istl/owneroverlapcopy.hh>
|
||||
#include <dune/istl/paamg/pinfo.hh>
|
||||
|
||||
#if HAVE_CUDA
|
||||
#include <opm/simulators/linalg/cuistl/SolverAdapter.hpp>
|
||||
#endif
|
||||
|
||||
namespace Dune
|
||||
{
|
||||
/// Create a sequential solver.
|
||||
@ -184,6 +188,16 @@ namespace Dune
|
||||
bool dummy = false;
|
||||
using MatrixType = std::remove_const_t<std::remove_reference_t<decltype(linearoperator_for_solver_->getmat())>>;
|
||||
linsolver_ = std::make_shared<Dune::UMFPack<MatrixType>>(linearoperator_for_solver_->getmat(), verbosity, dummy);
|
||||
#endif
|
||||
#if HAVE_CUDA
|
||||
} else if (solver_type == "cubicgstab") {
|
||||
linsolver_.reset(new Opm::cuistl::SolverAdapter<Operator, Dune::BiCGSTABSolver, VectorType>(
|
||||
*linearoperator_for_solver_,
|
||||
scalarproduct_,
|
||||
preconditioner_,
|
||||
tol, // desired residual reduction factor
|
||||
maxiter, // maximum number of iterations
|
||||
verbosity));
|
||||
#endif
|
||||
} else {
|
||||
OPM_THROW(std::invalid_argument,
|
||||
|
125
opm/simulators/linalg/cuistl/CuBlockPreconditioner.hpp
Normal file
125
opm/simulators/linalg/cuistl/CuBlockPreconditioner.hpp
Normal file
@ -0,0 +1,125 @@
|
||||
/*
|
||||
Copyright 2022-2023 SINTEF AS
|
||||
|
||||
This file is part of the Open Porous Media project (OPM).
|
||||
|
||||
OPM is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
OPM is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with OPM. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
#ifndef OPM_CUISTL_CUBLOCKPRECONDITIONER_HPP
|
||||
#define OPM_CUISTL_CUBLOCKPRECONDITIONER_HPP
|
||||
|
||||
#include <dune/common/shared_ptr.hh>
|
||||
#include <memory>
|
||||
#include <opm/simulators/linalg/PreconditionerWithUpdate.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/PreconditionerHolder.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/preconditioner_should_call_post_pre.hpp>
|
||||
|
||||
namespace Opm::cuistl
|
||||
{
|
||||
//! @brief Is an adaptation of Dune::BlockPreconditioner that works within the CuISTL framework.
|
||||
//!
|
||||
//! @note We aim to intgrate this into OwningBlockPreconditioner (or a relative thereof).
|
||||
template <class X, class Y, class C, class P = Dune::PreconditionerWithUpdate<X, Y>>
|
||||
class CuBlockPreconditioner : public Dune::PreconditionerWithUpdate<X, Y>, public PreconditionerHolder<X, Y>
|
||||
{
|
||||
public:
|
||||
using domain_type = X;
|
||||
using range_type = Y;
|
||||
using field_type = typename X::field_type;
|
||||
using communication_type = C;
|
||||
|
||||
|
||||
//! @brief Constructor.
|
||||
//!
|
||||
//! constructor gets all parameters to operate the prec.
|
||||
//! @param p The sequential preconditioner.
|
||||
//! @param c The communication object for syncing overlap and copy
|
||||
//! data points. (E.~g. OwnerOverlapCopyCommunication )
|
||||
//!
|
||||
CuBlockPreconditioner(const std::shared_ptr<P>& p, const std::shared_ptr<const communication_type>& c)
|
||||
: m_preconditioner(p)
|
||||
, m_communication(c)
|
||||
{
|
||||
}
|
||||
|
||||
CuBlockPreconditioner(const std::shared_ptr<P>& p, const communication_type& c)
|
||||
: m_preconditioner(p)
|
||||
, m_communication(Dune::stackobject_to_shared_ptr(c))
|
||||
{
|
||||
}
|
||||
|
||||
//! \brief Prepare the preconditioner.
|
||||
//!
|
||||
//! \copydoc Preconditioner::pre(X&,Y&)
|
||||
virtual void pre(X& x, Y& b) override
|
||||
{
|
||||
// TODO: [perf] Do we always need to copy, or only when we need the pre step?
|
||||
m_communication->copyOwnerToAll(x, x); // make Dirichlet values consistent
|
||||
if constexpr (detail::shouldCallPreconditionerPre<P>()) {
|
||||
m_preconditioner->pre(x, b);
|
||||
}
|
||||
}
|
||||
|
||||
//! \brief Apply the preconditioner
|
||||
//!
|
||||
//! \copydoc Preconditioner::apply(X&,const Y&)
|
||||
virtual void apply(X& v, const Y& d) override
|
||||
{
|
||||
m_preconditioner->apply(v, d);
|
||||
m_communication->copyOwnerToAll(v, v);
|
||||
}
|
||||
|
||||
|
||||
virtual void update() override
|
||||
{
|
||||
m_preconditioner->update();
|
||||
}
|
||||
|
||||
virtual void post(X& x) override
|
||||
{
|
||||
if constexpr (detail::shouldCallPreconditionerPost<P>()) {
|
||||
m_preconditioner->post(x);
|
||||
}
|
||||
}
|
||||
|
||||
//! Category of the preconditioner (see SolverCategory::Category)
|
||||
virtual Dune::SolverCategory::Category category() const override
|
||||
{
|
||||
return Dune::SolverCategory::overlapping;
|
||||
}
|
||||
|
||||
static constexpr bool shouldCallPre()
|
||||
{
|
||||
return detail::shouldCallPreconditionerPost<P>();
|
||||
}
|
||||
static constexpr bool shouldCallPost()
|
||||
{
|
||||
return detail::shouldCallPreconditionerPre<P>();
|
||||
}
|
||||
|
||||
virtual std::shared_ptr<Dune::PreconditionerWithUpdate<X, Y>> getUnderlyingPreconditioner() override
|
||||
{
|
||||
return m_preconditioner;
|
||||
}
|
||||
|
||||
|
||||
private:
|
||||
//! \brief a sequential preconditioner
|
||||
std::shared_ptr<P> m_preconditioner;
|
||||
|
||||
//! \brief the communication object
|
||||
std::shared_ptr<const communication_type> m_communication;
|
||||
};
|
||||
} // namespace Opm::cuistl
|
||||
#endif
|
149
opm/simulators/linalg/cuistl/CuOwnerOverlapCopy.hpp
Normal file
149
opm/simulators/linalg/cuistl/CuOwnerOverlapCopy.hpp
Normal file
@ -0,0 +1,149 @@
|
||||
/*
|
||||
Copyright 2022-2023 SINTEF AS
|
||||
|
||||
This file is part of the Open Porous Media project (OPM).
|
||||
|
||||
OPM is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
OPM is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with OPM. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
#ifndef OPM_CUISTL_CUOWNEROVERLAPCOPY_HPP
|
||||
#define OPM_CUISTL_CUOWNEROVERLAPCOPY_HPP
|
||||
#include <dune/istl/owneroverlapcopy.hh>
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
#include <opm/simulators/linalg/cuistl/CuVector.hpp>
|
||||
|
||||
namespace Opm::cuistl
|
||||
{
|
||||
/**
|
||||
* @brief CUDA compatiable variant of Dune::OwnerOverlapCopyCommunication
|
||||
*
|
||||
* This class can essentially be seen as an adapter around Dune::OwnerOverlapCopyCommunication, and should work as
|
||||
* a Dune::OwnerOverlapCopyCommunication on CuVectors
|
||||
*
|
||||
*
|
||||
* @note This currently only has the functionality to parallelize the linear solve.
|
||||
*
|
||||
* @tparam field_type should be a field_type supported by CuVector (double, float)
|
||||
* @tparam block_size the block size used (this is relevant for say figuring out the correct indices)
|
||||
* @tparam OwnerOverlapCopyCommunicationType should mimic Dune::OwnerOverlapCopyCommunication.
|
||||
*/
|
||||
template <class field_type, int block_size, class OwnerOverlapCopyCommunicationType>
|
||||
class CuOwnerOverlapCopy
|
||||
{
|
||||
public:
|
||||
using X = CuVector<field_type>;
|
||||
|
||||
CuOwnerOverlapCopy(const OwnerOverlapCopyCommunicationType& cpuOwnerOverlapCopy)
|
||||
: m_cpuOwnerOverlapCopy(cpuOwnerOverlapCopy)
|
||||
{
|
||||
}
|
||||
/**
|
||||
* @brief dot will carry out the dot product between x and y on the owned indices, then sum up the result across MPI
|
||||
* processes.
|
||||
* @param[out] output result will be stored here
|
||||
*
|
||||
* @note This uses the same interface as its DUNE equivalent.
|
||||
*/
|
||||
void dot(const X& x, const X& y, field_type& output) const
|
||||
{
|
||||
std::call_once(m_initializedIndices, [&]() { initIndexSet(); });
|
||||
|
||||
const auto dotAtRank = x.dot(y, *m_indicesOwner);
|
||||
output = m_cpuOwnerOverlapCopy.communicator().sum(dotAtRank);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief norm computes the l^2-norm of x across processes.
|
||||
*
|
||||
* This will compute the dot product of x with itself on owned indices, then
|
||||
* sum the result across process and return the square root of the sum.
|
||||
*/
|
||||
field_type norm(const X& x) const
|
||||
{
|
||||
auto xDotX = field_type(0);
|
||||
this->dot(x, x, xDotX);
|
||||
|
||||
using std::sqrt;
|
||||
return sqrt(xDotX);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief project will project x to the owned subspace
|
||||
*
|
||||
* For each component i which is not owned, x_i will be set to 0
|
||||
*
|
||||
* @param[inout] x the vector to project
|
||||
*/
|
||||
void project(X& x) const
|
||||
{
|
||||
std::call_once(m_initializedIndices, [&]() { initIndexSet(); });
|
||||
x.setZeroAtIndexSet(*m_indicesCopy);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief copyOwnerToAll will copy source to the CPU, then call OwnerOverlapCopyCommunicationType::copyOwnerToAll on
|
||||
* the copied data, and copy the result back to the GPU
|
||||
* @param[in] source
|
||||
* @param[out] dest
|
||||
*/
|
||||
void copyOwnerToAll(const X& source, X& dest) const
|
||||
{
|
||||
// TODO: [perf] Can we reduce copying from the GPU here?
|
||||
// TODO: [perf] Maybe create a global buffer instead?
|
||||
auto sourceAsDuneVector = source.template asDuneBlockVector<block_size>();
|
||||
auto destAsDuneVector = dest.template asDuneBlockVector<block_size>();
|
||||
m_cpuOwnerOverlapCopy.copyOwnerToAll(sourceAsDuneVector, destAsDuneVector);
|
||||
dest.copyFromHost(destAsDuneVector);
|
||||
}
|
||||
|
||||
|
||||
|
||||
private:
|
||||
const OwnerOverlapCopyCommunicationType& m_cpuOwnerOverlapCopy;
|
||||
|
||||
// Used to call the initIndexSet. Note that this is kind of a
|
||||
// premature optimization, in the sense that we could just initialize these indices
|
||||
// always, but they are not always used.
|
||||
mutable std::once_flag m_initializedIndices;
|
||||
mutable std::unique_ptr<CuVector<int>> m_indicesCopy;
|
||||
mutable std::unique_ptr<CuVector<int>> m_indicesOwner;
|
||||
|
||||
|
||||
void initIndexSet() const
|
||||
{
|
||||
// We need indices that we we will use in the project, dot and norm calls.
|
||||
// TODO: [premature perf] Can this be run once per instance? Or do we need to rebuild every time?
|
||||
const auto& pis = m_cpuOwnerOverlapCopy.indexSet();
|
||||
std::vector<int> indicesCopyOnCPU;
|
||||
std::vector<int> indicesOwnerCPU;
|
||||
for (const auto& index : pis) {
|
||||
if (index.local().attribute() == Dune::OwnerOverlapCopyAttributeSet::copy) {
|
||||
for (int component = 0; component < block_size; ++component) {
|
||||
indicesCopyOnCPU.push_back(index.local().local() * block_size + component);
|
||||
}
|
||||
}
|
||||
|
||||
if (index.local().attribute() == Dune::OwnerOverlapCopyAttributeSet::owner) {
|
||||
for (int component = 0; component < block_size; ++component) {
|
||||
indicesOwnerCPU.push_back(index.local().local() * block_size + component);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
m_indicesCopy = std::make_unique<CuVector<int>>(indicesCopyOnCPU);
|
||||
m_indicesOwner = std::make_unique<CuVector<int>>(indicesOwnerCPU);
|
||||
}
|
||||
};
|
||||
} // namespace Opm::cuistl
|
||||
#endif
|
@ -22,6 +22,7 @@
|
||||
#include <dune/istl/preconditioner.hh>
|
||||
#include <opm/simulators/linalg/PreconditionerWithUpdate.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/CuVector.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/PreconditionerHolder.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/preconditioner_should_call_post_pre.hpp>
|
||||
|
||||
|
||||
@ -36,7 +37,9 @@ namespace Opm::cuistl
|
||||
//! \tparam Y the range type (should be on the CPU). Typicall a Dune::BlockVector
|
||||
//! \tparam CudaPreconditionerType the preconditioner taking CuVector<real_type> as arguments to apply
|
||||
template <class X, class Y, class CudaPreconditionerType>
|
||||
class PreconditionerAdapter : public Dune::PreconditionerWithUpdate<X, Y>
|
||||
class PreconditionerAdapter
|
||||
: public Dune::PreconditionerWithUpdate<X, Y>,
|
||||
public PreconditionerHolder<CuVector<typename X::field_type>, CuVector<typename Y::field_type>>
|
||||
{
|
||||
public:
|
||||
//! \brief The domain type of the preconditioner.
|
||||
@ -114,6 +117,12 @@ public:
|
||||
return detail::shouldCallPreconditionerPre<CudaPreconditionerType>();
|
||||
}
|
||||
|
||||
virtual std::shared_ptr<Dune::PreconditionerWithUpdate<CuVector<field_type>, CuVector<field_type>>>
|
||||
getUnderlyingPreconditioner() override
|
||||
{
|
||||
return m_underlyingPreconditioner;
|
||||
}
|
||||
|
||||
private:
|
||||
//! \brief the underlying preconditioner to use
|
||||
std::shared_ptr<CudaPreconditionerType> m_underlyingPreconditioner;
|
||||
|
43
opm/simulators/linalg/cuistl/PreconditionerHolder.hpp
Normal file
43
opm/simulators/linalg/cuistl/PreconditionerHolder.hpp
Normal file
@ -0,0 +1,43 @@
|
||||
/*
|
||||
Copyright 2022-2023 SINTEF AS
|
||||
|
||||
This file is part of the Open Porous Media project (OPM).
|
||||
|
||||
OPM is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
OPM is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with OPM. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
#ifndef OPM_CUISTL_PRECONDITIONERHOLDER_HPP
|
||||
#define OPM_CUISTL_PRECONDITIONERHOLDER_HPP
|
||||
#include <opm/simulators/linalg/PreconditionerWithUpdate.hpp>
|
||||
|
||||
|
||||
namespace Opm::cuistl
|
||||
{
|
||||
//! \brief Common interface for adapters that hold preconditioners.
|
||||
//!
|
||||
//! \note The goal is that this class will be made useless after further
|
||||
//! restructuring of the solver interfaces (FlexibleSolver and friends), but
|
||||
//! for this is needed as an intermediate layer. See specifically SolverAdapter.hpp
|
||||
//! for how this is used.
|
||||
template <class X, class Y>
|
||||
class PreconditionerHolder
|
||||
{
|
||||
public:
|
||||
/**
|
||||
* @brief getUnderlyingPreconditioner gets the underlying preconditioner (preconditioner being held)
|
||||
*/
|
||||
virtual std::shared_ptr<Dune::PreconditionerWithUpdate<X, Y>> getUnderlyingPreconditioner() = 0;
|
||||
};
|
||||
} // end namespace Opm::cuistl
|
||||
|
||||
#endif
|
214
opm/simulators/linalg/cuistl/SolverAdapter.hpp
Normal file
214
opm/simulators/linalg/cuistl/SolverAdapter.hpp
Normal file
@ -0,0 +1,214 @@
|
||||
/*
|
||||
Copyright 2022-2023 SINTEF AS
|
||||
|
||||
This file is part of the Open Porous Media project (OPM).
|
||||
|
||||
OPM is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
OPM is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with OPM. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
#ifndef OPM_SOLVERADAPTER_HPP
|
||||
#define OPM_SOLVERADAPTER_HPP
|
||||
|
||||
#include <memory>
|
||||
|
||||
#include <dune/istl/bcrsmatrix.hh>
|
||||
#include <dune/istl/operators.hh>
|
||||
#include <dune/istl/schwarz.hh>
|
||||
#include <dune/istl/solver.hh>
|
||||
#include <opm/common/ErrorMacros.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/CuBlockPreconditioner.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/CuOwnerOverlapCopy.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/CuSparseMatrix.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/CuVector.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/PreconditionerAdapter.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/has_function.hpp>
|
||||
|
||||
|
||||
|
||||
namespace Opm::cuistl
|
||||
{
|
||||
//! @brief Wraps a CUDA solver to work with CPU data.
|
||||
//!
|
||||
//! @tparam Operator the Dune::LinearOperator to use
|
||||
//! @tparam UnderlyingSolver a Dune solver like class, eg Dune::BiCGSTABSolver
|
||||
//! @tparam X the outer type to use (eg. Dune::BlockVector<Dune::FieldVector<...>>)
|
||||
template <class Operator, template <class> class UnderlyingSolver, class X>
|
||||
class SolverAdapter : public Dune::IterativeSolver<X, X>
|
||||
{
|
||||
public:
|
||||
using typename Dune::IterativeSolver<X, X>::domain_type;
|
||||
using typename Dune::IterativeSolver<X, X>::range_type;
|
||||
using typename Dune::IterativeSolver<X, X>::field_type;
|
||||
using typename Dune::IterativeSolver<X, X>::real_type;
|
||||
using typename Dune::IterativeSolver<X, X>::scalar_real_type;
|
||||
static constexpr auto block_size = domain_type::block_type::dimension;
|
||||
using XGPU = Opm::cuistl::CuVector<real_type>;
|
||||
|
||||
// TODO: Use a std::forward
|
||||
SolverAdapter(Operator& op,
|
||||
std::shared_ptr<Dune::ScalarProduct<X>> sp,
|
||||
std::shared_ptr<Dune::Preconditioner<X, X>> prec,
|
||||
scalar_real_type reduction,
|
||||
int maxit,
|
||||
int verbose)
|
||||
: Dune::IterativeSolver<X, X>(op, *sp, *prec, reduction, maxit, verbose)
|
||||
, m_opOnCPUWithMatrix(op)
|
||||
, m_matrix(CuSparseMatrix<real_type>::fromMatrix(op.getmat()))
|
||||
, m_underlyingSolver(constructSolver(prec, reduction, maxit, verbose))
|
||||
{
|
||||
}
|
||||
|
||||
virtual void apply(X& x, X& b, double reduction, Dune::InverseOperatorResult& res) override
|
||||
{
|
||||
// TODO: Can we do this without reimplementing the other function?
|
||||
// TODO: [perf] Do we need to update the matrix every time? Probably yes
|
||||
m_matrix.updateNonzeroValues(m_opOnCPUWithMatrix.getmat());
|
||||
|
||||
if (!m_inputBuffer) {
|
||||
m_inputBuffer.reset(new XGPU(b.dim()));
|
||||
m_outputBuffer.reset(new XGPU(x.dim()));
|
||||
}
|
||||
|
||||
m_inputBuffer->copyFromHost(b);
|
||||
// TODO: [perf] do we need to copy x here?
|
||||
m_outputBuffer->copyFromHost(x);
|
||||
|
||||
m_underlyingSolver.apply(*m_outputBuffer, *m_inputBuffer, reduction, res);
|
||||
|
||||
// TODO: [perf] do we need to copy b here?
|
||||
m_inputBuffer->copyToHost(b);
|
||||
m_outputBuffer->copyToHost(x);
|
||||
}
|
||||
virtual void apply(X& x, X& b, Dune::InverseOperatorResult& res) override
|
||||
{
|
||||
// TODO: [perf] Do we need to update the matrix every time? Probably yes
|
||||
m_matrix.updateNonzeroValues(m_opOnCPUWithMatrix.getmat());
|
||||
|
||||
if (!m_inputBuffer) {
|
||||
m_inputBuffer.reset(new XGPU(b.dim()));
|
||||
m_outputBuffer.reset(new XGPU(x.dim()));
|
||||
}
|
||||
|
||||
m_inputBuffer->copyFromHost(b);
|
||||
// TODO: [perf] do we need to copy x here?
|
||||
m_outputBuffer->copyFromHost(x);
|
||||
|
||||
m_underlyingSolver.apply(*m_outputBuffer, *m_inputBuffer, res);
|
||||
|
||||
// TODO: [perf] do we need to copy b here?
|
||||
m_inputBuffer->copyToHost(b);
|
||||
m_outputBuffer->copyToHost(x);
|
||||
}
|
||||
|
||||
private:
|
||||
Operator& m_opOnCPUWithMatrix;
|
||||
CuSparseMatrix<real_type> m_matrix;
|
||||
|
||||
UnderlyingSolver<XGPU> m_underlyingSolver;
|
||||
|
||||
|
||||
// TODO: Use a std::forward
|
||||
UnderlyingSolver<XGPU> constructSolver(std::shared_ptr<Dune::Preconditioner<X, X>> prec,
|
||||
scalar_real_type reduction,
|
||||
int maxit,
|
||||
int verbose)
|
||||
{
|
||||
|
||||
OPM_ERROR_IF(
|
||||
detail::is_a_well_operator<Operator>::value,
|
||||
"Currently we only support operators of type MatrixAdapter in the CUDA solver. "
|
||||
"Use --matrix-add-well-contributions=true. "
|
||||
"Using WellModelMatrixAdapter with SolverAdapter is not well-defined so it will not work well, or at all.");
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
if constexpr (detail::has_communication<Operator>::value) {
|
||||
// TODO: See the below TODO over the definition of precHolder in the other branch
|
||||
// TODO: We are currently double wrapping preconditioners in the preconditioner factory to be extra
|
||||
// compatible
|
||||
// with CPU. Probably a cleaner way eventually would be to do more modifications to the flexible
|
||||
// solver to accomodate the pure GPU better.
|
||||
auto precAsHolder = std::dynamic_pointer_cast<PreconditionerHolder<X, X>>(prec);
|
||||
if (!precAsHolder) {
|
||||
OPM_THROW(std::invalid_argument,
|
||||
"The preconditioner needs to be a CUDA preconditioner (eg. CuILU0) wrapped in a "
|
||||
"Opm::cuistl::PreconditionerAdapter wrapped in a "
|
||||
"Opm::cuistl::CuBlockPreconditioner. If you are unsure what this means, set "
|
||||
"preconditioner to 'CUILU0'"); // TODO: Suggest a better preconditioner
|
||||
}
|
||||
|
||||
auto preconditionerAdapter = precAsHolder->getUnderlyingPreconditioner();
|
||||
auto preconditionerAdapterAsHolder
|
||||
= std::dynamic_pointer_cast<PreconditionerHolder<XGPU, XGPU>>(preconditionerAdapter);
|
||||
if (!preconditionerAdapterAsHolder) {
|
||||
OPM_THROW(std::invalid_argument,
|
||||
"The preconditioner needs to be a CUDA preconditioner (eg. CuILU0) wrapped in a "
|
||||
"Opm::cuistl::PreconditionerAdapter wrapped in a "
|
||||
"Opm::cuistl::CuBlockPreconditioner. If you are unsure what this means, set "
|
||||
"preconditioner to 'CUILU0'"); // TODO: Suggest a better preconditioner
|
||||
}
|
||||
// We need to get the underlying preconditioner:
|
||||
auto preconditionerReallyOnGPU = preconditionerAdapterAsHolder->getUnderlyingPreconditioner();
|
||||
const auto& communication = m_opOnCPUWithMatrix.getCommunication();
|
||||
|
||||
using CudaCommunication = CuOwnerOverlapCopy<real_type, block_size, typename Operator::communication_type>;
|
||||
using SchwarzOperator
|
||||
= Dune::OverlappingSchwarzOperator<CuSparseMatrix<real_type>, XGPU, XGPU, CudaCommunication>;
|
||||
auto cudaCommunication = std::make_shared<CudaCommunication>(communication);
|
||||
|
||||
auto mpiPreconditioner = std::make_shared<CuBlockPreconditioner<XGPU, XGPU, CudaCommunication>>(
|
||||
preconditionerReallyOnGPU, cudaCommunication);
|
||||
|
||||
auto scalarProduct = std::make_shared<Dune::ParallelScalarProduct<XGPU, CudaCommunication>>(
|
||||
cudaCommunication, m_opOnCPUWithMatrix.category());
|
||||
|
||||
|
||||
// NOTE: Ownsership of cudaCommunication is handled by mpiPreconditioner. However, just to make sure we
|
||||
// remember
|
||||
// this, we add this check. So remember that we hold one count in this scope, and one in the
|
||||
// CuBlockPreconditioner instance. We accomedate for the fact that it could be passed around in
|
||||
// CuBlockPreconditioner, hence we do not test for != 2
|
||||
OPM_ERROR_IF(cudaCommunication.use_count() < 2, "Internal error. Shared pointer not owned properly.");
|
||||
auto overlappingCudaOperator = std::make_shared<SchwarzOperator>(m_matrix, *cudaCommunication);
|
||||
|
||||
return UnderlyingSolver<XGPU>(
|
||||
overlappingCudaOperator, scalarProduct, mpiPreconditioner, reduction, maxit, verbose);
|
||||
} else {
|
||||
// TODO: Fix the reliance on casting here. This is a code smell to a certain degree, and assumes
|
||||
// a certain setup beforehand. The only reason we do it this way is to minimize edits to the
|
||||
// flexible solver. We could design it differently, but keep this for the time being until
|
||||
// we figure out how we want to GPU-ify the rest of the system.
|
||||
auto precAsHolder = std::dynamic_pointer_cast<PreconditionerHolder<XGPU, XGPU>>(prec);
|
||||
if (!precAsHolder) {
|
||||
OPM_THROW(std::invalid_argument,
|
||||
"The preconditioner needs to be a CUDA preconditioner wrapped in a "
|
||||
"Opm::cuistl::PreconditionerHolder (eg. CuILU0).");
|
||||
}
|
||||
auto preconditionerOnGPU = precAsHolder->getUnderlyingPreconditioner();
|
||||
|
||||
auto matrixOperator
|
||||
= std::make_shared<Dune::MatrixAdapter<CuSparseMatrix<real_type>, XGPU, XGPU>>(m_matrix);
|
||||
auto scalarProduct = std::make_shared<Dune::SeqScalarProduct<XGPU>>();
|
||||
return UnderlyingSolver<XGPU>(
|
||||
matrixOperator, scalarProduct, preconditionerOnGPU, reduction, maxit, verbose);
|
||||
}
|
||||
}
|
||||
|
||||
std::unique_ptr<XGPU> m_inputBuffer;
|
||||
std::unique_ptr<XGPU> m_outputBuffer;
|
||||
};
|
||||
} // namespace Opm::cuistl
|
||||
|
||||
#endif
|
@ -84,5 +84,42 @@ public:
|
||||
static constexpr bool value = std::is_same_v<decltype(test<T>(0)), std::true_type>;
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* @brief The has_communication class checks if the type has the member function getCommunication.
|
||||
*
|
||||
* This is used in the SolverAdapter to check if the operator has a communication, and it will then select a different
|
||||
* path accordingly.
|
||||
*/
|
||||
template <typename T>
|
||||
class has_communication
|
||||
{
|
||||
template <typename U>
|
||||
static std::true_type test(decltype(&U::getCommunication));
|
||||
template <typename U>
|
||||
static std::false_type test(...);
|
||||
|
||||
public:
|
||||
static constexpr bool value = std::is_same_v<decltype(test<T>(0)), std::true_type>;
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief The is_a_well_operator class tries to guess if the operator is a well type operator
|
||||
*
|
||||
* @note This is mainly done in the solver adapter to detect incompatible runtime arguments. When the GPU linear solve
|
||||
* paths supports wells, this class can be removed.
|
||||
*/
|
||||
template <typename T>
|
||||
class is_a_well_operator
|
||||
{
|
||||
template <typename U>
|
||||
static std::true_type test(decltype(&U::addWellPressureEquations));
|
||||
template <typename U>
|
||||
static std::false_type test(...);
|
||||
|
||||
public:
|
||||
static constexpr bool value = std::is_same_v<decltype(test<T>(0)), std::true_type>;
|
||||
};
|
||||
|
||||
} // namespace Opm::cuistl::detail
|
||||
#endif
|
||||
|
104
tests/cuistl/test_cuowneroverlapcopy.cpp
Normal file
104
tests/cuistl/test_cuowneroverlapcopy.cpp
Normal file
@ -0,0 +1,104 @@
|
||||
/*
|
||||
Copyright 2023 SINTEF AS
|
||||
|
||||
This file is part of the Open Porous Media project (OPM).
|
||||
|
||||
OPM is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
OPM is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with OPM. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
#include <config.h>
|
||||
|
||||
#define BOOST_TEST_MODULE TestCuOwnerOverlapCopy
|
||||
#define BOOST_TEST_NO_MAIN
|
||||
|
||||
#include <boost/test/unit_test.hpp>
|
||||
#include <dune/common/parallel/mpihelper.hh>
|
||||
#include <dune/istl/bcrsmatrix.hh>
|
||||
#include <dune/istl/owneroverlapcopy.hh>
|
||||
#include <memory>
|
||||
#include <opm/simulators/linalg/cuistl/CuOwnerOverlapCopy.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/CuVector.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp>
|
||||
#include <random>
|
||||
|
||||
bool
|
||||
init_unit_test_func()
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
int
|
||||
main(int argc, char** argv)
|
||||
{
|
||||
[[maybe_unused]] const auto& helper = Dune::MPIHelper::instance(argc, argv);
|
||||
boost::unit_test::unit_test_main(&init_unit_test_func, argc, argv);
|
||||
}
|
||||
|
||||
BOOST_AUTO_TEST_CASE(TestProject)
|
||||
{
|
||||
|
||||
// We're going to have three points: Centered is owned, left and right is copied. We assume a periodic domain
|
||||
// ([0,1]/0~1)
|
||||
auto indexInfo = Dune::IndexInfoFromGrid<int, int>();
|
||||
indexInfo.addLocalIndex(std::make_tuple(0, 0, Dune::OwnerOverlapCopyAttributeSet::copy));
|
||||
indexInfo.addLocalIndex(std::make_tuple(1, 1, Dune::OwnerOverlapCopyAttributeSet::owner));
|
||||
indexInfo.addLocalIndex(std::make_tuple(2, 2, Dune::OwnerOverlapCopyAttributeSet::copy));
|
||||
|
||||
auto ownerOverlapCopy = Dune::OwnerOverlapCopyCommunication<int>(indexInfo, MPI_COMM_WORLD);
|
||||
auto xCPU = std::vector<double> {{1.0, 2.0, 3.0}};
|
||||
auto xGPU = Opm::cuistl::CuVector<double>(xCPU);
|
||||
|
||||
auto cuOwnerOverlapCopy
|
||||
= Opm::cuistl::CuOwnerOverlapCopy<double, 1, Dune::OwnerOverlapCopyCommunication<int>>(ownerOverlapCopy);
|
||||
|
||||
cuOwnerOverlapCopy.project(xGPU);
|
||||
|
||||
auto resultOfProject = xGPU.asStdVector();
|
||||
|
||||
BOOST_CHECK_EQUAL(0.0, resultOfProject[0]);
|
||||
BOOST_CHECK_EQUAL(2.0, resultOfProject[1]);
|
||||
BOOST_CHECK_EQUAL(0.0, resultOfProject[2]);
|
||||
}
|
||||
|
||||
|
||||
BOOST_AUTO_TEST_CASE(TestDot)
|
||||
{
|
||||
|
||||
// We're going to have three points: Centered is owned, left and right is copied. We assume a periodic domain
|
||||
// ([0,1]/0~1)
|
||||
auto indexInfo = Dune::IndexInfoFromGrid<int, int>();
|
||||
indexInfo.addLocalIndex(std::make_tuple(0, 0, Dune::OwnerOverlapCopyAttributeSet::copy));
|
||||
indexInfo.addLocalIndex(std::make_tuple(1, 1, Dune::OwnerOverlapCopyAttributeSet::owner));
|
||||
indexInfo.addLocalIndex(std::make_tuple(2, 2, Dune::OwnerOverlapCopyAttributeSet::copy));
|
||||
|
||||
indexInfo.addRemoteIndex(std::make_tuple(0, 0, Dune::OwnerOverlapCopyAttributeSet::copy));
|
||||
indexInfo.addRemoteIndex(std::make_tuple(0, 1, Dune::OwnerOverlapCopyAttributeSet::owner));
|
||||
indexInfo.addRemoteIndex(std::make_tuple(0, 2, Dune::OwnerOverlapCopyAttributeSet::copy));
|
||||
auto ownerOverlapCopy = Dune::OwnerOverlapCopyCommunication<int>(indexInfo, MPI_COMM_WORLD);
|
||||
auto xCPU = std::vector<double> {{1.0, 2.0, 3.0}};
|
||||
auto xGPU = Opm::cuistl::CuVector<double>(xCPU);
|
||||
|
||||
auto cuOwnerOverlapCopy
|
||||
= Opm::cuistl::CuOwnerOverlapCopy<double, 1, Dune::OwnerOverlapCopyCommunication<int>>(ownerOverlapCopy);
|
||||
|
||||
double outputDune = -1.0;
|
||||
auto xDune = xGPU.asDuneBlockVector<1>();
|
||||
ownerOverlapCopy.dot(xDune, xDune, outputDune);
|
||||
|
||||
double output = -1.0;
|
||||
cuOwnerOverlapCopy.dot(xGPU, xGPU, output);
|
||||
|
||||
|
||||
BOOST_CHECK_EQUAL(outputDune, output);
|
||||
BOOST_CHECK_EQUAL(4.0, output);
|
||||
}
|
117
tests/cuistl/test_solver_adapter.cpp
Normal file
117
tests/cuistl/test_solver_adapter.cpp
Normal file
@ -0,0 +1,117 @@
|
||||
/*
|
||||
Copyright 2022-2023 SINTEF AS
|
||||
|
||||
This file is part of the Open Porous Media project (OPM).
|
||||
|
||||
OPM is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
OPM is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with OPM. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
#include <config.h>
|
||||
|
||||
#define BOOST_TEST_MODULE TestSolverAdapter
|
||||
|
||||
#include <boost/test/unit_test.hpp>
|
||||
#include <dune/istl/solvers.hh>
|
||||
#include <opm/simulators/linalg/PreconditionerFactory.hpp>
|
||||
#include <opm/simulators/linalg/PropertyTree.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/SolverAdapter.hpp>
|
||||
|
||||
static const constexpr int dim = 3;
|
||||
using Matrix = Dune::BCRSMatrix<Dune::FieldMatrix<double, dim, dim>>;
|
||||
using Vector = Dune::BlockVector<Dune::FieldVector<double, dim>>;
|
||||
using Moperator = Dune::MatrixAdapter<Matrix, Vector, Vector>;
|
||||
using PrecondFactory = Opm::PreconditionerFactory<Moperator, Dune::Amg::SequentialInformation>;
|
||||
using SolverAdapter = Opm::cuistl::SolverAdapter<Moperator, Dune::BiCGSTABSolver, Vector>;
|
||||
|
||||
namespace
|
||||
{
|
||||
auto
|
||||
createSolverAdapterWithMatrix(const size_t N = 10)
|
||||
{
|
||||
|
||||
|
||||
const int nonZeroes = N * 3 - 2;
|
||||
|
||||
// We need to hold the matrix in memory somehow, but we don't want to deference
|
||||
// a pointer all the time (quality of life...):
|
||||
auto matrixPtr = std::make_shared<Matrix>(N, N, nonZeroes, Matrix::row_wise);
|
||||
auto& matrix = *matrixPtr;
|
||||
for (auto row = matrix.createbegin(); row != matrix.createend(); ++row) {
|
||||
// Add nonzeros for left neighbour, diagonal and right neighbour
|
||||
if (row.index() > 0) {
|
||||
row.insert(row.index() - 1);
|
||||
}
|
||||
row.insert(row.index());
|
||||
if (row.index() < matrix.N() - 1) {
|
||||
row.insert(row.index() + 1);
|
||||
}
|
||||
}
|
||||
// This might not be the most elegant way of filling in a Dune sparse matrix, but it works.
|
||||
for (size_t i = 0; i < N; ++i) {
|
||||
for (int k = 0; k < dim; ++k) {
|
||||
matrix[i][i][k][k] = -2;
|
||||
}
|
||||
if (i < N - 1) {
|
||||
for (int k = 0; k < dim; ++k) {
|
||||
matrix[i][i + 1][k][k] = 1;
|
||||
}
|
||||
}
|
||||
|
||||
if (i > 0) {
|
||||
for (int k = 0; k < dim; ++k) {
|
||||
matrix[i][i - 1][k][k] = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
auto op = std::make_shared<Moperator>(matrix);
|
||||
auto sp = std::make_shared<Dune::ScalarProduct<Vector>>();
|
||||
auto prm = Opm::PropertyTree();
|
||||
prm.put<double>("relaxation", 1.0);
|
||||
prm.put<std::string>("type", "CUILU0");
|
||||
auto prec = PrecondFactory::create(*op, prm);
|
||||
auto solverAdapter = std::make_shared<SolverAdapter>(*op, sp, prec, 1.0, 10, 0);
|
||||
|
||||
return std::make_tuple(matrixPtr, solverAdapter, op);
|
||||
}
|
||||
} // namespace
|
||||
|
||||
BOOST_AUTO_TEST_CASE(TestCreation)
|
||||
{
|
||||
BOOST_CHECK_NO_THROW(createSolverAdapterWithMatrix(););
|
||||
}
|
||||
|
||||
BOOST_AUTO_TEST_CASE(TestSolve)
|
||||
{
|
||||
const size_t N = 10;
|
||||
auto [matrix, solverAdapter, op] = createSolverAdapterWithMatrix(N);
|
||||
|
||||
Vector xActual(N), xInitial(N), b(N);
|
||||
for (size_t i = 0; i < N; ++i) {
|
||||
for (size_t j = 0; j < dim; ++j) {
|
||||
xActual[i][j] = 1.0;
|
||||
xInitial[i][j] = 0.1 * i;
|
||||
}
|
||||
}
|
||||
|
||||
matrix->mv(xActual, b);
|
||||
Dune::InverseOperatorResult res;
|
||||
solverAdapter->apply(xInitial, b, res);
|
||||
|
||||
for (size_t i = 0; i < N; ++i) {
|
||||
for (size_t j = 0; j < dim; ++j) {
|
||||
// This should actually be up to rounding exact since ILU is just the inverse
|
||||
// for this matrix.
|
||||
BOOST_CHECK_CLOSE(xActual[i][j], xInitial[i][j], 1e-13);
|
||||
}
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user