Expose CuISTL solver in FlexibleSolver.

Kjetil Olsen Lye 2023-05-31 14:51:00 +02:00
parent 0269f7215c
commit ceb15e22e3
11 changed files with 822 additions and 8 deletions

CMakeLists.txt View File

@@ -552,6 +552,8 @@ if(CUDA_FOUND)
cuvector
cusparsematrix
cuseqilu0
cuowneroverlapcopy
solver_adapter
PROPERTIES LABELS gpu_cuda)
endif()

CMakeLists_files.cmake View File

@@ -172,16 +172,13 @@ if(CUDA_FOUND)
list (APPEND PUBLIC_HEADER_FILES opm/simulators/linalg/cuistl/detail/has_function.hpp)
list (APPEND PUBLIC_HEADER_FILES opm/simulators/linalg/cuistl/detail/preconditioner_should_call_post_pre.hpp)
list (APPEND PUBLIC_HEADER_FILES opm/simulators/linalg/cuistl/PreconditionerAdapter.hpp)
list (APPEND PUBLIC_HEADER_FILES opm/simulators/linalg/cuistl/CuSeqILU0.hpp)
list (APPEND PUBLIC_HEADER_FILES opm/simulators/linalg/cuistl/detail/fix_zero_diagonal.hpp)
list (APPEND PUBLIC_HEADER_FILES opm/simulators/linalg/cuistl/PreconditionerConvertFieldTypeAdapter.hpp)
list (APPEND PUBLIC_HEADER_FILES opm/simulators/linalg/cuistl/CuOwnerOverlapCopy.hpp)
list (APPEND PUBLIC_HEADER_FILES opm/simulators/linalg/cuistl/SolverAdapter.hpp)
list (APPEND PUBLIC_HEADER_FILES opm/simulators/linalg/cuistl/CuBlockPreconditioner.hpp)
list (APPEND PUBLIC_HEADER_FILES opm/simulators/linalg/cuistl/PreconditionerHolder.hpp)
endif()
if(OPENCL_FOUND)
@@ -271,6 +268,9 @@ if(CUDA_FOUND)
list(APPEND TEST_SOURCE_FILES tests/cuistl/test_safe_conversion.cpp)
list(APPEND TEST_SOURCE_FILES tests/cuistl/test_cuseqilu0.cpp)
list(APPEND TEST_SOURCE_FILES tests/cuistl/test_converttofloatadapter.cpp)
list(APPEND TEST_SOURCE_FILES tests/cuistl/test_cuowneroverlapcopy.cpp)
list(APPEND TEST_SOURCE_FILES tests/cuistl/test_solver_adapter.cpp)
endif()
if(OPENCL_FOUND)

opm/simulators/linalg/FlexibleSolver_impl.hpp View File

@@ -37,6 +37,10 @@
#include <dune/istl/owneroverlapcopy.hh>
#include <dune/istl/paamg/pinfo.hh>
#if HAVE_CUDA
#include <opm/simulators/linalg/cuistl/SolverAdapter.hpp>
#endif
namespace Dune
{
/// Create a sequential solver.
@@ -184,6 +188,16 @@ namespace Dune
bool dummy = false;
using MatrixType = std::remove_const_t<std::remove_reference_t<decltype(linearoperator_for_solver_->getmat())>>;
linsolver_ = std::make_shared<Dune::UMFPack<MatrixType>>(linearoperator_for_solver_->getmat(), verbosity, dummy);
#endif
#if HAVE_CUDA
} else if (solver_type == "cubicgstab") {
linsolver_.reset(new Opm::cuistl::SolverAdapter<Operator, Dune::BiCGSTABSolver, VectorType>(
*linearoperator_for_solver_,
scalarproduct_,
preconditioner_,
tol, // desired residual reduction factor
maxiter, // maximum number of iterations
verbosity));
#endif
} else {
OPM_THROW(std::invalid_argument,
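
For context, a hedged usage sketch (not part of this diff): FlexibleSolver reads the solver type from its property tree, so a configuration along the following lines should reach the new branch. All key names other than the "cubicgstab" value are assumptions.

// Sketch only: property-tree key names besides "cubicgstab" are assumptions.
Opm::PropertyTree prm;
prm.put<std::string>("solver", "cubicgstab");          // selects Opm::cuistl::SolverAdapter
prm.put<std::string>("preconditioner.type", "CUILU0"); // a CUDA preconditioner is required
prm.put<double>("tol", 1e-3);
prm.put<int>("maxiter", 200);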

opm/simulators/linalg/cuistl/CuBlockPreconditioner.hpp View File

@@ -0,0 +1,125 @@
/*
Copyright 2022-2023 SINTEF AS
This file is part of the Open Porous Media project (OPM).
OPM is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
OPM is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with OPM. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef OPM_CUISTL_CUBLOCKPRECONDITIONER_HPP
#define OPM_CUISTL_CUBLOCKPRECONDITIONER_HPP
#include <dune/common/shared_ptr.hh>
#include <memory>
#include <opm/simulators/linalg/PreconditionerWithUpdate.hpp>
#include <opm/simulators/linalg/cuistl/PreconditionerHolder.hpp>
#include <opm/simulators/linalg/cuistl/detail/preconditioner_should_call_post_pre.hpp>
namespace Opm::cuistl
{
//! @brief An adaptation of Dune::BlockPreconditioner that works within the CuISTL framework.
//!
//! @note We aim to integrate this into OwningBlockPreconditioner (or a relative thereof).
template <class X, class Y, class C, class P = Dune::PreconditionerWithUpdate<X, Y>>
class CuBlockPreconditioner : public Dune::PreconditionerWithUpdate<X, Y>, public PreconditionerHolder<X, Y>
{
public:
using domain_type = X;
using range_type = Y;
using field_type = typename X::field_type;
using communication_type = C;
//! @brief Constructor.
//!
//! The constructor gets all parameters needed to operate the preconditioner.
//! @param p The sequential preconditioner.
//! @param c The communication object for syncing overlap and copy
//! data points (e.g. OwnerOverlapCopyCommunication).
//!
CuBlockPreconditioner(const std::shared_ptr<P>& p, const std::shared_ptr<const communication_type>& c)
: m_preconditioner(p)
, m_communication(c)
{
}
CuBlockPreconditioner(const std::shared_ptr<P>& p, const communication_type& c)
: m_preconditioner(p)
, m_communication(Dune::stackobject_to_shared_ptr(c))
{
}
//! \brief Prepare the preconditioner.
//!
//! \copydoc Preconditioner::pre(X&,Y&)
virtual void pre(X& x, Y& b) override
{
// TODO: [perf] Do we always need to copy, or only when we need the pre step?
m_communication->copyOwnerToAll(x, x); // make Dirichlet values consistent
if constexpr (detail::shouldCallPreconditionerPre<P>()) {
m_preconditioner->pre(x, b);
}
}
//! \brief Apply the preconditioner
//!
//! \copydoc Preconditioner::apply(X&,const Y&)
virtual void apply(X& v, const Y& d) override
{
m_preconditioner->apply(v, d);
m_communication->copyOwnerToAll(v, v);
}
virtual void update() override
{
m_preconditioner->update();
}
virtual void post(X& x) override
{
if constexpr (detail::shouldCallPreconditionerPost<P>()) {
m_preconditioner->post(x);
}
}
//! Category of the preconditioner (see SolverCategory::Category)
virtual Dune::SolverCategory::Category category() const override
{
return Dune::SolverCategory::overlapping;
}
static constexpr bool shouldCallPre()
{
return detail::shouldCallPreconditionerPre<P>();
}
static constexpr bool shouldCallPost()
{
return detail::shouldCallPreconditionerPost<P>();
}
virtual std::shared_ptr<Dune::PreconditionerWithUpdate<X, Y>> getUnderlyingPreconditioner() override
{
return m_preconditioner;
}
private:
//! \brief a sequential preconditioner
std::shared_ptr<P> m_preconditioner;
//! \brief the communication object
std::shared_ptr<const communication_type> m_communication;
};
} // namespace Opm::cuistl
#endif
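
A minimal wiring sketch (not from the diff): gpuPrec is a placeholder for an existing GPU preconditioner of type std::shared_ptr<Dune::PreconditionerWithUpdate<GpuVector, GpuVector>>, and cpuComm a placeholder for a fully set up Dune::OwnerOverlapCopyCommunication.

// Sketch: wrap a GPU preconditioner so overlap/copy rows stay consistent in parallel.
using GpuVector = Opm::cuistl::CuVector<double>;
using CudaComm = Opm::cuistl::CuOwnerOverlapCopy<double, 1, Dune::OwnerOverlapCopyCommunication<int>>;
auto cudaComm = std::make_shared<CudaComm>(cpuComm);
auto blockPrec = std::make_shared<Opm::cuistl::CuBlockPreconditioner<GpuVector, GpuVector, CudaComm>>(gpuPrec, cudaComm);
// blockPrec->apply(v, d) now runs gpuPrec->apply(v, d) followed by cudaComm->copyOwnerToAll(v, v).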

opm/simulators/linalg/cuistl/CuOwnerOverlapCopy.hpp View File

@@ -0,0 +1,149 @@
/*
Copyright 2022-2023 SINTEF AS
This file is part of the Open Porous Media project (OPM).
OPM is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
OPM is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with OPM. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef OPM_CUISTL_CUOWNEROVERLAPCOPY_HPP
#define OPM_CUISTL_CUOWNEROVERLAPCOPY_HPP
#include <dune/istl/owneroverlapcopy.hh>
#include <memory>
#include <mutex>
#include <opm/simulators/linalg/cuistl/CuVector.hpp>
namespace Opm::cuistl
{
/**
* @brief CUDA-compatible variant of Dune::OwnerOverlapCopyCommunication.
*
* This class can essentially be seen as an adapter around Dune::OwnerOverlapCopyCommunication, and should work as
* a Dune::OwnerOverlapCopyCommunication on CuVectors.
*
* @note This currently only has the functionality needed to parallelize the linear solve.
*
* @tparam field_type should be a field type supported by CuVector (double, float)
* @tparam block_size the block size used (this is relevant for, e.g., computing the correct indices)
* @tparam OwnerOverlapCopyCommunicationType should mimic Dune::OwnerOverlapCopyCommunication.
*/
template <class field_type, int block_size, class OwnerOverlapCopyCommunicationType>
class CuOwnerOverlapCopy
{
public:
using X = CuVector<field_type>;
CuOwnerOverlapCopy(const OwnerOverlapCopyCommunicationType& cpuOwnerOverlapCopy)
: m_cpuOwnerOverlapCopy(cpuOwnerOverlapCopy)
{
}
/**
* @brief dot will carry out the dot product between x and y on the owned indices, then sum up the result across MPI
* processes.
* @param[out] output result will be stored here
*
* @note This uses the same interface as its DUNE equivalent.
*/
void dot(const X& x, const X& y, field_type& output) const
{
std::call_once(m_initializedIndices, [&]() { initIndexSet(); });
const auto dotAtRank = x.dot(y, *m_indicesOwner);
output = m_cpuOwnerOverlapCopy.communicator().sum(dotAtRank);
}
/**
* @brief norm computes the l^2-norm of x across processes.
*
* This will compute the dot product of x with itself on the owned indices, then
* sum the result across processes and return the square root of the sum.
*/
field_type norm(const X& x) const
{
auto xDotX = field_type(0);
this->dot(x, x, xDotX);
using std::sqrt;
return sqrt(xDotX);
}
/**
* @brief project will project x to the owned subspace
*
* For each component i which is not owned, x_i will be set to 0
*
* @param[inout] x the vector to project
*/
void project(X& x) const
{
std::call_once(m_initializedIndices, [&]() { initIndexSet(); });
x.setZeroAtIndexSet(*m_indicesCopy);
}
/**
* @brief copyOwnerToAll will copy source to the CPU, then call OwnerOverlapCopyCommunicationType::copyOwnerToAll on
* the copied data, and copy the result back to the GPU
* @param[in] source the GPU vector whose owner values are communicated
* @param[out] dest the GPU vector receiving the result
*/
void copyOwnerToAll(const X& source, X& dest) const
{
// TODO: [perf] Can we reduce copying from the GPU here?
// TODO: [perf] Maybe create a global buffer instead?
auto sourceAsDuneVector = source.template asDuneBlockVector<block_size>();
auto destAsDuneVector = dest.template asDuneBlockVector<block_size>();
m_cpuOwnerOverlapCopy.copyOwnerToAll(sourceAsDuneVector, destAsDuneVector);
dest.copyFromHost(destAsDuneVector);
}
private:
const OwnerOverlapCopyCommunicationType& m_cpuOwnerOverlapCopy;
// Used to ensure initIndexSet() is called at most once. Note that this is somewhat of a
// premature optimization, in the sense that we could just always initialize these indices,
// but they are not always used.
mutable std::once_flag m_initializedIndices;
mutable std::unique_ptr<CuVector<int>> m_indicesCopy;
mutable std::unique_ptr<CuVector<int>> m_indicesOwner;
void initIndexSet() const
{
// We need the indices that we will use in the project, dot and norm calls.
// TODO: [premature perf] Can this be run once per instance? Or do we need to rebuild every time?
const auto& pis = m_cpuOwnerOverlapCopy.indexSet();
std::vector<int> indicesCopyOnCPU;
std::vector<int> indicesOwnerCPU;
for (const auto& index : pis) {
if (index.local().attribute() == Dune::OwnerOverlapCopyAttributeSet::copy) {
for (int component = 0; component < block_size; ++component) {
indicesCopyOnCPU.push_back(index.local().local() * block_size + component);
}
}
if (index.local().attribute() == Dune::OwnerOverlapCopyAttributeSet::owner) {
for (int component = 0; component < block_size; ++component) {
indicesOwnerCPU.push_back(index.local().local() * block_size + component);
}
}
}
m_indicesCopy = std::make_unique<CuVector<int>>(indicesCopyOnCPU);
m_indicesOwner = std::make_unique<CuVector<int>>(indicesOwnerCPU);
}
};
} // namespace Opm::cuistl
#endif
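
A short usage sketch (cpuComm is a placeholder for an initialized Dune::OwnerOverlapCopyCommunication; see also tests/cuistl/test_cuowneroverlapcopy.cpp further down):

Opm::cuistl::CuOwnerOverlapCopy<double, 1, Dune::OwnerOverlapCopyCommunication<int>> comm(cpuComm);
Opm::cuistl::CuVector<double> x(std::vector<double> {1.0, 2.0, 3.0});
double d = 0.0;
comm.dot(x, x, d);             // owned entries only, summed across MPI ranks
const double n = comm.norm(x); // equals sqrt(d)
comm.project(x);               // zeroes every entry marked as copy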

opm/simulators/linalg/cuistl/PreconditionerAdapter.hpp View File

@@ -22,6 +22,7 @@
#include <dune/istl/preconditioner.hh>
#include <opm/simulators/linalg/PreconditionerWithUpdate.hpp>
#include <opm/simulators/linalg/cuistl/CuVector.hpp>
#include <opm/simulators/linalg/cuistl/PreconditionerHolder.hpp>
#include <opm/simulators/linalg/cuistl/detail/preconditioner_should_call_post_pre.hpp>
@@ -36,7 +37,9 @@ namespace Opm::cuistl
//! \tparam Y the range type (should be on the CPU). Typically a Dune::BlockVector
//! \tparam CudaPreconditionerType the preconditioner taking CuVector<real_type> as arguments to apply
template <class X, class Y, class CudaPreconditionerType>
class PreconditionerAdapter : public Dune::PreconditionerWithUpdate<X, Y>
class PreconditionerAdapter
: public Dune::PreconditionerWithUpdate<X, Y>,
public PreconditionerHolder<CuVector<typename X::field_type>, CuVector<typename Y::field_type>>
{
public:
//! \brief The domain type of the preconditioner.
@@ -114,6 +117,12 @@ public:
return detail::shouldCallPreconditionerPre<CudaPreconditionerType>();
}
virtual std::shared_ptr<Dune::PreconditionerWithUpdate<CuVector<field_type>, CuVector<field_type>>>
getUnderlyingPreconditioner() override
{
return m_underlyingPreconditioner;
}
private:
//! \brief the underlying preconditioner to use
std::shared_ptr<CudaPreconditionerType> m_underlyingPreconditioner;

opm/simulators/linalg/cuistl/PreconditionerHolder.hpp View File

@@ -0,0 +1,43 @@
/*
Copyright 2022-2023 SINTEF AS
This file is part of the Open Porous Media project (OPM).
OPM is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
OPM is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with OPM. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef OPM_CUISTL_PRECONDITIONERHOLDER_HPP
#define OPM_CUISTL_PRECONDITIONERHOLDER_HPP
#include <opm/simulators/linalg/PreconditionerWithUpdate.hpp>
namespace Opm::cuistl
{
//! \brief Common interface for adapters that hold preconditioners.
//!
//! \note The goal is that this class will be made obsolete by further
//! restructuring of the solver interfaces (FlexibleSolver and friends), but
//! for now it is needed as an intermediate layer. See specifically SolverAdapter.hpp
//! for how this is used.
template <class X, class Y>
class PreconditionerHolder
{
public:
/**
* @brief getUnderlyingPreconditioner gets the underlying preconditioner (the preconditioner being held).
*/
virtual std::shared_ptr<Dune::PreconditionerWithUpdate<X, Y>> getUnderlyingPreconditioner() = 0;
};
} // end namespace Opm::cuistl
#endif
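
How the interface is consumed (a sketch; prec is a placeholder of type std::shared_ptr<Dune::Preconditioner<X, X>> for some vector type X):

auto holder = std::dynamic_pointer_cast<Opm::cuistl::PreconditionerHolder<X, X>>(prec);
if (!holder) {
    OPM_THROW(std::invalid_argument, "Not a PreconditionerHolder.");
}
auto underlying = holder->getUnderlyingPreconditioner(); // std::shared_ptr<Dune::PreconditionerWithUpdate<X, X>>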

opm/simulators/linalg/cuistl/SolverAdapter.hpp View File

@@ -0,0 +1,214 @@
/*
Copyright 2022-2023 SINTEF AS
This file is part of the Open Porous Media project (OPM).
OPM is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
OPM is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with OPM. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef OPM_SOLVERADAPTER_HPP
#define OPM_SOLVERADAPTER_HPP
#include <memory>
#include <dune/istl/bcrsmatrix.hh>
#include <dune/istl/operators.hh>
#include <dune/istl/schwarz.hh>
#include <dune/istl/solver.hh>
#include <opm/common/ErrorMacros.hpp>
#include <opm/simulators/linalg/cuistl/CuBlockPreconditioner.hpp>
#include <opm/simulators/linalg/cuistl/CuOwnerOverlapCopy.hpp>
#include <opm/simulators/linalg/cuistl/CuSparseMatrix.hpp>
#include <opm/simulators/linalg/cuistl/CuVector.hpp>
#include <opm/simulators/linalg/cuistl/PreconditionerAdapter.hpp>
#include <opm/simulators/linalg/cuistl/detail/has_function.hpp>
namespace Opm::cuistl
{
//! @brief Wraps a CUDA solver to work with CPU data.
//!
//! @tparam Operator the Dune::LinearOperator to use
//! @tparam UnderlyingSolver a Dune solver-like class, e.g. Dune::BiCGSTABSolver
//! @tparam X the outer type to use (e.g. Dune::BlockVector<Dune::FieldVector<...>>)
template <class Operator, template <class> class UnderlyingSolver, class X>
class SolverAdapter : public Dune::IterativeSolver<X, X>
{
public:
using typename Dune::IterativeSolver<X, X>::domain_type;
using typename Dune::IterativeSolver<X, X>::range_type;
using typename Dune::IterativeSolver<X, X>::field_type;
using typename Dune::IterativeSolver<X, X>::real_type;
using typename Dune::IterativeSolver<X, X>::scalar_real_type;
static constexpr auto block_size = domain_type::block_type::dimension;
using XGPU = Opm::cuistl::CuVector<real_type>;
// TODO: Use a std::forward
SolverAdapter(Operator& op,
std::shared_ptr<Dune::ScalarProduct<X>> sp,
std::shared_ptr<Dune::Preconditioner<X, X>> prec,
scalar_real_type reduction,
int maxit,
int verbose)
: Dune::IterativeSolver<X, X>(op, *sp, *prec, reduction, maxit, verbose)
, m_opOnCPUWithMatrix(op)
, m_matrix(CuSparseMatrix<real_type>::fromMatrix(op.getmat()))
, m_underlyingSolver(constructSolver(prec, reduction, maxit, verbose))
{
}
virtual void apply(X& x, X& b, double reduction, Dune::InverseOperatorResult& res) override
{
// TODO: Can we do this without reimplementing the other function?
// TODO: [perf] Do we need to update the matrix every time? Probably yes
m_matrix.updateNonzeroValues(m_opOnCPUWithMatrix.getmat());
if (!m_inputBuffer) {
m_inputBuffer.reset(new XGPU(b.dim()));
m_outputBuffer.reset(new XGPU(x.dim()));
}
m_inputBuffer->copyFromHost(b);
// TODO: [perf] do we need to copy x here?
m_outputBuffer->copyFromHost(x);
m_underlyingSolver.apply(*m_outputBuffer, *m_inputBuffer, reduction, res);
// TODO: [perf] do we need to copy b here?
m_inputBuffer->copyToHost(b);
m_outputBuffer->copyToHost(x);
}
virtual void apply(X& x, X& b, Dune::InverseOperatorResult& res) override
{
// TODO: [perf] Do we need to update the matrix every time? Probably yes
m_matrix.updateNonzeroValues(m_opOnCPUWithMatrix.getmat());
if (!m_inputBuffer) {
m_inputBuffer.reset(new XGPU(b.dim()));
m_outputBuffer.reset(new XGPU(x.dim()));
}
m_inputBuffer->copyFromHost(b);
// TODO: [perf] do we need to copy x here?
m_outputBuffer->copyFromHost(x);
m_underlyingSolver.apply(*m_outputBuffer, *m_inputBuffer, res);
// TODO: [perf] do we need to copy b here?
m_inputBuffer->copyToHost(b);
m_outputBuffer->copyToHost(x);
}
private:
Operator& m_opOnCPUWithMatrix;
CuSparseMatrix<real_type> m_matrix;
UnderlyingSolver<XGPU> m_underlyingSolver;
// TODO: Use a std::forward
UnderlyingSolver<XGPU> constructSolver(std::shared_ptr<Dune::Preconditioner<X, X>> prec,
scalar_real_type reduction,
int maxit,
int verbose)
{
OPM_ERROR_IF(
detail::is_a_well_operator<Operator>::value,
"Currently we only support operators of type MatrixAdapter in the CUDA solver. "
"Use --matrix-add-well-contributions=true. "
"Using WellModelMatrixAdapter with SolverAdapter is not well-defined so it will not work well, or at all.");
if constexpr (detail::has_communication<Operator>::value) {
// TODO: See the corresponding TODO above the definition of precAsHolder in the other branch.
// TODO: We are currently double wrapping preconditioners in the preconditioner factory to be extra
// compatible with the CPU. A cleaner way would eventually be to make more modifications to the
// flexible solver to accommodate the pure GPU path better.
auto precAsHolder = std::dynamic_pointer_cast<PreconditionerHolder<X, X>>(prec);
if (!precAsHolder) {
OPM_THROW(std::invalid_argument,
"The preconditioner needs to be a CUDA preconditioner (eg. CuILU0) wrapped in a "
"Opm::cuistl::PreconditionerAdapter wrapped in a "
"Opm::cuistl::CuBlockPreconditioner. If you are unsure what this means, set "
"preconditioner to 'CUILU0'"); // TODO: Suggest a better preconditioner
}
auto preconditionerAdapter = precAsHolder->getUnderlyingPreconditioner();
auto preconditionerAdapterAsHolder
= std::dynamic_pointer_cast<PreconditionerHolder<XGPU, XGPU>>(preconditionerAdapter);
if (!preconditionerAdapterAsHolder) {
OPM_THROW(std::invalid_argument,
"The preconditioner needs to be a CUDA preconditioner (eg. CuILU0) wrapped in a "
"Opm::cuistl::PreconditionerAdapter wrapped in a "
"Opm::cuistl::CuBlockPreconditioner. If you are unsure what this means, set "
"preconditioner to 'CUILU0'"); // TODO: Suggest a better preconditioner
}
// We need to get the underlying preconditioner:
auto preconditionerReallyOnGPU = preconditionerAdapterAsHolder->getUnderlyingPreconditioner();
const auto& communication = m_opOnCPUWithMatrix.getCommunication();
using CudaCommunication = CuOwnerOverlapCopy<real_type, block_size, typename Operator::communication_type>;
using SchwarzOperator
= Dune::OverlappingSchwarzOperator<CuSparseMatrix<real_type>, XGPU, XGPU, CudaCommunication>;
auto cudaCommunication = std::make_shared<CudaCommunication>(communication);
auto mpiPreconditioner = std::make_shared<CuBlockPreconditioner<XGPU, XGPU, CudaCommunication>>(
preconditionerReallyOnGPU, cudaCommunication);
auto scalarProduct = std::make_shared<Dune::ParallelScalarProduct<XGPU, CudaCommunication>>(
cudaCommunication, m_opOnCPUWithMatrix.category());
// NOTE: Ownership of cudaCommunication is handled by mpiPreconditioner. However, just to make sure
// we remember this, we add this check. So remember that we hold one count in this scope, and one in
// the CuBlockPreconditioner instance. We accommodate the fact that it could be passed around inside
// CuBlockPreconditioner, hence we do not test for != 2.
OPM_ERROR_IF(cudaCommunication.use_count() < 2, "Internal error. Shared pointer not owned properly.");
auto overlappingCudaOperator = std::make_shared<SchwarzOperator>(m_matrix, *cudaCommunication);
return UnderlyingSolver<XGPU>(
overlappingCudaOperator, scalarProduct, mpiPreconditioner, reduction, maxit, verbose);
} else {
// TODO: Fix the reliance on casting here. This is a code smell to a certain degree, and assumes
// a certain setup beforehand. The only reason we do it this way is to minimize edits to the
// flexible solver. We could design it differently, but keep this for the time being until
// we figure out how we want to GPU-ify the rest of the system.
auto precAsHolder = std::dynamic_pointer_cast<PreconditionerHolder<XGPU, XGPU>>(prec);
if (!precAsHolder) {
OPM_THROW(std::invalid_argument,
"The preconditioner needs to be a CUDA preconditioner wrapped in a "
"Opm::cuistl::PreconditionerHolder (eg. CuILU0).");
}
auto preconditionerOnGPU = precAsHolder->getUnderlyingPreconditioner();
auto matrixOperator
= std::make_shared<Dune::MatrixAdapter<CuSparseMatrix<real_type>, XGPU, XGPU>>(m_matrix);
auto scalarProduct = std::make_shared<Dune::SeqScalarProduct<XGPU>>();
return UnderlyingSolver<XGPU>(
matrixOperator, scalarProduct, preconditionerOnGPU, reduction, maxit, verbose);
}
}
std::unique_ptr<XGPU> m_inputBuffer;
std::unique_ptr<XGPU> m_outputBuffer;
};
} // namespace Opm::cuistl
#endif
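
To summarize the casting logic in constructSolver above, the parallel branch unwraps the incoming preconditioner in two steps before re-wrapping it (a reading aid using the names from the code, not code from the diff):

// prec                           : std::shared_ptr<Dune::Preconditioner<X, X>>, also a PreconditionerHolder<X, X>
//  -> preconditionerAdapter      : its underlying preconditioner, also a PreconditionerHolder<XGPU, XGPU>
//      -> preconditionerReallyOnGPU : the preconditioner actually running on the GPU (e.g. CuSeqILU0),
//                                     re-wrapped in a fresh CuBlockPreconditioner with the CUDA communication.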

opm/simulators/linalg/cuistl/detail/has_function.hpp View File

@@ -84,5 +84,42 @@ public:
static constexpr bool value = std::is_same_v<decltype(test<T>(0)), std::true_type>;
};
/**
* @brief The has_communication class checks if the type has the member function getCommunication.
*
* This is used in the SolverAdapter to check if the operator has a communication object, in which case a
* different code path is selected.
*/
template <typename T>
class has_communication
{
template <typename U>
static std::true_type test(decltype(&U::getCommunication));
template <typename U>
static std::false_type test(...);
public:
static constexpr bool value = std::is_same_v<decltype(test<T>(0)), std::true_type>;
};
/**
* @brief The is_a_well_operator class tries to guess if the operator is a well-type operator.
*
* @note This is mainly done in the solver adapter to detect incompatible runtime arguments. When the GPU
* linear solver path supports wells, this class can be removed.
*/
template <typename T>
class is_a_well_operator
{
template <typename U>
static std::true_type test(decltype(&U::addWellPressureEquations));
template <typename U>
static std::false_type test(...);
public:
static constexpr bool value = std::is_same_v<decltype(test<T>(0)), std::true_type>;
};
} // namespace Opm::cuistl::detail
#endif
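
Both detectors follow the classic member-detection idiom and can be exercised at compile time; a small sketch with hypothetical types:

struct OpWithComm { const int& getCommunication() const; };
struct PlainOp { };
static_assert(Opm::cuistl::detail::has_communication<OpWithComm>::value, "has getCommunication");
static_assert(!Opm::cuistl::detail::has_communication<PlainOp>::value, "no getCommunication");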

tests/cuistl/test_cuowneroverlapcopy.cpp View File

@@ -0,0 +1,104 @@
/*
Copyright 2023 SINTEF AS
This file is part of the Open Porous Media project (OPM).
OPM is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
OPM is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with OPM. If not, see <http://www.gnu.org/licenses/>.
*/
#include <config.h>
#define BOOST_TEST_MODULE TestCuOwnerOverlapCopy
#define BOOST_TEST_NO_MAIN
#include <boost/test/unit_test.hpp>
#include <dune/common/parallel/mpihelper.hh>
#include <dune/istl/bcrsmatrix.hh>
#include <dune/istl/owneroverlapcopy.hh>
#include <memory>
#include <opm/simulators/linalg/cuistl/CuOwnerOverlapCopy.hpp>
#include <opm/simulators/linalg/cuistl/CuVector.hpp>
#include <opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp>
#include <random>
bool
init_unit_test_func()
{
return true;
}
int
main(int argc, char** argv)
{
[[maybe_unused]] const auto& helper = Dune::MPIHelper::instance(argc, argv);
return boost::unit_test::unit_test_main(&init_unit_test_func, argc, argv);
}
BOOST_AUTO_TEST_CASE(TestProject)
{
// We're going to have three points: the center one is owned, the left and right ones are copies.
// We assume a periodic domain ([0,1] with 0 ~ 1).
auto indexInfo = Dune::IndexInfoFromGrid<int, int>();
indexInfo.addLocalIndex(std::make_tuple(0, 0, Dune::OwnerOverlapCopyAttributeSet::copy));
indexInfo.addLocalIndex(std::make_tuple(1, 1, Dune::OwnerOverlapCopyAttributeSet::owner));
indexInfo.addLocalIndex(std::make_tuple(2, 2, Dune::OwnerOverlapCopyAttributeSet::copy));
auto ownerOverlapCopy = Dune::OwnerOverlapCopyCommunication<int>(indexInfo, MPI_COMM_WORLD);
auto xCPU = std::vector<double> {{1.0, 2.0, 3.0}};
auto xGPU = Opm::cuistl::CuVector<double>(xCPU);
auto cuOwnerOverlapCopy
= Opm::cuistl::CuOwnerOverlapCopy<double, 1, Dune::OwnerOverlapCopyCommunication<int>>(ownerOverlapCopy);
cuOwnerOverlapCopy.project(xGPU);
auto resultOfProject = xGPU.asStdVector();
BOOST_CHECK_EQUAL(0.0, resultOfProject[0]);
BOOST_CHECK_EQUAL(2.0, resultOfProject[1]);
BOOST_CHECK_EQUAL(0.0, resultOfProject[2]);
}
BOOST_AUTO_TEST_CASE(TestDot)
{
// We're going to have three points: the center one is owned, the left and right ones are copies.
// We assume a periodic domain ([0,1] with 0 ~ 1).
auto indexInfo = Dune::IndexInfoFromGrid<int, int>();
indexInfo.addLocalIndex(std::make_tuple(0, 0, Dune::OwnerOverlapCopyAttributeSet::copy));
indexInfo.addLocalIndex(std::make_tuple(1, 1, Dune::OwnerOverlapCopyAttributeSet::owner));
indexInfo.addLocalIndex(std::make_tuple(2, 2, Dune::OwnerOverlapCopyAttributeSet::copy));
indexInfo.addRemoteIndex(std::make_tuple(0, 0, Dune::OwnerOverlapCopyAttributeSet::copy));
indexInfo.addRemoteIndex(std::make_tuple(0, 1, Dune::OwnerOverlapCopyAttributeSet::owner));
indexInfo.addRemoteIndex(std::make_tuple(0, 2, Dune::OwnerOverlapCopyAttributeSet::copy));
auto ownerOverlapCopy = Dune::OwnerOverlapCopyCommunication<int>(indexInfo, MPI_COMM_WORLD);
auto xCPU = std::vector<double> {{1.0, 2.0, 3.0}};
auto xGPU = Opm::cuistl::CuVector<double>(xCPU);
auto cuOwnerOverlapCopy
= Opm::cuistl::CuOwnerOverlapCopy<double, 1, Dune::OwnerOverlapCopyCommunication<int>>(ownerOverlapCopy);
double outputDune = -1.0;
auto xDune = xGPU.asDuneBlockVector<1>();
ownerOverlapCopy.dot(xDune, xDune, outputDune);
double output = -1.0;
cuOwnerOverlapCopy.dot(xGPU, xGPU, output);
BOOST_CHECK_EQUAL(outputDune, output);
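// Only the owned middle entry contributes, so the expected value is 2.0 * 2.0 = 4.0.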
BOOST_CHECK_EQUAL(4.0, output);
}

tests/cuistl/test_solver_adapter.cpp View File

@@ -0,0 +1,117 @@
/*
Copyright 2022-2023 SINTEF AS
This file is part of the Open Porous Media project (OPM).
OPM is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
OPM is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with OPM. If not, see <http://www.gnu.org/licenses/>.
*/
#include <config.h>
#define BOOST_TEST_MODULE TestSolverAdapter
#include <boost/test/unit_test.hpp>
#include <dune/istl/solvers.hh>
#include <opm/simulators/linalg/PreconditionerFactory.hpp>
#include <opm/simulators/linalg/PropertyTree.hpp>
#include <opm/simulators/linalg/cuistl/SolverAdapter.hpp>
static const constexpr int dim = 3;
using Matrix = Dune::BCRSMatrix<Dune::FieldMatrix<double, dim, dim>>;
using Vector = Dune::BlockVector<Dune::FieldVector<double, dim>>;
using Moperator = Dune::MatrixAdapter<Matrix, Vector, Vector>;
using PrecondFactory = Opm::PreconditionerFactory<Moperator, Dune::Amg::SequentialInformation>;
using SolverAdapter = Opm::cuistl::SolverAdapter<Moperator, Dune::BiCGSTABSolver, Vector>;
namespace
{
auto
createSolverAdapterWithMatrix(const size_t N = 10)
{
const int nonZeroes = N * 3 - 2;
// We need to hold the matrix in memory somehow, but we don't want to dereference
// a pointer all the time (quality of life...):
auto matrixPtr = std::make_shared<Matrix>(N, N, nonZeroes, Matrix::row_wise);
auto& matrix = *matrixPtr;
for (auto row = matrix.createbegin(); row != matrix.createend(); ++row) {
// Add nonzeros for left neighbour, diagonal and right neighbour
if (row.index() > 0) {
row.insert(row.index() - 1);
}
row.insert(row.index());
if (row.index() < matrix.N() - 1) {
row.insert(row.index() + 1);
}
}
// This might not be the most elegant way of filling in a Dune sparse matrix, but it works.
for (size_t i = 0; i < N; ++i) {
for (int k = 0; k < dim; ++k) {
matrix[i][i][k][k] = -2;
}
if (i < N - 1) {
for (int k = 0; k < dim; ++k) {
matrix[i][i + 1][k][k] = 1;
}
}
if (i > 0) {
for (int k = 0; k < dim; ++k) {
matrix[i][i - 1][k][k] = 1;
}
}
}
auto op = std::make_shared<Moperator>(matrix);
auto sp = std::make_shared<Dune::ScalarProduct<Vector>>();
auto prm = Opm::PropertyTree();
prm.put<double>("relaxation", 1.0);
prm.put<std::string>("type", "CUILU0");
auto prec = PrecondFactory::create(*op, prm);
auto solverAdapter = std::make_shared<SolverAdapter>(*op, sp, prec, 1.0, 10, 0);
return std::make_tuple(matrixPtr, solverAdapter, op);
}
} // namespace
BOOST_AUTO_TEST_CASE(TestCreation)
{
BOOST_CHECK_NO_THROW(createSolverAdapterWithMatrix());
}
BOOST_AUTO_TEST_CASE(TestSolve)
{
const size_t N = 10;
auto [matrix, solverAdapter, op] = createSolverAdapterWithMatrix(N);
Vector xActual(N), xInitial(N), b(N);
for (size_t i = 0; i < N; ++i) {
for (size_t j = 0; j < dim; ++j) {
xActual[i][j] = 1.0;
xInitial[i][j] = 0.1 * i;
}
}
matrix->mv(xActual, b);
Dune::InverseOperatorResult res;
solverAdapter->apply(xInitial, b, res);
for (size_t i = 0; i < N; ++i) {
for (size_t j = 0; j < dim; ++j) {
// This should actually be exact up to rounding, since ILU(0) amounts to a complete
// LU factorization for this tridiagonal matrix.
BOOST_CHECK_CLOSE(xActual[i][j], xInitial[i][j], 1e-13);
}
}
}