From ceb15e22e3d3ade3eb1f306d1db82c7c9b70adc1 Mon Sep 17 00:00:00 2001 From: Kjetil Olsen Lye Date: Wed, 31 May 2023 14:51:00 +0200 Subject: [PATCH] Expose CuISTL solver in FlexibleSolver. --- CMakeLists.txt | 2 + CMakeLists_files.cmake | 14 +- opm/simulators/linalg/FlexibleSolver_impl.hpp | 14 ++ .../linalg/cuistl/CuBlockPreconditioner.hpp | 125 ++++++++++ .../linalg/cuistl/CuOwnerOverlapCopy.hpp | 149 ++++++++++++ .../linalg/cuistl/PreconditionerAdapter.hpp | 11 +- .../linalg/cuistl/PreconditionerHolder.hpp | 43 ++++ .../linalg/cuistl/SolverAdapter.hpp | 214 ++++++++++++++++++ .../linalg/cuistl/detail/has_function.hpp | 37 +++ tests/cuistl/test_cuowneroverlapcopy.cpp | 104 +++++++++ tests/cuistl/test_solver_adapter.cpp | 117 ++++++++++ 11 files changed, 822 insertions(+), 8 deletions(-) create mode 100644 opm/simulators/linalg/cuistl/CuBlockPreconditioner.hpp create mode 100644 opm/simulators/linalg/cuistl/CuOwnerOverlapCopy.hpp create mode 100644 opm/simulators/linalg/cuistl/PreconditionerHolder.hpp create mode 100644 opm/simulators/linalg/cuistl/SolverAdapter.hpp create mode 100644 tests/cuistl/test_cuowneroverlapcopy.cpp create mode 100644 tests/cuistl/test_solver_adapter.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index b96d01690..5d3a208b4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -552,6 +552,8 @@ if(CUDA_FOUND) cuvector cusparsematrix cuseqilu0 + cuowneroverlapcopy + solver_adapter PROPERTIES LABELS gpu_cuda) endif() diff --git a/CMakeLists_files.cmake b/CMakeLists_files.cmake index 9a76bb4a7..cd5f1b340 100644 --- a/CMakeLists_files.cmake +++ b/CMakeLists_files.cmake @@ -172,16 +172,13 @@ if(CUDA_FOUND) list (APPEND PUBLIC_HEADER_FILES opm/simulators/linalg/cuistl/detail/has_function.hpp) list (APPEND PUBLIC_HEADER_FILES opm/simulators/linalg/cuistl/detail/preconditioner_should_call_post_pre.hpp) list (APPEND PUBLIC_HEADER_FILES opm/simulators/linalg/cuistl/PreconditionerAdapter.hpp) -<<<<<<< HEAD list (APPEND PUBLIC_HEADER_FILES opm/simulators/linalg/cuistl/CuSeqILU0.hpp) list (APPEND PUBLIC_HEADER_FILES opm/simulators/linalg/cuistl/detail/fix_zero_diagonal.hpp) list (APPEND PUBLIC_HEADER_FILES opm/simulators/linalg/cuistl/PreconditionerConvertFieldTypeAdapter.hpp) -======= - list (APPEND PUBLIC_HEADER_FILES opm/simulators/linalg/cuistl/PreconditionerConvertFieldTypeAdapter.hpp) - - ->>>>>>> 05ee1a855 (Added conversion preconditioner.) 
- + list (APPEND PUBLIC_HEADER_FILES opm/simulators/linalg/cuistl/CuOwnerOverlapCopy.hpp) + list (APPEND PUBLIC_HEADER_FILES opm/simulators/linalg/cuistl/SolverAdapter.hpp) + list (APPEND PUBLIC_HEADER_FILES opm/simulators/linalg/cuistl/CuBlockPreconditioner.hpp) + list (APPEND PUBLIC_HEADER_FILES opm/simulators/linalg/cuistl/PreconditionerHolder.hpp) endif() if(OPENCL_FOUND) @@ -271,6 +268,9 @@ if(CUDA_FOUND) list(APPEND TEST_SOURCE_FILES tests/cuistl/test_safe_conversion.cpp) list(APPEND TEST_SOURCE_FILES tests/cuistl/test_cuseqilu0.cpp) list(APPEND TEST_SOURCE_FILES tests/cuistl/test_converttofloatadapter.cpp) + list(APPEND TEST_SOURCE_FILES tests/cuistl/test_cuowneroverlapcopy.cpp) + list(APPEND TEST_SOURCE_FILES tests/cuistl/test_solver_adapter.cpp) + endif() if(OPENCL_FOUND) diff --git a/opm/simulators/linalg/FlexibleSolver_impl.hpp b/opm/simulators/linalg/FlexibleSolver_impl.hpp index 8c75cc346..a377ef47d 100644 --- a/opm/simulators/linalg/FlexibleSolver_impl.hpp +++ b/opm/simulators/linalg/FlexibleSolver_impl.hpp @@ -37,6 +37,10 @@ #include #include +#if HAVE_CUDA +#include +#endif + namespace Dune { /// Create a sequential solver. @@ -184,6 +188,16 @@ namespace Dune bool dummy = false; using MatrixType = std::remove_const_tgetmat())>>; linsolver_ = std::make_shared>(linearoperator_for_solver_->getmat(), verbosity, dummy); +#endif +#if HAVE_CUDA + } else if (solver_type == "cubicgstab") { + linsolver_.reset(new Opm::cuistl::SolverAdapter( + *linearoperator_for_solver_, + scalarproduct_, + preconditioner_, + tol, // desired residual reduction factor + maxiter, // maximum number of iterations + verbosity)); #endif } else { OPM_THROW(std::invalid_argument, diff --git a/opm/simulators/linalg/cuistl/CuBlockPreconditioner.hpp b/opm/simulators/linalg/cuistl/CuBlockPreconditioner.hpp new file mode 100644 index 000000000..c5cfde3a3 --- /dev/null +++ b/opm/simulators/linalg/cuistl/CuBlockPreconditioner.hpp @@ -0,0 +1,125 @@ +/* + Copyright 2022-2023 SINTEF AS + + This file is part of the Open Porous Media project (OPM). + + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with OPM. If not, see . +*/ +#ifndef OPM_CUISTL_CUBLOCKPRECONDITIONER_HPP +#define OPM_CUISTL_CUBLOCKPRECONDITIONER_HPP + +#include +#include +#include +#include +#include + +namespace Opm::cuistl +{ +//! @brief Is an adaptation of Dune::BlockPreconditioner that works within the CuISTL framework. +//! +//! @note We aim to intgrate this into OwningBlockPreconditioner (or a relative thereof). +template > +class CuBlockPreconditioner : public Dune::PreconditionerWithUpdate, public PreconditionerHolder +{ +public: + using domain_type = X; + using range_type = Y; + using field_type = typename X::field_type; + using communication_type = C; + + + //! @brief Constructor. + //! + //! constructor gets all parameters to operate the prec. + //! @param p The sequential preconditioner. + //! @param c The communication object for syncing overlap and copy + //! data points. (E.~g. OwnerOverlapCopyCommunication ) + //! 
+    CuBlockPreconditioner(const std::shared_ptr<P>& p, const std::shared_ptr<const communication_type>& c)
+        : m_preconditioner(p)
+        , m_communication(c)
+    {
+    }
+
+    CuBlockPreconditioner(const std::shared_ptr<P>& p, const communication_type& c)
+        : m_preconditioner(p)
+        , m_communication(Dune::stackobject_to_shared_ptr(c))
+    {
+    }
+
+    //! \brief Prepare the preconditioner.
+    //!
+    //! \copydoc Preconditioner::pre(X&,Y&)
+    virtual void pre(X& x, Y& b) override
+    {
+        // TODO: [perf] Do we always need to copy, or only when we need the pre step?
+        m_communication->copyOwnerToAll(x, x); // make Dirichlet values consistent
+        if constexpr (detail::shouldCallPreconditionerPre<P>()) {
+            m_preconditioner->pre(x, b);
+        }
+    }
+
+    //! \brief Apply the preconditioner
+    //!
+    //! \copydoc Preconditioner::apply(X&,const Y&)
+    virtual void apply(X& v, const Y& d) override
+    {
+        m_preconditioner->apply(v, d);
+        m_communication->copyOwnerToAll(v, v);
+    }
+
+
+    virtual void update() override
+    {
+        m_preconditioner->update();
+    }
+
+    virtual void post(X& x) override
+    {
+        if constexpr (detail::shouldCallPreconditionerPost<P>()) {
+            m_preconditioner->post(x);
+        }
+    }
+
+    //! Category of the preconditioner (see SolverCategory::Category)
+    virtual Dune::SolverCategory::Category category() const override
+    {
+        return Dune::SolverCategory::overlapping;
+    }
+
+    static constexpr bool shouldCallPre()
+    {
+        return detail::shouldCallPreconditionerPre<P>();
+    }
+    static constexpr bool shouldCallPost()
+    {
+        return detail::shouldCallPreconditionerPost<P>();
+    }
+
+    virtual std::shared_ptr<Dune::PreconditionerWithUpdate<X, Y>> getUnderlyingPreconditioner() override
+    {
+        return m_preconditioner;
+    }
+
+
+private:
+    //! \brief a sequential preconditioner
+    std::shared_ptr<P>
m_preconditioner; + + //! \brief the communication object + std::shared_ptr m_communication; +}; +} // namespace Opm::cuistl +#endif diff --git a/opm/simulators/linalg/cuistl/CuOwnerOverlapCopy.hpp b/opm/simulators/linalg/cuistl/CuOwnerOverlapCopy.hpp new file mode 100644 index 000000000..a39b98b5e --- /dev/null +++ b/opm/simulators/linalg/cuistl/CuOwnerOverlapCopy.hpp @@ -0,0 +1,149 @@ +/* + Copyright 2022-2023 SINTEF AS + + This file is part of the Open Porous Media project (OPM). + + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with OPM. If not, see . +*/ +#ifndef OPM_CUISTL_CUOWNEROVERLAPCOPY_HPP +#define OPM_CUISTL_CUOWNEROVERLAPCOPY_HPP +#include +#include +#include +#include + +namespace Opm::cuistl +{ +/** + * @brief CUDA compatiable variant of Dune::OwnerOverlapCopyCommunication + * + * This class can essentially be seen as an adapter around Dune::OwnerOverlapCopyCommunication, and should work as + * a Dune::OwnerOverlapCopyCommunication on CuVectors + * + * + * @note This currently only has the functionality to parallelize the linear solve. + * + * @tparam field_type should be a field_type supported by CuVector (double, float) + * @tparam block_size the block size used (this is relevant for say figuring out the correct indices) + * @tparam OwnerOverlapCopyCommunicationType should mimic Dune::OwnerOverlapCopyCommunication. + */ +template +class CuOwnerOverlapCopy +{ +public: + using X = CuVector; + + CuOwnerOverlapCopy(const OwnerOverlapCopyCommunicationType& cpuOwnerOverlapCopy) + : m_cpuOwnerOverlapCopy(cpuOwnerOverlapCopy) + { + } + /** + * @brief dot will carry out the dot product between x and y on the owned indices, then sum up the result across MPI + * processes. + * @param[out] output result will be stored here + * + * @note This uses the same interface as its DUNE equivalent. + */ + void dot(const X& x, const X& y, field_type& output) const + { + std::call_once(m_initializedIndices, [&]() { initIndexSet(); }); + + const auto dotAtRank = x.dot(y, *m_indicesOwner); + output = m_cpuOwnerOverlapCopy.communicator().sum(dotAtRank); + } + + /** + * @brief norm computes the l^2-norm of x across processes. + * + * This will compute the dot product of x with itself on owned indices, then + * sum the result across process and return the square root of the sum. 
+ */ + field_type norm(const X& x) const + { + auto xDotX = field_type(0); + this->dot(x, x, xDotX); + + using std::sqrt; + return sqrt(xDotX); + } + + /** + * @brief project will project x to the owned subspace + * + * For each component i which is not owned, x_i will be set to 0 + * + * @param[inout] x the vector to project + */ + void project(X& x) const + { + std::call_once(m_initializedIndices, [&]() { initIndexSet(); }); + x.setZeroAtIndexSet(*m_indicesCopy); + } + + /** + * @brief copyOwnerToAll will copy source to the CPU, then call OwnerOverlapCopyCommunicationType::copyOwnerToAll on + * the copied data, and copy the result back to the GPU + * @param[in] source + * @param[out] dest + */ + void copyOwnerToAll(const X& source, X& dest) const + { + // TODO: [perf] Can we reduce copying from the GPU here? + // TODO: [perf] Maybe create a global buffer instead? + auto sourceAsDuneVector = source.template asDuneBlockVector(); + auto destAsDuneVector = dest.template asDuneBlockVector(); + m_cpuOwnerOverlapCopy.copyOwnerToAll(sourceAsDuneVector, destAsDuneVector); + dest.copyFromHost(destAsDuneVector); + } + + + +private: + const OwnerOverlapCopyCommunicationType& m_cpuOwnerOverlapCopy; + + // Used to call the initIndexSet. Note that this is kind of a + // premature optimization, in the sense that we could just initialize these indices + // always, but they are not always used. + mutable std::once_flag m_initializedIndices; + mutable std::unique_ptr> m_indicesCopy; + mutable std::unique_ptr> m_indicesOwner; + + + void initIndexSet() const + { + // We need indices that we we will use in the project, dot and norm calls. + // TODO: [premature perf] Can this be run once per instance? Or do we need to rebuild every time? + const auto& pis = m_cpuOwnerOverlapCopy.indexSet(); + std::vector indicesCopyOnCPU; + std::vector indicesOwnerCPU; + for (const auto& index : pis) { + if (index.local().attribute() == Dune::OwnerOverlapCopyAttributeSet::copy) { + for (int component = 0; component < block_size; ++component) { + indicesCopyOnCPU.push_back(index.local().local() * block_size + component); + } + } + + if (index.local().attribute() == Dune::OwnerOverlapCopyAttributeSet::owner) { + for (int component = 0; component < block_size; ++component) { + indicesOwnerCPU.push_back(index.local().local() * block_size + component); + } + } + } + + m_indicesCopy = std::make_unique>(indicesCopyOnCPU); + m_indicesOwner = std::make_unique>(indicesOwnerCPU); + } +}; +} // namespace Opm::cuistl +#endif diff --git a/opm/simulators/linalg/cuistl/PreconditionerAdapter.hpp b/opm/simulators/linalg/cuistl/PreconditionerAdapter.hpp index 88e83011d..f4f382538 100644 --- a/opm/simulators/linalg/cuistl/PreconditionerAdapter.hpp +++ b/opm/simulators/linalg/cuistl/PreconditionerAdapter.hpp @@ -22,6 +22,7 @@ #include #include #include +#include #include @@ -36,7 +37,9 @@ namespace Opm::cuistl //! \tparam Y the range type (should be on the CPU). Typicall a Dune::BlockVector //! \tparam CudaPreconditionerType the preconditioner taking CuVector as arguments to apply template -class PreconditionerAdapter : public Dune::PreconditionerWithUpdate +class PreconditionerAdapter + : public Dune::PreconditionerWithUpdate, + public PreconditionerHolder, CuVector> { public: //! \brief The domain type of the preconditioner. @@ -114,6 +117,12 @@ public: return detail::shouldCallPreconditionerPre(); } + virtual std::shared_ptr, CuVector>> + getUnderlyingPreconditioner() override + { + return m_underlyingPreconditioner; + } + private: //! 
\brief the underlying preconditioner to use std::shared_ptr m_underlyingPreconditioner; diff --git a/opm/simulators/linalg/cuistl/PreconditionerHolder.hpp b/opm/simulators/linalg/cuistl/PreconditionerHolder.hpp new file mode 100644 index 000000000..bd0732414 --- /dev/null +++ b/opm/simulators/linalg/cuistl/PreconditionerHolder.hpp @@ -0,0 +1,43 @@ +/* + Copyright 2022-2023 SINTEF AS + + This file is part of the Open Porous Media project (OPM). + + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with OPM. If not, see . +*/ +#ifndef OPM_CUISTL_PRECONDITIONERHOLDER_HPP +#define OPM_CUISTL_PRECONDITIONERHOLDER_HPP +#include + + +namespace Opm::cuistl +{ +//! \brief Common interface for adapters that hold preconditioners. +//! +//! \note The goal is that this class will be made useless after further +//! restructuring of the solver interfaces (FlexibleSolver and friends), but +//! for this is needed as an intermediate layer. See specifically SolverAdapter.hpp +//! for how this is used. +template +class PreconditionerHolder +{ +public: + /** + * @brief getUnderlyingPreconditioner gets the underlying preconditioner (preconditioner being held) + */ + virtual std::shared_ptr> getUnderlyingPreconditioner() = 0; +}; +} // end namespace Opm::cuistl + +#endif diff --git a/opm/simulators/linalg/cuistl/SolverAdapter.hpp b/opm/simulators/linalg/cuistl/SolverAdapter.hpp new file mode 100644 index 000000000..95589015a --- /dev/null +++ b/opm/simulators/linalg/cuistl/SolverAdapter.hpp @@ -0,0 +1,214 @@ +/* + Copyright 2022-2023 SINTEF AS + + This file is part of the Open Porous Media project (OPM). + + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with OPM. If not, see . +*/ +#ifndef OPM_SOLVERADAPTER_HPP +#define OPM_SOLVERADAPTER_HPP + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +namespace Opm::cuistl +{ +//! @brief Wraps a CUDA solver to work with CPU data. +//! +//! @tparam Operator the Dune::LinearOperator to use +//! @tparam UnderlyingSolver a Dune solver like class, eg Dune::BiCGSTABSolver +//! @tparam X the outer type to use (eg. 
Dune::BlockVector>) +template class UnderlyingSolver, class X> +class SolverAdapter : public Dune::IterativeSolver +{ +public: + using typename Dune::IterativeSolver::domain_type; + using typename Dune::IterativeSolver::range_type; + using typename Dune::IterativeSolver::field_type; + using typename Dune::IterativeSolver::real_type; + using typename Dune::IterativeSolver::scalar_real_type; + static constexpr auto block_size = domain_type::block_type::dimension; + using XGPU = Opm::cuistl::CuVector; + + // TODO: Use a std::forward + SolverAdapter(Operator& op, + std::shared_ptr> sp, + std::shared_ptr> prec, + scalar_real_type reduction, + int maxit, + int verbose) + : Dune::IterativeSolver(op, *sp, *prec, reduction, maxit, verbose) + , m_opOnCPUWithMatrix(op) + , m_matrix(CuSparseMatrix::fromMatrix(op.getmat())) + , m_underlyingSolver(constructSolver(prec, reduction, maxit, verbose)) + { + } + + virtual void apply(X& x, X& b, double reduction, Dune::InverseOperatorResult& res) override + { + // TODO: Can we do this without reimplementing the other function? + // TODO: [perf] Do we need to update the matrix every time? Probably yes + m_matrix.updateNonzeroValues(m_opOnCPUWithMatrix.getmat()); + + if (!m_inputBuffer) { + m_inputBuffer.reset(new XGPU(b.dim())); + m_outputBuffer.reset(new XGPU(x.dim())); + } + + m_inputBuffer->copyFromHost(b); + // TODO: [perf] do we need to copy x here? + m_outputBuffer->copyFromHost(x); + + m_underlyingSolver.apply(*m_outputBuffer, *m_inputBuffer, reduction, res); + + // TODO: [perf] do we need to copy b here? + m_inputBuffer->copyToHost(b); + m_outputBuffer->copyToHost(x); + } + virtual void apply(X& x, X& b, Dune::InverseOperatorResult& res) override + { + // TODO: [perf] Do we need to update the matrix every time? Probably yes + m_matrix.updateNonzeroValues(m_opOnCPUWithMatrix.getmat()); + + if (!m_inputBuffer) { + m_inputBuffer.reset(new XGPU(b.dim())); + m_outputBuffer.reset(new XGPU(x.dim())); + } + + m_inputBuffer->copyFromHost(b); + // TODO: [perf] do we need to copy x here? + m_outputBuffer->copyFromHost(x); + + m_underlyingSolver.apply(*m_outputBuffer, *m_inputBuffer, res); + + // TODO: [perf] do we need to copy b here? + m_inputBuffer->copyToHost(b); + m_outputBuffer->copyToHost(x); + } + +private: + Operator& m_opOnCPUWithMatrix; + CuSparseMatrix m_matrix; + + UnderlyingSolver m_underlyingSolver; + + + // TODO: Use a std::forward + UnderlyingSolver constructSolver(std::shared_ptr> prec, + scalar_real_type reduction, + int maxit, + int verbose) + { + + OPM_ERROR_IF( + detail::is_a_well_operator::value, + "Currently we only support operators of type MatrixAdapter in the CUDA solver. " + "Use --matrix-add-well-contributions=true. " + "Using WellModelMatrixAdapter with SolverAdapter is not well-defined so it will not work well, or at all."); + + + + + + if constexpr (detail::has_communication::value) { + // TODO: See the below TODO over the definition of precHolder in the other branch + // TODO: We are currently double wrapping preconditioners in the preconditioner factory to be extra + // compatible + // with CPU. Probably a cleaner way eventually would be to do more modifications to the flexible + // solver to accomodate the pure GPU better. + auto precAsHolder = std::dynamic_pointer_cast>(prec); + if (!precAsHolder) { + OPM_THROW(std::invalid_argument, + "The preconditioner needs to be a CUDA preconditioner (eg. CuILU0) wrapped in a " + "Opm::cuistl::PreconditionerAdapter wrapped in a " + "Opm::cuistl::CuBlockPreconditioner. 
If you are unsure what this means, set " + "preconditioner to 'CUILU0'"); // TODO: Suggest a better preconditioner + } + + auto preconditionerAdapter = precAsHolder->getUnderlyingPreconditioner(); + auto preconditionerAdapterAsHolder + = std::dynamic_pointer_cast>(preconditionerAdapter); + if (!preconditionerAdapterAsHolder) { + OPM_THROW(std::invalid_argument, + "The preconditioner needs to be a CUDA preconditioner (eg. CuILU0) wrapped in a " + "Opm::cuistl::PreconditionerAdapter wrapped in a " + "Opm::cuistl::CuBlockPreconditioner. If you are unsure what this means, set " + "preconditioner to 'CUILU0'"); // TODO: Suggest a better preconditioner + } + // We need to get the underlying preconditioner: + auto preconditionerReallyOnGPU = preconditionerAdapterAsHolder->getUnderlyingPreconditioner(); + const auto& communication = m_opOnCPUWithMatrix.getCommunication(); + + using CudaCommunication = CuOwnerOverlapCopy; + using SchwarzOperator + = Dune::OverlappingSchwarzOperator, XGPU, XGPU, CudaCommunication>; + auto cudaCommunication = std::make_shared(communication); + + auto mpiPreconditioner = std::make_shared>( + preconditionerReallyOnGPU, cudaCommunication); + + auto scalarProduct = std::make_shared>( + cudaCommunication, m_opOnCPUWithMatrix.category()); + + + // NOTE: Ownsership of cudaCommunication is handled by mpiPreconditioner. However, just to make sure we + // remember + // this, we add this check. So remember that we hold one count in this scope, and one in the + // CuBlockPreconditioner instance. We accomedate for the fact that it could be passed around in + // CuBlockPreconditioner, hence we do not test for != 2 + OPM_ERROR_IF(cudaCommunication.use_count() < 2, "Internal error. Shared pointer not owned properly."); + auto overlappingCudaOperator = std::make_shared(m_matrix, *cudaCommunication); + + return UnderlyingSolver( + overlappingCudaOperator, scalarProduct, mpiPreconditioner, reduction, maxit, verbose); + } else { + // TODO: Fix the reliance on casting here. This is a code smell to a certain degree, and assumes + // a certain setup beforehand. The only reason we do it this way is to minimize edits to the + // flexible solver. We could design it differently, but keep this for the time being until + // we figure out how we want to GPU-ify the rest of the system. + auto precAsHolder = std::dynamic_pointer_cast>(prec); + if (!precAsHolder) { + OPM_THROW(std::invalid_argument, + "The preconditioner needs to be a CUDA preconditioner wrapped in a " + "Opm::cuistl::PreconditionerHolder (eg. CuILU0)."); + } + auto preconditionerOnGPU = precAsHolder->getUnderlyingPreconditioner(); + + auto matrixOperator + = std::make_shared, XGPU, XGPU>>(m_matrix); + auto scalarProduct = std::make_shared>(); + return UnderlyingSolver( + matrixOperator, scalarProduct, preconditionerOnGPU, reduction, maxit, verbose); + } + } + + std::unique_ptr m_inputBuffer; + std::unique_ptr m_outputBuffer; +}; +} // namespace Opm::cuistl + +#endif diff --git a/opm/simulators/linalg/cuistl/detail/has_function.hpp b/opm/simulators/linalg/cuistl/detail/has_function.hpp index 2a7b10501..ebc395d78 100644 --- a/opm/simulators/linalg/cuistl/detail/has_function.hpp +++ b/opm/simulators/linalg/cuistl/detail/has_function.hpp @@ -84,5 +84,42 @@ public: static constexpr bool value = std::is_same_v(0)), std::true_type>; }; + +/** + * @brief The has_communication class checks if the type has the member function getCommunication. 
+ * + * This is used in the SolverAdapter to check if the operator has a communication, and it will then select a different + * path accordingly. + */ +template +class has_communication +{ + template + static std::true_type test(decltype(&U::getCommunication)); + template + static std::false_type test(...); + +public: + static constexpr bool value = std::is_same_v(0)), std::true_type>; +}; + +/** + * @brief The is_a_well_operator class tries to guess if the operator is a well type operator + * + * @note This is mainly done in the solver adapter to detect incompatible runtime arguments. When the GPU linear solve + * paths supports wells, this class can be removed. + */ +template +class is_a_well_operator +{ + template + static std::true_type test(decltype(&U::addWellPressureEquations)); + template + static std::false_type test(...); + +public: + static constexpr bool value = std::is_same_v(0)), std::true_type>; +}; + } // namespace Opm::cuistl::detail #endif diff --git a/tests/cuistl/test_cuowneroverlapcopy.cpp b/tests/cuistl/test_cuowneroverlapcopy.cpp new file mode 100644 index 000000000..74e127444 --- /dev/null +++ b/tests/cuistl/test_cuowneroverlapcopy.cpp @@ -0,0 +1,104 @@ +/* + Copyright 2023 SINTEF AS + + This file is part of the Open Porous Media project (OPM). + + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with OPM. If not, see . +*/ +#include + +#define BOOST_TEST_MODULE TestCuOwnerOverlapCopy +#define BOOST_TEST_NO_MAIN + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +bool +init_unit_test_func() +{ + return true; +} + +int +main(int argc, char** argv) +{ + [[maybe_unused]] const auto& helper = Dune::MPIHelper::instance(argc, argv); + boost::unit_test::unit_test_main(&init_unit_test_func, argc, argv); +} + +BOOST_AUTO_TEST_CASE(TestProject) +{ + + // We're going to have three points: Centered is owned, left and right is copied. We assume a periodic domain + // ([0,1]/0~1) + auto indexInfo = Dune::IndexInfoFromGrid(); + indexInfo.addLocalIndex(std::make_tuple(0, 0, Dune::OwnerOverlapCopyAttributeSet::copy)); + indexInfo.addLocalIndex(std::make_tuple(1, 1, Dune::OwnerOverlapCopyAttributeSet::owner)); + indexInfo.addLocalIndex(std::make_tuple(2, 2, Dune::OwnerOverlapCopyAttributeSet::copy)); + + auto ownerOverlapCopy = Dune::OwnerOverlapCopyCommunication(indexInfo, MPI_COMM_WORLD); + auto xCPU = std::vector {{1.0, 2.0, 3.0}}; + auto xGPU = Opm::cuistl::CuVector(xCPU); + + auto cuOwnerOverlapCopy + = Opm::cuistl::CuOwnerOverlapCopy>(ownerOverlapCopy); + + cuOwnerOverlapCopy.project(xGPU); + + auto resultOfProject = xGPU.asStdVector(); + + BOOST_CHECK_EQUAL(0.0, resultOfProject[0]); + BOOST_CHECK_EQUAL(2.0, resultOfProject[1]); + BOOST_CHECK_EQUAL(0.0, resultOfProject[2]); +} + + +BOOST_AUTO_TEST_CASE(TestDot) +{ + + // We're going to have three points: Centered is owned, left and right is copied. 
We assume a periodic domain + // ([0,1]/0~1) + auto indexInfo = Dune::IndexInfoFromGrid(); + indexInfo.addLocalIndex(std::make_tuple(0, 0, Dune::OwnerOverlapCopyAttributeSet::copy)); + indexInfo.addLocalIndex(std::make_tuple(1, 1, Dune::OwnerOverlapCopyAttributeSet::owner)); + indexInfo.addLocalIndex(std::make_tuple(2, 2, Dune::OwnerOverlapCopyAttributeSet::copy)); + + indexInfo.addRemoteIndex(std::make_tuple(0, 0, Dune::OwnerOverlapCopyAttributeSet::copy)); + indexInfo.addRemoteIndex(std::make_tuple(0, 1, Dune::OwnerOverlapCopyAttributeSet::owner)); + indexInfo.addRemoteIndex(std::make_tuple(0, 2, Dune::OwnerOverlapCopyAttributeSet::copy)); + auto ownerOverlapCopy = Dune::OwnerOverlapCopyCommunication(indexInfo, MPI_COMM_WORLD); + auto xCPU = std::vector {{1.0, 2.0, 3.0}}; + auto xGPU = Opm::cuistl::CuVector(xCPU); + + auto cuOwnerOverlapCopy + = Opm::cuistl::CuOwnerOverlapCopy>(ownerOverlapCopy); + + double outputDune = -1.0; + auto xDune = xGPU.asDuneBlockVector<1>(); + ownerOverlapCopy.dot(xDune, xDune, outputDune); + + double output = -1.0; + cuOwnerOverlapCopy.dot(xGPU, xGPU, output); + + + BOOST_CHECK_EQUAL(outputDune, output); + BOOST_CHECK_EQUAL(4.0, output); +} diff --git a/tests/cuistl/test_solver_adapter.cpp b/tests/cuistl/test_solver_adapter.cpp new file mode 100644 index 000000000..46661d891 --- /dev/null +++ b/tests/cuistl/test_solver_adapter.cpp @@ -0,0 +1,117 @@ +/* + Copyright 2022-2023 SINTEF AS + + This file is part of the Open Porous Media project (OPM). + + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with OPM. If not, see . +*/ +#include + +#define BOOST_TEST_MODULE TestSolverAdapter + +#include +#include +#include +#include +#include + +static const constexpr int dim = 3; +using Matrix = Dune::BCRSMatrix>; +using Vector = Dune::BlockVector>; +using Moperator = Dune::MatrixAdapter; +using PrecondFactory = Opm::PreconditionerFactory; +using SolverAdapter = Opm::cuistl::SolverAdapter; + +namespace +{ +auto +createSolverAdapterWithMatrix(const size_t N = 10) +{ + + + const int nonZeroes = N * 3 - 2; + + // We need to hold the matrix in memory somehow, but we don't want to deference + // a pointer all the time (quality of life...): + auto matrixPtr = std::make_shared(N, N, nonZeroes, Matrix::row_wise); + auto& matrix = *matrixPtr; + for (auto row = matrix.createbegin(); row != matrix.createend(); ++row) { + // Add nonzeros for left neighbour, diagonal and right neighbour + if (row.index() > 0) { + row.insert(row.index() - 1); + } + row.insert(row.index()); + if (row.index() < matrix.N() - 1) { + row.insert(row.index() + 1); + } + } + // This might not be the most elegant way of filling in a Dune sparse matrix, but it works. 
+ for (size_t i = 0; i < N; ++i) { + for (int k = 0; k < dim; ++k) { + matrix[i][i][k][k] = -2; + } + if (i < N - 1) { + for (int k = 0; k < dim; ++k) { + matrix[i][i + 1][k][k] = 1; + } + } + + if (i > 0) { + for (int k = 0; k < dim; ++k) { + matrix[i][i - 1][k][k] = 1; + } + } + } + auto op = std::make_shared(matrix); + auto sp = std::make_shared>(); + auto prm = Opm::PropertyTree(); + prm.put("relaxation", 1.0); + prm.put("type", "CUILU0"); + auto prec = PrecondFactory::create(*op, prm); + auto solverAdapter = std::make_shared(*op, sp, prec, 1.0, 10, 0); + + return std::make_tuple(matrixPtr, solverAdapter, op); +} +} // namespace + +BOOST_AUTO_TEST_CASE(TestCreation) +{ + BOOST_CHECK_NO_THROW(createSolverAdapterWithMatrix();); +} + +BOOST_AUTO_TEST_CASE(TestSolve) +{ + const size_t N = 10; + auto [matrix, solverAdapter, op] = createSolverAdapterWithMatrix(N); + + Vector xActual(N), xInitial(N), b(N); + for (size_t i = 0; i < N; ++i) { + for (size_t j = 0; j < dim; ++j) { + xActual[i][j] = 1.0; + xInitial[i][j] = 0.1 * i; + } + } + + matrix->mv(xActual, b); + Dune::InverseOperatorResult res; + solverAdapter->apply(xInitial, b, res); + + for (size_t i = 0; i < N; ++i) { + for (size_t j = 0; j < dim; ++j) { + // This should actually be up to rounding exact since ILU is just the inverse + // for this matrix. + BOOST_CHECK_CLOSE(xActual[i][j], xInitial[i][j], 1e-13); + } + } +}
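
Usage note (illustrative, not part of the diff): the new HAVE_CUDA branch in FlexibleSolver_impl.hpp is taken when the linear-solver property tree requests the "cubicgstab" solver, and the error messages in SolverAdapter.hpp suggest pairing it with the CUILU0 preconditioner. The sketch below shows such a configuration; apart from "cubicgstab" and "CUILU0", which appear in this patch, the key names ("solver", "tol", "maxiter", "verbosity", "preconditioner.*") are assumed to follow the existing FlexibleSolver conventions and may need adjusting.

#include <opm/simulators/linalg/PropertyTree.hpp>

#include <string>

// Sketch of a property tree that should route FlexibleSolver to the new GPU path.
// Key names other than the "cubicgstab"/"CUILU0" values are assumptions.
Opm::PropertyTree makeGpuSolverConfig()
{
    Opm::PropertyTree prm;
    prm.put("solver", std::string("cubicgstab"));           // handled by the new HAVE_CUDA branch
    prm.put("tol", 1e-2);                                    // desired residual reduction factor
    prm.put("maxiter", 200);
    prm.put("verbosity", 0);
    prm.put("preconditioner.type", std::string("CUILU0"));   // CUDA ILU0 behind PreconditionerAdapter
    prm.put("preconditioner.relaxation", 1.0);
    return prm;
}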
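
Similarly, a minimal sketch of using the new CuOwnerOverlapCopy wrapper on its own, mirroring the single-rank setup from tests/cuistl/test_cuowneroverlapcopy.cpp (one owned point flanked by two copy points). MPI is assumed to be initialized already (e.g. through Dune::MPIHelper, as in the test's main), and the int/int Dune index types are illustrative.

#include <mpi.h>

#include <dune/istl/owneroverlapcopy.hh>
#include <opm/simulators/linalg/cuistl/CuOwnerOverlapCopy.hpp>
#include <opm/simulators/linalg/cuistl/CuVector.hpp>

#include <tuple>
#include <vector>

// Returns the process-consistent l^2 norm of a small vector. Only the owned
// index (the middle one) contributes, so on a single rank this yields 2.0.
double computeConsistentNorm()
{
    using Communication = Dune::OwnerOverlapCopyCommunication<int, int>;

    // One owned point (local index 1) and two copied points (0 and 2).
    Dune::IndexInfoFromGrid<int, int> indexInfo;
    indexInfo.addLocalIndex(std::make_tuple(0, 0, Dune::OwnerOverlapCopyAttributeSet::copy));
    indexInfo.addLocalIndex(std::make_tuple(1, 1, Dune::OwnerOverlapCopyAttributeSet::owner));
    indexInfo.addLocalIndex(std::make_tuple(2, 2, Dune::OwnerOverlapCopyAttributeSet::copy));
    Communication cpuComm(indexInfo, MPI_COMM_WORLD);

    // Move the data to the GPU and wrap the CPU communication object.
    std::vector<double> xCPU {1.0, 2.0, 3.0};
    Opm::cuistl::CuVector<double> xGPU(xCPU);
    Opm::cuistl::CuOwnerOverlapCopy<double, 1, Communication> gpuComm(cpuComm);

    return gpuComm.norm(xGPU);
}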