From 68322c06e55467855293d248f4dec6a345e5bdda Mon Sep 17 00:00:00 2001 From: hnil Date: Wed, 9 Aug 2023 12:57:05 +0200 Subject: [PATCH] added forgotten GPU versions --- .../linalg/ISTLSolverEbosWithGPU.cpp | 250 ++++++++++++++++ .../linalg/ISTLSolverEbosWithGPU.hpp | 280 ++++++++++++++++++ 2 files changed, 530 insertions(+) create mode 100644 opm/simulators/linalg/ISTLSolverEbosWithGPU.cpp create mode 100644 opm/simulators/linalg/ISTLSolverEbosWithGPU.hpp diff --git a/opm/simulators/linalg/ISTLSolverEbosWithGPU.cpp b/opm/simulators/linalg/ISTLSolverEbosWithGPU.cpp new file mode 100644 index 000000000..dff8c4af6 --- /dev/null +++ b/opm/simulators/linalg/ISTLSolverEbosWithGPU.cpp @@ -0,0 +1,250 @@ +/* + Copyright 2016 IRIS AS + Copyright 2019, 2020 Equinor ASA + Copyright 2020 SINTEF Digital, Mathematics and Cybernetics + + This file is part of the Open Porous Media project (OPM). + + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with OPM. If not, see . +*/ + +#include +#include +#include + +#include + +#include + +#include +#include +#include + +#include + +#if COMPILE_BDA_BRIDGE +#include +#include +#endif + +#if HAVE_DUNE_ALUGRID +#include +#include +#endif // HAVE_DUNE_ALUGRID + +#include + +namespace Opm { +namespace detail { + +#if COMPILE_BDA_BRIDGE +template +BdaSolverInfo:: +BdaSolverInfo(const std::string& accelerator_mode, + const int linear_solver_verbosity, + const int maxit, + const double tolerance, + const int platformID, + const int deviceID, + const bool opencl_ilu_parallel, + const std::string& linsolver) + : bridge_(std::make_unique(accelerator_mode, + linear_solver_verbosity, maxit, + tolerance, platformID, deviceID, + opencl_ilu_parallel, linsolver)) + , accelerator_mode_(accelerator_mode) +{} + +template +BdaSolverInfo::~BdaSolverInfo() = default; + +template +template +void BdaSolverInfo:: +prepare(const Grid& grid, + const Dune::CartesianIndexMapper& cartMapper, + const std::vector& wellsForConn, + const std::vector& cellPartition, + const size_t nonzeroes, + const bool useWellConn) +{ + if (numJacobiBlocks_ > 1) { + detail::setWellConnections(grid, cartMapper, wellsForConn, + useWellConn, + wellConnectionsGraph_, + numJacobiBlocks_); + this->blockJacobiAdjacency(grid, cellPartition, nonzeroes); + } +} + +template +bool BdaSolverInfo:: +apply(Vector& rhs, + const bool useWellConn, + WellContribFunc getContribs, + const int rank, + Matrix& matrix, + Vector& x, + Dune::InverseOperatorResult& result) +{ + bool use_gpu = bridge_->getUseGpu(); + if (use_gpu) { + auto wellContribs = WellContributions::create(accelerator_mode_, useWellConn); + bridge_->initWellContributions(*wellContribs, x.N() * x[0].N()); + + // the WellContributions can only be applied separately with CUDA or OpenCL, not with amgcl or rocalution +#if HAVE_CUDA || HAVE_OPENCL + if (!useWellConn) { + getContribs(*wellContribs); + } +#endif + + if (numJacobiBlocks_ > 1) { + this->copyMatToBlockJac(matrix, *blockJacobiForGPUILU0_); + // Const_cast needed since the CUDA stuff overwrites values for better matrix condition.. + bridge_->solve_system(&matrix, blockJacobiForGPUILU0_.get(), + numJacobiBlocks_, rhs, *wellContribs, result); + } + else + bridge_->solve_system(&matrix, &matrix, + numJacobiBlocks_, rhs, *wellContribs, result); + if (result.converged) { + // get result vector x from non-Dune backend, iff solve was successful + bridge_->get_result(x); + return true; + } else { + // warn about CPU fallback + // BdaBridge might have disabled its BdaSolver for this simulation due to some error + // in that case the BdaBridge is disabled and flexibleSolver is always used + // or maybe the BdaSolver did not converge in time, then it will be used next linear solve + if (rank == 0) { + OpmLog::warning(bridge_->getAccleratorName() + " did not converge, now trying Dune to solve current linear system..."); + } + } + } + + return false; +} + +template +bool BdaSolverInfo:: +gpuActive() +{ + return bridge_->getUseGpu(); +} + +template +template +void BdaSolverInfo:: +blockJacobiAdjacency(const Grid& grid, + const std::vector& cell_part, + size_t nonzeroes) +{ + using size_type = typename Matrix::size_type; + using Iter = typename Matrix::CreateIterator; + size_type numCells = grid.size(0); + blockJacobiForGPUILU0_ = std::make_unique(numCells, numCells, + nonzeroes, Matrix::row_wise); + + const auto& lid = grid.localIdSet(); + const auto& gridView = grid.leafGridView(); + auto elemIt = gridView.template begin<0>(); // should never overrun, since blockJacobiForGPUILU0_ is initialized with numCells rows + + //Loop over cells + for (Iter row = blockJacobiForGPUILU0_->createbegin(); row != blockJacobiForGPUILU0_->createend(); ++elemIt, ++row) + { + const auto& elem = *elemIt; + size_type idx = lid.id(elem); + row.insert(idx); + + // Add well non-zero connections + for (const auto wc : wellConnectionsGraph_[idx]) { + row.insert(wc); + } + + int locPart = cell_part[idx]; + + //Add neighbor if it is on the same part + auto isend = gridView.iend(elem); + for (auto is = gridView.ibegin(elem); is!=isend; ++is) + { + //check if face has neighbor + if (is->neighbor()) + { + size_type nid = lid.id(is->outside()); + int nabPart = cell_part[nid]; + if (locPart == nabPart) { + row.insert(nid); + } + } + } + } +} + +template +void BdaSolverInfo:: +copyMatToBlockJac(const Matrix& mat, Matrix& blockJac) +{ + auto rbegin = blockJac.begin(); + auto rend = blockJac.end(); + auto outerRow = mat.begin(); + for (auto row = rbegin; row != rend; ++row, ++outerRow) { + auto outerCol = (*outerRow).begin(); + for (auto col = (*row).begin(); col != (*row).end(); ++col) { + // outerRow is guaranteed to have all column entries that row has! + while(outerCol.index() < col.index()) ++outerCol; + assert(outerCol.index() == col.index()); + *col = *outerCol; // copy nonzero block + } + } +} +#endif // COMPILE_BDA_BRIDGE + + +#if COMPILE_BDA_BRIDGE + +#define INSTANCE_GRID(Dim, Grid) \ + template void BdaSolverInfo,BV>:: \ + prepare(const Grid&, \ + const Dune::CartesianIndexMapper&, \ + const std::vector&, \ + const std::vector&, \ + const size_t, const bool); +using PolyHedralGrid3D = Dune::PolyhedralGrid<3, 3>; +#if HAVE_DUNE_ALUGRID +#if HAVE_MPI + using ALUGrid3CN = Dune::ALUGrid<3, 3, Dune::cube, Dune::nonconforming, Dune::ALUGridMPIComm>; +#else + using ALUGrid3CN = Dune::ALUGrid<3, 3, Dune::cube, Dune::nonconforming, Dune::ALUGridNoComm>; +#endif //HAVE_MPI +#define INSTANCE(Dim) \ + template struct BdaSolverInfo,BV>; \ + INSTANCE_GRID(Dim,Dune::CpGrid) \ + INSTANCE_GRID(Dim,ALUGrid3CN) \ + INSTANCE_GRID(Dim,PolyHedralGrid3D) \ + INSTANCE_FLEX(Dim) +#else +#define INSTANCE(Dim) \ + template struct BdaSolverInfo,BV>; \ + INSTANCE_GRID(Dim,Dune::CpGrid) \ + INSTANCE_GRID(Dim,PolyHedralGrid3D) \ + INSTANCE_FLEX(Dim) +#endif +#else +#define INSTANCE(Dim) \ + INSTANCE_FLEX(Dim) +#endif // COMPILE_BDA_BRIDGE + +} +} diff --git a/opm/simulators/linalg/ISTLSolverEbosWithGPU.hpp b/opm/simulators/linalg/ISTLSolverEbosWithGPU.hpp new file mode 100644 index 000000000..6c0cb5cc8 --- /dev/null +++ b/opm/simulators/linalg/ISTLSolverEbosWithGPU.hpp @@ -0,0 +1,280 @@ +/* + Copyright 2016 IRIS AS + Copyright 2019, 2020 Equinor ASA + Copyright 2020 SINTEF Digital, Mathematics and Cybernetics + + This file is part of the Open Porous Media project (OPM). + + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with OPM. If not, see . +*/ + +#ifndef OPM_ISTLSOLVER_EBOS_HEADER_INCLUDED +#define OPM_ISTLSOLVER_EBOS_HEADER_INCLUDED +#if COMPILE_BDA_BRIDGE + +#include + +namespace Opm::Properties { + + +namespace Opm +{ + +template class BdaBridge; +class WellContributions; +template +struct BdaSolverInfo +{ + using WellContribFunc = std::function; + using Bridge = BdaBridge; + + BdaSolverInfo(const std::string& accelerator_mode, + const int linear_solver_verbosity, + const int maxit, + const double tolerance, + const int platformID, + const int deviceID, + const bool opencl_ilu_parallel, + const std::string& linsolver); + + ~BdaSolverInfo(); + + template + void prepare(const Grid& grid, + const Dune::CartesianIndexMapper& cartMapper, + const std::vector& wellsForConn, + const std::vector& cellPartition, + const size_t nonzeroes, + const bool useWellConn); + + bool apply(Vector& rhs, + const bool useWellConn, + WellContribFunc getContribs, + const int rank, + Matrix& matrix, + Vector& x, + Dune::InverseOperatorResult& result); + + bool gpuActive(); + + int numJacobiBlocks_ = 0; + +private: + /// Create sparsity pattern for block-Jacobi matrix based on partitioning of grid. + /// Do not initialize the values, that is done in copyMatToBlockJac() + template + void blockJacobiAdjacency(const Grid& grid, + const std::vector& cell_part, + size_t nonzeroes); + + void copyMatToBlockJac(const Matrix& mat, Matrix& blockJac); + + std::unique_ptr bridge_; + std::string accelerator_mode_; + std::unique_ptr blockJacobiForGPUILU0_; + std::vector> wellConnectionsGraph_; +}; + + + +/// Create sparsity pattern for block-Jacobi matrix based on partitioning of grid. +/// Do not initialize the values, that is done in copyMatToBlockJac() +} + + /// This class solves the fully implicit black-oil system by + /// solving the reduced system (after eliminating well variables) + /// as a block-structured matrix (one block for all cell variables) for a fixed + /// number of cell variables np . + template + class ISTLSolverEbosWithGPU : public ISTLSolverEbos + { + protected: + using GridView = GetPropType; + using Scalar = GetPropType; + using SparseMatrixAdapter = GetPropType; + using Vector = GetPropType; + using Indices = GetPropType; + using WellModel = GetPropType; + using Simulator = GetPropType; + using Matrix = typename SparseMatrixAdapter::IstlMatrix; + using ThreadManager = GetPropType; + using ElementContext = GetPropType; + using AbstractSolverType = Dune::InverseOperator; + using AbstractOperatorType = Dune::AssembledLinearOperator; + using AbstractPreconditionerType = Dune::PreconditionerWithUpdate; + using WellModelOperator = WellModelAsLinearOperator; + using ElementMapper = GetPropType; + constexpr static std::size_t pressureIndex = GetPropType::pressureSwitchIdx; + +#if HAVE_MPI + using CommunicationType = Dune::OwnerOverlapCopyCommunication; +#else + using CommunicationType = Dune::CollectiveCommunication; +#endif + + public: + using AssembledLinearOperatorType = Dune::AssembledLinearOperator< Matrix, Vector, Vector >; + + /// Construct a system solver. + /// \param[in] simulator The opm-models simulator object + /// \param[in] parameters Explicit parameters for solver setup, do not + /// read them from command line parameters. + ISTLSolverEbosGPU(const Simulator& simulator, const FlowLinearSolverParameters& parameters) + : ISTLSolverEbos(simulator), + { + this->initialize(); + } + + /// Construct a system solver. + /// \param[in] simulator The opm-models simulator object + explicit ISTLSolverEbos(const Simulator& simulator) + : simulator_(simulator), + iterations_( 0 ), + calls_( 0 ), + converged_(false), + matrix_(nullptr) + { + parameters_.template init(simulator_.vanguard().eclState().getSimulationConfig().useCPR()); + initialize(); + } + + void initialize() + { + OPM_TIMEBLOCK(IstlSolverEbos); + IstlSolverEbos::initialize() +#if COMPILE_BDA_BRIDGE + { + std::string accelerator_mode = EWOMS_GET_PARAM(TypeTag, std::string, AcceleratorMode); + if ((simulator_.vanguard().grid().comm().size() > 1) && (accelerator_mode != "none")) { + if (on_io_rank) { + OpmLog::warning("Cannot use GPU with MPI, GPU are disabled"); + } + accelerator_mode = "none"; + } + const int platformID = EWOMS_GET_PARAM(TypeTag, int, OpenclPlatformId); + const int deviceID = EWOMS_GET_PARAM(TypeTag, int, BdaDeviceId); + const int maxit = EWOMS_GET_PARAM(TypeTag, int, LinearSolverMaxIter); + const double tolerance = EWOMS_GET_PARAM(TypeTag, double, LinearSolverReduction); + const bool opencl_ilu_parallel = EWOMS_GET_PARAM(TypeTag, bool, OpenclIluParallel); + const int linear_solver_verbosity = parameters_.linear_solver_verbosity_; + std::string linsolver = EWOMS_GET_PARAM(TypeTag, std::string, LinearSolver); + bdaBridge = std::make_unique>(accelerator_mode, + linear_solver_verbosity, + maxit, + tolerance, + platformID, + deviceID, + opencl_ilu_parallel, + linsolver); + } +#else + if (EWOMS_GET_PARAM(TypeTag, std::string, AcceleratorMode) != "none") { + OPM_THROW(std::logic_error,"Cannot use accelerated solver since CUDA, OpenCL and amgcl were not found by cmake"); + } +#endif + } + + void prepare(const Matrix& M, Vector& b) + { + OPM_TIMEBLOCK(istlSolverEbosPrepare); + IstlSolverEbos::prepare(M,b); + const bool firstcall = (matrix_ == nullptr); + // update matrix entries for solvers. + if (firstcall) { + // ebos will not change the matrix object. Hence simply store a pointer + // to the original one with a deleter that does nothing. + // Outch! We need to be able to scale the linear system! Hence const_cast + // setup sparsity pattern for jacobi matrix for preconditioner (only used for openclSolver) +#if HAVE_OPENCL + bdaBridge->numJacobiBlocks_ = EWOMS_GET_PARAM(TypeTag, int, NumJacobiBlocks); + bdaBridge->prepare(simulator_.vanguard().grid(), + simulator_.vanguard().cartesianIndexMapper(), + simulator_.vanguard().schedule().getWellsatEnd(), + simulator_.vanguard().cellPartition(), + getMatrix().nonzeroes(), useWellConn_); +#endif + } + } + + + void setResidual(Vector& /* b */) + { + // rhs_ = &b; // Must be handled in prepare() instead. + } + + void getResidual(Vector& b) const + { + b = *rhs_; + } + + void setMatrix(const SparseMatrixAdapter& /* M */) + { + // matrix_ = &M.istlMatrix(); // Must be handled in prepare() instead. + } + + bool solve(Vector& x) + { + OPM_TIMEBLOCK(istlSolverEbosSolve); + calls_ += 1; + // Write linear system if asked for. + const int verbosity = prm_.get("verbosity", 0); + const bool write_matrix = verbosity > 10; + if (write_matrix) { + Helper::writeSystem(simulator_, //simulator is only used to get names + getMatrix(), + *rhs_, + comm_.get()); + } + + // Solve system. + Dune::InverseOperatorResult result; + + std::function getContribs = + [this](WellContributions& w) + { + this->simulator_.problem().wellModel().getWellContributions(w); + }; + if (!bdaBridge->apply(*rhs_, useWellConn_, getContribs, + simulator_.gridView().comm().rank(), + const_cast(this->getMatrix()), + x, result)) + { + OPM_TIMEBLOCK(flexibleSolverApply); + if(bdaBridge->gpuActive()){ + // bda solve fails use istl solver setup need to be done since it is not setup in prepeare + ISTLSolverEbos::prepareFlexibleSolver(); + } + assert(flexibleSolver_.solver_); + flexibleSolver_.solver_->apply(x, *rhs_, result); + } + + // Check convergence, iterations etc. + checkConvergence(result); + + return converged_; + } + protected: + + void prepareFlexibleSolver() + { + if(bdaBridge->gpuActive()){ + ISTLSolverEbos::prepareFlexibleSolver(); + } + } + std::unique_ptr> bdaBridge; + }; // end ISTLSolver + +} // namespace Opm +#endif //COMPILE_BDA +#endif