/* Copyright 2016 IRIS AS Copyright 2019, 2020 Equinor ASA Copyright 2020 SINTEF Digital, Mathematics and Cybernetics This file is part of the Open Porous Media project (OPM). OPM is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. OPM is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with OPM. If not, see . */ #ifndef OPM_ISTLSOLVER_EBOS_HEADER_INCLUDED #define OPM_ISTLSOLVER_EBOS_HEADER_INCLUDED #include #include #include #include #include #include #include #include #include #include #include #if HAVE_CUDA || HAVE_OPENCL || HAVE_FPGA || HAVE_AMGCL #include #include #endif namespace Opm::Properties { namespace TTag { struct FlowIstlSolver { using InheritsFrom = std::tuple; }; } template struct EclWellModel; //! Set the type of a global jacobian matrix for linear solvers that are based on //! dune-istl. template struct SparseMatrixAdapter { private: using Scalar = GetPropType; enum { numEq = getPropValue() }; typedef MatrixBlock Block; public: typedef typename Linear::IstlSparseMatrixAdapter type; }; } // namespace Opm::Properties namespace Opm { /// This class solves the fully implicit black-oil system by /// solving the reduced system (after eliminating well variables) /// as a block-structured matrix (one block for all cell variables) for a fixed /// number of cell variables np . template class ISTLSolverEbos { protected: using GridView = GetPropType; using Scalar = GetPropType; using SparseMatrixAdapter = GetPropType; using Vector = GetPropType; using Indices = GetPropType; using WellModel = GetPropType; using Simulator = GetPropType; using Matrix = typename SparseMatrixAdapter::IstlMatrix; using ThreadManager = GetPropType; using ElementContext = GetPropType; using AbstractSolverType = Dune::InverseOperator; using AbstractOperatorType = Dune::AssembledLinearOperator; using AbstractPreconditionerType = Dune::PreconditionerWithUpdate; using WellModelOperator = WellModelAsLinearOperator; using ElementMapper = GetPropType; constexpr static std::size_t pressureIndex = GetPropType::pressureSwitchIdx; #if HAVE_CUDA || HAVE_OPENCL || HAVE_FPGA || HAVE_AMGCL static const unsigned int block_size = Matrix::block_type::rows; std::unique_ptr> bdaBridge; #endif #if HAVE_MPI using CommunicationType = Dune::OwnerOverlapCopyCommunication; #else using CommunicationType = Dune::CollectiveCommunication< int >; #endif public: using AssembledLinearOperatorType = Dune::AssembledLinearOperator< Matrix, Vector, Vector >; static void registerParameters() { FlowLinearSolverParameters::registerParameters(); } /// Construct a system solver. /// \param[in] parallelInformation In the case of a parallel run /// with dune-istl the information about the parallelization. explicit ISTLSolverEbos(const Simulator& simulator) : simulator_(simulator), iterations_( 0 ), calls_( 0 ), converged_(false), matrix_() { const bool on_io_rank = (simulator.gridView().comm().rank() == 0); #if HAVE_MPI comm_.reset( new CommunicationType( simulator_.vanguard().grid().comm() ) ); #endif parameters_.template init(); prm_ = setupPropertyTree(parameters_, EWOMS_PARAM_IS_SET(TypeTag, int, LinearSolverMaxIter), EWOMS_PARAM_IS_SET(TypeTag, int, CprMaxEllIter)); #if HAVE_CUDA || HAVE_OPENCL || HAVE_FPGA || HAVE_AMGCL { std::string accelerator_mode = EWOMS_GET_PARAM(TypeTag, std::string, AcceleratorMode); if ((simulator_.vanguard().grid().comm().size() > 1) && (accelerator_mode != "none")) { if (on_io_rank) { OpmLog::warning("Cannot use GPU or FPGA with MPI, GPU/FPGA are disabled"); } accelerator_mode = "none"; } const int platformID = EWOMS_GET_PARAM(TypeTag, int, OpenclPlatformId); const int deviceID = EWOMS_GET_PARAM(TypeTag, int, BdaDeviceId); const int maxit = EWOMS_GET_PARAM(TypeTag, int, LinearSolverMaxIter); const double tolerance = EWOMS_GET_PARAM(TypeTag, double, LinearSolverReduction); const std::string opencl_ilu_reorder = EWOMS_GET_PARAM(TypeTag, std::string, OpenclIluReorder); const int linear_solver_verbosity = parameters_.linear_solver_verbosity_; std::string fpga_bitstream = EWOMS_GET_PARAM(TypeTag, std::string, FpgaBitstream); std::string linsolver = EWOMS_GET_PARAM(TypeTag, std::string, Linsolver); bdaBridge.reset(new BdaBridge(accelerator_mode, fpga_bitstream, linear_solver_verbosity, maxit, tolerance, platformID, deviceID, opencl_ilu_reorder, linsolver)); } #else if (EWOMS_GET_PARAM(TypeTag, std::string, AcceleratorMode) != "none") { OPM_THROW(std::logic_error,"Cannot use accelerated solver since CUDA, OpenCL and amgcl were not found by cmake and FPGA was not enabled"); } #endif extractParallelGridInformationToISTL(simulator_.vanguard().grid(), parallelInformation_); // For some reason simulator_.model().elementMapper() is not initialized at this stage //const auto& elemMapper = simulator_.model().elementMapper(); //does not work. // Set it up manually ElementMapper elemMapper(simulator_.vanguard().gridView(), Dune::mcmgElementLayout()); detail::findOverlapAndInterior(simulator_.vanguard().grid(), elemMapper, overlapRows_, interiorRows_); useWellConn_ = EWOMS_GET_PARAM(TypeTag, bool, MatrixAddWellContributions); #if HAVE_FPGA // check usage of MatrixAddWellContributions: for FPGA they must be included if (EWOMS_GET_PARAM(TypeTag, std::string, AcceleratorMode) == "fpga" && !useWellConn_) { OPM_THROW(std::logic_error,"fpgaSolver needs --matrix-add-well-contributions=true"); } #endif const bool ownersFirst = EWOMS_GET_PARAM(TypeTag, bool, OwnerCellsFirst); if (!ownersFirst) { const std::string msg = "The linear solver no longer supports --owner-cells-first=false."; if (on_io_rank) { OpmLog::error(msg); } OPM_THROW_NOLOG(std::runtime_error, msg); } interiorCellNum_ = detail::numMatrixRowsToUseInSolver(simulator_.vanguard().grid(), true); // Print parameters to PRT/DBG logs. if (on_io_rank) { std::ostringstream os; os << "Property tree for linear solver:\n"; prm_.write_json(os, true); OpmLog::note(os.str()); } } // nothing to clean here void eraseMatrix() { } void prepare(const SparseMatrixAdapter& M, Vector& b) { static bool firstcall = true; #if HAVE_MPI if (firstcall && parallelInformation_.type() == typeid(ParallelISTLInformation)) { // Parallel case. const ParallelISTLInformation* parinfo = std::any_cast(¶llelInformation_); assert(parinfo); const size_t size = M.istlMatrix().N(); parinfo->copyValuesTo(comm_->indexSet(), comm_->remoteIndices(), size, 1); } #endif // update matrix entries for solvers. if (firstcall) { // ebos will not change the matrix object. Hence simply store a pointer // to the original one with a deleter that does nothing. // Outch! We need to be able to scale the linear system! Hence const_cast matrix_ = const_cast(&M.istlMatrix()); // setup sparsity pattern for jacobi matrix for preconditioner (only used for openclSolver) #if HAVE_OPENCL this->numJacobiBlocks_ = EWOMS_GET_PARAM(TypeTag, int, NumJacobiBlocks); #else this->numJacobiBlocks_ = 0; #endif useWellConn_ = EWOMS_GET_PARAM(TypeTag, bool, MatrixAddWellContributions); if (numJacobiBlocks_ > 1) { const auto wellsForConn = simulator_.vanguard().schedule().getWellsatEnd(); const auto& cartMapper = simulator_.vanguard().cartesianIndexMapper(); detail::setWellConnections(simulator_.vanguard().grid(), cartMapper, wellsForConn, useWellConn_, wellConnectionsGraph_, numJacobiBlocks_); std::cout << "Create block-Jacobi pattern" << std::endl; blockJacobiAdjacency(); } } else { // Pointers should not change if ( &(M.istlMatrix()) != matrix_ ) { OPM_THROW(std::logic_error, "Matrix objects are expected to be reused when reassembling!" <<" old pointer was " << matrix_ << ", new one is " << (&M.istlMatrix()) ); } } rhs_ = &b; if (isParallel() && prm_.get("preconditioner.type") != "ParOverILU0") { makeOverlapRowsInvalid(getMatrix()); } prepareFlexibleSolver(); firstcall = false; } void setResidual(Vector& /* b */) { // rhs_ = &b; // Must be handled in prepare() instead. } void getResidual(Vector& b) const { b = *rhs_; } void setMatrix(const SparseMatrixAdapter& /* M */) { // matrix_ = &M.istlMatrix(); // Must be handled in prepare() instead. } bool solve(Vector& x) { calls_ += 1; // Write linear system if asked for. const int verbosity = prm_.get("verbosity", 0); const bool write_matrix = verbosity > 10; if (write_matrix) { Helper::writeSystem(simulator_, //simulator is only used to get names getMatrix(), *rhs_, comm_.get()); } // Solve system. Dune::InverseOperatorResult result; bool accelerator_was_used = false; // Use GPU if: available, chosen by user, and successful. // Use FPGA if: support compiled, chosen by user, and successful. #if HAVE_CUDA || HAVE_OPENCL || HAVE_FPGA || HAVE_AMGCL bool use_gpu = bdaBridge->getUseGpu(); bool use_fpga = bdaBridge->getUseFpga(); if (use_gpu || use_fpga) { const std::string accelerator_mode = EWOMS_GET_PARAM(TypeTag, std::string, AcceleratorMode); auto wellContribs = WellContributions::create(accelerator_mode, useWellConn_); bdaBridge->initWellContributions(*wellContribs, x.N() * x[0].N()); // the WellContributions can only be applied separately with CUDA or OpenCL, not with an FPGA or amgcl #if HAVE_CUDA || HAVE_OPENCL if (!useWellConn_) { simulator_.problem().wellModel().getWellContributions(*wellContribs); } #endif if (numJacobiBlocks_ > 1) { copyMatToBlockJac(getMatrix(), *blockJacobiForGPUILU0_); // Const_cast needed since the CUDA stuff overwrites values for better matrix condition.. bdaBridge->solve_system(const_cast(&getMatrix()), &*blockJacobiForGPUILU0_, numJacobiBlocks_, *rhs_, *wellContribs, result); } else bdaBridge->solve_system(const_cast(&getMatrix()), const_cast(&getMatrix()), numJacobiBlocks_, *rhs_, *wellContribs, result); if (result.converged) { // get result vector x from non-Dune backend, iff solve was successful bdaBridge->get_result(x); accelerator_was_used = true; } else { // warn about CPU fallback // BdaBridge might have disabled its BdaSolver for this simulation due to some error // in that case the BdaBridge is disabled and flexibleSolver is always used // or maybe the BdaSolver did not converge in time, then it will be used next linear solve if (simulator_.gridView().comm().rank() == 0) { OpmLog::warning(bdaBridge->getAccleratorName() + " did not converge, now trying Dune to solve current linear system..."); } } } #endif // Otherwise, use flexible istl solver. if (!accelerator_was_used) { assert(flexibleSolver_); flexibleSolver_->apply(x, *rhs_, result); } // Check convergence, iterations etc. checkConvergence(result); return converged_; } /// Solve the system of linear equations Ax = b, with A being the /// combined derivative matrix of the residual and b /// being the residual itself. /// \param[in] residual residual object containing A and b. /// \return the solution x /// \copydoc NewtonIterationBlackoilInterface::iterations int iterations () const { return iterations_; } /// \copydoc NewtonIterationBlackoilInterface::parallelInformation const std::any& parallelInformation() const { return parallelInformation_; } protected: // 3x3 matrix block inversion was unstable at least 2.3 until and including // 2.5.0. There may still be some issue with the 4x4 matrix block inversion // we therefore still use the block inversion in OPM typedef ParallelOverlappingILU0 >, Vector, Vector> SeqPreconditioner; #if HAVE_MPI typedef Dune::OwnerOverlapCopyCommunication Comm; // 3x3 matrix block inversion was unstable from at least 2.3 until and // including 2.5.0 typedef ParallelOverlappingILU0 ParPreconditioner; #endif void checkConvergence( const Dune::InverseOperatorResult& result ) const { // store number of iterations iterations_ = result.iterations; converged_ = result.converged; // Check for failure of linear solver. if (!parameters_.ignoreConvergenceFailure_ && !result.converged) { const std::string msg("Convergence failure for linear solver."); OPM_THROW_NOLOG(NumericalIssue, msg); } } protected: bool isParallel() const { #if HAVE_MPI return comm_->communicator().size() > 1; #else return false; #endif } void prepareFlexibleSolver() { std::function weightsCalculator = getWeightsCalculator(); if (shouldCreateSolver()) { if (isParallel()) { #if HAVE_MPI if (useWellConn_) { using ParOperatorType = Dune::OverlappingSchwarzOperator; auto op = std::make_unique(getMatrix(), *comm_); using FlexibleSolverType = Dune::FlexibleSolver; auto sol = std::make_unique(*op, *comm_, prm_, weightsCalculator, pressureIndex); preconditionerForFlexibleSolver_ = &(sol->preconditioner()); linearOperatorForFlexibleSolver_ = std::move(op); flexibleSolver_ = std::move(sol); } else { using ParOperatorType = WellModelGhostLastMatrixAdapter; wellOperator_ = std::make_unique(simulator_.problem().wellModel()); auto op = std::make_unique(getMatrix(), *wellOperator_, interiorCellNum_); using FlexibleSolverType = Dune::FlexibleSolver; auto sol = std::make_unique(*op, *comm_, prm_, weightsCalculator, pressureIndex); preconditionerForFlexibleSolver_ = &(sol->preconditioner()); linearOperatorForFlexibleSolver_ = std::move(op); flexibleSolver_ = std::move(sol); } #endif } else { if (useWellConn_) { using SeqOperatorType = Dune::MatrixAdapter; auto op = std::make_unique(getMatrix()); using FlexibleSolverType = Dune::FlexibleSolver; auto sol = std::make_unique(*op, prm_, weightsCalculator, pressureIndex); preconditionerForFlexibleSolver_ = &(sol->preconditioner()); linearOperatorForFlexibleSolver_ = std::move(op); flexibleSolver_ = std::move(sol); } else { using SeqOperatorType = WellModelMatrixAdapter; wellOperator_ = std::make_unique(simulator_.problem().wellModel()); auto op = std::make_unique(getMatrix(), *wellOperator_); using FlexibleSolverType = Dune::FlexibleSolver; auto sol = std::make_unique(*op, prm_, weightsCalculator, pressureIndex); preconditionerForFlexibleSolver_ = &(sol->preconditioner()); linearOperatorForFlexibleSolver_ = std::move(op); flexibleSolver_ = std::move(sol); } } } else { preconditionerForFlexibleSolver_->update(); } } /// Return true if we should (re)create the whole solver, /// instead of just calling update() on the preconditioner. bool shouldCreateSolver() const { // Decide if we should recreate the solver or just do // a minimal preconditioner update. if (!flexibleSolver_) { return true; } if (this->parameters_.cpr_reuse_setup_ == 0) { // Always recreate solver. return true; } if (this->parameters_.cpr_reuse_setup_ == 1) { // Recreate solver on the first iteration of every timestep. const int newton_iteration = this->simulator_.model().newtonMethod().numIterations(); return newton_iteration == 0; } if (this->parameters_.cpr_reuse_setup_ == 2) { // Recreate solver if the last solve used more than 10 iterations. return this->iterations() > 10; } if (this->parameters_.cpr_reuse_setup_ == 3) { // Recreate solver if the last solve used more than 10 iterations. return false; } if (this->parameters_.cpr_reuse_setup_ == 4) { // Recreate solver every 'step' solve calls. const int step = this->parameters_.cpr_reuse_interval_; const bool create = ((calls_ % step) == 0); return create; } // If here, we have an invalid parameter. const bool on_io_rank = (simulator_.gridView().comm().rank() == 0); std::string msg = "Invalid value: " + std::to_string(this->parameters_.cpr_reuse_setup_) + " for --cpr-reuse-setup parameter, run with --help to see allowed values."; if (on_io_rank) { OpmLog::error(msg); } throw std::runtime_error(msg); // Never reached. return false; } /// Return an appropriate weight function if a cpr preconditioner is asked for. std::function getWeightsCalculator() const { std::function weightsCalculator; using namespace std::string_literals; auto preconditionerType = prm_.get("preconditioner.type"s, "cpr"s); if (preconditionerType == "cpr" || preconditionerType == "cprt" || preconditionerType == "cprw" || preconditionerType == "cprwt") { const bool transpose = preconditionerType == "cprt" || preconditionerType == "cprwt"; const auto weightsType = prm_.get("preconditioner.weight_type"s, "quasiimpes"s); if (weightsType == "quasiimpes") { // weights will be created as default in the solver // assignment p = pressureIndex prevent compiler warning about // capturing variable with non-automatic storage duration weightsCalculator = [this, transpose, p = pressureIndex]() { return Amg::getQuasiImpesWeights(this->getMatrix(), p, transpose); }; } else if (weightsType == "trueimpes") { // assignment p = pressureIndex prevent compiler warning about // capturing variable with non-automatic storage duration weightsCalculator = [this, p = pressureIndex]() { return this->getTrueImpesWeights(p); }; } else { OPM_THROW(std::invalid_argument, "Weights type " << weightsType << "not implemented for cpr." << " Please use quasiimpes or trueimpes."); } } return weightsCalculator; } // Weights to make approximate pressure equations. // Calculated from the storage terms (only) of the // conservation equations, ignoring all other terms. Vector getTrueImpesWeights(int pressureVarIndex) const { Vector weights(rhs_->size()); ElementContext elemCtx(simulator_); Amg::getTrueImpesWeights(pressureVarIndex, weights, simulator_.vanguard().gridView(), elemCtx, simulator_.model(), ThreadManager::threadId()); return weights; } /// Zero out off-diagonal blocks on rows corresponding to overlap cells /// Diagonal blocks on ovelap rows are set to diag(1.0). void makeOverlapRowsInvalid(Matrix& matrix) const { //value to set on diagonal const int numEq = Matrix::block_type::rows; typename Matrix::block_type diag_block(0.0); for (int eq = 0; eq < numEq; ++eq) diag_block[eq][eq] = 1.0; //loop over precalculated overlap rows and columns for (auto row = overlapRows_.begin(); row != overlapRows_.end(); row++ ) { int lcell = *row; // Zero out row. matrix[lcell] = 0.0; //diagonal block set to diag(1.0). matrix[lcell][lcell] = diag_block; } } /// Create sparsity pattern for block-Jacobi matrix based on partitioning of grid. /// Do not initialize the values, that is done in copyMatToBlockJac() void blockJacobiAdjacency() { const auto& grid = simulator_.vanguard().grid(); std::vector cell_part = simulator_.vanguard().cellPartition(); typedef typename Matrix::size_type size_type; typedef typename Matrix::CreateIterator Iter; size_type numCells = grid.size( 0 ); blockJacobiForGPUILU0_.reset(new Matrix(numCells, numCells, getMatrix().nonzeroes(), Matrix::row_wise)); const auto& lid = grid.localIdSet(); const auto& gridView = grid.leafGridView(); auto elemIt = gridView.template begin<0>(); // should never overrun, since blockJacobiForGPUILU0_ is initialized with numCells rows //Loop over cells for (Iter row = blockJacobiForGPUILU0_->createbegin(); row != blockJacobiForGPUILU0_->createend(); ++elemIt, ++row) { const auto& elem = *elemIt; size_type idx = lid.id(elem); row.insert(idx); // Add well non-zero connections for (auto wc = wellConnectionsGraph_[idx].begin(); wc!=wellConnectionsGraph_[idx].end(); ++wc) { row.insert(*wc); } int locPart = cell_part[idx]; //Add neighbor if it is on the same part auto isend = gridView.iend(elem); for (auto is = gridView.ibegin(elem); is!=isend; ++is) { //check if face has neighbor if (is->neighbor()) { size_type nid = lid.id(is->outside()); int nabPart = cell_part[nid]; if (locPart == nabPart) { row.insert(nid); } } } } } void copyMatToBlockJac(const Matrix& mat, Matrix& blockJac) { auto rbegin = blockJac.begin(); auto rend = blockJac.end(); auto outerRow = mat.begin(); for (auto row = rbegin; row != rend; ++row, ++outerRow) { auto outerCol = (*outerRow).begin(); for (auto col = (*row).begin(); col != (*row).end(); ++col) { // outerRow is guaranteed to have all column entries that row has! while(outerCol.index() < col.index()) ++outerCol; assert(outerCol.index() == col.index()); *col = *outerCol; // copy nonzero block } } } Matrix& getMatrix() { return *matrix_; } const Matrix& getMatrix() const { return *matrix_; } const Simulator& simulator_; mutable int iterations_; mutable int calls_; mutable bool converged_; std::any parallelInformation_; // non-const to be able to scale the linear system Matrix* matrix_; Vector *rhs_; std::unique_ptr blockJacobiForGPUILU0_; std::unique_ptr flexibleSolver_; std::unique_ptr linearOperatorForFlexibleSolver_; AbstractPreconditionerType* preconditionerForFlexibleSolver_; std::unique_ptr> wellOperator_; std::vector overlapRows_; std::vector interiorRows_; std::vector> wellConnectionsGraph_; bool useWellConn_; size_t interiorCellNum_; FlowLinearSolverParameters parameters_; PropertyTree prm_; bool scale_variables_; int numJacobiBlocks_; std::shared_ptr< CommunicationType > comm_; }; // end ISTLSolver } // namespace Opm #endif