diff --git a/opm/simulators/linalg/ISTLSolverEbosWithGPU.cpp b/opm/simulators/linalg/ISTLSolverEbosWithGPU.cpp
new file mode 100644
index 000000000..dff8c4af6
--- /dev/null
+++ b/opm/simulators/linalg/ISTLSolverEbosWithGPU.cpp
@@ -0,0 +1,250 @@
+/*
+ Copyright 2016 IRIS AS
+ Copyright 2019, 2020 Equinor ASA
+ Copyright 2020 SINTEF Digital, Mathematics and Cybernetics
+
+ This file is part of the Open Porous Media project (OPM).
+
+ OPM is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ OPM is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with OPM. If not, see .
+*/
+
+#include
+#include
+#include
+
+#include
+
+#include
+
+#include
+#include
+#include
+
+#include
+
+#if COMPILE_BDA_BRIDGE
+#include
+#include
+#endif
+
+#if HAVE_DUNE_ALUGRID
+#include
+#include
+#endif // HAVE_DUNE_ALUGRID
+
+#include
+
+namespace Opm {
+namespace detail {
+
+#if COMPILE_BDA_BRIDGE
+template
+BdaSolverInfo::
+BdaSolverInfo(const std::string& accelerator_mode,
+ const int linear_solver_verbosity,
+ const int maxit,
+ const double tolerance,
+ const int platformID,
+ const int deviceID,
+ const bool opencl_ilu_parallel,
+ const std::string& linsolver)
+ : bridge_(std::make_unique(accelerator_mode,
+ linear_solver_verbosity, maxit,
+ tolerance, platformID, deviceID,
+ opencl_ilu_parallel, linsolver))
+ , accelerator_mode_(accelerator_mode)
+{}
+
+template
+BdaSolverInfo::~BdaSolverInfo() = default;
+
+template
+template
+void BdaSolverInfo::
+prepare(const Grid& grid,
+ const Dune::CartesianIndexMapper& cartMapper,
+ const std::vector& wellsForConn,
+ const std::vector& cellPartition,
+ const size_t nonzeroes,
+ const bool useWellConn)
+{
+ if (numJacobiBlocks_ > 1) {
+ detail::setWellConnections(grid, cartMapper, wellsForConn,
+ useWellConn,
+ wellConnectionsGraph_,
+ numJacobiBlocks_);
+ this->blockJacobiAdjacency(grid, cellPartition, nonzeroes);
+ }
+}
+
+template
+bool BdaSolverInfo::
+apply(Vector& rhs,
+ const bool useWellConn,
+ WellContribFunc getContribs,
+ const int rank,
+ Matrix& matrix,
+ Vector& x,
+ Dune::InverseOperatorResult& result)
+{
+ bool use_gpu = bridge_->getUseGpu();
+ if (use_gpu) {
+ auto wellContribs = WellContributions::create(accelerator_mode_, useWellConn);
+ bridge_->initWellContributions(*wellContribs, x.N() * x[0].N());
+
+ // the WellContributions can only be applied separately with CUDA or OpenCL, not with amgcl or rocalution
+#if HAVE_CUDA || HAVE_OPENCL
+ if (!useWellConn) {
+ getContribs(*wellContribs);
+ }
+#endif
+
+ if (numJacobiBlocks_ > 1) {
+ this->copyMatToBlockJac(matrix, *blockJacobiForGPUILU0_);
+ // Const_cast needed since the CUDA stuff overwrites values for better matrix condition..
+ bridge_->solve_system(&matrix, blockJacobiForGPUILU0_.get(),
+ numJacobiBlocks_, rhs, *wellContribs, result);
+ }
+ else
+ bridge_->solve_system(&matrix, &matrix,
+ numJacobiBlocks_, rhs, *wellContribs, result);
+ if (result.converged) {
+ // get result vector x from non-Dune backend, iff solve was successful
+ bridge_->get_result(x);
+ return true;
+ } else {
+ // warn about CPU fallback
+ // BdaBridge might have disabled its BdaSolver for this simulation due to some error
+ // in that case the BdaBridge is disabled and flexibleSolver is always used
+ // or maybe the BdaSolver did not converge in time, then it will be used next linear solve
+ if (rank == 0) {
+ OpmLog::warning(bridge_->getAccleratorName() + " did not converge, now trying Dune to solve current linear system...");
+ }
+ }
+ }
+
+ return false;
+}
+
+template
+bool BdaSolverInfo::
+gpuActive()
+{
+ return bridge_->getUseGpu();
+}
+
+template
+template
+void BdaSolverInfo::
+blockJacobiAdjacency(const Grid& grid,
+ const std::vector& cell_part,
+ size_t nonzeroes)
+{
+ using size_type = typename Matrix::size_type;
+ using Iter = typename Matrix::CreateIterator;
+ size_type numCells = grid.size(0);
+ blockJacobiForGPUILU0_ = std::make_unique(numCells, numCells,
+ nonzeroes, Matrix::row_wise);
+
+ const auto& lid = grid.localIdSet();
+ const auto& gridView = grid.leafGridView();
+ auto elemIt = gridView.template begin<0>(); // should never overrun, since blockJacobiForGPUILU0_ is initialized with numCells rows
+
+ //Loop over cells
+ for (Iter row = blockJacobiForGPUILU0_->createbegin(); row != blockJacobiForGPUILU0_->createend(); ++elemIt, ++row)
+ {
+ const auto& elem = *elemIt;
+ size_type idx = lid.id(elem);
+ row.insert(idx);
+
+ // Add well non-zero connections
+ for (const auto wc : wellConnectionsGraph_[idx]) {
+ row.insert(wc);
+ }
+
+ int locPart = cell_part[idx];
+
+ //Add neighbor if it is on the same part
+ auto isend = gridView.iend(elem);
+ for (auto is = gridView.ibegin(elem); is!=isend; ++is)
+ {
+ //check if face has neighbor
+ if (is->neighbor())
+ {
+ size_type nid = lid.id(is->outside());
+ int nabPart = cell_part[nid];
+ if (locPart == nabPart) {
+ row.insert(nid);
+ }
+ }
+ }
+ }
+}
+
+template
+void BdaSolverInfo::
+copyMatToBlockJac(const Matrix& mat, Matrix& blockJac)
+{
+ auto rbegin = blockJac.begin();
+ auto rend = blockJac.end();
+ auto outerRow = mat.begin();
+ for (auto row = rbegin; row != rend; ++row, ++outerRow) {
+ auto outerCol = (*outerRow).begin();
+ for (auto col = (*row).begin(); col != (*row).end(); ++col) {
+ // outerRow is guaranteed to have all column entries that row has!
+ while(outerCol.index() < col.index()) ++outerCol;
+ assert(outerCol.index() == col.index());
+ *col = *outerCol; // copy nonzero block
+ }
+ }
+}
+#endif // COMPILE_BDA_BRIDGE
+
+
+#if COMPILE_BDA_BRIDGE
+
+#define INSTANCE_GRID(Dim, Grid) \
+ template void BdaSolverInfo,BV>:: \
+ prepare(const Grid&, \
+ const Dune::CartesianIndexMapper&, \
+ const std::vector&, \
+ const std::vector&, \
+ const size_t, const bool);
+using PolyHedralGrid3D = Dune::PolyhedralGrid<3, 3>;
+#if HAVE_DUNE_ALUGRID
+#if HAVE_MPI
+ using ALUGrid3CN = Dune::ALUGrid<3, 3, Dune::cube, Dune::nonconforming, Dune::ALUGridMPIComm>;
+#else
+ using ALUGrid3CN = Dune::ALUGrid<3, 3, Dune::cube, Dune::nonconforming, Dune::ALUGridNoComm>;
+#endif //HAVE_MPI
+#define INSTANCE(Dim) \
+ template struct BdaSolverInfo,BV>; \
+ INSTANCE_GRID(Dim,Dune::CpGrid) \
+ INSTANCE_GRID(Dim,ALUGrid3CN) \
+ INSTANCE_GRID(Dim,PolyHedralGrid3D) \
+ INSTANCE_FLEX(Dim)
+#else
+#define INSTANCE(Dim) \
+ template struct BdaSolverInfo,BV>; \
+ INSTANCE_GRID(Dim,Dune::CpGrid) \
+ INSTANCE_GRID(Dim,PolyHedralGrid3D) \
+ INSTANCE_FLEX(Dim)
+#endif
+#else
+#define INSTANCE(Dim) \
+ INSTANCE_FLEX(Dim)
+#endif // COMPILE_BDA_BRIDGE
+
+}
+}
diff --git a/opm/simulators/linalg/ISTLSolverEbosWithGPU.hpp b/opm/simulators/linalg/ISTLSolverEbosWithGPU.hpp
new file mode 100644
index 000000000..6c0cb5cc8
--- /dev/null
+++ b/opm/simulators/linalg/ISTLSolverEbosWithGPU.hpp
@@ -0,0 +1,280 @@
+/*
+ Copyright 2016 IRIS AS
+ Copyright 2019, 2020 Equinor ASA
+ Copyright 2020 SINTEF Digital, Mathematics and Cybernetics
+
+ This file is part of the Open Porous Media project (OPM).
+
+ OPM is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ OPM is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with OPM. If not, see .
+*/
+
+#ifndef OPM_ISTLSOLVER_EBOS_HEADER_INCLUDED
+#define OPM_ISTLSOLVER_EBOS_HEADER_INCLUDED
+#if COMPILE_BDA_BRIDGE
+
+#include
+
+namespace Opm::Properties {
+
+
+namespace Opm
+{
+
+template class BdaBridge;
+class WellContributions;
+template
+struct BdaSolverInfo
+{
+ using WellContribFunc = std::function;
+ using Bridge = BdaBridge;
+
+ BdaSolverInfo(const std::string& accelerator_mode,
+ const int linear_solver_verbosity,
+ const int maxit,
+ const double tolerance,
+ const int platformID,
+ const int deviceID,
+ const bool opencl_ilu_parallel,
+ const std::string& linsolver);
+
+ ~BdaSolverInfo();
+
+ template
+ void prepare(const Grid& grid,
+ const Dune::CartesianIndexMapper& cartMapper,
+ const std::vector& wellsForConn,
+ const std::vector& cellPartition,
+ const size_t nonzeroes,
+ const bool useWellConn);
+
+ bool apply(Vector& rhs,
+ const bool useWellConn,
+ WellContribFunc getContribs,
+ const int rank,
+ Matrix& matrix,
+ Vector& x,
+ Dune::InverseOperatorResult& result);
+
+ bool gpuActive();
+
+ int numJacobiBlocks_ = 0;
+
+private:
+ /// Create sparsity pattern for block-Jacobi matrix based on partitioning of grid.
+ /// Do not initialize the values, that is done in copyMatToBlockJac()
+ template
+ void blockJacobiAdjacency(const Grid& grid,
+ const std::vector& cell_part,
+ size_t nonzeroes);
+
+ void copyMatToBlockJac(const Matrix& mat, Matrix& blockJac);
+
+ std::unique_ptr bridge_;
+ std::string accelerator_mode_;
+ std::unique_ptr blockJacobiForGPUILU0_;
+ std::vector> wellConnectionsGraph_;
+};
+
+
+
+/// Create sparsity pattern for block-Jacobi matrix based on partitioning of grid.
+/// Do not initialize the values, that is done in copyMatToBlockJac()
+}
+
+ /// This class solves the fully implicit black-oil system by
+ /// solving the reduced system (after eliminating well variables)
+ /// as a block-structured matrix (one block for all cell variables) for a fixed
+ /// number of cell variables np .
+ template
+ class ISTLSolverEbosWithGPU : public ISTLSolverEbos
+ {
+ protected:
+ using GridView = GetPropType;
+ using Scalar = GetPropType;
+ using SparseMatrixAdapter = GetPropType;
+ using Vector = GetPropType;
+ using Indices = GetPropType;
+ using WellModel = GetPropType;
+ using Simulator = GetPropType;
+ using Matrix = typename SparseMatrixAdapter::IstlMatrix;
+ using ThreadManager = GetPropType;
+ using ElementContext = GetPropType;
+ using AbstractSolverType = Dune::InverseOperator;
+ using AbstractOperatorType = Dune::AssembledLinearOperator;
+ using AbstractPreconditionerType = Dune::PreconditionerWithUpdate;
+ using WellModelOperator = WellModelAsLinearOperator;
+ using ElementMapper = GetPropType;
+ constexpr static std::size_t pressureIndex = GetPropType::pressureSwitchIdx;
+
+#if HAVE_MPI
+ using CommunicationType = Dune::OwnerOverlapCopyCommunication;
+#else
+ using CommunicationType = Dune::CollectiveCommunication;
+#endif
+
+ public:
+ using AssembledLinearOperatorType = Dune::AssembledLinearOperator< Matrix, Vector, Vector >;
+
+ /// Construct a system solver.
+ /// \param[in] simulator The opm-models simulator object
+ /// \param[in] parameters Explicit parameters for solver setup, do not
+ /// read them from command line parameters.
+ ISTLSolverEbosGPU(const Simulator& simulator, const FlowLinearSolverParameters& parameters)
+ : ISTLSolverEbos(simulator),
+ {
+ this->initialize();
+ }
+
+ /// Construct a system solver.
+ /// \param[in] simulator The opm-models simulator object
+ explicit ISTLSolverEbos(const Simulator& simulator)
+ : simulator_(simulator),
+ iterations_( 0 ),
+ calls_( 0 ),
+ converged_(false),
+ matrix_(nullptr)
+ {
+ parameters_.template init(simulator_.vanguard().eclState().getSimulationConfig().useCPR());
+ initialize();
+ }
+
+ void initialize()
+ {
+ OPM_TIMEBLOCK(IstlSolverEbos);
+ IstlSolverEbos::initialize()
+#if COMPILE_BDA_BRIDGE
+ {
+ std::string accelerator_mode = EWOMS_GET_PARAM(TypeTag, std::string, AcceleratorMode);
+ if ((simulator_.vanguard().grid().comm().size() > 1) && (accelerator_mode != "none")) {
+ if (on_io_rank) {
+ OpmLog::warning("Cannot use GPU with MPI, GPU are disabled");
+ }
+ accelerator_mode = "none";
+ }
+ const int platformID = EWOMS_GET_PARAM(TypeTag, int, OpenclPlatformId);
+ const int deviceID = EWOMS_GET_PARAM(TypeTag, int, BdaDeviceId);
+ const int maxit = EWOMS_GET_PARAM(TypeTag, int, LinearSolverMaxIter);
+ const double tolerance = EWOMS_GET_PARAM(TypeTag, double, LinearSolverReduction);
+ const bool opencl_ilu_parallel = EWOMS_GET_PARAM(TypeTag, bool, OpenclIluParallel);
+ const int linear_solver_verbosity = parameters_.linear_solver_verbosity_;
+ std::string linsolver = EWOMS_GET_PARAM(TypeTag, std::string, LinearSolver);
+ bdaBridge = std::make_unique>(accelerator_mode,
+ linear_solver_verbosity,
+ maxit,
+ tolerance,
+ platformID,
+ deviceID,
+ opencl_ilu_parallel,
+ linsolver);
+ }
+#else
+ if (EWOMS_GET_PARAM(TypeTag, std::string, AcceleratorMode) != "none") {
+ OPM_THROW(std::logic_error,"Cannot use accelerated solver since CUDA, OpenCL and amgcl were not found by cmake");
+ }
+#endif
+ }
+
+ void prepare(const Matrix& M, Vector& b)
+ {
+ OPM_TIMEBLOCK(istlSolverEbosPrepare);
+ IstlSolverEbos::prepare(M,b);
+ const bool firstcall = (matrix_ == nullptr);
+ // update matrix entries for solvers.
+ if (firstcall) {
+ // ebos will not change the matrix object. Hence simply store a pointer
+ // to the original one with a deleter that does nothing.
+ // Outch! We need to be able to scale the linear system! Hence const_cast
+ // setup sparsity pattern for jacobi matrix for preconditioner (only used for openclSolver)
+#if HAVE_OPENCL
+ bdaBridge->numJacobiBlocks_ = EWOMS_GET_PARAM(TypeTag, int, NumJacobiBlocks);
+ bdaBridge->prepare(simulator_.vanguard().grid(),
+ simulator_.vanguard().cartesianIndexMapper(),
+ simulator_.vanguard().schedule().getWellsatEnd(),
+ simulator_.vanguard().cellPartition(),
+ getMatrix().nonzeroes(), useWellConn_);
+#endif
+ }
+ }
+
+
+ void setResidual(Vector& /* b */)
+ {
+ // rhs_ = &b; // Must be handled in prepare() instead.
+ }
+
+ void getResidual(Vector& b) const
+ {
+ b = *rhs_;
+ }
+
+ void setMatrix(const SparseMatrixAdapter& /* M */)
+ {
+ // matrix_ = &M.istlMatrix(); // Must be handled in prepare() instead.
+ }
+
+ bool solve(Vector& x)
+ {
+ OPM_TIMEBLOCK(istlSolverEbosSolve);
+ calls_ += 1;
+ // Write linear system if asked for.
+ const int verbosity = prm_.get("verbosity", 0);
+ const bool write_matrix = verbosity > 10;
+ if (write_matrix) {
+ Helper::writeSystem(simulator_, //simulator is only used to get names
+ getMatrix(),
+ *rhs_,
+ comm_.get());
+ }
+
+ // Solve system.
+ Dune::InverseOperatorResult result;
+
+ std::function getContribs =
+ [this](WellContributions& w)
+ {
+ this->simulator_.problem().wellModel().getWellContributions(w);
+ };
+ if (!bdaBridge->apply(*rhs_, useWellConn_, getContribs,
+ simulator_.gridView().comm().rank(),
+ const_cast(this->getMatrix()),
+ x, result))
+ {
+ OPM_TIMEBLOCK(flexibleSolverApply);
+ if(bdaBridge->gpuActive()){
+ // bda solve fails use istl solver setup need to be done since it is not setup in prepeare
+ ISTLSolverEbos::prepareFlexibleSolver();
+ }
+ assert(flexibleSolver_.solver_);
+ flexibleSolver_.solver_->apply(x, *rhs_, result);
+ }
+
+ // Check convergence, iterations etc.
+ checkConvergence(result);
+
+ return converged_;
+ }
+ protected:
+
+ void prepareFlexibleSolver()
+ {
+ if(bdaBridge->gpuActive()){
+ ISTLSolverEbos::prepareFlexibleSolver();
+ }
+ }
+ std::unique_ptr> bdaBridge;
+ }; // end ISTLSolver
+
+} // namespace Opm
+#endif //COMPILE_BDA
+#endif