changed: ewoms/linear -> opm/simulators/linalg

Arne Morten Kvarving 2019-09-09 09:20:08 +02:00
parent 3543500553
commit cf9252765c
32 changed files with 7683 additions and 3 deletions


@@ -29,7 +29,7 @@
#define EWOMS_CO2_INJECTION_PROBLEM_HH
#include <ewoms/models/immiscible/immisciblemodel.hh>
-#include <ewoms/linear/parallelamgbackend.hh>
+#include <opm/simulators/linalg/parallelamgbackend.hh>
#include <opm/material/fluidsystems/H2ON2FluidSystem.hpp>
#include <opm/material/fluidsystems/BrineCO2FluidSystem.hpp>


@@ -29,7 +29,7 @@
#define EWOMS_GROUND_WATER_PROBLEM_HH
#include <ewoms/models/immiscible/immiscibleproperties.hh>
-#include <ewoms/linear/parallelistlbackend.hh>
+#include <opm/simulators/linalg/parallelistlbackend.hh>
#include <opm/material/components/SimpleH2O.hpp>
#include <opm/material/fluidstates/ImmiscibleFluidState.hpp>


@@ -29,7 +29,7 @@
#define EWOMS_WATER_AIR_PROBLEM_HH
#include <ewoms/models/pvs/pvsproperties.hh>
-#include <ewoms/linear/parallelistlbackend.hh>
+#include <opm/simulators/linalg/parallelistlbackend.hh>
#include <opm/material/fluidsystems/H2OAirFluidSystem.hpp>
#include <opm/material/fluidstates/ImmiscibleFluidState.hpp>


@@ -0,0 +1,356 @@
// -*- mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
// vi: set et ts=4 sw=4 sts=4:
/*
This file is part of the Open Porous Media project (OPM).
OPM is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
OPM is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with OPM. If not, see <http://www.gnu.org/licenses/>.
Consult the COPYING file in the top-level source directory of this
module for the precise wording of the license and the list of
copyright holders.
*/
/*!
* \file
* \copydoc Opm::Linear::BiCGStabSolver
*/
#ifndef EWOMS_BICG_STAB_SOLVER_HH
#define EWOMS_BICG_STAB_SOLVER_HH
#include "convergencecriterion.hh"
#include "residreductioncriterion.hh"
#include "linearsolverreport.hh"
#include <ewoms/common/timer.hh>
#include <ewoms/common/timerguard.hh>
#include <opm/material/common/Exceptions.hpp>
#include <memory>
namespace Opm {
namespace Linear {
/*!
* \brief Implements a preconditioned stabilized BiCG linear solver.
*
* This solves a linear system of equations Ax = b, where the matrix A is sparse and may
* be unsymmetric.
*
* See https://en.wikipedia.org/wiki/Biconjugate_gradient_stabilized_method, (article
* date: December 19, 2016)
*/
template <class LinearOperator, class Vector, class Preconditioner>
class BiCGStabSolver
{
typedef Opm::Linear::ConvergenceCriterion<Vector> ConvergenceCriterion;
typedef typename LinearOperator::field_type Scalar;
public:
BiCGStabSolver(Preconditioner& preconditioner,
ConvergenceCriterion& convergenceCriterion,
Dune::ScalarProduct<Vector>& scalarProduct)
: preconditioner_(preconditioner)
, convergenceCriterion_(&convergenceCriterion)
, scalarProduct_(scalarProduct)
{
A_ = nullptr;
b_ = nullptr;
maxIterations_ = 1000;
verbosity_ = 0;
}
/*!
* \brief Set the maximum number of iterations before we give up without achieving
* convergence.
*/
void setMaxIterations(unsigned value)
{ maxIterations_ = value; }
/*!
* \brief Return the maximum number of iterations before we give up without achieving
* convergence.
*/
unsigned maxIterations() const
{ return maxIterations_; }
/*!
* \brief Set the verbosity level of the linear solver
*
* The levels correspond to those used by the dune-istl solvers:
*
* - 0: no output
* - 1: summary output at the end of the solution procedure (if no exception was
* thrown)
* - 2: detailed output after each iteration
*/
void setVerbosity(unsigned value)
{ verbosity_ = value; }
/*!
* \brief Return the verbosity level of the linear solver.
*/
unsigned verbosity() const
{ return verbosity_; }
/*!
* \brief Set the matrix "A" of the linear system.
*/
void setLinearOperator(const LinearOperator* A)
{ A_ = A; }
/*!
* \brief Set the right hand side "b" of the linear system.
*/
void setRhs(const Vector* b)
{ b_ = b; }
/*!
* \brief Run the stabilized BiCG solver and store the result into the "x" vector.
*/
bool apply(Vector& x)
{
// epsilon used for detecting breakdowns
const Scalar breakdownEps = std::numeric_limits<Scalar>::min() * Scalar(1e10);
// start the stop watch for the solution procedure, and make sure that it
// gets stopped regardless of how this method is left (i.e., that the timer
// also stops in case an exception is thrown, not only if the method returns
// regularly.)
report_.reset();
Opm::TimerGuard reportTimerGuard(report_.timer());
report_.timer().start();
// preconditioned stabilized biconjugate gradient method
//
// See https://en.wikipedia.org/wiki/Biconjugate_gradient_stabilized_method,
// (article date: December 19, 2016)
// set the initial solution to the zero vector
x = 0.0;
// prepare the preconditioner. to allow some optimizations, we assume that the
// preconditioner does not change the initial solution x if the initial solution
// is a zero vector.
Vector r = *b_;
preconditioner_.pre(x, r);
#ifndef NDEBUG
// ensure that the preconditioner does not change the initial solution. since
// this is a debugging check, we don't care if it does not work properly in
// parallel. (if this goes wrong, it should be considered to be a bug in the
// code anyway.)
for (unsigned i = 0; i < x.size(); ++i) {
const auto& u = x[i];
if (u*u != 0.0)
throw std::logic_error("The preconditioner is assumed not to modify the initial solution!");
}
#endif // NDEBUG
convergenceCriterion_->setInitial(x, r);
if (convergenceCriterion_->converged()) {
report_.setConverged(true);
return report_.converged();
}
if (verbosity_ > 0) {
std::cout << "-------- BiCGStabSolver --------" << std::endl;
convergenceCriterion_->printInitial();
}
// r0 = b - A*x_0; since x_0 == 0, r0 equals b and nothing needs to be done
//A_->applyscaleadd(/*alpha=*/-1.0, x, r);
// r0hat = r0
const Vector& r0hat = *b_;
// rho0 = alpha = omega0 = 1
Scalar rho = 1.0;
Scalar alpha = 1.0;
Scalar omega = 1.0;
// v_0 = p_0 = 0;
Vector v(r);
v = 0.0;
Vector p(v);
// create all the temporary vectors which we need. Be aware that some of them
// actually point to the same object because they are not needed at the same time!
Vector y(x);
Vector& h(x);
Vector& s(r);
Vector z(x);
Vector& t(y);
unsigned n = x.size();
for (; report_.iterations() < maxIterations_; report_.increment()) {
// rho_i = (r0hat,r_(i-1))
Scalar rho_i = scalarProduct_.dot(r0hat, r);
// beta = (rho_i/rho_(i-1))*(alpha/omega_(i-1))
if (std::abs(rho) <= breakdownEps || std::abs(omega) <= breakdownEps)
throw Opm::NumericalIssue("Breakdown of the BiCGStab solver (division by zero)");
Scalar beta = (rho_i/rho)*(alpha/omega);
// make rho correspond to the current iteration (i.e., forget rho_(i-1))
rho = rho_i;
// this loop conflates the following operations:
//
// p_i = r_(i-1) + beta*(p_(i-1) - omega_(i-1)*v_(i-1))
// y = p
for (unsigned i = 0; i < n; ++i) {
// p_i = r_(i-1) + beta*(p_(i-1) - omega_(i-1)*v_(i-1))
auto tmp = v[i];
tmp *= omega;
tmp -= p[i];
tmp *= -beta;
p[i] = r[i];
p[i] += tmp;
// y = p; not required because the preconditioner overwrites y anyway...
// y[i] = p[i];
}
// y = K^-1 * p_i
preconditioner_.apply(y, p);
// v_i = A*y
A_->apply(y, v);
// alpha = rho_i/(r0hat,v_i)
Scalar denom = scalarProduct_.dot(r0hat, v);
if (std::abs(denom) <= breakdownEps)
throw Opm::NumericalIssue("Breakdown of the BiCGStab solver (division by zero)");
alpha = rho_i/denom;
if (std::abs(alpha) <= breakdownEps)
throw Opm::NumericalIssue("Breakdown of the BiCGStab solver (stagnation detected)");
// h = x_(i-1) + alpha*y
// s = r_(i-1) - alpha*v_i
for (unsigned i = 0; i < n; ++i) {
auto tmp = y[i];
tmp *= alpha;
tmp += x[i];
h[i] = tmp;
//s[i] = r[i]; // not necessary because r and s are the same object
tmp = v[i];
tmp *= alpha;
s[i] -= tmp;
}
// do convergence check and print terminal output
convergenceCriterion_->update(/*curSol=*/h, /*delta=*/y, s);
if (convergenceCriterion_->converged()) {
if (verbosity_ > 0) {
convergenceCriterion_->print(report_.iterations() + 0.5);
std::cout << "-------- /BiCGStabSolver --------" << std::endl;
}
// x = h; // not necessary because x and h are the same object
preconditioner_.post(x);
report_.setConverged(true);
return report_.converged();
}
else if (convergenceCriterion_->failed()) {
if (verbosity_ > 0) {
convergenceCriterion_->print(report_.iterations() + 0.5);
std::cout << "-------- /BiCGStabSolver --------" << std::endl;
}
report_.setConverged(false);
return report_.converged();
}
if (verbosity_ > 1)
convergenceCriterion_->print(report_.iterations() + 0.5);
// z = K^-1*s
z = s;
preconditioner_.apply(z, s);
// t = Az
t = z;
A_->apply(z, t);
// omega_i = (t*s)/(t*t)
denom = scalarProduct_.dot(t, t);
if (std::abs(denom) <= breakdownEps)
throw Opm::NumericalIssue("Breakdown of the BiCGStab solver (division by zero)");
omega = scalarProduct_.dot(t, s)/denom;
if (std::abs(omega) <= breakdownEps)
throw Opm::NumericalIssue("Breakdown of the BiCGStab solver (stagnation detected)");
// x_i = h + omega_i*z
// x = h; // not necessary because x and h are the same object
x.axpy(/*a=*/omega, /*y=*/z);
// do convergence check and print terminal output
convergenceCriterion_->update(/*curSol=*/x, /*delta=*/z, r);
if (convergenceCriterion_->converged()) {
if (verbosity_ > 0) {
convergenceCriterion_->print(1.0 + report_.iterations());
std::cout << "-------- /BiCGStabSolver --------" << std::endl;
}
preconditioner_.post(x);
report_.setConverged(true);
return report_.converged();
}
else if (convergenceCriterion_->failed()) {
if (verbosity_ > 0) {
convergenceCriterion_->print(1.0 + report_.iterations());
std::cout << "-------- /BiCGStabSolver --------" << std::endl;
}
report_.setConverged(false);
return report_.converged();
}
if (verbosity_ > 1)
convergenceCriterion_->print(1.0 + report_.iterations());
// r_i = s - omega*t
// r = s; // not necessary because r and s are the same object
r.axpy(/*a=*/-omega, /*y=*/t);
}
report_.setConverged(false);
return report_.converged();
}
/*!
* \brief Set the convergence criterion to be used by the solver.
*/
void setConvergenceCriterion(ConvergenceCriterion& crit)
{
convergenceCriterion_ = &crit;
}
const Opm::Linear::SolverReport& report() const
{ return report_; }
private:
const LinearOperator* A_;
const Vector* b_;
Preconditioner& preconditioner_;
// held by pointer so that it can be exchanged via setConvergenceCriterion()
ConvergenceCriterion* convergenceCriterion_;
Dune::ScalarProduct<Vector>& scalarProduct_;
Opm::Linear::SolverReport report_;
unsigned maxIterations_;
unsigned verbosity_;
};
} // namespace Linear
} // namespace Opm
#endif
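A minimal usage sketch for this solver, assuming an already assembled scalar BCRS matrix A and right-hand side b. The concrete operator, preconditioner, communication and tolerance choices below are illustrative stand-ins (any Dune preconditioner with the pre()/apply()/post() interface works); the convergence criterion used is the CombinedCriterion added later in this commit:

#include <dune/common/fmatrix.hh>
#include <dune/common/fvector.hh>
#include <dune/common/parallel/mpihelper.hh>
#include <dune/istl/bcrsmatrix.hh>
#include <dune/istl/bvector.hh>
#include <dune/istl/operators.hh>
#include <dune/istl/preconditioners.hh>
#include <dune/istl/scalarproducts.hh>
#include <opm/simulators/linalg/bicgstabsolver.hh>
#include <opm/simulators/linalg/combinedcriterion.hh>

using Matrix = Dune::BCRSMatrix<Dune::FieldMatrix<double, 1, 1>>;
using Vector = Dune::BlockVector<Dune::FieldVector<double, 1>>;
using Operator = Dune::MatrixAdapter<Matrix, Vector, Vector>;
using Precond = Dune::SeqJac<Matrix, Vector, Vector>;
using Comm = Dune::CollectiveCommunication<Dune::No_Comm>;

// illustrative sketch: solve A*x = b sequentially with a Jacobi preconditioner
bool solveWithBiCGStab(const Matrix& A, const Vector& b, Vector& x)
{
    Operator op(A);
    Precond precond(A, /*numSweeps=*/1, /*relaxation=*/1.0);
    Dune::SeqScalarProduct<Vector> scalarProduct;

    // converge once the residual was reduced by a factor of 1e8; the huge
    // maxResidual value effectively disables the divergence bound
    Comm comm;
    Opm::Linear::CombinedCriterion<Vector, Comm>
        criterion(comm, /*residualReductionTolerance=*/1e-8,
                  /*absResidualTolerance=*/0.0, /*maxResidual=*/1e30);

    Opm::Linear::BiCGStabSolver<Operator, Vector, Precond>
        solver(precond, criterion, scalarProduct);
    solver.setLinearOperator(&op);
    solver.setRhs(&b);

    x.resize(b.size());
    return solver.apply(x); // apply() zeroes x before iterating
}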


@@ -0,0 +1,186 @@
// -*- mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
// vi: set et ts=4 sw=4 sts=4:
/*
This file is part of the Open Porous Media project (OPM).
OPM is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
OPM is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with OPM. If not, see <http://www.gnu.org/licenses/>.
Consult the COPYING file in the top-level source directory of this
module for the precise wording of the license and the list of
copyright holders.
*/
/*!
* \file
* \copydoc Opm::Linear::BlackList
*/
#ifndef EWOMS_BLACK_LIST_HH
#define EWOMS_BLACK_LIST_HH
#include "overlaptypes.hh"
#if HAVE_MPI
#include <ewoms/parallel/mpibuffer.hh>
#include <dune/grid/common/datahandleif.hh>
#include <dune/grid/common/gridenums.hh>
#endif // HAVE_MPI
#include <iostream>
#include <algorithm>
namespace Opm {
namespace Linear {
/*!
* \brief Expresses which degrees of freedom are blacklisted for the parallel linear
* solvers and which domestic indices they correspond to.
*/
class BlackList
{
public:
struct PeerBlackListedEntry {
Index nativeIndexOfPeer;
Index myOwnNativeIndex;
};
typedef std::vector<PeerBlackListedEntry> PeerBlackList;
typedef std::map<ProcessRank, PeerBlackList> PeerBlackLists;
BlackList()
{ }
BlackList(const BlackList&) = default;
bool hasIndex(Index nativeIdx) const
{ return nativeBlackListedIndices_.count(nativeIdx) > 0; }
void addIndex(Index nativeIdx)
{ nativeBlackListedIndices_.insert(nativeIdx); }
Index nativeToDomestic(Index nativeIdx) const
{
auto it = nativeToDomesticMap_.find(nativeIdx);
if (it == nativeToDomesticMap_.end())
return -1;
return it->second;
}
void setPeerList(ProcessRank peerRank, const PeerBlackList& peerBlackList)
{ peerBlackLists_[peerRank] = peerBlackList; }
template <class DomesticOverlap>
void updateNativeToDomesticMap(const DomesticOverlap& domesticOverlap OPM_UNUSED_NOMPI)
{
#if HAVE_MPI
auto peerListIt = peerBlackLists_.begin();
const auto& peerListEndIt = peerBlackLists_.end();
for (; peerListIt != peerListEndIt; ++peerListIt) {
sendGlobalIndices_(peerListIt->first,
peerListIt->second,
domesticOverlap);
}
peerListIt = peerBlackLists_.begin();
for (; peerListIt != peerListEndIt; ++peerListIt) {
receiveGlobalIndices_(peerListIt->first, domesticOverlap);
}
peerListIt = peerBlackLists_.begin();
for (; peerListIt != peerListEndIt; ++peerListIt) {
numGlobalIdxSendBuff_.at(peerListIt->first).wait();
globalIdxSendBuff_.at(peerListIt->first).wait();
}
#endif // HAVE_MPI
}
void print() const
{
std::cout << "my own blacklisted indices:\n";
auto idxIt = nativeBlackListedIndices_.begin();
const auto& idxEndIt = nativeBlackListedIndices_.end();
for (; idxIt != idxEndIt; ++idxIt)
std::cout << " (native index: " << *idxIt
<< ", domestic index: " << nativeToDomestic(*idxIt) << ")\n";
std::cout << "blacklisted indices of the peers in my own domain:\n";
auto peerListIt = peerBlackLists_.begin();
const auto& peerListEndIt = peerBlackLists_.end();
for (; peerListIt != peerListEndIt; ++peerListIt) {
ProcessRank peerRank = peerListIt->first;
std::cout << " peer " << peerRank << ":\n";
auto idx2It = peerListIt->second.begin();
const auto& idx2EndIt = peerListIt->second.end();
for (; idx2It != idx2EndIt; ++ idx2It)
std::cout << " (native index: " << idx2It->myOwnNativeIndex
<< ", native peer index: " << idx2It->nativeIndexOfPeer << ")\n";
}
}
private:
#if HAVE_MPI
template <class DomesticOverlap>
void sendGlobalIndices_(ProcessRank peerRank,
const PeerBlackList& peerIndices,
const DomesticOverlap& domesticOverlap)
{
auto& numIdxBuff = numGlobalIdxSendBuff_[peerRank];
auto& idxBuff = globalIdxSendBuff_[peerRank];
numIdxBuff.resize(1);
numIdxBuff[0] = static_cast<unsigned>(peerIndices.size());
numIdxBuff.send(peerRank);
idxBuff.resize(2*peerIndices.size());
for (size_t i = 0; i < peerIndices.size(); ++i) {
// global index
Index myNativeIdx = peerIndices[i].myOwnNativeIndex;
Index myDomesticIdx = domesticOverlap.nativeToDomestic(myNativeIdx);
idxBuff[2*i + 0] = domesticOverlap.domesticToGlobal(myDomesticIdx);
// native peer index
idxBuff[2*i + 1] = peerIndices[i].nativeIndexOfPeer;
}
idxBuff.send(peerRank);
}
template <class DomesticOverlap>
void receiveGlobalIndices_(ProcessRank peerRank,
const DomesticOverlap& domesticOverlap)
{
MpiBuffer<unsigned> numGlobalIdxBuf(1);
numGlobalIdxBuf.receive(peerRank);
unsigned numIndices = numGlobalIdxBuf[0];
MpiBuffer<Index> globalIdxBuf(2*numIndices);
globalIdxBuf.receive(peerRank);
for (unsigned i = 0; i < numIndices; ++i) {
Index globalIdx = globalIdxBuf[2*i + 0];
Index nativeIdx = globalIdxBuf[2*i + 1];
nativeToDomesticMap_[nativeIdx] = domesticOverlap.globalToDomestic(globalIdx);
}
}
#endif // HAVE_MPI
std::set<Index> nativeBlackListedIndices_;
std::map<Index, Index> nativeToDomesticMap_;
#if HAVE_MPI
std::map<ProcessRank, MpiBuffer<unsigned>> numGlobalIdxSendBuff_;
std::map<ProcessRank, MpiBuffer<Index>> globalIdxSendBuff_;
#endif // HAVE_MPI
PeerBlackLists peerBlackLists_;
};
} // namespace Linear
} // namespace Opm
#endif
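The class is essentially a set of black-listed native indices plus bookkeeping for their peer-rank counterparts. A tiny sketch of the intended use (the index values are made up):

#include <cassert>
#include <opm/simulators/linalg/blacklist.hh>

void blackListExample()
{
    Opm::Linear::BlackList blackList;

    // native indices 3 and 7 are not handled by the local process
    blackList.addIndex(3);
    blackList.addIndex(7);

    assert(blackList.hasIndex(3));
    assert(!blackList.hasIndex(4));

    // until updateNativeToDomesticMap() has been called, no domestic
    // index is known for a black-listed native index
    assert(blackList.nativeToDomestic(3) == -1);
}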


@@ -0,0 +1,242 @@
// -*- mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
// vi: set et ts=4 sw=4 sts=4:
/*
This file is part of the Open Porous Media project (OPM).
OPM is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
OPM is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with OPM. If not, see <http://www.gnu.org/licenses/>.
Consult the COPYING file in the top-level source directory of this
module for the precise wording of the license and the list of
copyright holders.
*/
/*!
* \file
* \copydoc Opm::Linear::CombinedCriterion
*/
#ifndef EWOMS_COMBINED_CRITERION_HH
#define EWOMS_COMBINED_CRITERION_HH
#include "convergencecriterion.hh"
#include <iomanip>
#include <iostream>
namespace Opm {
namespace Linear {
/*! \addtogroup Linear
* \{
*/
/*!
* \brief Convergence criterion which looks at the absolute value of the residual and
* fails if the linear solver stagnates.
*
* For the CombinedCriterion, the error of the solution is defined as \f[ e^k = \max_i\{
* \left| r^k_i \right| \}\;, \f]
*
* where \f$r^k = \mathbf{A} x^k - b \f$ is the residual for the k-th iterative solution
* vector \f$x^k\f$.
*
* In addition to the reduction of the maximum residual, the linear solver is aborted
* early if the residual goes below or above absolute limits.
*/
template <class Vector, class CollectiveCommunication>
class CombinedCriterion : public ConvergenceCriterion<Vector>
{
typedef typename Vector::field_type Scalar;
typedef typename Vector::block_type BlockType;
public:
CombinedCriterion(const CollectiveCommunication& comm)
: comm_(comm)
, residualReductionTolerance_(0.0)
, absResidualTolerance_(0.0)
, maxResidual_(0.0)
{}
CombinedCriterion(const CollectiveCommunication& comm,
Scalar residualReductionTolerance,
Scalar absResidualTolerance = 0.0,
Scalar maxResidual = 0.0)
: comm_(comm),
residualReductionTolerance_(residualReductionTolerance),
absResidualTolerance_(absResidualTolerance),
maxResidual_(maxResidual)
{ }
/*!
* \brief Sets the residual reduction tolerance.
*/
void setResidualReductionTolerance(Scalar tol)
{ residualReductionTolerance_ = tol; }
/*!
* \brief Returns the tolerance of the residual reduction of the solution.
*/
Scalar residualReductionTolerance() const
{ return residualReductionTolerance_; }
/*!
* \brief Returns the reduction of the maximum of the residual compared to the
* initial solution.
*/
Scalar residualReduction() const
{ return residualError_/std::max<Scalar>(1e-20, initialResidualError_); }
/*!
* \brief Sets the maximum absolute tolerated residual.
*/
void setAbsResidualTolerance(Scalar tol)
{ absResidualTolerance_ = tol; }
/*!
* \brief Returns the tolerated maximum of the infinity norm of the absolute
* residual.
*/
Scalar absResidualTolerance() const
{ return absResidualTolerance_; }
/*!
* \brief Returns the infinity norm of the absolute residual.
*/
Scalar absResidual() const
{ return residualError_; }
/*!
* \copydoc ConvergenceCriterion::setInitial(const Vector&, const Vector&)
*/
void setInitial(const Vector& curSol, const Vector& curResid) override
{
updateErrors_(curSol, curSol, curResid);
stagnates_ = false;
// to avoid divisions by zero, make sure that we don't use an initial error of 0
residualError_ = std::max<Scalar>(residualError_,
std::numeric_limits<Scalar>::min()*1e10);
initialResidualError_ = residualError_;
lastResidualError_ = residualError_;
}
/*!
* \copydoc ConvergenceCriterion::update(const Vector&, const Vector&, const Vector&)
*/
void update(const Vector& curSol, const Vector& changeIndicator, const Vector& curResid) override
{ updateErrors_(curSol, changeIndicator, curResid); }
/*!
* \copydoc ConvergenceCriterion::converged()
*/
bool converged() const override
{
// we're converged if either the residual reduction or the absolute
// residual is below its respective tolerance
return
residualReduction() <= residualReductionTolerance() ||
absResidual() <= absResidualTolerance();
}
/*!
* \copydoc ConvergenceCriterion::failed()
*/
bool failed() const override
{ return !converged() && (stagnates_ || residualError_ > maxResidual_); }
/*!
* \copydoc ConvergenceCriterion::accuracy()
*
* For the accuracy we only take the residual into account.
*/
Scalar accuracy() const override
{ return residualError_/initialResidualError_; }
/*!
* \copydoc ConvergenceCriterion::printInitial()
*/
void printInitial(std::ostream& os = std::cout) const override
{
os << std::setw(20) << "iteration ";
os << std::setw(20) << "residual ";
os << std::setw(20) << "reduction ";
os << std::setw(20) << "rate ";
os << std::endl;
}
/*!
* \copydoc ConvergenceCriterion::print()
*/
void print(Scalar iter, std::ostream& os = std::cout) const override
{
const Scalar eps = std::numeric_limits<Scalar>::min()*1e10;
os << std::setw(20) << iter << " ";
os << std::setw(20) << absResidual() << " ";
os << std::setw(20) << accuracy() << " ";
os << std::setw(20) << lastResidualError_/std::max<Scalar>(residualError_, eps) << " ";
os << std::endl << std::flush;
}
private:
// update the weighted absolute residual
void updateErrors_(const Vector& curSol OPM_UNUSED, const Vector& changeIndicator, const Vector& curResid)
{
lastResidualError_ = residualError_;
residualError_ = 0.0;
stagnates_ = true;
for (size_t i = 0; i < curResid.size(); ++i) {
for (unsigned j = 0; j < BlockType::dimension; ++j) {
residualError_ =
std::max<Scalar>(residualError_,
std::abs(curResid[i][j]));
if (stagnates_ && changeIndicator[i][j] != 0.0)
// only stagnation means that we've failed!
stagnates_ = false;
}
}
residualError_ = comm_.max(residualError_);
// the linear solver only stagnates if all processes stagnate
stagnates_ = comm_.min(stagnates_);
}
const CollectiveCommunication& comm_;
// the infinity norm of the residual of the last iteration
Scalar lastResidualError_;
// the infinity norm of the residual of the current iteration
Scalar residualError_;
// the infinity norm of the residual of the initial solution
Scalar initialResidualError_;
// the minimum reduction of the residual norm where the solution is to be considered
// converged
Scalar residualReductionTolerance_;
// the maximum residual norm for the residual for the solution to be considered to be
// converged
Scalar absResidualTolerance_;
// The maximum error which is tolerated before we fail.
Scalar maxResidual_;
// does the linear solver seem to stagnate, i.e. were the last two solutions
// identical?
bool stagnates_;
};
//! \} end documentation
}} // end namespace Linear, Opm
#endif
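A sketch of how the two tolerances and the failure bound interact when the criterion is parameterized; the tolerance values are placeholders:

#include <dune/common/fvector.hh>
#include <dune/common/parallel/mpihelper.hh>
#include <dune/istl/bvector.hh>
#include <opm/simulators/linalg/combinedcriterion.hh>

using Vector = Dune::BlockVector<Dune::FieldVector<double, 1>>;
using Comm = Dune::CollectiveCommunication<Dune::No_Comm>;

bool initiallyConverged(const Comm& comm, const Vector& x0, const Vector& r0)
{
    // converged once ||r||_inf drops by a factor of 1e8 *or* below 1e-11
    // absolutely; failed if it exceeds 1e10 or if the solver stagnates
    Opm::Linear::CombinedCriterion<Vector, Comm>
        crit(comm, /*residualReductionTolerance=*/1e-8,
             /*absResidualTolerance=*/1e-11, /*maxResidual=*/1e10);

    crit.setInitial(x0, r0);
    // directly after setInitial() the reduction is 1, so this returns
    // true only if ||r0||_inf is already below the absolute tolerance
    return crit.converged();
}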


@@ -0,0 +1,156 @@
// -*- mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
// vi: set et ts=4 sw=4 sts=4:
/*
This file is part of the Open Porous Media project (OPM).
OPM is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
OPM is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with OPM. If not, see <http://www.gnu.org/licenses/>.
Consult the COPYING file in the top-level source directory of this
module for the precise wording of the license and the list of
copyright holders.
*/
/*!
* \file
* \copydoc Opm::ConvergenceCriterion
*/
#ifndef EWOMS_ISTL_CONVERGENCE_CRITERION_HH
#define EWOMS_ISTL_CONVERGENCE_CRITERION_HH
#include <opm/material/common/Unused.hpp>
#include <dune/common/version.hh>
#include <dune/common/fvector.hh>
#include <cmath>
#include <iostream>
#include <iomanip>
namespace Opm {
namespace Linear {
/*! \addtogroup Linear
* \{
*/
/*!
* \file
* \brief Defines the base class for the convergence criteria of the linear
* solvers of DUNE-ISTL.
*/
/*!
* \brief Base class for all convergence criteria which only defines a virtual
* API.
*/
template <class Vector>
class ConvergenceCriterion
{
//! \brief The real type of the field type (is the same if using real numbers, but differs for std::complex)
typedef typename Dune::FieldTraits<typename Vector::field_type>::real_type real_type;
typedef real_type Scalar;
public:
/*!
* \brief Destructor.
*
* In the ConvergenceCriterion it does not do anything, but it is
* required to be declared virtual.
*/
virtual ~ConvergenceCriterion()
{}
/*!
* \brief Set the initial solution of the linear system of equations.
*
* This version of the method does NOT take the two-norm of the
* residual as argument. If the two-norm of the defect is available
* for the linear solver, the version of the update() method with it
* should be called.
*
* \param curSol The current iterative solution of the linear system
* of equations
* \param curResid The residual vector of the current iterative
* solution of the linear system of equations
*/
virtual void setInitial(const Vector& curSol, const Vector& curResid) = 0;
/*!
* \brief Update the internal members of the convergence criterion
* with the current solution.
*
* This version of the method does NOT take the two-norm of the
* residual as argument. If the two-norm of the defect is available
* for the linear solver, the version of the update() method with it
* should be called.
*
* \param curSol The current iterative solution of the linear system
* of equations
* \param changeIndicator A vector where all non-zero values indicate that the
* solution has changed since the last iteration.
* \param curResid The residual vector of the current iterative
* solution of the linear system of equations
*/
virtual void update(const Vector& curSol, const Vector& changeIndicator, const Vector& curResid) = 0;
/*!
* \brief Returns true if and only if the convergence criterion is
* met.
*/
virtual bool converged() const = 0;
/*!
* \brief Returns true if the convergence criterion cannot be met anymore because the
* solver has broken down.
*/
virtual bool failed() const
{ return false; }
/*!
* \brief Returns the accuracy of the solution at the last update.
*
* A value of zero means that the solution was exact.
*/
virtual Scalar accuracy() const = 0;
/*!
* \brief Prints the initial information about the convergence behaviour.
*
* This method is called after setInitial() if the solver thinks
* it's a good idea to be verbose. In practice, "printing the
* initial information" means printing column headers and the
* initial state.
*
* \param os The output stream to which the message gets written.
*/
virtual void printInitial(std::ostream& os OPM_UNUSED = std::cout) const
{}
/*!
* \brief Prints the information about the convergence behaviour for
* the current iteration.
*
* \param iter The iteration number. The semantics of this parameter
* are chosen by the linear solver.
* \param os The output stream to which the message gets written.
*/
virtual void print(Scalar iter OPM_UNUSED, std::ostream& os OPM_UNUSED = std::cout) const
{}
};
//! \} end documentation
}} // end namespace Linear, Opm
#endif
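For illustration, a hypothetical implementation of this interface which only monitors the infinity norm of the residual could look as follows; the class name and the tolerance handling are made up:

#include <algorithm>
#include <opm/simulators/linalg/convergencecriterion.hh>

// minimal sketch of a custom criterion: declare convergence as soon as
// the infinity norm of the residual drops below a fixed tolerance
template <class Vector>
class MaxResidCriterion : public Opm::Linear::ConvergenceCriterion<Vector>
{
    using Scalar =
        typename Dune::FieldTraits<typename Vector::field_type>::real_type;

public:
    explicit MaxResidCriterion(Scalar tol)
        : tolerance_(tol)
        , residError_(1e100)
    {}

    void setInitial(const Vector& curSol, const Vector& curResid) override
    { update(curSol, curSol, curResid); }

    void update(const Vector&, const Vector&, const Vector& curResid) override
    {
        residError_ = 0.0;
        for (const auto& block : curResid)
            residError_ = std::max(residError_, block.infinity_norm());
    }

    bool converged() const override
    { return residError_ <= tolerance_; }

    Scalar accuracy() const override
    { return residError_; }

private:
    Scalar tolerance_;
    Scalar residError_;
};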


@@ -0,0 +1,583 @@
// -*- mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
// vi: set et ts=4 sw=4 sts=4:
/*
This file is part of the Open Porous Media project (OPM).
OPM is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
OPM is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with OPM. If not, see <http://www.gnu.org/licenses/>.
Consult the COPYING file in the top-level source directory of this
module for the precise wording of the license and the list of
copyright holders.
*/
/*!
* \file
* \copydoc Opm::Linear::DomesticOverlapFromBCRSMatrix
*/
#ifndef EWOMS_DOMESTIC_OVERLAP_FROM_BCRS_MATRIX_HH
#define EWOMS_DOMESTIC_OVERLAP_FROM_BCRS_MATRIX_HH
#include "foreignoverlapfrombcrsmatrix.hh"
#include "blacklist.hh"
#include "globalindices.hh"
#include <ewoms/parallel/mpibuffer.hh>
#include <algorithm>
#include <cassert>
#include <limits>
#include <set>
#include <map>
#include <vector>
namespace Opm {
namespace Linear {
/*!
* \brief This class creates and manages the domestic overlap given an
* initial list of border indices and a BCRS matrix.
*
* The domestic overlap comprises the process's local (row) indices plus
* those indices of the overlap region which peer processes share with it.
*/
class DomesticOverlapFromBCRSMatrix
{
typedef Opm::Linear::ForeignOverlapFromBCRSMatrix ForeignOverlap;
typedef Opm::Linear::GlobalIndices<ForeignOverlap> GlobalIndices;
public:
// overlaps should never be copied!
DomesticOverlapFromBCRSMatrix(const DomesticOverlapFromBCRSMatrix&) = delete;
/*!
* \brief Constructs the foreign overlap given a BCRS matrix and
* an initial list of border indices.
*/
template <class BCRSMatrix>
DomesticOverlapFromBCRSMatrix(const BCRSMatrix& A,
const BorderList& borderList,
const BlackList& blackList,
unsigned overlapSize)
: foreignOverlap_(A, borderList, blackList, overlapSize)
, blackList_(blackList)
, globalIndices_(foreignOverlap_)
{
myRank_ = 0;
worldSize_ = 1;
#if HAVE_MPI
int tmp;
MPI_Comm_rank(MPI_COMM_WORLD, &tmp);
myRank_ = static_cast<ProcessRank>(tmp);
MPI_Comm_size(MPI_COMM_WORLD, &tmp);
worldSize_ = static_cast<unsigned>(tmp);
#endif // HAVE_MPI
buildDomesticOverlap_();
updateMasterRanks_();
blackList_.updateNativeToDomesticMap(*this);
setupDebugMapping_();
}
void check() const
{
#ifndef NDEBUG
// check consistency of global indices
for (unsigned domIdx = 0; domIdx < numDomestic(); ++domIdx) {
assert(globalToDomestic(domesticToGlobal(domIdx)) == static_cast<Index>(domIdx));
}
#endif // NDEBUG
// send the foreign overlap for which we are master to the
// peers
std::map<int, MpiBuffer<unsigned> *> sizeBufferMap;
auto peerIt = peerSet_.begin();
const auto& peerEndIt = peerSet_.end();
for (; peerIt != peerEndIt; ++peerIt) {
auto& buffer = *(new MpiBuffer<unsigned>(1));
sizeBufferMap[*peerIt] = &buffer;
buffer[0] = foreignOverlap_.foreignOverlapWithPeer(*peerIt).size();
buffer.send(*peerIt);
}
peerIt = peerSet_.begin();
for (; peerIt != peerEndIt; ++peerIt) {
MpiBuffer<unsigned> rcvBuffer(1);
rcvBuffer.receive(*peerIt);
assert(rcvBuffer[0] == domesticOverlapWithPeer_.find(*peerIt)->second.size());
}
peerIt = peerSet_.begin();
for (; peerIt != peerEndIt; ++peerIt) {
sizeBufferMap[*peerIt]->wait();
delete sizeBufferMap[*peerIt];
}
}
/*!
* \brief Returns the rank of the current process.
*/
ProcessRank myRank() const
{ return myRank_; }
/*!
* \brief Returns the number of processes in the global MPI communicator.
*/
unsigned worldSize() const
{ return worldSize_; }
/*!
* \brief Return the set of process ranks which share an overlap
* with the current process.
*/
const PeerSet& peerSet() const
{ return peerSet_; }
/*!
* \brief Returns true iff a domestic index is a border index.
*/
bool isBorder(Index domesticIdx) const
{
return isLocal(domesticIdx)
&& foreignOverlap_.isBorder(mapExternalToInternal_(domesticIdx));
}
/*!
* \brief Returns true iff a domestic index is on the border with
* a given peer process.
*/
bool isBorderWith(Index domesticIdx, ProcessRank peerRank) const
{
return isLocal(domesticIdx)
&& foreignOverlap_.isBorderWith(mapExternalToInternal_(domesticIdx),
peerRank);
}
/*!
* \brief Returns the number of indices on the front within a given
* peer rank's grid partition.
*/
size_t numFront(ProcessRank peerRank) const
{ return foreignOverlap_.numFront(peerRank); }
/*!
* \brief Returns true iff a domestic index is on the front.
*/
bool isFront(Index domesticIdx) const
{
if (isLocal(domesticIdx))
return false;
Index internalDomesticIdx = mapExternalToInternal_(domesticIdx);
// check whether the border distance of the domestic overlap is
// maximal for the index
const auto& domOverlap = domesticOverlapByIndex_[internalDomesticIdx];
return domOverlap.size() > 0
&& domOverlap.begin()->second == foreignOverlap_.overlapSize();
}
/*!
* \brief Returns the object which represents the black-listed native indices.
*/
const BlackList& blackList() const
{ return blackList_; }
/*!
* \brief Returns the number of processes which "see" a given
* index.
*/
size_t numPeers(Index domesticIdx) const
{ return domesticOverlapByIndex_[mapExternalToInternal_(domesticIdx)].size(); }
/*!
* \brief Returns the size of the overlap region
*/
unsigned overlapSize() const
{ return foreignOverlap_.overlapSize(); }
/*!
* \brief Returns the number of native indices
*
* I.e. the number of indices of the "raw" grid partition of the
* local process (including the indices in ghost and overlap
* elements).
*/
size_t numNative() const
{ return foreignOverlap_.numNative(); }
/*!
* \brief Returns the number of local indices
*
* I.e. indices in the interior or on the border of the process'
* domain.
*/
size_t numLocal() const
{ return foreignOverlap_.numLocal(); }
/*!
* \brief Returns the number of domestic indices.
*
* The domestic indices are defined as the process' local indices
* plus its domestic overlap (i.e., indices of which it is not the
* master and which do not lie on the process border).
*/
size_t numDomestic() const
{ return globalIndices_.numDomestic(); }
/*!
* \brief Return true if a domestic index is local for the process
*
* I.e. the entity for this index is in the interior or on the
* border of the process' domain.
*/
bool isLocal(Index domesticIdx) const
{ return mapExternalToInternal_(domesticIdx) < static_cast<Index>(numLocal()); }
/*!
* \brief Return true iff the current process is the master of a
* given domestic index.
*/
bool iAmMasterOf(Index domesticIdx) const
{
if (!isLocal(domesticIdx))
return false;
return foreignOverlap_.iAmMasterOf(mapExternalToInternal_(domesticIdx));
}
/*!
* \brief Return the rank of a master process for a domestic index
*/
ProcessRank masterRank(Index domesticIdx) const
{ return masterRank_[static_cast<unsigned>(mapExternalToInternal_(domesticIdx))]; }
/*!
* \brief Print the foreign overlap for debugging purposes.
*/
void print() const
{ globalIndices_.print(); }
/*!
* \brief Returns a domestic index given a global one
*/
Index globalToDomestic(Index globalIdx) const
{
Index internalIdx = globalIndices_.globalToDomestic(globalIdx);
if (internalIdx < 0)
return -1;
return mapInternalToExternal_(internalIdx);
}
/*!
* \brief Returns a global index given a domestic one
*/
Index domesticToGlobal(Index domIdx) const
{ return globalIndices_.domesticToGlobal(mapExternalToInternal_(domIdx)); }
/*!
* \brief Returns a native index given a domestic one
*/
Index domesticToNative(Index domIdx) const
{
Index internalIdx = mapExternalToInternal_(domIdx);
if (internalIdx >= static_cast<Index>(numLocal()))
return -1;
return foreignOverlap_.localToNative(internalIdx);
}
/*!
* \brief Returns a domestic index given a native one
*/
Index nativeToDomestic(Index nativeIdx) const
{
Index localIdx = foreignOverlap_.nativeToLocal(nativeIdx);
if (localIdx < 0)
return localIdx;
return mapInternalToExternal_(localIdx);
}
/*!
* \brief Returns true if a given domestic index is either in the
* foreign or in the domestic overlap.
*/
bool isInOverlap(Index domesticIdx) const
{
return !this->isLocal(domesticIdx)
|| this->foreignOverlap_.isInOverlap(mapExternalToInternal_(domesticIdx));
}
/*!
* \brief Returns true if a given domestic index is a front index
* for a peer rank.
*/
bool isFrontFor(ProcessRank peerRank, Index domesticIdx) const
{
Index internalIdx = mapExternalToInternal_(domesticIdx);
return this->foreignOverlap_.isFrontFor(peerRank, internalIdx);
}
/*!
* \brief Returns true iff a domestic index is seen by a peer rank.
*/
bool peerHasIndex(int peerRank, Index domesticIdx) const
{
return foreignOverlap_.peerHasIndex(peerRank,
mapExternalToInternal_(domesticIdx));
}
/*!
* \brief Returns number of indices which are contained in the
* foreign overlap with a peer.
*/
size_t foreignOverlapSize(ProcessRank peerRank) const
{ return foreignOverlap_.foreignOverlapWithPeer(peerRank).size(); }
/*!
* \brief Returns the domestic index given an offset in the
* foreign overlap of a peer process with the local
* process.
*/
Index foreignOverlapOffsetToDomesticIdx(ProcessRank peerRank, unsigned overlapOffset) const
{
Index internalIdx =
foreignOverlap_.foreignOverlapWithPeer(peerRank)[overlapOffset].index;
return mapInternalToExternal_(internalIdx);
}
/*!
* \brief Returns number of indices which are contained in the
* domestic overlap with a peer.
*/
size_t domesticOverlapSize(ProcessRank peerRank) const
{ return domesticOverlapWithPeer_.at(peerRank).size(); }
/*!
* \brief Returns the domestic index given an offset in the
* domestic overlap of a peer process with the local
* process.
*/
Index domesticOverlapOffsetToDomesticIdx(ProcessRank peerRank, Index overlapOffset) const
{
Index internalIdx = domesticOverlapWithPeer_.at(peerRank)[overlapOffset];
return mapInternalToExternal_(internalIdx);
}
protected:
void buildDomesticOverlap_()
{
// copy the set of peers from the foreign overlap
peerSet_ = foreignOverlap_.peerSet();
// resize the array which stores the number of peers for
// each entry.
domesticOverlapByIndex_.resize(numLocal());
borderDistance_.resize(numLocal(), 0);
PeerSet::const_iterator peerIt;
PeerSet::const_iterator peerEndIt = peerSet_.end();
// send the overlap indices to all peer processes
peerIt = peerSet_.begin();
for (; peerIt != peerEndIt; ++peerIt) {
ProcessRank peerRank = *peerIt;
sendIndicesToPeer_(peerRank);
}
// receive the overlap indices from all peer processes
peerIt = peerSet_.begin();
for (; peerIt != peerEndIt; ++peerIt) {
ProcessRank peerRank = *peerIt;
receiveIndicesFromPeer_(peerRank);
}
// wait until all send operations complete
peerIt = peerSet_.begin();
for (; peerIt != peerEndIt; ++peerIt) {
ProcessRank peerRank = *peerIt;
waitSendIndices_(peerRank);
}
}
void updateMasterRanks_()
{
size_t nLocal = numLocal();
size_t nDomestic = numDomestic();
masterRank_.resize(nDomestic);
// take the master ranks for the local indices from the
// foreign overlap
for (unsigned i = 0; i < nLocal; ++i) {
masterRank_[i] = foreignOverlap_.masterRank(static_cast<Index>(i));
}
// for non-local indices, initially use the highest representable
// rank as their master rank
for (size_t i = nLocal; i < nDomestic; ++i)
masterRank_[i] = std::numeric_limits<ProcessRank>::max();
// for the non-local indices, choose the lowest-ranked peer
// process which sees the index
auto peerIt = peerSet_.begin();
const auto& peerEndIt = peerSet_.end();
for (; peerIt != peerEndIt; ++peerIt) {
const auto& overlapWithPeer = domesticOverlapWithPeer_.find(*peerIt)->second;
auto idxIt = overlapWithPeer.begin();
const auto& idxEndIt = overlapWithPeer.end();
for (; idxIt != idxEndIt; ++idxIt) {
if (*idxIt >= 0 && foreignOverlap_.isLocal(*idxIt))
continue; // ignore border indices
masterRank_[static_cast<unsigned>(*idxIt)] = std::min(masterRank_[static_cast<unsigned>(*idxIt)], *peerIt);
}
}
}
void sendIndicesToPeer_(ProcessRank peerRank OPM_UNUSED_NOMPI)
{
#if HAVE_MPI
const auto& foreignOverlap = foreignOverlap_.foreignOverlapWithPeer(peerRank);
// first, send a message containing the number of additional
// indices stemming from the overlap (i.e. without the border
// indices)
size_t numIndices = foreignOverlap.size();
numIndicesSendBuffer_[peerRank] = new MpiBuffer<size_t>(1);
(*numIndicesSendBuffer_[peerRank])[0] = numIndices;
numIndicesSendBuffer_[peerRank]->send(peerRank);
// create MPI buffers
indicesSendBuffer_[peerRank] = new MpiBuffer<IndexDistanceNpeers>(numIndices);
// then send the additional indices themselves
auto overlapIt = foreignOverlap.begin();
const auto& overlapEndIt = foreignOverlap.end();
for (unsigned i = 0; overlapIt != overlapEndIt; ++overlapIt, ++i) {
Index localIdx = overlapIt->index;
BorderDistance borderDistance = overlapIt->borderDistance;
size_t numPeers = foreignOverlap_.foreignOverlapByLocalIndex(localIdx).size();
IndexDistanceNpeers tmp;
tmp.index = globalIndices_.domesticToGlobal(localIdx);
tmp.borderDistance = borderDistance;
tmp.numPeers = static_cast<unsigned>(numPeers);
(*indicesSendBuffer_[peerRank])[i] = tmp;
}
indicesSendBuffer_[peerRank]->send(peerRank);
#endif // HAVE_MPI
}
void waitSendIndices_(ProcessRank peerRank)
{
numIndicesSendBuffer_[peerRank]->wait();
delete numIndicesSendBuffer_[peerRank];
indicesSendBuffer_[peerRank]->wait();
delete indicesSendBuffer_[peerRank];
}
void receiveIndicesFromPeer_(ProcessRank peerRank OPM_UNUSED_NOMPI)
{
#if HAVE_MPI
// receive the number of additional indices
int numIndices = -1;
MpiBuffer<size_t> numIndicesRecvBuff(1);
numIndicesRecvBuff.receive(peerRank);
numIndices = static_cast<int>(numIndicesRecvBuff[0]);
// receive the additional indices themselves
MpiBuffer<IndexDistanceNpeers> recvBuff(static_cast<size_t>(numIndices));
recvBuff.receive(peerRank);
for (unsigned i = 0; i < static_cast<unsigned>(numIndices); ++i) {
Index globalIdx = recvBuff[i].index;
BorderDistance borderDistance = recvBuff[i].borderDistance;
// if the index is not already known, add it to the
// domestic indices
if (!globalIndices_.hasGlobalIndex(globalIdx)) {
Index newDomesticIdx = static_cast<Index>(globalIndices_.numDomestic());
globalIndices_.addIndex(newDomesticIdx, globalIdx);
size_t newSize = globalIndices_.numDomestic();
borderDistance_.resize(newSize, std::numeric_limits<int>::max());
domesticOverlapByIndex_.resize(newSize);
}
// convert the global index into a domestic one
Index domesticIdx = globalIndices_.globalToDomestic(globalIdx);
// extend the domestic overlap
domesticOverlapByIndex_[static_cast<unsigned>(domesticIdx)][static_cast<unsigned>(peerRank)] = borderDistance;
domesticOverlapWithPeer_[static_cast<unsigned>(peerRank)].push_back(domesticIdx);
//assert(borderDistance >= 0);
assert(globalIdx >= 0);
assert(domesticIdx >= 0);
assert(!(borderDistance == 0 && !foreignOverlap_.isLocal(domesticIdx)));
assert(!(borderDistance > 0 && foreignOverlap_.isLocal(domesticIdx)));
borderDistance_[static_cast<unsigned>(domesticIdx)] = std::min(borderDistance, borderDistance_[static_cast<unsigned>(domesticIdx)]);
}
#endif // HAVE_MPI
}
// this method is intended to set up the mapping code for
// mapping domestic indices to the same ones used by a sequential
// grid. this requires detailed knowledge about how a grid
// distributes the degrees of freedom over multiple processes, but
// it can simplify debugging considerably because the indices can
// be made identical for the parallel and the sequential
// computations.
//
// by default, this method does nothing
void setupDebugMapping_()
{}
// this method is intended to map domestic indices to the ones
// used by a sequential grid.
//
// by default, this method does nothing
Index mapInternalToExternal_(Index internalIdx) const
{ return internalIdx; }
// this method is intended to map the indices used by a sequential
// grid to domestic ones.
//
// by default, this method does nothing
Index mapExternalToInternal_(Index externalIdx) const
{ return externalIdx; }
ProcessRank myRank_;
unsigned worldSize_;
ForeignOverlap foreignOverlap_;
BlackList blackList_;
DomesticOverlapByRank domesticOverlapWithPeer_;
OverlapByIndex domesticOverlapByIndex_;
std::vector<BorderDistance> borderDistance_;
std::vector<ProcessRank> masterRank_;
std::map<ProcessRank, MpiBuffer<size_t> *> numIndicesSendBuffer_;
std::map<ProcessRank, MpiBuffer<IndexDistanceNpeers> *> indicesSendBuffer_;
GlobalIndices globalIndices_;
PeerSet peerSet_;
};
} // namespace Linear
} // namespace Opm
#endif
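A sketch of the intended construction sequence, with the border and black lists coming from a creator class such as the ElementBorderListFromGrid added below; the helper function and the overlap size are illustrative:

#include <iostream>
#include <opm/simulators/linalg/domesticoverlapfrombcrsmatrix.hh>

template <class BCRSMatrix, class BorderListCreator>
void buildOverlap(const BCRSMatrix& A, const BorderListCreator& creator)
{
    // one layer of overlap beyond the process border
    Opm::Linear::DomesticOverlapFromBCRSMatrix
        overlap(A, creator.borderList(), creator.blackList(), /*overlapSize=*/1);

    std::cout << "rank " << overlap.myRank() << ": "
              << overlap.numLocal() << " local of "
              << overlap.numDomestic() << " domestic indices\n";
}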


@@ -0,0 +1,261 @@
// -*- mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
// vi: set et ts=4 sw=4 sts=4:
/*
This file is part of the Open Porous Media project (OPM).
OPM is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
OPM is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with OPM. If not, see <http://www.gnu.org/licenses/>.
Consult the COPYING file in the top-level source directory of this
module for the precise wording of the license and the list of
copyright holders.
*/
/*!
* \file
* \copydoc Opm::Linear::ElementBorderListFromGrid
*/
#ifndef EWOMS_ELEMENT_BORDER_LIST_FROM_GRID_HH
#define EWOMS_ELEMENT_BORDER_LIST_FROM_GRID_HH
#include "overlaptypes.hh"
#include "blacklist.hh"
#include <opm/material/common/Unused.hpp>
#include <dune/grid/common/datahandleif.hh>
#include <dune/grid/common/gridenums.hh>
#include <dune/istl/bcrsmatrix.hh>
#include <dune/istl/scalarproducts.hh>
#include <dune/istl/operators.hh>
#include <dune/common/version.hh>
#include <algorithm>
namespace Opm {
namespace Linear {
/*!
* \brief Uses communication on the grid to find the initial seed list
* of indices for methods which use element-based degrees of
* freedom.
*/
template <class GridView, class ElementMapper>
class ElementBorderListFromGrid
{
typedef BlackList::PeerBlackListedEntry PeerBlackListedEntry;
typedef BlackList::PeerBlackList PeerBlackList;
typedef BlackList::PeerBlackLists PeerBlackLists;
typedef typename GridView::template Codim<0>::Entity Element;
class BorderListHandle_
: public Dune::CommDataHandleIF<BorderListHandle_, ProcessRank>
{
public:
BorderListHandle_(const GridView& gridView,
const ElementMapper& map,
BlackList& blackList,
BorderList& borderList)
: gridView_(gridView)
, map_(map)
, blackList_(blackList)
, borderList_(borderList)
{
auto elemIt = gridView_.template begin<0>();
const auto& elemEndIt = gridView_.template end<0>();
for (; elemIt != elemEndIt; ++elemIt) {
if (elemIt->partitionType() != Dune::InteriorEntity) {
Index elemIdx = static_cast<Index>(map_.index(*elemIt));
blackList_.addIndex(elemIdx);
}
}
}
// data handle methods
bool contains(int dim OPM_UNUSED, int codim) const
{ return codim == 0; }
bool fixedsize(int dim OPM_UNUSED, int codim OPM_UNUSED) const
{ return true; }
template <class EntityType>
size_t size(const EntityType& e OPM_UNUSED) const
{ return 2; }
template <class MessageBufferImp, class EntityType>
void gather(MessageBufferImp& buff, const EntityType& e) const
{
unsigned myIdx = static_cast<unsigned>(map_.index(e));
buff.write(static_cast<unsigned>(gridView_.comm().rank()));
buff.write(myIdx);
}
template <class MessageBufferImp>
void scatter(MessageBufferImp& buff,
const Element& e,
size_t n OPM_UNUSED)
{
// discard the index if it is not on the process boundary
bool isInteriorNeighbor = false;
auto isIt = gridView_.ibegin(e);
const auto& isEndIt = gridView_.iend(e);
for (; isIt != isEndIt; ++isIt) {
if (!isIt->neighbor())
continue;
else if (isIt->outside().partitionType() == Dune::InteriorEntity) {
isInteriorNeighbor = true;
break;
}
}
if (!isInteriorNeighbor)
return;
BorderIndex bIdx;
bIdx.localIdx = static_cast<Index>(map_.index(e));
{
ProcessRank tmp;
buff.read(tmp);
bIdx.peerRank = tmp;
peerSet_.insert(tmp);
}
{
unsigned tmp;
buff.read(tmp);
bIdx.peerIdx = static_cast<Index>(tmp);
}
bIdx.borderDistance = 1;
borderList_.push_back(bIdx);
}
// this template method is needed because the above one only works for codim-0
// entities (i.e., elements), but the dune grid code causes the
// compiler to instantiate the scatter method for every codim...
template <class MessageBufferImp, class EntityType>
void scatter(MessageBufferImp& buff OPM_UNUSED,
const EntityType& e OPM_UNUSED,
size_t n OPM_UNUSED)
{ }
const std::set<ProcessRank>& peerSet() const
{ return peerSet_; }
private:
GridView gridView_;
const ElementMapper& map_;
std::set<ProcessRank> peerSet_;
BlackList& blackList_;
BorderList& borderList_;
};
class PeerBlackListHandle_
: public Dune::CommDataHandleIF<PeerBlackListHandle_, int>
{
public:
PeerBlackListHandle_(const GridView& gridView,
const ElementMapper& map,
PeerBlackLists& peerBlackLists)
: gridView_(gridView)
, map_(map)
, peerBlackLists_(peerBlackLists)
{}
// data handle methods
bool contains(int dim OPM_UNUSED, int codim) const
{ return codim == 0; }
bool fixedsize(int dim OPM_UNUSED, int codim OPM_UNUSED) const
{ return true; }
template <class EntityType>
size_t size(const EntityType& e OPM_UNUSED) const
{ return 2; }
template <class MessageBufferImp, class EntityType>
void gather(MessageBufferImp& buff, const EntityType& e) const
{
buff.write(static_cast<int>(gridView_.comm().rank()));
buff.write(static_cast<int>(map_.index(e)));
}
template <class MessageBufferImp, class EntityType>
void scatter(MessageBufferImp& buff, const EntityType& e, size_t n OPM_UNUSED)
{
int peerRank;
int peerIdx;
Index localIdx;
buff.read(peerRank);
buff.read(peerIdx);
localIdx = static_cast<Index>(map_.index(e));
PeerBlackListedEntry pIdx;
pIdx.nativeIndexOfPeer = static_cast<Index>(peerIdx);
pIdx.myOwnNativeIndex = static_cast<Index>(localIdx);
peerBlackLists_[static_cast<ProcessRank>(peerRank)].push_back(pIdx);
}
const PeerBlackList& peerBlackList(ProcessRank peerRank) const
{ return peerBlackLists_.at(peerRank); }
private:
GridView gridView_;
const ElementMapper& map_;
PeerBlackLists peerBlackLists_;
};
public:
ElementBorderListFromGrid(const GridView& gridView, const ElementMapper& map)
: gridView_(gridView)
, map_(map)
{
BorderListHandle_ blh(gridView, map, blackList_, borderList_);
gridView.communicate(blh,
Dune::InteriorBorder_All_Interface,
Dune::BackwardCommunication);
PeerBlackListHandle_ pblh(gridView, map, peerBlackLists_);
gridView.communicate(pblh,
Dune::InteriorBorder_All_Interface,
Dune::BackwardCommunication);
auto peerIt = blh.peerSet().begin();
const auto& peerEndIt = blh.peerSet().end();
for (; peerIt != peerEndIt; ++peerIt) {
blackList_.setPeerList(*peerIt, pblh.peerBlackList(*peerIt));
}
}
// Access to the border list.
const BorderList& borderList() const
{ return borderList_; }
// Access to the black-list indices.
const BlackList& blackList() const
{ return blackList_; }
private:
const GridView gridView_;
const ElementMapper& map_;
BorderList borderList_;
BlackList blackList_;
PeerBlackLists peerBlackLists_;
};
} // namespace Linear
} // namespace Opm
#endif
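A sketch of how the creator is combined with a grid view and an element mapper; both are kept as template parameters here because the mapper construction differs between Dune versions:

#include <iostream>
#include <opm/simulators/linalg/elementborderlistfromgrid.hh>

template <class GridView, class ElementMapper>
void makeSeedLists(const GridView& gridView, const ElementMapper& mapper)
{
    // the constructor already performs the required grid communication
    Opm::Linear::ElementBorderListFromGrid<GridView, ElementMapper>
        creator(gridView, mapper);

    std::cout << creator.borderList().size() << " border indices found\n";
    // creator.borderList() and creator.blackList() can now be passed on
    // to DomesticOverlapFromBCRSMatrix as sketched above
}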


@@ -0,0 +1,185 @@
// -*- mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
// vi: set et ts=4 sw=4 sts=4:
/*
This file is part of the Open Porous Media project (OPM).
OPM is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
OPM is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with OPM. If not, see <http://www.gnu.org/licenses/>.
Consult the COPYING file in the top-level source directory of this
module for the precise wording of the license and the list of
copyright holders.
*/
/*!
* \file
* \copydoc Opm::FixPointCriterion
*/
#ifndef EWOMS_ISTL_FIXPOINT_CRITERION_HH
#define EWOMS_ISTL_FIXPOINT_CRITERION_HH
#include "convergencecriterion.hh"
#include <opm/material/common/Unused.hpp>
namespace Opm {
namespace Linear {
/*! \addtogroup Linear
* \{
*/
/*!
* \brief Provides a convergence criterion for the linear solvers
* which looks at the weighted maximum of the difference
* between two iterations.
*
* For the FixPointCriterion, the error of the solution is defined
* as
* \f[ e^k = \max_i\{ \left| w_i \delta^k_i \right| \}\;, \f]
*
* where \f$\delta = x^k - x^{k + 1} \f$ is the difference between
* two consecutive iterative solution vectors \f$x^k\f$ and \f$x^{k + 1}\f$
* and \f$w_i\f$ is the weight of the \f$i\f$-th degree of freedom.
*
* This criterion requires that the block type of the
* vector is a Dune::FieldVector
*/
template <class Vector, class CollectiveCommunication>
class FixPointCriterion : public ConvergenceCriterion<Vector>
{
typedef typename Vector::field_type Scalar;
typedef typename Vector::block_type BlockType;
public:
FixPointCriterion(const CollectiveCommunication& comm) : comm_(comm)
{}
FixPointCriterion(const CollectiveCommunication& comm,
const Vector& weightVec, Scalar reduction)
: comm_(comm), weightVec_(weightVec), tolerance_(reduction)
{}
/*!
* \brief Sets the relative weight of a primary variable
*
* For the FixPointCriterion, the error of the solution is defined
* as
* \f[ e^k = \max_i\{ \left| w_i \delta^k_i \right| \}\;, \f]
*
* where \f$\delta = x^k - x^{k + 1} \f$ is the difference between
* two consecutive iterative solution vectors \f$x^k\f$ and \f$x^{k + 1}\f$
* and \f$w_i\f$ is the weight of the \f$i\f$-th degree of freedom.
*
* This method is specific to the FixPointCriterion.
*
* \param weightVec A Dune::BlockVector<Dune::FieldVector<Scalar, n> >
* with the relative weights of the degrees of freedom
*/
void setWeight(const Vector& weightVec)
{ weightVec_ = weightVec; }
/*!
* \brief Return the relative weight of a primary variable
*
* For the FixPointCriterion, the error of the solution is defined
* as
* \f[ e^k = \max_i\{ \left| w_i \delta^k_i \right| \}\;, \f]
*
* where \f$\delta = x^k - x^{k + 1} \f$ is the difference between
* two consecutive iterative solution vectors \f$x^k\f$ and \f$x^{k + 1}\f$
* and \f$w_i\f$ is the weight of the \f$i\f$-th degree of freedom.
*
* This method is specific to the FixPointCriterion.
*
* \param outerIdx The index of the outer vector (i.e. Dune::BlockVector)
* \param innerIdx The index of the inner vector (i.e. Dune::FieldVector)
*/
Scalar weight(int outerIdx, int innerIdx) const
{ return (weightVec_.size() == 0) ? 1.0 : weightVec_[outerIdx][innerIdx]; }
/*!
* \brief Set the maximum allowed weighted difference between two
* iterations for the solution to be considered converged.
*/
void setTolerance(Scalar tol)
{ tolerance_ = tol; }
/*!
* \brief Return the maximum allowed weighted difference between two
* iterations for the solution considered to be converged.
*/
Scalar tolerance() const
{ return tolerance_; }
/*!
* \copydoc ConvergenceCriterion::setInitial(const Vector&, const Vector&)
*/
void setInitial(const Vector& curSol, const Vector& curResid OPM_UNUSED) override
{
lastSol_ = curSol;
delta_ = 1000 * tolerance_;
}
/*!
* \copydoc ConvergenceCriterion::update(const Vector&, const Vector&, const Vector&)
*/
void update(const Vector& curSol,
const Vector& changeIndicator OPM_UNUSED,
const Vector& curResid OPM_UNUSED) override
{
assert(curSol.size() == lastSol_.size());
delta_ = 0.0;
for (size_t i = 0; i < curSol.size(); ++i) {
for (size_t j = 0; j < BlockType::dimension; ++j) {
delta_ =
std::max(delta_, weight(i, j)*std::abs(curSol[i][j] - lastSol_[i][j]));
}
}
delta_ = comm_.max(delta_);
lastSol_ = curSol;
}
/*!
* \copydoc ConvergenceCriterion::converged()
*/
bool converged() const override
{ return accuracy() < tolerance(); }
/*!
* \copydoc ConvergenceCriterion::accuracy()
*/
Scalar accuracy() const override
{ return delta_; }
private:
const CollectiveCommunication& comm_;
Vector lastSol_; // solution of the last iteration
Vector weightVec_; // the relative weights of the degrees of freedom
Scalar delta_; // the maximum of the absolute weighted difference of the
// last two iterations
Scalar tolerance_; // the maximum allowed delta for the solution to be
// considered converged
};
//! \} end documentation
}} // end namespace Linear, Opm
#endif
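A sketch of the weighting mechanism: with per-component weights \f$w_i\f$, the criterion declares convergence once \f$\max_i | w_i (x^k_i - x^{k+1}_i) |\f$ falls below the tolerance. The weights, the tolerance and the helper function below are made up; prev and cur must have the same size:

#include <dune/common/fvector.hh>
#include <dune/common/parallel/mpihelper.hh>
#include <dune/istl/bvector.hh>
#include <opm/simulators/linalg/fixpointcriterion.hh>

using Vector = Dune::BlockVector<Dune::FieldVector<double, 2>>;
using Comm = Dune::CollectiveCommunication<Dune::No_Comm>;

bool changeIsSmall(const Comm& comm, const Vector& prev, const Vector& cur)
{
    // weight the first component of each block ten times stronger
    Vector weights(cur.size());
    for (auto& w : weights) {
        w[0] = 10.0;
        w[1] = 1.0;
    }

    Opm::Linear::FixPointCriterion<Vector, Comm>
        crit(comm, weights, /*reduction=*/1e-9);

    // the residual arguments are ignored by this criterion
    crit.setInitial(prev, prev);
    crit.update(cur, /*changeIndicator=*/cur, /*curResid=*/cur);
    return crit.converged();
}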


@@ -0,0 +1,712 @@
// -*- mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
// vi: set et ts=4 sw=4 sts=4:
/*
This file is part of the Open Porous Media project (OPM).
OPM is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
OPM is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with OPM. If not, see <http://www.gnu.org/licenses/>.
Consult the COPYING file in the top-level source directory of this
module for the precise wording of the license and the list of
copyright holders.
*/
/*!
* \file
* \copydoc Opm::Linear::ForeignOverlapFromBCRSMatrix
*/
#ifndef EWOMS_FOREIGN_OVERLAP_FROM_BCRS_MATRIX_HH
#define EWOMS_FOREIGN_OVERLAP_FROM_BCRS_MATRIX_HH
#include "overlaptypes.hh"
#include "blacklist.hh"
#include <ewoms/parallel/mpibuffer.hh>
#include <opm/material/common/Unused.hpp>
#include <dune/grid/common/datahandleif.hh>
#include <dune/istl/bcrsmatrix.hh>
#include <dune/istl/scalarproducts.hh>
#include <dune/istl/operators.hh>
#include <algorithm>
#include <iostream>
#include <map>
#include <set>
#include <vector>
#if HAVE_MPI
#include <mpi.h>
#endif // HAVE_MPI
namespace Opm {
namespace Linear {
/*!
* \brief This class creates and manages the foreign overlap given an
* initial list of border indices and a BCRS matrix.
*
 * The foreign overlap consists of all (row) indices which overlap
 * with some of the current process's local indices.
*/
class ForeignOverlapFromBCRSMatrix
{
public:
// overlaps should never be copied!
ForeignOverlapFromBCRSMatrix(const ForeignOverlapFromBCRSMatrix&) = delete;
/*!
* \brief Constructs the foreign overlap given a BCRS matrix and
* an initial list of border indices.
*/
template <class BCRSMatrix>
ForeignOverlapFromBCRSMatrix(const BCRSMatrix& A,
const BorderList& borderList,
const BlackList& blackList,
unsigned overlapSize)
: borderList_(borderList), blackList_(blackList)
{
overlapSize_ = overlapSize;
myRank_ = 0;
#if HAVE_MPI
{
int tmp;
MPI_Comm_rank(MPI_COMM_WORLD, &tmp);
myRank_ = static_cast<ProcessRank>(tmp);
}
#endif
numNative_ = A.N();
// Computes the local <-> native index maps
createLocalIndices_();
// calculate the set of local indices on the border (beware:
// _not_ the native ones)
auto it = borderList.begin();
const auto& endIt = borderList.end();
for (; it != endIt; ++it) {
Index localIdx = nativeToLocal(it->localIdx);
if (localIdx < 0)
continue;
localBorderIndices_.insert(localIdx);
}
// compute the set of processes which are neighbors of the
// local process ...
neighborPeerSet_.update(borderList);
// ... and the initial set of processes which we will have to
// communicate with. We must always communicate with our
// neighbors, but depending on the size of the overlap region,
// we might have to communicate with additional processes as
// well (these will be added later).
peerSet_ = neighborPeerSet_;
// Create an initial seed list of indices which are in the
// overlap.
SeedList initialSeedList;
initialSeedList.update(borderList);
// calculate the minimum distance from the border of the
// initial seed list
unsigned minBorderDist = overlapSize;
auto borderIt = borderList.begin();
const auto& borderEndIt = borderList.end();
for (; borderIt != borderEndIt; ++borderIt) {
minBorderDist = std::min(minBorderDist, borderIt->borderDistance);
}
// calculate the foreign overlap for the local partition,
// i.e. find the distance of each row from the seed set.
foreignOverlapByLocalIndex_.resize(numLocal());
extendForeignOverlap_(A, initialSeedList, minBorderDist, overlapSize);
// computes the process with the lowest rank for all local
// indices.
computeMasterRanks_();
// group foreign overlap by peer process rank
groupForeignOverlapByRank_();
}
/*!
* \brief Returns the size of the overlap region.
*/
unsigned overlapSize() const
{ return overlapSize_; }
/*!
* \brief Returns true iff a local index is a border index.
*/
bool isBorder(Index localIdx) const
{ return localBorderIndices_.count(localIdx) > 0; }
/*!
* \brief Returns true iff a local index is a border index shared with a
* given peer process.
*/
bool isBorderWith(Index localIdx, ProcessRank peerRank) const
{
const auto& indexOverlap = foreignOverlapByLocalIndex_[static_cast<unsigned>(localIdx)];
const auto& borderDistIt = indexOverlap.find(peerRank);
if (borderDistIt == indexOverlap.end())
return false;
// border distance of the index needs to be 0
return borderDistIt->second == 0;
}
/*!
* \brief Return the rank of the master process of an
* index.
*/
ProcessRank masterRank(Index localIdx) const
{ return masterRank_[static_cast<unsigned>(localIdx)]; }
/*!
* \brief Return true if the current rank is the "master" of an
* index.
*
 * If the index is in the interior of some process's domain, that
 * process is defined as its master; if the index is on the border,
 * the master is defined as the process with the lowest rank.
*/
bool iAmMasterOf(Index localIdx) const
{ return masterRank_[static_cast<unsigned>(localIdx)] == myRank_; }
/*!
* \brief Returns the list of indices which intersect the process
* border.
*/
const BorderList& borderList() const
{ return borderList_; }
/*!
* \brief Return the list of (local indices, border distance,
* number of processes) triples which are in the overlap of
* a given peer rank.
*/
const OverlapWithPeer& foreignOverlapWithPeer(ProcessRank peerRank) const
{
assert(foreignOverlapByRank_.find(peerRank) != foreignOverlapByRank_.end());
return foreignOverlapByRank_.find(peerRank)->second;
}
/*!
* \brief Return the map of (peer rank, border distance) for a given local
* index.
*/
const std::map<ProcessRank, BorderDistance> &
foreignOverlapByLocalIndex(Index localIdx) const
{
assert(isLocal(localIdx));
return foreignOverlapByLocalIndex_[static_cast<unsigned>(localIdx)];
}
/*!
* \brief Returns true iff a local index is seen by a peer rank.
*/
bool peerHasIndex(ProcessRank peerRank, Index localIdx) const
{
const auto& idxOverlap = foreignOverlapByLocalIndex_[localIdx];
return idxOverlap.find(peerRank) != idxOverlap.end();
}
/*!
* \brief Returns the number of front indices of a peer process in
* the local partition.
*/
size_t numFront(ProcessRank peerRank) const
{
const auto& peerOverlap = foreignOverlapByRank_.find(peerRank)->second;
size_t n = 0;
auto it = peerOverlap.begin();
const auto& endIt = peerOverlap.end();
for (; it != endIt; ++it) {
if (it->borderDistance == overlapSize_)
++n;
}
return n;
}
/*!
* \brief Returns whether a given local index is on the front of a
* given peer rank.
*/
bool isFrontFor(ProcessRank peerRank, Index localIdx) const
{
const auto& idxOverlap = foreignOverlapByLocalIndex_[localIdx];
auto it = idxOverlap.find(peerRank);
if (it == idxOverlap.end())
return false; // index is not in overlap
return it->second == overlapSize_;
}
/*!
* \brief Return the set of process ranks which share an overlap
* with the current process.
*/
const PeerSet& peerSet() const
{ return peerSet_; }
/*!
* \brief Return the set of process ranks which share a border index
* with the current process.
*/
const PeerSet& neighborPeerSet() const
{ return neighborPeerSet_; }
/*!
* \brief Returns the number of native indices
*/
size_t numNative() const
{ return numNative_; }
/*!
* \brief Returns the number of local indices
*/
size_t numLocal() const
{ return numLocal_; }
/*!
* \brief Returns true iff a domestic index is local
*/
bool isLocal(Index domesticIdx) const
{ return static_cast<unsigned>(domesticIdx) < numLocal(); }
/*!
* \brief Convert a native index to a local one.
*
* If a given native index is not in the set of local indices,
* this method returns -1.
*/
Index nativeToLocal(Index nativeIdx) const
{ return nativeToLocalIndices_[static_cast<unsigned>(nativeIdx)]; }
/*!
* \brief Convert a local index to a native one.
*/
Index localToNative(Index localIdx) const
{
assert(localIdx < static_cast<Index>(localToNativeIndices_.size()));
return localToNativeIndices_[static_cast<unsigned>(localIdx)];
}
/*!
* \brief Returns the object which represents the black-listed native indices.
*/
const BlackList& blackList() const
{ return blackList_; }
/*!
* \brief Return the number of peer ranks for which a given local
* index is visible.
*/
size_t numPeers(Index localIdx) const
{ return foreignOverlapByLocalIndex_[static_cast<unsigned>(localIdx)].size(); }
/*!
* \brief Returns true if a given local index is in the foreign overlap of
* any rank.
*/
bool isInOverlap(Index localIdx) const
{ return foreignOverlapByLocalIndex_[static_cast<unsigned>(localIdx)].size() > 0; }
/*!
* \brief Print the foreign overlap for debugging purposes.
*/
void print() const
{
auto it = foreignOverlapByRank_.begin();
const auto& endIt = foreignOverlapByRank_.end();
for (; it != endIt; ++it) {
std::cout << "Overlap rows(distance) for rank " << it->first << ": ";
auto rowIt = it->second.begin();
const auto& rowEndIt = it->second.end();
for (; rowIt != rowEndIt; ++rowIt)
std::cout << rowIt->index << "(" << rowIt->borderDistance << ") ";
std::cout << "\n" << std::flush;
}
}
protected:
// extend the foreign overlaps by 'overlapSize' levels. this uses
// a greedy algorithm which extends the region by one level and
// then calls itself recursively...
template <class BCRSMatrix>
void extendForeignOverlap_(const BCRSMatrix& A,
SeedList& seedList,
BorderDistance borderDistance,
BorderDistance overlapSize)
{
// communicate the non-neighbor overlap indices
addNonNeighborOverlapIndices_(A, seedList, borderDistance);
// add all processes in the seed rows of the current overlap level
auto seedIt = seedList.begin();
const auto& seedEndIt = seedList.end();
for (; seedIt != seedEndIt; ++seedIt) {
Index localIdx = nativeToLocal(seedIt->index);
ProcessRank peerRank = seedIt->peerRank;
unsigned distance = borderDistance;
if (localIdx < 0)
continue;
if (foreignOverlapByLocalIndex_[static_cast<unsigned>(localIdx)].count(peerRank) == 0)
foreignOverlapByLocalIndex_[static_cast<unsigned>(localIdx)][peerRank] = distance;
}
// if we have reached the maximum overlap distance, we are
// finished and can break the recursion
if (borderDistance >= overlapSize)
return;
// find the seed list for the next overlap level using the
// seed set for the current level
SeedList nextSeedList;
seedIt = seedList.begin();
for (; seedIt != seedEndIt; ++seedIt) {
Index nativeRowIdx = seedIt->index;
if (nativeToLocal(nativeRowIdx) < 0)
continue; // ignore blacklisted indices
ProcessRank peerRank = seedIt->peerRank;
// find all column indices in the row. The indices of the
// columns are the additional indices of the overlap which
// we would like to add
typedef typename BCRSMatrix::ConstColIterator ColIterator;
ColIterator colIt = A[static_cast<unsigned>(nativeRowIdx)].begin();
ColIterator colEndIt = A[static_cast<unsigned>(nativeRowIdx)].end();
for (; colIt != colEndIt; ++colIt) {
Index nativeColIdx = static_cast<Index>(colIt.index());
Index localColIdx = nativeToLocal(nativeColIdx);
// ignore if the native index is not a local one
if (localColIdx < 0)
continue;
// if the process is already in the overlap of the
// column index, ignore this column index!
else if (foreignOverlapByLocalIndex_[static_cast<unsigned>(localColIdx)].count(peerRank) > 0)
continue;
// check whether the new index is already in the overlap
bool hasIndex = false;
typename SeedList::iterator sIt = nextSeedList.begin();
typename SeedList::iterator sEndIt = nextSeedList.end();
for (; sIt != sEndIt; ++sIt) {
if (sIt->index == nativeColIdx && sIt->peerRank == peerRank) {
hasIndex = true;
break;
}
}
if (hasIndex)
continue; // we already have this index
// add the new index to the seed list for the
// next overlap level
IndexRankDist newTuple;
newTuple.index = nativeColIdx;
newTuple.peerRank = peerRank;
newTuple.borderDistance = seedIt->borderDistance + 1;
nextSeedList.push_back(newTuple);
}
}
// clear the old seed list to save some memory
seedList.clear();
// Perform the same exercise for the next overlap distance
extendForeignOverlap_(A, nextSeedList, borderDistance + 1, overlapSize);
}
// Computes the local <-> native index maps
void createLocalIndices_()
{
// create the native <-> local maps
Index localIdx = 0;
for (unsigned nativeIdx = 0; nativeIdx < numNative_;) {
if (!blackList_.hasIndex(static_cast<Index>(nativeIdx))) {
localToNativeIndices_.push_back(static_cast<Index>(nativeIdx));
nativeToLocalIndices_.push_back(static_cast<Index>(localIdx));
++nativeIdx;
++localIdx;
}
else {
nativeToLocalIndices_.push_back(-1);
++nativeIdx;
}
}
numLocal_ = localToNativeIndices_.size();
}
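// Example of the mapping built above (descriptive comment, not from
// the commit): with numNative_ = 5 and native index 2 black-listed,
// nativeToLocalIndices_ = {0, 1, -1, 2, 3} and
// localToNativeIndices_ = {0, 1, 3, 4}, i.e. numLocal_ = 4.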
Index localToPeerIdx_(Index localIdx, ProcessRank peerRank) const
{
auto it = borderList_.begin();
const auto& endIt = borderList_.end();
for (; it != endIt; ++it) {
if (it->localIdx == localIdx && it->peerRank == peerRank)
return it->peerIdx;
}
return -1;
}
template <class BCRSMatrix>
void addNonNeighborOverlapIndices_(const BCRSMatrix& A OPM_UNUSED,
SeedList& seedList OPM_UNUSED_NOMPI,
BorderDistance borderDist OPM_UNUSED_NOMPI)
{
// TODO: this probably does not work! (the matrix A is unused, but it is needed
// from a logical POV.)
#if HAVE_MPI
// first, create the buffers which will contain the number of
// border indices relevant for a neighbor peer
std::map<ProcessRank, std::vector<BorderIndex> > borderIndices;
// get all indices in the border which have borderDist as
// their distance to the closest border of their local process
auto it = seedList.begin();
const auto& endIt = seedList.end();
for (; it != endIt; ++it) {
Index localIdx = nativeToLocal(it->index);
if (!isBorder(localIdx))
continue;
BorderIndex borderHandle;
borderHandle.localIdx = localIdx;
borderHandle.peerRank = it->peerRank;
borderHandle.borderDistance = it->borderDistance;
// add the border index to all the neighboring peers
auto neighborIt = foreignOverlapByLocalIndex_[static_cast<unsigned>(localIdx)].begin();
const auto& neighborEndIt = foreignOverlapByLocalIndex_[static_cast<unsigned>(localIdx)].end();
for (; neighborIt != neighborEndIt; ++neighborIt) {
if (neighborIt->second != 0)
// not a border index for the neighbor
continue;
else if (neighborIt->first == borderHandle.peerRank)
// don't communicate the indices which are owned
// by the peer to itself
continue;
Index peerIdx = localToPeerIdx_(localIdx, neighborIt->first);
if (peerIdx < 0)
// the index is on the border, but is not on the border
// with the considered neighboring process. Ignore it!
continue;
borderHandle.peerIdx = peerIdx;
borderIndices[neighborIt->first].push_back(borderHandle);
}
}
// now borderIndices contains the lists of indices which we
// would like to send to each neighbor. Let's create the MPI
// buffers.
std::map<ProcessRank, Opm::MpiBuffer<unsigned> > numIndicesSendBufs;
std::map<ProcessRank, Opm::MpiBuffer<BorderIndex> > indicesSendBufs;
auto peerIt = neighborPeerSet().begin();
const auto& peerEndIt = neighborPeerSet().end();
for (; peerIt != peerEndIt; ++peerIt) {
ProcessRank peerRank = *peerIt;
size_t numIndices = borderIndices[peerRank].size();
numIndicesSendBufs[peerRank].resize(1);
numIndicesSendBufs[peerRank][0] = static_cast<unsigned>(numIndices);
const auto& peerBorderIndices = borderIndices[peerRank];
indicesSendBufs[peerRank].resize(numIndices);
auto tmpIt = peerBorderIndices.begin();
const auto& tmpEndIt = peerBorderIndices.end();
size_t i = 0;
for (; tmpIt != tmpEndIt; ++tmpIt, ++i) {
indicesSendBufs[peerRank][i] = *tmpIt;
}
}
// now, send all these nice buffers to our neighbors
peerIt = neighborPeerSet().begin();
for (; peerIt != peerEndIt; ++peerIt) {
ProcessRank neighborPeer = *peerIt;
numIndicesSendBufs[neighborPeer].send(neighborPeer);
indicesSendBufs[neighborPeer].send(neighborPeer);
}
// receive all data from the neighbors
std::map<ProcessRank, MpiBuffer<unsigned> > numIndicesRcvBufs;
std::map<ProcessRank, MpiBuffer<BorderIndex> > indicesRcvBufs;
peerIt = neighborPeerSet().begin();
for (; peerIt != peerEndIt; ++peerIt) {
ProcessRank neighborPeer = *peerIt;
auto& numIndicesRcvBuf = numIndicesRcvBufs[neighborPeer];
auto& indicesRcvBuf = indicesRcvBufs[neighborPeer];
numIndicesRcvBuf.resize(1);
numIndicesRcvBuf.receive(neighborPeer);
unsigned numIndices = numIndicesRcvBufs[neighborPeer][0];
indicesRcvBuf.resize(numIndices);
indicesRcvBuf.receive(neighborPeer);
// add the received indices to the seed list unless they are
// already in the overlap of the peer process. also extend
// the set of peer processes.
for (unsigned i = 0; i < numIndices; ++i) {
// swap the local and the peer indices, because they were
// created from the point of view of the sender
std::swap(indicesRcvBuf[i].localIdx, indicesRcvBuf[i].peerIdx);
ProcessRank peerRank = indicesRcvBuf[i].peerRank;
// Index peerIdx = indicesRcvBuf[i].peerIdx;
Index localIdx = indicesRcvBuf[i].localIdx;
// check if the index is already in the overlap for
// the peer
const auto& distIt = foreignOverlapByLocalIndex_[static_cast<unsigned>(localIdx)].find(peerRank);
if (distIt != foreignOverlapByLocalIndex_[static_cast<unsigned>(localIdx)].end())
continue;
// make sure the index is not already in the seed list
bool inSeedList = false;
auto seedIt = seedList.begin();
const auto& seedEndIt = seedList.end();
for (; seedIt != seedEndIt; ++seedIt) {
if (seedIt->index == localIdx && seedIt->peerRank == peerRank) {
inSeedList = true;
break;
}
}
if (inSeedList)
continue;
IndexRankDist seedEntry;
seedEntry.index = localIdx;
seedEntry.peerRank = peerRank;
seedEntry.borderDistance = borderDist;
seedList.push_back(seedEntry);
// update the peer set
peerSet_.insert(peerRank);
}
}
// make sure all data was sent
peerIt = neighborPeerSet().begin();
for (; peerIt != peerEndIt; ++peerIt) {
ProcessRank neighborPeer = *peerIt;
numIndicesSendBufs[neighborPeer].wait();
indicesSendBufs[neighborPeer].wait();
}
#endif // HAVE_MPI
}
// given a list of border indices and provided that
// borderListToSeedList_() was already called, calculate the
// master process of each local index.
void computeMasterRanks_()
{
// determine the minimum rank for all indices
masterRank_.resize(numLocal_);
for (unsigned localIdx = 0; localIdx < numLocal_; ++localIdx) {
unsigned masterRank = myRank_;
if (isBorder(static_cast<Index>(localIdx))) {
// if the local index is a border index, loop over all ranks
// for which this index is also a border index. the lowest
// rank wins!
auto it = foreignOverlapByLocalIndex_[static_cast<unsigned>(localIdx)].begin();
const auto& endIt = foreignOverlapByLocalIndex_[static_cast<unsigned>(localIdx)].end();
for (; it != endIt; ++it) {
if (it->second == 0) {
// if the border distance is zero, the index is shared
// with the peer; the lowest rank wins
masterRank = std::min<ProcessRank>(masterRank, it->first);
}
}
}
masterRank_[static_cast<unsigned>(localIdx)] = masterRank;
}
}
// assuming that the foreign overlap has been created for each
// local index, this method groups the foreign overlap by peer
// process rank
void groupForeignOverlapByRank_()
{
// loop over all indices which are in the overlap of some
// process
size_t numLocal = foreignOverlapByLocalIndex_.size();
for (unsigned localIdx = 0; localIdx < numLocal; ++localIdx) {
// loop over the list of processes for the current index
auto it = foreignOverlapByLocalIndex_[localIdx].begin();
const auto& endIt = foreignOverlapByLocalIndex_[localIdx].end();
size_t nRanks = foreignOverlapByLocalIndex_[localIdx].size();
for (; it != endIt; ++it) {
IndexDistanceNpeers tmp;
tmp.index = static_cast<Index>(localIdx);
tmp.borderDistance = it->second;
tmp.numPeers = static_cast<unsigned>(nRanks);
foreignOverlapByRank_[it->first].push_back(tmp);
}
}
}
// set of processes with which we have to communicate
PeerSet peerSet_;
// set of processes which are direct neighbors of us
PeerSet neighborPeerSet_;
// the list of indices on the border
const BorderList& borderList_;
// the set of indices which should not be considered
const BlackList& blackList_;
// local indices are the native indices sans the black listed ones
std::vector<Index> nativeToLocalIndices_;
std::vector<Index> localToNativeIndices_;
// an array which contains the rank of the master process for each
// index
std::vector<ProcessRank> masterRank_;
// set of all local indices which are on the border of some remote
// process
std::set<Index> localBorderIndices_;
// stores the set of process ranks which are in the overlap for a
// given row index "owned" by the current rank. The second value
// stores the distance from the nearest process border.
OverlapByIndex foreignOverlapByLocalIndex_;
// stores a list of foreign overlap indices for each rank
OverlapByRank foreignOverlapByRank_;
// size of the overlap region
unsigned overlapSize_;
// number of local indices
size_t numLocal_;
// number of native indices
size_t numNative_;
// the MPI rank of the local process
ProcessRank myRank_;
};
} // namespace Linear
} // namespace Opm
#endif
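
A hedged usage sketch (not from this commit): once a border list and a black list are available from a grid-specific border list creator, the foreign overlap can be built from the assembled BCRS matrix. The function name is hypothetical and the overlap width of 2 is an arbitrary choice:

#include <opm/simulators/linalg/foreignoverlapfrombcrsmatrix.hh>

template <class BCRSMatrix>
void buildAndPrintOverlap(const BCRSMatrix& A,
                          const Opm::Linear::BorderList& borderList,
                          const Opm::Linear::BlackList& blackList)
{
    // width of the algebraic overlap region (illustrative value)
    unsigned overlapWidth = 2;
    Opm::Linear::ForeignOverlapFromBCRSMatrix overlap(A, borderList, blackList,
                                                      overlapWidth);
    overlap.print(); // per-peer overlap rows and their border distances
}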

View File

@ -0,0 +1,349 @@
// -*- mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
// vi: set et ts=4 sw=4 sts=4:
/*
This file is part of the Open Porous Media project (OPM).
OPM is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
OPM is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with OPM. If not, see <http://www.gnu.org/licenses/>.
Consult the COPYING file in the top-level source directory of this
module for the precise wording of the license and the list of
copyright holders.
*/
/*!
* \file
* \copydoc Opm::Linear::GlobalIndices
*/
#ifndef EWOMS_GLOBAL_INDICES_HH
#define EWOMS_GLOBAL_INDICES_HH
#include <dune/grid/common/datahandleif.hh>
#include <dune/istl/bcrsmatrix.hh>
#include <dune/istl/scalarproducts.hh>
#include <dune/istl/operators.hh>
#include <algorithm>
#include <set>
#include <map>
#include <iostream>
#include <tuple>
#if HAVE_MPI
#include <mpi.h>
#endif
#include "overlaptypes.hh"
namespace Opm {
namespace Linear {
/*!
* \brief This class maps domestic row indices to and from "global"
 * indices, which are used to construct an algebraic overlap
* for the parallel linear solvers.
*/
template <class ForeignOverlap>
class GlobalIndices
{
GlobalIndices(const GlobalIndices& ) = delete;
typedef std::map<Index, Index> GlobalToDomesticMap;
typedef std::map<Index, Index> DomesticToGlobalMap;
public:
GlobalIndices(const ForeignOverlap& foreignOverlap)
: foreignOverlap_(foreignOverlap)
{
myRank_ = 0;
mpiSize_ = 1;
#if HAVE_MPI
{
int tmp;
MPI_Comm_rank(MPI_COMM_WORLD, &tmp);
myRank_ = static_cast<ProcessRank>(tmp);
MPI_Comm_size(MPI_COMM_WORLD, &tmp);
mpiSize_ = static_cast<size_t>(tmp);
}
#endif
// calculate the domestic overlap (i.e., all overlap indices of
// foreign processes with which the current process overlaps).
// This requires communication via MPI.
buildGlobalIndices_();
}
/*!
* \brief Converts a domestic index to a global one.
*/
Index domesticToGlobal(Index domesticIdx) const
{
assert(domesticToGlobal_.find(domesticIdx) != domesticToGlobal_.end());
return domesticToGlobal_.find(domesticIdx)->second;
}
/*!
* \brief Converts a global index to a domestic one.
*/
Index globalToDomestic(Index globalIdx) const
{
const auto& tmp = globalToDomestic_.find(globalIdx);
if (tmp == globalToDomestic_.end())
return -1;
return tmp->second;
}
/*!
* \brief Returns the number of indices which are in the interior or
* on the border of the current rank.
*/
size_t numLocal() const
{ return foreignOverlap_.numLocal(); }
/*!
 * \brief Returns the number of domestic indices.
*
 * The domestic indices are defined as the process's local indices
 * plus its copies of indices in the overlap regions.
*/
size_t numDomestic() const
{ return numDomestic_; }
/*!
* \brief Add an index to the domestic<->global mapping.
*/
void addIndex(Index domesticIdx, Index globalIdx)
{
domesticToGlobal_[domesticIdx] = globalIdx;
globalToDomestic_[globalIdx] = domesticIdx;
numDomestic_ = domesticToGlobal_.size();
assert(domesticToGlobal_.size() == globalToDomestic_.size());
}
/*!
* \brief Send a border index to a remote process.
*/
void sendBorderIndex(ProcessRank peerRank OPM_UNUSED_NOMPI,
Index domesticIdx OPM_UNUSED_NOMPI,
Index peerLocalIdx OPM_UNUSED_NOMPI)
{
#if HAVE_MPI
PeerIndexGlobalIndex sendBuf;
sendBuf.peerIdx = peerLocalIdx;
sendBuf.globalIdx = domesticToGlobal(domesticIdx);
MPI_Send(&sendBuf, // buff
sizeof(PeerIndexGlobalIndex), // count
MPI_BYTE, // data type
static_cast<int>(peerRank), // peer process
0, // tag
MPI_COMM_WORLD); // communicator
#endif
}
/*!
* \brief Receive an index on the border from a remote
 * process and add it to the translation maps.
*/
void receiveBorderIndex(ProcessRank peerRank OPM_UNUSED_NOMPI)
{
#if HAVE_MPI
PeerIndexGlobalIndex recvBuf;
MPI_Recv(&recvBuf, // buff
sizeof(PeerIndexGlobalIndex), // count
MPI_BYTE, // data type
static_cast<int>(peerRank), // peer process
0, // tag
MPI_COMM_WORLD, // communicator
MPI_STATUS_IGNORE); // status
Index domesticIdx = foreignOverlap_.nativeToLocal(recvBuf.peerIdx);
if (domesticIdx >= 0) {
Index globalIdx = recvBuf.globalIdx;
addIndex(domesticIdx, globalIdx);
}
#endif // HAVE_MPI
}
/*!
* \brief Return true iff a given global index already exists
*/
bool hasGlobalIndex(Index globalIdx) const
{ return globalToDomestic_.find(globalIdx) != globalToDomestic_.end(); }
/*!
* \brief Prints the global indices of all domestic indices
* for debugging purposes.
*/
void print() const
{
std::cout << "(domestic index, global index, domestic->global->domestic)"
<< " list for rank " << myRank_ << "\n";
for (size_t domIdx = 0; domIdx < domesticToGlobal_.size(); ++domIdx)
std::cout << "(" << domIdx << ", " << domesticToGlobal(domIdx)
<< ", " << globalToDomestic(domesticToGlobal(domIdx)) << ") ";
std::cout << "\n" << std::flush;
}
protected:
// retrieve the offset for the indices where we are master in the
// global index list
void buildGlobalIndices_()
{
#if HAVE_MPI
numDomestic_ = 0;
#else
numDomestic_ = foreignOverlap_.numLocal();
#endif
#if HAVE_MPI
if (myRank_ == 0) {
// the first rank starts at index zero
domesticOffset_ = 0;
}
else {
// all other ranks retrieve their offset from the next
// lower rank
MPI_Recv(&domesticOffset_, // buffer
1, // count
MPI_INT, // data type
static_cast<int>(myRank_ - 1), // peer rank
0, // tag
MPI_COMM_WORLD, // communicator
MPI_STATUS_IGNORE);
}
// create maps for all indices for which the current process
// is the master
int numMaster = 0;
for (unsigned i = 0; i < foreignOverlap_.numLocal(); ++i) {
if (!foreignOverlap_.iAmMasterOf(static_cast<Index>(i)))
continue;
addIndex(static_cast<Index>(i),
static_cast<Index>(domesticOffset_ + numMaster));
++numMaster;
}
if (myRank_ < mpiSize_ - 1) {
// send the domestic offset plus the number of master
// indices to the process which is one rank higher
int tmp = domesticOffset_ + numMaster;
MPI_Send(&tmp, // buff
1, // count
MPI_INT, // data type
static_cast<int>(myRank_ + 1), // peer rank
0, // tag
MPI_COMM_WORLD); // communicator
}
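// Example of the offset hand-shake above (descriptive comment, not
// from the commit): with three ranks owning 4, 3 and 5 master
// indices, the offsets received are 0, 4 and 7, so the ranks assign
// the global index ranges 0..3, 4..6 and 7..11, respectively.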
typename PeerSet::const_iterator peerIt;
typename PeerSet::const_iterator peerEndIt = peerSet_().end();
// receive the border indices from the lower ranks
peerIt = peerSet_().begin();
for (; peerIt != peerEndIt; ++peerIt) {
if (*peerIt < myRank_)
receiveBorderFrom_(*peerIt);
}
// send the border indices to the higher ranks
peerIt = peerSet_().begin();
for (; peerIt != peerEndIt; ++peerIt) {
if (*peerIt > myRank_)
sendBorderTo_(*peerIt);
}
// receive the border indices from the higher ranks
peerIt = peerSet_().begin();
for (; peerIt != peerEndIt; ++peerIt) {
if (*peerIt > myRank_)
receiveBorderFrom_(*peerIt);
}
// send the border indices to the lower ranks
peerIt = peerSet_().begin();
for (; peerIt != peerEndIt; ++peerIt) {
if (*peerIt < myRank_)
sendBorderTo_(*peerIt);
}
#endif // HAVE_MPI
}
void sendBorderTo_(ProcessRank peerRank OPM_UNUSED_NOMPI)
{
#if HAVE_MPI
// send (local index on myRank, global index) pairs to the
// peers
BorderList::const_iterator borderIt = borderList_().begin();
BorderList::const_iterator borderEndIt = borderList_().end();
for (; borderIt != borderEndIt; ++borderIt) {
ProcessRank borderPeer = borderIt->peerRank;
BorderDistance borderDistance = borderIt->borderDistance;
if (borderPeer != peerRank || borderDistance != 0)
continue;
Index localIdx = foreignOverlap_.nativeToLocal(borderIt->localIdx);
Index peerIdx = borderIt->peerIdx;
assert(localIdx >= 0);
if (foreignOverlap_.iAmMasterOf(localIdx)) {
sendBorderIndex(borderPeer, localIdx, peerIdx);
}
}
#endif // HAVE_MPI
}
void receiveBorderFrom_(ProcessRank peerRank OPM_UNUSED_NOMPI)
{
#if HAVE_MPI
// retrieve the global indices for which we are not master
// from the processes with lower rank
BorderList::const_iterator borderIt = borderList_().begin();
BorderList::const_iterator borderEndIt = borderList_().end();
for (; borderIt != borderEndIt; ++borderIt) {
ProcessRank borderPeer = borderIt->peerRank;
BorderDistance borderDistance = borderIt->borderDistance;
if (borderPeer != peerRank || borderDistance != 0)
continue;
Index nativeIdx = borderIt->localIdx;
Index localIdx = foreignOverlap_.nativeToLocal(nativeIdx);
if (localIdx >= 0 && foreignOverlap_.masterRank(localIdx) == borderPeer)
receiveBorderIndex(borderPeer);
}
#endif // HAVE_MPI
}
const PeerSet& peerSet_() const
{ return foreignOverlap_.peerSet(); }
const BorderList& borderList_() const
{ return foreignOverlap_.borderList(); }
ProcessRank myRank_;
size_t mpiSize_;
int domesticOffset_;
size_t numDomestic_;
const ForeignOverlap& foreignOverlap_;
GlobalToDomesticMap globalToDomestic_;
DomesticToGlobalMap domesticToGlobal_;
};
} // namespace Linear
} // namespace Opm
#endif
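
A hedged sketch of how the two maps are meant to be used once the foreign overlap exists. Since the constructor performs the rank-by-rank offset hand-shake via MPI, it must run on all ranks; the function name is made up for the example:

#include <opm/simulators/linalg/globalindices.hh>
#include <cassert>
#include <cstddef>

template <class ForeignOverlap>
void checkIndexMaps(const ForeignOverlap& foreignOverlap)
{
    Opm::Linear::GlobalIndices<ForeignOverlap> globalIndices(foreignOverlap);
    // domesticToGlobal() and globalToDomestic() are mutually inverse
    for (std::size_t i = 0; i < globalIndices.numDomestic(); ++i) {
        auto domIdx = static_cast<Opm::Linear::Index>(i);
        assert(globalIndices.globalToDomestic(
                   globalIndices.domesticToGlobal(domIdx)) == domIdx);
    }
}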

View File

@ -0,0 +1,205 @@
// -*- mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
// vi: set et ts=4 sw=4 sts=4:
/*
This file is part of the Open Porous Media project (OPM).
OPM is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
OPM is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with OPM. If not, see <http://www.gnu.org/licenses/>.
Consult the COPYING file in the top-level source directory of this
module for the precise wording of the license and the list of
copyright holders.
*/
/*!
* \file
* \brief Provides wrapper classes for the (non-AMG) preconditioners provided by
* dune-istl.
*
* In conjunction with a suitable solver backend, preconditioner wrappers work by
* specifying the "PreconditionerWrapper" property:
* \code
* SET_TYPE_PROP(YourTypeTag, PreconditionerWrapper,
* Opm::Linear::PreconditionerWrapper$PRECONDITIONER<TypeTag>);
* \endcode
*
 * The possible choices for '\c $PRECONDITIONER' are:
* - \c Jacobi: A Jacobi preconditioner
* - \c GaussSeidel: A Gauss-Seidel preconditioner
* - \c SSOR: A symmetric successive overrelaxation (SSOR) preconditioner
* - \c SOR: A successive overrelaxation (SOR) preconditioner
* - \c ILUn: An ILU(n) preconditioner
* - \c ILU0: A specialized (and optimized) ILU(0) preconditioner
*/
#ifndef EWOMS_ISTL_PRECONDITIONER_WRAPPERS_HH
#define EWOMS_ISTL_PRECONDITIONER_WRAPPERS_HH
#include <ewoms/common/propertysystem.hh>
#include <ewoms/common/parametersystem.hh>
#include <dune/istl/preconditioners.hh>
#include <dune/common/version.hh>
BEGIN_PROPERTIES
NEW_PROP_TAG(Scalar);
NEW_PROP_TAG(SparseMatrixAdapter);
NEW_PROP_TAG(OverlappingMatrix);
NEW_PROP_TAG(OverlappingVector);
NEW_PROP_TAG(PreconditionerOrder);
NEW_PROP_TAG(PreconditionerRelaxation);
END_PROPERTIES
namespace Opm {
namespace Linear {
#define EWOMS_WRAP_ISTL_PRECONDITIONER(PREC_NAME, ISTL_PREC_TYPE) \
template <class TypeTag> \
class PreconditionerWrapper##PREC_NAME \
{ \
typedef typename GET_PROP_TYPE(TypeTag, Scalar) Scalar; \
typedef typename GET_PROP_TYPE(TypeTag, SparseMatrixAdapter) SparseMatrixAdapter; \
typedef typename SparseMatrixAdapter::IstlMatrix IstlMatrix; \
typedef typename GET_PROP_TYPE(TypeTag, OverlappingVector) OverlappingVector; \
\
public: \
typedef ISTL_PREC_TYPE<IstlMatrix, OverlappingVector, \
OverlappingVector> SequentialPreconditioner; \
PreconditionerWrapper##PREC_NAME() \
{} \
\
static void registerParameters() \
{ \
EWOMS_REGISTER_PARAM(TypeTag, int, PreconditionerOrder, \
"The order of the preconditioner"); \
EWOMS_REGISTER_PARAM(TypeTag, Scalar, PreconditionerRelaxation, \
"The relaxation factor of the " \
"preconditioner"); \
} \
\
void prepare(IstlMatrix& matrix) \
{ \
int order = EWOMS_GET_PARAM(TypeTag, int, PreconditionerOrder); \
Scalar relaxationFactor = EWOMS_GET_PARAM(TypeTag, Scalar, PreconditionerRelaxation); \
seqPreCond_ = new SequentialPreconditioner(matrix, order, \
relaxationFactor); \
} \
\
SequentialPreconditioner& get() \
{ return *seqPreCond_; } \
\
void cleanup() \
{ delete seqPreCond_; } \
\
private: \
SequentialPreconditioner *seqPreCond_; \
};
// the same as the EWOMS_WRAP_ISTL_PRECONDITIONER macro, but without
// an 'order' argument for the preconditioner's constructor
#define EWOMS_WRAP_ISTL_SIMPLE_PRECONDITIONER(PREC_NAME, ISTL_PREC_TYPE) \
template <class TypeTag> \
class PreconditionerWrapper##PREC_NAME \
{ \
typedef typename GET_PROP_TYPE(TypeTag, Scalar) Scalar; \
typedef typename GET_PROP_TYPE(TypeTag, OverlappingMatrix) OverlappingMatrix; \
typedef typename GET_PROP_TYPE(TypeTag, OverlappingVector) OverlappingVector; \
\
public: \
typedef ISTL_PREC_TYPE<OverlappingMatrix, OverlappingVector, \
OverlappingVector> SequentialPreconditioner; \
PreconditionerWrapper##PREC_NAME() \
{} \
\
static void registerParameters() \
{ \
EWOMS_REGISTER_PARAM(TypeTag, Scalar, PreconditionerRelaxation, \
"The relaxation factor of the " \
"preconditioner"); \
} \
\
void prepare(OverlappingMatrix& matrix) \
{ \
Scalar relaxationFactor = \
EWOMS_GET_PARAM(TypeTag, Scalar, PreconditionerRelaxation); \
seqPreCond_ = new SequentialPreconditioner(matrix, \
relaxationFactor); \
} \
\
SequentialPreconditioner& get() \
{ return *seqPreCond_; } \
\
void cleanup() \
{ delete seqPreCond_; } \
\
private: \
SequentialPreconditioner *seqPreCond_; \
};
EWOMS_WRAP_ISTL_PRECONDITIONER(Jacobi, Dune::SeqJac)
// EWOMS_WRAP_ISTL_PRECONDITIONER(Richardson, Dune::Richardson)
EWOMS_WRAP_ISTL_PRECONDITIONER(GaussSeidel, Dune::SeqGS)
EWOMS_WRAP_ISTL_PRECONDITIONER(SOR, Dune::SeqSOR)
EWOMS_WRAP_ISTL_PRECONDITIONER(SSOR, Dune::SeqSSOR)
#if DUNE_VERSION_NEWER(DUNE_ISTL, 2,7)
// we need a custom preconditioner wrapper for ILU because the Dune::SeqILU class uses a
// non-standard extra template parameter to specify its order.
template <class TypeTag>
class PreconditionerWrapperILU
{
typedef typename GET_PROP_TYPE(TypeTag, Scalar) Scalar;
typedef typename GET_PROP_TYPE(TypeTag, OverlappingMatrix) OverlappingMatrix;
typedef typename GET_PROP_TYPE(TypeTag, OverlappingVector) OverlappingVector;
static constexpr int order = GET_PROP_VALUE(TypeTag, PreconditionerOrder);
public:
typedef Dune::SeqILU<OverlappingMatrix, OverlappingVector, OverlappingVector, order>
SequentialPreconditioner;
PreconditionerWrapperILU()
{}
static void registerParameters()
{
EWOMS_REGISTER_PARAM(TypeTag, Scalar, PreconditionerRelaxation,
"The relaxation factor of the preconditioner");
}
void prepare(OverlappingMatrix& matrix)
{
Scalar relaxationFactor = EWOMS_GET_PARAM(TypeTag, Scalar, PreconditionerRelaxation);
// create the sequential preconditioner.
seqPreCond_ = new SequentialPreconditioner(matrix, relaxationFactor);
}
SequentialPreconditioner& get()
{ return *seqPreCond_; }
void cleanup()
{ delete seqPreCond_; }
private:
SequentialPreconditioner *seqPreCond_;
};
#else
EWOMS_WRAP_ISTL_SIMPLE_PRECONDITIONER(ILU0, Dune::SeqILU0)
EWOMS_WRAP_ISTL_PRECONDITIONER(ILUn, Dune::SeqILUn)
#endif
#undef EWOMS_WRAP_ISTL_PRECONDITIONER
}} // namespace Linear, Opm
#endif
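
A hedged sketch of the wrapper life cycle a solver backend goes through; the function is a placeholder and the SSOR wrapper is picked arbitrarily from the list above. Note the raw-pointer design: every prepare() must be paired with a cleanup(), otherwise the sequential preconditioner leaks:

template <class TypeTag, class Matrix>
void solveOnce(Matrix& M)
{
    Opm::Linear::PreconditionerWrapperSSOR<TypeTag> wrapper;
    wrapper.prepare(M); // reads the PreconditionerOrder/-Relaxation parameters
    auto& seqPreCond = wrapper.get();
    // ... hand 'seqPreCond' to a Dune::InverseOperator and solve here ...
    static_cast<void>(seqPreCond);
    wrapper.cleanup(); // pairs with prepare(); releases the preconditioner
}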

View File

@ -0,0 +1,180 @@
// -*- mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
// vi: set et ts=4 sw=4 sts=4:
/*
This file is part of the Open Porous Media project (OPM).
OPM is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
OPM is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with OPM. If not, see <http://www.gnu.org/licenses/>.
Consult the COPYING file in the top-level source directory of this
module for the precise wording of the license and the list of
copyright holders.
*/
/*!
* \file
* \brief Provides wrapper classes for the iterative linear solvers available in
* dune-istl.
*
* In conjunction with a suitable solver backend, solver wrappers work by specifying the
* "SolverWrapper" property:
* \code
* SET_TYPE_PROP(YourTypeTag, LinearSolverWrapper,
* Opm::Linear::SolverWrapper$SOLVER<TypeTag>);
* \endcode
*
* The possible choices for '\c $SOLVER' are:
* - \c Richardson: A fixpoint solver using the Richardson iteration
* - \c SteepestDescent: The steepest descent solver
* - \c ConjugatedGradients: A conjugated gradients solver
* - \c BiCGStab: A stabilized bi-conjugated gradients solver
* - \c MinRes: A solver based on the minimized residual algorithm
* - \c RestartedGMRes: A restarted GMRES solver
*/
#ifndef EWOMS_ISTL_SOLVER_WRAPPERS_HH
#define EWOMS_ISTL_SOLVER_WRAPPERS_HH
#include <ewoms/common/propertysystem.hh>
#include <ewoms/common/parametersystem.hh>
#include <dune/istl/solvers.hh>
#include <memory>
BEGIN_PROPERTIES
NEW_PROP_TAG(Scalar);
NEW_PROP_TAG(SparseMatrixAdapter);
NEW_PROP_TAG(OverlappingMatrix);
NEW_PROP_TAG(OverlappingVector);
NEW_PROP_TAG(GMResRestart);
NEW_PROP_TAG(LinearSolverTolerance);
NEW_PROP_TAG(LinearSolverMaxIterations);
NEW_PROP_TAG(LinearSolverVerbosity);
END_PROPERTIES
namespace Opm {
namespace Linear {
/*!
* \brief Macro to create a wrapper around an ISTL solver
*/
#define EWOMS_WRAP_ISTL_SOLVER(SOLVER_NAME, ISTL_SOLVER_NAME) \
template <class TypeTag> \
class SolverWrapper##SOLVER_NAME \
{ \
typedef typename GET_PROP_TYPE(TypeTag, Scalar) Scalar; \
typedef typename GET_PROP_TYPE(TypeTag, \
OverlappingMatrix) OverlappingMatrix; \
typedef typename GET_PROP_TYPE(TypeTag, \
OverlappingVector) OverlappingVector; \
\
public: \
typedef ISTL_SOLVER_NAME<OverlappingVector> RawSolver; \
\
SolverWrapper##SOLVER_NAME() \
{} \
\
static void registerParameters() \
{} \
\
template <class LinearOperator, class ScalarProduct, class Preconditioner> \
std::shared_ptr<RawSolver> get(LinearOperator& parOperator, \
ScalarProduct& parScalarProduct, \
Preconditioner& parPreCond) \
{ \
Scalar tolerance = EWOMS_GET_PARAM(TypeTag, Scalar, \
LinearSolverTolerance); \
int maxIter = EWOMS_GET_PARAM(TypeTag, int, LinearSolverMaxIterations);\
\
int verbosity = 0; \
if (parOperator.overlap().myRank() == 0) \
verbosity = EWOMS_GET_PARAM(TypeTag, int, LinearSolverVerbosity); \
solver_ = std::make_shared<RawSolver>(parOperator, parScalarProduct, \
parPreCond, tolerance, maxIter, \
verbosity); \
\
return solver_; \
} \
\
void cleanup() \
{ solver_.reset(); } \
\
private: \
std::shared_ptr<RawSolver> solver_; \
};
EWOMS_WRAP_ISTL_SOLVER(Richardson, Dune::LoopSolver)
EWOMS_WRAP_ISTL_SOLVER(SteepestDescent, Dune::GradientSolver)
EWOMS_WRAP_ISTL_SOLVER(ConjugatedGradients, Dune::CGSolver)
EWOMS_WRAP_ISTL_SOLVER(BiCGStab, Dune::BiCGSTABSolver)
EWOMS_WRAP_ISTL_SOLVER(MinRes, Dune::MINRESSolver)
/*!
* \brief Solver wrapper for the restarted GMRES solver of dune-istl.
*
* dune-istl uses a slightly different API for this solver than for the others...
*/
template <class TypeTag>
class SolverWrapperRestartedGMRes
{
typedef typename GET_PROP_TYPE(TypeTag, Scalar) Scalar;
typedef typename GET_PROP_TYPE(TypeTag, OverlappingMatrix) OverlappingMatrix;
typedef typename GET_PROP_TYPE(TypeTag, OverlappingVector) OverlappingVector;
public:
typedef Dune::RestartedGMResSolver<OverlappingVector> RawSolver;
SolverWrapperRestartedGMRes()
{}
static void registerParameters()
{
EWOMS_REGISTER_PARAM(TypeTag, int, GMResRestart,
"Number of iterations after which the GMRES linear solver is restarted");
}
template <class LinearOperator, class ScalarProduct, class Preconditioner>
std::shared_ptr<RawSolver> get(LinearOperator& parOperator,
ScalarProduct& parScalarProduct,
Preconditioner& parPreCond)
{
Scalar tolerance = EWOMS_GET_PARAM(TypeTag, Scalar, LinearSolverTolerance);
int maxIter = EWOMS_GET_PARAM(TypeTag, int, LinearSolverMaxIterations);
int verbosity = 0;
if (parOperator.overlap().myRank() == 0)
verbosity = EWOMS_GET_PARAM(TypeTag, int, LinearSolverVerbosity);
int restartAfter = EWOMS_GET_PARAM(TypeTag, int, GMResRestart);
solver_ = std::make_shared<RawSolver>(parOperator,
parScalarProduct,
parPreCond,
tolerance,
restartAfter,
maxIter,
verbosity);
return solver_;
}
void cleanup()
{ solver_.reset(); }
private:
std::shared_ptr<RawSolver> solver_;
};
#undef EWOMS_WRAP_ISTL_SOLVER
}} // namespace Linear, Opm
#endif
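
A hedged configuration sketch: selecting the restarted GMRES wrapper via the property system, following the pattern quoted in the file comment above. 'MyTypeTag' is a placeholder, the restart length of 30 is only an illustrative choice, and the SET_INT_PROP macro is assumed to be provided by the eWoms property system:

BEGIN_PROPERTIES
SET_TYPE_PROP(MyTypeTag, LinearSolverWrapper,
              Opm::Linear::SolverWrapperRestartedGMRes<TypeTag>);
SET_INT_PROP(MyTypeTag, GMResRestart, 30); // restart length (illustrative)
END_PROPERTIES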

View File

@ -0,0 +1,197 @@
// -*- mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
// vi: set et ts=4 sw=4 sts=4:
/*
This file is part of the Open Porous Media project (OPM).
OPM is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
OPM is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with OPM. If not, see <http://www.gnu.org/licenses/>.
Consult the COPYING file in the top-level source directory of this
module for the precise wording of the license and the list of
copyright holders.
*/
/*!
* \file
* \copydoc Opm::Linear::IstlSparseMatrixAdapter
*/
#ifndef EWOMS_ISTL_SPARSE_MATRIX_ADAPTER_HH
#define EWOMS_ISTL_SPARSE_MATRIX_ADAPTER_HH
#include <dune/istl/bcrsmatrix.hh>
#include <dune/common/fmatrix.hh>
#include <dune/common/version.hh>
#include <memory>
#include <type_traits>
namespace Opm {
namespace Linear {
/*!
* \ingroup Linear
* \brief A sparse matrix interface backend for BCRSMatrix from dune-istl.
*/
template <class MatrixBlockType, class AllocatorType=std::allocator<MatrixBlockType> >
class IstlSparseMatrixAdapter
{
public:
//! \brief Implementation of matrix
typedef Dune::BCRSMatrix<MatrixBlockType, AllocatorType> IstlMatrix;
//! \brief block type forming the matrix entries
typedef typename IstlMatrix::block_type MatrixBlock;
static_assert(std::is_same<MatrixBlock, MatrixBlockType>::value,
"IstlMatrix::block_type and MatrixBlockType must be identical");
//! \brief type of scalar
typedef typename MatrixBlock::field_type Scalar;
/*!
* \brief Constructor creating an empty matrix.
*/
IstlSparseMatrixAdapter(const size_t rows, const size_t columns)
: rows_(rows)
, columns_(columns)
, istlMatrix_()
{}
/*!
 * \brief Constructor taking a simulator and creating an empty matrix.
*/
template <class Simulator>
IstlSparseMatrixAdapter(const Simulator& simulator)
: IstlSparseMatrixAdapter(simulator.model().numTotalDof(), simulator.model().numTotalDof())
{}
/*!
 * \brief Allocate the matrix structure given a sparsity pattern.
*/
template <class Set>
void reserve(const std::vector<Set>& sparsityPattern)
{
// allocate raw matrix
istlMatrix_.reset(new IstlMatrix(rows_, columns_, IstlMatrix::random));
// make sure sparsityPattern is consistent with number of rows
assert(rows_ == sparsityPattern.size());
// allocate space for the rows of the matrix
for (size_t dofIdx = 0; dofIdx < rows_; ++ dofIdx)
istlMatrix_->setrowsize(dofIdx, sparsityPattern[dofIdx].size());
istlMatrix_->endrowsizes();
// fill the rows with indices. each degree of freedom talks to
// all of its neighbors. (it also talks to itself since
// degrees of freedom are sometimes quite egocentric.)
for (size_t dofIdx = 0; dofIdx < rows_; ++ dofIdx) {
auto nIt = sparsityPattern[dofIdx].begin();
auto nEndIt = sparsityPattern[dofIdx].end();
for (; nIt != nEndIt; ++nIt)
istlMatrix_->addindex(dofIdx, *nIt);
}
istlMatrix_->endindices();
}
/*!
* \brief Return constant reference to matrix implementation.
*/
IstlMatrix& istlMatrix()
{ return *istlMatrix_; }
const IstlMatrix& istlMatrix() const
{ return *istlMatrix_; }
/*!
* \brief Return number of rows of the matrix.
*/
size_t rows() const
{ return rows_; }
/*!
* \brief Return number of columns of the matrix.
*/
size_t cols() const
{ return columns_; }
/*!
* \brief Set all matrix entries to zero.
*/
void clear()
{ (*istlMatrix_) = Scalar(0.0); }
/*!
* \brief Set given row to zero except for the main-diagonal entry (if it exists).
*
* If the sparsity pattern of the matrix features an explicit block on the main
 * diagonal, the diagonal of that block is set to the second argument of the function.
*/
void clearRow(const size_t row, const Scalar diag = 1.0)
{
MatrixBlock diagBlock(Scalar(0));
for (int i = 0; i < diagBlock.rows; ++i)
diagBlock[i][i] = diag;
auto& matRow = (*istlMatrix_)[row];
auto colIt = matRow.begin();
const auto& colEndIt = matRow.end();
for (; colIt != colEndIt; ++colIt) {
if (colIt.index() == row)
*colIt = diagBlock;
else
*colIt = Scalar(0.0);
}
}
/*!
* \brief Fill given block with entries stored in the matrix.
*/
void block(const size_t rowIdx, const size_t colIdx, MatrixBlock& value) const
{ value = (*istlMatrix_)[rowIdx][colIdx]; }
/*!
* \brief Set matrix block to given block.
*/
void setBlock(const size_t rowIdx, const size_t colIdx, const MatrixBlock& value)
{ (*istlMatrix_)[rowIdx][colIdx] = value; }
/*!
* \brief Add block to matrix block.
*/
void addToBlock(const size_t rowIdx, const size_t colIdx, const MatrixBlock& value)
{ (*istlMatrix_)[rowIdx][colIdx] += value; }
/*!
* \brief Commit matrix from local caches into matrix native structure.
*
* For the ISTL adapter this is unnecessary because there is no caching mechanism.
*/
void commit()
{ }
/*!
* \brief Finish modifying the matrix, i.e., convert the data structure from one
* tailored for linearization to one aimed at the linear solver.
*
* This may compress the matrix if the build mode is implicit. For the ISTL adapter
* this is not required.
*/
void finalize()
{ }
protected:
size_t rows_;
size_t columns_;
std::unique_ptr<IstlMatrix> istlMatrix_;
};
}} // namespace Linear, Opm
#endif
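
A hedged, self-contained sketch of the adapter's intended call sequence: reserve() with a per-row sparsity pattern, then block-wise writes. The 3x3 tridiagonal layout and the 2x2 block size are arbitrary choices for the example:

#include <opm/simulators/linalg/istlsparsematrixadapter.hh>
#include <dune/common/fmatrix.hh>
#include <set>
#include <vector>

int main()
{
    using Block = Dune::FieldMatrix<double, 2, 2>;
    Opm::Linear::IstlSparseMatrixAdapter<Block> jac(/*rows=*/3, /*columns=*/3);

    // tridiagonal sparsity pattern: each row couples to itself and its neighbors
    std::vector<std::set<unsigned>> pattern(3);
    for (unsigned i = 0; i < 3; ++i) {
        pattern[i].insert(i);
        if (i > 0) pattern[i].insert(i - 1);
        if (i < 2) pattern[i].insert(i + 1);
    }
    jac.reserve(pattern);

    jac.clear(); // zero all entries before assembly
    Block diag(0.0);
    diag[0][0] = diag[1][1] = 1.0;
    for (unsigned i = 0; i < 3; ++i)
        jac.setBlock(i, i, diag); // unit blocks on the main diagonal
    jac.finalize(); // no-op for this adapter, kept for interface parity
    return 0;
}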

View File

@ -0,0 +1,83 @@
// -*- mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
// vi: set et ts=4 sw=4 sts=4:
/*
This file is part of the Open Porous Media project (OPM).
OPM is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
OPM is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with OPM. If not, see <http://www.gnu.org/licenses/>.
Consult the COPYING file in the top-level source directory of this
module for the precise wording of the license and the list of
copyright holders.
*/
/*!
* \file
* \copydoc Opm::Linear::SolverReport
*/
#ifndef EWOMS_LINEAR_SOLVER_REPORT_HH
#define EWOMS_LINEAR_SOLVER_REPORT_HH
#include "convergencecriterion.hh"
#include <ewoms/common/timer.hh>
#include <ewoms/common/timerguard.hh>
namespace Opm {
namespace Linear {
/*!
* \brief Collects summary information about the execution of the linear solver.
*/
class SolverReport
{
public:
SolverReport()
{ reset(); }
void reset()
{
timer_.halt();
iterations_ = 0;
converged_ = false;
}
const Opm::Timer& timer() const
{ return timer_; }
Opm::Timer& timer()
{ return timer_; }
unsigned iterations() const
{ return iterations_; }
void increment()
{ ++iterations_; }
SolverReport& operator++()
{ ++iterations_; return *this; }
bool converged() const
{ return converged_; }
void setConverged(bool value)
{ converged_ = value; }
private:
Opm::Timer timer_;
unsigned iterations_;
bool converged_;
};
}} // end namespace Linear, Opm
#endif
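
A hedged sketch of how a linear solver loop is expected to feed the report, assuming the usual start()/stop() interface of Opm::Timer (reset() above calls halt(), so pairing with start()/stop() seems intended):

#include <iostream>

void runSolverLoop(Opm::Linear::SolverReport& report)
{
    report.reset();
    report.timer().start();
    for (int it = 0; it < 10; ++it) // stand-in for the real iteration loop
        ++report;                   // bump the iteration counter
    report.setConverged(true);
    report.timer().stop();
    std::cout << report.iterations() << " iterations, "
              << (report.converged() ? "converged" : "failed") << "\n";
}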

View File

@ -0,0 +1,303 @@
// -*- mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
// vi: set et ts=4 sw=4 sts=4:
/*
This file is part of the Open Porous Media project (OPM).
OPM is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
OPM is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with OPM. If not, see <http://www.gnu.org/licenses/>.
Consult the COPYING file in the top-level source directory of this
module for the precise wording of the license and the list of
copyright holders.
*/
#ifndef EWOMS_MATRIX_BLOCK_HH
#define EWOMS_MATRIX_BLOCK_HH
#include <dune/istl/scalarproducts.hh>
#include <dune/istl/operators.hh>
#include <dune/istl/preconditioners.hh>
#include <dune/istl/solvers.hh>
#include <dune/istl/owneroverlapcopy.hh>
#include <dune/istl/paamg/amg.hh>
#include <dune/common/fmatrix.hh>
#include <cmath>
#include <limits>
namespace Opm {
namespace MatrixBlockHelp {
template <typename K, int m, int n>
static inline void invertMatrix(Dune::FieldMatrix<K, m, n>& matrix)
{ matrix.invert(); }
template <typename K>
static inline void invertMatrix(Dune::FieldMatrix<K, 1, 1>& matrix)
{
matrix[0][0] = 1.0/matrix[0][0];
}
template <typename K>
static inline void invertMatrix(Dune::FieldMatrix<K, 2, 2>& matrix)
{
Dune::FieldMatrix<K, 2, 2> tmp(matrix);
Dune::FMatrixHelp::invertMatrix(tmp, matrix);
}
template <typename K>
static inline void invertMatrix(Dune::FieldMatrix<K, 3, 3>& matrix)
{
Dune::FieldMatrix<K, 3, 3> tmp(matrix);
Dune::FMatrixHelp::invertMatrix(tmp, matrix);
}
template <typename K>
static inline void invertMatrix(Dune::FieldMatrix<K, 4, 4>& matrix)
{
Dune::FieldMatrix<K, 4, 4> tmp(matrix);
matrix[0][0] =
tmp[1][1]*tmp[2][2]*tmp[3][3] -
tmp[1][1]*tmp[2][3]*tmp[3][2] -
tmp[2][1]*tmp[1][2]*tmp[3][3] +
tmp[2][1]*tmp[1][3]*tmp[3][2] +
tmp[3][1]*tmp[1][2]*tmp[2][3] -
tmp[3][1]*tmp[1][3]*tmp[2][2];
matrix[1][0] =
-tmp[1][0]*tmp[2][2]*tmp[3][3] +
tmp[1][0]*tmp[2][3]*tmp[3][2] +
tmp[2][0]*tmp[1][2]*tmp[3][3] -
tmp[2][0]*tmp[1][3]*tmp[3][2] -
tmp[3][0]*tmp[1][2]*tmp[2][3] +
tmp[3][0]*tmp[1][3]*tmp[2][2];
matrix[2][0] =
tmp[1][0]*tmp[2][1]*tmp[3][3] -
tmp[1][0]*tmp[2][3]*tmp[3][1] -
tmp[2][0]*tmp[1][1]*tmp[3][3] +
tmp[2][0]*tmp[1][3]*tmp[3][1] +
tmp[3][0]*tmp[1][1]*tmp[2][3] -
tmp[3][0]*tmp[1][3]*tmp[2][1];
matrix[3][0] =
-tmp[1][0]*tmp[2][1]*tmp[3][2] +
tmp[1][0]*tmp[2][2]*tmp[3][1] +
tmp[2][0]*tmp[1][1]*tmp[3][2] -
tmp[2][0]*tmp[1][2]*tmp[3][1] -
tmp[3][0]*tmp[1][1]*tmp[2][2] +
tmp[3][0]*tmp[1][2]*tmp[2][1];
matrix[0][1] =
-tmp[0][1]*tmp[2][2]*tmp[3][3] +
tmp[0][1]*tmp[2][3]*tmp[3][2] +
tmp[2][1]*tmp[0][2]*tmp[3][3] -
tmp[2][1]*tmp[0][3]*tmp[3][2] -
tmp[3][1]*tmp[0][2]*tmp[2][3] +
tmp[3][1]*tmp[0][3]*tmp[2][2];
matrix[1][1] =
tmp[0][0]*tmp[2][2]*tmp[3][3] -
tmp[0][0]*tmp[2][3]*tmp[3][2] -
tmp[2][0]*tmp[0][2]*tmp[3][3] +
tmp[2][0]*tmp[0][3]*tmp[3][2] +
tmp[3][0]*tmp[0][2]*tmp[2][3] -
tmp[3][0]*tmp[0][3]*tmp[2][2];
matrix[2][1] =
-tmp[0][0]*tmp[2][1]*tmp[3][3] +
tmp[0][0]*tmp[2][3]*tmp[3][1] +
tmp[2][0]*tmp[0][1]*tmp[3][3] -
tmp[2][0]*tmp[0][3]*tmp[3][1] -
tmp[3][0]*tmp[0][1]*tmp[2][3] +
tmp[3][0]*tmp[0][3]*tmp[2][1];
matrix[3][1] =
tmp[0][0]*tmp[2][1]*tmp[3][2] -
tmp[0][0]*tmp[2][2]*tmp[3][1] -
tmp[2][0]*tmp[0][1]*tmp[3][2] +
tmp[2][0]*tmp[0][2]*tmp[3][1] +
tmp[3][0]*tmp[0][1]*tmp[2][2] -
tmp[3][0]*tmp[0][2]*tmp[2][1];
matrix[0][2] =
tmp[0][1]*tmp[1][2]*tmp[3][3] -
tmp[0][1]*tmp[1][3]*tmp[3][2] -
tmp[1][1]*tmp[0][2]*tmp[3][3] +
tmp[1][1]*tmp[0][3]*tmp[3][2] +
tmp[3][1]*tmp[0][2]*tmp[1][3] -
tmp[3][1]*tmp[0][3]*tmp[1][2];
matrix[1][2] =
-tmp[0][0] *tmp[1][2]*tmp[3][3] +
tmp[0][0]*tmp[1][3]*tmp[3][2] +
tmp[1][0]*tmp[0][2]*tmp[3][3] -
tmp[1][0]*tmp[0][3]*tmp[3][2] -
tmp[3][0]*tmp[0][2]*tmp[1][3] +
tmp[3][0]*tmp[0][3]*tmp[1][2];
matrix[2][2] =
tmp[0][0]*tmp[1][1]*tmp[3][3] -
tmp[0][0]*tmp[1][3]*tmp[3][1] -
tmp[1][0]*tmp[0][1]*tmp[3][3] +
tmp[1][0]*tmp[0][3]*tmp[3][1] +
tmp[3][0]*tmp[0][1]*tmp[1][3] -
tmp[3][0]*tmp[0][3]*tmp[1][1];
matrix[3][2] =
-tmp[0][0]*tmp[1][1]*tmp[3][2] +
tmp[0][0]*tmp[1][2]*tmp[3][1] +
tmp[1][0]*tmp[0][1]*tmp[3][2] -
tmp[1][0]*tmp[0][2]*tmp[3][1] -
tmp[3][0]*tmp[0][1]*tmp[1][2] +
tmp[3][0]*tmp[0][2]*tmp[1][1];
matrix[0][3] =
-tmp[0][1]*tmp[1][2]*tmp[2][3] +
tmp[0][1]*tmp[1][3]*tmp[2][2] +
tmp[1][1]*tmp[0][2]*tmp[2][3] -
tmp[1][1]*tmp[0][3]*tmp[2][2] -
tmp[2][1]*tmp[0][2]*tmp[1][3] +
tmp[2][1]*tmp[0][3]*tmp[1][2];
matrix[1][3] =
tmp[0][0]*tmp[1][2]*tmp[2][3] -
tmp[0][0]*tmp[1][3]*tmp[2][2] -
tmp[1][0]*tmp[0][2]*tmp[2][3] +
tmp[1][0]*tmp[0][3]*tmp[2][2] +
tmp[2][0]*tmp[0][2]*tmp[1][3] -
tmp[2][0]*tmp[0][3]*tmp[1][2];
matrix[2][3] =
-tmp[0][0]*tmp[1][1]*tmp[2][3] +
tmp[0][0]*tmp[1][3]*tmp[2][1] +
tmp[1][0]*tmp[0][1]*tmp[2][3] -
tmp[1][0]*tmp[0][3]*tmp[2][1] -
tmp[2][0]*tmp[0][1]*tmp[1][3] +
tmp[2][0]*tmp[0][3]*tmp[1][1];
matrix[3][3] =
tmp[0][0]*tmp[1][1]*tmp[2][2] -
tmp[0][0]*tmp[1][2]*tmp[2][1] -
tmp[1][0]*tmp[0][1]*tmp[2][2] +
tmp[1][0]*tmp[0][2]*tmp[2][1] +
tmp[2][0]*tmp[0][1]*tmp[1][2] -
tmp[2][0]*tmp[0][2]*tmp[1][1];
K det =
tmp[0][0]*matrix[0][0] +
tmp[0][1]*matrix[1][0] +
tmp[0][2]*matrix[2][0] +
tmp[0][3]*matrix[3][0];
// mark singular or nearly singular matrices by setting all entries to NaN.
if (std::abs(det) < 1e-40)
matrix = std::numeric_limits<K>::quiet_NaN();
else
matrix *= 1.0/det;
}
} // namespace MatrixBlockHelp
template <class Scalar, int n, int m>
class MatrixBlock : public Dune::FieldMatrix<Scalar, n, m>
{
public:
typedef Dune::FieldMatrix<Scalar, n, m> BaseType;
using BaseType::operator= ;
using BaseType::rows;
using BaseType::cols;
MatrixBlock()
: BaseType(Scalar(0.0))
{}
explicit MatrixBlock(const Scalar value)
: BaseType(value)
{}
void invert()
{ Opm::MatrixBlockHelp::invertMatrix(asBase()); }
const BaseType& asBase() const
{ return static_cast<const BaseType&>(*this); }
BaseType& asBase()
{ return static_cast<BaseType&>(*this); }
};
} // namespace Opm
namespace Dune {
template<class K, int n, int m>
void print_row(std::ostream& s, const Opm::MatrixBlock<K, n, m>& A,
typename FieldMatrix<K, n, m>::size_type I,
typename FieldMatrix<K, n, m>::size_type J,
typename FieldMatrix<K, n, m>::size_type therow,
int width,
int precision)
{ print_row(s, A.asBase(), I, J, therow, width, precision); }
template<class K, int n, int m>
K& firstmatrixelement(Opm::MatrixBlock<K, n, m>& A)
{ return firstmatrixelement(A.asBase()); }
template <typename Scalar, int n, int m>
struct MatrixDimension<Opm::MatrixBlock<Scalar, n, m> >
: public MatrixDimension<typename Opm::MatrixBlock<Scalar, n, m>::BaseType>
{ };
#if HAVE_UMFPACK
/// \brief UMFPack specialization for Opm::MatrixBlock to make AMG happy
///
/// Without this the empty default implementation would be used.
template <typename T, typename A, int n, int m>
class UMFPack<BCRSMatrix<Opm::MatrixBlock<T, n, m>, A> >
: public UMFPack<BCRSMatrix<FieldMatrix<T, n, m>, A> >
{
typedef UMFPack<BCRSMatrix<FieldMatrix<T, n, m>, A> > Base;
typedef BCRSMatrix<FieldMatrix<T, n, m>, A> Matrix;
public:
typedef BCRSMatrix<Opm::MatrixBlock<T, n, m>, A> RealMatrix;
UMFPack(const RealMatrix& matrix, int verbose, bool)
: Base(reinterpret_cast<const Matrix&>(matrix), verbose)
{}
};
#endif
#if HAVE_SUPERLU
/// \brief SuperLU specialization for Opm::MatrixBlock to make AMG happy
///
/// Without this the empty default implementation would be used.
template <typename T, typename A, int n, int m>
class SuperLU<BCRSMatrix<Opm::MatrixBlock<T, n, m>, A> >
: public SuperLU<BCRSMatrix<FieldMatrix<T, n, m>, A> >
{
typedef SuperLU<BCRSMatrix<FieldMatrix<T, n, m>, A> > Base;
typedef BCRSMatrix<FieldMatrix<T, n, m>, A> Matrix;
public:
typedef BCRSMatrix<Opm::MatrixBlock<T, n, m>, A> RealMatrix;
SuperLU(const RealMatrix& matrix, int verb, bool reuse=true)
: Base(reinterpret_cast<const Matrix&>(matrix), verb, reuse)
{}
};
#endif
} // end namespace Dune
#endif
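
A hedged example of the analytic block inversion above: invert() on a MatrixBlock dispatches to the size-specialized invertMatrix() overloads, so a 2x2 block is inverted in closed form. The numbers can be checked by hand (det = 10); the include path assumes the moved header location of this commit:

#include <opm/simulators/linalg/matrixblock.hh>
#include <cassert>
#include <cmath>

int main()
{
    Opm::MatrixBlock<double, 2, 2> B;
    B[0][0] = 4.0; B[0][1] = 1.0;
    B[1][0] = 2.0; B[1][1] = 3.0;
    B.invert(); // analytic inverse via Dune::FMatrixHelp for 2x2 blocks
    // expected: inv([[4,1],[2,3]]) = 1/10 * [[3,-1],[-2,4]]
    assert(std::abs(B[0][0] - 0.3) < 1e-12);
    assert(std::abs(B[1][1] - 0.4) < 1e-12);
    return 0;
}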

View File

@ -0,0 +1,73 @@
// -*- mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
// vi: set et ts=4 sw=4 sts=4:
/*
This file is part of the Open Porous Media project (OPM).
OPM is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
OPM is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with OPM. If not, see <http://www.gnu.org/licenses/>.
Consult the COPYING file in the top-level source directory of this
module for the precise wording of the license and the list of
copyright holders.
*/
/*!
* \file
* \copydoc Opm::Linear::NullBorderListCreator
*/
#ifndef EWOMS_NULL_BORDER_LIST_MANAGER_HH
#define EWOMS_NULL_BORDER_LIST_MANAGER_HH
#include "overlaptypes.hh"
#include <opm/material/common/Unused.hpp>
#include <opm/material/common/Exceptions.hpp>
#include <dune/grid/common/datahandleif.hh>
#include <dune/grid/common/gridenums.hh>
#include <dune/istl/bcrsmatrix.hh>
#include <dune/istl/scalarproducts.hh>
#include <dune/istl/operators.hh>
#include <algorithm>
namespace Opm {
namespace Linear {
/*!
 * \brief This is a border list creator which does not create any border list.
 *
 * This means that discretizations using this border list creator cannot
 * be used for parallel computations!
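 *
 * A minimal sketch (hypothetical types, assuming a purely sequential run):
 * \code
 * NullBorderListCreator<GridView, DofMapper> creator(gridView, dofMapper);
 * const BorderList& borderList = creator.borderList(); // always empty
 * \endcode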
*/
template <class GridView, class DofMapper>
class NullBorderListCreator
{
public:
NullBorderListCreator(const GridView& gridView,
const DofMapper& map OPM_UNUSED)
{
if (gridView.comm().size() > 1)
throw std::runtime_error("The used model is not usable for parallel computations");
}
// Access to the border list.
const BorderList& borderList() const
{ return borderList_; }
private:
BorderList borderList_;
};
} // namespace Linear
} // namespace Opm
#endif

View File

@ -0,0 +1,693 @@
// -*- mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
// vi: set et ts=4 sw=4 sts=4:
/*
This file is part of the Open Porous Media project (OPM).
OPM is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
OPM is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with OPM. If not, see <http://www.gnu.org/licenses/>.
Consult the COPYING file in the top-level source directory of this
module for the precise wording of the license and the list of
copyright holders.
*/
/*!
* \file
* \copydoc Opm::Linear::OverlappingBCRSMatrix
*/
#ifndef EWOMS_OVERLAPPING_BCRS_MATRIX_HH
#define EWOMS_OVERLAPPING_BCRS_MATRIX_HH
#include <opm/simulators/linalg/domesticoverlapfrombcrsmatrix.hh>
#include <opm/simulators/linalg/globalindices.hh>
#include <opm/simulators/linalg/blacklist.hh>
#include <ewoms/parallel/mpibuffer.hh>
#include <opm/material/common/Valgrind.hpp>
#include <dune/istl/scalarproducts.hh>
#include <dune/istl/io.hh>
#include <algorithm>
#include <set>
#include <map>
#include <iostream>
#include <vector>
#include <memory>
namespace Opm {
namespace Linear {
/*!
* \brief An overlap aware block-compressed row storage (BCRS) matrix.
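 *
 * A minimal usage sketch (hypothetical names; the border and black lists
 * are assumed to be provided by a border list creator):
 * \code
 * typedef Opm::Linear::OverlappingBCRSMatrix<NativeMatrix> OverlapMatrix;
 * OverlapMatrix A(nativeA, borderList, blackList, 1); // overlap size 1
 * A.assignAdd(nativeA); // copy the native entries and sum up overlap rows
 * \endcode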
*/
template <class BCRSMatrix>
class OverlappingBCRSMatrix : public BCRSMatrix
{
typedef BCRSMatrix ParentType;
public:
typedef Opm::Linear::DomesticOverlapFromBCRSMatrix Overlap;
private:
typedef std::vector<std::set<Index> > Entries;
public:
typedef typename ParentType::ColIterator ColIterator;
typedef typename ParentType::ConstColIterator ConstColIterator;
typedef typename ParentType::block_type block_type;
typedef typename ParentType::field_type field_type;
// no real copying done at the moment
OverlappingBCRSMatrix(const OverlappingBCRSMatrix& other)
: ParentType(other)
{}
template <class NativeBCRSMatrix>
OverlappingBCRSMatrix(const NativeBCRSMatrix& nativeMatrix,
const BorderList& borderList,
const BlackList& blackList,
unsigned overlapSize)
{
overlap_ = std::make_shared<Overlap>(nativeMatrix, borderList, blackList, overlapSize);
myRank_ = 0;
#if HAVE_MPI
MPI_Comm_rank(MPI_COMM_WORLD, &myRank_);
#endif // HAVE_MPI
// build the overlapping matrix from the non-overlapping
// matrix and the overlap
build_(nativeMatrix);
}
// this constructor is required to make the class compatible with the SeqILU class of
// Dune >= 2.7.
OverlappingBCRSMatrix(size_t numRows OPM_UNUSED,
size_t numCols OPM_UNUSED,
typename BCRSMatrix::BuildMode buildMode OPM_UNUSED)
{ throw std::logic_error("OverlappingBCRSMatrix objects cannot be built from scratch!"); }
~OverlappingBCRSMatrix()
{
if (overlap_.use_count() == 0)
return;
// delete all MPI buffers
const PeerSet& peerSet = overlap_->peerSet();
typename PeerSet::const_iterator peerIt = peerSet.begin();
typename PeerSet::const_iterator peerEndIt = peerSet.end();
for (; peerIt != peerEndIt; ++peerIt) {
ProcessRank peerRank = *peerIt;
delete rowSizesRecvBuff_[peerRank];
delete rowIndicesRecvBuff_[peerRank];
delete entryColIndicesRecvBuff_[peerRank];
delete entryValuesRecvBuff_[peerRank];
delete numRowsSendBuff_[peerRank];
delete rowSizesSendBuff_[peerRank];
delete rowIndicesSendBuff_[peerRank];
delete entryColIndicesSendBuff_[peerRank];
delete entryValuesSendBuff_[peerRank];
}
}
ParentType& asParent()
{ return *this; }
const ParentType& asParent() const
{ return *this; }
/*!
* \brief Returns the domestic overlap for the process.
*/
const Overlap& overlap() const
{ return *overlap_; }
/*!
 * \brief Assign and synchronize the overlapping matrix from a non-overlapping one.
*/
void assignAdd(const ParentType& nativeMatrix)
{
// copy the native entries
assignFromNative(nativeMatrix);
// communicate and add the contents of overlapping rows
syncAdd();
}
/*!
 * \brief Assign and synchronize the overlapping matrix from a
* non-overlapping one.
*
* The non-master entries are copied from the master
*/
template <class NativeBCRSMatrix>
void assignCopy(const NativeBCRSMatrix& nativeMatrix)
{
// copy the native entries
assignFromNative(nativeMatrix);
// communicate and copy the contents of overlapping rows from the master
syncCopy();
}
/*!
 * \brief Reset the rows of all front indices: the main-diagonal block is
 *        set to the identity matrix and all other entries to zero.
*/
void resetFront()
{
// create an identity matrix
block_type idMatrix(0.0);
for (unsigned i = 0; i < idMatrix.size(); ++i)
idMatrix[i][i] = 1.0;
int numLocal = overlap_->numLocal();
int numDomestic = overlap_->numDomestic();
for (int domRowIdx = numLocal; domRowIdx < numDomestic; ++domRowIdx) {
if (overlap_->isFront(domRowIdx)) {
// set the front rows to a diagonal 1
(*this)[domRowIdx] = 0.0;
(*this)[domRowIdx][domRowIdx] = idMatrix;
}
}
}
void print() const
{
overlap_->print();
for (int i = 0; i < this->N(); ++i) {
if (overlap_->isLocal(i))
std::cout << " ";
else
std::cout << "*";
std::cout << "row " << i << " ";
typedef typename BCRSMatrix::ConstColIterator ColIt;
ColIt colIt = (*this)[i].begin();
ColIt colEndIt = (*this)[i].end();
for (int j = 0; j < this->M(); ++j) {
if (colIt != colEndIt && j == colIt.index()) {
++colIt;
if (overlap_->isBorder(j))
std::cout << "|";
else if (overlap_->isLocal(j))
std::cout << "X";
else
std::cout << "*";
}
else
std::cout << " ";
}
std::cout << "\n" << std::flush;
}
Dune::printSparseMatrix(std::cout,
*static_cast<const BCRSMatrix *>(this),
"M",
"row");
}
template <class NativeBCRSMatrix>
void assignFromNative(const NativeBCRSMatrix& nativeMatrix)
{
// first, set everything to 0,
BCRSMatrix::operator=(0.0);
// then copy the domestic entries of the native matrix to the overlapping matrix
for (unsigned nativeRowIdx = 0; nativeRowIdx < nativeMatrix.N(); ++nativeRowIdx) {
Index domesticRowIdx = overlap_->nativeToDomestic(static_cast<Index>(nativeRowIdx));
if (domesticRowIdx < 0) {
continue; // row corresponds to a black-listed entry
}
auto nativeColIt = nativeMatrix[nativeRowIdx].begin();
const auto& nativeColEndIt = nativeMatrix[nativeRowIdx].end();
for (; nativeColIt != nativeColEndIt; ++nativeColIt) {
Index domesticColIdx = overlap_->nativeToDomestic(static_cast<Index>(nativeColIt.index()));
// make sure to include all off-diagonal entries, even those which belong
// to DOFs which are managed by a peer process. For this, we have to
// re-map the native column index of black-listed entries to a domestic one.
if (domesticColIdx < 0)
domesticColIdx = overlap_->blackList().nativeToDomestic(static_cast<Index>(nativeColIt.index()));
if (domesticColIdx < 0)
// there is no domestic index which corresponds to a black-listed
// one. this can happen if the grid overlap is larger than the
// algebraic one...
continue;
// we need to copy the block matrices manually since it seems that (at
// least some versions of) Dune have an endless recursion bug when
// assigning dense matrices of different field type
const auto& src = *nativeColIt;
auto& dest = (*this)[static_cast<unsigned>(domesticRowIdx)][static_cast<unsigned>(domesticColIdx)];
for (unsigned i = 0; i < src.rows; ++i) {
for (unsigned j = 0; j < src.cols; ++j) {
dest[i][j] = static_cast<field_type>(src[i][j]);
}
}
}
}
}
// communicates and adds up the contents of overlapping rows
void syncAdd()
{
// first, send all entries to the peers
const PeerSet& peerSet = overlap_->peerSet();
typename PeerSet::const_iterator peerIt = peerSet.begin();
typename PeerSet::const_iterator peerEndIt = peerSet.end();
for (; peerIt != peerEndIt; ++peerIt) {
ProcessRank peerRank = *peerIt;
sendEntries_(peerRank);
}
// then, receive entries from the peers
peerIt = peerSet.begin();
for (; peerIt != peerEndIt; ++peerIt) {
ProcessRank peerRank = *peerIt;
receiveAddEntries_(peerRank);
}
// finally, make sure that everything which we sent was
// received by the peers
peerIt = peerSet.begin();
for (; peerIt != peerEndIt; ++peerIt) {
ProcessRank peerRank = *peerIt;
entryValuesSendBuff_[peerRank]->wait();
}
}
// communicates and copies the contents of overlapping rows from
// the master
void syncCopy()
{
// first, send all entries to the peers
const PeerSet& peerSet = overlap_->peerSet();
typename PeerSet::const_iterator peerIt = peerSet.begin();
typename PeerSet::const_iterator peerEndIt = peerSet.end();
for (; peerIt != peerEndIt; ++peerIt) {
ProcessRank peerRank = *peerIt;
sendEntries_(peerRank);
}
// then, receive entries from the peers
peerIt = peerSet.begin();
for (; peerIt != peerEndIt; ++peerIt) {
ProcessRank peerRank = *peerIt;
receiveCopyEntries_(peerRank);
}
// finally, make sure that everything which we sent was
// received by the peers
peerIt = peerSet.begin();
for (; peerIt != peerEndIt; ++peerIt) {
ProcessRank peerRank = *peerIt;
entryValuesSendBuff_[peerRank]->wait();
}
}
private:
template <class NativeBCRSMatrix>
void build_(const NativeBCRSMatrix& nativeMatrix)
{
size_t numDomestic = overlap_->numDomestic();
// allocate the rows
this->setSize(numDomestic, numDomestic);
this->setBuildMode(ParentType::random);
// build the sparsity pattern, communicating the indices with the peers
buildIndices_(nativeMatrix);
}
template <class NativeBCRSMatrix>
void buildIndices_(const NativeBCRSMatrix& nativeMatrix)
{
/////////
// first, add all local matrix entries
/////////
entries_.resize(overlap_->numDomestic());
for (unsigned nativeRowIdx = 0; nativeRowIdx < nativeMatrix.N(); ++nativeRowIdx) {
int domesticRowIdx = overlap_->nativeToDomestic(static_cast<Index>(nativeRowIdx));
if (domesticRowIdx < 0)
continue;
auto nativeColIt = nativeMatrix[nativeRowIdx].begin();
const auto& nativeColEndIt = nativeMatrix[nativeRowIdx].end();
for (; nativeColIt != nativeColEndIt; ++nativeColIt) {
int domesticColIdx = overlap_->nativeToDomestic(static_cast<Index>(nativeColIt.index()));
// make sure to include all off-diagonal entries, even those which belong
// to DOFs which are managed by a peer process. For this, we have to
// re-map the native column index of black-listed entries to a domestic one.
if (domesticColIdx < 0) {
domesticColIdx = overlap_->blackList().nativeToDomestic(static_cast<Index>(nativeColIt.index()));
}
if (domesticColIdx < 0)
continue;
entries_[static_cast<unsigned>(domesticRowIdx)].insert(domesticColIdx);
}
}
/////////
// add the indices for all additional entries
/////////
// first, send all our indices to all peers
const PeerSet& peerSet = overlap_->peerSet();
typename PeerSet::const_iterator peerIt = peerSet.begin();
typename PeerSet::const_iterator peerEndIt = peerSet.end();
for (; peerIt != peerEndIt; ++peerIt) {
ProcessRank peerRank = *peerIt;
sendIndices_(nativeMatrix, peerRank);
}
// then receive all indices from the peers
peerIt = peerSet.begin();
for (; peerIt != peerEndIt; ++peerIt) {
ProcessRank peerRank = *peerIt;
receiveIndices_(peerRank);
}
// wait until all send operations are completed
peerIt = peerSet.begin();
for (; peerIt != peerEndIt; ++peerIt) {
ProcessRank peerRank = *peerIt;
numRowsSendBuff_[peerRank]->wait();
rowSizesSendBuff_[peerRank]->wait();
rowIndicesSendBuff_[peerRank]->wait();
entryColIndicesSendBuff_[peerRank]->wait();
// convert the global indices in the send buffers to domestic
// ones
globalToDomesticBuff_(*rowIndicesSendBuff_[peerRank]);
globalToDomesticBuff_(*entryColIndicesSendBuff_[peerRank]);
}
/////////
// actually initialize the BCRS matrix structure
/////////
// set the row sizes
size_t numDomestic = overlap_->numDomestic();
for (unsigned rowIdx = 0; rowIdx < numDomestic; ++rowIdx) {
unsigned numCols = 0;
const auto& colIndices = entries_[rowIdx];
auto colIdxIt = colIndices.begin();
const auto& colIdxEndIt = colIndices.end();
for (; colIdxIt != colIdxEndIt; ++colIdxIt) {
if (*colIdxIt < 0)
// the matrix for the local process does not know about this DOF
continue;
++numCols;
}
this->setrowsize(rowIdx, numCols);
}
this->endrowsizes();
// set the indices
for (unsigned rowIdx = 0; rowIdx < numDomestic; ++rowIdx) {
const auto& colIndices = entries_[rowIdx];
auto colIdxIt = colIndices.begin();
const auto& colIdxEndIt = colIndices.end();
for (; colIdxIt != colIdxEndIt; ++colIdxIt) {
if (*colIdxIt < 0)
// the matrix for the local process does not know about this DOF
continue;
this->addindex(rowIdx, static_cast<unsigned>(*colIdxIt));
}
}
this->endindices();
// free the memory occupied by the array of the matrix entries
entries_.clear();
}
// send the overlap indices to a peer
template <class NativeBCRSMatrix>
void sendIndices_(const NativeBCRSMatrix& nativeMatrix OPM_UNUSED_NOMPI,
ProcessRank peerRank OPM_UNUSED_NOMPI)
{
#if HAVE_MPI
// send the size of the foreign overlap to the peer
size_t numOverlapRows = overlap_->foreignOverlapSize(peerRank);
numRowsSendBuff_[peerRank] = new MpiBuffer<unsigned>(1);
(*numRowsSendBuff_[peerRank])[0] = static_cast<unsigned>(numOverlapRows);
numRowsSendBuff_[peerRank]->send(peerRank);
// allocate the buffers which hold the global indices of each row and the number
// of entries which need to be communicated by the respective row
rowIndicesSendBuff_[peerRank] = new MpiBuffer<Index>(numOverlapRows);
rowSizesSendBuff_[peerRank] = new MpiBuffer<unsigned>(numOverlapRows);
// compute the sets of indices of the entries which need to be sent to the peer
typedef std::set<int> ColumnIndexSet;
typedef std::map<int, ColumnIndexSet> EntryTuples;
EntryTuples entryIndices;
unsigned numEntries = 0; // <- total number of matrix entries to be sent to the peer
for (unsigned overlapOffset = 0; overlapOffset < numOverlapRows; ++overlapOffset) {
Index domesticRowIdx = overlap_->foreignOverlapOffsetToDomesticIdx(peerRank, overlapOffset);
Index nativeRowIdx = overlap_->domesticToNative(domesticRowIdx);
Index globalRowIdx = overlap_->domesticToGlobal(domesticRowIdx);
ColumnIndexSet& colIndices = entryIndices[globalRowIdx];
auto nativeColIt = nativeMatrix[static_cast<unsigned>(nativeRowIdx)].begin();
const auto& nativeColEndIt = nativeMatrix[static_cast<unsigned>(nativeRowIdx)].end();
for (; nativeColIt != nativeColEndIt; ++nativeColIt) {
unsigned nativeColIdx = static_cast<unsigned>(nativeColIt.index());
Index domesticColIdx = overlap_->nativeToDomestic(static_cast<Index>(nativeColIdx));
if (domesticColIdx < 0)
// the native column index may be blacklisted, use the corresponding
// index in the domestic overlap.
domesticColIdx = overlap_->blackList().nativeToDomestic(static_cast<Index>(nativeColIdx));
if (domesticColIdx < 0)
// the column may still not be known locally, i.e. the corresponding
// DOF is located on the process's front, so we do not need this
// entry.
continue;
Index globalColIdx = overlap_->domesticToGlobal(domesticColIdx);
colIndices.insert(globalColIdx);
++numEntries;
}
}
// fill the send buffers
entryColIndicesSendBuff_[peerRank] = new MpiBuffer<Index>(numEntries);
Index overlapEntryIdx = 0;
for (unsigned overlapOffset = 0; overlapOffset < numOverlapRows; ++overlapOffset) {
Index domesticRowIdx = overlap_->foreignOverlapOffsetToDomesticIdx(peerRank, overlapOffset);
Index globalRowIdx = overlap_->domesticToGlobal(domesticRowIdx);
(*rowIndicesSendBuff_[peerRank])[overlapOffset] = globalRowIdx;
const ColumnIndexSet& colIndexSet = entryIndices[globalRowIdx];
auto* rssb = rowSizesSendBuff_[peerRank];
(*rssb)[overlapOffset] = static_cast<unsigned>(colIndexSet.size());
for (auto it = colIndexSet.begin(); it != colIndexSet.end(); ++it) {
int globalColIdx = *it;
(*entryColIndicesSendBuff_[peerRank])[static_cast<unsigned>(overlapEntryIdx)] = globalColIdx;
++ overlapEntryIdx;
}
}
// actually communicate with the peer
rowSizesSendBuff_[peerRank]->send(peerRank);
rowIndicesSendBuff_[peerRank]->send(peerRank);
entryColIndicesSendBuff_[peerRank]->send(peerRank);
// create the send buffers for the values of the matrix
// entries
entryValuesSendBuff_[peerRank] = new MpiBuffer<block_type>(numEntries);
#endif // HAVE_MPI
}
// receive the overlap indices from a peer
void receiveIndices_(ProcessRank peerRank OPM_UNUSED_NOMPI)
{
#if HAVE_MPI
// receive the size of the foreign overlap from the peer
unsigned numOverlapRows;
auto& numRowsRecvBuff = numRowsRecvBuff_[peerRank];
numRowsRecvBuff.resize(1);
numRowsRecvBuff.receive(peerRank);
numOverlapRows = numRowsRecvBuff[0];
// create receive buffer for the row sizes and receive them
// from the peer
rowSizesRecvBuff_[peerRank] = new MpiBuffer<unsigned>(numOverlapRows);
rowIndicesRecvBuff_[peerRank] = new MpiBuffer<Index>(numOverlapRows);
rowSizesRecvBuff_[peerRank]->receive(peerRank);
rowIndicesRecvBuff_[peerRank]->receive(peerRank);
// calculate the total number of indices which are sent by the
// peer
unsigned totalIndices = 0;
for (unsigned i = 0; i < numOverlapRows; ++i)
totalIndices += (*rowSizesRecvBuff_[peerRank])[i];
// create the buffer to store the column indices of the matrix entries
entryColIndicesRecvBuff_[peerRank] = new MpiBuffer<Index>(totalIndices);
entryValuesRecvBuff_[peerRank] = new MpiBuffer<block_type>(totalIndices);
// communicate with the peer
entryColIndicesRecvBuff_[peerRank]->receive(peerRank);
// convert the global indices in the receive buffers to
// domestic ones
globalToDomesticBuff_(*rowIndicesRecvBuff_[peerRank]);
globalToDomesticBuff_(*entryColIndicesRecvBuff_[peerRank]);
// add the entries to the global entry map
unsigned k = 0;
for (unsigned i = 0; i < numOverlapRows; ++i) {
Index domRowIdx = (*rowIndicesRecvBuff_[peerRank])[i];
for (unsigned j = 0; j < (*rowSizesRecvBuff_[peerRank])[i]; ++j) {
Index domColIdx = (*entryColIndicesRecvBuff_[peerRank])[k];
entries_[static_cast<unsigned>(domRowIdx)].insert(domColIdx);
++k;
}
}
#endif // HAVE_MPI
}
void sendEntries_(ProcessRank peerRank OPM_UNUSED_NOMPI)
{
#if HAVE_MPI
auto &mpiSendBuff = *entryValuesSendBuff_[peerRank];
auto &mpiRowIndicesSendBuff = *rowIndicesSendBuff_[peerRank];
auto &mpiRowSizesSendBuff = *rowSizesSendBuff_[peerRank];
auto &mpiColIndicesSendBuff = *entryColIndicesSendBuff_[peerRank];
// fill the send buffer
unsigned k = 0;
for (unsigned i = 0; i < mpiRowIndicesSendBuff.size(); ++i) {
Index domRowIdx = mpiRowIndicesSendBuff[i];
for (Index j = 0; j < static_cast<Index>(mpiRowSizesSendBuff[i]); ++j)
{
// move to the next column which is in the overlap
Index domColIdx = mpiColIndicesSendBuff[k];
// add the values of this column to the send buffer
mpiSendBuff[k] = (*this)[static_cast<unsigned>(domRowIdx)][static_cast<unsigned>(domColIdx)];
++k;
}
}
mpiSendBuff.send(peerRank);
#endif // HAVE_MPI
}
void receiveAddEntries_(ProcessRank peerRank OPM_UNUSED_NOMPI)
{
#if HAVE_MPI
auto &mpiRecvBuff = *entryValuesRecvBuff_[peerRank];
auto &mpiRowIndicesRecvBuff = *rowIndicesRecvBuff_[peerRank];
auto &mpiRowSizesRecvBuff = *rowSizesRecvBuff_[peerRank];
auto &mpiColIndicesRecvBuff = *entryColIndicesRecvBuff_[peerRank];
mpiRecvBuff.receive(peerRank);
// retrieve the values from the receive buffer
unsigned k = 0;
for (unsigned i = 0; i < mpiRowIndicesRecvBuff.size(); ++i) {
Index domRowIdx = mpiRowIndicesRecvBuff[i];
for (unsigned j = 0; j < mpiRowSizesRecvBuff[i]; ++j, ++k) {
Index domColIdx = mpiColIndicesRecvBuff[k];
if (domColIdx < 0)
// the matrix for the current process does not know about this DOF
continue;
(*this)[static_cast<unsigned>(domRowIdx)][static_cast<unsigned>(domColIdx)] += mpiRecvBuff[k];
}
}
#endif // HAVE_MPI
}
void receiveCopyEntries_(int peerRank OPM_UNUSED_NOMPI)
{
#if HAVE_MPI
MpiBuffer<block_type> &mpiRecvBuff = *entryValuesRecvBuff_[peerRank];
MpiBuffer<Index> &mpiRowIndicesRecvBuff = *rowIndicesRecvBuff_[peerRank];
MpiBuffer<unsigned> &mpiRowSizesRecvBuff = *rowSizesRecvBuff_[peerRank];
MpiBuffer<Index> &mpiColIndicesRecvBuff = *entryColIndicesRecvBuff_[peerRank];
mpiRecvBuff.receive(peerRank);
// retrieve the values from the receive buffer
unsigned k = 0;
for (unsigned i = 0; i < mpiRowIndicesRecvBuff.size(); ++i) {
Index domRowIdx = mpiRowIndicesRecvBuff[i];
for (unsigned j = 0; j < mpiRowSizesRecvBuff[i]; ++j, ++k) {
Index domColIdx = mpiColIndicesRecvBuff[k];
if (domColIdx < 0)
// the matrix for the current process does not know about this DOF
continue;
(*this)[static_cast<unsigned>(domRowIdx)][static_cast<unsigned>(domColIdx)] = mpiRecvBuff[k];
}
}
#endif // HAVE_MPI
}
void globalToDomesticBuff_(MpiBuffer<Index>& idxBuff)
{
for (unsigned i = 0; i < idxBuff.size(); ++i)
idxBuff[i] = overlap_->globalToDomestic(idxBuff[i]);
}
int myRank_;
Entries entries_;
std::shared_ptr<Overlap> overlap_;
std::map<ProcessRank, MpiBuffer<unsigned> *> numRowsSendBuff_;
std::map<ProcessRank, MpiBuffer<unsigned> *> rowSizesSendBuff_;
std::map<ProcessRank, MpiBuffer<Index> *> rowIndicesSendBuff_;
std::map<ProcessRank, MpiBuffer<Index> *> entryColIndicesSendBuff_;
std::map<ProcessRank, MpiBuffer<block_type> *> entryValuesSendBuff_;
std::map<ProcessRank, MpiBuffer<unsigned> > numRowsRecvBuff_;
std::map<ProcessRank, MpiBuffer<unsigned> *> rowSizesRecvBuff_;
std::map<ProcessRank, MpiBuffer<Index> *> rowIndicesRecvBuff_;
std::map<ProcessRank, MpiBuffer<Index> *> entryColIndicesRecvBuff_;
std::map<ProcessRank, MpiBuffer<block_type> *> entryValuesRecvBuff_;
};
} // namespace Linear
} // namespace Opm
#endif

View File

@ -0,0 +1,362 @@
// -*- mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
// vi: set et ts=4 sw=4 sts=4:
/*
This file is part of the Open Porous Media project (OPM).
OPM is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
OPM is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with OPM. If not, see <http://www.gnu.org/licenses/>.
Consult the COPYING file in the top-level source directory of this
module for the precise wording of the license and the list of
copyright holders.
*/
/*!
* \file
* \copydoc Opm::Linear::OverlappingBlockVector
*/
#ifndef EWOMS_OVERLAPPING_BLOCK_VECTOR_HH
#define EWOMS_OVERLAPPING_BLOCK_VECTOR_HH
#include "overlaptypes.hh"
#include <ewoms/parallel/mpibuffer.hh>
#include <opm/material/common/Valgrind.hpp>
#include <dune/istl/bvector.hh>
#include <dune/common/fvector.hh>
#include <memory>
#include <map>
#include <iostream>
namespace Opm {
namespace Linear {
/*!
* \brief An overlap aware block vector.
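 *
 * A minimal usage sketch (hypothetical names, assuming a previously built
 * domestic overlap):
 * \code
 * OverlappingBlockVector<FieldVector, Overlap> x(overlap);
 * x.assign(nativeX);   // border rows are taken from their master process
 * x.sync();            // make the overlap region consistent again
 * x.assignTo(nativeX); // export the local values back
 * \endcode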
*/
template <class FieldVector, class Overlap>
class OverlappingBlockVector : public Dune::BlockVector<FieldVector>
{
typedef Dune::BlockVector<FieldVector> ParentType;
typedef Dune::BlockVector<FieldVector> BlockVector;
public:
/*!
* \brief Given a domestic overlap object, create an overlapping
* block vector coherent to it.
*/
OverlappingBlockVector(const Overlap& overlap)
: ParentType(overlap.numDomestic()), overlap_(&overlap)
{ createBuffers_(); }
/*!
* \brief Copy constructor.
*/
OverlappingBlockVector(const OverlappingBlockVector& obv)
: ParentType(obv)
, numIndicesSendBuff_(obv.numIndicesSendBuff_)
, indicesSendBuff_(obv.indicesSendBuff_)
, indicesRecvBuff_(obv.indicesRecvBuff_)
, valuesSendBuff_(obv.valuesSendBuff_)
, valuesRecvBuff_(obv.valuesRecvBuff_)
, overlap_(obv.overlap_)
{}
/*!
* \brief Default constructor.
*/
OverlappingBlockVector()
// make sure that the overlap pointer is not left dangling; a
// default-constructed object must be assigned before it can be used
: overlap_(nullptr)
{}
//! \cond SKIP
/*!
* \brief Recycle the assignment operators of Dune::BlockVector
*/
using ParentType::operator=;
//! \endcond
/*!
* \brief Assignment operator.
*/
OverlappingBlockVector& operator=(const OverlappingBlockVector& obv)
{
ParentType::operator=(obv);
numIndicesSendBuff_ = obv.numIndicesSendBuff_;
indicesSendBuff_ = obv.indicesSendBuff_;
indicesRecvBuff_ = obv.indicesRecvBuff_;
valuesSendBuff_ = obv.valuesSendBuff_;
valuesRecvBuff_ = obv.valuesRecvBuff_;
overlap_ = obv.overlap_;
return *this;
}
/*!
* \brief Assign an overlapping block vector from a
* non-overlapping one, border entries are added.
*/
template <class BlockVector>
void assignAddBorder(const BlockVector& nativeBlockVector)
{
size_t numDomestic = overlap_->numDomestic();
// assign the local rows from the non-overlapping block vector
for (unsigned domRowIdx = 0; domRowIdx < numDomestic; ++domRowIdx) {
Index nativeRowIdx = overlap_->domesticToNative(static_cast<Index>(domRowIdx));
if (nativeRowIdx < 0)
(*this)[domRowIdx] = 0.0;
else
(*this)[domRowIdx] = nativeBlockVector[nativeRowIdx];
}
// add up the contents of the overlapping rows. Since non-native rows are set to
// zero above, the addition is only different from an assignment if the row is
// shared amongst multiple ranks.
syncAdd();
}
/*!
* \brief Assign an overlapping block vector from a non-overlapping one, border
* entries are assigned using their respective master ranks.
*/
template <class NativeBlockVector>
void assign(const NativeBlockVector& nativeBlockVector)
{
Index numDomestic = overlap_->numDomestic();
// assign the local rows from the non-overlapping block vector
for (Index domRowIdx = 0; domRowIdx < numDomestic; ++domRowIdx) {
Index nativeRowIdx = overlap_->domesticToNative(domRowIdx);
if (nativeRowIdx < 0)
(*this)[static_cast<unsigned>(domRowIdx)] = 0.0;
else
(*this)[static_cast<unsigned>(domRowIdx)] = nativeBlockVector[static_cast<unsigned>(nativeRowIdx)];
}
// add up the contents of border rows, for the remaining rows,
// get the values from their respective master process.
sync();
}
/*!
* \brief Assign the local values to a non-overlapping block
* vector.
*/
template <class NativeBlockVector>
void assignTo(NativeBlockVector& nativeBlockVector) const
{
// assign the local rows
size_t numNative = overlap_->numNative();
nativeBlockVector.resize(numNative);
for (unsigned nativeRowIdx = 0; nativeRowIdx < numNative; ++nativeRowIdx) {
Index domRowIdx = overlap_->nativeToDomestic(static_cast<Index>(nativeRowIdx));
if (domRowIdx < 0)
nativeBlockVector[nativeRowIdx] = 0.0;
else
nativeBlockVector[nativeRowIdx] = (*this)[static_cast<unsigned>(domRowIdx)];
}
}
/*!
 * \brief Synchronize all values of the block vector from their
* master process.
*/
void sync()
{
// send all entries to all peers
for (const auto peerRank: overlap_->peerSet())
sendEntries_(peerRank);
// receive all entries from the peers
for (const auto peerRank: overlap_->peerSet())
receiveFromMaster_(peerRank);
// wait until we have sent everything
waitSendFinished_();
}
/*!
 * \brief Synchronize all values of the block vector by adding up
* the values of all peer ranks.
*/
void syncAdd()
{
// send all entries to all peers
for (const auto peerRank: overlap_->peerSet())
sendEntries_(peerRank);
// receive all entries from the peers
for (const auto peerRank: overlap_->peerSet())
receiveAdd_(peerRank);
// wait until we have sent everything
waitSendFinished_();
}
void print() const
{
for (unsigned i = 0; i < this->size(); ++i) {
std::cout << "row " << i << (overlap_->isLocal(i) ? " " : "*")
<< ": " << (*this)[i] << "\n" << std::flush;
}
}
private:
void createBuffers_()
{
#if HAVE_MPI
// create the buffers used to communicate the overlap indices and values
typename PeerSet::const_iterator peerIt;
typename PeerSet::const_iterator peerEndIt = overlap_->peerSet().end();
// send all indices to the peers
peerIt = overlap_->peerSet().begin();
for (; peerIt != peerEndIt; ++peerIt) {
ProcessRank peerRank = *peerIt;
size_t numEntries = overlap_->foreignOverlapSize(peerRank);
numIndicesSendBuff_[peerRank] = std::make_shared<MpiBuffer<unsigned> >(1);
indicesSendBuff_[peerRank] = std::make_shared<MpiBuffer<Index> >(numEntries);
valuesSendBuff_[peerRank] = std::make_shared<MpiBuffer<FieldVector> >(numEntries);
// fill the indices buffer with global indices
MpiBuffer<Index>& indicesSendBuff = *indicesSendBuff_[peerRank];
for (unsigned i = 0; i < numEntries; ++i) {
Index domRowIdx = overlap_->foreignOverlapOffsetToDomesticIdx(peerRank, i);
indicesSendBuff[i] = overlap_->domesticToGlobal(domRowIdx);
}
// first, send the number of indices
(*numIndicesSendBuff_[peerRank])[0] = static_cast<unsigned>(numEntries);
numIndicesSendBuff_[peerRank]->send(peerRank);
// then, send the indices themselves
indicesSendBuff.send(peerRank);
}
// receive the indices from the peers
peerIt = overlap_->peerSet().begin();
for (; peerIt != peerEndIt; ++peerIt) {
ProcessRank peerRank = *peerIt;
// receive size of overlap to peer
MpiBuffer<unsigned> numRowsRecvBuff(1);
numRowsRecvBuff.receive(peerRank);
unsigned numRows = numRowsRecvBuff[0];
// then, create the MPI buffers
indicesRecvBuff_[peerRank] = std::shared_ptr<MpiBuffer<Index> >(
new MpiBuffer<Index>(numRows));
valuesRecvBuff_[peerRank] = std::shared_ptr<MpiBuffer<FieldVector> >(
new MpiBuffer<FieldVector>(numRows));
MpiBuffer<Index>& indicesRecvBuff = *indicesRecvBuff_[peerRank];
// next, receive the actual indices
indicesRecvBuff.receive(peerRank);
// finally, translate the global indices to domestic ones
for (unsigned i = 0; i != numRows; ++i) {
Index globalRowIdx = indicesRecvBuff[i];
Index domRowIdx = overlap_->globalToDomestic(globalRowIdx);
indicesRecvBuff[i] = domRowIdx;
}
}
// wait for all send operations to complete
peerIt = overlap_->peerSet().begin();
for (; peerIt != peerEndIt; ++peerIt) {
ProcessRank peerRank = *peerIt;
numIndicesSendBuff_[peerRank]->wait();
indicesSendBuff_[peerRank]->wait();
// convert the global indices of the send buffer to
// domestic ones
MpiBuffer<Index>& indicesSendBuff = *indicesSendBuff_[peerRank];
for (unsigned i = 0; i < indicesSendBuff.size(); ++i) {
indicesSendBuff[i] = overlap_->globalToDomestic(indicesSendBuff[i]);
}
}
#endif // HAVE_MPI
}
void sendEntries_(ProcessRank peerRank)
{
// copy the values into the send buffer
const MpiBuffer<Index>& indices = *indicesSendBuff_[peerRank];
MpiBuffer<FieldVector>& values = *valuesSendBuff_[peerRank];
for (unsigned i = 0; i < indices.size(); ++i)
values[i] = (*this)[static_cast<unsigned>(indices[i])];
values.send(peerRank);
}
void waitSendFinished_()
{
typename PeerSet::const_iterator peerIt;
typename PeerSet::const_iterator peerEndIt = overlap_->peerSet().end();
// wait for all send operations to complete
peerIt = overlap_->peerSet().begin();
for (; peerIt != peerEndIt; ++peerIt) {
ProcessRank peerRank = *peerIt;
valuesSendBuff_[peerRank]->wait();
}
}
void receiveFromMaster_(ProcessRank peerRank)
{
const MpiBuffer<Index>& indices = *indicesRecvBuff_[peerRank];
MpiBuffer<FieldVector>& values = *valuesRecvBuff_[peerRank];
// receive the values from the peer
values.receive(peerRank);
// copy them into the block vector
for (unsigned j = 0; j < indices.size(); ++j) {
Index domRowIdx = indices[j];
if (overlap_->masterRank(domRowIdx) == peerRank) {
(*this)[static_cast<unsigned>(domRowIdx)] = values[j];
}
}
}
void receiveAdd_(ProcessRank peerRank)
{
const MpiBuffer<Index>& indices = *indicesRecvBuff_[peerRank];
MpiBuffer<FieldVector>& values = *valuesRecvBuff_[peerRank];
// receive the values from the peer
values.receive(peerRank);
// add up the values of rows on the shared boundary
for (unsigned j = 0; j < indices.size(); ++j) {
Index domRowIdx = indices[j];
(*this)[static_cast<unsigned>(domRowIdx)] += values[j];
}
}
std::map<ProcessRank, std::shared_ptr<MpiBuffer<unsigned> > > numIndicesSendBuff_;
std::map<ProcessRank, std::shared_ptr<MpiBuffer<Index> > > indicesSendBuff_;
std::map<ProcessRank, std::shared_ptr<MpiBuffer<Index> > > indicesRecvBuff_;
std::map<ProcessRank, std::shared_ptr<MpiBuffer<FieldVector> > > valuesSendBuff_;
std::map<ProcessRank, std::shared_ptr<MpiBuffer<FieldVector> > > valuesRecvBuff_;
const Overlap *overlap_;
};
} // namespace Linear
} // namespace Opm
#endif

View File

@ -0,0 +1,91 @@
// -*- mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
// vi: set et ts=4 sw=4 sts=4:
/*
This file is part of the Open Porous Media project (OPM).
OPM is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
OPM is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with OPM. If not, see <http://www.gnu.org/licenses/>.
Consult the COPYING file in the top-level source directory of this
module for the precise wording of the license and the list of
copyright holders.
*/
/*!
* \file
* \copydoc Opm::Linear::OverlappingOperator
*/
#ifndef EWOMS_OVERLAPPING_OPERATOR_HH
#define EWOMS_OVERLAPPING_OPERATOR_HH
#include <dune/istl/operators.hh>
#include <dune/common/version.hh>
namespace Opm {
namespace Linear {
/*!
* \brief An overlap aware linear operator usable by ISTL.
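 *
 * A minimal usage sketch (hypothetical names, assuming an overlapping
 * matrix A and coherent overlapping vectors x and y):
 * \code
 * OverlappingOperator<OverlapMatrix, OverlapVector, OverlapVector> op(A);
 * op.apply(x, y);               // y = A x, overlap synchronized via sync()
 * op.applyscaleadd(-1.0, x, y); // y = y - A x
 * \endcode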
*/
template <class OverlappingMatrix, class DomainVector, class RangeVector>
class OverlappingOperator
: public Dune::AssembledLinearOperator<OverlappingMatrix, DomainVector, RangeVector>
{
typedef typename OverlappingMatrix::Overlap Overlap;
public:
//! export types
typedef DomainVector domain_type;
typedef typename domain_type::field_type field_type;
OverlappingOperator(const OverlappingMatrix& A) : A_(A)
{}
#if DUNE_VERSION_NEWER(DUNE_ISTL, 2,6)
//! the kind of computations supported by the operator. Either overlapping or non-overlapping
Dune::SolverCategory::Category category() const override
{ return Dune::SolverCategory::overlapping; }
#else
// redefine the category
enum { category = Dune::SolverCategory::overlapping };
#endif
//! apply operator to x: \f$ y = A(x) \f$
virtual void apply(const DomainVector& x, RangeVector& y) const override
{
A_.mv(x, y);
y.sync();
}
//! apply operator to x, scale and add: \f$ y = y + \alpha A(x) \f$
virtual void applyscaleadd(field_type alpha, const DomainVector& x,
RangeVector& y) const override
{
A_.usmv(alpha, x, y);
y.sync();
}
//! returns the matrix
virtual const OverlappingMatrix& getmat() const override
{ return A_; }
const Overlap& overlap() const
{ return A_.overlap(); }
private:
const OverlappingMatrix& A_;
};
} // namespace Linear
} // namespace Opm
#endif

View File

@ -0,0 +1,193 @@
// -*- mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
// vi: set et ts=4 sw=4 sts=4:
/*
This file is part of the Open Porous Media project (OPM).
OPM is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
OPM is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with OPM. If not, see <http://www.gnu.org/licenses/>.
Consult the COPYING file in the top-level source directory of this
module for the precise wording of the license and the list of
copyright holders.
*/
/*!
* \file
* \copydoc Opm::Linear::OverlappingPreconditioner
*/
#ifndef EWOMS_OVERLAPPING_PRECONDITIONER_HH
#define EWOMS_OVERLAPPING_PRECONDITIONER_HH
#include "overlappingscalarproduct.hh"
#include <opm/material/common/Exceptions.hpp>
#include <dune/istl/preconditioners.hh>
#include <dune/common/version.hh>
namespace Opm {
namespace Linear {
/*!
* \brief An overlap aware preconditioner for any ISTL linear solver.
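 *
 * A minimal usage sketch (hypothetical names, wrapping an arbitrary
 * sequential preconditioner from dune-istl):
 * \code
 * SeqPreCond seqPreCond(A, 1, 1.0); // e.g. Dune::SeqJac<Matrix, Vector, Vector>
 * OverlappingPreconditioner<SeqPreCond, Overlap>
 *     parPreCond(seqPreCond, A.overlap());
 * parPreCond.pre(x, b);
 * parPreCond.apply(x, b);
 * parPreCond.post(x);
 * \endcode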
*/
template <class SeqPreCond, class Overlap>
class OverlappingPreconditioner
: public Dune::Preconditioner<typename SeqPreCond::domain_type,
typename SeqPreCond::range_type>
{
public:
typedef typename SeqPreCond::domain_type domain_type;
typedef typename SeqPreCond::range_type range_type;
#if DUNE_VERSION_NEWER(DUNE_ISTL, 2,6)
//! the kind of computations supported by the operator. Either overlapping or non-overlapping
Dune::SolverCategory::Category category() const override
{ return Dune::SolverCategory::overlapping; }
#else
// redefine the category
enum { category = Dune::SolverCategory::overlapping };
#endif
OverlappingPreconditioner(SeqPreCond& seqPreCond, const Overlap& overlap)
: seqPreCond_(seqPreCond), overlap_(&overlap)
{}
void pre(domain_type& x, range_type& y) override
{
#if HAVE_MPI
short success;
try
{
seqPreCond_.pre(x, y);
short localSuccess = 1;
MPI_Allreduce(&localSuccess, // source buffer
&success, // destination buffer
1, // number of objects in buffers
MPI_SHORT, // data type
MPI_MIN, // operation
MPI_COMM_WORLD); // communicator
}
catch (...)
{
short localSuccess = 0;
MPI_Allreduce(&localSuccess, // source buffer
&success, // destination buffer
1, // number of objects in buffers
MPI_SHORT, // data type
MPI_MIN, // operation
MPI_COMM_WORLD); // communicator
}
// x.sync() is called unconditionally below, so only the failure case
// needs to be handled here
if (!success)
throw Opm::NumericalIssue("Preconditioner threw an exception in pre() method on some process.");
#else
seqPreCond_.pre(x, y);
#endif
// communicate the results on the overlap
x.sync();
y.sync();
}
void apply(domain_type& x, const range_type& d) override
{
#if HAVE_MPI
if (overlap_->peerSet().size() > 0) {
// make sure that all processes react the same if the
// sequential preconditioner on one process throws an
// exception
short success;
try
{
// execute the sequential preconditioner
seqPreCond_.apply(x, d);
short localSuccess = 1;
MPI_Allreduce(&localSuccess, // source buffer
&success, // destination buffer
1, // number of objects in buffers
MPI_SHORT, // data type
MPI_MIN, // operation
MPI_COMM_WORLD); // communicator
}
catch (...)
{
short localSuccess = 0;
MPI_Allreduce(&localSuccess, // source buffer
&success, // destination buffer
1, // number of objects in buffers
MPI_SHORT, // data type
MPI_MIN, // operation
MPI_COMM_WORLD); // communicator
}
if (success) {
x.sync();
}
else
throw Opm::NumericalIssue("Preconditioner threw an exception on some process.");
}
else
#endif // HAVE_MPI
seqPreCond_.apply(x, d);
}
void post(domain_type& x) override
{
#if HAVE_MPI
short success;
try
{
seqPreCond_.post(x);
short localSuccess = 1;
MPI_Allreduce(&localSuccess, // source buffer
&success, // destination buffer
1, // number of objects in buffers
MPI_SHORT, // data type
MPI_MIN, // operation
MPI_COMM_WORLD); // communicator
}
catch (...)
{
short localSuccess = 0;
MPI_Allreduce(&localSuccess, // source buffer
&success, // destination buffer
1, // number of objects in buffers
MPI_SHORT, // data type
MPI_MIN, // operation
MPI_COMM_WORLD); // communicator
}
if (success) {
x.sync();
}
else
throw Opm::NumericalIssue("Preconditioner threw an exception in post() method on "
"some process.");
#else
seqPreCond_.post(x);
#endif
}
private:
SeqPreCond& seqPreCond_;
const Overlap *overlap_;
};
} // namespace Linear
} // namespace Opm
#endif

View File

@ -0,0 +1,92 @@
// -*- mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
// vi: set et ts=4 sw=4 sts=4:
/*
This file is part of the Open Porous Media project (OPM).
OPM is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
OPM is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with OPM. If not, see <http://www.gnu.org/licenses/>.
Consult the COPYING file in the top-level source directory of this
module for the precise wording of the license and the list of
copyright holders.
*/
/*!
* \file
* \copydoc Opm::Linear::OverlappingScalarProduct
*/
#ifndef EWOMS_OVERLAPPING_SCALAR_PRODUCT_HH
#define EWOMS_OVERLAPPING_SCALAR_PRODUCT_HH
#include <dune/common/version.hh>
#include <dune/common/parallel/mpihelper.hh>
#include <dune/istl/scalarproducts.hh>
namespace Opm {
namespace Linear {
/*!
* \brief An overlap aware ISTL scalar product.
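 *
 * Only entries whose degrees of freedom are owned by the local process enter
 * the local sum, so each DOF is counted exactly once in the global result. A
 * minimal sketch (hypothetical names):
 * \code
 * OverlappingScalarProduct<OverlapVector, Overlap> scalProd(overlap);
 * auto d = scalProd.dot(x, y); // global sum over all MPI ranks
 * auto n = scalProd.norm(x);   // equals std::sqrt(scalProd.dot(x, x))
 * \endcode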
*/
template <class OverlappingBlockVector, class Overlap>
class OverlappingScalarProduct
: public Dune::ScalarProduct<OverlappingBlockVector>
{
public:
typedef typename OverlappingBlockVector::field_type field_type;
typedef typename Dune::CollectiveCommunication<typename Dune::MPIHelper::MPICommunicator> CollectiveCommunication;
#if DUNE_VERSION_NEWER(DUNE_ISTL, 2, 5)
typedef typename Dune::ScalarProduct<OverlappingBlockVector>::real_type real_type;
#else
typedef double real_type;
#endif
#if DUNE_VERSION_NEWER(DUNE_ISTL, 2,6)
//! the kind of computations supported by the operator. Either overlapping or non-overlapping
Dune::SolverCategory::Category category() const override
{ return Dune::SolverCategory::overlapping; }
#else
// redefine the category
enum { category = Dune::SolverCategory::overlapping };
#endif
OverlappingScalarProduct(const Overlap& overlap)
: overlap_(overlap), comm_( Dune::MPIHelper::getCollectiveCommunication() )
{}
field_type dot(const OverlappingBlockVector& x,
const OverlappingBlockVector& y) override
{
field_type sum = 0;
size_t numLocal = overlap_.numLocal();
for (unsigned localIdx = 0; localIdx < numLocal; ++localIdx) {
if (overlap_.iAmMasterOf(static_cast<int>(localIdx)))
sum += x[localIdx] * y[localIdx];
}
// return the global sum
return comm_.sum( sum );
}
real_type norm(const OverlappingBlockVector& x) override
{ return std::sqrt(dot(x, x)); }
private:
const Overlap& overlap_;
const CollectiveCommunication comm_;
};
} // namespace Linear
} // namespace Opm
#endif

View File

@ -0,0 +1,192 @@
// -*- mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
// vi: set et ts=4 sw=4 sts=4:
/*
This file is part of the Open Porous Media project (OPM).
OPM is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
OPM is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with OPM. If not, see <http://www.gnu.org/licenses/>.
Consult the COPYING file in the top-level source directory of this
module for the precise wording of the license and the list of
copyright holders.
*/
/*!
* \file
*
* \brief This files provides several data structures for storing
* tuples of indices of remote and/or local processes.
*/
#ifndef EWOMS_OVERLAP_TYPES_HH
#define EWOMS_OVERLAP_TYPES_HH
#include <set>
#include <list>
#include <vector>
#include <map>
#include <cstddef>
namespace Opm {
namespace Linear {
/*!
* \brief The type of an index of a degree of freedom.
*/
typedef int Index;
/*!
* \brief The type of the rank of a process.
*/
typedef unsigned ProcessRank;
/*!
* \brief The type representing the distance of an index to the border.
*/
typedef unsigned BorderDistance;
/*!
* \brief This structure stores an index and a process rank
*/
struct IndexRank
{
Index index;
ProcessRank rank;
};
/*!
* \brief This structure stores a local index on a peer process and a
* global index.
*/
struct PeerIndexGlobalIndex
{
Index peerIdx;
Index globalIdx;
};
/*!
* \brief This structure stores an index, a process rank, and the
* distance of the degree of freedom to the process border.
*/
struct IndexRankDist
{
Index index;
ProcessRank peerRank;
BorderDistance borderDistance;
};
/*!
* \brief This structure stores an index, a process rank, and the
* number of processes which "see" the degree of freedom with
* the index.
*/
struct IndexDistanceNpeers
{
Index index;
BorderDistance borderDistance;
unsigned numPeers;
};
/*!
* \brief A single index intersecting with the process boundary.
*/
struct BorderIndex
{
//! Index of the entity for the local process
Index localIdx;
//! Index of the entity for the peer process
Index peerIdx;
//! Rank of the peer process
ProcessRank peerRank;
//! Distance to the process border for the peer (in hops)
BorderDistance borderDistance;
};
/*!
 * \brief This class manages a list of indices which are on the
* border of a process' partition of the grid
*/
typedef std::list<BorderIndex> BorderList;
/*!
* \brief The list of indices which are on the process boundary.
*/
class SeedList : public std::list<IndexRankDist>
{
public:
void update(const BorderList& borderList)
{
this->clear();
auto it = borderList.begin();
const auto& endIt = borderList.end();
for (; it != endIt; ++it) {
IndexRankDist ird;
ird.index = it->localIdx;
ird.peerRank = it->peerRank;
ird.borderDistance = it->borderDistance;
this->push_back(ird);
}
}
};
/*!
* \brief A set of process ranks
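 *
 * A minimal sketch (assuming an already populated border list):
 * \code
 * PeerSet peers;
 * peers.update(borderList); // collect the rank of every border index
 * for (ProcessRank peerRank : peers)
 *     std::cout << "shares DOFs with rank " << peerRank << "\n";
 * \endcode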
*/
class PeerSet : public std::set<ProcessRank>
{
public:
void update(const BorderList& borderList)
{
this->clear();
auto it = borderList.begin();
const auto& endIt = borderList.end();
for (; it != endIt; ++it)
this->insert(it->peerRank);
}
};
/*!
* \brief The list of indices which overlap with a peer rank.
*/
typedef std::vector<IndexDistanceNpeers> OverlapWithPeer;
/*!
* \brief A type mapping the process rank to the list of indices
* shared with this peer.
*/
typedef std::map<ProcessRank, OverlapWithPeer> OverlapByRank;
/*!
 * \brief Maps each index to a list of processes.
*/
typedef std::vector<std::map<ProcessRank, BorderDistance> > OverlapByIndex;
/*!
 * \brief The list of domestic indices which are owned by a peer rank.
*/
typedef std::vector<Index> DomesticOverlapWithPeer;
/*!
* \brief A type mapping the process rank to the list of domestic indices
* which are owned by the peer.
*/
typedef std::map<ProcessRank, DomesticOverlapWithPeer> DomesticOverlapByRank;
} // namespace Linear
} // namespace Opm
#endif

View File

@ -0,0 +1,316 @@
// -*- mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
// vi: set et ts=4 sw=4 sts=4:
/*
This file is part of the Open Porous Media project (OPM).
OPM is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
OPM is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with OPM. If not, see <http://www.gnu.org/licenses/>.
Consult the COPYING file in the top-level source directory of this
module for the precise wording of the license and the list of
copyright holders.
*/
/*!
* \file
* \copydoc Opm::Linear::ParallelAmgBackend
*/
#ifndef EWOMS_PARALLEL_AMG_BACKEND_HH
#define EWOMS_PARALLEL_AMG_BACKEND_HH
#include "parallelbasebackend.hh"
#include "bicgstabsolver.hh"
#include "combinedcriterion.hh"
#include "istlsparsematrixadapter.hh"
#include <dune/istl/bcrsmatrix.hh>
#include <dune/istl/paamg/amg.hh>
#include <dune/istl/paamg/pinfo.hh>
#include <dune/istl/owneroverlapcopy.hh>
#include <dune/common/version.hh>
#include <iostream>
namespace Opm {
namespace Linear {
template <class TypeTag>
class ParallelAmgBackend;
}}
BEGIN_PROPERTIES
NEW_TYPE_TAG(ParallelAmgLinearSolver, INHERITS_FROM(ParallelBaseLinearSolver));
NEW_PROP_TAG(AmgCoarsenTarget);
NEW_PROP_TAG(LinearSolverMaxError);
//! The target number of DOFs per processor for the parallel algebraic
//! multi-grid solver
SET_INT_PROP(ParallelAmgLinearSolver, AmgCoarsenTarget, 5000);
SET_SCALAR_PROP(ParallelAmgLinearSolver, LinearSolverMaxError, 1e7);
SET_TYPE_PROP(ParallelAmgLinearSolver, LinearSolverBackend,
Opm::Linear::ParallelAmgBackend<TypeTag>);
END_PROPERTIES
namespace Opm {
namespace Linear {
/*!
* \ingroup Linear
*
* \brief Provides a linear solver backend using the parallel
* algebraic multi-grid (AMG) linear solver from DUNE-ISTL.
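 *
 * A minimal sketch of selecting this backend for a simulation (hypothetical
 * problem type tag; assumes the standard eWoms linear solver splice):
 * \code
 * SET_TAG_PROP(MyProblemTypeTag, LinearSolverSplice, ParallelAmgLinearSolver);
 * \endcode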
*/
template <class TypeTag>
class ParallelAmgBackend : public ParallelBaseBackend<TypeTag>
{
typedef ParallelBaseBackend<TypeTag> ParentType;
typedef typename GET_PROP_TYPE(TypeTag, Scalar) Scalar;
typedef typename GET_PROP_TYPE(TypeTag, LinearSolverScalar) LinearSolverScalar;
typedef typename GET_PROP_TYPE(TypeTag, Simulator) Simulator;
typedef typename GET_PROP_TYPE(TypeTag, GridView) GridView;
typedef typename GET_PROP_TYPE(TypeTag, Overlap) Overlap;
typedef typename GET_PROP_TYPE(TypeTag, SparseMatrixAdapter) SparseMatrixAdapter;
typedef typename ParentType::ParallelOperator ParallelOperator;
typedef typename ParentType::OverlappingVector OverlappingVector;
typedef typename ParentType::ParallelPreconditioner ParallelPreconditioner;
typedef typename ParentType::ParallelScalarProduct ParallelScalarProduct;
static constexpr int numEq = GET_PROP_VALUE(TypeTag, NumEq);
typedef Dune::FieldVector<LinearSolverScalar, numEq> VectorBlock;
typedef typename SparseMatrixAdapter::MatrixBlock MatrixBlock;
typedef typename SparseMatrixAdapter::IstlMatrix IstlMatrix;
typedef Dune::BlockVector<VectorBlock> Vector;
// define the smoother used for the AMG and specify its
// arguments
typedef Dune::SeqSOR<IstlMatrix, Vector, Vector> SequentialSmoother;
// typedef Dune::SeqSSOR<IstlMatrix,Vector,Vector> SequentialSmoother;
// typedef Dune::SeqJac<IstlMatrix,Vector,Vector> SequentialSmoother;
#if DUNE_VERSION_NEWER(DUNE_ISTL, 2,7)
// typedef Dune::SeqILU<IstlMatrix,Vector,Vector> SequentialSmoother;
#else
// typedef Dune::SeqILU0<IstlMatrix,Vector,Vector> SequentialSmoother;
// typedef Dune::SeqILUn<IstlMatrix,Vector,Vector> SequentialSmoother;
#endif
#if HAVE_MPI
typedef Dune::OwnerOverlapCopyCommunication<Opm::Linear::Index>
OwnerOverlapCopyCommunication;
typedef Dune::OverlappingSchwarzOperator<IstlMatrix,
Vector,
Vector,
OwnerOverlapCopyCommunication> FineOperator;
typedef Dune::OverlappingSchwarzScalarProduct<Vector,
OwnerOverlapCopyCommunication> FineScalarProduct;
typedef Dune::BlockPreconditioner<Vector,
Vector,
OwnerOverlapCopyCommunication,
SequentialSmoother> ParallelSmoother;
typedef Dune::Amg::AMG<FineOperator,
Vector,
ParallelSmoother,
OwnerOverlapCopyCommunication> AMG;
#else
typedef Dune::MatrixAdapter<IstlMatrix, Vector, Vector> FineOperator;
typedef Dune::SeqScalarProduct<Vector> FineScalarProduct;
typedef SequentialSmoother ParallelSmoother;
typedef Dune::Amg::AMG<FineOperator, Vector, ParallelSmoother> AMG;
#endif
typedef BiCGStabSolver<ParallelOperator,
OverlappingVector,
AMG> RawLinearSolver;
static_assert(std::is_same<SparseMatrixAdapter, IstlSparseMatrixAdapter<MatrixBlock> >::value,
"The ParallelAmgBackend linear solver backend requires the IstlSparseMatrixAdapter");
public:
ParallelAmgBackend(const Simulator& simulator)
: ParentType(simulator)
{ }
static void registerParameters()
{
ParentType::registerParameters();
EWOMS_REGISTER_PARAM(TypeTag, Scalar, LinearSolverMaxError,
"The maximum residual error which the linear solver tolerates"
" without giving up");
EWOMS_REGISTER_PARAM(TypeTag, int, AmgCoarsenTarget,
"The coarsening target for the agglomerations of "
"the AMG preconditioner");
}
protected:
friend ParentType;
std::shared_ptr<AMG> preparePreconditioner_()
{
#if HAVE_MPI
// create and initialize DUNE's OwnerOverlapCopyCommunication
// using the domestic overlap
istlComm_ = std::make_shared<OwnerOverlapCopyCommunication>(MPI_COMM_WORLD);
setupAmgIndexSet_(this->overlappingMatrix_->overlap(), istlComm_->indexSet());
istlComm_->remoteIndices().template rebuild<false>();
#endif
// create the parallel scalar product and the parallel operator
#if HAVE_MPI
fineOperator_ = std::make_shared<FineOperator>(*this->overlappingMatrix_, *istlComm_);
#else
fineOperator_ = std::make_shared<FineOperator>(*this->overlappingMatrix_);
#endif
setupAmg_();
return amg_;
}
void cleanupPreconditioner_()
{ /* nothing to do */ }
std::shared_ptr<RawLinearSolver> prepareSolver_(ParallelOperator& parOperator,
ParallelScalarProduct& parScalarProduct,
AMG& parPreCond)
{
const auto& gridView = this->simulator_.gridView();
typedef CombinedCriterion<OverlappingVector, decltype(gridView.comm())> CCC;
Scalar linearSolverTolerance = EWOMS_GET_PARAM(TypeTag, Scalar, LinearSolverTolerance);
Scalar linearSolverAbsTolerance = EWOMS_GET_PARAM(TypeTag, Scalar, LinearSolverAbsTolerance);
if (linearSolverAbsTolerance < 0.0)
linearSolverAbsTolerance = this->simulator_.model().newtonMethod().tolerance()/100.0;
convCrit_.reset(new CCC(gridView.comm(),
/*residualReductionTolerance=*/linearSolverTolerance,
/*absoluteResidualTolerance=*/linearSolverAbsTolerance,
EWOMS_GET_PARAM(TypeTag, Scalar, LinearSolverMaxError)));
auto bicgstabSolver =
std::make_shared<RawLinearSolver>(parPreCond, *convCrit_, parScalarProduct);
int verbosity = 0;
if (parOperator.overlap().myRank() == 0)
verbosity = EWOMS_GET_PARAM(TypeTag, int, LinearSolverVerbosity);
bicgstabSolver->setVerbosity(verbosity);
bicgstabSolver->setMaxIterations(EWOMS_GET_PARAM(TypeTag, int, LinearSolverMaxIterations));
bicgstabSolver->setLinearOperator(&parOperator);
bicgstabSolver->setRhs(this->overlappingb_);
return bicgstabSolver;
}
std::pair<bool,int> runSolver_(std::shared_ptr<RawLinearSolver> solver)
{
bool converged = solver->apply(*this->overlappingx_);
return std::make_pair(converged, int(solver->report().iterations()));
}
void cleanupSolver_()
{ /* nothing to do */ }
#if HAVE_MPI
template <class ParallelIndexSet>
void setupAmgIndexSet_(const Overlap& overlap, ParallelIndexSet& istlIndices)
{
typedef Dune::OwnerOverlapCopyAttributeSet GridAttributes;
typedef Dune::OwnerOverlapCopyAttributeSet::AttributeSet GridAttributeSet;
// create DUNE's ParallelIndexSet from a domestic overlap
istlIndices.beginResize();
for (Index curIdx = 0; static_cast<size_t>(curIdx) < overlap.numDomestic(); ++curIdx) {
GridAttributeSet gridFlag =
overlap.iAmMasterOf(curIdx)
? GridAttributes::owner
: GridAttributes::copy;
// an index is used by other processes if it is in the
// domestic or in the foreign overlap.
bool isShared = overlap.isInOverlap(curIdx);
assert(curIdx == overlap.globalToDomestic(overlap.domesticToGlobal(curIdx)));
istlIndices.add(/*globalIdx=*/overlap.domesticToGlobal(curIdx),
Dune::ParallelLocalIndex<GridAttributeSet>(static_cast<size_t>(curIdx),
gridFlag,
isShared));
}
istlIndices.endResize();
}
#endif
void setupAmg_()
{
if (amg_)
amg_.reset();
int verbosity = 0;
if (this->simulator_.vanguard().gridView().comm().rank() == 0)
verbosity = EWOMS_GET_PARAM(TypeTag, int, LinearSolverVerbosity);
typedef typename Dune::Amg::SmootherTraits<ParallelSmoother>::Arguments SmootherArgs;
SmootherArgs smootherArgs;
smootherArgs.iterations = 1;
smootherArgs.relaxationFactor = 1.0;
// specify the coarsen criterion:
//
// typedef
// Dune::Amg::CoarsenCriterion<Dune::Amg::SymmetricCriterion<IstlMatrix,
// Dune::Amg::FirstDiagonal>>
typedef Dune::Amg::
CoarsenCriterion<Dune::Amg::SymmetricCriterion<IstlMatrix, Dune::Amg::FrobeniusNorm> >
CoarsenCriterion;
int coarsenTarget = EWOMS_GET_PARAM(TypeTag, int, AmgCoarsenTarget);
CoarsenCriterion coarsenCriterion(/*maxLevel=*/15, coarsenTarget);
coarsenCriterion.setDefaultValuesAnisotropic(GridView::dimension,
/*aggregateSizePerDim=*/3);
if (verbosity > 0)
coarsenCriterion.setDebugLevel(1);
else
coarsenCriterion.setDebugLevel(0); // make the AMG shut up
        // reduce the minimum coarsen rate (default is 1.2)
coarsenCriterion.setMinCoarsenRate(1.05);
// coarsenCriterion.setAccumulate(Dune::Amg::noAccu);
coarsenCriterion.setAccumulate(Dune::Amg::atOnceAccu);
coarsenCriterion.setSkipIsolated(false);
// instantiate the AMG preconditioner
#if HAVE_MPI
amg_ = std::make_shared<AMG>(*fineOperator_, coarsenCriterion, smootherArgs, *istlComm_);
#else
amg_ = std::make_shared<AMG>(*fineOperator_, coarsenCriterion, smootherArgs);
#endif
}
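    // A note on tuning (a sketch; the type tag `MyTypeTag` is hypothetical):
    // the aggressiveness of the coarsening can be controlled from the outside
    // by overriding the AmgCoarsenTarget property queried above, e.g.
    //
    //   SET_INT_PROP(MyTypeTag, AmgCoarsenTarget, 5000);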
std::unique_ptr<ConvergenceCriterion<OverlappingVector> > convCrit_;
std::shared_ptr<FineOperator> fineOperator_;
std::shared_ptr<AMG> amg_;
#if HAVE_MPI
std::shared_ptr<OwnerOverlapCopyCommunication> istlComm_;
#endif
};
} // namespace Linear
} // namespace Opm
#endif

View File

@ -0,0 +1,524 @@
// -*- mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
// vi: set et ts=4 sw=4 sts=4:
/*
This file is part of the Open Porous Media project (OPM).
OPM is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
OPM is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with OPM. If not, see <http://www.gnu.org/licenses/>.
Consult the COPYING file in the top-level source directory of this
module for the precise wording of the license and the list of
copyright holders.
*/
/*!
* \file
* \copydoc Opm::Linear::ParallelBaseBackend
*/
#ifndef EWOMS_PARALLEL_BASE_BACKEND_HH
#define EWOMS_PARALLEL_BASE_BACKEND_HH
#include <opm/simulators/linalg/istlsparsematrixadapter.hh>
#include <opm/simulators/linalg/overlappingbcrsmatrix.hh>
#include <opm/simulators/linalg/overlappingblockvector.hh>
#include <opm/simulators/linalg/overlappingpreconditioner.hh>
#include <opm/simulators/linalg/overlappingscalarproduct.hh>
#include <opm/simulators/linalg/overlappingoperator.hh>
#include <opm/simulators/linalg/istlpreconditionerwrappers.hh>
#include <ewoms/common/genericguard.hh>
#include <ewoms/common/propertysystem.hh>
#include <ewoms/common/parametersystem.hh>
#include <opm/simulators/linalg/matrixblock.hh>
#include <dune/grid/io/file/vtk/vtkwriter.hh>
#include <dune/common/fvector.hh>
#include <dune/common/version.hh>
#include <sstream>
#include <memory>
#include <iostream>
BEGIN_PROPERTIES
NEW_TYPE_TAG(ParallelBaseLinearSolver);
// forward declaration of the required property tags
NEW_PROP_TAG(Simulator);
NEW_PROP_TAG(Scalar);
NEW_PROP_TAG(NumEq);
NEW_PROP_TAG(SparseMatrixAdapter);
NEW_PROP_TAG(GlobalEqVector);
NEW_PROP_TAG(VertexMapper);
NEW_PROP_TAG(GridView);
NEW_PROP_TAG(BorderListCreator);
NEW_PROP_TAG(Overlap);
NEW_PROP_TAG(OverlappingVector);
NEW_PROP_TAG(OverlappingMatrix);
NEW_PROP_TAG(OverlappingScalarProduct);
NEW_PROP_TAG(OverlappingLinearOperator);
//! The type of the linear solver to be used
NEW_PROP_TAG(LinearSolverBackend);
//! the preconditioner used by the linear solver
NEW_PROP_TAG(PreconditionerWrapper);
//! The floating point type used internally by the linear solver
NEW_PROP_TAG(LinearSolverScalar);
/*!
* \brief The size of the algebraic overlap of the linear solver.
*
 * An algebraic overlap can be thought of as the overlap of a grid, except
 * that it exists only for the linear system of equations.
*/
NEW_PROP_TAG(LinearSolverOverlapSize);
/*!
* \brief Maximum accepted error of the solution of the linear solver.
*/
NEW_PROP_TAG(LinearSolverTolerance);
/*!
* \brief Maximum accepted error of the norm of the residual.
*/
NEW_PROP_TAG(LinearSolverAbsTolerance);
/*!
* \brief Specifies the verbosity of the linear solver
*
* By default it is 0, i.e. it doesn't print anything. Setting this
* property to 1 prints aggregated convergence rates, 2 prints the
* convergence rate of every iteration of the scheme.
*/
NEW_PROP_TAG(LinearSolverVerbosity);
//! Maximum number of iterations executed by the linear solver
NEW_PROP_TAG(LinearSolverMaxIterations);
//! The order of the sequential preconditioner
NEW_PROP_TAG(PreconditionerOrder);
//! The relaxation factor of the preconditioner
NEW_PROP_TAG(PreconditionerRelaxation);
//! Set the type of a global jacobian matrix for linear solvers that are based on
//! dune-istl.
SET_PROP(ParallelBaseLinearSolver, SparseMatrixAdapter)
{
private:
typedef typename GET_PROP_TYPE(TypeTag, Scalar) Scalar;
enum { numEq = GET_PROP_VALUE(TypeTag, NumEq) };
typedef Opm::MatrixBlock<Scalar, numEq, numEq> Block;
public:
typedef typename Opm::Linear::IstlSparseMatrixAdapter<Block> type;
};
END_PROPERTIES
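// A sketch of how these defaults can be overridden for a concrete solver type
// tag (the tag name `MySolverTypeTag` is hypothetical):
//
//   SET_INT_PROP(MySolverTypeTag, LinearSolverOverlapSize, 3);
//   SET_SCALAR_PROP(MySolverTypeTag, LinearSolverTolerance, 1e-10);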
namespace Opm {
namespace Linear {
/*!
* \ingroup Linear
*
* \brief Provides the common code which is required by most linear solvers.
*
* This class provides access to all preconditioners offered by dune-istl using the
* PreconditionerWrapper property:
* \code
* SET_TYPE_PROP(YourTypeTag, PreconditionerWrapper,
* Opm::Linear::PreconditionerWrapper$PRECONDITIONER<TypeTag>);
* \endcode
*
* Where the choices possible for '\c $PRECONDITIONER' are:
* - \c Jacobi: A Jacobi preconditioner
* - \c GaussSeidel: A Gauss-Seidel preconditioner
* - \c SSOR: A symmetric successive overrelaxation (SSOR) preconditioner
* - \c SOR: A successive overrelaxation (SOR) preconditioner
* - \c ILUn: An ILU(n) preconditioner
* - \c ILU0: An ILU(0) preconditioner. The results of this
* preconditioner are the same as setting the
* PreconditionerOrder property to 0 and using the ILU(n)
* preconditioner. The reason for the existence of ILU0 is
* that it is computationally cheaper because it does not
* need to consider things which are only required for
* higher orders
*/
template <class TypeTag>
class ParallelBaseBackend
{
protected:
typedef typename GET_PROP_TYPE(TypeTag, LinearSolverBackend) Implementation;
typedef typename GET_PROP_TYPE(TypeTag, Simulator) Simulator;
typedef typename GET_PROP_TYPE(TypeTag, Scalar) Scalar;
typedef typename GET_PROP_TYPE(TypeTag, LinearSolverScalar) LinearSolverScalar;
typedef typename GET_PROP_TYPE(TypeTag, SparseMatrixAdapter) SparseMatrixAdapter;
typedef typename GET_PROP_TYPE(TypeTag, GlobalEqVector) Vector;
typedef typename GET_PROP_TYPE(TypeTag, BorderListCreator) BorderListCreator;
typedef typename GET_PROP_TYPE(TypeTag, GridView) GridView;
typedef typename GET_PROP_TYPE(TypeTag, Overlap) Overlap;
typedef typename GET_PROP_TYPE(TypeTag, OverlappingVector) OverlappingVector;
typedef typename GET_PROP_TYPE(TypeTag, OverlappingMatrix) OverlappingMatrix;
typedef typename GET_PROP_TYPE(TypeTag, PreconditionerWrapper) PreconditionerWrapper;
typedef typename PreconditionerWrapper::SequentialPreconditioner SequentialPreconditioner;
typedef Opm::Linear::OverlappingPreconditioner<SequentialPreconditioner, Overlap> ParallelPreconditioner;
typedef Opm::Linear::OverlappingScalarProduct<OverlappingVector, Overlap> ParallelScalarProduct;
typedef Opm::Linear::OverlappingOperator<OverlappingMatrix,
OverlappingVector,
OverlappingVector> ParallelOperator;
enum { dimWorld = GridView::dimensionworld };
public:
ParallelBaseBackend(const Simulator& simulator)
: simulator_(simulator)
, gridSequenceNumber_( -1 )
, lastIterations_( -1 )
{
overlappingMatrix_ = nullptr;
overlappingb_ = nullptr;
overlappingx_ = nullptr;
}
~ParallelBaseBackend()
{ cleanup_(); }
/*!
* \brief Register all run-time parameters for the linear solver.
*/
static void registerParameters()
{
EWOMS_REGISTER_PARAM(TypeTag, Scalar, LinearSolverTolerance,
"The maximum allowed error between of the linear solver");
EWOMS_REGISTER_PARAM(TypeTag, Scalar, LinearSolverAbsTolerance,
"The maximum accepted error of the norm of the residual");
EWOMS_REGISTER_PARAM(TypeTag, unsigned, LinearSolverOverlapSize,
"The size of the algebraic overlap for the linear solver");
EWOMS_REGISTER_PARAM(TypeTag, int, LinearSolverMaxIterations,
"The maximum number of iterations of the linear solver");
EWOMS_REGISTER_PARAM(TypeTag, int, LinearSolverVerbosity,
"The verbosity level of the linear solver");
PreconditionerWrapper::registerParameters();
}
/*!
     * \brief Causes the solve() method to discard the structure of the linear system of
* equations the next time it is called.
*/
void eraseMatrix()
{ cleanup_(); }
/*!
* \brief Set up the internal data structures required for the linear solver.
*
     * This only specifies the topology of the linear system of equations; it does
     * *not* assign the values of the residual vector and its Jacobian matrix.
*/
void prepare(const SparseMatrixAdapter& M, const Vector& )
{
// if grid has changed the sequence number has changed too
int curSeqNum = simulator_.vanguard().gridSequenceNumber();
if (gridSequenceNumber_ == curSeqNum && overlappingMatrix_)
            // the grid has not changed since the overlappingMatrix_ has been
            // created, so there's nothing to do
return;
asImp_().cleanup_();
gridSequenceNumber_ = curSeqNum;
BorderListCreator borderListCreator(simulator_.gridView(),
simulator_.model().dofMapper());
// create the overlapping Jacobian matrix
unsigned overlapSize = EWOMS_GET_PARAM(TypeTag, unsigned, LinearSolverOverlapSize);
overlappingMatrix_ = new OverlappingMatrix(M.istlMatrix(),
borderListCreator.borderList(),
borderListCreator.blackList(),
overlapSize);
// create the overlapping vectors for the residual and the
// solution
overlappingb_ = new OverlappingVector(overlappingMatrix_->overlap());
overlappingx_ = new OverlappingVector(*overlappingb_);
// writeOverlapToVTK_();
}
/*!
* \brief Assign values to the internal data structure for the residual vector.
*
     * This method also takes care of synchronizing the vector across the peer processes.
*/
void setResidual(const Vector& b)
{
// copy the interior values of the non-overlapping residual vector to the
// overlapping one
overlappingb_->assignAddBorder(b);
}
/*!
* \brief Retrieve the synchronized internal residual vector.
*
* This only deals with entries which are local to the current process.
*/
void getResidual(Vector& b) const
{
// update the non-overlapping vector with the overlapping one
overlappingb_->assignTo(b);
}
/*!
* \brief Sets the values of the residual's Jacobian matrix.
*
* This method also synchronizes the data structure across the processes which are
* involved in the simulation run.
*/
void setMatrix(const SparseMatrixAdapter& M)
{
overlappingMatrix_->assignFromNative(M.istlMatrix());
overlappingMatrix_->syncAdd();
}
/*!
* \brief Actually solve the linear system of equations.
*
* \return true if the residual reduction could be achieved, else false.
*/
bool solve(Vector& x)
{
#if ! DUNE_VERSION_NEWER(DUNE_COMMON, 2,7)
Dune::FMatrixPrecision<LinearSolverScalar>::set_singular_limit(1.e-30);
Dune::FMatrixPrecision<LinearSolverScalar>::set_absolute_limit(1.e-30);
#endif
(*overlappingx_) = 0.0;
auto parPreCond = asImp_().preparePreconditioner_();
auto precondCleanupFn = [this]() -> void
{ this->asImp_().cleanupPreconditioner_(); };
auto precondCleanupGuard = Opm::make_guard(precondCleanupFn);
// create the parallel scalar product and the parallel operator
ParallelScalarProduct parScalarProduct(overlappingMatrix_->overlap());
ParallelOperator parOperator(*overlappingMatrix_);
// retrieve the linear solver
auto solver = asImp_().prepareSolver_(parOperator,
parScalarProduct,
*parPreCond);
auto cleanupSolverFn =
[this]() -> void
{ this->asImp_().cleanupSolver_(); };
GenericGuard<decltype(cleanupSolverFn)> solverGuard(cleanupSolverFn);
// run the linear solver and have some fun
auto result = asImp_().runSolver_(solver);
// store number of iterations used
lastIterations_ = result.second;
// copy the result back to the non-overlapping vector
overlappingx_->assignTo(x);
// return the result of the solver
return result.first;
}
/*!
* \brief Return number of iterations used during last solve.
*/
    size_t iterations() const
{ return lastIterations_; }
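    // A sketch of the typical calling sequence (`backend` is the concrete
    // LinearSolverBackend instance, `jacobian` a SparseMatrixAdapter and
    // `residual` a GlobalEqVector; all three names are hypothetical):
    //
    //   backend.prepare(jacobian, residual);  // set up the overlap structures
    //   backend.setResidual(residual);        // synchronize the right-hand side
    //   backend.setMatrix(jacobian);          // synchronize the matrix entries
    //   bool converged = backend.solve(deltaX);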
protected:
Implementation& asImp_()
{ return *static_cast<Implementation *>(this); }
const Implementation& asImp_() const
{ return *static_cast<const Implementation *>(this); }
void cleanup_()
{
        // delete the overlapping Jacobian matrix and the overlapping vectors
        delete overlappingMatrix_;
        delete overlappingb_;
        delete overlappingx_;
        overlappingMatrix_ = nullptr;
        overlappingb_ = nullptr;
        overlappingx_ = nullptr;
}
std::shared_ptr<ParallelPreconditioner> preparePreconditioner_()
{
int preconditionerIsReady = 1;
try {
// update sequential preconditioner
precWrapper_.prepare(*overlappingMatrix_);
}
catch (const Dune::Exception& e) {
std::cout << "Preconditioner threw exception \"" << e.what()
<< " on rank " << overlappingMatrix_->overlap().myRank()
<< "\n" << std::flush;
preconditionerIsReady = 0;
}
// make sure that the preconditioner is also ready on all peer
// ranks.
preconditionerIsReady = simulator_.gridView().comm().min(preconditionerIsReady);
if (!preconditionerIsReady)
throw Opm::NumericalIssue("Creating the preconditioner failed");
// create the parallel preconditioner
return std::make_shared<ParallelPreconditioner>(precWrapper_.get(), overlappingMatrix_->overlap());
}
void cleanupPreconditioner_()
{
precWrapper_.cleanup();
}
void writeOverlapToVTK_()
{
for (int lookedAtRank = 0;
lookedAtRank < simulator_.gridView().comm().size(); ++lookedAtRank) {
std::cout << "writing overlap for rank " << lookedAtRank << "\n" << std::flush;
typedef Dune::BlockVector<Dune::FieldVector<Scalar, 1> > VtkField;
int n = simulator_.gridView().size(/*codim=*/dimWorld);
VtkField isInOverlap(n);
VtkField rankField(n);
isInOverlap = 0.0;
rankField = 0.0;
assert(rankField.two_norm() == 0.0);
assert(isInOverlap.two_norm() == 0.0);
auto vIt = simulator_.gridView().template begin</*codim=*/dimWorld>();
const auto& vEndIt = simulator_.gridView().template end</*codim=*/dimWorld>();
const auto& overlap = overlappingMatrix_->overlap();
for (; vIt != vEndIt; ++vIt) {
int nativeIdx = simulator_.model().vertexMapper().map(*vIt);
int localIdx = overlap.foreignOverlap().nativeToLocal(nativeIdx);
if (localIdx < 0)
continue;
rankField[nativeIdx] = simulator_.gridView().comm().rank();
if (overlap.peerHasIndex(lookedAtRank, localIdx))
isInOverlap[nativeIdx] = 1.0;
}
typedef Dune::VTKWriter<GridView> VtkWriter;
VtkWriter writer(simulator_.gridView(), Dune::VTK::conforming);
writer.addVertexData(isInOverlap, "overlap");
writer.addVertexData(rankField, "rank");
std::ostringstream oss;
oss << "overlap_rank=" << lookedAtRank;
writer.write(oss.str().c_str(), Dune::VTK::ascii);
}
}
const Simulator& simulator_;
int gridSequenceNumber_;
size_t lastIterations_;
OverlappingMatrix *overlappingMatrix_;
OverlappingVector *overlappingb_;
OverlappingVector *overlappingx_;
PreconditionerWrapper precWrapper_;
};
}} // namespace Linear, Opm
BEGIN_PROPERTIES
//! make the linear solver shut up by default
SET_INT_PROP(ParallelBaseLinearSolver, LinearSolverVerbosity, 0);
//! set the preconditioner relaxation parameter to 1.0 by default
SET_SCALAR_PROP(ParallelBaseLinearSolver, PreconditionerRelaxation, 1.0);
//! set the preconditioner order to 0 by default
SET_INT_PROP(ParallelBaseLinearSolver, PreconditionerOrder, 0);
//! by default use the same kind of floating point values for the linearization and for
//! the linear solve
SET_TYPE_PROP(ParallelBaseLinearSolver,
LinearSolverScalar,
typename GET_PROP_TYPE(TypeTag, Scalar));
SET_PROP(ParallelBaseLinearSolver, OverlappingMatrix)
{
private:
static constexpr int numEq = GET_PROP_VALUE(TypeTag, NumEq);
typedef typename GET_PROP_TYPE(TypeTag, LinearSolverScalar) LinearSolverScalar;
typedef Opm::MatrixBlock<LinearSolverScalar, numEq, numEq> MatrixBlock;
typedef Dune::BCRSMatrix<MatrixBlock> NonOverlappingMatrix;
public:
typedef Opm::Linear::OverlappingBCRSMatrix<NonOverlappingMatrix> type;
};
SET_TYPE_PROP(ParallelBaseLinearSolver,
Overlap,
typename GET_PROP_TYPE(TypeTag, OverlappingMatrix)::Overlap);
SET_PROP(ParallelBaseLinearSolver, OverlappingVector)
{
static constexpr int numEq = GET_PROP_VALUE(TypeTag, NumEq);
typedef typename GET_PROP_TYPE(TypeTag, LinearSolverScalar) LinearSolverScalar;
typedef Dune::FieldVector<LinearSolverScalar, numEq> VectorBlock;
typedef typename GET_PROP_TYPE(TypeTag, Overlap) Overlap;
typedef Opm::Linear::OverlappingBlockVector<VectorBlock, Overlap> type;
};
SET_PROP(ParallelBaseLinearSolver, OverlappingScalarProduct)
{
typedef typename GET_PROP_TYPE(TypeTag, OverlappingVector) OverlappingVector;
typedef typename GET_PROP_TYPE(TypeTag, Overlap) Overlap;
typedef Opm::Linear::OverlappingScalarProduct<OverlappingVector, Overlap> type;
};
SET_PROP(ParallelBaseLinearSolver, OverlappingLinearOperator)
{
typedef typename GET_PROP_TYPE(TypeTag, OverlappingMatrix) OverlappingMatrix;
typedef typename GET_PROP_TYPE(TypeTag, OverlappingVector) OverlappingVector;
typedef Opm::Linear::OverlappingOperator<OverlappingMatrix, OverlappingVector,
OverlappingVector> type;
};
#if DUNE_VERSION_NEWER(DUNE_ISTL, 2,7)
SET_TYPE_PROP(ParallelBaseLinearSolver,
PreconditionerWrapper,
Opm::Linear::PreconditionerWrapperILU<TypeTag>);
#else
SET_TYPE_PROP(ParallelBaseLinearSolver,
PreconditionerWrapper,
Opm::Linear::PreconditionerWrapperILU0<TypeTag>);
#endif
//! set the default overlap size to 2
SET_INT_PROP(ParallelBaseLinearSolver, LinearSolverOverlapSize, 2);
//! set the default number of maximum iterations for the linear solver
SET_INT_PROP(ParallelBaseLinearSolver, LinearSolverMaxIterations, 1000);
END_PROPERTIES
#endif

View File

@ -0,0 +1,170 @@
// -*- mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
// vi: set et ts=4 sw=4 sts=4:
/*
This file is part of the Open Porous Media project (OPM).
OPM is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
OPM is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with OPM. If not, see <http://www.gnu.org/licenses/>.
Consult the COPYING file in the top-level source directory of this
module for the precise wording of the license and the list of
copyright holders.
*/
/*!
* \file
* \copydoc Opm::Linear::ParallelBiCGStabSolverBackend
*/
#ifndef EWOMS_PARALLEL_BICGSTAB_BACKEND_HH
#define EWOMS_PARALLEL_BICGSTAB_BACKEND_HH
#include "parallelbasebackend.hh"
#include "bicgstabsolver.hh"
#include "combinedcriterion.hh"
#include "istlsparsematrixadapter.hh"
#include <memory>
namespace Opm {
namespace Linear {
template <class TypeTag>
class ParallelBiCGStabSolverBackend;
}} // namespace Linear, Opm
BEGIN_PROPERTIES
NEW_TYPE_TAG(ParallelBiCGStabLinearSolver, INHERITS_FROM(ParallelBaseLinearSolver));
NEW_PROP_TAG(LinearSolverMaxError);
SET_TYPE_PROP(ParallelBiCGStabLinearSolver,
LinearSolverBackend,
Opm::Linear::ParallelBiCGStabSolverBackend<TypeTag>);
SET_SCALAR_PROP(ParallelBiCGStabLinearSolver, LinearSolverMaxError, 1e7);
END_PROPERTIES
namespace Opm {
namespace Linear {
/*!
* \ingroup Linear
*
* \brief Implements a generic linear solver abstraction.
*
 * Choosing the preconditioner works by setting the "PreconditionerWrapper" property:
*
* \code
* SET_TYPE_PROP(YourTypeTag, PreconditionerWrapper,
* Opm::Linear::PreconditionerWrapper$PRECONDITIONER<TypeTag>);
* \endcode
*
* Where the choices possible for '\c $PRECONDITIONER' are:
* - \c Jacobi: A Jacobi preconditioner
* - \c GaussSeidel: A Gauss-Seidel preconditioner
* - \c SSOR: A symmetric successive overrelaxation (SSOR) preconditioner
* - \c SOR: A successive overrelaxation (SOR) preconditioner
* - \c ILUn: An ILU(n) preconditioner
* - \c ILU0: An ILU(0) preconditioner. The results of this
* preconditioner are the same as setting the
* PreconditionerOrder property to 0 and using the ILU(n)
* preconditioner. The reason for the existence of ILU0 is
* that it is computationally cheaper because it does not
* need to consider things which are only required for
* higher orders
*/
template <class TypeTag>
class ParallelBiCGStabSolverBackend : public ParallelBaseBackend<TypeTag>
{
typedef ParallelBaseBackend<TypeTag> ParentType;
typedef typename GET_PROP_TYPE(TypeTag, Scalar) Scalar;
typedef typename GET_PROP_TYPE(TypeTag, Simulator) Simulator;
typedef typename GET_PROP_TYPE(TypeTag, SparseMatrixAdapter) SparseMatrixAdapter;
typedef typename ParentType::ParallelOperator ParallelOperator;
typedef typename ParentType::OverlappingVector OverlappingVector;
typedef typename ParentType::ParallelPreconditioner ParallelPreconditioner;
typedef typename ParentType::ParallelScalarProduct ParallelScalarProduct;
typedef typename SparseMatrixAdapter::MatrixBlock MatrixBlock;
typedef BiCGStabSolver<ParallelOperator,
OverlappingVector,
ParallelPreconditioner> RawLinearSolver;
    static_assert(std::is_same<SparseMatrixAdapter, IstlSparseMatrixAdapter<MatrixBlock> >::value,
                  "The ParallelBiCGStabSolverBackend linear solver backend requires the IstlSparseMatrixAdapter");
public:
ParallelBiCGStabSolverBackend(const Simulator& simulator)
: ParentType(simulator)
{ }
static void registerParameters()
{
ParentType::registerParameters();
EWOMS_REGISTER_PARAM(TypeTag, Scalar, LinearSolverMaxError,
"The maximum residual error which the linear solver tolerates"
" without giving up");
}
protected:
friend ParentType;
std::shared_ptr<RawLinearSolver> prepareSolver_(ParallelOperator& parOperator,
ParallelScalarProduct& parScalarProduct,
ParallelPreconditioner& parPreCond)
{
const auto& gridView = this->simulator_.gridView();
typedef CombinedCriterion<OverlappingVector, decltype(gridView.comm())> CCC;
Scalar linearSolverTolerance = EWOMS_GET_PARAM(TypeTag, Scalar, LinearSolverTolerance);
Scalar linearSolverAbsTolerance = EWOMS_GET_PARAM(TypeTag, Scalar, LinearSolverAbsTolerance);
        // if no absolute tolerance was specified, fall back to 1/100th of the
        // tolerance of the Newton method
        if (linearSolverAbsTolerance < 0.0)
            linearSolverAbsTolerance = this->simulator_.model().newtonMethod().tolerance() / 100.0;
convCrit_.reset(new CCC(gridView.comm(),
/*residualReductionTolerance=*/linearSolverTolerance,
/*absoluteResidualTolerance=*/linearSolverAbsTolerance,
EWOMS_GET_PARAM(TypeTag, Scalar, LinearSolverMaxError)));
auto bicgstabSolver =
std::make_shared<RawLinearSolver>(parPreCond, *convCrit_, parScalarProduct);
int verbosity = 0;
if (parOperator.overlap().myRank() == 0)
verbosity = EWOMS_GET_PARAM(TypeTag, int, LinearSolverVerbosity);
bicgstabSolver->setVerbosity(verbosity);
bicgstabSolver->setMaxIterations(EWOMS_GET_PARAM(TypeTag, int, LinearSolverMaxIterations));
bicgstabSolver->setLinearOperator(&parOperator);
bicgstabSolver->setRhs(this->overlappingb_);
return bicgstabSolver;
}
std::pair<bool,int> runSolver_(std::shared_ptr<RawLinearSolver> solver)
{
bool converged = solver->apply(*this->overlappingx_);
return std::make_pair(converged, int(solver->report().iterations()));
}
void cleanupSolver_()
{ /* nothing to do */ }
std::unique_ptr<ConvergenceCriterion<OverlappingVector> > convCrit_;
};
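// A sketch of selecting this backend for a simulation (the type tag name
// `MyProblem` is hypothetical):
//
//   NEW_TYPE_TAG(MyProblem, INHERITS_FROM(ParallelBiCGStabLinearSolver));
//   // optionally, tighten the divergence guard:
//   SET_SCALAR_PROP(MyProblem, LinearSolverMaxError, 1e5);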
}} // namespace Linear, Opm
#endif

View File

@ -0,0 +1,173 @@
// -*- mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
// vi: set et ts=4 sw=4 sts=4:
/*
This file is part of the Open Porous Media project (OPM).
OPM is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
OPM is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with OPM. If not, see <http://www.gnu.org/licenses/>.
Consult the COPYING file in the top-level source directory of this
module for the precise wording of the license and the list of
copyright holders.
*/
/*!
* \file
* \copydoc Opm::Linear::ParallelIstlSolverBackend
*/
#ifndef EWOMS_PARALLEL_ISTL_BACKEND_HH
#define EWOMS_PARALLEL_ISTL_BACKEND_HH
#include "parallelbasebackend.hh"
#include "istlsolverwrappers.hh"
#include "istlsparsematrixadapter.hh"
#include <dune/common/version.hh>
BEGIN_PROPERTIES
NEW_TYPE_TAG(ParallelIstlLinearSolver, INHERITS_FROM(ParallelBaseLinearSolver));
NEW_PROP_TAG(LinearSolverWrapper);
NEW_PROP_TAG(SparseMatrixAdapter);
//! number of iterations between solver restarts for the GMRES solver
NEW_PROP_TAG(GMResRestart);
END_PROPERTIES
namespace Opm {
namespace Linear {
/*!
* \ingroup Linear
*
* \brief Provides all unmodified linear solvers from dune-istl
*
* To set the linear solver, use
* \code
* SET_TYPE_PROP(YourTypeTag, LinearSolverWrapper,
* Opm::Linear::SolverWrapper$SOLVER<TypeTag>);
* \endcode
*
* The possible choices for '\c $SOLVER' are:
* - \c Richardson: A fixpoint solver using the Richardson iteration
* - \c SteepestDescent: The steepest descent solver
 * - \c ConjugatedGradients: A conjugate gradient solver
 * - \c BiCGStab: A stabilized bi-conjugate gradient solver
* - \c MinRes: A solver based on the minimized residual algorithm
* - \c RestartedGMRes: A restarted GMRES solver
*
 * Choosing the preconditioner works in an analogous way:
* \code
* SET_TYPE_PROP(YourTypeTag, PreconditionerWrapper,
* Opm::Linear::PreconditionerWrapper$PRECONDITIONER<TypeTag>);
* \endcode
*
* Where the choices possible for '\c $PRECONDITIONER' are:
* - \c Jacobi: A Jacobi preconditioner
* - \c GaussSeidel: A Gauss-Seidel preconditioner
* - \c SSOR: A symmetric successive overrelaxation (SSOR) preconditioner
* - \c SOR: A successive overrelaxation (SOR) preconditioner
* - \c ILUn: An ILU(n) preconditioner
* - \c ILU0: A specialized (and optimized) ILU(0) preconditioner
*/
template <class TypeTag>
class ParallelIstlSolverBackend : public ParallelBaseBackend<TypeTag>
{
typedef ParallelBaseBackend<TypeTag> ParentType;
typedef typename GET_PROP_TYPE(TypeTag, Scalar) Scalar;
typedef typename GET_PROP_TYPE(TypeTag, Simulator) Simulator;
typedef typename GET_PROP_TYPE(TypeTag, LinearSolverWrapper) LinearSolverWrapper;
typedef typename GET_PROP_TYPE(TypeTag, SparseMatrixAdapter) SparseMatrixAdapter;
typedef typename ParentType::ParallelOperator ParallelOperator;
typedef typename ParentType::OverlappingVector OverlappingVector;
typedef typename ParentType::ParallelPreconditioner ParallelPreconditioner;
typedef typename ParentType::ParallelScalarProduct ParallelScalarProduct;
typedef typename SparseMatrixAdapter::MatrixBlock MatrixBlock;
typedef typename LinearSolverWrapper::RawSolver RawLinearSolver;
static_assert(std::is_same<SparseMatrixAdapter, IstlSparseMatrixAdapter<MatrixBlock> >::value,
"The ParallelIstlSolverBackend linear solver backend requires the IstlSparseMatrixAdapter");
public:
ParallelIstlSolverBackend(const Simulator& simulator)
: ParentType(simulator)
{ }
/*!
* \brief Register all run-time parameters for the linear solver.
*/
static void registerParameters()
{
ParentType::registerParameters();
LinearSolverWrapper::registerParameters();
}
protected:
friend ParentType;
std::shared_ptr<RawLinearSolver> prepareSolver_(ParallelOperator& parOperator,
ParallelScalarProduct& parScalarProduct,
ParallelPreconditioner& parPreCond)
{
return solverWrapper_.get(parOperator,
parScalarProduct,
parPreCond);
}
void cleanupSolver_()
{
solverWrapper_.cleanup();
}
std::pair<bool, int> runSolver_(std::shared_ptr<RawLinearSolver> solver)
{
Dune::InverseOperatorResult result;
solver->apply(*this->overlappingx_, *this->overlappingb_, result);
return std::make_pair(result.converged, result.iterations);
}
LinearSolverWrapper solverWrapper_;
};
}} // namespace Linear, Opm
BEGIN_PROPERTIES
SET_TYPE_PROP(ParallelIstlLinearSolver,
LinearSolverBackend,
Opm::Linear::ParallelIstlSolverBackend<TypeTag>);
SET_TYPE_PROP(ParallelIstlLinearSolver,
LinearSolverWrapper,
Opm::Linear::SolverWrapperBiCGStab<TypeTag>);
#if DUNE_VERSION_NEWER(DUNE_ISTL, 2,7)
SET_TYPE_PROP(ParallelIstlLinearSolver,
PreconditionerWrapper,
Opm::Linear::PreconditionerWrapperILU<TypeTag>);
#else
SET_TYPE_PROP(ParallelIstlLinearSolver,
PreconditionerWrapper,
Opm::Linear::PreconditionerWrapperILU0<TypeTag>);
#endif
//! set the GMRes restart parameter to 10 by default
SET_INT_PROP(ParallelIstlLinearSolver, GMResRestart, 10);
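// A sketch of switching to the restarted GMRES solver (the tag name
// `MyProblem` is hypothetical; the wrapper name follows the
// SolverWrapper$SOLVER convention documented above):
//
//   SET_TYPE_PROP(MyProblem, LinearSolverWrapper,
//                 Opm::Linear::SolverWrapperRestartedGMRes<TypeTag>);
//   SET_INT_PROP(MyProblem, GMResRestart, 30);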
END_PROPERTIES
#endif

View File

@ -0,0 +1,152 @@
// -*- mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
// vi: set et ts=4 sw=4 sts=4:
/*
This file is part of the Open Porous Media project (OPM).
OPM is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
OPM is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with OPM. If not, see <http://www.gnu.org/licenses/>.
Consult the COPYING file in the top-level source directory of this
module for the precise wording of the license and the list of
copyright holders.
*/
/*!
* \file
* \copydoc Opm::Linear::ResidReductionCriterion
*/
#ifndef EWOMS_RESID_REDUCTION_CRITERION_HH
#define EWOMS_RESID_REDUCTION_CRITERION_HH
#include "convergencecriterion.hh"
#include <opm/material/common/Unused.hpp>
#include <dune/istl/scalarproducts.hh>
namespace Opm {
namespace Linear {
/*! \addtogroup Linear
* \{
*/
/*!
* \brief Provides a convergence criterion which looks at the
* reduction of the two-norm of the residual for the linear
* solvers.
*
* For the ResidReductionCriterion, the error of the solution is defined
* as
 * \f[ e^k = \frac{\left\| A x^k - b \right\|}{\left\| A x^0 - b \right\|}\;, \f]
*/
template <class Vector>
class ResidReductionCriterion : public ConvergenceCriterion<Vector>
{
typedef typename Vector::field_type Scalar;
public:
ResidReductionCriterion(Dune::ScalarProduct<Vector>& scalarProduct,
Scalar tolerance = 1e-6)
: scalarProduct_(scalarProduct), tolerance_(tolerance)
{}
/*!
     * \brief Set the tolerated reduction of the two-norm of the residual at
     *        which the linear solver is considered to be converged.
*/
void setTolerance(Scalar tol)
{ tolerance_ = tol; }
/*!
     * \brief Return the tolerated reduction of the two-norm of the linear residual.
*/
Scalar tolerance() const
{ return tolerance_; }
/*!
* \copydoc ConvergenceCriterion::setInitial(const Vector& , const Vector& )
*/
void setInitial(const Vector& curSol OPM_UNUSED, const Vector& curResid)
{
static constexpr Scalar eps = std::numeric_limits<Scalar>::min()*1e10;
// make sure that we don't allow an initial error of 0 to avoid
// divisions by zero
curDefect_ = scalarProduct_.norm(curResid);
lastDefect_ = curDefect_;
initialDefect_ = std::max(curDefect_, eps);
}
/*!
* \copydoc ConvergenceCriterion::update(const Vector& , const Vector& )
*/
void update(const Vector& curSol OPM_UNUSED,
const Vector& changeIndicator OPM_UNUSED,
const Vector& curResid)
{
lastDefect_ = curDefect_;
curDefect_ = scalarProduct_.norm(curResid);
}
/*!
* \copydoc ConvergenceCriterion::converged()
*/
bool converged() const
{ return accuracy() <= tolerance(); }
/*!
* \copydoc ConvergenceCriterion::accuracy()
*/
Scalar accuracy() const
{ return curDefect_/initialDefect_; }
/*!
* \copydoc ConvergenceCriterion::printInitial()
*/
void printInitial(std::ostream& os=std::cout) const
{
os << std::setw(20) << "iteration ";
os << std::setw(20) << "residual ";
os << std::setw(20) << "accuracy ";
os << std::setw(20) << "rate ";
os << std::endl;
}
/*!
* \copydoc ConvergenceCriterion::print()
*/
void print(Scalar iter, std::ostream& os=std::cout) const
{
static constexpr Scalar eps = std::numeric_limits<Scalar>::min()*1e10;
os << std::setw(20) << iter << " ";
os << std::setw(20) << curDefect_ << " ";
os << std::setw(20) << accuracy() << " ";
os << std::setw(20) << (lastDefect_/std::max(eps, curDefect_)) << " ";
os << std::endl;
}
private:
Dune::ScalarProduct<Vector>& scalarProduct_;
Scalar tolerance_;
Scalar initialDefect_;
Scalar curDefect_;
Scalar lastDefect_;
};
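// A minimal usage sketch (assumes `Vector` is a Dune block vector type and
// `x` and `r` are the current solution and residual of the linear solver):
//
//   Dune::SeqScalarProduct<Vector> scalarProduct;
//   ResidReductionCriterion<Vector> crit(scalarProduct, /*tolerance=*/1e-8);
//   crit.setInitial(x, r);
//   // ... after each iteration of the linear solver:
//   crit.update(x, /*changeIndicator=*/x, r);
//   bool done = crit.converged();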
//! \} end documentation
}} // end namespace Linear, Opm
#endif

View File

@ -0,0 +1,192 @@
// -*- mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
// vi: set et ts=4 sw=4 sts=4:
/*
This file is part of the Open Porous Media project (OPM).
OPM is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
OPM is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with OPM. If not, see <http://www.gnu.org/licenses/>.
Consult the COPYING file in the top-level source directory of this
module for the precise wording of the license and the list of
copyright holders.
*/
/*!
* \file
* \copydoc Opm::Linear::SuperLUBackend
*/
#ifndef EWOMS_SUPER_LU_BACKEND_HH
#define EWOMS_SUPER_LU_BACKEND_HH
#if HAVE_SUPERLU
#include <opm/simulators/linalg/istlsparsematrixadapter.hh>
#include <ewoms/common/parametersystem.hh>
#include <opm/material/common/Unused.hpp>
#include <dune/istl/superlu.hh>
#include <dune/common/fmatrix.hh>
#include <dune/common/version.hh>
BEGIN_PROPERTIES
// forward declaration of the required property tags
NEW_PROP_TAG(Scalar);
NEW_PROP_TAG(NumEq);
NEW_PROP_TAG(Simulator);
NEW_PROP_TAG(SparseMatrixAdapter);
NEW_PROP_TAG(GlobalEqVector);
NEW_PROP_TAG(LinearSolverVerbosity);
NEW_PROP_TAG(LinearSolverBackend);
NEW_TYPE_TAG(SuperLULinearSolver);
END_PROPERTIES
namespace Opm {
namespace Linear {
template <class Scalar, class TypeTag, class Matrix, class Vector>
class SuperLUSolve_;
/*!
* \ingroup Linear
* \brief A linear solver backend for the SuperLU sparse matrix library.
*/
template <class TypeTag>
class SuperLUBackend
{
typedef typename GET_PROP_TYPE(TypeTag, Scalar) Scalar;
typedef typename GET_PROP_TYPE(TypeTag, Simulator) Simulator;
typedef typename GET_PROP_TYPE(TypeTag, SparseMatrixAdapter) SparseMatrixAdapter;
    typedef typename SparseMatrixAdapter::MatrixBlock MatrixBlock;
    typedef typename SparseMatrixAdapter::IstlMatrix Matrix;
    typedef typename GET_PROP_TYPE(TypeTag, GlobalEqVector) Vector;
    static_assert(std::is_same<SparseMatrixAdapter, IstlSparseMatrixAdapter<MatrixBlock> >::value,
                  "The SuperLU linear solver backend requires the IstlSparseMatrixAdapter");
public:
SuperLUBackend(Simulator& simulator OPM_UNUSED)
{}
static void registerParameters()
{
EWOMS_REGISTER_PARAM(TypeTag, int, LinearSolverVerbosity,
"The verbosity level of the linear solver");
}
/*!
     * \brief Causes the solve() method to discard the structure of the linear system of
* equations the next time it is called.
*
* Since the SuperLU backend does not create any internal matrices, this is a no-op.
*/
void eraseMatrix()
{ }
void prepare(const SparseMatrixAdapter& M, const Vector& b)
{ }
void setResidual(const Vector& b)
{ b_ = &b; }
void getResidual(Vector& b) const
{ b = *b_; }
    void setMatrix(const SparseMatrixAdapter& M)
    { M_ = &M.istlMatrix(); }
bool solve(Vector& x)
{ return SuperLUSolve_<Scalar, TypeTag, Matrix, Vector>::solve_(*M_, x, *b_); }
private:
const Matrix* M_;
    const Vector* b_;
};
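// A sketch of enabling this backend (the type tag name `MyProblem` is
// hypothetical; the SuperLULinearSolver type tag provides the defaults set at
// the end of this file):
//
//   NEW_TYPE_TAG(MyProblem, INHERITS_FROM(SuperLULinearSolver));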
template <class Scalar, class TypeTag, class Matrix, class Vector>
class SuperLUSolve_
{
public:
static bool solve_(const Matrix& A, Vector& x, const Vector& b)
{
Vector bTmp(b);
int verbosity = EWOMS_GET_PARAM(TypeTag, int, LinearSolverVerbosity);
Dune::InverseOperatorResult result;
Dune::SuperLU<Matrix> solver(A, verbosity > 0);
solver.apply(x, bTmp, result);
if (result.converged) {
// make sure that the result only contains finite values.
Scalar tmp = 0;
for (unsigned i = 0; i < x.size(); ++i) {
const auto& xi = x[i];
for (unsigned j = 0; j < Vector::block_type::dimension; ++j)
tmp += xi[j];
}
result.converged = std::isfinite(tmp);
}
return result.converged;
}
};
// the following is required to make the SuperLU adapter of dune-istl happy with
// quadruple precision math on Dune 2.4. it is needed because the highest
// precision which SuperLU can handle is double precision, i.e., when choosing
// SuperLU, the linear systems of equations are always solved with at most
// double precision...
#if HAVE_QUAD
template <class TypeTag, class Matrix, class Vector>
class SuperLUSolve_<__float128, TypeTag, Matrix, Vector>
{
public:
static bool solve_(const Matrix& A,
Vector& x,
const Vector& b)
{
static const int numEq = GET_PROP_VALUE(TypeTag, NumEq);
typedef Dune::FieldVector<double, numEq> DoubleEqVector;
typedef Dune::FieldMatrix<double, numEq, numEq> DoubleEqMatrix;
typedef Dune::BlockVector<DoubleEqVector> DoubleVector;
typedef Dune::BCRSMatrix<DoubleEqMatrix> DoubleMatrix;
// copy the inputs into the double precision data structures
DoubleVector bDouble(b);
DoubleVector xDouble(x);
DoubleMatrix ADouble(A);
        bool res =
            SuperLUSolve_<double, TypeTag, DoubleMatrix, DoubleVector>::solve_(ADouble,
                                                                               xDouble,
                                                                               bDouble);
// copy the result back into the quadruple precision vector.
x = xDouble;
return res;
}
};
#endif
} // namespace Linear
} // namespace Opm
BEGIN_PROPERTIES
SET_INT_PROP(SuperLULinearSolver, LinearSolverVerbosity, 0);
SET_TYPE_PROP(SuperLULinearSolver, LinearSolverBackend,
Opm::Linear::SuperLUBackend<TypeTag>);
END_PROPERTIES
#endif // HAVE_SUPERLU
#endif

View File

@ -0,0 +1,139 @@
// -*- mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
// vi: set et ts=4 sw=4 sts=4:
/*
This file is part of the Open Porous Media project (OPM).
OPM is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
OPM is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with OPM. If not, see <http://www.gnu.org/licenses/>.
Consult the COPYING file in the top-level source directory of this
module for the precise wording of the license and the list of
copyright holders.
*/
/*!
* \file
* \copydoc Opm::Linear::VertexBorderListFromGrid
*/
#ifndef EWOMS_VERTEX_BORDER_LIST_FROM_GRID_HH
#define EWOMS_VERTEX_BORDER_LIST_FROM_GRID_HH
#include "overlaptypes.hh"
#include "blacklist.hh"
#include <opm/material/common/Unused.hpp>
#include <dune/grid/common/datahandleif.hh>
#include <dune/grid/common/gridenums.hh>
#include <dune/istl/bcrsmatrix.hh>
#include <dune/istl/scalarproducts.hh>
#include <dune/istl/operators.hh>
#include <dune/common/version.hh>
#include <algorithm>
namespace Opm {
namespace Linear {
/*!
* \brief Uses communication on the grid to find the initial seed list
* of indices.
*
* \todo implement this class generically. For this, it must be
* possible to query the mapper whether it contains entities of
* a given codimension without the need to hand it an actual
* entity.
*/
template <class GridView, class VertexMapper>
class VertexBorderListFromGrid
: public Dune::CommDataHandleIF<VertexBorderListFromGrid<GridView, VertexMapper>,
int>
{
static const int dimWorld = GridView::dimensionworld;
public:
VertexBorderListFromGrid(const GridView& gridView, const VertexMapper& map)
: gridView_(gridView), map_(map)
{
gridView.communicate(*this,
Dune::InteriorBorder_InteriorBorder_Interface,
Dune::ForwardCommunication);
auto vIt = gridView.template begin<dimWorld>();
const auto& vEndIt = gridView.template end<dimWorld >();
        // black-list all vertices which are neither interior nor border
        // entities
        for (; vIt != vEndIt; ++vIt) {
if (vIt->partitionType() != Dune::InteriorEntity
&& vIt->partitionType() != Dune::BorderEntity)
{
Index vIdx = static_cast<Index>(map_.index(*vIt));
blackList_.addIndex(vIdx);
}
}
}
// data handle methods
bool contains(int dim, int codim) const
{ return dim == codim; }
bool fixedsize(int dim OPM_UNUSED, int codim OPM_UNUSED) const
{ return true; }
template <class EntityType>
size_t size(const EntityType& e OPM_UNUSED) const
{ return 2; }
template <class MessageBufferImp, class EntityType>
void gather(MessageBufferImp& buff, const EntityType& e) const
{
buff.write(static_cast<int>(gridView_.comm().rank()));
buff.write(static_cast<int>(map_.index(e)));
}
template <class MessageBufferImp, class EntityType>
void scatter(MessageBufferImp& buff, const EntityType& e, size_t n OPM_UNUSED)
{
BorderIndex bIdx;
bIdx.localIdx = static_cast<Index>(map_.index(e));
{
int tmp;
buff.read(tmp);
bIdx.peerRank = static_cast<ProcessRank>(tmp);
}
{
int tmp;
buff.read(tmp);
bIdx.peerIdx = static_cast<Index>(tmp);
}
bIdx.borderDistance = 0;
borderList_.push_back(bIdx);
}
// Access to the border list.
const BorderList& borderList() const
{ return borderList_; }
// Access to the black-list indices.
const BlackList& blackList() const
{ return blackList_; }
private:
const GridView gridView_;
const VertexMapper& map_;
BorderList borderList_;
BlackList blackList_;
};
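// A usage sketch (assumes a DUNE grid view and a matching vertex mapper, in
// the same way the parallel backends use their BorderListCreator property):
//
//   VertexBorderListFromGrid<GridView, VertexMapper>
//       borderListCreator(gridView, vertexMapper);
//   const BorderList& borderList = borderListCreator.borderList();
//   const BlackList& blackList = borderListCreator.blackList();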
} // namespace Linear
} // namespace Opm
#endif

View File

@ -0,0 +1,320 @@
// -*- mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
// vi: set et ts=4 sw=4 sts=4:
/*
This file is part of the Open Porous Media project (OPM).
OPM is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
OPM is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with OPM. If not, see <http://www.gnu.org/licenses/>.
Consult the COPYING file in the top-level source directory of this
module for the precise wording of the license and the list of
copyright holders.
*/
/*!
* \file
* \copydoc Opm::Linear::WeightedResidualReductionCriterion
*/
#ifndef EWOMS_WEIGHTED_RESIDUAL_REDUCTION_CRITERION_HH
#define EWOMS_WEIGHTED_RESIDUAL_REDUCTION_CRITERION_HH
#include "convergencecriterion.hh"
#include <iostream>
namespace Opm {
namespace Linear {
/*! \addtogroup Linear
* \{
*/
/*!
* \brief Convergence criterion which looks at the weighted absolute
* value of the residual
*
* For the WeightedResidualReductionCriterion, the error of the
* solution is defined as
* \f[ e^k = \max_i\{ \left| w_i r^k_i \right| \}\;, \f]
*
* where \f$r^k = \mathbf{A} x^k - b \f$ is the residual for the
* k-th iterative solution vector \f$x^k\f$ and \f$w_i\f$ is the
* weight of the \f$i\f$-th linear equation.
*
 * In addition to the reduction of the maximum defect, the linear solver is
 * also considered to be converged if the defect goes below a given absolute
 * limit.
*/
template <class Vector, class CollectiveCommunication>
class WeightedResidualReductionCriterion : public ConvergenceCriterion<Vector>
{
typedef typename Vector::field_type Scalar;
typedef typename Vector::block_type BlockType;
public:
WeightedResidualReductionCriterion(const CollectiveCommunication& comm)
: comm_(comm)
{}
WeightedResidualReductionCriterion(const CollectiveCommunication& comm,
const Vector& residWeights,
Scalar residualReductionTolerance,
Scalar fixPointTolerance,
Scalar absResidualTolerance = 0.0,
Scalar maxError = 0.0)
: comm_(comm),
residWeightVec_(residWeights),
fixPointTolerance_(fixPointTolerance),
residualReductionTolerance_(residualReductionTolerance),
absResidualTolerance_(absResidualTolerance),
maxError_(maxError)
{ }
/*!
* \brief Sets the relative weight of each row of the residual.
*
* For the WeightedResidualReductionCriterion, the error of the solution is
* defined as
* \f[ e^k = \max_i\{ \left| w_i r^k_i \right| \}\;, \f]
*
* where \f$r^k = \mathbf{A} x^k - b \f$ is the residual for the
* k-th iterative solution vector \f$x^k\f$ and \f$w_i\f$ is the
* weight of the \f$i\f$-th linear equation.
*
* This method is not part of the generic ConvergenceCriteria interface.
*
* \param residWeightVec A Dune::BlockVector<Dune::FieldVector<Scalar, n> >
* with the relative weights of the linear equations
*/
void setResidualWeight(const Vector& residWeightVec)
{ residWeightVec_ = residWeightVec; }
/*!
* \brief Return the relative weight of a row of the residual.
*
* \param outerIdx The index of the outer vector (i.e. Dune::BlockVector)
* \param innerIdx The index of the inner vector (i.e. Dune::FieldVector)
*/
Scalar residualWeight(size_t outerIdx, unsigned innerIdx) const
{
return (residWeightVec_.size() == 0)
? 1.0
: residWeightVec_[outerIdx][innerIdx];
}
/*!
* \brief Sets the residual reduction tolerance.
*/
void setResidualReductionTolerance(Scalar tol)
{ residualReductionTolerance_ = tol; }
/*!
* \brief Returns the tolerance of the residual reduction of the solution.
*/
Scalar residualReductionTolerance() const
{ return residualReductionTolerance_; }
/*!
* \brief Sets the maximum absolute tolerated residual.
*/
void setResidualTolerance(Scalar tol)
{ absResidualTolerance_ = tol; }
/*!
* \brief Returns the maximum absolute tolerated residual.
*/
Scalar absResidualTolerance() const
{ return absResidualTolerance_; }
/*!
* \brief Returns the reduction of the weighted maximum of the
* residual compared to the initial solution.
*/
Scalar residualAccuracy() const
{ return residualError_/std::max<Scalar>(1e-20, initialResidualError_); }
/*!
* \brief Sets the fix-point tolerance.
*/
void setFixPointTolerance(Scalar tol)
{ fixPointTolerance_ = tol; }
/*!
* \brief Returns the maximum tolerated difference between two
* iterations to be met before a solution is considered to be
* converged.
*/
Scalar fixPointTolerance() const
{ return fixPointTolerance_; }
/*!
* \brief Returns the weighted maximum of the difference
* between the last two iterative solutions.
*/
Scalar fixPointAccuracy() const
{ return fixPointError_; }
/*!
* \copydoc ConvergenceCriterion::setInitial(const Vector& , const Vector& )
*/
void setInitial(const Vector& curSol, const Vector& curResid)
{
lastResidualError_ = 1e100;
lastSolVec_ = curSol;
updateErrors_(curSol, curResid);
// the fix-point error is not applicable for the initial solution!
fixPointError_ = 1e100;
// make sure that we don't allow an initial error of 0 to avoid
// divisions by zero
residualError_ = std::max<Scalar>(residualError_, 1e-20);
initialResidualError_ = residualError_;
}
/*!
* \copydoc ConvergenceCriterion::update(const Vector& , const Vector& )
*/
void update(const Vector& curSol,
const Vector& changeIndicator OPM_UNUSED,
const Vector& curResid)
{
lastResidualError_ = residualError_;
updateErrors_(curSol, curResid);
}
/*!
* \copydoc ConvergenceCriterion::converged()
*/
bool converged() const
{
        // we're converged if either the residual reduction or the absolute
        // residual meets its tolerance
return
residualAccuracy() <= residualReductionTolerance() ||
residualError_ <= absResidualTolerance_;
}
/*!
* \copydoc ConvergenceCriterion::failed()
*/
bool failed() const
{
return
(!converged() && fixPointError_ <= fixPointTolerance_)
|| residualError_ > maxError_;
}
/*!
* \copydoc ConvergenceCriterion::accuracy()
*
     * For the accuracy, only the residual is taken into account.
*/
Scalar accuracy() const
{ return residualError_; }
/*!
* \copydoc ConvergenceCriterion::printInitial()
*/
void printInitial(std::ostream& os = std::cout) const
{
os << std::setw(20) << " Iter ";
os << std::setw(20) << " Delta ";
os << std::setw(20) << " Residual ";
os << std::setw(20) << " ResidRed ";
os << std::setw(20) << " Rate ";
os << std::endl;
}
/*!
* \copydoc ConvergenceCriterion::print()
*/
void print(Scalar iter, std::ostream& os = std::cout) const
{
static constexpr Scalar eps = std::numeric_limits<Scalar>::min()*1e10;
os << std::setw(20) << iter << " ";
os << std::setw(20) << fixPointAccuracy() << " ";
os << std::setw(20) << residualError_ << " ";
os << std::setw(20) << 1.0/residualAccuracy() << " ";
os << std::setw(20) << lastResidualError_ / std::max<Scalar>(residualError_, eps) << " ";
os << std::endl << std::flush;
}
private:
// update the weighted absolute residual
void updateErrors_(const Vector& curSol, const Vector& curResid)
{
residualError_ = 0.0;
fixPointError_ = 0.0;
for (size_t i = 0; i < curResid.size(); ++i) {
for (unsigned j = 0; j < BlockType::dimension; ++j) {
residualError_ =
std::max<Scalar>(residualError_,
residualWeight(i, j)*std::abs(curResid[i][j]));
fixPointError_ =
std::max<Scalar>(fixPointError_,
std::abs(curSol[i][j] - lastSolVec_[i][j])
/std::max<Scalar>(1.0, curSol[i][j]));
}
}
lastSolVec_ = curSol;
residualError_ = comm_.max(residualError_);
fixPointError_ = comm_.max(fixPointError_);
}
const CollectiveCommunication& comm_;
// the weights of the components of the residual
Vector residWeightVec_;
// solution vector of the last iteration
Vector lastSolVec_;
// the maximum of the weighted difference between the last two
// iterations
Scalar fixPointError_;
// the maximum allowed relative tolerance for difference of the
// solution of two iterations
Scalar fixPointTolerance_;
// the maximum of the absolute weighted residual of the last
// iteration
Scalar residualError_;
    // the maximum of the absolute weighted residual of the previous
    // iteration
Scalar lastResidualError_;
// the maximum of the absolute weighted residual of the initial
// solution
Scalar initialResidualError_;
// the maximum allowed relative tolerance of the residual for the
// solution to be considered converged
Scalar residualReductionTolerance_;
// the maximum allowed absolute tolerance of the residual for the
// solution to be considered converged
Scalar absResidualTolerance_;
// The maximum error which is tolerated before we fail.
Scalar maxError_;
};
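// A usage sketch (hypothetical names; `comm` is the grid view's collective
// communication and `weights` holds one weight per scalar equation):
//
//   WeightedResidualReductionCriterion<Vector, decltype(comm)>
//       crit(comm, weights,
//            /*residualReductionTolerance=*/1e-6,
//            /*fixPointTolerance=*/0.0,
//            /*absResidualTolerance=*/1e-10,
//            /*maxError=*/1e7);
//   crit.setInitial(x, r);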
//! \} end documentation
}} // end namespace Linear, Opm
#endif