From 7bce15c04b9d697b8544b6ded7cd22fab5a29404 Mon Sep 17 00:00:00 2001
From: Markus Blatt <markus@dr-blatt.de>
Date: Wed, 21 Jan 2015 15:44:20 +0100
Subject: [PATCH] Added methods for computing global reductions.

We need to compute quite a few global reductions in the
Newton method of opm-autodiff. This commit adds the functionality
to compute several reductions combined using only one global
communication. Compiles and test succeeds with one or more process.
---
 opm/core/linalg/ParallelIstlInformation.hpp | 301 +++++++++++++++++++-
 tests/DuneIstlTestHelpers.hpp               | 220 ++++++++++++++
 tests/test_parallel_linearsolver.cpp        | 183 +-----------
 tests/test_parallelistlinformation.cpp      |  81 ++++++
 4 files changed, 598 insertions(+), 187 deletions(-)
 create mode 100644 tests/DuneIstlTestHelpers.hpp
 create mode 100644 tests/test_parallelistlinformation.cpp
diff --git a/opm/core/linalg/ParallelIstlInformation.hpp b/opm/core/linalg/ParallelIstlInformation.hpp
index 38795774c..479f4be98 100644
--- a/opm/core/linalg/ParallelIstlInformation.hpp
+++ b/opm/core/linalg/ParallelIstlInformation.hpp
@@ -1,5 +1,7 @@
 /*
-  Copyright 2014 Dr. Markus Blatt - HPC-Simulation-Software & Services
+  Copyright 2014, 2015 Dr. Markus Blatt - HPC-Simulation-Software & Services
+  Copyright 2014 Statoil ASA
+  Copyright 2015 NTNU
 
   This file is part of the Open Porous Media project (OPM).
 
@@ -19,6 +21,12 @@
 #ifndef OPM_PARALLELISTLINFORMTION_HEADER_INCLUDED
 #define OPM_PARALLELISTLINFORMTION_HEADER_INCLUDED
 
+
+#include <opm/core/grid.h>
+#include <opm/core/utility/ErrorMacros.hpp>
+#include <boost/any.hpp>
+#include <exception>
+
 #if HAVE_MPI && HAVE_DUNE_ISTL
 
 #include "mpi.h"
@@ -28,9 +36,23 @@
 #include <dune/common/enumset.hh>
 
 #include<algorithm>
+#include<limits>
+#include<type_traits>
 
 namespace Opm
 {
+namespace
+{
+
+    template<class T>
+    struct is_tuple
+        : std::integral_constant<bool, false>
+    {};
+    template<typename... T>
+    struct is_tuple<std::tuple<T...> >
+        : std::integral_constant<bool, true>
+    {};
+}
 
 /// \brief Class that encapsulates the parallelization information needed by the
 /// ISTL solvers.
@@ -81,8 +103,8 @@ public:
     {
         return remoteIndices_;
     }
-    /// \brief Get the MPI communicator that we use.
-    MPI_Comm communicator() const
+    /// \brief Get the Collective MPI communicator that we use.
+    Dune::CollectiveCommunication<MPI_Comm> communicator() const
     {
         return communicator_;
     }
@@ -115,9 +137,143 @@ public:
       communicator.template build<T>(interface);
       communicator.template forward<CopyGatherScatter<T> >(source,dest);
       communicator.free();
-    }    
+    }
+    template<class T>
+    void updateOwnerMask(const T& container)
+    {
+        if(! indexSet_)
+            OPM_THROW(std::runtime_error, "Trying to update owner mask without parallel information!");
+        if(container.size()!= ownerMask_.size())
+        {
+            ownerMask_.resize(container.size(), 1.);
+            for(auto i=indexSet_->begin(), end=indexSet_->end(); i!=end; ++i)
+                if (i->local().attribute()!=Dune::OwnerOverlapCopyAttributeSet::owner)
+                    ownerMask_[i->local().local()] = 0.;
+        }
+    };
+    /// \brief Compute one or more global reductions.
+    ///
+    /// This function can either be used with a container, an operator, and an initial value
+    /// to compute a reduction. Or with tuples of them to compute multiple reductions with only
+    /// one global communication.
+    /// \tparam type of the container or the tuple of  containers.
+    /// \tparam tyoe of the operator or a tuple of operators, examples are e.g. 
+    /// Reduction::MaskIDOperator, Reduction::MaskToMinOperator,
+    /// and Reduction::MaskToMaxOperator. Has to provide an operator() that takes three
+    /// arguments (the last one is the mask value: 1 for a dof that we own, 0 otherwise),
+    /// a method maskValue that takes a value and mask value, and localOperator that
+    /// returns the underlying binary operator.
+    /// \param container A container or tuple of containers.
+    /// \param binaryOperator An operator doing the reduction of two values.
+    /// \param value The initial value or a tuple of them.
+    template<typename Container, typename BinaryOperator, typename T>
+    void computeReduction(const Container& container, BinaryOperator binaryOperator,
+                          T& value)
+    {
+        computeReduction(container, binaryOperator, value, is_tuple<Container>());
+    }
 private:
-        /** \brief gather/scatter callback for communcation */
+    /// \brief compute the reductions for tuples.
+    ///
+    /// This is a helper function to prepare for calling computeTupleReduction.
+    template<typename Container, typename BinaryOperator, typename T>
+    void computeReduction(const Container& container, BinaryOperator binaryOperator,
+                          T& value, std::integral_constant<bool,true>)
+    {
+        computeTupleReduction(container, binaryOperator, value);
+    }
+    /// \brief compute the reductions for non-tuples.
+    ///
+    /// This is a helper function to prepare for calling computeTupleReduction.
+    template<typename Container, typename BinaryOperator, typename T>
+    void computeReduction(const Container& container, BinaryOperator binaryOperator,
+                          T& value, std::integral_constant<bool,false>)
+    {
+        std::tuple<const Container&> containers=std::tuple<const Container&>(container);
+        auto values=std::make_tuple(value);
+        auto operators=std::make_tuple(binaryOperator);
+        computeTupleReduction(containers, operators, values);
+        value=std::get<0>(values);
+    }
+    /// \brief Compute the reductions for tuples.
+    template<typename... Containers, typename... BinaryOperators, typename... ReturnValues>
+    void computeTupleReduction(const std::tuple<Containers...>& containers,
+                               std::tuple<BinaryOperators...>& operators,
+                               std::tuple<ReturnValues...>& values)
+    {
+        static_assert(std::tuple_size<std::tuple<Containers...> >::value==
+                      std::tuple_size<std::tuple<BinaryOperators...> >::value,
+                      "We need the same number of containers and binary operators");
+        static_assert(std::tuple_size<std::tuple<Containers...> >::value==
+                      std::tuple_size<std::tuple<ReturnValues...> >::value,
+                      "We need the same number of containers and return values");
+        if(std::tuple_size<std::tuple<Containers...> >::value==0)
+            return;
+        // Copy the initial values.
+        std::tuple<ReturnValues...> init=values;
+        updateOwnerMask(std::get<0>(containers));
+        computeLocalReduction(containers, operators, values);
+        auto val=std::get<0>(values);
+        std::vector<std::tuple<ReturnValues...> > receivedValues(communicator_.size());
+        communicator_.allgather(&values, 1, &(receivedValues[0]));
+        values=init;
+        for(auto rvals=receivedValues.begin(), endvals=receivedValues.end(); rvals!=endvals;
+            ++rvals)
+            computeGlobalReduction(*rvals, operators, values);
+    }
+    /// \brief TMP for computing the the global reduction after receiving the local ones.
+    ///
+    /// End of recursion.
+    template<int I=0, typename... BinaryOperators, typename... ReturnValues>
+    typename std::enable_if<I == sizeof...(BinaryOperators), void>::type
+    computeGlobalReduction(const std::tuple<ReturnValues...>&,
+                                std::tuple<BinaryOperators...>&,
+                                std::tuple<ReturnValues...>&)
+    {}
+    /// \brief TMP for computing the the global reduction after receiving the local ones.
+    template<int I=0, typename... BinaryOperators, typename... ReturnValues>
+    typename std::enable_if<I !=sizeof...(BinaryOperators), void>::type
+    computeGlobalReduction(const std::tuple<ReturnValues...>& receivedValues,
+                           std::tuple<BinaryOperators...>& operators,
+                           std::tuple<ReturnValues...>& values)
+    {
+        auto& val=std::get<I>(values);
+        val = std::get<I>(operators).localOperator()(val, std::get<I>(receivedValues));
+    }
+    /// \brief TMP for computing the the local reduction on the DOF that the process owns.
+    ///
+    /// End of recursion.
+    template<int I=0, typename... Containers, typename... BinaryOperators, typename... ReturnValues>
+    typename std::enable_if<I==sizeof...(Containers), void>::type
+    computeLocalReduction(const std::tuple<Containers...>&,
+                          std::tuple<BinaryOperators...>&,
+                          std::tuple<ReturnValues...>&)
+    {}
+    /// \brief TMP for computing the the local reduction on the DOF that the process owns.
+    template<int I=0, typename... Containers, typename... BinaryOperators, typename... ReturnValues>
+    typename std::enable_if<I!=sizeof...(Containers), void>::type
+    computeLocalReduction(const std::tuple<Containers...>& containers,
+                          std::tuple<BinaryOperators...>& operators,
+                          std::tuple<ReturnValues...>& values)
+    {
+        const auto& container = std::get<I>(containers);
+        if(container.size())
+        {
+            auto& reduceOperator  = std::get<I>(operators);
+            auto newVal = container.begin();
+            auto mask   = ownerMask_.begin();
+            auto& value = std::get<I>(values);
+            value = reduceOperator.maskValue(*newVal, *mask);
+            ++mask;
+            ++newVal;
+
+            for(auto endVal=container.end(); newVal!=endVal;
+                ++newVal, ++mask)
+            value = reduceOperator(value, *newVal, *mask);
+        }
+        computeLocalReduction<I+1>(containers, operators, values);
+    }
+    /** \brief gather/scatter callback for communcation */
     template<typename T>
     struct CopyGatherScatter
     {
@@ -153,8 +309,141 @@ private:
 
     std::shared_ptr<ParallelIndexSet> indexSet_;
     std::shared_ptr<RemoteIndices> remoteIndices_;
-    MPI_Comm communicator_;
+    Dune::CollectiveCommunication<MPI_Comm> communicator_;
+    mutable std::vector<double> ownerMask_;
 };
+
+    namespace Reduction
+    {
+    /// \brief An operator that only uses values where mask is 1.
+    ///
+    /// Could be used to compute a global sum
+    /// \tparam BinaryOperator The wrapped binary operator that specifies
+    // the reduction operation.
+    template<typename BinaryOperator>
+    struct MaskIDOperator
+    {
+        /// \brief Apply the underlying binary operator according to the mask.
+        ///
+        /// The BinaryOperator will be called with t1, and mask*t2.
+        /// \param t1 first value
+        /// \param t2 second value (might be modified).
+        /// \param mask The mask (0 or 1).
+        template<class T, class T1>
+        T operator()(const T& t1, const T& t2, const T1& mask)
+        {
+            b_(t1, maskValue(t2, mask));
+        }
+        template<class T, class T1>
+        T maskValue(const T& t, const T1& mask)
+        {
+            return t*mask;
+        }
+        BinaryOperator& localOperator()
+        {
+            return b_;
+        }
+    private:
+        BinaryOperator b_;
+    };
+
+    /// \brief An operator that converts the values where mask is 0 to the minimum value
+    ///
+    /// Could be used to compute a global maximum.
+    /// \tparam BinaryOperator The wrapped binary operator that specifies
+    // the reduction operation.
+    template<typename BinaryOperator>
+    struct MaskToMinOperator
+    {
+        /// \brief Apply the underlying binary operator according to the mask.
+        ///
+        /// If mask is 0 then t2 will be substituted by the lowest value,
+        /// else t2 will be used.
+        /// \param t1 first value
+        /// \param t2 second value (might be modified).
+        template<class T, class T1>
+        T operator()(const T& t1, const T& t2, const T1& mask)
+        {
+            b_(t1, maskValue(t2, mask));
+        }
+        template<class T, class T1>
+        T maskValue(const T& t, const T1& mask)
+        {
+            if(mask)
+                return t;
+            else{
+                //g++-4.4 does not support std::numeric_limits<T>::lowest();
+                // we rely on IEE 754 for floating point values and use min()
+                // for integral types.
+                if(std::is_integral<T>::value)
+                    return -std::numeric_limits<float>::min();
+                else
+                    return -std::numeric_limits<float>::max();
+            }
+        }
+        /// \brief Get the underlying binary operator.
+        ///
+        /// This might be needed to compute the reduction after each processor
+        /// has computed its local one.
+        BinaryOperator& localOperator()
+        {
+            return b_;
+        }
+    private:
+        BinaryOperator b_;
+    };
+
+    /// \brief An operator that converts the values where mask is 0 to the maximum value
+    ///
+    /// Could be used to compute a global minimum.
+    template<typename BinaryOperator>
+    struct MaskToMaxOperator
+    {
+        /// \brief Apply the underlying binary operator according to the mask.
+        ///
+        /// If mask is 0 then t2 will be substituted by the maximum value,
+        /// else t2 will be used.
+        /// \param t1 first value
+        /// \param t2 second value (might be modified).
+        template<class T, class T1>
+        T operator()(const T& t1, const T& t2, const T1& mask)
+        {
+            b_(t1, maskValue(t2, mask));
+        }
+        template<class T, class T1>
+        T maskValue(const T& t, const T1& mask)
+        {
+            if(mask)
+                return t;
+            else
+                return std::numeric_limits<T>::max();
+        }
+        BinaryOperator& localOperator()
+        {
+            return b_;
+        }
+    private:
+        BinaryOperator b_;
+    };
+    } // end namespace Reduction
 } // end namespace Opm
+
 #endif
+
+namespace Opm
+{
+/// \brief Extracts the information about the data decomposition from the grid for dune-istl
+/// 
+/// In the case that grid is a parallel grid this method will query it to get the information
+/// about the data decompoisition and convert it to the format expected by the linear algebra
+/// of dune-istl.
+/// \warn for UnstructuredGrid this function doesn't do anything.
+/// \param anyComm The handle to store the information in. If grid is a parallel grid
+/// then this will ecapsulate an instance of ParallelISTLInformation.
+/// \param grid The grid to inspect.
+
+inline void extractParallelGridInformationToISTL(boost::any& anyComm, const UnstructuredGrid& grid)
+{}
+} // end namespace Opm
+
 #endif
diff --git a/tests/DuneIstlTestHelpers.hpp b/tests/DuneIstlTestHelpers.hpp
new file mode 100644
index 000000000..b13100f03
--- /dev/null
+++ b/tests/DuneIstlTestHelpers.hpp
@@ -0,0 +1,220 @@
+/*
+  Copyright 2014 Dr. Markus Blatt - HPC-Simulation-Software & Services
+  Copyright 2014 Statoil ASA
+
+  This file is part of the Open Porous Media project (OPM).
+
+  OPM is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+
+  OPM is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with OPM.  If not, see <http://www.gnu.org/licenses/>.
+*/
+#ifndef OPM_DUNEISTLTESTHELPERS_HEADER
+#define OPM_DUNEISTLTESTHELPERS_HEADER
+// MPI header
+#if HAVE_MPI
+#include <mpi.h>
+#else
+#error "This file needs to compiled with MPI support!"
+#endif
+
+#include <dune/common/version.hh>
+#if DUNE_VERSION_NEWER(DUNE_COMMON, 2, 3)
+#include <dune/common/parallel/mpicollectivecommunication.hh>
+#include <dune/common/parallel/collectivecommunication.hh>
+#else
+#include <dune/common/mpicollectivecommunication.hh>
+#include <dune/common/collectivecommunication.hh>
+#endif
+
+#include <dune/common/parallel/indexset.hh>
+#include <dune/common/parallel/communicator.hh>
+#include <dune/common/parallel/remoteindices.hh>
+#include <dune/istl/owneroverlapcopy.hh>
+
+#include <tuple>
+
+struct MPIFixture {
+    MPIFixture()
+    {
+        int m_argc = boost::unit_test::framework::master_test_suite().argc;
+        char** m_argv = boost::unit_test::framework::master_test_suite().argv;
+        MPI_Init(&m_argc, &m_argv);
+    }
+    ~MPIFixture()
+    {
+        MPI_Finalize();
+    }
+};
+
+
+BOOST_GLOBAL_FIXTURE(MPIFixture);
+
+struct MyMatrix
+{
+    MyMatrix(std::size_t rows, std::size_t nnz)
+        : data(nnz, 0.0), rowStart(rows+1, -1),
+          colIndex(nnz, -1)
+    {}
+    MyMatrix()
+        : data(), rowStart(), colIndex()
+    {}
+
+    std::vector<double> data;
+    std::vector<int> rowStart;
+    std::vector<int> colIndex;
+};
+
+typedef int LocalId;
+typedef int GlobalId;
+typedef Dune::OwnerOverlapCopyCommunication<GlobalId,LocalId> Communication;
+typedef Dune::OwnerOverlapCopyAttributeSet GridAttributes;
+typedef GridAttributes::AttributeSet GridFlag;
+typedef Dune::ParallelLocalIndex<GridFlag> LocalIndex;
+
+/// \brief Sets up a paralle Laplacian.
+///
+/// The process stores the unknowns with indices in the range [start, end).
+/// As we use an overlapping domain decomposition, the process owns the indices
+/// in the range [istart, iend]. If we would only used the indices in this range then
+/// they form a partitioning of the whole index set.
+/// \tparam I The type of the parallel index set (for convenience)
+/// \param indexset The parallel index set for marking owner and copy region.
+/// \param N The global number of unknowns of the system.
+/// \param start The first index stored on this process
+/// \param end One past the last index stored on this process
+/// \param istart The first index that the process owns.
+/// \param iend One past the last index the process owns.
+template<class I>
+std::shared_ptr<MyMatrix> create1DLaplacian(I& indexset, int N, int start, int end,
+                                            int istart, int iend)
+{
+    indexset.beginResize();
+    MyMatrix* mm=new MyMatrix(end-start, (end-start)*3);
+    int nnz=0;
+    mm->rowStart[0]=0;
+    assert(start==0||start<istart);
+    assert(end==N||iend<end);
+
+    for(int row=start, localRow=0; row<end; row++, localRow++)
+    {
+        if(row<istart || row>=iend)
+        {
+            // We are in the overlap region of the grid
+            // therefore we setup the system such that
+            // right hand side will equal the left hand side
+            // of the linear system.
+            if(localRow>0)
+            {
+                mm->colIndex[nnz]=localRow-1;
+                mm->data[nnz++]=0;
+            }
+            mm->colIndex[nnz]=localRow;
+            mm->data[nnz++]=1.0;
+            indexset.add(row, LocalIndex(localRow, GridAttributes::copy, true));
+            if(localRow<end-1)
+            {
+                mm->colIndex[nnz]=localRow+1;
+                mm->data[nnz++]=0;
+            }
+            mm->rowStart[localRow+1]=nnz;
+            continue;
+        }
+
+        double dval=0;
+        if(row>0)
+        {
+            mm->colIndex[nnz]=localRow-1;
+            mm->data[nnz++]=-1;
+            dval+=1;
+        }
+        mm->colIndex[nnz]=localRow;
+        mm->data[nnz++]=2;//dval+(row<N-1);
+        if(row<N-1)
+        {
+            mm->colIndex[nnz]=localRow+1;
+            mm->data[nnz++]=-1;
+            dval+=1;
+        }
+        mm->rowStart[localRow+1]=nnz;
+        indexset.add(row, LocalIndex(localRow, GridAttributes::owner, true));
+    }
+    mm->data.resize(nnz);
+    mm->colIndex.resize(nnz);
+    indexset.endResize();
+    return std::shared_ptr<MyMatrix>(mm);
+}
+
+template<class O>
+void createRandomVectors(O& pinfo, int NN, std::vector<double>& x, std::vector<double>& b,
+                         const MyMatrix& mat)
+{
+    x.resize(NN);
+    for(auto entry=x.begin(), end =x.end(); entry!=end; ++entry)
+        *entry=((double) (rand()%100))/10.0;
+
+    pinfo.copyOwnerToAll(x,x);
+
+    b.resize(NN);
+
+    // Construct the right hand side as b=A*x
+    std::fill(b.begin(), b.end(), 0.0);
+    for(std::size_t row=0; row<mat.rowStart.size()-1; ++row)
+    {
+        for(int i=mat.rowStart[row], end=mat.rowStart[row+1]; i!=end; ++i)
+        {
+            b[row]+= mat.data[i]*x[mat.colIndex[i]];
+        }
+    }
+    pinfo.copyOwnerToAll(b,b);
+}
+
+std::tuple<int,int,int,int> computeRegions(int N=100)
+{
+    int procs, rank;
+    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+    MPI_Comm_size(MPI_COMM_WORLD, &procs);
+    int n = N/procs; // number of unknowns per process
+    int bigger = N%procs; // number of process with n+1 unknows
+
+
+    int start, end, istart, iend;
+    // Compute owner region
+    if(rank<bigger) {
+        start = rank*(n+1);
+        end   = start+(n+1);
+    }else{
+        start = bigger*(n+1) + (rank-bigger) * n;
+        end   = start+n;
+    }
+    // Compute owner region
+    if(rank<bigger) {
+        istart = rank*(n+1);
+        iend   = start+(n+1);
+    }else{
+        istart = bigger*(n+1) + (rank-bigger) * n;
+        iend   = start+n;
+    }
+
+    // Compute overlap region
+    if(istart>0)
+        start = istart - 1;
+    else
+        start = istart;
+
+    if(iend<N)
+        end = iend + 1;
+    else
+        end = iend;
+    return std::make_tuple(start, istart, iend, end);
+}
+
+#endif
diff --git a/tests/test_parallel_linearsolver.cpp b/tests/test_parallel_linearsolver.cpp
index 3ca362a4d..63d2b1cb2 100644
--- a/tests/test_parallel_linearsolver.cpp
+++ b/tests/test_parallel_linearsolver.cpp
@@ -31,23 +31,12 @@
 #if HAVE_MPI
 #include <mpi.h>
 #include <dune/common/version.hh>
-#include <dune/common/parallel/indexset.hh>
-#include <dune/common/parallel/communicator.hh>
-#include <dune/common/parallel/remoteindices.hh>
-#include <dune/common/version.hh>
-#if DUNE_VERSION_NEWER(DUNE_COMMON, 2, 3)
-#include <dune/common/parallel/mpicollectivecommunication.hh>
-#include <dune/common/parallel/collectivecommunication.hh>
-#else
-#include <dune/common/mpicollectivecommunication.hh>
-#include <dune/common/collectivecommunication.hh>
-#endif
 
-#include <dune/istl/owneroverlapcopy.hh>
 #include <opm/core/linalg/ParallelIstlInformation.hpp>
 #else
 #error "This file needs to compiled with MPI support!"
 #endif
+#include "DuneIstlTestHelpers.hpp"
 #include <opm/core/linalg/LinearSolverFactory.hpp>
 #include <opm/core/utility/parameters/ParameterGroup.hpp>
 
@@ -55,179 +44,11 @@
 #include <cstdlib>
 #include <string>
 
-struct MPIFixture {
-    MPIFixture()
-    {    
-        int m_argc = boost::unit_test::framework::master_test_suite().argc;
-        char** m_argv = boost::unit_test::framework::master_test_suite().argv;
-        MPI_Init(&m_argc, &m_argv);
-    }
-    ~MPIFixture()
-    {
-        MPI_Finalize();
-    }
-};
-
-BOOST_GLOBAL_FIXTURE(MPIFixture);
-
-struct MyMatrix
-{
-    MyMatrix(std::size_t rows, std::size_t nnz)
-        : data(nnz, 0.0), rowStart(rows+1, -1),
-          colIndex(nnz, -1)
-    {}
-    MyMatrix()
-        : data(), rowStart(), colIndex()
-    {}
-
-    std::vector<double> data;
-    std::vector<int> rowStart;
-    std::vector<int> colIndex;
-};
-
-typedef int LocalId;
-typedef int GlobalId;
-typedef Dune::OwnerOverlapCopyCommunication<GlobalId,LocalId> Communication;
-typedef Dune::OwnerOverlapCopyAttributeSet GridAttributes;
-typedef GridAttributes::AttributeSet GridFlag;
-typedef Dune::ParallelLocalIndex<GridFlag> LocalIndex;
-
-/// \brief Sets up a paralle Laplacian.
-///
-/// The process stores the unknowns with indices in the range [start, end).
-/// As we use an overlapping domain decomposition, the process owns the indices
-/// in the range [istart, iend]. If we would only used the indices in this range then
-/// they form a partitioning of the whole index set.
-/// \tparam I The type of the parallel index set (for convenience)
-/// \param indexset The parallel index set for marking owner and copy region.
-/// \param N The global number of unknowns of the system.
-/// \param start The first index stored on this process
-/// \param end One past the last index stored on this process
-/// \param istart The first index that the process owns.
-/// \param iend One past the last index the process owns.
-template<class I>
-std::shared_ptr<MyMatrix> create1DLaplacian(I& indexset, int N, int start, int end,
-                                            int istart, int iend)
-{
-    indexset.beginResize();
-    MyMatrix* mm=new MyMatrix(end-start, (end-start)*3);
-    int nnz=0;
-    mm->rowStart[0]=0;
-    assert(start==0||start<istart);
-    assert(end==N||iend<end);
-    
-    for(int row=start, localRow=0; row<end; row++, localRow++)
-    {
-        if(row<istart || row>=iend)
-        {
-            // We are in the overlap region of the grid
-            // therefore we setup the system such that
-            // right hand side will equal the left hand side
-            // of the linear system.
-            if(localRow>0)
-            {
-                mm->colIndex[nnz]=localRow-1;
-                mm->data[nnz++]=0;
-            }
-            mm->colIndex[nnz]=localRow;
-            mm->data[nnz++]=1.0;
-            indexset.add(row, LocalIndex(localRow, GridAttributes::copy, true));
-            if(localRow<end-1)
-            {
-                mm->colIndex[nnz]=localRow+1;
-                mm->data[nnz++]=0;
-            }
-            mm->rowStart[localRow+1]=nnz;
-            continue;
-        }
-        
-        double dval=0;
-        if(row>0)
-        {
-            mm->colIndex[nnz]=localRow-1;
-            mm->data[nnz++]=-1;
-            dval+=1;
-        }
-        mm->colIndex[nnz]=localRow;
-        mm->data[nnz++]=2;//dval+(row<N-1);
-        if(row<N-1)
-        {
-            mm->colIndex[nnz]=localRow+1;
-            mm->data[nnz++]=-1;
-            dval+=1;
-        }
-        mm->rowStart[localRow+1]=nnz;
-        indexset.add(row, LocalIndex(localRow, GridAttributes::owner, true));
-    }
-    mm->data.resize(nnz);
-    mm->colIndex.resize(nnz);
-    indexset.endResize();
-    return std::shared_ptr<MyMatrix>(mm);
-}
-
-template<class O>
-void createRandomVectors(O& pinfo, int NN, std::vector<double>& x, std::vector<double>& b,
-                         const MyMatrix& mat)
-{
-    x.resize(NN);
-    for(auto entry=x.begin(), end =x.end(); entry!=end; ++entry)
-        *entry=((double) (rand()%100))/10.0;
-
-    pinfo.copyOwnerToAll(x,x);
-    
-    b.resize(NN);
-
-    // Construct the right hand side as b=A*x
-    std::fill(b.begin(), b.end(), 0.0);
-    for(std::size_t row=0; row<mat.rowStart.size()-1; ++row)
-    {
-        for(int i=mat.rowStart[row], end=mat.rowStart[row+1]; i!=end; ++i)
-        {
-            b[row]+= mat.data[i]*x[mat.colIndex[i]];
-        }
-    }
-    pinfo.copyOwnerToAll(b,b);
-}
-
 void run_test(const Opm::parameter::ParameterGroup& param)
 {
     int N=100;
-    int procs, rank;
-    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-    MPI_Comm_size(MPI_COMM_WORLD, &procs);
-    int n = N/procs; // number of unknowns per process
-    int bigger = N%procs; // number of process with n+1 unknows
-
-
     int start, end, istart, iend;
-    // Compute owner region
-    if(rank<bigger) {
-        start = rank*(n+1);
-        end   = start+(n+1);
-    }else{
-        start = bigger*(n+1) + (rank-bigger) * n;
-        end   = start+n;
-    }
-    // Compute owner region
-    if(rank<bigger) {
-        istart = rank*(n+1);
-        iend   = start+(n+1);
-    }else{
-        istart = bigger*(n+1) + (rank-bigger) * n;
-        iend   = start+n;
-    }
-
-    // Compute overlap region
-    if(istart>0)
-        start = istart - 1;
-    else
-        start = istart;
-    
-    if(iend<N)
-        end = iend + 1;
-    else
-        end = iend;
-
+    std::tie(start,istart,iend,end) = computeRegions(N);
     Opm::ParallelISTLInformation comm(MPI_COMM_WORLD);
     auto mat = create1DLaplacian(*comm.indexSet(), N, start, end, istart, iend);
     std::vector<double> x(end-start), b(end-start);
diff --git a/tests/test_parallelistlinformation.cpp b/tests/test_parallelistlinformation.cpp
new file mode 100644
index 000000000..4e0d3ffa7
--- /dev/null
+++ b/tests/test_parallelistlinformation.cpp
@@ -0,0 +1,81 @@
+/*
+  Copyright 2015 Dr. Markus Blatt - HPC-Simulation-Software & Services
+  Copyright 2015 NTNU
+
+  This file is part of the Open Porous Media project (OPM).
+
+  OPM is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+
+  OPM is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with OPM.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <config.h>
+
+#if HAVE_DYNAMIC_BOOST_TEST
+#define BOOST_TEST_DYN_LINK
+#endif
+#define NVERBOSE // to suppress our messages when throwing
+
+#define BOOST_TEST_MODULE OPM-ParallelIstlInformation
+#include <boost/test/unit_test.hpp>
+#if HAVE_MPI
+#include <mpi.h>
+#else
+#error "This file needs to compiled with MPI support!"
+#endif
+#include "DuneIstlTestHelpers.hpp"
+#include <opm/core/linalg/ParallelIstlInformation.hpp>
+#include <functional>
+#ifdef HAVE_DUNE_ISTL
+BOOST_AUTO_TEST_CASE(tupleReductionTest)
+{
+    int N=100;
+    int start, end, istart, iend;
+    std::tie(start,istart,iend,end) = computeRegions(N);
+    Opm::ParallelISTLInformation comm(MPI_COMM_WORLD);
+    auto mat = create1DLaplacian(*comm.indexSet(), N, start, end, istart, iend);
+    std::vector<int> x(end-start);
+    assert(comm.indexSet()->size()==x.size());
+    for(auto i=comm.indexSet()->begin(), iend=comm.indexSet()->end(); i!=iend; ++i)
+        x[i->local()]=i->global();
+    auto containers = std::make_tuple(x, x, x);
+    auto operators  = std::make_tuple(Opm::Reduction::MaskIDOperator<std::plus<int> >(),
+                                      Opm::Reduction::MaskToMinOperator<std::greater<int> >(),
+                                      Opm::Reduction::MaskToMaxOperator<std::less< int>  >());
+    auto values     = std::make_tuple(0,0,100000);
+    auto oldvalues  = values;
+    comm.computeReduction(containers,operators,values);
+    BOOST_CHECK(std::get<0>(values)==std::get<0>(oldvalues)+((N-1)*N)/2);
+    BOOST_CHECK(std::get<1>(values)==std::min(0, std::get<1>(oldvalues)));
+    BOOST_CHECK(std::get<2>(values)==std::max(N, std::get<2>(oldvalues)));
+}
+BOOST_AUTO_TEST_CASE(singleContainerReductionTest)
+{
+    int N=100;
+    int start, end, istart, iend;
+    std::tie(start,istart,iend,end) = computeRegions(N);
+    Opm::ParallelISTLInformation comm(MPI_COMM_WORLD);
+    auto mat = create1DLaplacian(*comm.indexSet(), N, start, end, istart, iend);
+    std::vector<int> x(end-start);
+    assert(comm.indexSet()->size()==x.size());
+    for(auto i=comm.indexSet()->begin(), iend=comm.indexSet()->end(); i!=iend; ++i)
+        x[i->local()]=i->global();
+    auto containers = std::make_tuple(x, x, x);
+    auto operators  = std::make_tuple(Opm::Reduction::MaskIDOperator<std::plus<int> >(),
+                                      Opm::Reduction::MaskToMinOperator<std::greater<int> >(),
+                                      Opm::Reduction::MaskToMaxOperator<std::less< int>  >());
+    int value = 1;
+    int oldvalue = value;
+    comm.computeReduction(x,Opm::Reduction::MaskIDOperator<std::plus<int> >(),value);
+    BOOST_CHECK(value==oldvalue+((N-1)*N)/2);
+}
+#endif