diff --git a/opm/simulators/wells/BlackoilWellModel_impl.hpp b/opm/simulators/wells/BlackoilWellModel_impl.hpp index 177818689..3a2fe0d4d 100644 --- a/opm/simulators/wells/BlackoilWellModel_impl.hpp +++ b/opm/simulators/wells/BlackoilWellModel_impl.hpp @@ -467,7 +467,7 @@ namespace Opm { // Clear the communication data structures for above values. for(auto&& pinfo : local_parallel_well_info_) { - pinfo->clearCommunicateAboveBelow(); + pinfo->clear(); } } @@ -620,7 +620,8 @@ namespace Opm { int well_index = 0; for (const auto& well : wells_ecl_) { int completion_index = 0; - int completion_index_above = -1; // -1 marks no above perf available + // INVALID_ECL_INDEX marks no above perf available + int completion_index_above = ParallelWellInfo::INVALID_ECL_INDEX; well_perf_data_[well_index].clear(); well_perf_data_[well_index].reserve(well.getConnections().size()); CheckDistributedWellConnections checker(well, *local_parallel_well_info_[well_index]); @@ -649,6 +650,9 @@ namespace Opm { completion_index); } firstOpenCompletion = false; + // Next time this index is the one above as each open completion is + // is stored somehwere. + completion_index_above = completion_index; } else { checker.connectionFound(completion_index); if (completion.state() != Connection::State::SHUT) { @@ -659,7 +663,6 @@ namespace Opm { // Note: we rely on the connections being filtered! I.e. there are only connections // to active cells in the global grid. ++completion_index; - ++completion_index_above; } parallelWellInfo.endReset(); checker.checkAllConnectionsFound(); diff --git a/opm/simulators/wells/ParallelWellInfo.cpp b/opm/simulators/wells/ParallelWellInfo.cpp index 03b879623..224b7a77b 100644 --- a/opm/simulators/wells/ParallelWellInfo.cpp +++ b/opm/simulators/wells/ParallelWellInfo.cpp @@ -43,6 +43,143 @@ struct CommPolicy namespace Opm { + +GlobalPerfContainerFactory::GlobalPerfContainerFactory(const IndexSet& local_indices, const Communication comm, + const int num_local_perfs) + : local_indices_(local_indices), comm_(comm) +{ + if ( comm_.size() > 1 ) + { + // The global index used in the index set current_indices + // is the index of the perforation in ECL Schedule definition. + // This is assumed to give the topological order. + // allgather the index of the perforation in ECL schedule and the value. + sizes_.resize(comm_.size()); + displ_.resize(comm_.size() + 1, 0); + // Send the int for convenience. 
It will be used to get the place where the + // data comes from + using Pair = std::pair; + std::vector my_pairs; + my_pairs.reserve(local_indices_.size()); + for (const auto& pair: local_indices_) + { + if (pair.local().attribute() == Attribute::owner) + { + my_pairs.emplace_back(pair.global(), -1); + } + } + int mySize = my_pairs.size(); + comm_.allgather(&mySize, 1, sizes_.data()); + std::partial_sum(sizes_.begin(), sizes_.end(), displ_.begin()+1); + std::vector global_pairs(displ_.back()); + comm_.allgatherv(my_pairs.data(), my_pairs.size(), global_pairs.data(), sizes_.data(), displ_.data()); + // Set the the index where we receive + int count = 0; + std::for_each(global_pairs.begin(), global_pairs.end(), [&count](Pair& pair){ pair.second = count++;}); + // sort the complete range to get the correct ordering + std::sort(global_pairs.begin(), global_pairs.end(), + [](const Pair& p1, const Pair& p2){ return p1.first < p2.first; } ); + map_received_.resize(global_pairs.size()); + std::transform(global_pairs.begin(), global_pairs.end(), map_received_.begin(), + [](const Pair& pair){ return pair.second; }); + perf_ecl_index_.resize(global_pairs.size()); + std::transform(global_pairs.begin(), global_pairs.end(), perf_ecl_index_.begin(), + [](const Pair& pair){ return pair.first; }); + num_global_perfs_ = global_pairs.size(); + } + else + { + num_global_perfs_ = num_local_perfs; + } +} + + + +std::vector GlobalPerfContainerFactory::createGlobal(const std::vector& local_perf_container, + std::size_t num_components) const +{ + // Could be become templated later. + using Value = double; + + if (comm_.size() > 1) + { + std::vector global_recv(perf_ecl_index_.size() * num_components); + if (num_components == 1) + { + comm_.allgatherv(local_perf_container.data(), local_perf_container.size(), + global_recv.data(), const_cast(sizes_.data()), + const_cast(displ_.data())); + } + else + { +#if HAVE_MPI + // Create MPI type for sending num_components entries + MPI_Datatype data_type; + MPI_Type_contiguous(num_components, Dune::MPITraits::getType(), &data_type); + MPI_Type_commit(&data_type); + MPI_Allgatherv(local_perf_container.data(), + local_perf_container.size()/num_components, + data_type, global_recv.data(), sizes_.data(), + displ_.data(), data_type, comm_); + MPI_Type_free(&data_type); +#endif + } + + // reorder by ascending ecl index. 
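+ // map_received_[i] holds the position in the allgathered receive buffer of the
+ // perforation with the i-th smallest ECL index (descriptive comment added for clarity).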
+ std::vector global_remapped(perf_ecl_index_.size() * num_components); + auto global = global_remapped.begin(); + for (auto map_entry = map_received_.begin(); map_entry != map_received_.end(); ++map_entry) + { + auto global_index = *map_entry * num_components; + + for(std::size_t i = 0; i < num_components; ++i) + *(global++) = global_recv[global_index++]; + } + assert(global == global_remapped.end()); + return global_remapped; + } + else + { + return local_perf_container; + } +} + +void GlobalPerfContainerFactory::copyGlobalToLocal(const std::vector& global, std::vector& local, + std::size_t num_components) const +{ + if (global.empty()) + { + return; + } + + if (comm_.size() > 1) + { + // assign the values (both ranges are sorted by the ecl index) + auto global_perf = perf_ecl_index_.begin(); + + for (const auto& pair: local_indices_) + { + global_perf = std::lower_bound(global_perf, perf_ecl_index_.end(), pair.global()); + assert(global_perf != perf_ecl_index_.end()); + assert(*global_perf == pair.global()); + auto local_index = pair.local() * num_components; + auto global_index = (global_perf - perf_ecl_index_.begin()) * num_components; + for (std::size_t i = 0; i < num_components; ++i) + local[local_index++] = global[global_index++]; + } + } + else + { + std::copy(global.begin(), global.end(), local.begin()); + } +} + +int GlobalPerfContainerFactory::numGlobalPerfs() const +{ + return num_global_perfs_; +} + + CommunicateAboveBelow::CommunicateAboveBelow([[maybe_unused]] const Communication& comm) #if HAVE_MPI : comm_(comm), interface_(comm_) @@ -57,7 +194,7 @@ void CommunicateAboveBelow::clear() interface_.free(); communicator_.free(); #endif - count_ = 0; + num_local_perfs_ = 0; } void CommunicateAboveBelow::beginReset() @@ -72,7 +209,7 @@ void CommunicateAboveBelow::beginReset() #endif } -void CommunicateAboveBelow::endReset() +int CommunicateAboveBelow::endReset() { #if HAVE_MPI if (comm_.size() > 1) @@ -80,7 +217,7 @@ void CommunicateAboveBelow::endReset() above_indices_.endResize(); current_indices_.endResize(); remote_indices_.setIndexSets(current_indices_, above_indices_, comm_); - remote_indices_.setIncludeSelf(true); + // It is mandatory to not set includeSelf to true, as this will skip some entries. 
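+ // (The ownerAbove/overlapAbove attributes declared in ParallelWellInfo.hpp are part of the
+ // same workaround for DUNE skipping local entries in RemoteIndices.)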
remote_indices_.rebuild(); using FromSet = Dune::EnumItem; using ToSet = Dune::AllSet; @@ -88,6 +225,7 @@ void CommunicateAboveBelow::endReset() communicator_.build(interface_); } #endif + return num_local_perfs_; } struct CopyGatherScatter @@ -171,10 +309,11 @@ void CommunicateAboveBelow::pushBackEclIndex([[maybe_unused]] int above, { attr = overlap; } - above_indices_.add(above, {count_, attr, true}); - current_indices_.add(current, {count_++, attr, true}); + above_indices_.add(above, {num_local_perfs_, attr, true}); + current_indices_.add(current, {num_local_perfs_, attr, true}); } #endif + ++num_local_perfs_; } @@ -197,6 +336,17 @@ void ParallelWellInfo::DestroyComm::operator()(Communication* comm) delete comm; } + +const CommunicateAboveBelow::IndexSet& CommunicateAboveBelow::getIndexSet() const +{ + return current_indices_; +} + +int CommunicateAboveBelow::numLocalPerfs() const +{ + return num_local_perfs_; +} + ParallelWellInfo::ParallelWellInfo(const std::string& name, bool hasLocalCells) : name_(name), hasLocalCells_ (hasLocalCells), @@ -245,12 +395,17 @@ void ParallelWellInfo::beginReset() void ParallelWellInfo::endReset() { - commAboveBelow_->beginReset(); + int local_num_perfs = commAboveBelow_->endReset(); + globalPerfCont_ + .reset(new GlobalPerfContainerFactory(commAboveBelow_->getIndexSet(), + *comm_, + local_num_perfs)); } -void ParallelWellInfo::clearCommunicateAboveBelow() +void ParallelWellInfo::clear() { commAboveBelow_->clear(); + globalPerfCont_.reset(); } std::vector ParallelWellInfo::communicateAboveValues(double zero_value, @@ -283,6 +438,16 @@ std::vector ParallelWellInfo::communicateBelowValues(double last_value, current_values.size()); } +const GlobalPerfContainerFactory& +ParallelWellInfo::getGlobalPerfContainerFactory() const +{ + if(globalPerfCont_) + return *globalPerfCont_; + else + OPM_THROW(std::logic_error, + "No ecl indices have been added via beginReset, pushBackEclIndex, endReset"); +} + bool operator<(const ParallelWellInfo& well1, const ParallelWellInfo& well2) { return well1.name() < well2.name() || (! (well2.name() < well1.name()) && well1.hasLocalCells() < well2.hasLocalCells()); diff --git a/opm/simulators/wells/ParallelWellInfo.hpp b/opm/simulators/wells/ParallelWellInfo.hpp index b7f1ccb5a..bb09b5f38 100644 --- a/opm/simulators/wells/ParallelWellInfo.hpp +++ b/opm/simulators/wells/ParallelWellInfo.hpp @@ -38,8 +38,15 @@ namespace Opm /// class CommunicateAboveBelow { +public: enum Attribute { - owner=1, overlap=2 + owner=1, + overlap=2, + // there is a bug in older versions of DUNE that will skip + // entries with matching attributes in RemoteIndices that are local + // therefore we add one more version for above. + ownerAbove = 3, + overlapAbove = 4 }; using MPIComm = typename Dune::MPIHelper::MPICommunicator; #if DUNE_VERSION_NEWER(DUNE_COMMON, 2, 7) @@ -48,12 +55,11 @@ class CommunicateAboveBelow using Communication = Dune::CollectiveCommunication; #endif using LocalIndex = Dune::ParallelLocalIndex; + using IndexSet = Dune::ParallelIndexSet; #if HAVE_MPI - using IndexSet = Dune::ParallelIndexSet; using RI = Dune::RemoteIndices; #endif -public: explicit CommunicateAboveBelow(const Communication& comm); /// \brief Adds information about original index of the perforations in ECL Schedule. /// @@ -75,7 +81,8 @@ public: /// /// Sets up the commmunication structures to be used by /// communicate() - void endReset(); + /// \return The number of local perforations + int endReset(); /// \brief Creates an array of values for the perforation above. 
/// \param first_value Value to use for above of the first perforation @@ -119,7 +126,7 @@ public: // allgather the index of the perforation in ECL schedule and the value. using Value = typename std::iterator_traits::value_type; std::vector sizes(comm_.size()); - std::vector displ(comm_.size(), 0); + std::vector displ(comm_.size() + 1, 0); using GlobalIndex = typename IndexSet::IndexPair::GlobalIndex; using Pair = std::pair; std::vector my_pairs; @@ -161,20 +168,79 @@ public: } } + /// \brief Get index set for the local perforations. + const IndexSet& getIndexSet() const; + + int numLocalPerfs() const; private: Communication comm_; -#if HAVE_MPI /// \brief Mapping of the local well index to ecl index IndexSet current_indices_; +#if HAVE_MPI /// \brief Mapping of the above well index to ecl index IndexSet above_indices_; RI remote_indices_; Dune::Interface interface_; Dune::BufferedCommunicator communicator_; #endif - std::size_t count_{}; + std::size_t num_local_perfs_{}; }; +/// \brief A factory for creating a global data representation for distributed wells. +/// +/// Unfortunately, there are occassion where we need to compute sequential on a well +/// even if the data is distributed. This class is supposed to help with that by +/// computing the global data arrays for the well and copy computed values back to +/// the distributed representation. +class GlobalPerfContainerFactory +{ +public: + using MPIComm = typename Dune::MPIHelper::MPICommunicator; +#if DUNE_VERSION_NEWER(DUNE_COMMON, 2, 7) + using Communication = Dune::Communication; +#else + using Communication = Dune::CollectiveCommunication; +#endif + using IndexSet = CommunicateAboveBelow::IndexSet; + using Attribute = CommunicateAboveBelow::Attribute; + using GlobalIndex = typename IndexSet::IndexPair::GlobalIndex; + + /// \brief Constructor + /// \param local_indices completely set up index set for map ecl index to local index + GlobalPerfContainerFactory(const IndexSet& local_indices, const Communication comm, + int num_local_perfs); + + /// \brief Creates a container that holds values for all perforations + /// \param local_perf_container Container with values attached to the local perforations. + /// \param num_components the number of components per perforation. + /// \return A container with values attached to all perforations of a well. + /// Values are ordered by the index of the perforation in the ECL schedule. + std::vector createGlobal(const std::vector& local_perf_container, + std::size_t num_components) const; + + /// \brief Copies the values of the global perforation to the local representation + /// \param global values attached to all peforations of a well (as if the well would live on one process) + /// \param num_components the number of components per perforation. + /// \param[out] local The values attached to the local perforations only. + void copyGlobalToLocal(const std::vector& global, std::vector& local, + std::size_t num_components) const; + + int numGlobalPerfs() const; +private: + const IndexSet& local_indices_; + Communication comm_; + int num_global_perfs_; + /// \brief sizes for allgatherv + std::vector sizes_; + /// \brief displacement for allgatherv + std::vector displ_; + /// \brief Mapping for storing gathered local values at the correct index. + std::vector map_received_; + /// \brief The index of a perforation in the schedule of ECL + /// + /// This is is sorted. 
+ std::vector perf_ecl_index_; +}; /// \brief Class encapsulating some information about parallel wells /// @@ -189,6 +255,7 @@ public: using Communication = Dune::CollectiveCommunication; #endif + static constexpr int INVALID_ECL_INDEX = -1; /// \brief Constructs object using MPI_COMM_SELF ParallelWellInfo(const std::string& name = {""}, @@ -315,7 +382,14 @@ public: } /// \brief Free data of communication data structures. - void clearCommunicateAboveBelow(); + void clear(); + + /// \brief Get a factor to create a global representation of peforation data. + /// + /// That is a container that holds data for every perforation no matter where + /// it is stored. Container is ordered via ascendings index of the perforations + /// in the ECL schedule. + const GlobalPerfContainerFactory& getGlobalPerfContainerFactory() const; private: /// \brief Deleter that also frees custom MPI communicators @@ -340,6 +414,8 @@ private: /// \brief used to communicate the values for the perforation above. std::unique_ptr commAboveBelow_; + + std::unique_ptr globalPerfCont_; }; /// \brief Class checking that all connections are on active cells diff --git a/opm/simulators/wells/StandardWell_impl.hpp b/opm/simulators/wells/StandardWell_impl.hpp index 163863f1f..e3ec47d01 100644 --- a/opm/simulators/wells/StandardWell_impl.hpp +++ b/opm/simulators/wells/StandardWell_impl.hpp @@ -1675,6 +1675,8 @@ namespace Opm ipr_b_[ebosCompIdxToFlowCompIdx(p)] += ipr_b_perf[p]; } } + this->parallel_well_info_.communication().sum(ipr_a_.data(), ipr_a_.size()); + this->parallel_well_info_.communication().sum(ipr_b_.data(), ipr_b_.size()); } @@ -1799,6 +1801,13 @@ namespace Opm } } + const auto& comm = this->parallel_well_info_.communication(); + if (comm.size() > 1) + { + all_drawdown_wrong_direction = + (comm.min(all_drawdown_wrong_direction ? 1 : 0) == 1); + } + return all_drawdown_wrong_direction; } @@ -1991,24 +2000,37 @@ namespace Opm // component) exiting up the wellbore from each perforation, // taking into account flow from lower in the well, and // in/out-flow at each perforation. - std::vector q_out_perf(nperf*num_comp); + std::vector q_out_perf((nperf)*num_comp, 0.0); + + // Step 1 depends on the order of the perforations. Hence we need to + // do the modifications globally. + // Create and get the global perforation information and do this sequentially + // on each process + + const auto& factory = this->parallel_well_info_.getGlobalPerfContainerFactory(); + auto global_q_out_perf = factory.createGlobal(q_out_perf, num_comp); + auto global_perf_comp_rates = factory.createGlobal(perfComponentRates, num_comp); // TODO: investigate whether we should use the following techniques to calcuate the composition of flows in the wellbore // Iterate over well perforations from bottom to top. - for (int perf = nperf - 1; perf >= 0; --perf) { + for (int perf = factory.numGlobalPerfs() - 1; perf >= 0; --perf) { for (int component = 0; component < num_comp; ++component) { - if (perf == nperf - 1) { + auto index = perf * num_comp + component; + if (perf == factory.numGlobalPerfs() - 1) { // This is the bottom perforation. No flow from below. - q_out_perf[perf*num_comp+ component] = 0.0; + global_q_out_perf[index] = 0.0; } else { // Set equal to flow from below. - q_out_perf[perf*num_comp + component] = q_out_perf[(perf+1)*num_comp + component]; + global_q_out_perf[index] = global_q_out_perf[index + num_comp]; } // Subtract outflow through perforation. 
- q_out_perf[perf*num_comp + component] -= perfComponentRates[perf*num_comp + component]; + global_q_out_perf[index] -= global_perf_comp_rates[index]; } } + // Copy the data back to local view + factory.copyGlobalToLocal(global_q_out_perf, q_out_perf, num_comp); + // 2. Compute the component mix at each perforation as the // absolute values of the surface rates divided by their sum. // Then compute volume ratios (formation factors) for each perforation. @@ -2271,6 +2293,12 @@ namespace Opm connPI += np; } + // Sum with communication in case of distributed well. + const auto& comm = this->parallel_well_info_.communication(); + if (comm.size() > 1) + { + comm.sum(wellPI, np); + } assert (static_cast(subsetPerfID) == this->number_of_perforations_ && "Internal logic error in processing connections for PI/II"); } @@ -2561,6 +2589,7 @@ namespace Opm well_flux[ebosCompIdxToFlowCompIdx(p)] += cq_s[p].value(); } } + this->parallel_well_info_.communication().sum(well_flux.data(), well_flux.size()); } @@ -4115,6 +4144,11 @@ namespace Opm for (int comp = 0; comp < num_components_; ++comp) { well_q_s_noderiv[comp] = well_q_s[comp].value(); } + const auto& comm = this->parallel_well_info_.communication(); + if (comm.size() > 1) + { + comm.sum(well_q_s_noderiv.data(), well_q_s_noderiv.size()); + } return well_q_s_noderiv; } diff --git a/tests/test_parallelwellinfo.cpp b/tests/test_parallelwellinfo.cpp index 8e6fa2766..660fa4eaf 100644 --- a/tests/test_parallelwellinfo.cpp +++ b/tests/test_parallelwellinfo.cpp @@ -23,6 +23,9 @@ #include #include #include +#include +#include +#include #define BOOST_TEST_MODULE ParallelWellInfo #include @@ -96,6 +99,8 @@ std::ostream& operator<<(std::ostream& os, const Opm::ParallelWellInfo& w) } } +constexpr int numPerProc = 3; + BOOST_AUTO_TEST_CASE(ParallelWellComparison) { int argc = 0; @@ -222,56 +227,75 @@ BOOST_AUTO_TEST_CASE(CommunicateAboveBelowSelf1) } } +using MPIComm = typename Dune::MPIHelper::MPICommunicator; +#if DUNE_VERSION_NEWER(DUNE_COMMON, 2, 7) +using Communication = Dune::Communication; +#else +using Communication = Dune::CollectiveCommunication; +#endif +std::vector createGlobalEclIndex(const Communication& comm) +{ + std::vector globalEclIndex = {0, 1, 2, 3, 7 , 8, 10, 11}; + auto oldSize = globalEclIndex.size(); + std::size_t globalSize = numPerProc * comm.size(); + auto lastIndex = globalEclIndex.back(); + globalEclIndex.resize(globalSize); + if ( globalSize > oldSize) + { + ++lastIndex; + for(auto entry = globalEclIndex.begin() + oldSize; + entry != globalEclIndex.end(); ++entry, ++lastIndex) + { + *entry = lastIndex; + } + } + return globalEclIndex; +} + +template +std::vector populateCommAbove(C& commAboveBelow, + const Communication& comm, + const std::vector& globalEclIndex, + const std::vector globalCurrent, + int num_component = 1, + bool local_consecutive = false) +{ + auto size = numPerProc * num_component; + std::vector current(size); + + commAboveBelow.beginReset(); + for (std::size_t i = 0; i < current.size()/num_component; i++) + { + auto gi = local_consecutive ? 
comm.rank() * numPerProc + i : comm.rank() + comm.size() * i; + + if (gi==0) + { + commAboveBelow.pushBackEclIndex(-1, globalEclIndex[gi]); + } + else + { + commAboveBelow.pushBackEclIndex(globalEclIndex[gi-1], globalEclIndex[gi]); + } + for(int c = 0; c < num_component; ++c) + current[i * num_component + c] = globalCurrent[gi * num_component + c]; + } + commAboveBelow.endReset(); + return current; +} + BOOST_AUTO_TEST_CASE(CommunicateAboveBelowParallel) { - using MPIComm = typename Dune::MPIHelper::MPICommunicator; -#if DUNE_VERSION_NEWER(DUNE_COMMON, 2, 7) - using Communication = Dune::Communication; -#else - using Communication = Dune::CollectiveCommunication; -#endif auto comm = Communication(Dune::MPIHelper::getCommunicator()); Opm::CommunicateAboveBelow commAboveBelow{ comm }; for(std::size_t count=0; count < 2; ++count) { - std::vector globalEclIndex = {0, 1, 2, 3, 7 , 8, 10, 11}; - auto oldSize = globalEclIndex.size(); - std::size_t globalSize = 3 * comm.size(); - auto lastIndex = globalEclIndex.back(); - globalEclIndex.resize(globalSize); - if ( globalSize > oldSize) - { - ++lastIndex; - for(auto entry = globalEclIndex.begin() + oldSize; - entry != globalEclIndex.end(); ++entry, ++lastIndex) - { - *entry = lastIndex; - } - } - + auto globalEclIndex = createGlobalEclIndex(comm); std::vector globalCurrent(globalEclIndex.size()); std::transform(globalEclIndex.begin(), globalEclIndex.end(), globalCurrent.begin(), [](double v){ return 1+10.0*v;}); - std::vector current(3); - - commAboveBelow.beginReset(); - for (std::size_t i = 0; i < current.size(); ++i) - { - auto gi = comm.rank() + comm.size() * i; - - if (gi==0) - { - commAboveBelow.pushBackEclIndex(-1, globalEclIndex[gi]); - } - else - { - commAboveBelow.pushBackEclIndex(globalEclIndex[gi-1], globalEclIndex[gi]); - } - current[i] = globalCurrent[gi]; - } - commAboveBelow.endReset(); + auto current = populateCommAbove(commAboveBelow, comm, globalEclIndex, globalCurrent); auto above = commAboveBelow.communicateAbove(-10.0, current.data(), current.size()); if (comm.rank() == 0) BOOST_CHECK(above[0]==-10.0); @@ -302,3 +326,145 @@ BOOST_AUTO_TEST_CASE(CommunicateAboveBelowParallel) } } } + +template +void initRandomNumbers(Iter begin, Iter end, C comm) +{ + // Initialize with random numbers. 
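+ // The values are broadcast from rank 0 below so that all ranks work on identical data.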
+ std::random_device rndDevice; + std::mt19937 mersenneEngine {rndDevice()}; // Generates random integers + std::uniform_int_distribution dist {1, 100}; + + auto gen = [&dist, &mersenneEngine](){ + return dist(mersenneEngine); + }; + + std::generate(begin, end, gen); + comm.broadcast(&(*begin), end-begin, 0); +} + +BOOST_AUTO_TEST_CASE(PartialSumself) +{ + auto comm = Dune::MPIHelper::getLocalCommunicator(); + + Opm::CommunicateAboveBelow commAboveBelow{ comm }; + std::vector eclIndex = {0, 1, 2, 3, 7 , 8, 10, 11}; + std::vector current(eclIndex.size()); + std::transform(eclIndex.begin(), eclIndex.end(), current.begin(), + [](double v){ return 1+10.0*v;}); + commAboveBelow.beginReset(); + for (std::size_t i = 0; i < current.size(); ++i) + { + if (i==0) + commAboveBelow.pushBackEclIndex(-1, eclIndex[i]); + else + commAboveBelow.pushBackEclIndex(eclIndex[i-1], eclIndex[i]); + } + commAboveBelow.endReset(); + + initRandomNumbers(std::begin(current), std::end(current), + Communication(comm)); + auto stdCopy = current; + std::partial_sum(std::begin(stdCopy), std::end(stdCopy), std::begin(stdCopy)); + + + commAboveBelow.partialSumPerfValues(std::begin(current), std::end(current)); + + BOOST_CHECK_EQUAL_COLLECTIONS(std::begin(stdCopy), std::end(stdCopy), + std::begin(current), std::end(current)); +} + +BOOST_AUTO_TEST_CASE(PartialSumParallel) +{ + + auto comm = Communication(Dune::MPIHelper::getCommunicator()); + + Opm::CommunicateAboveBelow commAboveBelow{ comm }; + auto globalEclIndex = createGlobalEclIndex(comm); + std::vector globalCurrent(globalEclIndex.size()); + initRandomNumbers(std::begin(globalCurrent), std::end(globalCurrent), + Communication(comm)); + + auto localCurrent = populateCommAbove(commAboveBelow, comm, + globalEclIndex, globalCurrent); + + auto globalPartialSum = globalCurrent; + + std::partial_sum(std::begin(globalPartialSum), std::end(globalPartialSum), std::begin(globalPartialSum)); + + + commAboveBelow.partialSumPerfValues(std::begin(localCurrent), std::end(localCurrent)); + + + for (std::size_t i = 0; i < localCurrent.size(); ++i) + { + auto gi = comm.rank() + comm.size() * i; + BOOST_CHECK(localCurrent[i]==globalPartialSum[gi]); + } +} + +void testGlobalPerfFactoryParallel(int num_component, bool local_consecutive = false) +{ + auto comm = Communication(Dune::MPIHelper::getCommunicator()); + + Opm::ParallelWellInfo wellInfo{ {"Test", true }, comm }; + auto globalEclIndex = createGlobalEclIndex(comm); + std::vector globalCurrent(globalEclIndex.size() * num_component); + std::vector globalAdd(globalEclIndex.size() * num_component); + initRandomNumbers(std::begin(globalCurrent), std::end(globalCurrent), + comm); + initRandomNumbers(std::begin(globalAdd), std::end(globalAdd), + comm); + + auto localCurrent = populateCommAbove(wellInfo, comm, globalEclIndex, + globalCurrent, num_component, + local_consecutive); + + // A hack to get local values to add. 
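+ // A second ParallelWellInfo is only used to distribute globalAdd over the ranks
+ // in the same way as globalCurrent.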
+ Opm::ParallelWellInfo dummy{ {"Test", true }, comm }; + auto localAdd = populateCommAbove(dummy, comm, globalEclIndex, + globalAdd, num_component, + local_consecutive); + + const auto& factory = wellInfo.getGlobalPerfContainerFactory(); + + auto globalCreated = factory.createGlobal(localCurrent, num_component); + + + BOOST_CHECK_EQUAL_COLLECTIONS(std::begin(globalCurrent), std::end(globalCurrent), + std::begin(globalCreated), std::end(globalCreated)); + + std::transform(std::begin(globalAdd), std::end(globalAdd), + std::begin(globalCreated), std::begin(globalCreated), + std::plus()); + + auto globalSol = globalCurrent; + std::transform(std::begin(globalAdd), std::end(globalAdd), + std::begin(globalSol), std::begin(globalSol), + std::plus()); + + auto localSol = localCurrent; + + std::transform(std::begin(localAdd), std::end(localAdd), + std::begin(localSol), std::begin(localSol), + std::plus()); + factory.copyGlobalToLocal(globalCreated, localCurrent, num_component); + + for (std::size_t i = 0; i < localCurrent.size() / num_component; ++i) + { + auto gi = local_consecutive ? comm.rank() * numPerProc + i : + comm.rank() + comm.size() * i; + for (int c = 0; c < num_component; ++c) + { + BOOST_CHECK(localCurrent[i * num_component + c]==globalSol[gi * num_component + c]); + BOOST_CHECK(localSol[i * num_component + c] == localCurrent[i * num_component + c]); + } + } +} + +BOOST_AUTO_TEST_CASE(GlobalPerfFactoryParallel1) +{ + + testGlobalPerfFactoryParallel(1); + testGlobalPerfFactoryParallel(3); +}
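Usage sketch (illustrative only): the function name accumulateFromBottom, its parameters and the
example accumulation step below are made up; only getGlobalPerfContainerFactory(), createGlobal(),
copyGlobalToLocal() and numGlobalPerfs() come from this patch. It mirrors the pattern used in the
StandardWell_impl.hpp changes above: gather the distributed perforation data into a globally
ordered container, run the order-dependent computation identically on every process, then scatter
the result back to the local representation.

#include <opm/simulators/wells/ParallelWellInfo.hpp>
#include <cstddef>
#include <vector>

// Hypothetical helper, sketched against the factory API added in this patch.
void accumulateFromBottom(const Opm::ParallelWellInfo& well_info,
                          std::vector<double>& local_values, // num_components entries per local perforation
                          std::size_t num_components)
{
    const auto& factory = well_info.getGlobalPerfContainerFactory();

    // One block of num_components values per perforation of the whole well,
    // ordered by ascending ECL (i.e. topological) index, regardless of which
    // rank stores the perforation.
    auto global_values = factory.createGlobal(local_values, num_components);

    // Order-dependent step, executed identically on every process:
    // add the contribution of the perforation below to each perforation.
    for (int perf = factory.numGlobalPerfs() - 2; perf >= 0; --perf)
        for (std::size_t c = 0; c < num_components; ++c)
            global_values[perf * num_components + c] +=
                global_values[(perf + 1) * num_components + c];

    // Copy the entries belonging to the local perforations back.
    factory.copyGlobalToLocal(global_values, local_values, num_components);
}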