Perform the parallel reduction when applying the wells.

Applying the B matrix is essentially a component-wise multiplication
with a vector followed by a parallel reduction. We perform that
reduction onto all ranks computing for the well, which saves the
broadcast that would otherwise be needed when applying C^T.
Markus Blatt
2020-10-09 15:09:28 +02:00
parent 3996967344
commit 8ee58096ba
11 changed files with 188 additions and 13 deletions
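
The idea can be illustrated with a small stand-alone MPI sketch (hypothetical data, not code from this commit): every rank multiplies its locally owned perforation entries of B with the matching local solution values, and an all-reduce sums the partial results onto every rank, so the subsequent application of C^T needs no broadcast.

#include <mpi.h>

#include <numeric>
#include <vector>

int main(int argc, char** argv)
{
    MPI_Init(&argc, &argv);

    // Hypothetical local data: one entry per locally owned perforation.
    std::vector<double> Brow{1.0, 2.0};  // local entries of a row of B
    std::vector<double> x{0.5, 0.25};    // matching local solution values

    // Component-wise multiplication followed by a local summation.
    double partial = std::inner_product(Brow.begin(), Brow.end(), x.begin(), 0.0);

    // Parallel reduction onto *all* ranks computing for the well.
    double y = 0.0;
    MPI_Allreduce(&partial, &y, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);

    // y now holds the complete well contribution on every rank, so C^T
    // can be applied afterwards without an extra broadcast.

    MPI_Finalize();
    return 0;
}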


@@ -1,6 +1,7 @@
/*
Copyright 2016 SINTEF ICT, Applied Mathematics.
Copyright 2016 Statoil ASA.
Copyright 2020 OPM-OP AS.
This file is part of the Open Porous Media project (OPM).
@@ -22,6 +23,12 @@
#ifndef OPM_WELLHELPERS_HEADER_INCLUDED
#define OPM_WELLHELPERS_HEADER_INCLUDED
#include <opm/common/OpmLog/OpmLog.hpp>
#include <opm/simulators/wells/ParallelWellInfo.hpp>
#include <dune/istl/bcrsmatrix.hh>
#include <dune/common/dynmatrix.hh>
#include <dune/common/parallel/mpihelper.hh>
#include <cassert>
#include <functional>
#include <numeric>
#include <string>
#include <vector>
@@ -33,6 +40,93 @@ namespace Opm {
namespace wellhelpers
{
/// \brief A wrapper around the B matrix for distributed wells
///
/// For standard wells, applying the B matrix is basically a multiplication
/// with the equations of the perforated cells followed by a reduction
/// (summation) of these onto the well equations.
///
/// This class does that in the functions mv and mmv (from the DUNE
/// matrix interface).
///
/// \tparam Scalar The scalar used for the computation.
template<typename Scalar>
class ParallelStandardWellB
{
public:
using Block = Dune::DynamicMatrix<Scalar>;
using Matrix = Dune::BCRSMatrix<Block>;
ParallelStandardWellB(const Matrix& B, const ParallelWellInfo& pinfo)
: B_(&B), pinfo_(&pinfo)
{}
//! y = A x
template<class X, class Y>
void mv (const X& x, Y& y) const
{
#if !defined(NDEBUG) && HAVE_MPI
// We need to make sure that all ranks are actually computing
// for the same well. We do this by checking the name of the well.
int cstring_size = pinfo_->name().size()+1;
std::vector<int> sizes(pinfo_->communication().size());
pinfo_->communication().allgather(&cstring_size, 1, sizes.data());
std::vector<int> offsets(sizes.size()+1, 0); // last entry will be the accumulated size
std::partial_sum(sizes.begin(), sizes.end(), offsets.begin() + 1);
std::vector<char> cstrings(offsets[sizes.size()]);
bool consistentWells = true;
char* send = const_cast<char*>(pinfo_->name().c_str());
pinfo_->communication().allgatherv(send, cstring_size,
cstrings.data(), sizes.data(),
offsets.data());
for(std::size_t i = 0; i < sizes.size(); ++i)
{
std::string name(cstrings.data()+offsets[i]);
if (name != pinfo_->name())
{
if (pinfo_->communication().rank() == 0)
{
// Only one process per well logs; it might not be rank 0 of MPI_COMM_WORLD, though.
std::string msg = std::string("Fatal Error: Not all ranks are computing for the same well")
+ ", well should be " + pinfo_->name() + " but is "
+ name;
OpmLog::debug(msg);
}
consistentWells = false;
break;
}
}
pinfo_->communication().barrier();
// As not all processes are involved here, we need to use MPI_Abort and hope MPI kills them all
if (!consistentWells)
{
MPI_Abort(MPI_COMM_WORLD, 1);
}
#endif
B_->mv(x, y);
// Applying the B matrix is basically a component-wise multiplication
// with a vector followed by a parallel reduction. We perform that
// reduction onto all ranks computing for the well to save the
// broadcast when applying C^T.
using YField = typename Y::block_type::value_type;
assert(y.size() == 1);
this->pinfo_->communication().allreduce<std::plus<YField>>(y[0].container().data(),
y[0].container().size());
}
//! y -= A x
template<class X, class Y>
void mmv (const X& x, Y& y) const
{
Y temp(y);
mv(x, temp); // includes parallel reduction
y -= temp;
}
private:
const Matrix* B_;
const ParallelWellInfo* pinfo_;
};
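// A minimal usage sketch (hypothetical variable names, not part of this
// header): the wrapper is constructed from the well's B matrix and its
// ParallelWellInfo and is then used like an ordinary matrix when forming
// the well residual, e.g.
//
//   ParallelStandardWellB<double> parallelB(duneB, parallel_well_info);
//   // resWell -= B x, including the parallel reduction over all ranks
//   // that compute for this well (resWell has a single dynamic block).
//   parallelB.mmv(x, resWell);
//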
inline
double computeHydrostaticCorrection(const double well_ref_depth, const double vfp_ref_depth,
const double rho, const double gravity) {
@@ -44,6 +138,38 @@ namespace Opm {
/// \brief Sums the entries of the diagonal matrix and the residual for distributed wells
template<typename Scalar, typename Comm>
void sumDistributedWellEntries(Dune::DynamicMatrix<Scalar>& mat, Dune::DynamicVector<Scalar>& vec,
const Comm& comm)
{
// DynamicMatrix does not use one contiguous array for storing the data
// but a DynamicVector of DynamicVectors. Hence we need to copy the data
// to contiguous memory for MPI.
if (comm.size() == 1)
{
return;
}
std::vector<Scalar> allEntries;
allEntries.reserve(mat.N()*mat.M()+vec.size());
for(const auto& row: mat)
{
allEntries.insert(allEntries.end(), row.begin(), row.end());
}
allEntries.insert(allEntries.end(), vec.begin(), vec.end());
comm.sum(allEntries.data(), allEntries.size());
auto pos = allEntries.begin();
auto cols = mat.cols();
for(auto&& row: mat)
{
std::copy(pos, pos + cols, &(row[0]));
pos += cols;
}
assert(std::size_t(allEntries.end() - pos) == vec.size());
std::copy(pos, allEntries.end(), &(vec[0]));
}
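// Usage sketch (hypothetical variable names, not part of the original
// sources): after each rank has assembled its local contributions to the
// dense well matrix D and the well residual, the entries are summed over
// the well's communicator so that every participating rank holds the
// complete values, e.g.
//
//   Dune::DynamicMatrix<double> duneD(numWellEq, numWellEq);
//   Dune::DynamicVector<double> resWell(numWellEq);
//   // ... add the contributions of the locally owned perforations ...
//   sumDistributedWellEntries(duneD, resWell, parallel_well_info.communication());
//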
template <int dim, class C2F, class FC>
std::array<double, dim>