Perform the parallel reduction when applying the wells.

Applying the B matrix is essentially a component-wise multiplication
with a vector followed by a parallel reduction. We perform that
reduction onto all ranks computing for the well, which saves the
broadcast that would otherwise be needed when applying C^T.
Markus Blatt
2020-10-09 15:09:28 +02:00
parent 3996967344
commit 8ee58096ba
11 changed files with 188 additions and 13 deletions
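
The idea can be illustrated with a small stand-alone MPI sketch (hypothetical data, not code from this commit): every rank multiplies its locally owned perforation entries of B with the matching local solution values, and an all-reduce sums the partial results onto every rank, so the subsequent application of C^T needs no broadcast.

#include <mpi.h>

#include <numeric>
#include <vector>

int main(int argc, char** argv)
{
    MPI_Init(&argc, &argv);

    // Hypothetical local data: one entry per locally owned perforation.
    std::vector<double> Brow{1.0, 2.0};  // local entries of a row of B
    std::vector<double> x{0.5, 0.25};    // matching local solution values

    // Component-wise multiplication followed by a local summation.
    double partial = std::inner_product(Brow.begin(), Brow.end(), x.begin(), 0.0);

    // Parallel reduction onto *all* ranks computing for the well.
    double y = 0.0;
    MPI_Allreduce(&partial, &y, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);

    // y now holds the complete well contribution on every rank, so C^T
    // can be applied afterwards without an extra broadcast.

    MPI_Finalize();
    return 0;
}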


@@ -1,6 +1,7 @@
/*
Copyright 2016 SINTEF ICT, Applied Mathematics.
Copyright 2016 Statoil ASA.
Copyright 2020 OPM-OP AS.
This file is part of the Open Porous Media project (OPM).
@@ -22,6 +23,12 @@
#ifndef OPM_WELLHELPERS_HEADER_INCLUDED
#define OPM_WELLHELPERS_HEADER_INCLUDED
#include <opm/common/OpmLog/OpmLog.hpp>
#include <opm/simulators/wells/ParallelWellInfo.hpp>
#include <dune/istl/bcrsmatrix.hh>
#include <dune/common/dynmatrix.hh>
#include <dune/common/parallel/mpihelper.hh>
#include <cassert>
#include <functional>
#include <numeric>
#include <string>
#include <vector>
@@ -33,6 +40,93 @@ namespace Opm {
namespace wellhelpers
{
/// \brief A wrapper around the B matrix for distributed wells
///
/// For standard wells, applying the B matrix is basically a multiplication
/// with the equations of the perforated cells followed by a reduction
/// (summation) of these onto the well equations.
///
/// This class does that in the functions mv and mmv (from the DUNE
/// matrix interface).
///
/// \tparam Scalar The scalar used for the computation.
template<typename Scalar>
class ParallelStandardWellB
{
public:
using Block = Dune::DynamicMatrix<Scalar>;
using Matrix = Dune::BCRSMatrix<Block>;
ParallelStandardWellB(const Matrix& B, const ParallelWellInfo& pinfo)
: B_(&B), pinfo_(&pinfo)
{}
//! y = A x
template<class X, class Y>
void mv (const X& x, Y& y) const
{
#if !defined(NDEBUG) && HAVE_MPI
// We need to make sure that all ranks are actually computing
// for the same well. We do this by checking the name of the well.
int cstring_size = pinfo_->name().size()+1;
std::vector<int> sizes(pinfo_->communication().size());
pinfo_->communication().allgather(&cstring_size, 1, sizes.data());
std::vector<int> offsets(sizes.size()+1, 0); // last entry will be the accumulated size
std::partial_sum(sizes.begin(), sizes.end(), offsets.begin() + 1);
std::vector<char> cstrings(offsets[sizes.size()]);
bool consistentWells = true;
char* send = const_cast<char*>(pinfo_->name().c_str());
pinfo_->communication().allgatherv(send, cstring_size,
cstrings.data(), sizes.data(),
offsets.data());
for(std::size_t i = 0; i < sizes.size(); ++i)
{
std::string name(cstrings.data()+offsets[i]);
if (name != pinfo_->name())
{
if (pinfo_->communication().rank() == 0)
{
// Only one process per well logs; it might not be rank 0 of MPI_COMM_WORLD, though.
std::string msg = std::string("Fatal Error: Not all ranks are computing for the same well")
+ ", well should be " + pinfo_->name() + " but is "
+ name;
OpmLog::debug(msg);
}
consistentWells = false;
break;
}
}
pinfo_->communication().barrier();
// As not all processes are involved here, we need to use MPI_Abort and hope MPI kills them all
if (!consistentWells)
{
MPI_Abort(MPI_COMM_WORLD, 1);
}
#endif
B_->mv(x, y);
// Applying the B matrix is basically a component-wise multiplication
// with a vector followed by a parallel reduction. We perform that
// reduction onto all ranks computing for the well to save the
// broadcast when applying C^T.
using YField = typename Y::block_type::value_type;
assert(y.size() == 1);
this->pinfo_->communication().allreduce<std::plus<YField>>(y[0].container().data(),
y[0].container().size());
}
//! y -= A x
template<class X, class Y>
void mmv (const X& x, Y& y) const
{
Y temp(y);
mv(x, temp); // includes parallel reduction
y -= temp;
}
private:
const Matrix* B_;
const ParallelWellInfo* pinfo_;
};
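// A minimal usage sketch (hypothetical variable names, not part of this
// header): the wrapper is constructed from the well's B matrix and its
// ParallelWellInfo and is then used like an ordinary matrix when forming
// the well residual, e.g.
//
//   ParallelStandardWellB<double> parallelB(duneB, parallel_well_info);
//   // resWell -= B x, including the parallel reduction over all ranks
//   // that compute for this well (resWell has a single dynamic block).
//   parallelB.mmv(x, resWell);
//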
inline
double computeHydrostaticCorrection(const double well_ref_depth, const double vfp_ref_depth,
const double rho, const double gravity) {
@@ -44,6 +138,38 @@ namespace Opm {
/// \brief Sums the entries of the diagonal matrix and the residual for distributed wells
template<typename Scalar, typename Comm>
void sumDistributedWellEntries(Dune::DynamicMatrix<Scalar>& mat, Dune::DynamicVector<Scalar>& vec,
const Comm& comm)
{
// DynamicMatrix does not use one contiguous array for storing the data
// but a DynamicVector of DynamicVectors. Hence we need to copy the data
// to contiguous memory for MPI.
if (comm.size() == 1)
{
return;
}
std::vector<Scalar> allEntries;
allEntries.reserve(mat.N()*mat.M()+vec.size());
for(const auto& row: mat)
{
allEntries.insert(allEntries.end(), row.begin(), row.end());
}
allEntries.insert(allEntries.end(), vec.begin(), vec.end());
comm.sum(allEntries.data(), allEntries.size());
auto pos = allEntries.begin();
auto cols = mat.cols();
for(auto&& row: mat)
{
std::copy(pos, pos + cols, &(row[0]));
pos += cols;
}
assert(std::size_t(allEntries.end() - pos) == vec.size());
std::copy(pos, allEntries.end(), &(vec[0]));
}
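// Usage sketch (hypothetical variable names, not part of the original
// sources): after each rank has assembled its local contributions to the
// dense well matrix D and the well residual, the entries are summed over
// the well's communicator so that every participating rank holds the
// complete values, e.g.
//
//   Dune::DynamicMatrix<double> duneD(numWellEq, numWellEq);
//   Dune::DynamicVector<double> resWell(numWellEq);
//   // ... add the contributions of the locally owned perforations ...
//   sumDistributedWellEntries(duneD, resWell, parallel_well_info.communication());
//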
template <int dim, class C2F, class FC>
std::array<double, dim>