/*
Copyright 2014, 2015 Dr. Markus Blatt - HPC-Simulation-Software & Services
Copyright 2014, 2015 Statoil ASA
Copyright 2015 NTNU
This file is part of the Open Porous Media project (OPM).
OPM is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
OPM is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with OPM. If not, see .
*/
#ifndef OPM_PARALLELISTLINFORMTION_HEADER_INCLUDED
#define OPM_PARALLELISTLINFORMTION_HEADER_INCLUDED
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#if HAVE_MPI && HAVE_DUNE_ISTL
#include
#include
#include
#include
#include
#include
#include
#include
namespace Opm
{
namespace
{
template
struct is_tuple
: std::integral_constant
{};
template
struct is_tuple >
: std::integral_constant
{};
}
/// \brief Class that encapsulates the parallelization information needed by the
/// ISTL solvers.
class ParallelISTLInformation
{
public:
/// \brief The type of the parallel index set used.
typedef Dune::OwnerOverlapCopyCommunication::ParallelIndexSet ParallelIndexSet;
/// \brief The type of the remote indices information used.
typedef Dune::OwnerOverlapCopyCommunication::RemoteIndices RemoteIndices;
/// \brief Constructs an empty parallel information object using MPI_COMM_WORLD
ParallelISTLInformation()
: indexSet_(new ParallelIndexSet),
remoteIndices_(new RemoteIndices(*indexSet_, *indexSet_, MPI_COMM_WORLD)),
communicator_(MPI_COMM_WORLD)
{}
/// \brief Constructs an empty parallel information object using a communicator.
/// \param communicator The communicator to use.
ParallelISTLInformation(MPI_Comm communicator)
: indexSet_(new ParallelIndexSet),
remoteIndices_(new RemoteIndices(*indexSet_, *indexSet_, communicator)),
communicator_(communicator)
{}
/// \brief Constructs a parallel information object from the specified information.
/// \param indexSet The parallel index set to use.
/// \param remoteIndices The remote indices information to use.
/// \param communicator The communicator to use.
ParallelISTLInformation(const std::shared_ptr& indexSet,
const std::shared_ptr& remoteIndices,
MPI_Comm communicator)
: indexSet_(indexSet), remoteIndices_(remoteIndices), communicator_(communicator)
{}
/// \brief Copy constructor.
///
/// The information will be shared by the the two objects.
ParallelISTLInformation(const ParallelISTLInformation& other)
: indexSet_(other.indexSet_), remoteIndices_(other.remoteIndices_),
communicator_(other.communicator_)
{}
/// \brief Get a pointer to the underlying index set.
std::shared_ptr indexSet() const
{
return indexSet_;
}
/// \brief Get a pointer to the remote indices information.
std::shared_ptr remoteIndices() const
{
return remoteIndices_;
}
/// \brief Get the Collective MPI communicator that we use.
Parallel::Communication communicator() const
{
return communicator_;
}
/// \brief Copy the information stored to the specified objects.
/// \param[out] indexSet The object to store the index set in.
/// \param[out] remoteIndices The object to store the remote indices information in.
void copyValuesTo(ParallelIndexSet& indexSet, RemoteIndices& remoteIndices,
std::size_t local_component_size = 0, std::size_t num_components = 1) const
{
ParallelIndexSet::GlobalIndex global_component_size = local_component_size;
if ( num_components > 1 )
{
ParallelIndexSet::GlobalIndex max_gi = 0;
// component the max global index
for( auto i = indexSet_->begin(), end = indexSet_->end(); i != end; ++i )
{
max_gi = std::max(max_gi, i->global());
}
global_component_size = max_gi+1;
global_component_size = communicator_.max(global_component_size);
}
indexSet.beginResize();
IndexSetInserter inserter(indexSet, global_component_size,
local_component_size, num_components);
std::for_each(indexSet_->begin(), indexSet_->end(), inserter);
indexSet.endResize();
remoteIndices.rebuild();
}
/// \brief Communcate the dofs owned by us to the other process.
///
/// Afterwards all associated dofs will contain the same data.
template
void copyOwnerToAll (const T& source, T& dest) const
{
typedef Dune::Combine,Dune::EnumItem,Dune::OwnerOverlapCopyAttributeSet::AttributeSet> OwnerOverlapSet;
typedef Dune::EnumItem OwnerSet;
typedef Dune::Combine,Dune::OwnerOverlapCopyAttributeSet::AttributeSet> AllSet;
OwnerSet sourceFlags;
AllSet destFlags;
Dune::Interface interface(communicator_);
if( !remoteIndices_->isSynced() )
{
remoteIndices_->rebuild();
}
interface.build(*remoteIndices_,sourceFlags,destFlags);
Dune::BufferedCommunicator communicator;
communicator.template build(interface);
communicator.template forward >(source,dest);
communicator.free();
}
template
const std::vector& updateOwnerMask(const T& container) const
{
if( ! indexSet_ )
{
OPM_THROW(std::runtime_error, "Trying to update owner mask without parallel information!");
}
if( static_cast(container.size())!= ownerMask_.size() )
{
ownerMask_.resize(container.size(), 1.);
for( auto i=indexSet_->begin(), end=indexSet_->end(); i!=end; ++i )
{
if (i->local().attribute()!=Dune::OwnerOverlapCopyAttributeSet::owner)
{
ownerMask_[i->local().local()] = 0.;
}
}
}
return ownerMask_;
}
/// \brief Get the owner Mask.
///
/// \return A vector with entries 0, and 1. 0 marks an index that we cannot
/// compute correct results for. 1 marks an index that this process
/// is responsible for and computes correct results in parallel.
const std::vector& getOwnerMask() const
{
return ownerMask_;
}
/// \brief Compute one or more global reductions.
///
/// This function can either be used with a container, an operator, and an initial value
/// to compute a reduction. Or with tuples of them to compute multiple reductions with only
/// one global communication.
/// The possible functors needed can be constructed with Opm::Reduction::makeGlobalMaxFunctor(),
/// Opm::Reduction::makeLInfinityNormFunctor(),
/// Opm::Reduction::makeGlobalMinFunctor(), and
/// Opm::Reduction::makeGlobalSumFunctor().
/// \tparam type of the container or the tuple of containers.
/// \tparam tyoe of the operator or a tuple of operators, examples are e.g.
/// Reduction::MaskIDOperator, Reduction::MaskToMinOperator,
/// and Reduction::MaskToMaxOperator. Has to provide an operator() that takes three
/// arguments (the last one is the mask value: 1 for a dof that we own, 0 otherwise),
/// a method maskValue that takes a value and mask value, and localOperator that
/// returns the underlying binary operator.
/// \param container A container or tuple of containers.
/// \param binaryOperator An operator doing the reduction of two values.
/// \param value The initial value or a tuple of them.
template
void computeReduction(const Container& container, BinaryOperator binaryOperator,
T& value) const
{
computeReduction(container, binaryOperator, value, is_tuple());
}
private:
/// \brief compute the reductions for tuples.
///
/// This is a helper function to prepare for calling computeTupleReduction.
template
void computeReduction(const Container& container, BinaryOperator binaryOperator,
T& value, std::integral_constant) const
{
computeTupleReduction(container, binaryOperator, value);
}
/// \brief compute the reductions for non-tuples.
///
/// This is a helper function to prepare for calling computeTupleReduction.
template
void computeReduction(const Container& container, BinaryOperator binaryOperator,
T& value, std::integral_constant) const
{
std::tuple containers=std::tuple(container);
auto values=std::make_tuple(value);
auto operators=std::make_tuple(binaryOperator);
computeTupleReduction(containers, operators, values);
value=std::get<0>(values);
}
/// \brief Compute the reductions for tuples.
template
void computeTupleReduction(const std::tuple& containers,
std::tuple& operators,
std::tuple& values) const
{
static_assert(std::tuple_size >::value==
std::tuple_size >::value,
"We need the same number of containers and binary operators");
static_assert(std::tuple_size >::value==
std::tuple_size >::value,
"We need the same number of containers and return values");
if( std::tuple_size >::value==0 )
{
return;
}
// Copy the initial values.
std::tuple init=values;
updateOwnerMask(std::get<0>(containers));
computeLocalReduction(containers, operators, values);
std::vector > receivedValues(communicator_.size());
communicator_.allgather(&values, 1, &(receivedValues[0]));
values=init;
for( auto rvals=receivedValues.begin(), endvals=receivedValues.end(); rvals!=endvals;
++rvals )
{
computeGlobalReduction(*rvals, operators, values);
}
}
/// \brief TMP for computing the the global reduction after receiving the local ones.
///
/// End of recursion.
template
typename std::enable_if::type
computeGlobalReduction(const std::tuple&,
std::tuple&,
std::tuple&) const
{}
/// \brief TMP for computing the the global reduction after receiving the local ones.
template
typename std::enable_if::type
computeGlobalReduction(const std::tuple& receivedValues,
std::tuple& operators,
std::tuple& values) const
{
auto& val=std::get(values);
val = std::get(operators).localOperator()(val, std::get(receivedValues));
computeGlobalReduction(receivedValues, operators, values);
}
/// \brief TMP for computing the the local reduction on the DOF that the process owns.
///
/// End of recursion.
template
typename std::enable_if::type
computeLocalReduction(const std::tuple&,
std::tuple&,
std::tuple&) const
{}
/// \brief TMP for computing the the local reduction on the DOF that the process owns.
template
typename std::enable_if::type
computeLocalReduction(const std::tuple& containers,
std::tuple& operators,
std::tuple& values) const
{
const auto& container = std::get(containers);
if( container.size() )
{
auto& reduceOperator = std::get(operators);
// Eigen:Block does not support STL iterators!!!!
// Therefore we need to rely on the harder random-access
// property of the containers. But this should be save, too.
// Just commenting out code in the hope that Eigen might improve
// in this regard in the future.
//auto newVal = container.begin();
auto mask = ownerMask_.begin();
auto& value = std::get(values);
value = reduceOperator.getInitialValue();
for( auto endVal=ownerMask_.end(); mask!=endVal;
/*++newVal,*/ ++mask )
{
value = reduceOperator(value, container[mask-ownerMask_.begin()], *mask);
}
}
computeLocalReduction(containers, operators, values);
}
/** \brief gather/scatter callback for communcation */
template
struct CopyGatherScatter
{
typedef typename Dune::CommPolicy::IndexedType V;
static V gather(const T& a, std::size_t i)
{
return a[i];
}
static void scatter(T& a, V v, std::size_t i)
{
a[i] = v;
}
};
template
class IndexSetInserter
{
public:
typedef T ParallelIndexSet;
typedef typename ParallelIndexSet::LocalIndex LocalIndex;
typedef typename ParallelIndexSet::GlobalIndex GlobalIndex;
IndexSetInserter(ParallelIndexSet& indexSet, const GlobalIndex& component_size,
std::size_t local_component_size, std::size_t num_components)
: indexSet_(&indexSet), component_size_(component_size),
local_component_size_(local_component_size),
num_components_(num_components)
{}
void operator()(const typename ParallelIndexSet::IndexPair& pair)
{
for(std::size_t i = 0; i < num_components_; i++)
indexSet_->add(i * component_size_ + pair.global(),
LocalIndex(i * local_component_size_ + pair.local(),
pair.local().attribute()));
}
private:
ParallelIndexSet* indexSet_;
/// \brief The global number of unknowns per component/equation.
GlobalIndex component_size_;
/// \brief The local number of unknowns per component/equation.
std::size_t local_component_size_;
/// \brief The number of components/equations.
std::size_t num_components_;
};
std::shared_ptr indexSet_;
std::shared_ptr remoteIndices_;
Dune::CollectiveCommunication communicator_;
mutable std::vector ownerMask_;
};
namespace Reduction
{
/// \brief An operator that only uses values where mask is 1.
///
/// Could be used to compute a global sum
/// \tparam BinaryOperator The wrapped binary operator that specifies
// the reduction operation.
template
struct MaskIDOperator
{
// This is a real nice one: numeric limits needs a type without const
// or reference qualifier. Otherwise we get complete nonesense.
typedef typename std::remove_cv<
typename std::remove_reference::type
>::type Result;
/// \brief Apply the underlying binary operator according to the mask.
///
/// The BinaryOperator will be called with t1, and mask*t2.
/// \param t1 first value
/// \param t2 second value (might be modified).
/// \param mask The mask (0 or 1).
template
T operator()(const T& t1, const T& t2, const T1& mask)
{
return b_(t1, maskValue(t2, mask));
}
template
T maskValue(const T& t, const T1& mask)
{
return t*mask;
}
BinaryOperator& localOperator()
{
return b_;
}
Result getInitialValue()
{
return Result();
}
private:
BinaryOperator b_;
};
/// \brief An operator for computing a parallel inner product.
template
struct InnerProductFunctor
{
/// \brief Apply the underlying binary operator according to the mask.
///
/// The BinaryOperator will be called with t1, and mask*t2.
/// \param t1 first value
/// \param t2 second value (might be modified).
/// \param mask The mask (0 or 1).
template
T operator()(const T& t1, const T& t2, const T1& mask)
{
T masked = maskValue(t2, mask);
return t1 + masked * masked;
}
template
T maskValue(const T& t, const T1& mask)
{
return t*mask;
}
std::plus localOperator()
{
return std::plus();
}
T getInitialValue()
{
return T();
}
};
/// \brief An operator that converts the values where mask is 0 to the minimum value
///
/// Could be used to compute a global maximum.
/// \tparam BinaryOperator The wrapped binary operator that specifies
// the reduction operation.
template
struct MaskToMinOperator
{
// This is a real nice one: numeric limits has to a type without const
// or reference. Otherwise we get complete nonesense.
typedef typename std::remove_reference<
typename std::remove_const::type
>::type Result;
MaskToMinOperator(BinaryOperator b)
: b_(b)
{}
/// \brief Apply the underlying binary operator according to the mask.
///
/// If mask is 0 then t2 will be substituted by the lowest value,
/// else t2 will be used.
/// \param t1 first value
/// \param t2 second value (might be modified).
template
T operator()(const T& t1, const T& t2, const T1& mask)
{
return b_(t1, maskValue(t2, mask));
}
template
T maskValue(const T& t, const T1& mask)
{
if( mask )
{
return t;
}
else
{
return getInitialValue();
}
}
Result getInitialValue()
{
//g++-4.4 does not support std::numeric_limits::lowest();
// we rely on IEE 754 for floating point values and use min()
// for integral types.
if( std::is_integral::value )
{
return std::numeric_limits::min();
}
else
{
return -std::numeric_limits::max();
}
}
/// \brief Get the underlying binary operator.
///
/// This might be needed to compute the reduction after each processor
/// has computed its local one.
BinaryOperator& localOperator()
{
return b_;
}
private:
BinaryOperator b_;
};
/// \brief An operator that converts the values where mask is 0 to the maximum value
///
/// Could be used to compute a global minimum.
template
struct MaskToMaxOperator
{
// This is a real nice one: numeric limits has to a type without const
// or reference. Otherwise we get complete nonesense.
typedef typename std::remove_cv<
typename std::remove_reference::type
>::type Result;
MaskToMaxOperator(BinaryOperator b)
: b_(b)
{}
/// \brief Apply the underlying binary operator according to the mask.
///
/// If mask is 0 then t2 will be substituted by the maximum value,
/// else t2 will be used.
/// \param t1 first value
/// \param t2 second value (might be modified).
template
T operator()(const T& t1, const T& t2, const T1& mask)
{
return b_(t1, maskValue(t2, mask));
}
template
T maskValue(const T& t, const T1& mask)
{
if( mask )
{
return t;
}
else
{
return std::numeric_limits::max();
}
}
BinaryOperator& localOperator()
{
return b_;
}
Result getInitialValue()
{
return std::numeric_limits::max();
}
private:
BinaryOperator b_;
};
/// \brief Create a functor for computing a global sum.
///
/// To be used with ParallelISTLInformation::computeReduction.
template
MaskIDOperator >
makeGlobalSumFunctor()
{
return MaskIDOperator >();
}
/// \brief Create a functor for computing a global maximum.
///
/// To be used with ParallelISTLInformation::computeReduction.
template
auto makeGlobalMaxFunctor()
{
struct MaxOp
{
using result_type = T;
const result_type& operator()(const T& t1, const T& t2)
{
return std::max(t1, t2);
}
};
return MaskToMinOperator(MaxOp());
}
namespace detail
{
/// \brief Computes the maximum of the absolute values of two values.
template
struct MaxAbsFunctor
{
using result_type = T;
result_type operator()(const T& t1,
const T& t2)
{
return std::max(std::abs(t1), std::abs(t2));
}
};
// Specialization for unsigned integers. They need their own
// version since abs(x) is ambiguous (as well as somewhat
// meaningless).
template
struct MaxAbsFunctor::value>::type>
{
using result_type = T;
result_type operator()(const T& t1,
const T& t2)
{
return std::max(t1, t2);
}
};
}
/// \brief Create a functor for computing a global L infinity norm
///
/// To be used with ParallelISTLInformation::computeReduction.
template
MaskIDOperator >
makeLInfinityNormFunctor()
{
return MaskIDOperator >();
}
/// \brief Create a functor for computing a global minimum.
///
/// To be used with ParallelISTLInformation::computeReduction.
template
auto
makeGlobalMinFunctor()
{
struct MinOp
{
using result_type = T;
const result_type& operator()(const T& t1, const T& t2)
{
return std::min(t1, t2);
}
};
return MaskToMaxOperator(MinOp());
}
template
InnerProductFunctor
makeInnerProductFunctor()
{
return InnerProductFunctor();
}
} // end namespace Reduction
} // end namespace Opm
#endif
namespace Opm
{
/// \brief Extracts the information about the data decomposition from the grid for dune-istl
///
/// In the case that grid is a parallel grid this method will query it to get the information
/// about the data decompoisition and convert it to the format expected by the linear algebra
/// of dune-istl.
/// \warn for UnstructuredGrid this function doesn't do anything.
/// \param anyComm The handle to store the information in. If grid is a parallel grid
/// then this will ecapsulate an instance of ParallelISTLInformation.
/// \param grid The grid to inspect.
inline void extractParallelGridInformationToISTL(std::any& anyComm, const UnstructuredGrid& grid)
{
(void)anyComm; (void)grid;
}
/// \brief Accumulates entries masked with 1.
/// \param container The container whose values to accumulate.
/// \param maskContainer null pointer or a pointer to a container
/// with entries 0 and 1. Only values at indices with a 1 stored
/// will be accumulated. If null then all values will be accumulated
/// \return the summ of all entries that should be represented.
template
auto
accumulateMaskedValues(const T1& container, const std::vector* maskContainer)
-> decltype(container[0]*(*maskContainer)[0])
{
decltype(container[0]*(*maskContainer)[0]) initial = 0;
if( maskContainer )
{
return std::inner_product(container.begin(), container.end(), maskContainer->begin(),
initial);
}else
{
return std::accumulate(container.begin(), container.end(), initial);
}
}
} // end namespace Opm
#endif