/* Copyright 2014, 2015 Dr. Markus Blatt - HPC-Simulation-Software & Services Copyright 2014, 2015 Statoil ASA Copyright 2015 NTNU This file is part of the Open Porous Media project (OPM). OPM is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. OPM is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with OPM. If not, see . */ #include #if HAVE_MPI && HAVE_DUNE_ISTL #include #include #include #include #include #include #include namespace { template class IndexSetInserter { public: using ParallelIndexSet = T; using LocalIndex = typename ParallelIndexSet::LocalIndex; using GlobalIndex = typename ParallelIndexSet::GlobalIndex; IndexSetInserter(ParallelIndexSet& indexSet, const GlobalIndex& component_size, std::size_t local_component_size, std::size_t num_components) : indexSet_(&indexSet), component_size_(component_size), local_component_size_(local_component_size), num_components_(num_components) {} void operator()(const typename ParallelIndexSet::IndexPair& pair) { for(std::size_t i = 0; i < num_components_; i++) indexSet_->add(i * component_size_ + pair.global(), LocalIndex(i * local_component_size_ + pair.local(), pair.local().attribute())); } private: ParallelIndexSet* indexSet_; /// \brief The global number of unknowns per component/equation. GlobalIndex component_size_; /// \brief The local number of unknowns per component/equation. std::size_t local_component_size_; /// \brief The number of components/equations. std::size_t num_components_; }; /** \brief gather/scatter callback for communcation */ template struct CopyGatherScatter { using V = typename Dune::CommPolicy::IndexedType; static V gather(const T& a, std::size_t i) { return a[i]; } static void scatter(T& a, V v, std::size_t i) { a[i] = v; } }; template typename std::enable_if::type computeGlobalReduction(const std::tuple&, std::tuple&, std::tuple&) {} template typename std::enable_if::type computeGlobalReduction(const std::tuple& receivedValues, std::tuple& operators, std::tuple& values) { auto& val = std::get(values); val = std::get(operators).localOperator()(val, std::get(receivedValues)); computeGlobalReduction(receivedValues, operators, values); } template typename std::enable_if::type computeLocalReduction(const std::tuple&, std::tuple&, std::tuple&, const std::vector&) {} template typename std::enable_if::type computeLocalReduction(const std::tuple& containers, std::tuple& operators, std::tuple& values, const std::vector& ownerMask) { const auto& container = std::get(containers); if (container.size()) { auto& reduceOperator = std::get(operators); // Eigen:Block does not support STL iterators!!!! // Therefore we need to rely on the harder random-access // property of the containers. But this should be save, too. // Just commenting out code in the hope that Eigen might improve // in this regard in the future. //auto newVal = container.begin(); auto mask = ownerMask.begin(); auto& value = std::get(values); value = reduceOperator.getInitialValue(); for (auto endVal = ownerMask.end(); mask != endVal; /*++newVal,*/ ++mask ) { value = reduceOperator(value, container[mask-ownerMask.begin()], *mask); } } computeLocalReduction(containers, operators, values, ownerMask); } } namespace Opm { namespace { template struct is_tuple : std::integral_constant {}; template struct is_tuple > : std::integral_constant {}; } ParallelISTLInformation::ParallelISTLInformation() : indexSet_(new ParallelIndexSet), remoteIndices_(new RemoteIndices(*indexSet_, *indexSet_, MPI_COMM_WORLD)), communicator_(MPI_COMM_WORLD) {} ParallelISTLInformation::ParallelISTLInformation(MPI_Comm communicator) : indexSet_(new ParallelIndexSet), remoteIndices_(new RemoteIndices(*indexSet_, *indexSet_, communicator)), communicator_(communicator) {} ParallelISTLInformation:: ParallelISTLInformation(const std::shared_ptr& indexSet, const std::shared_ptr& remoteIndices, MPI_Comm communicator) : indexSet_(indexSet), remoteIndices_(remoteIndices), communicator_(communicator) {} ParallelISTLInformation::ParallelISTLInformation(const ParallelISTLInformation& other) : indexSet_(other.indexSet_), remoteIndices_(other.remoteIndices_), communicator_(other.communicator_) {} void ParallelISTLInformation::copyValuesTo(ParallelIndexSet& indexSet, RemoteIndices& remoteIndices, std::size_t local_component_size, std::size_t num_components) const { ParallelIndexSet::GlobalIndex global_component_size = local_component_size; if ( num_components > 1 ) { ParallelIndexSet::GlobalIndex max_gi = 0; // component the max global index for( auto i = indexSet_->begin(), end = indexSet_->end(); i != end; ++i ) { max_gi = std::max(max_gi, i->global()); } global_component_size = max_gi+1; global_component_size = communicator_.max(global_component_size); } indexSet.beginResize(); IndexSetInserter inserter(indexSet, global_component_size, local_component_size, num_components); std::for_each(indexSet_->begin(), indexSet_->end(), inserter); indexSet.endResize(); remoteIndices.rebuild(); } template void ParallelISTLInformation::copyOwnerToAll(const T& source, T& dest) const { using AS = Dune::OwnerOverlapCopyAttributeSet; using CopySet = Dune::EnumItem; using OwnerSet = Dune::EnumItem; using OverlapSet = Dune::EnumItem; using OwnerOverlapSet = Dune::Combine; using AllSet = Dune::Combine; OwnerSet sourceFlags; AllSet destFlags; Dune::Interface interface(communicator_); if( !remoteIndices_->isSynced() ) { remoteIndices_->rebuild(); } interface.build(*remoteIndices_,sourceFlags,destFlags); Dune::BufferedCommunicator communicator; communicator.template build(interface); communicator.template forward>(source,dest); communicator.free(); } template const std::vector& ParallelISTLInformation::updateOwnerMask(const T& container) const { if (!indexSet_) { OPM_THROW(std::runtime_error, "Trying to update owner mask without parallel information!"); } if (static_cast(container.size()) != ownerMask_.size()) { ownerMask_.resize(container.size(), 1.); for (const auto& i : *indexSet_) { if (i.local().attribute() != Dune::OwnerOverlapCopyAttributeSet::owner) { ownerMask_[i.local().local()] = 0.; } } } return ownerMask_; } template void ParallelISTLInformation::computeReduction(const Container& container, BinaryOperator binaryOperator, T& value) const { if constexpr (is_tuple()) computeTupleReduction(container, binaryOperator, value); else { std::tuple containers = std::tuple(container); auto values = std::make_tuple(value); auto operators = std::make_tuple(binaryOperator); computeTupleReduction(containers, operators, values); value = std::get<0>(values); } } template void ParallelISTLInformation::computeTupleReduction(const std::tuple& containers, std::tuple& operators, std::tuple& values) const { static_assert(std::tuple_size >::value == std::tuple_size >::value, "We need the same number of containers and binary operators"); static_assert(std::tuple_size >::value == std::tuple_size >::value, "We need the same number of containers and return values"); if (std::tuple_size >::value == 0) { return; } // Copy the initial values. std::tuple init = values; updateOwnerMask(std::get<0>(containers)); computeLocalReduction(containers, operators, values, ownerMask_); std::vector > receivedValues(communicator_.size()); communicator_.allgather(&values, 1, &(receivedValues[0])); values = init; for (auto& rval : receivedValues) { computeGlobalReduction(rval, operators, values); } } template auto accumulateMaskedValues(const T1& container, const std::vector* maskContainer) -> decltype(container[0]*(*maskContainer)[0]) { decltype(container[0]*(*maskContainer)[0]) initial = 0; if (maskContainer) { return std::inner_product(container.begin(), container.end(), maskContainer->begin(), initial); } else { return std::accumulate(container.begin(), container.end(), initial); } } template using C1 = std::vector; template using Ops1 = Reduction::MaskIDOperator>; template using C2 = std::tuple, std::vector, std::vector, std::vector, std::vector>; template using Ops2 = std::tuple()), decltype(Reduction::makeGlobalMaxFunctor()), decltype(Reduction::makeGlobalMinFunctor()), decltype(Reduction::makeInnerProductFunctor()), decltype(Reduction::makeLInfinityNormFunctor())>; template using Vals2 = std::tuple; #define INSTANCE1(T) \ template void ParallelISTLInformation::computeReduction,Ops1,T>(const C1&,Ops1,T&) const; #define INSTANCE2(T) \ template void ParallelISTLInformation::computeReduction,Ops2,Vals2>(const C2&,Ops2,Vals2&) const; #define INSTANCE(T) \ INSTANCE1(T) \ INSTANCE2(T) INSTANCE(int) INSTANCE(float) INSTANCE(std::size_t) } // namespace Opm #endif // HAVE_MPI && HAVE_DUNE_ISTL