From 7bce15c04b9d697b8544b6ded7cd22fab5a29404 Mon Sep 17 00:00:00 2001 From: Markus Blatt Date: Wed, 21 Jan 2015 15:44:20 +0100 Subject: [PATCH] Added methods for computing global reductions. We need to compute quite a few global reductions in the Newton method of opm-autodiff. This commit adds the functionality to compute several reductions combined using only one global communication. Compiles and test succeeds with one or more process. --- opm/core/linalg/ParallelIstlInformation.hpp | 301 +++++++++++++++++++- tests/DuneIstlTestHelpers.hpp | 220 ++++++++++++++ tests/test_parallel_linearsolver.cpp | 183 +----------- tests/test_parallelistlinformation.cpp | 81 ++++++ 4 files changed, 598 insertions(+), 187 deletions(-) create mode 100644 tests/DuneIstlTestHelpers.hpp create mode 100644 tests/test_parallelistlinformation.cpp diff --git a/opm/core/linalg/ParallelIstlInformation.hpp b/opm/core/linalg/ParallelIstlInformation.hpp index 38795774c..479f4be98 100644 --- a/opm/core/linalg/ParallelIstlInformation.hpp +++ b/opm/core/linalg/ParallelIstlInformation.hpp @@ -1,5 +1,7 @@ /* - Copyright 2014 Dr. Markus Blatt - HPC-Simulation-Software & Services + Copyright 2014, 2015 Dr. Markus Blatt - HPC-Simulation-Software & Services + Copyright 2014 Statoil ASA + Copyright 2015 NTNU This file is part of the Open Porous Media project (OPM). @@ -19,6 +21,12 @@ #ifndef OPM_PARALLELISTLINFORMTION_HEADER_INCLUDED #define OPM_PARALLELISTLINFORMTION_HEADER_INCLUDED + +#include +#include +#include +#include + #if HAVE_MPI && HAVE_DUNE_ISTL #include "mpi.h" @@ -28,9 +36,23 @@ #include #include +#include +#include namespace Opm { +namespace +{ + + template + struct is_tuple + : std::integral_constant + {}; + template + struct is_tuple > + : std::integral_constant + {}; +} /// \brief Class that encapsulates the parallelization information needed by the /// ISTL solvers. @@ -81,8 +103,8 @@ public: { return remoteIndices_; } - /// \brief Get the MPI communicator that we use. - MPI_Comm communicator() const + /// \brief Get the Collective MPI communicator that we use. + Dune::CollectiveCommunication communicator() const { return communicator_; } @@ -115,9 +137,143 @@ public: communicator.template build(interface); communicator.template forward >(source,dest); communicator.free(); - } + } + template + void updateOwnerMask(const T& container) + { + if(! indexSet_) + OPM_THROW(std::runtime_error, "Trying to update owner mask without parallel information!"); + if(container.size()!= ownerMask_.size()) + { + ownerMask_.resize(container.size(), 1.); + for(auto i=indexSet_->begin(), end=indexSet_->end(); i!=end; ++i) + if (i->local().attribute()!=Dune::OwnerOverlapCopyAttributeSet::owner) + ownerMask_[i->local().local()] = 0.; + } + }; + /// \brief Compute one or more global reductions. + /// + /// This function can either be used with a container, an operator, and an initial value + /// to compute a reduction. Or with tuples of them to compute multiple reductions with only + /// one global communication. + /// \tparam type of the container or the tuple of containers. + /// \tparam tyoe of the operator or a tuple of operators, examples are e.g. + /// Reduction::MaskIDOperator, Reduction::MaskToMinOperator, + /// and Reduction::MaskToMaxOperator. Has to provide an operator() that takes three + /// arguments (the last one is the mask value: 1 for a dof that we own, 0 otherwise), + /// a method maskValue that takes a value and mask value, and localOperator that + /// returns the underlying binary operator. + /// \param container A container or tuple of containers. + /// \param binaryOperator An operator doing the reduction of two values. + /// \param value The initial value or a tuple of them. + template + void computeReduction(const Container& container, BinaryOperator binaryOperator, + T& value) + { + computeReduction(container, binaryOperator, value, is_tuple()); + } private: - /** \brief gather/scatter callback for communcation */ + /// \brief compute the reductions for tuples. + /// + /// This is a helper function to prepare for calling computeTupleReduction. + template + void computeReduction(const Container& container, BinaryOperator binaryOperator, + T& value, std::integral_constant) + { + computeTupleReduction(container, binaryOperator, value); + } + /// \brief compute the reductions for non-tuples. + /// + /// This is a helper function to prepare for calling computeTupleReduction. + template + void computeReduction(const Container& container, BinaryOperator binaryOperator, + T& value, std::integral_constant) + { + std::tuple containers=std::tuple(container); + auto values=std::make_tuple(value); + auto operators=std::make_tuple(binaryOperator); + computeTupleReduction(containers, operators, values); + value=std::get<0>(values); + } + /// \brief Compute the reductions for tuples. + template + void computeTupleReduction(const std::tuple& containers, + std::tuple& operators, + std::tuple& values) + { + static_assert(std::tuple_size >::value== + std::tuple_size >::value, + "We need the same number of containers and binary operators"); + static_assert(std::tuple_size >::value== + std::tuple_size >::value, + "We need the same number of containers and return values"); + if(std::tuple_size >::value==0) + return; + // Copy the initial values. + std::tuple init=values; + updateOwnerMask(std::get<0>(containers)); + computeLocalReduction(containers, operators, values); + auto val=std::get<0>(values); + std::vector > receivedValues(communicator_.size()); + communicator_.allgather(&values, 1, &(receivedValues[0])); + values=init; + for(auto rvals=receivedValues.begin(), endvals=receivedValues.end(); rvals!=endvals; + ++rvals) + computeGlobalReduction(*rvals, operators, values); + } + /// \brief TMP for computing the the global reduction after receiving the local ones. + /// + /// End of recursion. + template + typename std::enable_if::type + computeGlobalReduction(const std::tuple&, + std::tuple&, + std::tuple&) + {} + /// \brief TMP for computing the the global reduction after receiving the local ones. + template + typename std::enable_if::type + computeGlobalReduction(const std::tuple& receivedValues, + std::tuple& operators, + std::tuple& values) + { + auto& val=std::get(values); + val = std::get(operators).localOperator()(val, std::get(receivedValues)); + } + /// \brief TMP for computing the the local reduction on the DOF that the process owns. + /// + /// End of recursion. + template + typename std::enable_if::type + computeLocalReduction(const std::tuple&, + std::tuple&, + std::tuple&) + {} + /// \brief TMP for computing the the local reduction on the DOF that the process owns. + template + typename std::enable_if::type + computeLocalReduction(const std::tuple& containers, + std::tuple& operators, + std::tuple& values) + { + const auto& container = std::get(containers); + if(container.size()) + { + auto& reduceOperator = std::get(operators); + auto newVal = container.begin(); + auto mask = ownerMask_.begin(); + auto& value = std::get(values); + value = reduceOperator.maskValue(*newVal, *mask); + ++mask; + ++newVal; + + for(auto endVal=container.end(); newVal!=endVal; + ++newVal, ++mask) + value = reduceOperator(value, *newVal, *mask); + } + computeLocalReduction(containers, operators, values); + } + /** \brief gather/scatter callback for communcation */ template struct CopyGatherScatter { @@ -153,8 +309,141 @@ private: std::shared_ptr indexSet_; std::shared_ptr remoteIndices_; - MPI_Comm communicator_; + Dune::CollectiveCommunication communicator_; + mutable std::vector ownerMask_; }; + + namespace Reduction + { + /// \brief An operator that only uses values where mask is 1. + /// + /// Could be used to compute a global sum + /// \tparam BinaryOperator The wrapped binary operator that specifies + // the reduction operation. + template + struct MaskIDOperator + { + /// \brief Apply the underlying binary operator according to the mask. + /// + /// The BinaryOperator will be called with t1, and mask*t2. + /// \param t1 first value + /// \param t2 second value (might be modified). + /// \param mask The mask (0 or 1). + template + T operator()(const T& t1, const T& t2, const T1& mask) + { + b_(t1, maskValue(t2, mask)); + } + template + T maskValue(const T& t, const T1& mask) + { + return t*mask; + } + BinaryOperator& localOperator() + { + return b_; + } + private: + BinaryOperator b_; + }; + + /// \brief An operator that converts the values where mask is 0 to the minimum value + /// + /// Could be used to compute a global maximum. + /// \tparam BinaryOperator The wrapped binary operator that specifies + // the reduction operation. + template + struct MaskToMinOperator + { + /// \brief Apply the underlying binary operator according to the mask. + /// + /// If mask is 0 then t2 will be substituted by the lowest value, + /// else t2 will be used. + /// \param t1 first value + /// \param t2 second value (might be modified). + template + T operator()(const T& t1, const T& t2, const T1& mask) + { + b_(t1, maskValue(t2, mask)); + } + template + T maskValue(const T& t, const T1& mask) + { + if(mask) + return t; + else{ + //g++-4.4 does not support std::numeric_limits::lowest(); + // we rely on IEE 754 for floating point values and use min() + // for integral types. + if(std::is_integral::value) + return -std::numeric_limits::min(); + else + return -std::numeric_limits::max(); + } + } + /// \brief Get the underlying binary operator. + /// + /// This might be needed to compute the reduction after each processor + /// has computed its local one. + BinaryOperator& localOperator() + { + return b_; + } + private: + BinaryOperator b_; + }; + + /// \brief An operator that converts the values where mask is 0 to the maximum value + /// + /// Could be used to compute a global minimum. + template + struct MaskToMaxOperator + { + /// \brief Apply the underlying binary operator according to the mask. + /// + /// If mask is 0 then t2 will be substituted by the maximum value, + /// else t2 will be used. + /// \param t1 first value + /// \param t2 second value (might be modified). + template + T operator()(const T& t1, const T& t2, const T1& mask) + { + b_(t1, maskValue(t2, mask)); + } + template + T maskValue(const T& t, const T1& mask) + { + if(mask) + return t; + else + return std::numeric_limits::max(); + } + BinaryOperator& localOperator() + { + return b_; + } + private: + BinaryOperator b_; + }; + } // end namespace Reduction } // end namespace Opm + #endif + +namespace Opm +{ +/// \brief Extracts the information about the data decomposition from the grid for dune-istl +/// +/// In the case that grid is a parallel grid this method will query it to get the information +/// about the data decompoisition and convert it to the format expected by the linear algebra +/// of dune-istl. +/// \warn for UnstructuredGrid this function doesn't do anything. +/// \param anyComm The handle to store the information in. If grid is a parallel grid +/// then this will ecapsulate an instance of ParallelISTLInformation. +/// \param grid The grid to inspect. + +inline void extractParallelGridInformationToISTL(boost::any& anyComm, const UnstructuredGrid& grid) +{} +} // end namespace Opm + #endif diff --git a/tests/DuneIstlTestHelpers.hpp b/tests/DuneIstlTestHelpers.hpp new file mode 100644 index 000000000..b13100f03 --- /dev/null +++ b/tests/DuneIstlTestHelpers.hpp @@ -0,0 +1,220 @@ +/* + Copyright 2014 Dr. Markus Blatt - HPC-Simulation-Software & Services + Copyright 2014 Statoil ASA + + This file is part of the Open Porous Media project (OPM). + + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with OPM. If not, see . +*/ +#ifndef OPM_DUNEISTLTESTHELPERS_HEADER +#define OPM_DUNEISTLTESTHELPERS_HEADER +// MPI header +#if HAVE_MPI +#include +#else +#error "This file needs to compiled with MPI support!" +#endif + +#include +#if DUNE_VERSION_NEWER(DUNE_COMMON, 2, 3) +#include +#include +#else +#include +#include +#endif + +#include +#include +#include +#include + +#include + +struct MPIFixture { + MPIFixture() + { + int m_argc = boost::unit_test::framework::master_test_suite().argc; + char** m_argv = boost::unit_test::framework::master_test_suite().argv; + MPI_Init(&m_argc, &m_argv); + } + ~MPIFixture() + { + MPI_Finalize(); + } +}; + + +BOOST_GLOBAL_FIXTURE(MPIFixture); + +struct MyMatrix +{ + MyMatrix(std::size_t rows, std::size_t nnz) + : data(nnz, 0.0), rowStart(rows+1, -1), + colIndex(nnz, -1) + {} + MyMatrix() + : data(), rowStart(), colIndex() + {} + + std::vector data; + std::vector rowStart; + std::vector colIndex; +}; + +typedef int LocalId; +typedef int GlobalId; +typedef Dune::OwnerOverlapCopyCommunication Communication; +typedef Dune::OwnerOverlapCopyAttributeSet GridAttributes; +typedef GridAttributes::AttributeSet GridFlag; +typedef Dune::ParallelLocalIndex LocalIndex; + +/// \brief Sets up a paralle Laplacian. +/// +/// The process stores the unknowns with indices in the range [start, end). +/// As we use an overlapping domain decomposition, the process owns the indices +/// in the range [istart, iend]. If we would only used the indices in this range then +/// they form a partitioning of the whole index set. +/// \tparam I The type of the parallel index set (for convenience) +/// \param indexset The parallel index set for marking owner and copy region. +/// \param N The global number of unknowns of the system. +/// \param start The first index stored on this process +/// \param end One past the last index stored on this process +/// \param istart The first index that the process owns. +/// \param iend One past the last index the process owns. +template +std::shared_ptr create1DLaplacian(I& indexset, int N, int start, int end, + int istart, int iend) +{ + indexset.beginResize(); + MyMatrix* mm=new MyMatrix(end-start, (end-start)*3); + int nnz=0; + mm->rowStart[0]=0; + assert(start==0||start=iend) + { + // We are in the overlap region of the grid + // therefore we setup the system such that + // right hand side will equal the left hand side + // of the linear system. + if(localRow>0) + { + mm->colIndex[nnz]=localRow-1; + mm->data[nnz++]=0; + } + mm->colIndex[nnz]=localRow; + mm->data[nnz++]=1.0; + indexset.add(row, LocalIndex(localRow, GridAttributes::copy, true)); + if(localRowcolIndex[nnz]=localRow+1; + mm->data[nnz++]=0; + } + mm->rowStart[localRow+1]=nnz; + continue; + } + + double dval=0; + if(row>0) + { + mm->colIndex[nnz]=localRow-1; + mm->data[nnz++]=-1; + dval+=1; + } + mm->colIndex[nnz]=localRow; + mm->data[nnz++]=2;//dval+(rowcolIndex[nnz]=localRow+1; + mm->data[nnz++]=-1; + dval+=1; + } + mm->rowStart[localRow+1]=nnz; + indexset.add(row, LocalIndex(localRow, GridAttributes::owner, true)); + } + mm->data.resize(nnz); + mm->colIndex.resize(nnz); + indexset.endResize(); + return std::shared_ptr(mm); +} + +template +void createRandomVectors(O& pinfo, int NN, std::vector& x, std::vector& b, + const MyMatrix& mat) +{ + x.resize(NN); + for(auto entry=x.begin(), end =x.end(); entry!=end; ++entry) + *entry=((double) (rand()%100))/10.0; + + pinfo.copyOwnerToAll(x,x); + + b.resize(NN); + + // Construct the right hand side as b=A*x + std::fill(b.begin(), b.end(), 0.0); + for(std::size_t row=0; row computeRegions(int N=100) +{ + int procs, rank; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + MPI_Comm_size(MPI_COMM_WORLD, &procs); + int n = N/procs; // number of unknowns per process + int bigger = N%procs; // number of process with n+1 unknows + + + int start, end, istart, iend; + // Compute owner region + if(rank0) + start = istart - 1; + else + start = istart; + + if(iend #include -#include -#include -#include -#include -#if DUNE_VERSION_NEWER(DUNE_COMMON, 2, 3) -#include -#include -#else -#include -#include -#endif -#include #include #else #error "This file needs to compiled with MPI support!" #endif +#include "DuneIstlTestHelpers.hpp" #include #include @@ -55,179 +44,11 @@ #include #include -struct MPIFixture { - MPIFixture() - { - int m_argc = boost::unit_test::framework::master_test_suite().argc; - char** m_argv = boost::unit_test::framework::master_test_suite().argv; - MPI_Init(&m_argc, &m_argv); - } - ~MPIFixture() - { - MPI_Finalize(); - } -}; - -BOOST_GLOBAL_FIXTURE(MPIFixture); - -struct MyMatrix -{ - MyMatrix(std::size_t rows, std::size_t nnz) - : data(nnz, 0.0), rowStart(rows+1, -1), - colIndex(nnz, -1) - {} - MyMatrix() - : data(), rowStart(), colIndex() - {} - - std::vector data; - std::vector rowStart; - std::vector colIndex; -}; - -typedef int LocalId; -typedef int GlobalId; -typedef Dune::OwnerOverlapCopyCommunication Communication; -typedef Dune::OwnerOverlapCopyAttributeSet GridAttributes; -typedef GridAttributes::AttributeSet GridFlag; -typedef Dune::ParallelLocalIndex LocalIndex; - -/// \brief Sets up a paralle Laplacian. -/// -/// The process stores the unknowns with indices in the range [start, end). -/// As we use an overlapping domain decomposition, the process owns the indices -/// in the range [istart, iend]. If we would only used the indices in this range then -/// they form a partitioning of the whole index set. -/// \tparam I The type of the parallel index set (for convenience) -/// \param indexset The parallel index set for marking owner and copy region. -/// \param N The global number of unknowns of the system. -/// \param start The first index stored on this process -/// \param end One past the last index stored on this process -/// \param istart The first index that the process owns. -/// \param iend One past the last index the process owns. -template -std::shared_ptr create1DLaplacian(I& indexset, int N, int start, int end, - int istart, int iend) -{ - indexset.beginResize(); - MyMatrix* mm=new MyMatrix(end-start, (end-start)*3); - int nnz=0; - mm->rowStart[0]=0; - assert(start==0||start=iend) - { - // We are in the overlap region of the grid - // therefore we setup the system such that - // right hand side will equal the left hand side - // of the linear system. - if(localRow>0) - { - mm->colIndex[nnz]=localRow-1; - mm->data[nnz++]=0; - } - mm->colIndex[nnz]=localRow; - mm->data[nnz++]=1.0; - indexset.add(row, LocalIndex(localRow, GridAttributes::copy, true)); - if(localRowcolIndex[nnz]=localRow+1; - mm->data[nnz++]=0; - } - mm->rowStart[localRow+1]=nnz; - continue; - } - - double dval=0; - if(row>0) - { - mm->colIndex[nnz]=localRow-1; - mm->data[nnz++]=-1; - dval+=1; - } - mm->colIndex[nnz]=localRow; - mm->data[nnz++]=2;//dval+(rowcolIndex[nnz]=localRow+1; - mm->data[nnz++]=-1; - dval+=1; - } - mm->rowStart[localRow+1]=nnz; - indexset.add(row, LocalIndex(localRow, GridAttributes::owner, true)); - } - mm->data.resize(nnz); - mm->colIndex.resize(nnz); - indexset.endResize(); - return std::shared_ptr(mm); -} - -template -void createRandomVectors(O& pinfo, int NN, std::vector& x, std::vector& b, - const MyMatrix& mat) -{ - x.resize(NN); - for(auto entry=x.begin(), end =x.end(); entry!=end; ++entry) - *entry=((double) (rand()%100))/10.0; - - pinfo.copyOwnerToAll(x,x); - - b.resize(NN); - - // Construct the right hand side as b=A*x - std::fill(b.begin(), b.end(), 0.0); - for(std::size_t row=0; row0) - start = istart - 1; - else - start = istart; - - if(iend x(end-start), b(end-start); diff --git a/tests/test_parallelistlinformation.cpp b/tests/test_parallelistlinformation.cpp new file mode 100644 index 000000000..4e0d3ffa7 --- /dev/null +++ b/tests/test_parallelistlinformation.cpp @@ -0,0 +1,81 @@ +/* + Copyright 2015 Dr. Markus Blatt - HPC-Simulation-Software & Services + Copyright 2015 NTNU + + This file is part of the Open Porous Media project (OPM). + + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with OPM. If not, see . +*/ + +#include + +#if HAVE_DYNAMIC_BOOST_TEST +#define BOOST_TEST_DYN_LINK +#endif +#define NVERBOSE // to suppress our messages when throwing + +#define BOOST_TEST_MODULE OPM-ParallelIstlInformation +#include +#if HAVE_MPI +#include +#else +#error "This file needs to compiled with MPI support!" +#endif +#include "DuneIstlTestHelpers.hpp" +#include +#include +#ifdef HAVE_DUNE_ISTL +BOOST_AUTO_TEST_CASE(tupleReductionTest) +{ + int N=100; + int start, end, istart, iend; + std::tie(start,istart,iend,end) = computeRegions(N); + Opm::ParallelISTLInformation comm(MPI_COMM_WORLD); + auto mat = create1DLaplacian(*comm.indexSet(), N, start, end, istart, iend); + std::vector x(end-start); + assert(comm.indexSet()->size()==x.size()); + for(auto i=comm.indexSet()->begin(), iend=comm.indexSet()->end(); i!=iend; ++i) + x[i->local()]=i->global(); + auto containers = std::make_tuple(x, x, x); + auto operators = std::make_tuple(Opm::Reduction::MaskIDOperator >(), + Opm::Reduction::MaskToMinOperator >(), + Opm::Reduction::MaskToMaxOperator >()); + auto values = std::make_tuple(0,0,100000); + auto oldvalues = values; + comm.computeReduction(containers,operators,values); + BOOST_CHECK(std::get<0>(values)==std::get<0>(oldvalues)+((N-1)*N)/2); + BOOST_CHECK(std::get<1>(values)==std::min(0, std::get<1>(oldvalues))); + BOOST_CHECK(std::get<2>(values)==std::max(N, std::get<2>(oldvalues))); +} +BOOST_AUTO_TEST_CASE(singleContainerReductionTest) +{ + int N=100; + int start, end, istart, iend; + std::tie(start,istart,iend,end) = computeRegions(N); + Opm::ParallelISTLInformation comm(MPI_COMM_WORLD); + auto mat = create1DLaplacian(*comm.indexSet(), N, start, end, istart, iend); + std::vector x(end-start); + assert(comm.indexSet()->size()==x.size()); + for(auto i=comm.indexSet()->begin(), iend=comm.indexSet()->end(); i!=iend; ++i) + x[i->local()]=i->global(); + auto containers = std::make_tuple(x, x, x); + auto operators = std::make_tuple(Opm::Reduction::MaskIDOperator >(), + Opm::Reduction::MaskToMinOperator >(), + Opm::Reduction::MaskToMaxOperator >()); + int value = 1; + int oldvalue = value; + comm.computeReduction(x,Opm::Reduction::MaskIDOperator >(),value); + BOOST_CHECK(value==oldvalue+((N-1)*N)/2); +} +#endif