Added methods for computing global reductions.

We need to compute quite a few global reductions in the
Newton method of opm-autodiff. This commit adds the functionality
to compute several reductions combined using only one global
communication. Compiles and test succeeds with one or more process.
This commit is contained in:
Markus Blatt
2015-01-21 15:44:20 +01:00
parent faa8e00f0f
commit 566aee7896
6 changed files with 600 additions and 187 deletions

View File

@@ -31,23 +31,12 @@
#if HAVE_MPI
#include <mpi.h>
#include <dune/common/version.hh>
#include <dune/common/parallel/indexset.hh>
#include <dune/common/parallel/communicator.hh>
#include <dune/common/parallel/remoteindices.hh>
#include <dune/common/version.hh>
#if DUNE_VERSION_NEWER(DUNE_COMMON, 2, 3)
#include <dune/common/parallel/mpicollectivecommunication.hh>
#include <dune/common/parallel/collectivecommunication.hh>
#else
#include <dune/common/mpicollectivecommunication.hh>
#include <dune/common/collectivecommunication.hh>
#endif
#include <dune/istl/owneroverlapcopy.hh>
#include <opm/core/linalg/ParallelIstlInformation.hpp>
#else
#error "This file needs to compiled with MPI support!"
#endif
#include "DuneIstlTestHelpers.hpp"
#include <opm/core/linalg/LinearSolverFactory.hpp>
#include <opm/core/utility/parameters/ParameterGroup.hpp>
@@ -55,179 +44,11 @@
#include <cstdlib>
#include <string>
struct MPIFixture {
MPIFixture()
{
int m_argc = boost::unit_test::framework::master_test_suite().argc;
char** m_argv = boost::unit_test::framework::master_test_suite().argv;
MPI_Init(&m_argc, &m_argv);
}
~MPIFixture()
{
MPI_Finalize();
}
};
BOOST_GLOBAL_FIXTURE(MPIFixture);
struct MyMatrix
{
MyMatrix(std::size_t rows, std::size_t nnz)
: data(nnz, 0.0), rowStart(rows+1, -1),
colIndex(nnz, -1)
{}
MyMatrix()
: data(), rowStart(), colIndex()
{}
std::vector<double> data;
std::vector<int> rowStart;
std::vector<int> colIndex;
};
typedef int LocalId;
typedef int GlobalId;
typedef Dune::OwnerOverlapCopyCommunication<GlobalId,LocalId> Communication;
typedef Dune::OwnerOverlapCopyAttributeSet GridAttributes;
typedef GridAttributes::AttributeSet GridFlag;
typedef Dune::ParallelLocalIndex<GridFlag> LocalIndex;
/// \brief Sets up a paralle Laplacian.
///
/// The process stores the unknowns with indices in the range [start, end).
/// As we use an overlapping domain decomposition, the process owns the indices
/// in the range [istart, iend]. If we would only used the indices in this range then
/// they form a partitioning of the whole index set.
/// \tparam I The type of the parallel index set (for convenience)
/// \param indexset The parallel index set for marking owner and copy region.
/// \param N The global number of unknowns of the system.
/// \param start The first index stored on this process
/// \param end One past the last index stored on this process
/// \param istart The first index that the process owns.
/// \param iend One past the last index the process owns.
template<class I>
std::shared_ptr<MyMatrix> create1DLaplacian(I& indexset, int N, int start, int end,
int istart, int iend)
{
indexset.beginResize();
MyMatrix* mm=new MyMatrix(end-start, (end-start)*3);
int nnz=0;
mm->rowStart[0]=0;
assert(start==0||start<istart);
assert(end==N||iend<end);
for(int row=start, localRow=0; row<end; row++, localRow++)
{
if(row<istart || row>=iend)
{
// We are in the overlap region of the grid
// therefore we setup the system such that
// right hand side will equal the left hand side
// of the linear system.
if(localRow>0)
{
mm->colIndex[nnz]=localRow-1;
mm->data[nnz++]=0;
}
mm->colIndex[nnz]=localRow;
mm->data[nnz++]=1.0;
indexset.add(row, LocalIndex(localRow, GridAttributes::copy, true));
if(localRow<end-1)
{
mm->colIndex[nnz]=localRow+1;
mm->data[nnz++]=0;
}
mm->rowStart[localRow+1]=nnz;
continue;
}
double dval=0;
if(row>0)
{
mm->colIndex[nnz]=localRow-1;
mm->data[nnz++]=-1;
dval+=1;
}
mm->colIndex[nnz]=localRow;
mm->data[nnz++]=2;//dval+(row<N-1);
if(row<N-1)
{
mm->colIndex[nnz]=localRow+1;
mm->data[nnz++]=-1;
dval+=1;
}
mm->rowStart[localRow+1]=nnz;
indexset.add(row, LocalIndex(localRow, GridAttributes::owner, true));
}
mm->data.resize(nnz);
mm->colIndex.resize(nnz);
indexset.endResize();
return std::shared_ptr<MyMatrix>(mm);
}
template<class O>
void createRandomVectors(O& pinfo, int NN, std::vector<double>& x, std::vector<double>& b,
const MyMatrix& mat)
{
x.resize(NN);
for(auto entry=x.begin(), end =x.end(); entry!=end; ++entry)
*entry=((double) (rand()%100))/10.0;
pinfo.copyOwnerToAll(x,x);
b.resize(NN);
// Construct the right hand side as b=A*x
std::fill(b.begin(), b.end(), 0.0);
for(std::size_t row=0; row<mat.rowStart.size()-1; ++row)
{
for(int i=mat.rowStart[row], end=mat.rowStart[row+1]; i!=end; ++i)
{
b[row]+= mat.data[i]*x[mat.colIndex[i]];
}
}
pinfo.copyOwnerToAll(b,b);
}
void run_test(const Opm::parameter::ParameterGroup& param)
{
int N=100;
int procs, rank;
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &procs);
int n = N/procs; // number of unknowns per process
int bigger = N%procs; // number of process with n+1 unknows
int start, end, istart, iend;
// Compute owner region
if(rank<bigger) {
start = rank*(n+1);
end = start+(n+1);
}else{
start = bigger*(n+1) + (rank-bigger) * n;
end = start+n;
}
// Compute owner region
if(rank<bigger) {
istart = rank*(n+1);
iend = start+(n+1);
}else{
istart = bigger*(n+1) + (rank-bigger) * n;
iend = start+n;
}
// Compute overlap region
if(istart>0)
start = istart - 1;
else
start = istart;
if(iend<N)
end = iend + 1;
else
end = iend;
std::tie(start,istart,iend,end) = computeRegions(N);
Opm::ParallelISTLInformation comm(MPI_COMM_WORLD);
auto mat = create1DLaplacian(*comm.indexSet(), N, start, end, istart, iend);
std::vector<double> x(end-start), b(end-start);