From cf28b2794b23604c1b45285f139415397998ac6d Mon Sep 17 00:00:00 2001 From: JamesEMcclure Date: Wed, 20 Nov 2019 13:20:11 -0500 Subject: [PATCH 001/121] created skeleton for greyscale model --- common/ScaLBL.h | 5 + cpu/Greyscale.cpp | 278 ++++++++++++++ gpu/Greyscale.cu | 311 ++++++++++++++++ models/GreyscaleModel.cpp | 568 +++++++++++++++++++++++++++++ models/GreyscaleModel.h | 81 ++++ tests/CMakeLists.txt | 1 + tests/lbpm_greyscale_simulator.cpp | 64 ++++ 7 files changed, 1308 insertions(+) create mode 100644 cpu/Greyscale.cpp create mode 100644 gpu/Greyscale.cu create mode 100644 models/GreyscaleModel.cpp create mode 100644 models/GreyscaleModel.h create mode 100644 tests/lbpm_greyscale_simulator.cpp diff --git a/common/ScaLBL.h b/common/ScaLBL.h index a50ab7ed..efca3be8 100644 --- a/common/ScaLBL.h +++ b/common/ScaLBL.h @@ -55,6 +55,11 @@ extern "C" void ScaLBL_D3Q19_AAeven_BGK(double *dist, int start, int finish, int extern "C" void ScaLBL_D3Q19_AAodd_BGK(int *neighborList, double *dist, int start, int finish, int Np, double rlx, double Fx, double Fy, double Fz); +// GREYSCALE MODEL +extern "C" void ScaLBL_D3Q19_AAeven_Greyscale(double *dist, int start, int finish, int Np, double rlx, double Fx, double Fy, double Fz); + +extern "C" void ScaLBL_D3Q19_AAodd_Greyscale(int *neighborList, double *dist, int start, int finish, int Np, double rlx, double Fx, double Fy, double Fz); + // MRT MODEL extern "C" void ScaLBL_D3Q19_AAeven_MRT(double *dist, int start, int finish, int Np, double rlx_setA, double rlx_setB, double Fx, double Fy, double Fz); diff --git a/cpu/Greyscale.cpp b/cpu/Greyscale.cpp new file mode 100644 index 00000000..a800413d --- /dev/null +++ b/cpu/Greyscale.cpp @@ -0,0 +1,278 @@ +extern "C" void ScaLBL_D3Q19_AAeven_Greyscale(double *dist, int start, int finish, int Np, double rlx, double Fx, double Fy, double Fz){ + int n; + // conserved momemnts + double rho,ux,uy,uz,uu; + // non-conserved moments + double f0,f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,f11,f12,f13,f14,f15,f16,f17,f18; + + for (int n=start; n 10Np => odd part of dist) + f1 = dist[nr1]; // reading the f1 data into register fq + + nr2 = neighborList[n+Np]; // neighbor 1 ( < 10Np => even part of dist) + f2 = dist[nr2]; // reading the f2 data into register fq + + // q=3 + nr3 = neighborList[n+2*Np]; // neighbor 4 + f3 = dist[nr3]; + + // q = 4 + nr4 = neighborList[n+3*Np]; // neighbor 3 + f4 = dist[nr4]; + + // q=5 + nr5 = neighborList[n+4*Np]; + f5 = dist[nr5]; + + // q = 6 + nr6 = neighborList[n+5*Np]; + f6 = dist[nr6]; + + // q=7 + nr7 = neighborList[n+6*Np]; + f7 = dist[nr7]; + + // q = 8 + nr8 = neighborList[n+7*Np]; + f8 = dist[nr8]; + + // q=9 + nr9 = neighborList[n+8*Np]; + f9 = dist[nr9]; + + // q = 10 + nr10 = neighborList[n+9*Np]; + f10 = dist[nr10]; + + // q=11 + nr11 = neighborList[n+10*Np]; + f11 = dist[nr11]; + + // q=12 + nr12 = neighborList[n+11*Np]; + f12 = dist[nr12]; + + // q=13 + nr13 = neighborList[n+12*Np]; + f13 = dist[nr13]; + + // q=14 + nr14 = neighborList[n+13*Np]; + f14 = dist[nr14]; + + // q=15 + nr15 = neighborList[n+14*Np]; + f15 = dist[nr15]; + + // q=16 + nr16 = neighborList[n+15*Np]; + f16 = dist[nr16]; + + // q=17 + //fq = dist[18*Np+n]; + nr17 = neighborList[n+16*Np]; + f17 = dist[nr17]; + + // q=18 + nr18 = neighborList[n+17*Np]; + f18 = dist[nr18]; + + rho = f0+f2+f1+f4+f3+f6+f5+f8+f7+f10+f9+f12+f11+f14+f13+f16+f15+f18+f17; + ux = f1-f2+f7-f8+f9-f10+f11-f12+f13-f14; + uy = f3-f4+f7-f8-f9+f10+f15-f16+f17-f18; + uz = f5-f6+f11-f12-f13+f14+f15-f16-f17+f18; + uu = 1.5*(ux*ux+uy*uy+uz*uz); 
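The relaxation writes that follow implement a single-relaxation-time (BGK) update toward a D3Q19 equilibrium. Reconstructing the form from the coefficients used in the code (weights 1/3, 1/18, 1/36 and forcing prefactors 1/6 and 1/12, i.e. 3*w_q), the intended update for the non-rest directions appears to be

\[ f_q \leftarrow (1-\omega)\, f_q + \omega\, w_q \left[ \rho + 3(\mathbf{e}_q \cdot \mathbf{u}) + \tfrac{9}{2}(\mathbf{e}_q \cdot \mathbf{u})^2 - \tfrac{3}{2}\,\mathbf{u} \cdot \mathbf{u} \right] + 3\, w_q\, (\mathbf{e}_q \cdot \mathbf{F}), \qquad q > 0, \]

with \(\omega\) = rlx, \(\mathbf{u}\) the pre-collision momentum computed just above, and \(\mathbf{F}\) = (Fx, Fy, Fz) the body force. The rest direction (q = 0) as written relaxes toward (1/3)(1.0 - uu) rather than (1/3)(rho - uu), so this sketch matches it only when rho is close to 1; it is a reading of the code above, not a formula stated in the commit.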
+ + // q=0 + dist[n] = f0*(1.0-rlx)+rlx*0.3333333333333333*(1.0-uu); + + // q = 1 + dist[nr2] = f1*(1.0-rlx) + rlx*0.05555555555555555*(rho + 3.0*ux + 4.5*ux*ux - uu) + 0.16666666*Fx; + + // q=2 + dist[nr1] = f2*(1.0-rlx) + rlx*0.05555555555555555*(rho - 3.0*ux + 4.5*ux*ux - uu)- 0.16666666*Fx; + + // q = 3 + dist[nr4] = f3*(1.0-rlx) + + rlx*0.05555555555555555*(rho + 3.0*uy + 4.5*uy*uy - uu) + 0.16666666*Fy; + + // q = 4 + dist[nr3] = f4*(1.0-rlx) + + rlx*0.05555555555555555*(rho - 3.0*uy + 4.5*uy*uy - uu)- 0.16666666*Fy; + + // q = 5 + dist[nr6] = f5*(1.0-rlx) + + rlx*0.05555555555555555*(rho + 3.0*uz + 4.5*uz*uz - uu) + 0.16666666*Fz; + + // q = 6 + dist[nr5] = f6*(1.0-rlx) + + rlx*0.05555555555555555*(rho - 3.0*uz + 4.5*uz*uz - uu) - 0.16666666*Fz; + + // q = 7 + dist[nr8] = f7*(1.0-rlx) + + rlx*0.02777777777777778*(rho + 3.0*(ux+uy) + 4.5*(ux+uy)*(ux+uy) - uu) + 0.08333333333*(Fx+Fy); + + // q = 8 + dist[nr7] = f8*(1.0-rlx) + + rlx*0.02777777777777778*(rho - 3.0*(ux+uy) + 4.5*(ux+uy)*(ux+uy) - uu) - 0.08333333333*(Fx+Fy); + + // q = 9 + dist[nr10] = f9*(1.0-rlx) + + rlx*0.02777777777777778*(rho + 3.0*(ux-uy) + 4.5*(ux-uy)*(ux-uy) - uu) + 0.08333333333*(Fx-Fy); + + // q = 10 + dist[nr9] = f10*(1.0-rlx) + + rlx*0.02777777777777778*(rho - 3.0*(ux-uy) + 4.5*(ux-uy)*(ux-uy) - uu) - 0.08333333333*(Fx-Fy); + + // q = 11 + dist[nr12] = f11*(1.0-rlx) + + rlx*0.02777777777777778*(rho + 3.0*(ux+uz) + 4.5*(ux+uz)*(ux+uz) - uu) + 0.08333333333*(Fx+Fz); + + // q = 12 + dist[nr11] = f12*(1.0-rlx) + + rlx*0.02777777777777778*(rho - 3.0*(ux+uz) + 4.5*(ux+uz)*(ux+uz) - uu) - 0.08333333333*(Fx+Fz); + + // q = 13 + dist[nr14] = f13*(1.0-rlx) + + rlx*0.02777777777777778*(rho + 3.0*(ux-uz) + 4.5*(ux-uz)*(ux-uz) - uu) + 0.08333333333*(Fx-Fz); + + // q= 14 + dist[nr13] = f14*(1.0-rlx) + + rlx*0.02777777777777778*(rho - 3.0*(ux-uz) + 4.5*(ux-uz)*(ux-uz) - uu)- 0.08333333333*(Fx-Fz); + + // q = 15 + dist[nr16] = f15*(1.0-rlx) + + rlx*0.02777777777777778*(rho + 3.0*(uy+uz) + 4.5*(uy+uz)*(uy+uz) - uu) + 0.08333333333*(Fy+Fz); + + // q = 16 + dist[nr15] = f16*(1.0-rlx) + + rlx*0.02777777777777778*(rho - 3.0*(uy+uz) + 4.5*(uy+uz)*(uy+uz) - uu) - 0.08333333333*(Fy+Fz); + + // q = 17 + dist[nr18] = f17*(1.0-rlx) + + rlx*0.02777777777777778*(rho + 3.0*(uy-uz) + 4.5*(uy-uz)*(uy-uz) - uu) + 0.08333333333*(Fy-Fz); + + // q = 18 + dist[nr17] = f18*(1.0-rlx) + + rlx*0.02777777777777778*(rho - 3.0*(uy-uz) + 4.5*(uy-uz)*(uy-uz) - uu) - 0.08333333333*(Fy-Fz); + + } +} \ No newline at end of file diff --git a/gpu/Greyscale.cu b/gpu/Greyscale.cu new file mode 100644 index 00000000..04b5e979 --- /dev/null +++ b/gpu/Greyscale.cu @@ -0,0 +1,311 @@ +#include + +#define NBLOCKS 1024 +#define NTHREADS 256 + +__global__ void dvc_ScaLBL_D3Q19_AAeven_Greyscale(double *dist, int start, int finish, int Np, double rlx, double Fx, double Fy, double Fz){ + int n; + // conserved momemnts + double rho,ux,uy,uz,uu; + // non-conserved moments + double f0,f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,f11,f12,f13,f14,f15,f16,f17,f18; + + int S = Np/NBLOCKS/NTHREADS + 1; + for (int s=0; s 10Np => odd part of dist) + f1 = dist[nr1]; // reading the f1 data into register fq + + nr2 = neighborList[n+Np]; // neighbor 1 ( < 10Np => even part of dist) + f2 = dist[nr2]; // reading the f2 data into register fq + + // q=3 + nr3 = neighborList[n+2*Np]; // neighbor 4 + f3 = dist[nr3]; + + // q = 4 + nr4 = neighborList[n+3*Np]; // neighbor 3 + f4 = dist[nr4]; + + // q=5 + nr5 = neighborList[n+4*Np]; + f5 = dist[nr5]; + + // q = 6 + nr6 = neighborList[n+5*Np]; + f6 = 
dist[nr6]; + + // q=7 + nr7 = neighborList[n+6*Np]; + f7 = dist[nr7]; + + // q = 8 + nr8 = neighborList[n+7*Np]; + f8 = dist[nr8]; + + // q=9 + nr9 = neighborList[n+8*Np]; + f9 = dist[nr9]; + + // q = 10 + nr10 = neighborList[n+9*Np]; + f10 = dist[nr10]; + + // q=11 + nr11 = neighborList[n+10*Np]; + f11 = dist[nr11]; + + // q=12 + nr12 = neighborList[n+11*Np]; + f12 = dist[nr12]; + + // q=13 + nr13 = neighborList[n+12*Np]; + f13 = dist[nr13]; + + // q=14 + nr14 = neighborList[n+13*Np]; + f14 = dist[nr14]; + + // q=15 + nr15 = neighborList[n+14*Np]; + f15 = dist[nr15]; + + // q=16 + nr16 = neighborList[n+15*Np]; + f16 = dist[nr16]; + + // q=17 + //fq = dist[18*Np+n]; + nr17 = neighborList[n+16*Np]; + f17 = dist[nr17]; + + // q=18 + nr18 = neighborList[n+17*Np]; + f18 = dist[nr18]; + + rho = f0+f2+f1+f4+f3+f6+f5+f8+f7+f10+f9+f12+f11+f14+f13+f16+f15+f18+f17; + ux = f1-f2+f7-f8+f9-f10+f11-f12+f13-f14; + uy = f3-f4+f7-f8-f9+f10+f15-f16+f17-f18; + uz = f5-f6+f11-f12-f13+f14+f15-f16-f17+f18; + uu = 1.5*(ux*ux+uy*uy+uz*uz); + + // q=0 + dist[n] = f0*(1.0-rlx)+rlx*0.3333333333333333*(1.0-uu); + + // q = 1 + dist[nr2] = f1*(1.0-rlx) + rlx*0.05555555555555555*(rho + 3.0*ux + 4.5*ux*ux - uu) + 0.16666666*Fx; + + // q=2 + dist[nr1] = f2*(1.0-rlx) + rlx*0.05555555555555555*(rho - 3.0*ux + 4.5*ux*ux - uu)- 0.16666666*Fx; + + // q = 3 + dist[nr4] = f3*(1.0-rlx) + + rlx*0.05555555555555555*(rho + 3.0*uy + 4.5*uy*uy - uu) + 0.16666666*Fy; + + // q = 4 + dist[nr3] = f4*(1.0-rlx) + + rlx*0.05555555555555555*(rho - 3.0*uy + 4.5*uy*uy - uu)- 0.16666666*Fy; + + // q = 5 + dist[nr6] = f5*(1.0-rlx) + + rlx*0.05555555555555555*(rho + 3.0*uz + 4.5*uz*uz - uu) + 0.16666666*Fz; + + // q = 6 + dist[nr5] = f6*(1.0-rlx) + + rlx*0.05555555555555555*(rho - 3.0*uz + 4.5*uz*uz - uu) - 0.16666666*Fz; + + // q = 7 + dist[nr8] = f7*(1.0-rlx) + + rlx*0.02777777777777778*(rho + 3.0*(ux+uy) + 4.5*(ux+uy)*(ux+uy) - uu) + 0.08333333333*(Fx+Fy); + + // q = 8 + dist[nr7] = f8*(1.0-rlx) + + rlx*0.02777777777777778*(rho - 3.0*(ux+uy) + 4.5*(ux+uy)*(ux+uy) - uu) - 0.08333333333*(Fx+Fy); + + // q = 9 + dist[nr10] = f9*(1.0-rlx) + + rlx*0.02777777777777778*(rho + 3.0*(ux-uy) + 4.5*(ux-uy)*(ux-uy) - uu) + 0.08333333333*(Fx-Fy); + + // q = 10 + dist[nr9] = f10*(1.0-rlx) + + rlx*0.02777777777777778*(rho - 3.0*(ux-uy) + 4.5*(ux-uy)*(ux-uy) - uu) - 0.08333333333*(Fx-Fy); + + // q = 11 + dist[nr12] = f11*(1.0-rlx) + + rlx*0.02777777777777778*(rho + 3.0*(ux+uz) + 4.5*(ux+uz)*(ux+uz) - uu) + 0.08333333333*(Fx+Fz); + + // q = 12 + dist[nr11] = f12*(1.0-rlx) + + rlx*0.02777777777777778*(rho - 3.0*(ux+uz) + 4.5*(ux+uz)*(ux+uz) - uu) - 0.08333333333*(Fx+Fz); + + // q = 13 + dist[nr14] = f13*(1.0-rlx) + + rlx*0.02777777777777778*(rho + 3.0*(ux-uz) + 4.5*(ux-uz)*(ux-uz) - uu) + 0.08333333333*(Fx-Fz); + + // q= 14 + dist[nr13] = f14*(1.0-rlx) + + rlx*0.02777777777777778*(rho - 3.0*(ux-uz) + 4.5*(ux-uz)*(ux-uz) - uu)- 0.08333333333*(Fx-Fz); + + // q = 15 + dist[nr16] = f15*(1.0-rlx) + + rlx*0.02777777777777778*(rho + 3.0*(uy+uz) + 4.5*(uy+uz)*(uy+uz) - uu) + 0.08333333333*(Fy+Fz); + + // q = 16 + dist[nr15] = f16*(1.0-rlx) + + rlx*0.02777777777777778*(rho - 3.0*(uy+uz) + 4.5*(uy+uz)*(uy+uz) - uu) - 0.08333333333*(Fy+Fz); + + // q = 17 + dist[nr18] = f17*(1.0-rlx) + + rlx*0.02777777777777778*(rho + 3.0*(uy-uz) + 4.5*(uy-uz)*(uy-uz) - uu) + 0.08333333333*(Fy-Fz); + + // q = 18 + dist[nr17] = f18*(1.0-rlx) + + rlx*0.02777777777777778*(rho - 3.0*(uy-uz) + 4.5*(uy-uz)*(uy-uz) - uu) - 0.08333333333*(Fy-Fz); + } + } +} + +extern "C" void 
ScaLBL_D3Q19_AAeven_Greyscale(double *dist, int start, int finish, int Np, double rlx, double Fx, double Fy, double Fz){ + + dvc_ScaLBL_D3Q19_AAeven_Greyscale<<>>(dist,start,finish,Np,rlx,Fx,Fy,Fz); + + cudaError_t err = cudaGetLastError(); + if (cudaSuccess != err){ + printf("CUDA error in ScaLBL_D3Q19_AAeven_Greyscale: %s \n",cudaGetErrorString(err)); + } +} + +extern "C" void ScaLBL_D3Q19_AAodd_Greyscale(int *neighborList, double *dist, int start, int finish, int Np, double rlx, double Fx, double Fy, double Fz){ + dvc_ScaLBL_D3Q19_AAodd_Greyscale<<>>(neighborList,dist,start,finish,Np,rlx,Fx,Fy,Fz); + + cudaError_t err = cudaGetLastError(); + if (cudaSuccess != err){ + printf("CUDA error in ScaLBL_D3Q19_AAeven_Greyscale: %s \n",cudaGetErrorString(err)); + } +} \ No newline at end of file diff --git a/models/GreyscaleModel.cpp b/models/GreyscaleModel.cpp new file mode 100644 index 00000000..980d15b5 --- /dev/null +++ b/models/GreyscaleModel.cpp @@ -0,0 +1,568 @@ +/* +color lattice boltzmann model + */ +#include "models/GreyscaleModel.h" +#include "analysis/distance.h" +#include "analysis/morphology.h" +#include +#include + +ScaLBL_GreyscaleModel::ScaLBL_GreyscaleModel(int RANK, int NP, MPI_Comm COMM): +rank(RANK), nprocs(NP), Restart(0),timestep(0),timestepMax(0),tau(0), +Fx(0),Fy(0),Fz(0),flux(0),din(0),dout(0), +Nx(0),Ny(0),Nz(0),N(0),Np(0),nprocx(0),nprocy(0),nprocz(0),BoundaryCondition(0),Lx(0),Ly(0),Lz(0),comm(COMM) +{ + SignDist.resize(Nx,Ny,Nz); SignDist.fill(0); + +} +ScaLBL_GreyscaleModel::~ScaLBL_GreyscaleModel(){ + +} + +void ScaLBL_GreyscaleModel::ReadParams(string filename){ + // read the input database + db = std::make_shared( filename ); + domain_db = db->getDatabase( "Domain" ); + greyscale_db = db->getDatabase( "Greyscale" ); + analysis_db = db->getDatabase( "Analysis" ); + vis_db = db->getDatabase( "Visualization" ); + + // set defaults + timestepMax = 100000; + tau = 1.0; + tolerance = 0.01; + Fx = Fy = Fz = 0.0; + Restart=false; + din=dout=1.0; + flux=0.0; + + // Color Model parameters + if (greyscale_db->keyExists( "timestepMax" )){ + timestepMax = greyscale_db->getScalar( "timestepMax" ); + } + if (greyscale_db->keyExists( "tau" )){ + tau = greyscale_db->getScalar( "tauA" ); + } + if (greyscale_db->keyExists( "F" )){ + Fx = greyscale_db->getVector( "F" )[0]; + Fy = greyscale_db->getVector( "F" )[1]; + Fz = greyscale_db->getVector( "F" )[2]; + } + if (greyscale_db->keyExists( "Restart" )){ + Restart = greyscale_db->getScalar( "Restart" ); + } + if (greyscale_db->keyExists( "din" )){ + din = greyscale_db->getScalar( "din" ); + } + if (greyscale_db->keyExists( "dout" )){ + dout = greyscale_db->getScalar( "dout" ); + } + if (greyscale_db->keyExists( "flux" )){ + flux = greyscale_db->getScalar( "flux" ); + } + if (greyscale_db->keyExists( "tolerance" )){ + tolerance = greyscale_db->getScalar( "tolerance" ); + } + BoundaryCondition = 0; + if (domain_db->keyExists( "BC" )){ + BoundaryCondition = domain_db->getScalar( "BC" ); + } +} + +void ScaLBL_GreyscaleModel::SetDomain(){ + Dm = std::shared_ptr(new Domain(domain_db,comm)); // full domain for analysis + Mask = std::shared_ptr(new Domain(domain_db,comm)); // mask domain removes immobile phases + // domain parameters + Nx = Dm->Nx; + Ny = Dm->Ny; + Nz = Dm->Nz; + Lx = Dm->Lx; + Ly = Dm->Ly; + Lz = Dm->Lz; + N = Nx*Ny*Nz; + id = new signed char [N]; + for (int i=0; iid[i] = 1; // initialize this way + MPI_Barrier(comm); + Dm->CommInit(); + MPI_Barrier(comm); + // Read domain parameters + rank = Dm->rank(); + nprocx = 
Dm->nprocx(); + nprocy = Dm->nprocy(); + nprocz = Dm->nprocz(); +} + +void ScaLBL_GreyscaleModel::ReadInput(){ + + sprintf(LocalRankString,"%05d",rank); + sprintf(LocalRankFilename,"%s%s","ID.",LocalRankString); + sprintf(LocalRestartFile,"%s%s","Restart.",LocalRankString); + + if (domain_db->keyExists( "Filename" )){ + auto Filename = domain_db->getScalar( "Filename" ); + Mask->Decomp(Filename); + } + else{ + Mask->ReadIDs(); + } + for (int i=0; iid[i]; // save what was read + + // Generate the signed distance map + // Initialize the domain and communication + Array id_solid(Nx,Ny,Nz); + int count = 0; + // Solve for the position of the solid phase + for (int k=0;kid[n]; + if (label > 0) id_solid(i,j,k) = 1; + else id_solid(i,j,k) = 0; + } + } + } + // Initialize the signed distance function + for (int k=0;kgetVector( "ComponentLabels" ); + auto PorosityList = greyscale_db->getVector( "PorosityList" ); + auto PermeabilityList = greyscale_db->getVector( "PermeabilityList" ); + + NLABELS=LabelList.size(); + if (NLABELS != PorosityList.size()){ + ERROR("Error: ComponentLabels and PorosityList must be the same length! \n"); + } + + double label_count[NLABELS]; + double label_count_global[NLABELS]; + // Assign the labels + + for (int idx=0; idxid[n] = 0; // set mask to zero since this is an immobile component + } + } + // fluid labels are reserved / negative labels are immobile + if (VALUE == 1) POROSITY=1.0; + else if (VALUE == 2) POROSITY=1.0; + else if (VALUE < 1) POROSITY = 0.0; + int idx = Map(i,j,k); + if (!(idx < 0)) + Porosity[idx] = POROSITY; + } + } + } + + if (NLABELS != PermeabilityList.size()){ + ERROR("Error: ComponentLabels and PermeabilityList must be the same length! \n"); + } + for (int k=1;kid[n] = 0; // set mask to zero since this is an immobile component + } + } + // fluid labels are reserved / negative labels are immobile + if (VALUE == 1) PERMEABILITY=1.0; + else if (VALUE == 2) PERMEABILITY=1.0; + else if (VALUE < 1) PERMEABILITY = 0.0; + int idx = Map(i,j,k); + if (!(idx < 0)) + Permeability[idx] = PERMEABILITY; + } + } + } + + + // Set Dm to match Mask + for (int i=0; iid[i] = Mask->id[i]; + + for (int idx=0; idxComm, label_count[idx]); + + if (rank==0){ + printf("Component labels: %lu \n",NLABELS); + for (unsigned int idx=0; idxid[i] = Mask->id[i]; + Mask->CommInit(); + Np=Mask->PoreCount(); + //........................................................................... + if (rank==0) printf ("Create ScaLBL_Communicator \n"); + // Create a communicator for the device (will use optimized layout) + // ScaLBL_Communicator ScaLBL_Comm(Mask); // original + ScaLBL_Comm = std::shared_ptr(new ScaLBL_Communicator(Mask)); + + int Npad=(Np/16 + 2)*16; + if (rank==0) printf ("Set up memory efficient layout, %i | %i | %i \n", Np, Npad, N); + Map.resize(Nx,Ny,Nz); Map.fill(-2); + auto neighborList= new int[18*Npad]; + Np = ScaLBL_Comm->MemoryOptimizedLayoutAA(Map,neighborList,Mask->id,Np); + MPI_Barrier(comm); + + //........................................................................... + // MAIN VARIABLES ALLOCATED HERE + //........................................................................... + // LBM variables + if (rank==0) printf ("Allocating distributions \n"); + //......................device distributions................................. + dist_mem_size = Np*sizeof(double); + neighborSize=18*(Np*sizeof(int)); + //........................................................................... 
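To give a sense of scale for the device allocations that follow, the per-rank footprint is dominated by the 19 distributions plus the velocity, porosity, permeability and pressure arrays. A minimal standalone sketch of that budget (Np below is a hypothetical site count, not a value taken from the source):

#include <cstdio>

int main() {
    long long Np = 1000000;                          // hypothetical number of fluid sites per rank
    long long fq      = 19LL * Np * sizeof(double);  // distributions
    long long vel     =  3LL * Np * sizeof(double);  // Velocity
    long long scalars =  3LL * Np * sizeof(double);  // Porosity, Permeability, Pressure
    long long map     =        Np * sizeof(int);     // dvcMap
    long long nbr     = 18LL * Np * sizeof(int);     // NeighborList (ignoring the 16-site padding)
    std::printf("approx. %.1f MB of device memory per rank\n",
                (fq + vel + scalars + map + nbr) / (1024.0 * 1024.0));
    return 0;
}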
+ ScaLBL_AllocateDeviceMemory((void **) &NeighborList, neighborSize); + ScaLBL_AllocateDeviceMemory((void **) &dvcMap, sizeof(int)*Np); + ScaLBL_AllocateDeviceMemory((void **) &fq, 19*dist_mem_size); + ScaLBL_AllocateDeviceMemory((void **) &Permeability, sizeof(double)*Np); + ScaLBL_AllocateDeviceMemory((void **) &Porosity, sizeof(double)*Np); + ScaLBL_AllocateDeviceMemory((void **) &Pressure, sizeof(double)*Np); + ScaLBL_AllocateDeviceMemory((void **) &Velocity, 3*sizeof(double)*Np); + //........................................................................... + // Update GPU data structures + if (rank==0) printf ("Setting up device map and neighbor list \n"); + fflush(stdout); + int *TmpMap; + TmpMap=new int[Np]; + for (int k=1; kLastExterior(); idx++){ + int n = TmpMap[idx]; + if (n > Nx*Ny*Nz){ + printf("Bad value! idx=%i \n"); + TmpMap[idx] = Nx*Ny*Nz-1; + } + } + for (int idx=ScaLBL_Comm->FirstInterior(); idxLastInterior(); idx++){ + int n = TmpMap[idx]; + if (n > Nx*Ny*Nz){ + printf("Bad value! idx=%i \n"); + TmpMap[idx] = Nx*Ny*Nz-1; + } + } + ScaLBL_CopyToDevice(dvcMap, TmpMap, sizeof(int)*Np); + ScaLBL_DeviceBarrier(); + delete [] TmpMap; + + // copy the neighbor list + ScaLBL_CopyToDevice(NeighborList, neighborList, neighborSize); + // initialize phi based on PhaseLabel (include solid component labels) + double *Poros, *Perm; + Poros = new double[Np]; + Perm = new double[Np]; + AssignComponentLabels(Poros,Perm); + ScaLBL_CopyToDevice(Porosity, Poros, Np*sizeof(double)); + ScaLBL_CopyToDevice(Permeability, Perm, Np*sizeof(double)); +} + +/******************************************************** + * AssignComponentLabels * + ********************************************************/ + +void ScaLBL_GreyscaleModel::Initialize(){ + + if (rank==0) printf ("Initializing distributions \n"); + ScaLBL_D3Q19_Init(fq, Np); + /* + * This function initializes model + */ + if (Restart == true){ + if (rank==0){ + printf("Reading restart file! 
\n"); + } + + // Read in the restart file to CPU buffers + int *TmpMap; + TmpMap = new int[Np]; + + double *cDist; + cDist = new double[19*Np]; + ScaLBL_CopyToHost(TmpMap, dvcMap, Np*sizeof(int)); + + ifstream File(LocalRestartFile,ios::binary); + int idx; + double value; + for (int n=0; n analysis_db; + timestep=0; + double rlx = 1.0/tau; + double error = 1.0; + double flow_rate_previous = 0.0; + while (timestep < timestepMax && error > tolerance) { + //************************************************************************/ + timestep++; + ScaLBL_Comm->SendD3Q19AA(fq); //READ FROM NORMAL + ScaLBL_D3Q19_AAodd_Greyscale(NeighborList, fq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np, rlx, Fx, Fy, Fz); + ScaLBL_Comm->RecvD3Q19AA(fq); //WRITE INTO OPPOSITE + ScaLBL_D3Q19_AAodd_Greyscale(NeighborList, fq, 0, ScaLBL_Comm->LastExterior(), Np, rlx, Fx, Fy, Fz); + ScaLBL_DeviceBarrier(); MPI_Barrier(comm); + timestep++; + ScaLBL_Comm->SendD3Q19AA(fq); //READ FORM NORMAL + ScaLBL_D3Q19_AAeven_Greyscale(fq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np, rlx, Fx, Fy, Fz); + ScaLBL_Comm->RecvD3Q19AA(fq); //WRITE INTO OPPOSITE + ScaLBL_D3Q19_AAeven_Greyscale(fq, 0, ScaLBL_Comm->LastExterior(), Np, rlx, Fx, Fy, Fz); + ScaLBL_DeviceBarrier(); MPI_Barrier(comm); + //************************************************************************/ + + if (timestep%1000==0){ + ScaLBL_D3Q19_Momentum(fq,Velocity, Np); + ScaLBL_DeviceBarrier(); MPI_Barrier(comm); + ScaLBL_Comm->RegularLayout(Map,&Velocity[0],Velocity_x); + ScaLBL_Comm->RegularLayout(Map,&Velocity[Np],Velocity_y); + ScaLBL_Comm->RegularLayout(Map,&Velocity[2*Np],Velocity_z); + + double count_loc=0; + double count; + double vax,vay,vaz; + double vax_loc,vay_loc,vaz_loc; + vax_loc = vay_loc = vaz_loc = 0.f; + for (int k=1; k 0){ + vax_loc += Velocity_x(i,j,k); + vay_loc += Velocity_y(i,j,k); + vaz_loc += Velocity_z(i,j,k); + count_loc+=1.0; + } + } + } + } + MPI_Allreduce(&vax_loc,&vax,1,MPI_DOUBLE,MPI_SUM,Mask->Comm); + MPI_Allreduce(&vay_loc,&vay,1,MPI_DOUBLE,MPI_SUM,Mask->Comm); + MPI_Allreduce(&vaz_loc,&vaz,1,MPI_DOUBLE,MPI_SUM,Mask->Comm); + MPI_Allreduce(&count_loc,&count,1,MPI_DOUBLE,MPI_SUM,Mask->Comm); + + vax /= count; + vay /= count; + vaz /= count; + + double force_mag = sqrt(Fx*Fx+Fy*Fy+Fz*Fz); + double dir_x = Fx/force_mag; + double dir_y = Fy/force_mag; + double dir_z = Fz/force_mag; + if (force_mag == 0.0){ + // default to z direction + dir_x = 0.0; + dir_y = 0.0; + dir_z = 1.0; + force_mag = 1.0; + } + double flow_rate = (vax*dir_x + vay*dir_y + vaz*dir_z); + + error = fabs(flow_rate - flow_rate_previous) / fabs(flow_rate); + flow_rate_previous = flow_rate; + + //if (rank==0) printf("Computing Minkowski functionals \n"); + Morphology.ComputeScalar(SignDist,0.f); + //Morphology.PrintAll(); + double mu = (tau-0.5)/3.f; + double Vs = Morphology.V(); + double As = Morphology.A(); + double Hs = Morphology.H(); + double Xs = Morphology.X(); + Vs=sumReduce( Dm->Comm, Vs); + As=sumReduce( Dm->Comm, As); + Hs=sumReduce( Dm->Comm, Hs); + Xs=sumReduce( Dm->Comm, Xs); + double h = Dm->voxel_length; + double absperm = h*h*mu*Mask->Porosity()*flow_rate / force_mag; + if (rank==0) { + printf(" %f\n",absperm); + FILE * log_file = fopen("Permeability.csv","a"); + fprintf(log_file,"%i %.8g %.8g %.8g %.8g %.8g %.8g %.8g %.8g %.8g %.8g %.8g %.8g\n",timestep, Fx, Fy, Fz, mu, + h*h*h*Vs,h*h*As,h*Hs,Xs,vax,vay,vaz, absperm); + fclose(log_file); + } + } + } + PROFILE_STOP("Loop"); + PROFILE_SAVE("lbpm_greyscale_simulator",1); + 
//************************************************************************ + ScaLBL_DeviceBarrier(); + MPI_Barrier(comm); + stoptime = MPI_Wtime(); + if (rank==0) printf("-------------------------------------------------------------------\n"); + // Compute the walltime per timestep + cputime = (stoptime - starttime)/timestep; + // Performance obtained from each node + double MLUPS = double(Np)/cputime/1000000; + + if (rank==0) printf("********************************************************\n"); + if (rank==0) printf("CPU time = %f \n", cputime); + if (rank==0) printf("Lattice update rate (per core)= %f MLUPS \n", MLUPS); + MLUPS *= nprocs; + if (rank==0) printf("Lattice update rate (total)= %f MLUPS \n", MLUPS); + if (rank==0) printf("********************************************************\n"); + + // ************************************************************************ +} + + +void ScaLBL_GreyscaleModel::WriteDebug(){ + // Copy back final phase indicator field and convert to regular layout +/* ScaLBL_CopyToHost(Porosity.data(), Poros, sizeof(double)*N); + + FILE *OUTFILE; + sprintf(LocalRankFilename,"Phase.%05i.raw",rank); + OUTFILE = fopen(LocalRankFilename,"wb"); + fwrite(PhaseField.data(),8,N,OUTFILE); + fclose(OUTFILE); + + ScaLBL_Comm->RegularLayout(Map,&Den[0],PhaseField); + FILE *AFILE; + sprintf(LocalRankFilename,"A.%05i.raw",rank); + AFILE = fopen(LocalRankFilename,"wb"); + fwrite(PhaseField.data(),8,N,AFILE); + fclose(AFILE); + + ScaLBL_Comm->RegularLayout(Map,&Den[Np],PhaseField); + FILE *BFILE; + sprintf(LocalRankFilename,"B.%05i.raw",rank); + BFILE = fopen(LocalRankFilename,"wb"); + fwrite(PhaseField.data(),8,N,BFILE); + fclose(BFILE); + + ScaLBL_Comm->RegularLayout(Map,Pressure,PhaseField); + FILE *PFILE; + sprintf(LocalRankFilename,"Pressure.%05i.raw",rank); + PFILE = fopen(LocalRankFilename,"wb"); + fwrite(PhaseField.data(),8,N,PFILE); + fclose(PFILE); + + ScaLBL_Comm->RegularLayout(Map,&Velocity[0],PhaseField); + FILE *VELX_FILE; + sprintf(LocalRankFilename,"Velocity_X.%05i.raw",rank); + VELX_FILE = fopen(LocalRankFilename,"wb"); + fwrite(PhaseField.data(),8,N,VELX_FILE); + fclose(VELX_FILE); + + ScaLBL_Comm->RegularLayout(Map,&Velocity[Np],PhaseField); + FILE *VELY_FILE; + sprintf(LocalRankFilename,"Velocity_Y.%05i.raw",rank); + VELY_FILE = fopen(LocalRankFilename,"wb"); + fwrite(PhaseField.data(),8,N,VELY_FILE); + fclose(VELY_FILE); + + ScaLBL_Comm->RegularLayout(Map,&Velocity[2*Np],PhaseField); + FILE *VELZ_FILE; + sprintf(LocalRankFilename,"Velocity_Z.%05i.raw",rank); + VELZ_FILE = fopen(LocalRankFilename,"wb"); + fwrite(PhaseField.data(),8,N,VELZ_FILE); + fclose(VELZ_FILE); + + * + */ + +} diff --git a/models/GreyscaleModel.h b/models/GreyscaleModel.h new file mode 100644 index 00000000..37ddf28f --- /dev/null +++ b/models/GreyscaleModel.h @@ -0,0 +1,81 @@ +/* +Implementation of color lattice boltzmann model + */ +#include +#include +#include +#include +#include +#include +#include + +#include "common/Communication.h" +#include "common/MPI_Helpers.h" +#include "common/Database.h" +#include "common/ScaLBL.h" +#include "ProfilerApp.h" +#include "threadpool/thread_pool.h" + +class ScaLBL_GreyscaleModel{ +public: + ScaLBL_GreyscaleModel(int RANK, int NP, MPI_Comm COMM); + ~ScaLBL_GreyscaleModel(); + + // functions in they should be run + void ReadParams(string filename); + void ReadParams(std::shared_ptr db0); + void SetDomain(); + void ReadInput(); + void Create(); + void Initialize(); + void Run(); + void WriteDebug(); + + bool Restart,pBC; + int 
timestep,timestepMax; + int BoundaryCondition; + double tau; + double tolerance; + double Fx,Fy,Fz,flux; + double din,dout; + + int Nx,Ny,Nz,N,Np; + int rank,nprocx,nprocy,nprocz,nprocs; + double Lx,Ly,Lz; + + std::shared_ptr Dm; // this domain is for analysis + std::shared_ptr Mask; // this domain is for lbm + std::shared_ptr ScaLBL_Comm; + + // input database + std::shared_ptr db; + std::shared_ptr domain_db; + std::shared_ptr greyscale_db; + std::shared_ptr analysis_db; + std::shared_ptr vis_db; + + IntArray Map; + DoubleArray SignDist; + signed char *id; + int *NeighborList; + int *dvcMap; + double *fq; + double *Permeability; + double *Porosity; + double *Velocity; + double *Pressure; + +private: + MPI_Comm comm; + + int dist_mem_size; + int neighborSize; + // filenames + char LocalRankString[8]; + char LocalRankFilename[40]; + char LocalRestartFile[40]; + + void AssignComponentLabels(double *Porosity, double *Permeablity); + +}; + diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 8d600321..8b14a9dc 100755 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -3,6 +3,7 @@ #ADD_LBPM_EXECUTABLE( lbpm_nondarcy_simulator ) ADD_LBPM_EXECUTABLE( lbpm_color_simulator ) ADD_LBPM_EXECUTABLE( lbpm_permeability_simulator ) +ADD_LBPM_EXECUTABLE( lbpm_greyscale_simulator ) #ADD_LBPM_EXECUTABLE( lbpm_BGK_simulator ) #ADD_LBPM_EXECUTABLE( lbpm_color_macro_simulator ) ADD_LBPM_EXECUTABLE( lbpm_dfh_simulator ) diff --git a/tests/lbpm_greyscale_simulator.cpp b/tests/lbpm_greyscale_simulator.cpp new file mode 100644 index 00000000..9ab7c385 --- /dev/null +++ b/tests/lbpm_greyscale_simulator.cpp @@ -0,0 +1,64 @@ +#include +#include +#include +#include +#include +#include +#include + +#include "common/ScaLBL.h" +#include "common/Communication.h" +#include "common/MPI_Helpers.h" +#include "models/GreyscaleModel.h" +//#define WRITE_SURFACES + +/* + * Simulator for two-phase flow in porous media + * James E. 
McClure 2013-2014 + */ + +using namespace std; + + +int main(int argc, char **argv) +{ + //***************************************** + // ***** MPI STUFF **************** + //***************************************** + // Initialize MPI + int rank,nprocs; + MPI_Init(&argc,&argv); + MPI_Comm comm = MPI_COMM_WORLD; + MPI_Comm_rank(comm,&rank); + MPI_Comm_size(comm,&nprocs); + { + // parallel domain size (# of sub-domains) + int nprocx,nprocy,nprocz; + int iproc,jproc,kproc; + + if (rank == 0){ + printf("********************************************************\n"); + printf("Running Greyscale Single Phase Permeability Calculation \n"); + printf("********************************************************\n"); + } + // Initialize compute device + int device=ScaLBL_SetDevice(rank); + ScaLBL_DeviceBarrier(); + MPI_Barrier(comm); + + + ScaLBL_MRTModel MRT(rank,nprocs,comm); + auto filename = argv[1]; + MRT.ReadParams(filename); + MRT.SetDomain(); // this reads in the domain + MRT.ReadInput(); + MRT.Create(); // creating the model will create data structure to match the pore structure and allocate variables + MRT.Initialize(); // initializing the model will set initial conditions for variables + MRT.Run(); + MRT.VelocityField(); + } + // **************************************************** + MPI_Barrier(comm); + MPI_Finalize(); + // **************************************************** +} From 2abcf030286f148af2bcbea34d5bf67708b01f92 Mon Sep 17 00:00:00 2001 From: JamesEMcclure Date: Thu, 21 Nov 2019 13:01:24 -0500 Subject: [PATCH 002/121] greyscale update --- tests/lbpm_greyscale_simulator.cpp | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/tests/lbpm_greyscale_simulator.cpp b/tests/lbpm_greyscale_simulator.cpp index 9ab7c385..0744a214 100644 --- a/tests/lbpm_greyscale_simulator.cpp +++ b/tests/lbpm_greyscale_simulator.cpp @@ -47,15 +47,14 @@ int main(int argc, char **argv) MPI_Barrier(comm); - ScaLBL_MRTModel MRT(rank,nprocs,comm); + ScaLBL_GreyscaleModel Greyscale(rank,nprocs,comm); auto filename = argv[1]; - MRT.ReadParams(filename); - MRT.SetDomain(); // this reads in the domain - MRT.ReadInput(); - MRT.Create(); // creating the model will create data structure to match the pore structure and allocate variables - MRT.Initialize(); // initializing the model will set initial conditions for variables - MRT.Run(); - MRT.VelocityField(); + Greyscale.ReadParams(filename); + Greyscale.SetDomain(); // this reads in the domain + Greyscale.ReadInput(); + Greyscale.Create(); // creating the model will create data structure to match the pore structure and allocate variables + Greyscale.Initialize(); // initializing the model will set initial conditions for variables + Greyscale.Run(); } // **************************************************** MPI_Barrier(comm); From 3cd5053ec9b2635ab4bc91943eae0b8fe545d262 Mon Sep 17 00:00:00 2001 From: Mark Berrill Date: Thu, 21 Nov 2019 13:29:26 -0500 Subject: [PATCH 003/121] Copying halo when reading grid file --- models/ColorModel.cpp | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/models/ColorModel.cpp b/models/ColorModel.cpp index 13d71b4d..d21153a9 100644 --- a/models/ColorModel.cpp +++ b/models/ColorModel.cpp @@ -4,6 +4,7 @@ color lattice boltzmann model #include "models/ColorModel.h" #include "analysis/distance.h" #include "analysis/morphology.h" +#include "common/Communication.h" #include "common/ReadMicroCT.h" #include #include @@ -191,8 +192,17 @@ void ScaLBL_ColorModel::ReadInput(){ IMAGE_INDEX++; } 
else if (domain_db->keyExists( "GridFile" )){ + // Read the local domain data auto input_id = readMicroCT( *domain_db, MPI_COMM_WORLD ); - for (int i=0; iid[i] = input_id(i); + // Fill the halo (assuming GCW of 1) + array size0 = { input_id.size(0), input_id.size(1), input_id.size(2) }; + ArraySize size1 = { Mask->Nx, Mask->Ny, Mask->Nz }; + ASSERT( size1[0] == size0[0]+2 && size1[1] == size0[1]+2 && size1[2] == size0[2]+2 ); + fillHalo fill( MPI_COMM_WORLD, Mask->rank_info, size0, { 1, 1, 1 }, 0, 1 ); + Array id_view; + id_view.viewRaw( size1, Mask->id ); + fill.copy( input_id, id_view ); + fill.fill( id_view ); } else if (domain_db->keyExists( "Filename" )){ auto Filename = domain_db->getScalar( "Filename" ); From 86beafab8acb435a76b03381e8631d909b55c6fe Mon Sep 17 00:00:00 2001 From: Rex Zhe Li Date: Thu, 21 Nov 2019 13:43:32 -0500 Subject: [PATCH 004/121] save the work for cpu version --- common/ScaLBL.h | 6 +- cpu/Greyscale.cpp | 277 ++++++++++++++++++++--------- models/GreyscaleModel.cpp | 166 ++++++++++++++--- models/GreyscaleModel.h | 11 +- tests/lbpm_greyscale_simulator.cpp | 16 +- 5 files changed, 356 insertions(+), 120 deletions(-) diff --git a/common/ScaLBL.h b/common/ScaLBL.h index efca3be8..ecb1ffed 100644 --- a/common/ScaLBL.h +++ b/common/ScaLBL.h @@ -56,9 +56,11 @@ extern "C" void ScaLBL_D3Q19_AAeven_BGK(double *dist, int start, int finish, int extern "C" void ScaLBL_D3Q19_AAodd_BGK(int *neighborList, double *dist, int start, int finish, int Np, double rlx, double Fx, double Fy, double Fz); // GREYSCALE MODEL -extern "C" void ScaLBL_D3Q19_AAeven_Greyscale(double *dist, int start, int finish, int Np, double rlx, double Fx, double Fy, double Fz); +extern "C" void ScaLBL_D3Q19_AAeven_Greyscale(double *dist, int start, int finish, int Np, double rlx, double Fx, double Fy, double Fz, + double *Poros,double *Perm, double *Velocity); -extern "C" void ScaLBL_D3Q19_AAodd_Greyscale(int *neighborList, double *dist, int start, int finish, int Np, double rlx, double Fx, double Fy, double Fz); +extern "C" void ScaLBL_D3Q19_AAodd_Greyscale(int *neighborList, double *dist, int start, int finish, int Np, double rlx, double Fx, double Fy, double Fz, + double *Poros,double *Perm, double *Velocity); // MRT MODEL extern "C" void ScaLBL_D3Q19_AAeven_MRT(double *dist, int start, int finish, int Np, double rlx_setA, double rlx_setB, double Fx, diff --git a/cpu/Greyscale.cpp b/cpu/Greyscale.cpp index a800413d..fa9a1f49 100644 --- a/cpu/Greyscale.cpp +++ b/cpu/Greyscale.cpp @@ -1,9 +1,19 @@ -extern "C" void ScaLBL_D3Q19_AAeven_Greyscale(double *dist, int start, int finish, int Np, double rlx, double Fx, double Fy, double Fz){ +#include + +extern "C" void ScaLBL_D3Q19_AAeven_Greyscale(double *dist, int start, int finish, int Np, double rlx, double Fx, double Fy, double Fz, + double *Poros,double *Perm, double *Velocity){ int n; // conserved momemnts - double rho,ux,uy,uz,uu; + double rho,vx,vy,vz,v_mag; + double ux,uy,uz,u_mag; + //double uu; // non-conserved moments double f0,f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,f11,f12,f13,f14,f15,f16,f17,f18; + double GeoFun;//geometric function from Guo's PRE 66, 036304 (2002) + double porosity; + double perm;//voxel permeability + double c0, c1; //Guo's model parameters + double mu = (1.0/rlx-0.5)/3.0;//kinematic viscosity for (int n=start; n ScaLBL_GreyscaleModel::ScaLBL_GreyscaleModel(int RANK, int NP, MPI_Comm COMM): -rank(RANK), nprocs(NP), Restart(0),timestep(0),timestepMax(0),tau(0), -Fx(0),Fy(0),Fz(0),flux(0),din(0),dout(0), +rank(RANK), nprocs(NP), 
Restart(0),timestep(0),timestepMax(0),tau(0),Fx(0),Fy(0),Fz(0),flux(0),din(0),dout(0), Nx(0),Ny(0),Nz(0),N(0),Np(0),nprocx(0),nprocy(0),nprocz(0),BoundaryCondition(0),Lx(0),Ly(0),Lz(0),comm(COMM) { - SignDist.resize(Nx,Ny,Nz); SignDist.fill(0); + SignDist.resize(Nx,Ny,Nz); + SignDist.fill(0); } ScaLBL_GreyscaleModel::~ScaLBL_GreyscaleModel(){ @@ -35,13 +35,17 @@ void ScaLBL_GreyscaleModel::ReadParams(string filename){ Restart=false; din=dout=1.0; flux=0.0; + dp = 10.0; //unit of 'dp': voxel // Color Model parameters if (greyscale_db->keyExists( "timestepMax" )){ timestepMax = greyscale_db->getScalar( "timestepMax" ); } if (greyscale_db->keyExists( "tau" )){ - tau = greyscale_db->getScalar( "tauA" ); + tau = greyscale_db->getScalar( "tau" ); + } + if (greyscale_db->keyExists( "dp" )){ + dp = greyscale_db->getScalar( "dp" ); } if (greyscale_db->keyExists( "F" )){ Fx = greyscale_db->getVector( "F" )[0]; @@ -80,6 +84,12 @@ void ScaLBL_GreyscaleModel::SetDomain(){ Ly = Dm->Ly; Lz = Dm->Lz; N = Nx*Ny*Nz; + + SignDist.resize(Nx,Ny,Nz); + Velocity_x.resize(Nx,Ny,Nz); + Velocity_y.resize(Nx,Ny,Nz); + Velocity_z.resize(Nx,Ny,Nz); + id = new signed char [N]; for (int i=0; iid[i] = 1; // initialize this way MPI_Barrier(comm); @@ -140,6 +150,9 @@ void ScaLBL_GreyscaleModel::ReadInput(){ if (rank == 0) cout << "Domain set." << endl; } +/******************************************************** + * AssignComponentLabels * + ********************************************************/ void ScaLBL_GreyscaleModel::AssignComponentLabels(double *Porosity, double *Permeablity) { size_t NLABELS=0; @@ -182,8 +195,14 @@ void ScaLBL_GreyscaleModel::AssignComponentLabels(double *Porosity, double *Perm else if (VALUE == 2) POROSITY=1.0; else if (VALUE < 1) POROSITY = 0.0; int idx = Map(i,j,k); - if (!(idx < 0)) - Porosity[idx] = POROSITY; + if (!(idx < 0)){ + if (POROSITY<=0.0){ + ERROR("Error: Porosity for grey voxels must be 0.0 < Porosity <= 1.0 !\n"); + } + else{ + Porosity[idx] = POROSITY; + } + } } } } @@ -205,13 +224,21 @@ void ScaLBL_GreyscaleModel::AssignComponentLabels(double *Porosity, double *Perm //Mask->id[n] = 0; // set mask to zero since this is an immobile component } } - // fluid labels are reserved / negative labels are immobile + // Permeability of fluid labels are reserved + // NOTE: the voxel permeability of apparent pore nodes should be infinity + // TODO: Need to revise the PERMEABILITY of nodes whose VALUE=1 and 2 if (VALUE == 1) PERMEABILITY=1.0; else if (VALUE == 2) PERMEABILITY=1.0; else if (VALUE < 1) PERMEABILITY = 0.0; int idx = Map(i,j,k); - if (!(idx < 0)) - Permeability[idx] = PERMEABILITY; + if (!(idx < 0)){ + if (PERMEABILITY<=0.0){ + ERROR("Error: Permeability for grey voxel must be > 0.0 ! 
\n"); + } + else{ + Permeability[idx] = PERMEABILITY; + } + } } } } @@ -229,7 +256,7 @@ void ScaLBL_GreyscaleModel::AssignComponentLabels(double *Porosity, double *Perm POROSITY=PorosityList[idx]; PERMEABILITY=PermeabilityList[idx]; double volume_fraction = double(label_count_global[idx])/double((Nx-2)*(Ny-2)*(Nz-2)*nprocs); - printf(" label=%d, porosity=%f, permeability=%f, volume fraction==%f\n",VALUE,POROSITY,PERMEABILITY,volume_fraction); + printf(" label=%d, porosity=%.3g, permeability=%.3g, volume fraction==%.3g\n",VALUE,POROSITY,PERMEABILITY,volume_fraction); } } @@ -324,9 +351,6 @@ void ScaLBL_GreyscaleModel::Create(){ ScaLBL_CopyToDevice(Permeability, Perm, Np*sizeof(double)); } -/******************************************************** - * AssignComponentLabels * - ********************************************************/ void ScaLBL_GreyscaleModel::Initialize(){ @@ -387,10 +411,6 @@ void ScaLBL_GreyscaleModel::Run(){ //......................................... Minkowski Morphology(Mask); - DoubleArray Velocity_x(Nx,Ny,Nz); - DoubleArray Velocity_y(Nx,Ny,Nz); - DoubleArray Velocity_z(Nx,Ny,Nz); - DoubleArray Pressure(Nx,Ny,Nz); //************ MAIN ITERATION LOOP ***************************************/ PROFILE_START("Loop"); @@ -403,21 +423,21 @@ void ScaLBL_GreyscaleModel::Run(){ //************************************************************************/ timestep++; ScaLBL_Comm->SendD3Q19AA(fq); //READ FROM NORMAL - ScaLBL_D3Q19_AAodd_Greyscale(NeighborList, fq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np, rlx, Fx, Fy, Fz); + ScaLBL_D3Q19_AAodd_Greyscale(NeighborList, fq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np, rlx, Fx, Fy, Fz,Porosity,Permeability,Velocity); ScaLBL_Comm->RecvD3Q19AA(fq); //WRITE INTO OPPOSITE - ScaLBL_D3Q19_AAodd_Greyscale(NeighborList, fq, 0, ScaLBL_Comm->LastExterior(), Np, rlx, Fx, Fy, Fz); + ScaLBL_D3Q19_AAodd_Greyscale(NeighborList, fq, 0, ScaLBL_Comm->LastExterior(), Np, rlx, Fx, Fy, Fz,Porosity,Permeability,Velocity); ScaLBL_DeviceBarrier(); MPI_Barrier(comm); timestep++; ScaLBL_Comm->SendD3Q19AA(fq); //READ FORM NORMAL - ScaLBL_D3Q19_AAeven_Greyscale(fq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np, rlx, Fx, Fy, Fz); + ScaLBL_D3Q19_AAeven_Greyscale(fq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np, rlx, Fx, Fy, Fz,Porosity,Permeability,Velocity); ScaLBL_Comm->RecvD3Q19AA(fq); //WRITE INTO OPPOSITE - ScaLBL_D3Q19_AAeven_Greyscale(fq, 0, ScaLBL_Comm->LastExterior(), Np, rlx, Fx, Fy, Fz); + ScaLBL_D3Q19_AAeven_Greyscale(fq, 0, ScaLBL_Comm->LastExterior(), Np, rlx, Fx, Fy, Fz,Porosity,Permeability,Velocity); ScaLBL_DeviceBarrier(); MPI_Barrier(comm); //************************************************************************/ if (timestep%1000==0){ - ScaLBL_D3Q19_Momentum(fq,Velocity, Np); - ScaLBL_DeviceBarrier(); MPI_Barrier(comm); + //ScaLBL_D3Q19_Momentum(fq,Velocity, Np); + //ScaLBL_DeviceBarrier(); MPI_Barrier(comm); ScaLBL_Comm->RegularLayout(Map,&Velocity[0],Velocity_x); ScaLBL_Comm->RegularLayout(Map,&Velocity[Np],Velocity_y); ScaLBL_Comm->RegularLayout(Map,&Velocity[2*Np],Velocity_z); @@ -509,6 +529,106 @@ void ScaLBL_GreyscaleModel::Run(){ // ************************************************************************ } +void ScaLBL_GreyscaleModel::VelocityField(){ + +/* Minkowski Morphology(Mask); + int SIZE=Np*sizeof(double); + ScaLBL_D3Q19_Momentum(fq,Velocity, Np); + ScaLBL_DeviceBarrier(); MPI_Barrier(comm); + ScaLBL_CopyToHost(&VELOCITY[0],&Velocity[0],3*SIZE); + + 
memcpy(Morphology.SDn.data(), Distance.data(), Nx*Ny*Nz*sizeof(double)); + Morphology.Initialize(); + Morphology.UpdateMeshValues(); + Morphology.ComputeLocal(); + Morphology.Reduce(); + + double count_loc=0; + double count; + double vax,vay,vaz; + double vax_loc,vay_loc,vaz_loc; + vax_loc = vay_loc = vaz_loc = 0.f; + for (int n=0; nLastExterior(); n++){ + vax_loc += VELOCITY[n]; + vay_loc += VELOCITY[Np+n]; + vaz_loc += VELOCITY[2*Np+n]; + count_loc+=1.0; + } + + for (int n=ScaLBL_Comm->FirstInterior(); nLastInterior(); n++){ + vax_loc += VELOCITY[n]; + vay_loc += VELOCITY[Np+n]; + vaz_loc += VELOCITY[2*Np+n]; + count_loc+=1.0; + } + MPI_Allreduce(&vax_loc,&vax,1,MPI_DOUBLE,MPI_SUM,Mask->Comm); + MPI_Allreduce(&vay_loc,&vay,1,MPI_DOUBLE,MPI_SUM,Mask->Comm); + MPI_Allreduce(&vaz_loc,&vaz,1,MPI_DOUBLE,MPI_SUM,Mask->Comm); + MPI_Allreduce(&count_loc,&count,1,MPI_DOUBLE,MPI_SUM,Mask->Comm); + + vax /= count; + vay /= count; + vaz /= count; + + double mu = (tau-0.5)/3.f; + if (rank==0) printf("Fx Fy Fz mu Vs As Js Xs vx vy vz\n"); + if (rank==0) printf("%.8g %.8g %.8g %.8g %.8g %.8g %.8g %.8g %.8g %.8g %.8g\n",Fx, Fy, Fz, mu, + Morphology.V(),Morphology.A(),Morphology.J(),Morphology.X(),vax,vay,vaz); + */ + + std::vector visData; + fillHalo fillData(Dm->Comm,Dm->rank_info,{Dm->Nx-2,Dm->Ny-2,Dm->Nz-2},{1,1,1},0,1); + + auto VxVar = std::make_shared(); + auto VyVar = std::make_shared(); + auto VzVar = std::make_shared(); + auto SignDistVar = std::make_shared(); + + IO::initialize("","silo","false"); + // Create the MeshDataStruct + visData.resize(1); + visData[0].meshName = "domain"; + visData[0].mesh = std::make_shared( Dm->rank_info,Dm->Nx-2,Dm->Ny-2,Dm->Nz-2,Dm->Lx,Dm->Ly,Dm->Lz ); + SignDistVar->name = "SignDist"; + SignDistVar->type = IO::VariableType::VolumeVariable; + SignDistVar->dim = 1; + SignDistVar->data.resize(Dm->Nx-2,Dm->Ny-2,Dm->Nz-2); + visData[0].vars.push_back(SignDistVar); + + VxVar->name = "Velocity_x"; + VxVar->type = IO::VariableType::VolumeVariable; + VxVar->dim = 1; + VxVar->data.resize(Dm->Nx-2,Dm->Ny-2,Dm->Nz-2); + visData[0].vars.push_back(VxVar); + VyVar->name = "Velocity_y"; + VyVar->type = IO::VariableType::VolumeVariable; + VyVar->dim = 1; + VyVar->data.resize(Dm->Nx-2,Dm->Ny-2,Dm->Nz-2); + visData[0].vars.push_back(VyVar); + VzVar->name = "Velocity_z"; + VzVar->type = IO::VariableType::VolumeVariable; + VzVar->dim = 1; + VzVar->data.resize(Dm->Nx-2,Dm->Ny-2,Dm->Nz-2); + visData[0].vars.push_back(VzVar); + + Array& SignData = visData[0].vars[0]->data; + Array& VelxData = visData[0].vars[1]->data; + Array& VelyData = visData[0].vars[2]->data; + Array& VelzData = visData[0].vars[3]->data; + + ASSERT(visData[0].vars[0]->name=="SignDist"); + ASSERT(visData[0].vars[1]->name=="Velocity_x"); + ASSERT(visData[0].vars[2]->name=="Velocity_y"); + ASSERT(visData[0].vars[3]->name=="Velocity_z"); + + fillData.copy(SignDist,SignData); + fillData.copy(Velocity_x,VelxData); + fillData.copy(Velocity_y,VelyData); + fillData.copy(Velocity_z,VelzData); + + IO::writeData( timestep, visData, Dm->Comm ); + +} void ScaLBL_GreyscaleModel::WriteDebug(){ // Copy back final phase indicator field and convert to regular layout diff --git a/models/GreyscaleModel.h b/models/GreyscaleModel.h index 37ddf28f..9b970a65 100644 --- a/models/GreyscaleModel.h +++ b/models/GreyscaleModel.h @@ -30,6 +30,7 @@ public: void Initialize(); void Run(); void WriteDebug(); + void VelocityField(); bool Restart,pBC; int timestep,timestepMax; @@ -38,6 +39,7 @@ public: double tolerance; double Fx,Fy,Fz,flux; double 
din,dout; + double dp;//solid particle diameter, unit in voxel int Nx,Ny,Nz,N,Np; int rank,nprocx,nprocy,nprocz,nprocs; @@ -54,16 +56,19 @@ public: std::shared_ptr analysis_db; std::shared_ptr vis_db; - IntArray Map; - DoubleArray SignDist; signed char *id; int *NeighborList; int *dvcMap; double *fq; - double *Permeability; + double *Permeability;//grey voxel permeability double *Porosity; double *Velocity; double *Pressure; + IntArray Map; + DoubleArray SignDist; + DoubleArray Velocity_x; + DoubleArray Velocity_y; + DoubleArray Velocity_z; private: MPI_Comm comm; diff --git a/tests/lbpm_greyscale_simulator.cpp b/tests/lbpm_greyscale_simulator.cpp index 9ab7c385..e15797e3 100644 --- a/tests/lbpm_greyscale_simulator.cpp +++ b/tests/lbpm_greyscale_simulator.cpp @@ -47,15 +47,15 @@ int main(int argc, char **argv) MPI_Barrier(comm); - ScaLBL_MRTModel MRT(rank,nprocs,comm); + ScaLBL_GreyscaleModel GreyscaleModel(rank,nprocs,comm); auto filename = argv[1]; - MRT.ReadParams(filename); - MRT.SetDomain(); // this reads in the domain - MRT.ReadInput(); - MRT.Create(); // creating the model will create data structure to match the pore structure and allocate variables - MRT.Initialize(); // initializing the model will set initial conditions for variables - MRT.Run(); - MRT.VelocityField(); + GreyscaleModel.ReadParams(filename); + GreyscaleModel.SetDomain(); // this reads in the domain + GreyscaleModel.ReadInput(); + GreyscaleModel.Create(); // creating the model will create data structure to match the pore structure and allocate variables + GreyscaleModel.Initialize(); // initializing the model will set initial conditions for variables + GreyscaleModel.Run(); + GreyscaleModel.VelocityField(); } // **************************************************** MPI_Barrier(comm); From a4b0f3e26eafa27d5de3b81008e99f060ce86a49 Mon Sep 17 00:00:00 2001 From: Rex Zhe Li Date: Thu, 21 Nov 2019 14:05:58 -0500 Subject: [PATCH 005/121] gpu version of greyscale LBM is also updated --- gpu/Greyscale.cu | 287 ++++++++++++++++++++++++++++++++--------------- 1 file changed, 198 insertions(+), 89 deletions(-) diff --git a/gpu/Greyscale.cu b/gpu/Greyscale.cu index 04b5e979..3365c6f9 100644 --- a/gpu/Greyscale.cu +++ b/gpu/Greyscale.cu @@ -3,12 +3,20 @@ #define NBLOCKS 1024 #define NTHREADS 256 -__global__ void dvc_ScaLBL_D3Q19_AAeven_Greyscale(double *dist, int start, int finish, int Np, double rlx, double Fx, double Fy, double Fz){ +__global__ void dvc_ScaLBL_D3Q19_AAeven_Greyscale(double *dist, int start, int finish, int Np, double rlx, double Fx, double Fy, double Fz, + double *Poros,double *Perm, double *Velocity){ int n; // conserved momemnts - double rho,ux,uy,uz,uu; + double rho,vx,vy,vz,v_mag; + double ux,uy,uz,u_mag; + //double uu; // non-conserved moments double f0,f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,f11,f12,f13,f14,f15,f16,f17,f18; + double GeoFun;//geometric function from Guo's PRE 66, 036304 (2002) + double porosity; + double perm;//voxel permeability + double c0, c1; //Guo's model parameters + double mu = (1.0/rlx-0.5)/3.0;//kinematic viscosity int S = Np/NBLOCKS/NTHREADS + 1; for (int s=0; s>>(dist,start,finish,Np,rlx,Fx,Fy,Fz); + dvc_ScaLBL_D3Q19_AAeven_Greyscale<<>>(dist,start,finish,Np,rlx,Fx,Fy,Fz,Poros,Perm,Velocity); cudaError_t err = cudaGetLastError(); if (cudaSuccess != err){ @@ -301,11 +410,11 @@ extern "C" void ScaLBL_D3Q19_AAeven_Greyscale(double *dist, int start, int finis } } -extern "C" void ScaLBL_D3Q19_AAodd_Greyscale(int *neighborList, double *dist, int start, int finish, int Np, double rlx, double 
Fx, double Fy, double Fz){ - dvc_ScaLBL_D3Q19_AAodd_Greyscale<<>>(neighborList,dist,start,finish,Np,rlx,Fx,Fy,Fz); +extern "C" void ScaLBL_D3Q19_AAodd_Greyscale(int *neighborList, double *dist, int start, int finish, int Np, double rlx, double Fx, double Fy, double Fz,double *Poros,double *Perm, double *Velocity){ + dvc_ScaLBL_D3Q19_AAodd_Greyscale<<>>(neighborList,dist,start,finish,Np,rlx,Fx,Fy,Fz,Poros,Perm,Velocity); cudaError_t err = cudaGetLastError(); if (cudaSuccess != err){ - printf("CUDA error in ScaLBL_D3Q19_AAeven_Greyscale: %s \n",cudaGetErrorString(err)); + printf("CUDA error in ScaLBL_D3Q19_AAodd_Greyscale: %s \n",cudaGetErrorString(err)); } -} \ No newline at end of file +} From cf5a284f6dd7ef2ac3b03128c11d2eb960651c03 Mon Sep 17 00:00:00 2001 From: James McClure Date: Tue, 3 Dec 2019 13:01:37 -0500 Subject: [PATCH 006/121] adding subphase functionality --- example/Workflow/HelperFunctions.R | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/example/Workflow/HelperFunctions.R b/example/Workflow/HelperFunctions.R index a86ee6fe..497cb262 100644 --- a/example/Workflow/HelperFunctions.R +++ b/example/Workflow/HelperFunctions.R @@ -7,11 +7,26 @@ ReadSubphase<-function(PATH){ S<-read.csv(FILE,head=TRUE,sep=" ") S$Vw<-S$Vwc+S$Vwd S$Vn<-S$Vnc+S$Vnd + S$Aw<-S$Awc+S$Awd + S$An<-S$Anc+S$And + S$Hw<-S$Hwc+S$Hwd + S$Hn<-S$Hnc+S$Hnd + S$Xw<-S$Xwc+S$Xwd + S$Xn<-S$Xnc+S$Xnd + S$Sw<-S$Vw/(S$Vn+S$Vw) + S$pw<-(S$pwc*S$Vwc+S$pwd*S$Vwd) / (S$Vwc+S$Vwd) + S$pn<-(S$pnc*S$Vnc+S$pnd*S$Vnd) / (S$Vnc+S$Vnd) + S$Qwx<-S$Vw*(S$Pwc_x+S$Pwd_x)/(S$Mwc+S$Mwd) S$Qnx<-S$Vn*(S$Pnc_x+S$Pnd_x)/(S$Mnc+S$Mnd) - S$Krn<-S$nun*S$Qnx/S$Fx - S$Krw<-S$nuw*S$Qwx/S$Fx + S$Qwy<-S$Vw*(S$Pwc_y+S$Pwd_y)/(S$Mwc+S$Mwd) + S$Qny<-S$Vn*(S$Pnc_y+S$Pnd_y)/(S$Mnc+S$Mnd) + S$Qwz<-S$Vw*(S$Pwc_z+S$Pwd_z)/(S$Mwc+S$Mwd) + S$Qnz<-S$Vn*(S$Pnc_z+S$Pnd_z)/(S$Mnc+S$Mnd) + + S$Krn<-S$nun*S$Qnz/S$Fz + S$Krw<-S$nuw*S$Qwz/S$Fz S$Case<-PATH return(S) } From a67d3f8b6900e032c02d2b879f0c666aab86c0fd Mon Sep 17 00:00:00 2001 From: Rex Zhe Li Date: Mon, 9 Dec 2019 14:44:58 -0500 Subject: [PATCH 007/121] calculate the medium porosity if read domain from Filename --- common/Domain.cpp | 44 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/common/Domain.cpp b/common/Domain.cpp index 0fa8545a..82bcaee2 100644 --- a/common/Domain.cpp +++ b/common/Domain.cpp @@ -585,6 +585,50 @@ void Domain::Decomp(std::string Filename) MPI_Recv(id,N,MPI_CHAR,0,15,Comm,MPI_STATUS_IGNORE); } MPI_Barrier(Comm); + + // Compute the porosity + double sum; + double sum_local=0.0; + double iVol_global = 1.0/(1.0*(Nx-2)*(Ny-2)*(Nz-2)*nprocs); + if (BoundaryCondition > 0) iVol_global = 1.0/(1.0*(Nx-2)*nprocx*(Ny-2)*nprocy*((Nz-2)*nprocz-6)); + //......................................................... + // If external boundary conditions are applied remove solid + if (BoundaryCondition > 0 && kproc() == 0){ + if (inlet_layers_z < 4) inlet_layers_z=4; + for (int k=0; k 0 && kproc() == nprocz-1){ + if (outlet_layers_z < 4) outlet_layers_z=4; + for (int k=Nz-outlet_layers_z; k 0){ + sum_local+=1.0; + } + } + } + } + MPI_Allreduce(&sum_local,&sum,1,MPI_DOUBLE,MPI_SUM,Comm); + porosity = sum*iVol_global; + if (rank()==0) printf("Media porosity = %f \n",porosity); + //......................................................... 
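Stated as a formula, the porosity printed above is the fluid-voxel fraction of the halo-stripped interior, aggregated over all ranks (a reading of the sums in the code; for BC > 0 the routine first opens the inlet/outlet layers to fluid and shrinks the reference volume accordingly, per iVol_global above):

\[ \phi = \frac{1}{(N_x-2)(N_y-2)(N_z-2)\, n_{procs}} \sum_{\text{interior } (i,j,k)} \mathbf{1}\{\, id(i,j,k) > 0 \,\}. \]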
} void Domain::AggregateLabels(char *FILENAME){ From 9c48b3de7070734e6ba3b637a369e634787bd321 Mon Sep 17 00:00:00 2001 From: Rex Zhe Li Date: Mon, 9 Dec 2019 15:17:27 -0500 Subject: [PATCH 008/121] enable single phase abs-perm simulator to read medium from Filename --- common/Domain.cpp | 44 ++++++++++++++++++++++++++++++++++++++++++++ models/MRTModel.cpp | 13 ++++++++----- 2 files changed, 52 insertions(+), 5 deletions(-) diff --git a/common/Domain.cpp b/common/Domain.cpp index 0fa8545a..82bcaee2 100644 --- a/common/Domain.cpp +++ b/common/Domain.cpp @@ -585,6 +585,50 @@ void Domain::Decomp(std::string Filename) MPI_Recv(id,N,MPI_CHAR,0,15,Comm,MPI_STATUS_IGNORE); } MPI_Barrier(Comm); + + // Compute the porosity + double sum; + double sum_local=0.0; + double iVol_global = 1.0/(1.0*(Nx-2)*(Ny-2)*(Nz-2)*nprocs); + if (BoundaryCondition > 0) iVol_global = 1.0/(1.0*(Nx-2)*nprocx*(Ny-2)*nprocy*((Nz-2)*nprocz-6)); + //......................................................... + // If external boundary conditions are applied remove solid + if (BoundaryCondition > 0 && kproc() == 0){ + if (inlet_layers_z < 4) inlet_layers_z=4; + for (int k=0; k 0 && kproc() == nprocz-1){ + if (outlet_layers_z < 4) outlet_layers_z=4; + for (int k=Nz-outlet_layers_z; k 0){ + sum_local+=1.0; + } + } + } + } + MPI_Allreduce(&sum_local,&sum,1,MPI_DOUBLE,MPI_SUM,Comm); + porosity = sum*iVol_global; + if (rank()==0) printf("Media porosity = %f \n",porosity); + //......................................................... } void Domain::AggregateLabels(char *FILENAME){ diff --git a/models/MRTModel.cpp b/models/MRTModel.cpp index 04fe937d..e2984b2a 100644 --- a/models/MRTModel.cpp +++ b/models/MRTModel.cpp @@ -93,16 +93,19 @@ void ScaLBL_MRTModel::SetDomain(){ } void ScaLBL_MRTModel::ReadInput(){ - int rank=Dm->rank(); - size_t readID; - //....................................................................... - //....................................................................... 
- Mask->ReadIDs(); sprintf(LocalRankString,"%05d",Dm->rank()); sprintf(LocalRankFilename,"%s%s","ID.",LocalRankString); sprintf(LocalRestartFile,"%s%s","Restart.",LocalRankString); + if (domain_db->keyExists( "Filename" )){ + auto Filename = domain_db->getScalar( "Filename" ); + Mask->Decomp(Filename); + } + else{ + Mask->ReadIDs(); + } + // Generate the signed distance map // Initialize the domain and communication Array id_solid(Nx,Ny,Nz); From bbd2a6e34a81d1349ffbc4ac4a729164bd7f2133 Mon Sep 17 00:00:00 2001 From: Rex Zhe Li Date: Mon, 9 Dec 2019 15:56:44 -0500 Subject: [PATCH 009/121] update output (*.out and Permeability.csv) for Greyscale --- models/GreyscaleModel.cpp | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/models/GreyscaleModel.cpp b/models/GreyscaleModel.cpp index cf66d6f4..5af10205 100644 --- a/models/GreyscaleModel.cpp +++ b/models/GreyscaleModel.cpp @@ -498,13 +498,24 @@ void ScaLBL_GreyscaleModel::Run(){ Xs=sumReduce( Dm->Comm, Xs); double h = Dm->voxel_length; double absperm = h*h*mu*Mask->Porosity()*flow_rate / force_mag; - if (rank==0) { - printf(" %f\n",absperm); - FILE * log_file = fopen("Permeability.csv","a"); - fprintf(log_file,"%i %.8g %.8g %.8g %.8g %.8g %.8g %.8g %.8g %.8g %.8g %.8g %.8g\n",timestep, Fx, Fy, Fz, mu, - h*h*h*Vs,h*h*As,h*Hs,Xs,vax,vay,vaz, absperm); - fclose(log_file); - } + + if (rank==0){ + printf(" AbsPerm = %.5g [micron^2]\n",absperm); + bool WriteHeader=false; + FILE * log_file = fopen("Permeability.csv","r"); + if (log_file != NULL) + fclose(log_file); + else + WriteHeader=true; + log_file = fopen("Permeability.csv","a"); + if (WriteHeader) + fprintf(log_file,"timesteps Fx Fy Fz mu Vs As Hs Xs vax vay vaz absperm \n", + timestep,Fx,Fy,Fz,mu,h*h*h*Vs,h*h*As,h*Hs,Xs,vax,vay,vaz,absperm); + + fprintf(log_file,"%i %.8g %.8g %.8g %.8g %.8g %.8g %.8g %.8g %.8g %.8g %.8g %.8g\n",timestep, Fx, Fy, Fz, mu, + h*h*h*Vs,h*h*As,h*Hs,Xs,vax,vay,vaz, absperm); + fclose(log_file); + } } } PROFILE_STOP("Loop"); From 5f85b767d6f5fd01463506d832d44f6be02b2ba5 Mon Sep 17 00:00:00 2001 From: Rex Zhe Li Date: Mon, 9 Dec 2019 22:30:36 -0500 Subject: [PATCH 010/121] add debugging for greyscale lbm --- models/GreyscaleModel.cpp | 56 +++++++++++++++--------------- tests/lbpm_greyscale_simulator.cpp | 1 + 2 files changed, 29 insertions(+), 28 deletions(-) diff --git a/models/GreyscaleModel.cpp b/models/GreyscaleModel.cpp index 5af10205..2ec3b85e 100644 --- a/models/GreyscaleModel.cpp +++ b/models/GreyscaleModel.cpp @@ -643,34 +643,36 @@ void ScaLBL_GreyscaleModel::VelocityField(){ void ScaLBL_GreyscaleModel::WriteDebug(){ // Copy back final phase indicator field and convert to regular layout -/* ScaLBL_CopyToHost(Porosity.data(), Poros, sizeof(double)*N); + DoubleArray PhaseField(Nx,Ny,Nz); - FILE *OUTFILE; - sprintf(LocalRankFilename,"Phase.%05i.raw",rank); - OUTFILE = fopen(LocalRankFilename,"wb"); - fwrite(PhaseField.data(),8,N,OUTFILE); - fclose(OUTFILE); + //ScaLBL_CopyToHost(Porosity.data(), Poros, sizeof(double)*N); - ScaLBL_Comm->RegularLayout(Map,&Den[0],PhaseField); - FILE *AFILE; - sprintf(LocalRankFilename,"A.%05i.raw",rank); - AFILE = fopen(LocalRankFilename,"wb"); - fwrite(PhaseField.data(),8,N,AFILE); - fclose(AFILE); - - ScaLBL_Comm->RegularLayout(Map,&Den[Np],PhaseField); - FILE *BFILE; - sprintf(LocalRankFilename,"B.%05i.raw",rank); - BFILE = fopen(LocalRankFilename,"wb"); - fwrite(PhaseField.data(),8,N,BFILE); - fclose(BFILE); - - ScaLBL_Comm->RegularLayout(Map,Pressure,PhaseField); - FILE *PFILE; - 
sprintf(LocalRankFilename,"Pressure.%05i.raw",rank); - PFILE = fopen(LocalRankFilename,"wb"); - fwrite(PhaseField.data(),8,N,PFILE); - fclose(PFILE); +// FILE *OUTFILE; +// sprintf(LocalRankFilename,"Phase.%05i.raw",rank); +// OUTFILE = fopen(LocalRankFilename,"wb"); +// fwrite(PhaseField.data(),8,N,OUTFILE); +// fclose(OUTFILE); +// +// ScaLBL_Comm->RegularLayout(Map,&Den[0],PhaseField); +// FILE *AFILE; +// sprintf(LocalRankFilename,"A.%05i.raw",rank); +// AFILE = fopen(LocalRankFilename,"wb"); +// fwrite(PhaseField.data(),8,N,AFILE); +// fclose(AFILE); +// +// ScaLBL_Comm->RegularLayout(Map,&Den[Np],PhaseField); +// FILE *BFILE; +// sprintf(LocalRankFilename,"B.%05i.raw",rank); +// BFILE = fopen(LocalRankFilename,"wb"); +// fwrite(PhaseField.data(),8,N,BFILE); +// fclose(BFILE); +// +// ScaLBL_Comm->RegularLayout(Map,Pressure,PhaseField); +// FILE *PFILE; +// sprintf(LocalRankFilename,"Pressure.%05i.raw",rank); +// PFILE = fopen(LocalRankFilename,"wb"); +// fwrite(PhaseField.data(),8,N,PFILE); +// fclose(PFILE); ScaLBL_Comm->RegularLayout(Map,&Velocity[0],PhaseField); FILE *VELX_FILE; @@ -693,7 +695,5 @@ void ScaLBL_GreyscaleModel::WriteDebug(){ fwrite(PhaseField.data(),8,N,VELZ_FILE); fclose(VELZ_FILE); - * - */ } diff --git a/tests/lbpm_greyscale_simulator.cpp b/tests/lbpm_greyscale_simulator.cpp index 9f910a32..61322d6d 100644 --- a/tests/lbpm_greyscale_simulator.cpp +++ b/tests/lbpm_greyscale_simulator.cpp @@ -55,6 +55,7 @@ int main(int argc, char **argv) Greyscale.Initialize(); // initializing the model will set initial conditions for variables Greyscale.Run(); Greyscale.VelocityField(); + Greyscale.WriteDebug(); } // **************************************************** MPI_Barrier(comm); From cddcfa0188d932f755981e92ede2d71c5394d267 Mon Sep 17 00:00:00 2001 From: Rex Zhe Li Date: Tue, 10 Dec 2019 16:54:42 -0500 Subject: [PATCH 011/121] fix the bug and now have a workable greyscale BGK model in both CPU and GPU --- cpu/Greyscale.cpp | 54 +++++++++++++++++---------------------- gpu/Greyscale.cu | 52 +++++++++++++++++-------------------- models/GreyscaleModel.cpp | 14 +++++++++- 3 files changed, 60 insertions(+), 60 deletions(-) diff --git a/cpu/Greyscale.cpp b/cpu/Greyscale.cpp index fa9a1f49..48e61a56 100644 --- a/cpu/Greyscale.cpp +++ b/cpu/Greyscale.cpp @@ -1,6 +1,6 @@ #include -extern "C" void ScaLBL_D3Q19_AAeven_Greyscale(double *dist, int start, int finish, int Np, double rlx, double Fx, double Fy, double Fz, +extern "C" void ScaLBL_D3Q19_AAeven_Greyscale(double *dist, int start, int finish, int Np, double rlx, double Gx, double Gy, double Gz, double *Poros,double *Perm, double *Velocity){ int n; // conserved momemnts @@ -14,6 +14,7 @@ extern "C" void ScaLBL_D3Q19_AAeven_Greyscale(double *dist, int start, int finis double perm;//voxel permeability double c0, c1; //Guo's model parameters double mu = (1.0/rlx-0.5)/3.0;//kinematic viscosity + double Fx, Fy, Fz;//The total body force including Brinkman force and user-specified (Gx,Gy,Gz) for (int n=start; nRegularLayout(Map,&Porosity[0],PhaseField); + FILE *POROS_FILE; + sprintf(LocalRankFilename,"Porosity.%05i.raw",rank); + POROS_FILE = fopen(LocalRankFilename,"wb"); + fwrite(PhaseField.data(),8,N,POROS_FILE); + fclose(POROS_FILE); + ScaLBL_Comm->RegularLayout(Map,&Permeability[0],PhaseField); + FILE *PERM_FILE; + sprintf(LocalRankFilename,"Permeability.%05i.raw",rank); + PERM_FILE = fopen(LocalRankFilename,"wb"); + fwrite(PhaseField.data(),8,N,PERM_FILE); + fclose(PERM_FILE); } From e66a92142fd6c8da55d732d54c98d53c4cc38248 Mon 
Sep 17 00:00:00 2001 From: Mark Berrill Date: Thu, 12 Dec 2019 13:58:51 -0500 Subject: [PATCH 012/121] Adding MPIFLAGS option --- cmake/libraries.cmake | 2 +- cmake/macros.cmake | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/cmake/libraries.cmake b/cmake/libraries.cmake index 54d70b5d..ebc37f8f 100644 --- a/cmake/libraries.cmake +++ b/cmake/libraries.cmake @@ -77,7 +77,7 @@ MACRO( CONFIGURE_MPI ) ENDIF () ELSE () # Search for the MPI executable in the current directory - FIND_PROGRAM ( MPIEXEC NAMES mpiexec mpirun lamexec PATHS ${MPI_DIRECTORY}/bin NO_DEFAULT_PATH ) + FIND_PROGRAM( MPIEXEC NAMES mpiexec mpirun lamexec PATHS ${MPI_DIRECTORY}/bin NO_DEFAULT_PATH ) IF ( NOT MPIEXEC ) MESSAGE( FATAL_ERROR "Could not locate mpi executable" ) ENDIF() diff --git a/cmake/macros.cmake b/cmake/macros.cmake index 8791616c..d1c8dbe7 100644 --- a/cmake/macros.cmake +++ b/cmake/macros.cmake @@ -848,7 +848,7 @@ FUNCTION( ADD_${PROJ}_TEST EXEFILE ${ARGN} ) ADD_PROJ_PROVISIONAL_TEST( ${EXEFILE} ) CREATE_TEST_NAME( ${EXEFILE} ${ARGN} ) IF ( USE_MPI_FOR_SERIAL_TESTS ) - ADD_TEST( NAME ${TESTNAME} COMMAND ${MPIEXEC} "${MPIEXEC_NUMPROC_FLAG}" 1 $ ${ARGN} ) + ADD_TEST( NAME ${TESTNAME} COMMAND ${MPIEXEC} ${MPIFLAGS} "${MPIEXEC_NUMPROC_FLAG}" 1 $ ${ARGN} ) SET_PROPERTY( TEST ${TESTNAME} APPEND PROPERTY ENVIRONMENT OMPI_MCA_hwloc_base_binding_policy=none ) ELSE() ADD_TEST( NAME ${TESTNAME} COMMAND $ ${ARGN} ) @@ -877,7 +877,7 @@ FUNCTION( ADD_${PROJ}_WEEKLY_TEST EXEFILE PROCS ${ARGN} ) ELSEIF( ${PROCS} STREQUAL "1" ) CREATE_TEST_NAME( "${EXEFILE}_WEEKLY" ${ARGN} ) IF ( USE_MPI_FOR_SERIAL_TESTS ) - ADD_TEST( NAME ${TESTNAME} COMMAND ${MPIEXEC} "${MPIEXEC_NUMPROC_FLAG}" 1 $ ${ARGN} ) + ADD_TEST( NAME ${TESTNAME} COMMAND ${MPIEXEC} ${MPIFLAGS} "${MPIEXEC_NUMPROC_FLAG}" 1 $ ${ARGN} ) SET_PROPERTY( TEST ${TESTNAME} APPEND PROPERTY ENVIRONMENT OMPI_MCA_hwloc_base_binding_policy=none ) ELSE() ADD_TEST( NAME ${TESTNAME} COMMAND $ ${ARGN} ) @@ -909,7 +909,7 @@ FUNCTION( ADD_${PROJ}_TEST_PARALLEL EXEFILE PROCS ${ARGN} ) ELSEIF ( ${PROCS} GREATER ${TEST_MAX_PROCS} ) MESSAGE("Disabling test ${TESTNAME} (exceeds maximum number of processors ${TEST_MAX_PROCS})") ELSE() - ADD_TEST( NAME ${TESTNAME} COMMAND ${MPIEXEC} "${MPIEXEC_NUMPROC_FLAG}" ${PROCS} $ ${ARGN} ) + ADD_TEST( NAME ${TESTNAME} COMMAND ${MPIEXEC} ${MPIFLAGS} "${MPIEXEC_NUMPROC_FLAG}" ${PROCS} $ ${ARGN} ) SET_PROPERTY( TEST ${TESTNAME} APPEND PROPERTY ENVIRONMENT OMPI_MCA_hwloc_base_binding_policy=none ) SET_TESTS_PROPERTIES( ${TESTNAME} PROPERTIES FAIL_REGULAR_EXPRESSION "${TEST_FAIL_REGULAR_EXPRESSION}" PROCESSORS ${PROCS} ) ADD_RESOURCE_LOCK( ${TESTNAME} ${EXEFILE} ${ARGN} ) @@ -930,7 +930,7 @@ MACRO( ADD_${PROJ}_TEST_THREAD_MPI EXEFILE PROCS THREADS ${ARGN} ) SET_TESTS_PROPERTIES ( ${TESTNAME} PROPERTIES FAIL_REGULAR_EXPRESSION "${TEST_FAIL_REGULAR_EXPRESSION}" PROCESSORS ${TOT_PROCS} ) ADD_RESOURCE_LOCK( ${TESTNAME} ${EXEFILE} ${ARGN} ) ELSEIF ( USE_MPI OR USE_EXT_MPI ) - ADD_TEST( NAME ${TESTNAME} COMMAND ${MPIEXEC} "${MPIEXEC_NUMPROC_FLAG}" ${PROCS} $ ${ARGN} ) + ADD_TEST( NAME ${TESTNAME} COMMAND ${MPIEXEC} ${MPIFLAGS} "${MPIEXEC_NUMPROC_FLAG}" ${PROCS} $ ${ARGN} ) SET_PROPERTY( TEST ${TESTNAME} APPEND PROPERTY ENVIRONMENT OMPI_MCA_hwloc_base_binding_policy=none ) SET_TESTS_PROPERTIES ( ${TESTNAME} PROPERTIES FAIL_REGULAR_EXPRESSION "${TEST_FAIL_REGULAR_EXPRESSION}" PROCESSORS ${TOT_PROCS} ) ADD_RESOURCE_LOCK( ${TESTNAME} ${EXEFILE} ${ARGN} ) @@ -966,7 +966,7 @@ FUNCTION( ADD_${PROJ}_EXAMPLE EXEFILE PROCS ${ARGN} ) ADD_TEST( 
NAME ${TESTNAME} COMMAND $ ${ARGN} ) ELSEIF ( USE_EXT_MPI AND NOT (${PROCS} GREATER ${TEST_MAX_PROCS}) ) CREATE_TEST_NAME( "example--${EXEFILE}_${PROCS}procs" ${ARGN} ) - ADD_TEST( NAME ${TESTNAME} COMMAND ${MPIEXEC} "${MPIEXEC_NUMPROC_FLAG}" ${PROCS} $ ${ARGN} ) + ADD_TEST( NAME ${TESTNAME} COMMAND ${MPIEXEC} ${MPIFLAGS} "${MPIEXEC_NUMPROC_FLAG}" ${PROCS} $ ${ARGN} ) SET_PROPERTY( TEST ${TESTNAME} APPEND PROPERTY ENVIRONMENT OMPI_MCA_hwloc_base_binding_policy=none ) ENDIF() SET_TESTS_PROPERTIES( ${TESTNAME} PROPERTIES FAIL_REGULAR_EXPRESSION "${TEST_FAIL_REGULAR_EXPRESSION}" PROCESSORS ${PROCS} ) From f0a7732f21e27756de72c96400c4e646c628d7f5 Mon Sep 17 00:00:00 2001 From: Mark Berrill Date: Thu, 2 Jan 2020 13:23:51 -0500 Subject: [PATCH 013/121] Updating StackTrace and improving performance converting uCT data --- StackTrace/StackTrace.cpp | 26 ++++++-- StackTrace/StackTrace.h | 11 ++++ StackTrace/Utilities.cpp | 58 ++++++++++++++--- StackTrace/Utilities.h | 18 +++++ StackTrace/string_view.h | 2 +- analysis/runAnalysis.cpp | 11 ++-- common/Communication.hpp | 12 ++-- common/ReadMicroCT.cpp | 37 +++++------ common/Utilities.cpp | 116 ++++++++++++++++++++++++++++++++- common/Utilities.h | 31 +++++++++ tests/lbpm_color_simulator.cpp | 70 ++++++++++---------- 11 files changed, 303 insertions(+), 89 deletions(-) diff --git a/StackTrace/StackTrace.cpp b/StackTrace/StackTrace.cpp index e9292990..55a24352 100644 --- a/StackTrace/StackTrace.cpp +++ b/StackTrace/StackTrace.cpp @@ -7,6 +7,7 @@ #include #include +#include #include #include #include @@ -348,8 +349,11 @@ static inline int exec3( const char *cmd, FUNCTION &fun ) if ( buffer[0] != 0 ) fun( buffer ); } - auto status = pclose( pipe ); - int code = WEXITSTATUS( status ); + int code = pclose( pipe ); + if ( errno == ECHILD ) { + errno = 0; + code = 0; + } std::this_thread::yield(); // Allow any signals to process resetSignal( SIGCHLD ); // Clear child exited return code; @@ -1741,7 +1745,7 @@ std::vector StackTrace::defaultSignalsToCatch() * Set the signal handlers * ****************************************************************************/ static std::function abort_fun; -static StackTrace::abort_error rethrow() +StackTrace::abort_error rethrow() { StackTrace::abort_error error; #ifdef USE_LINUX @@ -1775,14 +1779,14 @@ static StackTrace::abort_error rethrow() } return error; } -static void term_func_abort( int sig ) +void StackTrace::terminateFunctionSignal( int sig ) { StackTrace::abort_error err; err.type = StackTrace::terminateType::signal; err.signal = sig; err.bytes = StackTrace::Utilities::getMemoryUsage(); err.stack = StackTrace::backtrace(); - err.stackType = StackTrace::printStackType::global; + err.stackType = StackTrace::getDefaultStackType(); abort_fun( err ); } static bool signals_set[256] = { false }; @@ -1829,7 +1833,7 @@ void StackTrace::setErrorHandler( std::function allSignalsToCatch(); @@ -289,6 +293,13 @@ multi_stack_info generateFromString( const std::vector &str ); multi_stack_info generateFromString( const std::string &str ); +//! Set default stack type +void setDefaultStackType( StackTrace::printStackType ); + +//! 
Get default stack type +StackTrace::printStackType getDefaultStackType(); + + } // namespace StackTrace diff --git a/StackTrace/Utilities.cpp b/StackTrace/Utilities.cpp index 734a0056..11f05777 100644 --- a/StackTrace/Utilities.cpp +++ b/StackTrace/Utilities.cpp @@ -8,8 +8,10 @@ #include #include #include +#include #include #include +#include #ifdef USE_MPI #include "mpi.h" @@ -19,6 +21,10 @@ #include "MemoryApp.h" #endif +#ifdef USE_GCOV +extern "C" void __gcov_flush( void ); +#endif + #define perr std::cerr @@ -65,6 +71,12 @@ // clang-format on +#ifdef __GNUC__ +#define USE_ABI +#include +#endif + + namespace StackTrace { @@ -96,13 +108,12 @@ inline size_t findfirst( const std::vector &X, TYPE Y ) /**************************************************************************** * Function to terminate the program * ****************************************************************************/ -static bool abort_throwException = false; -static printStackType abort_stackType = printStackType::global; -static int force_exit = 0; +static bool abort_throwException = false; +static int force_exit = 0; void Utilities::setAbortBehavior( bool throwException, int stackType ) { abort_throwException = throwException; - abort_stackType = static_cast( stackType ); + StackTrace::setDefaultStackType( static_cast( stackType ) ); } void Utilities::abort( const std::string &message, const std::string &filename, const int line ) { @@ -112,16 +123,28 @@ void Utilities::abort( const std::string &message, const std::string &filename, err.type = terminateType::abort; err.line = line; err.bytes = Utilities::getMemoryUsage(); - err.stackType = abort_stackType; + err.stackType = StackTrace::getDefaultStackType(); err.stack = StackTrace::backtrace(); throw err; } -static void terminate( const StackTrace::abort_error &err ) +static std::mutex terminate_mutex; +static inline void callAbort() { +#ifdef USE_GCOV + __gcov_flush(); +#endif + terminate_mutex.unlock(); + std::abort(); +} +void Utilities::terminate( const StackTrace::abort_error &err ) +{ + // Lock mutex to ensure multiple threads do not try to abort simultaneously + terminate_mutex.lock(); + // Clear the error handlers clearErrorHandler(); // Print the message and abort if ( force_exit > 1 ) { - std::abort(); + callAbort(); } else if ( !abort_throwException ) { // Use MPI_abort (will terminate all processes) force_exit = 2; @@ -135,10 +158,11 @@ static void terminate( const StackTrace::abort_error &err ) MPI_Abort( MPI_COMM_WORLD, -1 ); } #endif - std::abort(); + callAbort(); } else { perr << err.what(); - std::abort(); + perr.flush(); + callAbort(); } } @@ -149,7 +173,7 @@ static void terminate( const StackTrace::abort_error &err ) static void setTerminateErrorHandler() { // Set the terminate routine for runtime errors - StackTrace::setErrorHandler( terminate ); + StackTrace::setErrorHandler( Utilities::terminate ); } void Utilities::setErrorHandlers() { @@ -293,4 +317,18 @@ std::string Utilities::exec( const string_view &cmd, int &exit_code ) } +/**************************************************************************** + * Get the type name * + ****************************************************************************/ +std::string Utilities::getTypeName( const std::type_info &id ) +{ + std::string name = id.name(); +#if defined( USE_ABI ) + int status; + name = abi::__cxa_demangle( name.c_str(), 0, 0, &status ); +#endif + return name; +} + + } // namespace StackTrace diff --git a/StackTrace/Utilities.h b/StackTrace/Utilities.h index 10ed9085..83c8d7aa 
100644 --- a/StackTrace/Utilities.h +++ b/StackTrace/Utilities.h @@ -4,6 +4,7 @@ #include #include #include +#include #include "StackTrace/StackTrace.h" #include "StackTrace/string_view.h" @@ -28,9 +29,14 @@ void abort( const std::string &message, const std::string &filename, const int l void setAbortBehavior( bool throwException, int stackType = 2 ); +//! Function to terminate the application +void terminate( const StackTrace::abort_error &err ); + + //! Function to set the error handlers void setErrorHandlers(); + //! Function to clear the error handlers void clearErrorHandlers(); @@ -92,6 +98,18 @@ void cause_segfault(); std::string exec( const StackTrace::string_view &cmd, int &exit_code ); +//! Return the hopefully demangled name of the given type +std::string getTypeName( const std::type_info &id ); + + +//! Return the hopefully demangled name of the given type +template +inline std::string getTypeName() +{ + return getTypeName( typeid( TYPE ) ); +} + + } // namespace Utilities } // namespace StackTrace diff --git a/StackTrace/string_view.h b/StackTrace/string_view.h index d83d1f24..ee729f63 100644 --- a/StackTrace/string_view.h +++ b/StackTrace/string_view.h @@ -119,7 +119,7 @@ public: int result = 0; for ( int i = 0; i < N && result == 0; i++ ) if ( d_data[i] != other[i] ) - result = d_data[i] < other[i] ? -i : i; + result = d_data[i] < other[i] ? -( i + 1 ) : ( i + 1 ); if ( result == 0 ) result = size() == other.size() ? 0 : size() < other.size() ? -1 : 1; return result; diff --git a/analysis/runAnalysis.cpp b/analysis/runAnalysis.cpp index caa03b1b..6c76f58b 100644 --- a/analysis/runAnalysis.cpp +++ b/analysis/runAnalysis.cpp @@ -767,6 +767,8 @@ void runAnalysis::run(int timestep, std::shared_ptr input_db, TwoPhase double *Pressure, double *Velocity, double *fq, double *Den) { int N = d_N[0]*d_N[1]*d_N[2]; + NULL_USE( N ); + NULL_USE( Phi ); auto db = input_db->getDatabase( "Analysis" ); //int timestep = db->getWithDefault( "timestep", 0 ); @@ -937,8 +939,6 @@ void runAnalysis::run(int timestep, std::shared_ptr input_db, TwoPhase ******************************************************************/ void runAnalysis::basic(int timestep, std::shared_ptr input_db, SubPhase &Averages, const double *Phi, double *Pressure, double *Velocity, double *fq, double *Den) { - int N = d_N[0]*d_N[1]*d_N[2]; - // Check which analysis steps we need to perform auto color_db = input_db->getDatabase( "Color" ); auto vis_db = input_db->getDatabase( "Visualization" ); @@ -954,7 +954,7 @@ void runAnalysis::basic(int timestep, std::shared_ptr input_db, SubPha finish(); } - PROFILE_START("run"); + PROFILE_START("basic"); // Copy the appropriate variables to the host (so we can spawn new threads) ScaLBL_DeviceBarrier(); @@ -983,7 +983,6 @@ void runAnalysis::basic(int timestep, std::shared_ptr input_db, SubPha } PROFILE_STOP("Copy data to host"); - PROFILE_START("run",1); // Spawn threads to do the analysis work //if (timestep%d_restart_interval==0){ // if ( matches(type,AnalysisType::ComputeAverages) ) { @@ -1036,12 +1035,11 @@ void runAnalysis::basic(int timestep, std::shared_ptr input_db, SubPha d_wait_vis = d_tpool.add_work(work); } - PROFILE_STOP("run"); + PROFILE_STOP("basic"); } void runAnalysis::WriteVisData(int timestep, std::shared_ptr input_db, SubPhase &Averages, const double *Phi, double *Pressure, double *Velocity, double *fq, double *Den) { - int N = d_N[0]*d_N[1]*d_N[2]; auto color_db = input_db->getDatabase( "Color" ); auto vis_db = input_db->getDatabase( "Visualization" ); //int timestep = 
color_db->getWithDefault( "timestep", 0 ); @@ -1068,7 +1066,6 @@ void runAnalysis::WriteVisData(int timestep, std::shared_ptr input_db, d_wait_vis = d_tpool.add_work(work2); //Averages.WriteVis = false; - // } PROFILE_STOP("write vis"); } diff --git a/common/Communication.hpp b/common/Communication.hpp index cb9f3f18..33fed3a7 100644 --- a/common/Communication.hpp +++ b/common/Communication.hpp @@ -44,9 +44,9 @@ Array redistribute( const RankInfoStruct& src_rank, const Array& src if ( !src_data.empty() ) { int i1[3] = { src_size[0] * src_rank.ix, src_size[1] * src_rank.jy, src_size[2] * src_rank.kz }; int i2[3] = { i1[0] + src_size[0] - 1, i1[1] + src_size[1] - 1, i1[2] + src_size[2] - 1 }; - for ( size_t i=0; i redistribute( const RankInfoStruct& src_rank, const Array& src Array dst_data( dst_size[0], dst_size[1], dst_size[2] ); int i1[3] = { dst_size[0] * dst_rank.ix, dst_size[1] * dst_rank.jy, dst_size[2] * dst_rank.kz }; int i2[3] = { i1[0] + dst_size[0] - 1, i1[1] + dst_size[1] - 1, i1[2] + dst_size[2] - 1 }; - for ( size_t i=0; i readMicroCT( const Database& domain, MPI_Comm comm ) auto n = domain.getVector( "n" ); int rank = comm_rank(MPI_COMM_WORLD); auto nproc = domain.getVector( "nproc" ); - auto ReadValues = domain.getVector( "ReadValues" ); - auto WriteValues = domain.getVector( "WriteValues" ); RankInfoStruct rankInfo( rank, nproc[0], nproc[1], nproc[2] ); // Determine the largest file number to get @@ -95,29 +93,26 @@ Array readMicroCT( const Database& domain, MPI_Comm comm ) ERROR( "Invalid name for first file" ); } data = readMicroCT( filename ); - - // Relabel the data - for (int k = 0; k<1024; k++){ - for (int j = 0; j<1024; j++){ - for (int i = 0; i<1024; i++){ - //n = k*Nfx*Nfy + j*Nfx + i; - //char locval = loc_id[n]; - char locval = data(i,j,k); - for (int idx=0; idx( "ReadValues" ); + auto WriteValues = domain.getVector( "WriteValues" ); + ASSERT( ReadValues.size() == WriteValues.size() ); + int readMaxValue = 0; + for ( auto v : ReadValues ) + readMaxValue = std::max( data.max()+1, v ); + std::vector map( readMaxValue + 1, -1 ); + for ( size_t i=0; i= 0 && t <= readMaxValue ); + data(i) = map[t]; + } + return data; } diff --git a/common/Utilities.cpp b/common/Utilities.cpp index f6d810af..1cf764be 100644 --- a/common/Utilities.cpp +++ b/common/Utilities.cpp @@ -1,10 +1,116 @@ #include "common/Utilities.h" +#include "StackTrace/StackTrace.h" +#include "StackTrace/ErrorHandlers.h" + +#ifdef USE_TIMER +#include "MemoryApp.h" +#include "ProfilerApp.h" +#endif + +#ifdef USE_MPI +#include "mpi.h" +#endif -#include #include +#include +#include -// Factor a number into it's prime factors +// Mutex for Utility functions +static std::mutex Utilities_mutex; + + +/**************************************************************************** + * Function to perform the default startup/shutdown sequences * + ****************************************************************************/ +void Utilities::startup( int argc, char **argv ) +{ + NULL_USE( argc ); + NULL_USE( argv ); + // Disable OpenMP + Utilities::setenv( "OMP_NUM_THREADS", "1" ); + Utilities::setenv( "MKL_NUM_THREADS", "1" ); + // Start MPI +#ifdef USE_MPI + int provided; + MPI_Init_thread( &argc, &argv, MPI_THREAD_MULTIPLE, &provided ); + if ( provided < MPI_THREAD_MULTIPLE ) { + int rank; + MPI_Comm_rank( MPI_COMM_WORLD, &rank ); + if ( rank == 0 ) + std::cerr << "Warning: Failed to start MPI with necessary thread support, thread support will be disabled" << std::endl; + } + StackTrace::globalCallStackInitialize( 
MPI_COMM_WORLD ); +#endif + // Set the error handlers + Utilities::setAbortBehavior( true, 3 ); + Utilities::setErrorHandlers(); +} +void Utilities::shutdown() +{ + // Clear the error handlers + Utilities::clearErrorHandlers(); + StackTrace::clearSignals(); + StackTrace::clearSymbols(); + int rank = 0; +#ifdef USE_MPI + MPI_Comm_rank( MPI_COMM_WORLD, &rank ); + StackTrace::globalCallStackFinalize(); + MPI_Barrier( MPI_COMM_WORLD ); + MPI_Finalize(); +#endif +#ifdef USE_TIMER + PROFILE_DISABLE(); + auto memory = MemoryApp::getMemoryStats(); + if ( rank == 0 && memory.N_new > memory.N_delete ) + MemoryApp::print( std::cout ); +#endif +} + + +/**************************************************************************** + * Function to set an environemental variable * + ****************************************************************************/ +void Utilities::setenv( const std::string &name, const std::string &value ) +{ + Utilities_mutex.lock(); +#if defined( USE_LINUX ) || defined( USE_MAC ) + bool pass = false; + if ( !value.empty() ) + pass = ::setenv( name.data(), value.data(), 1 ) == 0; + else + pass = ::unsetenv( name.data() ) == 0; +#elif defined( USE_WINDOWS ) + bool pass = SetEnvironmentVariable( name.data(), value.data() ) != 0; +#else +#error Unknown OS +#endif + Utilities_mutex.unlock(); + if ( !pass ) { + char msg[1024]; + if ( !value.empty() ) + sprintf( + msg, "Error setting enviornmental variable: %s=%s\n", name.data(), value.data() ); + else + sprintf( msg, "Error clearing enviornmental variable: %s\n", name.data() ); + ERROR( msg ); + } +} +std::string Utilities::getenv( const std::string &name ) +{ + std::string var; + Utilities_mutex.lock(); + auto tmp = std::getenv( name.data() ); + if ( tmp ) + var = std::string( tmp ); + Utilities_mutex.unlock(); + return var; +} + + +/**************************************************************************** + * Factor a number into it's prime factors * + ****************************************************************************/ std::vector Utilities::factor(size_t number) { if ( number<=3 ) @@ -54,9 +160,13 @@ std::vector Utilities::factor(size_t number) } -// Dummy function to prevent compiler from optimizing away variable +/**************************************************************************** + * Dummy function to prevent compiler from optimizing away variable * + ****************************************************************************/ void Utilities::nullUse( void* data ) { NULL_USE(data); } + + diff --git a/common/Utilities.h b/common/Utilities.h index 90cb4008..da579966 100644 --- a/common/Utilities.h +++ b/common/Utilities.h @@ -25,6 +25,37 @@ using StackTrace::Utilities::sleep_ms; using StackTrace::Utilities::sleep_s; +/*! + * \brief Start MPI, error handlers + * \details This routine will peform the default startup sequence + * \param argc argc from main + * \param argv argv from main + */ +void startup( int argc, char **argv ); + +/*! + * \brief Stop MPI, error handlers + * \details This routine will peform the default shutdown sequence to match startup + */ +void shutdown(); + + +/*! + * Get an environmental variable + * @param name The name of the environmental variable + * @return The value of the enviornmental variable + */ +std::string getenv( const std::string &name ); + + +/*! + * Set an environmental variable + * @param name The name of the environmental variable + * @param value The value to set + */ +void setenv( const std::string &name, const std::string &value ); + + //! 
std::string version of sprintf inline std::string stringf( const char *format, ... ); diff --git a/tests/lbpm_color_simulator.cpp b/tests/lbpm_color_simulator.cpp index e8e675e2..1f63c653 100644 --- a/tests/lbpm_color_simulator.cpp +++ b/tests/lbpm_color_simulator.cpp @@ -7,6 +7,7 @@ #include #include "models/ColorModel.h" +#include "common/Utilities.h" //#define WRE_SURFACES @@ -15,7 +16,6 @@ * James E. McClure 2013-2014 */ -using namespace std; //************************************************************************* // Implementation of Two-Phase Immiscible LBM using CUDA @@ -23,27 +23,26 @@ using namespace std; int main(int argc, char **argv) { - // Initialize MPI - int provided_thread_support = -1; - MPI_Init_thread(&argc,&argv,MPI_THREAD_MULTIPLE,&provided_thread_support); - MPI_Comm comm; - MPI_Comm_dup(MPI_COMM_WORLD,&comm); - int rank = comm_rank(comm); - int nprocs = comm_size(comm); - if ( rank==0 && provided_thread_support Date: Mon, 13 Jan 2020 22:50:37 -0500 Subject: [PATCH 014/121] save the work --- gpu/Greyscale.cu | 468 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 468 insertions(+) diff --git a/gpu/Greyscale.cu b/gpu/Greyscale.cu index 18bfba58..fdb0a462 100644 --- a/gpu/Greyscale.cu +++ b/gpu/Greyscale.cu @@ -394,6 +394,474 @@ __global__ void dvc_ScaLBL_D3Q19_AAodd_Greyscale(int *neighborList, double *dist } } +__global__ void dvc_ScaLBL_D3Q19_AAeven_Greyscale_IMRT(double *dist, int start, int finish, int Np, double rlx, double Gx, double Gy, double Gz, + double *Poros,double *Perm, double *Velocity, double Den){ + int n; + double vx,vy,vz,v_mag; + double ux,uy,uz,u_mag; + // conserved momemnts + double rho,jx,jy,jz; + // non-conserved moments + double m1,m2,m4,m6,m8,m9,m10,m11,m12,m13,m14,m15,m16,m17,m18; + double m3,m5,m7; + double GeoFun;//geometric function from Guo's PRE 66, 036304 (2002) + double porosity; + double perm;//voxel permeability + double c0, c1; //Guo's model parameters + double mu = (1.0/rlx-0.5)/3.0;//kinematic viscosity + double Fx, Fy, Fz;//The total body force including Brinkman force and user-specified (Gx,Gy,Gz) + double rlx_setA = rlx; + double rlx_setB = 8.f*(2.f-rlx_setA)/(8.f-rlx_setA); + + int S = Np/NBLOCKS/NTHREADS + 1; + for (int s=0; s>>(dist,start,finish,Np,rlx,Fx,Fy,Fz,Poros,Perm,Velocity); From 3b23fca11864c313da091bd375000dfb70d39a88 Mon Sep 17 00:00:00 2001 From: Rex Zhe Li Date: Tue, 14 Jan 2020 12:00:22 -0500 Subject: [PATCH 015/121] change specifier of printf to correct the output for very large image --- common/Domain.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common/Domain.cpp b/common/Domain.cpp index 82bcaee2..48bfed15 100644 --- a/common/Domain.cpp +++ b/common/Domain.cpp @@ -391,7 +391,7 @@ void Domain::Decomp(std::string Filename) for (int idx=0; idx Date: Tue, 14 Jan 2020 12:01:33 -0500 Subject: [PATCH 016/121] change specifier of printf to correct the output for very large image --- common/Domain.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common/Domain.cpp b/common/Domain.cpp index 82bcaee2..48bfed15 100644 --- a/common/Domain.cpp +++ b/common/Domain.cpp @@ -391,7 +391,7 @@ void Domain::Decomp(std::string Filename) for (int idx=0; idx Date: Fri, 17 Jan 2020 18:46:28 -0500 Subject: [PATCH 017/121] save the work, CPU versions seem to work, but need non-unity porosity test --- common/Domain.cpp | 2 +- common/ScaLBL.h | 7 + cpu/Greyscale.cpp | 1070 +++++++++++++++++++++++++++++++++++++ models/GreyscaleModel.cpp | 20 +- models/GreyscaleModel.h | 1 + 5 
files changed, 1093 insertions(+), 7 deletions(-) diff --git a/common/Domain.cpp b/common/Domain.cpp index 82bcaee2..48bfed15 100644 --- a/common/Domain.cpp +++ b/common/Domain.cpp @@ -391,7 +391,7 @@ void Domain::Decomp(std::string Filename) for (int idx=0; idx 10Np => odd part of dist) + fq = dist[nread]; // reading the f1 data into register fq + pressure = fq; + m1 -= 11.0*fq; + m2 -= 4.0*fq; + jx = fq; + m4 = -4.0*fq; + m9 = 2.0*fq; + m10 = -4.0*fq; + + // q=2 + nread = neighborList[n+Np]; // neighbor 1 ( < 10Np => even part of dist) + fq = dist[nread]; // reading the f2 data into register fq + pressure += fq; + m1 -= 11.0*(fq); + m2 -= 4.0*(fq); + jx -= fq; + m4 += 4.0*(fq); + m9 += 2.0*(fq); + m10 -= 4.0*(fq); + + // q=3 + nread = neighborList[n+2*Np]; // neighbor 4 + fq = dist[nread]; + pressure += fq; + m1 -= 11.0*fq; + m2 -= 4.0*fq; + jy = fq; + m6 = -4.0*fq; + m9 -= fq; + m10 += 2.0*fq; + m11 = fq; + m12 = -2.0*fq; + + // q = 4 + nread = neighborList[n+3*Np]; // neighbor 3 + fq = dist[nread]; + pressure += fq; + m1 -= 11.0*fq; + m2 -= 4.0*fq; + jy -= fq; + m6 += 4.0*fq; + m9 -= fq; + m10 += 2.0*fq; + m11 += fq; + m12 -= 2.0*fq; + + // q=5 + nread = neighborList[n+4*Np]; + fq = dist[nread]; + pressure += fq; + m1 -= 11.0*fq; + m2 -= 4.0*fq; + jz = fq; + m8 = -4.0*fq; + m9 -= fq; + m10 += 2.0*fq; + m11 -= fq; + m12 += 2.0*fq; + + // q = 6 + nread = neighborList[n+5*Np]; + fq = dist[nread]; + pressure += fq; + m1 -= 11.0*fq; + m2 -= 4.0*fq; + jz -= fq; + m8 += 4.0*fq; + m9 -= fq; + m10 += 2.0*fq; + m11 -= fq; + m12 += 2.0*fq; + + // q=7 + nread = neighborList[n+6*Np]; + fq = dist[nread]; + pressure += fq; + m1 += 8.0*fq; + m2 += fq; + jx += fq; + m4 += fq; + jy += fq; + m6 += fq; + m9 += fq; + m10 += fq; + m11 += fq; + m12 += fq; + m13 = fq; + m16 = fq; + m17 = -fq; + + // q = 8 + nread = neighborList[n+7*Np]; + fq = dist[nread]; + pressure += fq; + m1 += 8.0*fq; + m2 += fq; + jx -= fq; + m4 -= fq; + jy -= fq; + m6 -= fq; + m9 += fq; + m10 += fq; + m11 += fq; + m12 += fq; + m13 += fq; + m16 -= fq; + m17 += fq; + + // q=9 + nread = neighborList[n+8*Np]; + fq = dist[nread]; + pressure += fq; + m1 += 8.0*fq; + m2 += fq; + jx += fq; + m4 += fq; + jy -= fq; + m6 -= fq; + m9 += fq; + m10 += fq; + m11 += fq; + m12 += fq; + m13 -= fq; + m16 += fq; + m17 += fq; + + // q = 10 + nread = neighborList[n+9*Np]; + fq = dist[nread]; + pressure += fq; + m1 += 8.0*fq; + m2 += fq; + jx -= fq; + m4 -= fq; + jy += fq; + m6 += fq; + m9 += fq; + m10 += fq; + m11 += fq; + m12 += fq; + m13 -= fq; + m16 -= fq; + m17 -= fq; + + // q=11 + nread = neighborList[n+10*Np]; + fq = dist[nread]; + pressure += fq; + m1 += 8.0*fq; + m2 += fq; + jx += fq; + m4 += fq; + jz += fq; + m8 += fq; + m9 += fq; + m10 += fq; + m11 -= fq; + m12 -= fq; + m15 = fq; + m16 -= fq; + m18 = fq; + + // q=12 + nread = neighborList[n+11*Np]; + fq = dist[nread]; + pressure += fq; + m1 += 8.0*fq; + m2 += fq; + jx -= fq; + m4 -= fq; + jz -= fq; + m8 -= fq; + m9 += fq; + m10 += fq; + m11 -= fq; + m12 -= fq; + m15 += fq; + m16 += fq; + m18 -= fq; + + // q=13 + nread = neighborList[n+12*Np]; + fq = dist[nread]; + pressure += fq; + m1 += 8.0*fq; + m2 += fq; + jx += fq; + m4 += fq; + jz -= fq; + m8 -= fq; + m9 += fq; + m10 += fq; + m11 -= fq; + m12 -= fq; + m15 -= fq; + m16 -= fq; + m18 -= fq; + + // q=14 + nread = neighborList[n+13*Np]; + fq = dist[nread]; + pressure += fq; + m1 += 8.0*fq; + m2 += fq; + jx -= fq; + m4 -= fq; + jz += fq; + m8 += fq; + m9 += fq; + m10 += fq; + m11 -= fq; + m12 -= fq; + m15 -= fq; + m16 += fq; + m18 += fq; + + // 
q=15 + nread = neighborList[n+14*Np]; + fq = dist[nread]; + pressure += fq; + m1 += 8.0*fq; + m2 += fq; + jy += fq; + m6 += fq; + jz += fq; + m8 += fq; + m9 -= 2.0*fq; + m10 -= 2.0*fq; + m14 = fq; + m17 += fq; + m18 -= fq; + + // q=16 + nread = neighborList[n+15*Np]; + fq = dist[nread]; + pressure += fq; + m1 += 8.0*fq; + m2 += fq; + jy -= fq; + m6 -= fq; + jz -= fq; + m8 -= fq; + m9 -= 2.0*fq; + m10 -= 2.0*fq; + m14 += fq; + m17 -= fq; + m18 += fq; + + // q=17 + nread = neighborList[n+16*Np]; + fq = dist[nread]; + pressure += fq; + m1 += 8.0*fq; + m2 += fq; + jy += fq; + m6 += fq; + jz -= fq; + m8 -= fq; + m9 -= 2.0*fq; + m10 -= 2.0*fq; + m14 -= fq; + m17 += fq; + m18 += fq; + + // q=18 + nread = neighborList[n+17*Np]; + fq = dist[nread]; + pressure += fq; + m1 += 8.0*fq; + m2 += fq; + jy -= fq; + m6 -= fq; + jz += fq; + m8 += fq; + m9 -= 2.0*fq; + m10 -= 2.0*fq; + m14 -= fq; + m17 -= fq; + m18 -= fq; + //---------------------------------------------------------------------// + + porosity = Poros[n]; + perm = Perm[n]; + + c0 = 0.5*(1.0+porosity*0.5*mu/perm); + if (porosity==1.0) c0 = 0.5;//i.e. apparent pore nodes + GeoFun = 1.75/sqrt(150.0*porosity*porosity*porosity); + c1 = porosity*0.5*GeoFun/sqrt(perm); + if (porosity==1.0) c1 = 0.0;//i.e. apparent pore nodes + + vx = jx/Den+0.5*porosity*Gx; + vy = jy/Den+0.5*porosity*Gy; + vz = jz/Den+0.5*porosity*Gz; + v_mag=sqrt(vx*vx+vy*vy+vz*vz); + ux = vx/(c0+sqrt(c0*c0+c1*v_mag)); + uy = vy/(c0+sqrt(c0*c0+c1*v_mag)); + uz = vz/(c0+sqrt(c0*c0+c1*v_mag)); + u_mag=sqrt(ux*ux+uy*uy+uz*uz); + + //Update the total force to include linear (Darcy) and nonlinear (Forchheimer) drags due to the porous medium + Fx = Den*(-porosity*mu/perm*ux - porosity*GeoFun/sqrt(perm)*u_mag*ux + porosity*Gx); + Fy = Den*(-porosity*mu/perm*uy - porosity*GeoFun/sqrt(perm)*u_mag*uy + porosity*Gy); + Fz = Den*(-porosity*mu/perm*uz - porosity*GeoFun/sqrt(perm)*u_mag*uz + porosity*Gz); + if (porosity==1.0){ + Fx=Den*Gx; + Fy=Den*Gy; + Fz=Den*Gz; + } + + //Calculate pressure for Incompressible-MRT model + pressure=0.5/porosity*(pressure-0.5*Den*u_mag*u_mag/porosity); + + //..............carry out relaxation process............................................... +// m1 = m1 + rlx_setA*((-30*Den+19*(jx*jx+jy*jy+jz*jz)/Den/porosity + 57*pressure*porosity) - m1); +// m2 = m2 + rlx_setA*((12*Den - 5.5*(jx*jx+jy*jy+jz*jz)/Den/porosity-27*pressure*porosity) - m2); +// m4 = m4 + rlx_setB*((-0.6666666666666666*jx) - m4); +// m6 = m6 + rlx_setB*((-0.6666666666666666*jy) - m6); +// m8 = m8 + rlx_setB*((-0.6666666666666666*jz) - m8); +// m9 = m9 + rlx_setA*(((2*jx*jx-jy*jy-jz*jz)/Den/porosity) - m9); +// m10 = m10 + rlx_setA*(-0.5*((2*jx*jx-jy*jy-jz*jz)/Den/porosity)- m10); +// m11 = m11 + rlx_setA*(((jy*jy-jz*jz)/Den/porosity) - m11); +// m12 = m12 + rlx_setA*(-0.5*((jy*jy-jz*jz)/Den/porosity)- m12); +// m13 = m13 + rlx_setA*((jx*jy/Den/porosity) - m13); +// m14 = m14 + rlx_setA*((jy*jz/Den/porosity) - m14); +// m15 = m15 + rlx_setA*((jx*jz/Den/porosity) - m15); +// m16 = m16 + rlx_setB*( - m16); +// m17 = m17 + rlx_setB*( - m17); +// m18 = m18 + rlx_setB*( - m18); + //....................................................................................................... + + //..............carry out relaxation process............................................... 
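The velocity and body-force update carried out just above follows Guo's drag model for porous media (PRE 66, 036304 (2002)), as noted in the kernel comments. A compact sketch of that step in isolation, with only the formulas taken from the kernel above (the struct and function names are invented for illustration):

#include <cmath>

// Given bare momentum (jx,jy,jz), density rho, porosity eps, voxel permeability K,
// kinematic viscosity mu and applied acceleration (Gx,Gy,Gz), return the corrected
// velocity and the total body force (Darcy + Forchheimer drag plus the applied force).
struct GreyForce { double ux, uy, uz, Fx, Fy, Fz; };

GreyForce GuoDrag(double jx, double jy, double jz, double rho, double eps,
                  double K, double mu, double Gx, double Gy, double Gz)
{
    double c0 = 0.5 * (1.0 + eps * 0.5 * mu / K);
    double GeoFun = 1.75 / sqrt(150.0 * eps * eps * eps);
    double c1 = eps * 0.5 * GeoFun / sqrt(K);
    if (eps == 1.0) { c0 = 0.5; c1 = 0.0; }   // open pore voxels carry no drag

    // provisional velocity including half of the applied acceleration
    double vx = jx / rho + 0.5 * eps * Gx;
    double vy = jy / rho + 0.5 * eps * Gy;
    double vz = jz / rho + 0.5 * eps * Gz;
    double v = sqrt(vx * vx + vy * vy + vz * vz);

    GreyForce r;
    r.ux = vx / (c0 + sqrt(c0 * c0 + c1 * v));
    r.uy = vy / (c0 + sqrt(c0 * c0 + c1 * v));
    r.uz = vz / (c0 + sqrt(c0 * c0 + c1 * v));
    double u = sqrt(r.ux * r.ux + r.uy * r.uy + r.uz * r.uz);

    // linear (Darcy) and nonlinear (Forchheimer) drag plus the applied force
    r.Fx = rho * (-eps * mu / K * r.ux - eps * GeoFun / sqrt(K) * u * r.ux + eps * Gx);
    r.Fy = rho * (-eps * mu / K * r.uy - eps * GeoFun / sqrt(K) * u * r.uy + eps * Gy);
    r.Fz = rho * (-eps * mu / K * r.uz - eps * GeoFun / sqrt(K) * u * r.uz + eps * Gz);
    if (eps == 1.0) { r.Fx = rho * Gx; r.Fy = rho * Gy; r.Fz = rho * Gz; }
    return r;
}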
+ m1 = m1 + rlx_setA*((-30*Den+19*(ux*ux+uy*uy+uz*uz)/porosity + 57*pressure*porosity) - m1); + m2 = m2 + rlx_setA*((12*Den - 5.5*(ux*ux+uy*uy+uz*uz)/porosity-27*pressure*porosity) - m2); + m4 = m4 + rlx_setB*((-0.6666666666666666*ux*Den) - m4); + m6 = m6 + rlx_setB*((-0.6666666666666666*uy*Den) - m6); + m8 = m8 + rlx_setB*((-0.6666666666666666*uz*Den) - m8); + m9 = m9 + rlx_setA*((Den*(2*ux*ux-uy*uy-uz*uz)/porosity) - m9); + m10 = m10 + rlx_setA*(-0.5*Den*((2*ux*ux-uy*uy-uz*uz)/porosity)- m10); + m11 = m11 + rlx_setA*((Den*(uy*uy-uz*uz)/porosity) - m11); + m12 = m12 + rlx_setA*(-0.5*(Den*(uy*uy-uz*uz)/porosity)- m12); + m13 = m13 + rlx_setA*((Den*ux*uy/porosity) - m13); + m14 = m14 + rlx_setA*((Den*uy*uz/porosity) - m14); + m15 = m15 + rlx_setA*((Den*ux*uz/porosity) - m15); + m16 = m16 + rlx_setB*( - m16); + m17 = m17 + rlx_setB*( - m17); + m18 = m18 + rlx_setB*( - m18); + //....................................................................................................... + + jx+=0.5*Fx;//There is no collision for momentum, but they must be updated subject to the body force + jy+=0.5*Fy; + jz+=0.5*Fz; + //.................inverse transformation...................................................... + // q=0 + //fq = mrt_V1*rho-mrt_V2*m1+mrt_V3*m2; + fq = mrt_V1*Den-mrt_V2*m1+mrt_V3*m2 + + 0.3333333333333333*(1. - 0.5*rlx)*(Fx*(0. - (3.*ux)/porosity) + Fy*(0. - (3.*uy)/porosity) + Fz*(0. - (3.*uz)/porosity)); + dist[n] = fq; + + // q = 1 + //fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(jx-m4)+mrt_V6*(m9-m10) + 0.16666666*Fx; + fq = mrt_V1*Den-mrt_V4*m1-mrt_V5*m2+0.1*(jx-m4)+mrt_V6*(m9-m10) + +0.05555555555555555*(1. - 0.5*rlx)*(Fx*(3. + (6.*ux)/porosity) + Fy*(0. - (3.*uy)/porosity) + Fz*(0. - (3.*uz)/porosity)); + nread = neighborList[n+Np]; + dist[nread] = fq; + + // q=2 + //fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(m4-jx)+mrt_V6*(m9-m10) - 0.16666666*Fx; + fq = mrt_V1*Den-mrt_V4*m1-mrt_V5*m2+0.1*(m4-jx)+mrt_V6*(m9-m10) + +0.05555555555555555*(1. - 0.5*rlx)*(Fx*(-3. + (6.*ux)/porosity) + Fy*(0. - (3.*uy)/porosity) + Fz*(0. - (3.*uz)/porosity)); + nread = neighborList[n]; + dist[nread] = fq; + + // q = 3 + //fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(jy-m6)+mrt_V7*(m10-m9)+mrt_V8*(m11-m12) + 0.16666666*Fy; + fq = mrt_V1*Den-mrt_V4*m1-mrt_V5*m2+0.1*(jy-m6)+mrt_V7*(m10-m9)+mrt_V8*(m11-m12) + +0.05555555555555555*(1. - 0.5*rlx)*(Fx*(0. - (3.*ux)/porosity) + Fy*(3. + (6.*uy)/porosity) + Fz*(0. - (3.*uz)/porosity)); + nread = neighborList[n+3*Np]; + dist[nread] = fq; + + // q = 4 + //fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(m6-jy)+mrt_V7*(m10-m9)+mrt_V8*(m11-m12) - 0.16666666*Fy; + fq = mrt_V1*Den-mrt_V4*m1-mrt_V5*m2+0.1*(m6-jy)+mrt_V7*(m10-m9)+mrt_V8*(m11-m12) + +0.05555555555555555*(1. - 0.5*rlx)*(Fx*(0. - (3.*ux)/porosity) + Fy*(-3. + (6.*uy)/porosity) + Fz*(0. - (3.*uz)/porosity)); + nread = neighborList[n+2*Np]; + dist[nread] = fq; + + // q = 5 + //fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(jz-m8)+mrt_V7*(m10-m9)+mrt_V8*(m12-m11) + 0.16666666*Fz; + fq = mrt_V1*Den-mrt_V4*m1-mrt_V5*m2+0.1*(jz-m8)+mrt_V7*(m10-m9)+mrt_V8*(m12-m11) + +0.05555555555555555*(1. - 0.5*rlx)*(Fx*(0. - (3.*ux)/porosity) + Fy*(0. - (3.*uy)/porosity) + Fz*(3. + (6.*uz)/porosity)); + nread = neighborList[n+5*Np]; + dist[nread] = fq; + + // q = 6 + //fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(m8-jz)+mrt_V7*(m10-m9)+mrt_V8*(m12-m11) - 0.16666666*Fz; + fq = mrt_V1*Den-mrt_V4*m1-mrt_V5*m2+0.1*(m8-jz)+mrt_V7*(m10-m9)+mrt_V8*(m12-m11) + +0.05555555555555555*(1. - 0.5*rlx)*(Fx*(0. - (3.*ux)/porosity) + Fy*(0. 
- (3.*uy)/porosity) + Fz*(-3. + (6.*uz)/porosity)); + nread = neighborList[n+4*Np]; + dist[nread] = fq; + + // q = 7 + //fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jx+jy)+0.025*(m4+m6)+mrt_V7*m9+mrt_V11*m10+mrt_V8*m11+mrt_V12*m12+0.25*m13+0.125*(m16-m17) + 0.08333333333*(Fx+Fy); + fq = mrt_V1*Den+mrt_V9*m1+mrt_V10*m2+0.1*(jx+jy)+0.025*(m4+m6)+mrt_V7*m9+mrt_V11*m10+mrt_V8*m11+mrt_V12*m12+0.25*m13+0.125*(m16-m17) + +0.027777777777777776*(1. - 0.5*rlx)*(Fx*(3. - (3.*ux)/porosity + (9.*(ux + uy))/porosity) + Fy*(3. - (3.*uy)/porosity + (9.*(ux + uy))/porosity) + + Fz*(0. - (3.*uz)/porosity)); + nread = neighborList[n+7*Np]; + dist[nread] = fq; + + // q = 8 + //fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2-0.1*(jx+jy)-0.025*(m4+m6) +mrt_V7*m9+mrt_V11*m10+mrt_V8*m11+mrt_V12*m12+0.25*m13+0.125*(m17-m16) - 0.08333333333*(Fx+Fy); + fq = mrt_V1*Den+mrt_V9*m1+mrt_V10*m2-0.1*(jx+jy)-0.025*(m4+m6) +mrt_V7*m9+mrt_V11*m10+mrt_V8*m11+mrt_V12*m12+0.25*m13+0.125*(m17-m16) + +0.027777777777777776*(1. - 0.5*rlx)*(Fx*(-3. - (3.*ux)/porosity - (9.*(-ux - uy))/porosity) + Fy*(-3. - (9.*(-ux - uy))/porosity - (3.*uy)/porosity) + + Fz*(0. - (3.*uz)/porosity)); + nread = neighborList[n+6*Np]; + dist[nread] = fq; + + // q = 9 + //fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jx-jy)+0.025*(m4-m6)+mrt_V7*m9+mrt_V11*m10+mrt_V8*m11+mrt_V12*m12-0.25*m13+0.125*(m16+m17) + 0.08333333333*(Fx-Fy); + fq = mrt_V1*Den+mrt_V9*m1+mrt_V10*m2+0.1*(jx-jy)+0.025*(m4-m6)+mrt_V7*m9+mrt_V11*m10+mrt_V8*m11+mrt_V12*m12-0.25*m13+0.125*(m16+m17) + +0.027777777777777776*(1. - 0.5*rlx)*(Fx*(3. - (3.*ux)/porosity + (9.*(ux - uy))/porosity) + Fy*(-3. - (9.*(ux - uy))/porosity - (3.*uy)/porosity) + + Fz*(0. - (3.*uz)/porosity)); + nread = neighborList[n+9*Np]; + dist[nread] = fq; + + // q = 10 + //fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jy-jx)+0.025*(m6-m4)+mrt_V7*m9+mrt_V11*m10+mrt_V8*m11+mrt_V12*m12-0.25*m13-0.125*(m16+m17)- 0.08333333333*(Fx-Fy); + fq = mrt_V1*Den+mrt_V9*m1+mrt_V10*m2+0.1*(jy-jx)+0.025*(m6-m4)+mrt_V7*m9+mrt_V11*m10+mrt_V8*m11+mrt_V12*m12-0.25*m13-0.125*(m16+m17) + +0.027777777777777776*(1. - 0.5*rlx)*(Fx*(-3. - (3.*ux)/porosity - (9.*(-ux + uy))/porosity) + Fy*(3. - (3.*uy)/porosity + (9.*(-ux + uy))/porosity) + + Fz*(0. - (3.*uz)/porosity)); + nread = neighborList[n+8*Np]; + dist[nread] = fq; + + // q = 11 + //fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jx+jz)+0.025*(m4+m8)+mrt_V7*m9+mrt_V11*m10-mrt_V8*m11-mrt_V12*m12+0.25*m15+0.125*(m18-m16) + 0.08333333333*(Fx+Fz); + fq = mrt_V1*Den+mrt_V9*m1+mrt_V10*m2+0.1*(jx+jz)+0.025*(m4+m8)+mrt_V7*m9+mrt_V11*m10-mrt_V8*m11-mrt_V12*m12+0.25*m15+0.125*(m18-m16) + +0.027777777777777776*(1. - 0.5*rlx)*(Fy*(0. - (3.*uy)/porosity) + Fx*(3. - (3.*ux)/porosity + (9.*(ux + uz))/porosity) + + Fz*(3. - (3.*uz)/porosity + (9.*(ux + uz))/porosity)); + nread = neighborList[n+11*Np]; + dist[nread] = fq; + + // q = 12 + //fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2-0.1*(jx+jz)-0.025*(m4+m8)+mrt_V7*m9+mrt_V11*m10-mrt_V8*m11-mrt_V12*m12+0.25*m15+0.125*(m16-m18) - 0.08333333333*(Fx+Fz); + fq = mrt_V1*Den+mrt_V9*m1+mrt_V10*m2-0.1*(jx+jz)-0.025*(m4+m8)+mrt_V7*m9+mrt_V11*m10-mrt_V8*m11-mrt_V12*m12+0.25*m15+0.125*(m16-m18) + +0.027777777777777776*(1. - 0.5*rlx)*(Fy*(0. - (3.*uy)/porosity) + Fx*(-3. - (3.*ux)/porosity - (9.*(-ux - uz))/porosity) + + Fz*(-3. 
- (9.*(-ux - uz))/porosity - (3.*uz)/porosity)); + nread = neighborList[n+10*Np]; + dist[nread]= fq; + + // q = 13 + //fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jx-jz)+0.025*(m4-m8)+mrt_V7*m9+mrt_V11*m10-mrt_V8*m11-mrt_V12*m12-0.25*m15-0.125*(m16+m18) + 0.08333333333*(Fx-Fz); + fq = mrt_V1*Den+mrt_V9*m1+mrt_V10*m2+0.1*(jx-jz)+0.025*(m4-m8)+mrt_V7*m9+mrt_V11*m10-mrt_V8*m11-mrt_V12*m12-0.25*m15-0.125*(m16+m18) + +0.027777777777777776*(1. - 0.5*rlx)*(Fy*(0. - (3.*uy)/porosity) + Fx*(3. - (3.*ux)/porosity + (9.*(ux - uz))/porosity) + + Fz*(-3. - (9.*(ux - uz))/porosity - (3.*uz)/porosity)); + nread = neighborList[n+13*Np]; + dist[nread] = fq; + + // q= 14 + //fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jz-jx)+0.025*(m8-m4)+mrt_V7*m9+mrt_V11*m10-mrt_V8*m11-mrt_V12*m12-0.25*m15+0.125*(m16+m18) - 0.08333333333*(Fx-Fz); + fq = mrt_V1*Den+mrt_V9*m1+mrt_V10*m2+0.1*(jz-jx)+0.025*(m8-m4)+mrt_V7*m9+mrt_V11*m10-mrt_V8*m11-mrt_V12*m12-0.25*m15+0.125*(m16+m18) + +0.027777777777777776*(1. - 0.5*rlx)*(Fy*(0. - (3.*uy)/porosity) + Fx*(-3. - (3.*ux)/porosity - (9.*(-ux + uz))/porosity) + + Fz*(3. - (3.*uz)/porosity + (9.*(-ux + uz))/porosity)); + nread = neighborList[n+12*Np]; + dist[nread] = fq; + + // q = 15 + //fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jy+jz)+0.025*(m6+m8)-mrt_V6*m9-mrt_V7*m10+0.25*m14+0.125*(m17-m18) + 0.08333333333*(Fy+Fz); + fq = mrt_V1*Den+mrt_V9*m1+mrt_V10*m2+0.1*(jy+jz)+0.025*(m6+m8)-mrt_V6*m9-mrt_V7*m10+0.25*m14+0.125*(m17-m18) + +0.027777777777777776*(1. - 0.5*rlx)*(Fx*(0. - (3.*ux)/porosity) + Fy*(3. - (3.*uy)/porosity + (9.*(uy + uz))/porosity) + + Fz*(3. - (3.*uz)/porosity + (9.*(uy + uz))/porosity)); + nread = neighborList[n+15*Np]; + dist[nread] = fq; + + // q = 16 + //fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2-0.1*(jy+jz)-0.025*(m6+m8)-mrt_V6*m9-mrt_V7*m10+0.25*m14+0.125*(m18-m17)- 0.08333333333*(Fy+Fz); + fq = mrt_V1*Den+mrt_V9*m1+mrt_V10*m2-0.1*(jy+jz)-0.025*(m6+m8)-mrt_V6*m9-mrt_V7*m10+0.25*m14+0.125*(m18-m17) + +0.027777777777777776*(1. - 0.5*rlx)*(Fx*(0. - (3.*ux)/porosity) + Fy*(-3. - (3.*uy)/porosity - (9.*(-uy - uz))/porosity) + + Fz*(-3. - (9.*(-uy - uz))/porosity - (3.*uz)/porosity)); + nread = neighborList[n+14*Np]; + dist[nread] = fq; + + // q = 17 + //fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jy-jz)+0.025*(m6-m8)-mrt_V6*m9-mrt_V7*m10-0.25*m14+0.125*(m17+m18) + 0.08333333333*(Fy-Fz); + fq = mrt_V1*Den+mrt_V9*m1+mrt_V10*m2+0.1*(jy-jz)+0.025*(m6-m8)-mrt_V6*m9-mrt_V7*m10-0.25*m14+0.125*(m17+m18) + +0.027777777777777776*(1. - 0.5*rlx)*(Fx*(0. - (3.*ux)/porosity) + Fy*(3. - (3.*uy)/porosity + (9.*(uy - uz))/porosity) + + Fz*(-3. - (9.*(uy - uz))/porosity - (3.*uz)/porosity)); + nread = neighborList[n+17*Np]; + dist[nread] = fq; + + // q = 18 + //fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jz-jy)+0.025*(m8-m6)-mrt_V6*m9-mrt_V7*m10-0.25*m14-0.125*(m17+m18) - 0.08333333333*(Fy-Fz); + fq = mrt_V1*Den+mrt_V9*m1+mrt_V10*m2+0.1*(jz-jy)+0.025*(m8-m6)-mrt_V6*m9-mrt_V7*m10-0.25*m14-0.125*(m17+m18) + +0.027777777777777776*(1. - 0.5*rlx)*(Fx*(0. - (3.*ux)/porosity) + Fy*(-3. - (3.*uy)/porosity - (9.*(-uy + uz))/porosity) + + Fz*(3. - (3.*uz)/porosity + (9.*(-uy + uz))/porosity)); + nread = neighborList[n+16*Np]; + dist[nread] = fq; + //........................................................................ 
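The long 0.027777777777777776*(1. - 0.5*rlx)*(...) expressions in the inverse transformation above are the per-direction Guo forcing terms, with the velocity rescaled by the local porosity. A hedged sketch of the generic form for one D3Q19 direction (ForcingTerm and its argument layout are illustrative, not names from the patch):

// Guo forcing contribution for one lattice direction e with weight w
// (1/3, 1/18 or 1/36 for D3Q19); rlx = 1/tau and eps is the local porosity.
double ForcingTerm(const double e[3], double w, double rlx, double eps,
                   const double u[3], const double F[3])
{
    double eF = e[0] * F[0] + e[1] * F[1] + e[2] * F[2];
    double uF = u[0] * F[0] + u[1] * F[1] + u[2] * F[2];
    double eu = e[0] * u[0] + e[1] * u[1] + e[2] * u[2];
    return w * (1.0 - 0.5 * rlx) * (3.0 * eF - 3.0 * uF / eps + 9.0 * eu * eF / eps);
}

For example, e = {1,1,0} with w = 1/36 reproduces the q = 7 term written out above.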
+ + //Update velocity on device + Velocity[0*Np+n] = ux; + Velocity[1*Np+n] = uy; + Velocity[2*Np+n] = uz; + } +} + diff --git a/models/GreyscaleModel.cpp b/models/GreyscaleModel.cpp index 2ce6ff5e..1cdae815 100644 --- a/models/GreyscaleModel.cpp +++ b/models/GreyscaleModel.cpp @@ -8,7 +8,7 @@ color lattice boltzmann model #include ScaLBL_GreyscaleModel::ScaLBL_GreyscaleModel(int RANK, int NP, MPI_Comm COMM): -rank(RANK), nprocs(NP), Restart(0),timestep(0),timestepMax(0),tau(0),Fx(0),Fy(0),Fz(0),flux(0),din(0),dout(0), +rank(RANK), nprocs(NP), Restart(0),timestep(0),timestepMax(0),tau(0),Den(0),Fx(0),Fy(0),Fz(0),flux(0),din(0),dout(0), Nx(0),Ny(0),Nz(0),N(0),Np(0),nprocx(0),nprocy(0),nprocz(0),BoundaryCondition(0),Lx(0),Ly(0),Lz(0),comm(COMM) { SignDist.resize(Nx,Ny,Nz); @@ -30,6 +30,7 @@ void ScaLBL_GreyscaleModel::ReadParams(string filename){ // set defaults timestepMax = 100000; tau = 1.0; + Den = 1.0;//constant density tolerance = 0.01; Fx = Fy = Fz = 0.0; Restart=false; @@ -37,13 +38,16 @@ void ScaLBL_GreyscaleModel::ReadParams(string filename){ flux=0.0; dp = 10.0; //unit of 'dp': voxel - // Color Model parameters + // Greyscale Model parameters if (greyscale_db->keyExists( "timestepMax" )){ timestepMax = greyscale_db->getScalar( "timestepMax" ); } if (greyscale_db->keyExists( "tau" )){ tau = greyscale_db->getScalar( "tau" ); } + if (greyscale_db->keyExists( "Den" )){ + Den = greyscale_db->getScalar( "Den" ); + } if (greyscale_db->keyExists( "dp" )){ dp = greyscale_db->getScalar( "dp" ); } @@ -423,15 +427,19 @@ void ScaLBL_GreyscaleModel::Run(){ //************************************************************************/ timestep++; ScaLBL_Comm->SendD3Q19AA(fq); //READ FROM NORMAL - ScaLBL_D3Q19_AAodd_Greyscale(NeighborList, fq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np, rlx, Fx, Fy, Fz,Porosity,Permeability,Velocity); + //ScaLBL_D3Q19_AAodd_Greyscale(NeighborList, fq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np, rlx, Fx, Fy, Fz,Porosity,Permeability,Velocity); + ScaLBL_D3Q19_AAodd_Greyscale_IMRT(NeighborList, fq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np, rlx, Fx, Fy, Fz,Porosity,Permeability,Velocity,Den); ScaLBL_Comm->RecvD3Q19AA(fq); //WRITE INTO OPPOSITE - ScaLBL_D3Q19_AAodd_Greyscale(NeighborList, fq, 0, ScaLBL_Comm->LastExterior(), Np, rlx, Fx, Fy, Fz,Porosity,Permeability,Velocity); + //ScaLBL_D3Q19_AAodd_Greyscale(NeighborList, fq, 0, ScaLBL_Comm->LastExterior(), Np, rlx, Fx, Fy, Fz,Porosity,Permeability,Velocity); + ScaLBL_D3Q19_AAodd_Greyscale_IMRT(NeighborList, fq, 0, ScaLBL_Comm->LastExterior(), Np, rlx, Fx, Fy, Fz,Porosity,Permeability,Velocity,Den); ScaLBL_DeviceBarrier(); MPI_Barrier(comm); timestep++; ScaLBL_Comm->SendD3Q19AA(fq); //READ FORM NORMAL - ScaLBL_D3Q19_AAeven_Greyscale(fq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np, rlx, Fx, Fy, Fz,Porosity,Permeability,Velocity); + //ScaLBL_D3Q19_AAeven_Greyscale(fq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np, rlx, Fx, Fy, Fz,Porosity,Permeability,Velocity); + ScaLBL_D3Q19_AAeven_Greyscale_IMRT(fq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np, rlx, Fx, Fy, Fz,Porosity,Permeability,Velocity,Den); ScaLBL_Comm->RecvD3Q19AA(fq); //WRITE INTO OPPOSITE - ScaLBL_D3Q19_AAeven_Greyscale(fq, 0, ScaLBL_Comm->LastExterior(), Np, rlx, Fx, Fy, Fz,Porosity,Permeability,Velocity); + //ScaLBL_D3Q19_AAeven_Greyscale(fq, 0, ScaLBL_Comm->LastExterior(), Np, rlx, Fx, Fy, Fz,Porosity,Permeability,Velocity); + 
ScaLBL_D3Q19_AAeven_Greyscale_IMRT(fq, 0, ScaLBL_Comm->LastExterior(), Np, rlx, Fx, Fy, Fz,Porosity,Permeability,Velocity,Den); ScaLBL_DeviceBarrier(); MPI_Barrier(comm); //************************************************************************/ diff --git a/models/GreyscaleModel.h b/models/GreyscaleModel.h index 9b970a65..ac939aed 100644 --- a/models/GreyscaleModel.h +++ b/models/GreyscaleModel.h @@ -36,6 +36,7 @@ public: int timestep,timestepMax; int BoundaryCondition; double tau; + double Den;//constant density double tolerance; double Fx,Fy,Fz,flux; double din,dout; From 7e4e91a06beb1ee75633e31e0e25e3fe1e032e45 Mon Sep 17 00:00:00 2001 From: Rex Zhe Li Date: Sat, 18 Jan 2020 18:43:20 -0500 Subject: [PATCH 018/121] The CPU version of incompressible MRT greyscale model is available now --- cpu/Greyscale.cpp | 298 +++++++++++++++------------------------------- 1 file changed, 93 insertions(+), 205 deletions(-) diff --git a/cpu/Greyscale.cpp b/cpu/Greyscale.cpp index 11a8eb5c..95cf516b 100644 --- a/cpu/Greyscale.cpp +++ b/cpu/Greyscale.cpp @@ -724,169 +724,113 @@ extern "C" void ScaLBL_D3Q19_AAeven_Greyscale_IMRT(double *dist, int start, int pressure=0.5/porosity*(pressure-0.5*Den*u_mag*u_mag/porosity); //..............carry out relaxation process............................................... -// m1 = m1 + rlx_setA*((-30*Den+19*(jx*jx+jy*jy+jz*jz)/Den/porosity + 57*pressure*porosity) - m1); -// m2 = m2 + rlx_setA*((12*Den - 5.5*(jx*jx+jy*jy+jz*jz)/Den/porosity-27*pressure*porosity) - m2); -// m4 = m4 + rlx_setB*((-0.6666666666666666*jx) - m4); -// m6 = m6 + rlx_setB*((-0.6666666666666666*jy) - m6); -// m8 = m8 + rlx_setB*((-0.6666666666666666*jz) - m8); -// m9 = m9 + rlx_setA*(((2*jx*jx-jy*jy-jz*jz)/Den/porosity) - m9); -// m10 = m10 + rlx_setA*(-0.5*((2*jx*jx-jy*jy-jz*jz)/Den/porosity)- m10); -// m11 = m11 + rlx_setA*(((jy*jy-jz*jz)/Den/porosity) - m11); -// m12 = m12 + rlx_setA*(-0.5*((jy*jy-jz*jz)/Den/porosity)- m12); -// m13 = m13 + rlx_setA*((jx*jy/Den/porosity) - m13); -// m14 = m14 + rlx_setA*((jy*jz/Den/porosity) - m14); -// m15 = m15 + rlx_setA*((jx*jz/Den/porosity) - m15); -// m16 = m16 + rlx_setB*( - m16); -// m17 = m17 + rlx_setB*( - m17); -// m18 = m18 + rlx_setB*( - m18); - //....................................................................................................... - - //..............carry out relaxation process............................................... 
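The commit above moves the force contribution out of the inverse transformation and into the moment relaxation: each moment relaxes toward its equilibrium and then receives a (1 - s/2)-weighted force moment, while the conserved momenta absorb the full force. A minimal sketch of that pattern (RelaxMoment is an illustrative helper, not part of the patch; the actual equilibria and force moments are the expressions added in the hunk that follows):

// One MRT moment update with Guo-style forcing in moment space:
//   m_new = m + s*(m_eq - m) + (1 - s/2)*Fhat
double RelaxMoment(double m, double m_eq, double s, double Fhat)
{
    return m + s * (m_eq - m) + (1.0 - 0.5 * s) * Fhat;
}

// Example matching the new m9 line in this hunk: equilibrium
// Den*(2*ux*ux - uy*uy - uz*uz)/porosity and force moment
// (4*Fx*ux - 2*Fy*uy - 2*Fz*uz)/porosity, relaxed with rlx_setA,
// while the momentum itself is updated in full, e.g. jx = jx + Fx.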
- m1 = m1 + rlx_setA*((-30*Den+19*(ux*ux+uy*uy+uz*uz)/porosity + 57*pressure*porosity) - m1); - m2 = m2 + rlx_setA*((12*Den - 5.5*(ux*ux+uy*uy+uz*uz)/porosity-27*pressure*porosity) - m2); - m4 = m4 + rlx_setB*((-0.6666666666666666*ux*Den) - m4); - m6 = m6 + rlx_setB*((-0.6666666666666666*uy*Den) - m6); - m8 = m8 + rlx_setB*((-0.6666666666666666*uz*Den) - m8); - m9 = m9 + rlx_setA*((Den*(2*ux*ux-uy*uy-uz*uz)/porosity) - m9); - m10 = m10 + rlx_setA*(-0.5*Den*((2*ux*ux-uy*uy-uz*uz)/porosity)- m10); - m11 = m11 + rlx_setA*((Den*(uy*uy-uz*uz)/porosity) - m11); - m12 = m12 + rlx_setA*(-0.5*(Den*(uy*uy-uz*uz)/porosity)- m12); - m13 = m13 + rlx_setA*((Den*ux*uy/porosity) - m13); - m14 = m14 + rlx_setA*((Den*uy*uz/porosity) - m14); - m15 = m15 + rlx_setA*((Den*ux*uz/porosity) - m15); + m1 = m1 + rlx_setA*((-30*Den+19*(ux*ux+uy*uy+uz*uz)/porosity + 57*pressure*porosity) - m1) + + (1-0.5*rlx_setA)*38*(Fx*ux+Fy*uy+Fz*uz)/porosity; + m2 = m2 + rlx_setA*((12*Den - 5.5*(ux*ux+uy*uy+uz*uz)/porosity-27*pressure*porosity) - m2) + + (1-0.5*rlx_setA)*11*(-Fx*ux-Fy*uy-Fz*uz)/porosity; + jx = jx + Fx; + m4 = m4 + rlx_setB*((-0.6666666666666666*ux*Den) - m4) + + (1-0.5*rlx_setB)*(-0.6666666666666666*Fx); + jy = jy + Fy; + m6 = m6 + rlx_setB*((-0.6666666666666666*uy*Den) - m6) + + (1-0.5*rlx_setB)*(-0.6666666666666666*Fy); + jz = jz + Fz; + m8 = m8 + rlx_setB*((-0.6666666666666666*uz*Den) - m8) + + (1-0.5*rlx_setB)*(-0.6666666666666666*Fz); + m9 = m9 + rlx_setA*((Den*(2*ux*ux-uy*uy-uz*uz)/porosity) - m9) + + (1-0.5*rlx_setA)*(4*Fx*ux-2*Fy*uy-2*Fz*uz)/porosity; + m10 = m10 + rlx_setA*(-0.5*Den*((2*ux*ux-uy*uy-uz*uz)/porosity)- m10) + + (1-0.5*rlx_setA)*(-2*Fx*ux+Fy*uy+Fz*uz)/porosity; + m11 = m11 + rlx_setA*((Den*(uy*uy-uz*uz)/porosity) - m11) + + (1-0.5*rlx_setA)*(2*Fy*uy-2*Fz*uz)/porosity; + m12 = m12 + rlx_setA*(-0.5*(Den*(uy*uy-uz*uz)/porosity)- m12) + + (1-0.5*rlx_setA)*(-Fy*uy+Fz*uz)/porosity; + m13 = m13 + rlx_setA*((Den*ux*uy/porosity) - m13) + + (1-0.5*rlx_setA)*(Fy*ux+Fx*uy)/porosity; + m14 = m14 + rlx_setA*((Den*uy*uz/porosity) - m14) + + (1-0.5*rlx_setA)*(Fz*uy+Fy*uz)/porosity; + m15 = m15 + rlx_setA*((Den*ux*uz/porosity) - m15) + + (1-0.5*rlx_setA)*(Fz*ux+Fx*uz)/porosity; m16 = m16 + rlx_setB*( - m16); m17 = m17 + rlx_setB*( - m17); m18 = m18 + rlx_setB*( - m18); //....................................................................................................... - jx+=0.5*Fx;//There is no collision for momentum, but they must be updated subject to the body force - jy+=0.5*Fy; - jz+=0.5*Fz; //.................inverse transformation...................................................... // q=0 - //fq = mrt_V1*rho-mrt_V2*m1+mrt_V3*m2; - fq = mrt_V1*Den-mrt_V2*m1+mrt_V3*m2 - + 0.3333333333333333*(1. - 0.5*rlx)*(Fx*(0. - (3.*ux)/porosity) + Fy*(0. - (3.*uy)/porosity) + Fz*(0. - (3.*uz)/porosity)); + fq = mrt_V1*Den-mrt_V2*m1+mrt_V3*m2; dist[n] = fq; // q = 1 - //fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(jx-m4)+mrt_V6*(m9-m10) + 0.16666666*Fx; - fq = mrt_V1*Den-mrt_V4*m1-mrt_V5*m2+0.1*(jx-m4)+mrt_V6*(m9-m10) - +0.05555555555555555*(1. - 0.5*rlx)*(Fx*(3. + (6.*ux)/porosity) + Fy*(0. - (3.*uy)/porosity) + Fz*(0. - (3.*uz)/porosity)); + fq = mrt_V1*Den-mrt_V4*m1-mrt_V5*m2+0.1*(jx-m4)+mrt_V6*(m9-m10); dist[1*Np+n] = fq; // q=2 - //fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(m4-jx)+mrt_V6*(m9-m10) - 0.16666666*Fx; - fq = mrt_V1*Den-mrt_V4*m1-mrt_V5*m2+0.1*(m4-jx)+mrt_V6*(m9-m10) - +0.05555555555555555*(1. - 0.5*rlx)*(Fx*(-3. + (6.*ux)/porosity) + Fy*(0. - (3.*uy)/porosity) + Fz*(0. 
- (3.*uz)/porosity)); + fq = mrt_V1*Den-mrt_V4*m1-mrt_V5*m2+0.1*(m4-jx)+mrt_V6*(m9-m10); dist[2*Np+n] = fq; // q = 3 - //fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(jy-m6)+mrt_V7*(m10-m9)+mrt_V8*(m11-m12) + 0.16666666*Fy; - fq = mrt_V1*Den-mrt_V4*m1-mrt_V5*m2+0.1*(jy-m6)+mrt_V7*(m10-m9)+mrt_V8*(m11-m12) - +0.05555555555555555*(1. - 0.5*rlx)*(Fx*(0. - (3.*ux)/porosity) + Fy*(3. + (6.*uy)/porosity) + Fz*(0. - (3.*uz)/porosity)); + fq = mrt_V1*Den-mrt_V4*m1-mrt_V5*m2+0.1*(jy-m6)+mrt_V7*(m10-m9)+mrt_V8*(m11-m12); dist[3*Np+n] = fq; // q = 4 - //fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(m6-jy)+mrt_V7*(m10-m9)+mrt_V8*(m11-m12) - 0.16666666*Fy; - fq = mrt_V1*Den-mrt_V4*m1-mrt_V5*m2+0.1*(m6-jy)+mrt_V7*(m10-m9)+mrt_V8*(m11-m12) - +0.05555555555555555*(1. - 0.5*rlx)*(Fx*(0. - (3.*ux)/porosity) + Fy*(-3. + (6.*uy)/porosity) + Fz*(0. - (3.*uz)/porosity)); + fq = mrt_V1*Den-mrt_V4*m1-mrt_V5*m2+0.1*(m6-jy)+mrt_V7*(m10-m9)+mrt_V8*(m11-m12); dist[4*Np+n] = fq; // q = 5 - //fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(jz-m8)+mrt_V7*(m10-m9)+mrt_V8*(m12-m11) + 0.16666666*Fz; - fq = mrt_V1*Den-mrt_V4*m1-mrt_V5*m2+0.1*(jz-m8)+mrt_V7*(m10-m9)+mrt_V8*(m12-m11) - +0.05555555555555555*(1. - 0.5*rlx)*(Fx*(0. - (3.*ux)/porosity) + Fy*(0. - (3.*uy)/porosity) + Fz*(3. + (6.*uz)/porosity)); + fq = mrt_V1*Den-mrt_V4*m1-mrt_V5*m2+0.1*(jz-m8)+mrt_V7*(m10-m9)+mrt_V8*(m12-m11); dist[5*Np+n] = fq; // q = 6 - //fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(m8-jz)+mrt_V7*(m10-m9)+mrt_V8*(m12-m11) - 0.16666666*Fz; - fq = mrt_V1*Den-mrt_V4*m1-mrt_V5*m2+0.1*(m8-jz)+mrt_V7*(m10-m9)+mrt_V8*(m12-m11) - +0.05555555555555555*(1. - 0.5*rlx)*(Fx*(0. - (3.*ux)/porosity) + Fy*(0. - (3.*uy)/porosity) + Fz*(-3. + (6.*uz)/porosity)); + fq = mrt_V1*Den-mrt_V4*m1-mrt_V5*m2+0.1*(m8-jz)+mrt_V7*(m10-m9)+mrt_V8*(m12-m11); dist[6*Np+n] = fq; // q = 7 - //fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jx+jy)+0.025*(m4+m6)+mrt_V7*m9+mrt_V11*m10+mrt_V8*m11+mrt_V12*m12+0.25*m13+0.125*(m16-m17) + 0.08333333333*(Fx+Fy); - fq = mrt_V1*Den+mrt_V9*m1+mrt_V10*m2+0.1*(jx+jy)+0.025*(m4+m6)+mrt_V7*m9+mrt_V11*m10+mrt_V8*m11+mrt_V12*m12+0.25*m13+0.125*(m16-m17) - +0.027777777777777776*(1. - 0.5*rlx)*(Fx*(3. - (3.*ux)/porosity + (9.*(ux + uy))/porosity) + Fy*(3. - (3.*uy)/porosity + (9.*(ux + uy))/porosity) + - Fz*(0. - (3.*uz)/porosity)); + fq = mrt_V1*Den+mrt_V9*m1+mrt_V10*m2+0.1*(jx+jy)+0.025*(m4+m6)+mrt_V7*m9+mrt_V11*m10+mrt_V8*m11+mrt_V12*m12+0.25*m13+0.125*(m16-m17); dist[7*Np+n] = fq; // q = 8 - //fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2-0.1*(jx+jy)-0.025*(m4+m6) +mrt_V7*m9+mrt_V11*m10+mrt_V8*m11+mrt_V12*m12+0.25*m13+0.125*(m17-m16) - 0.08333333333*(Fx+Fy); - fq = mrt_V1*Den+mrt_V9*m1+mrt_V10*m2-0.1*(jx+jy)-0.025*(m4+m6) +mrt_V7*m9+mrt_V11*m10+mrt_V8*m11+mrt_V12*m12+0.25*m13+0.125*(m17-m16) - +0.027777777777777776*(1. - 0.5*rlx)*(Fx*(-3. - (3.*ux)/porosity - (9.*(-ux - uy))/porosity) + Fy*(-3. - (9.*(-ux - uy))/porosity - (3.*uy)/porosity) + - Fz*(0. - (3.*uz)/porosity)); + fq = mrt_V1*Den+mrt_V9*m1+mrt_V10*m2-0.1*(jx+jy)-0.025*(m4+m6) +mrt_V7*m9+mrt_V11*m10+mrt_V8*m11+mrt_V12*m12+0.25*m13+0.125*(m17-m16); dist[8*Np+n] = fq; // q = 9 - //fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jx-jy)+0.025*(m4-m6)+mrt_V7*m9+mrt_V11*m10+mrt_V8*m11+mrt_V12*m12-0.25*m13+0.125*(m16+m17) + 0.08333333333*(Fx-Fy); - fq = mrt_V1*Den+mrt_V9*m1+mrt_V10*m2+0.1*(jx-jy)+0.025*(m4-m6)+mrt_V7*m9+mrt_V11*m10+mrt_V8*m11+mrt_V12*m12-0.25*m13+0.125*(m16+m17) - +0.027777777777777776*(1. - 0.5*rlx)*(Fx*(3. - (3.*ux)/porosity + (9.*(ux - uy))/porosity) + Fy*(-3. 
- (9.*(ux - uy))/porosity - (3.*uy)/porosity) + - Fz*(0. - (3.*uz)/porosity)); + fq = mrt_V1*Den+mrt_V9*m1+mrt_V10*m2+0.1*(jx-jy)+0.025*(m4-m6)+mrt_V7*m9+mrt_V11*m10+mrt_V8*m11+mrt_V12*m12-0.25*m13+0.125*(m16+m17); dist[9*Np+n] = fq; // q = 10 - //fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jy-jx)+0.025*(m6-m4)+mrt_V7*m9+mrt_V11*m10+mrt_V8*m11+mrt_V12*m12-0.25*m13-0.125*(m16+m17)- 0.08333333333*(Fx-Fy); - fq = mrt_V1*Den+mrt_V9*m1+mrt_V10*m2+0.1*(jy-jx)+0.025*(m6-m4)+mrt_V7*m9+mrt_V11*m10+mrt_V8*m11+mrt_V12*m12-0.25*m13-0.125*(m16+m17) - +0.027777777777777776*(1. - 0.5*rlx)*(Fx*(-3. - (3.*ux)/porosity - (9.*(-ux + uy))/porosity) + Fy*(3. - (3.*uy)/porosity + (9.*(-ux + uy))/porosity) + - Fz*(0. - (3.*uz)/porosity)); + fq = mrt_V1*Den+mrt_V9*m1+mrt_V10*m2+0.1*(jy-jx)+0.025*(m6-m4)+mrt_V7*m9+mrt_V11*m10+mrt_V8*m11+mrt_V12*m12-0.25*m13-0.125*(m16+m17); dist[10*Np+n] = fq; // q = 11 - //fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jx+jz)+0.025*(m4+m8)+mrt_V7*m9+mrt_V11*m10-mrt_V8*m11-mrt_V12*m12+0.25*m15+0.125*(m18-m16) + 0.08333333333*(Fx+Fz); - fq = mrt_V1*Den+mrt_V9*m1+mrt_V10*m2+0.1*(jx+jz)+0.025*(m4+m8)+mrt_V7*m9+mrt_V11*m10-mrt_V8*m11-mrt_V12*m12+0.25*m15+0.125*(m18-m16) - +0.027777777777777776*(1. - 0.5*rlx)*(Fy*(0. - (3.*uy)/porosity) + Fx*(3. - (3.*ux)/porosity + (9.*(ux + uz))/porosity) + - Fz*(3. - (3.*uz)/porosity + (9.*(ux + uz))/porosity)); + fq = mrt_V1*Den+mrt_V9*m1+mrt_V10*m2+0.1*(jx+jz)+0.025*(m4+m8)+mrt_V7*m9+mrt_V11*m10-mrt_V8*m11-mrt_V12*m12+0.25*m15+0.125*(m18-m16); dist[11*Np+n] = fq; // q = 12 - //fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2-0.1*(jx+jz)-0.025*(m4+m8)+mrt_V7*m9+mrt_V11*m10-mrt_V8*m11-mrt_V12*m12+0.25*m15+0.125*(m16-m18) - 0.08333333333*(Fx+Fz); - fq = mrt_V1*Den+mrt_V9*m1+mrt_V10*m2-0.1*(jx+jz)-0.025*(m4+m8)+mrt_V7*m9+mrt_V11*m10-mrt_V8*m11-mrt_V12*m12+0.25*m15+0.125*(m16-m18) - +0.027777777777777776*(1. - 0.5*rlx)*(Fy*(0. - (3.*uy)/porosity) + Fx*(-3. - (3.*ux)/porosity - (9.*(-ux - uz))/porosity) + - Fz*(-3. - (9.*(-ux - uz))/porosity - (3.*uz)/porosity)); + fq = mrt_V1*Den+mrt_V9*m1+mrt_V10*m2-0.1*(jx+jz)-0.025*(m4+m8)+mrt_V7*m9+mrt_V11*m10-mrt_V8*m11-mrt_V12*m12+0.25*m15+0.125*(m16-m18); dist[12*Np+n] = fq; // q = 13 - //fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jx-jz)+0.025*(m4-m8)+mrt_V7*m9+mrt_V11*m10-mrt_V8*m11-mrt_V12*m12-0.25*m15-0.125*(m16+m18) + 0.08333333333*(Fx-Fz); - fq = mrt_V1*Den+mrt_V9*m1+mrt_V10*m2+0.1*(jx-jz)+0.025*(m4-m8)+mrt_V7*m9+mrt_V11*m10-mrt_V8*m11-mrt_V12*m12-0.25*m15-0.125*(m16+m18) - +0.027777777777777776*(1. - 0.5*rlx)*(Fy*(0. - (3.*uy)/porosity) + Fx*(3. - (3.*ux)/porosity + (9.*(ux - uz))/porosity) + - Fz*(-3. - (9.*(ux - uz))/porosity - (3.*uz)/porosity)); + fq = mrt_V1*Den+mrt_V9*m1+mrt_V10*m2+0.1*(jx-jz)+0.025*(m4-m8)+mrt_V7*m9+mrt_V11*m10-mrt_V8*m11-mrt_V12*m12-0.25*m15-0.125*(m16+m18); dist[13*Np+n] = fq; // q= 14 - //fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jz-jx)+0.025*(m8-m4)+mrt_V7*m9+mrt_V11*m10-mrt_V8*m11-mrt_V12*m12-0.25*m15+0.125*(m16+m18) - 0.08333333333*(Fx-Fz); - fq = mrt_V1*Den+mrt_V9*m1+mrt_V10*m2+0.1*(jz-jx)+0.025*(m8-m4)+mrt_V7*m9+mrt_V11*m10-mrt_V8*m11-mrt_V12*m12-0.25*m15+0.125*(m16+m18) - +0.027777777777777776*(1. - 0.5*rlx)*(Fy*(0. - (3.*uy)/porosity) + Fx*(-3. - (3.*ux)/porosity - (9.*(-ux + uz))/porosity) + - Fz*(3. 
- (3.*uz)/porosity + (9.*(-ux + uz))/porosity)); + fq = mrt_V1*Den+mrt_V9*m1+mrt_V10*m2+0.1*(jz-jx)+0.025*(m8-m4)+mrt_V7*m9+mrt_V11*m10-mrt_V8*m11-mrt_V12*m12-0.25*m15+0.125*(m16+m18); dist[14*Np+n] = fq; // q = 15 - //fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jy+jz)+0.025*(m6+m8)-mrt_V6*m9-mrt_V7*m10+0.25*m14+0.125*(m17-m18) + 0.08333333333*(Fy+Fz); - fq = mrt_V1*Den+mrt_V9*m1+mrt_V10*m2+0.1*(jy+jz)+0.025*(m6+m8)-mrt_V6*m9-mrt_V7*m10+0.25*m14+0.125*(m17-m18) - +0.027777777777777776*(1. - 0.5*rlx)*(Fx*(0. - (3.*ux)/porosity) + Fy*(3. - (3.*uy)/porosity + (9.*(uy + uz))/porosity) + - Fz*(3. - (3.*uz)/porosity + (9.*(uy + uz))/porosity)); + fq = mrt_V1*Den+mrt_V9*m1+mrt_V10*m2+0.1*(jy+jz)+0.025*(m6+m8)-mrt_V6*m9-mrt_V7*m10+0.25*m14+0.125*(m17-m18); dist[15*Np+n] = fq; // q = 16 - //fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2-0.1*(jy+jz)-0.025*(m6+m8)-mrt_V6*m9-mrt_V7*m10+0.25*m14+0.125*(m18-m17)- 0.08333333333*(Fy+Fz); - fq = mrt_V1*Den+mrt_V9*m1+mrt_V10*m2-0.1*(jy+jz)-0.025*(m6+m8)-mrt_V6*m9-mrt_V7*m10+0.25*m14+0.125*(m18-m17) - +0.027777777777777776*(1. - 0.5*rlx)*(Fx*(0. - (3.*ux)/porosity) + Fy*(-3. - (3.*uy)/porosity - (9.*(-uy - uz))/porosity) + - Fz*(-3. - (9.*(-uy - uz))/porosity - (3.*uz)/porosity)); + fq = mrt_V1*Den+mrt_V9*m1+mrt_V10*m2-0.1*(jy+jz)-0.025*(m6+m8)-mrt_V6*m9-mrt_V7*m10+0.25*m14+0.125*(m18-m17); dist[16*Np+n] = fq; // q = 17 - //fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jy-jz)+0.025*(m6-m8)-mrt_V6*m9-mrt_V7*m10-0.25*m14+0.125*(m17+m18) + 0.08333333333*(Fy-Fz); - fq = mrt_V1*Den+mrt_V9*m1+mrt_V10*m2+0.1*(jy-jz)+0.025*(m6-m8)-mrt_V6*m9-mrt_V7*m10-0.25*m14+0.125*(m17+m18) - +0.027777777777777776*(1. - 0.5*rlx)*(Fx*(0. - (3.*ux)/porosity) + Fy*(3. - (3.*uy)/porosity + (9.*(uy - uz))/porosity) + - Fz*(-3. - (9.*(uy - uz))/porosity - (3.*uz)/porosity)); + fq = mrt_V1*Den+mrt_V9*m1+mrt_V10*m2+0.1*(jy-jz)+0.025*(m6-m8)-mrt_V6*m9-mrt_V7*m10-0.25*m14+0.125*(m17+m18); dist[17*Np+n] = fq; // q = 18 - //fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jz-jy)+0.025*(m8-m6)-mrt_V6*m9-mrt_V7*m10-0.25*m14-0.125*(m17+m18) - 0.08333333333*(Fy-Fz); - fq = mrt_V1*Den+mrt_V9*m1+mrt_V10*m2+0.1*(jz-jy)+0.025*(m8-m6)-mrt_V6*m9-mrt_V7*m10-0.25*m14-0.125*(m17+m18) - +0.027777777777777776*(1. - 0.5*rlx)*(Fx*(0. - (3.*ux)/porosity) + Fy*(-3. - (3.*uy)/porosity - (9.*(-uy + uz))/porosity) + - Fz*(3. - (3.*uz)/porosity + (9.*(-uy + uz))/porosity)); + fq = mrt_V1*Den+mrt_V9*m1+mrt_V10*m2+0.1*(jz-jy)+0.025*(m8-m6)-mrt_V6*m9-mrt_V7*m10-0.25*m14-0.125*(m17+m18); dist[18*Np+n] = fq; //........................................................................ @@ -1258,186 +1202,130 @@ extern "C" void ScaLBL_D3Q19_AAodd_Greyscale_IMRT(int *neighborList, double *dis pressure=0.5/porosity*(pressure-0.5*Den*u_mag*u_mag/porosity); //..............carry out relaxation process............................................... 
-// m1 = m1 + rlx_setA*((-30*Den+19*(jx*jx+jy*jy+jz*jz)/Den/porosity + 57*pressure*porosity) - m1); -// m2 = m2 + rlx_setA*((12*Den - 5.5*(jx*jx+jy*jy+jz*jz)/Den/porosity-27*pressure*porosity) - m2); -// m4 = m4 + rlx_setB*((-0.6666666666666666*jx) - m4); -// m6 = m6 + rlx_setB*((-0.6666666666666666*jy) - m6); -// m8 = m8 + rlx_setB*((-0.6666666666666666*jz) - m8); -// m9 = m9 + rlx_setA*(((2*jx*jx-jy*jy-jz*jz)/Den/porosity) - m9); -// m10 = m10 + rlx_setA*(-0.5*((2*jx*jx-jy*jy-jz*jz)/Den/porosity)- m10); -// m11 = m11 + rlx_setA*(((jy*jy-jz*jz)/Den/porosity) - m11); -// m12 = m12 + rlx_setA*(-0.5*((jy*jy-jz*jz)/Den/porosity)- m12); -// m13 = m13 + rlx_setA*((jx*jy/Den/porosity) - m13); -// m14 = m14 + rlx_setA*((jy*jz/Den/porosity) - m14); -// m15 = m15 + rlx_setA*((jx*jz/Den/porosity) - m15); -// m16 = m16 + rlx_setB*( - m16); -// m17 = m17 + rlx_setB*( - m17); -// m18 = m18 + rlx_setB*( - m18); - //....................................................................................................... - - //..............carry out relaxation process............................................... - m1 = m1 + rlx_setA*((-30*Den+19*(ux*ux+uy*uy+uz*uz)/porosity + 57*pressure*porosity) - m1); - m2 = m2 + rlx_setA*((12*Den - 5.5*(ux*ux+uy*uy+uz*uz)/porosity-27*pressure*porosity) - m2); - m4 = m4 + rlx_setB*((-0.6666666666666666*ux*Den) - m4); - m6 = m6 + rlx_setB*((-0.6666666666666666*uy*Den) - m6); - m8 = m8 + rlx_setB*((-0.6666666666666666*uz*Den) - m8); - m9 = m9 + rlx_setA*((Den*(2*ux*ux-uy*uy-uz*uz)/porosity) - m9); - m10 = m10 + rlx_setA*(-0.5*Den*((2*ux*ux-uy*uy-uz*uz)/porosity)- m10); - m11 = m11 + rlx_setA*((Den*(uy*uy-uz*uz)/porosity) - m11); - m12 = m12 + rlx_setA*(-0.5*(Den*(uy*uy-uz*uz)/porosity)- m12); - m13 = m13 + rlx_setA*((Den*ux*uy/porosity) - m13); - m14 = m14 + rlx_setA*((Den*uy*uz/porosity) - m14); - m15 = m15 + rlx_setA*((Den*ux*uz/porosity) - m15); + m1 = m1 + rlx_setA*((-30*Den+19*(ux*ux+uy*uy+uz*uz)/porosity + 57*pressure*porosity) - m1) + + (1-0.5*rlx_setA)*38*(Fx*ux+Fy*uy+Fz*uz)/porosity; + m2 = m2 + rlx_setA*((12*Den - 5.5*(ux*ux+uy*uy+uz*uz)/porosity-27*pressure*porosity) - m2) + + (1-0.5*rlx_setA)*11*(-Fx*ux-Fy*uy-Fz*uz)/porosity; + jx = jx + Fx; + m4 = m4 + rlx_setB*((-0.6666666666666666*ux*Den) - m4) + + (1-0.5*rlx_setB)*(-0.6666666666666666*Fx); + jy = jy + Fy; + m6 = m6 + rlx_setB*((-0.6666666666666666*uy*Den) - m6) + + (1-0.5*rlx_setB)*(-0.6666666666666666*Fy); + jz = jz + Fz; + m8 = m8 + rlx_setB*((-0.6666666666666666*uz*Den) - m8) + + (1-0.5*rlx_setB)*(-0.6666666666666666*Fz); + m9 = m9 + rlx_setA*((Den*(2*ux*ux-uy*uy-uz*uz)/porosity) - m9) + + (1-0.5*rlx_setA)*(4*Fx*ux-2*Fy*uy-2*Fz*uz)/porosity; + m10 = m10 + rlx_setA*(-0.5*Den*((2*ux*ux-uy*uy-uz*uz)/porosity)- m10) + + (1-0.5*rlx_setA)*(-2*Fx*ux+Fy*uy+Fz*uz)/porosity; + m11 = m11 + rlx_setA*((Den*(uy*uy-uz*uz)/porosity) - m11) + + (1-0.5*rlx_setA)*(2*Fy*uy-2*Fz*uz)/porosity; + m12 = m12 + rlx_setA*(-0.5*(Den*(uy*uy-uz*uz)/porosity)- m12) + + (1-0.5*rlx_setA)*(-Fy*uy+Fz*uz)/porosity; + m13 = m13 + rlx_setA*((Den*ux*uy/porosity) - m13) + + (1-0.5*rlx_setA)*(Fy*ux+Fx*uy)/porosity; + m14 = m14 + rlx_setA*((Den*uy*uz/porosity) - m14) + + (1-0.5*rlx_setA)*(Fz*uy+Fy*uz)/porosity; + m15 = m15 + rlx_setA*((Den*ux*uz/porosity) - m15) + + (1-0.5*rlx_setA)*(Fz*ux+Fx*uz)/porosity; m16 = m16 + rlx_setB*( - m16); m17 = m17 + rlx_setB*( - m17); m18 = m18 + rlx_setB*( - m18); //....................................................................................................... 
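For reference, the rewritten relaxation above amounts to the following moment-space update. Symbols are mapped onto the code's variables (ε = porosity, K = perm, ρ0 = Den, μ = mu, G = (Gx,Gy,Gz) the applied body force, s_k the relaxation rates rlx_setA / rlx_setB), and u, F are the porosity-corrected velocity and total (body plus Darcy/Forchheimer drag) force computed earlier in the kernel; the same expressions appear explicitly in the GPU version later in this series, and the scheme is the Guo-type porous-media forcing cited in the kernel comments (PRE 66, 036304 (2002)):

\[
\mathbf{v} = \frac{\mathbf{j}}{\rho_0} + \frac{\epsilon}{2}\,\mathbf{G}, \qquad
\mathbf{u} = \frac{\mathbf{v}}{c_0 + \sqrt{c_0^2 + c_1\,|\mathbf{v}|}}, \qquad
c_0 = \frac{1}{2}\Big(1 + \frac{\epsilon\,\mu}{2K}\Big), \quad
c_1 = \frac{\epsilon\,F_\epsilon}{2\sqrt{K}}, \quad
F_\epsilon = \frac{1.75}{\sqrt{150\,\epsilon^3}}
\]
\[
\mathbf{F} = \rho_0\Big(-\frac{\epsilon\,\mu}{K}\,\mathbf{u} - \frac{\epsilon\,F_\epsilon}{\sqrt{K}}\,|\mathbf{u}|\,\mathbf{u} + \epsilon\,\mathbf{G}\Big), \qquad
m_k \leftarrow m_k + s_k\big(m_k^{\mathrm{eq}}(\rho_0,p,\mathbf{u},\epsilon) - m_k\big) + \Big(1 - \frac{s_k}{2}\Big)\hat{F}_k, \qquad
\mathbf{j} \leftarrow \mathbf{j} + \mathbf{F}
\]

Here \(\hat{F}_k\) is the moment-space projection of the forcing, e.g. \(\hat{F}_1 = 38\,(\mathbf{F}\cdot\mathbf{u})/\epsilon\), \(\hat{F}_4 = -\tfrac{2}{3}F_x\), \(\hat{F}_{13} = (F_x u_y + F_y u_x)/\epsilon\), matching the terms added above. Because the half-force correction is now carried by \(\mathbf{u}\) and \(\hat{F}_k\), the old explicit \(0.5\,\mathbf{F}\) momentum shift and the per-direction force terms in the inverse transformation become redundant, which is why they are removed below.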
- - jx+=0.5*Fx;//There is no collision for momentum, but they must be updated subject to the body force - jy+=0.5*Fy; - jz+=0.5*Fz; + //.................inverse transformation...................................................... // q=0 - //fq = mrt_V1*rho-mrt_V2*m1+mrt_V3*m2; - fq = mrt_V1*Den-mrt_V2*m1+mrt_V3*m2 - + 0.3333333333333333*(1. - 0.5*rlx)*(Fx*(0. - (3.*ux)/porosity) + Fy*(0. - (3.*uy)/porosity) + Fz*(0. - (3.*uz)/porosity)); + fq = mrt_V1*Den-mrt_V2*m1+mrt_V3*m2; dist[n] = fq; // q = 1 - //fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(jx-m4)+mrt_V6*(m9-m10) + 0.16666666*Fx; - fq = mrt_V1*Den-mrt_V4*m1-mrt_V5*m2+0.1*(jx-m4)+mrt_V6*(m9-m10) - +0.05555555555555555*(1. - 0.5*rlx)*(Fx*(3. + (6.*ux)/porosity) + Fy*(0. - (3.*uy)/porosity) + Fz*(0. - (3.*uz)/porosity)); + fq = mrt_V1*Den-mrt_V4*m1-mrt_V5*m2+0.1*(jx-m4)+mrt_V6*(m9-m10); nread = neighborList[n+Np]; dist[nread] = fq; // q=2 - //fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(m4-jx)+mrt_V6*(m9-m10) - 0.16666666*Fx; - fq = mrt_V1*Den-mrt_V4*m1-mrt_V5*m2+0.1*(m4-jx)+mrt_V6*(m9-m10) - +0.05555555555555555*(1. - 0.5*rlx)*(Fx*(-3. + (6.*ux)/porosity) + Fy*(0. - (3.*uy)/porosity) + Fz*(0. - (3.*uz)/porosity)); + fq = mrt_V1*Den-mrt_V4*m1-mrt_V5*m2+0.1*(m4-jx)+mrt_V6*(m9-m10); nread = neighborList[n]; dist[nread] = fq; // q = 3 - //fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(jy-m6)+mrt_V7*(m10-m9)+mrt_V8*(m11-m12) + 0.16666666*Fy; - fq = mrt_V1*Den-mrt_V4*m1-mrt_V5*m2+0.1*(jy-m6)+mrt_V7*(m10-m9)+mrt_V8*(m11-m12) - +0.05555555555555555*(1. - 0.5*rlx)*(Fx*(0. - (3.*ux)/porosity) + Fy*(3. + (6.*uy)/porosity) + Fz*(0. - (3.*uz)/porosity)); + fq = mrt_V1*Den-mrt_V4*m1-mrt_V5*m2+0.1*(jy-m6)+mrt_V7*(m10-m9)+mrt_V8*(m11-m12); nread = neighborList[n+3*Np]; dist[nread] = fq; // q = 4 - //fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(m6-jy)+mrt_V7*(m10-m9)+mrt_V8*(m11-m12) - 0.16666666*Fy; - fq = mrt_V1*Den-mrt_V4*m1-mrt_V5*m2+0.1*(m6-jy)+mrt_V7*(m10-m9)+mrt_V8*(m11-m12) - +0.05555555555555555*(1. - 0.5*rlx)*(Fx*(0. - (3.*ux)/porosity) + Fy*(-3. + (6.*uy)/porosity) + Fz*(0. - (3.*uz)/porosity)); + fq = mrt_V1*Den-mrt_V4*m1-mrt_V5*m2+0.1*(m6-jy)+mrt_V7*(m10-m9)+mrt_V8*(m11-m12); nread = neighborList[n+2*Np]; dist[nread] = fq; // q = 5 - //fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(jz-m8)+mrt_V7*(m10-m9)+mrt_V8*(m12-m11) + 0.16666666*Fz; - fq = mrt_V1*Den-mrt_V4*m1-mrt_V5*m2+0.1*(jz-m8)+mrt_V7*(m10-m9)+mrt_V8*(m12-m11) - +0.05555555555555555*(1. - 0.5*rlx)*(Fx*(0. - (3.*ux)/porosity) + Fy*(0. - (3.*uy)/porosity) + Fz*(3. + (6.*uz)/porosity)); + fq = mrt_V1*Den-mrt_V4*m1-mrt_V5*m2+0.1*(jz-m8)+mrt_V7*(m10-m9)+mrt_V8*(m12-m11); nread = neighborList[n+5*Np]; dist[nread] = fq; // q = 6 - //fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(m8-jz)+mrt_V7*(m10-m9)+mrt_V8*(m12-m11) - 0.16666666*Fz; - fq = mrt_V1*Den-mrt_V4*m1-mrt_V5*m2+0.1*(m8-jz)+mrt_V7*(m10-m9)+mrt_V8*(m12-m11) - +0.05555555555555555*(1. - 0.5*rlx)*(Fx*(0. - (3.*ux)/porosity) + Fy*(0. - (3.*uy)/porosity) + Fz*(-3. + (6.*uz)/porosity)); + fq = mrt_V1*Den-mrt_V4*m1-mrt_V5*m2+0.1*(m8-jz)+mrt_V7*(m10-m9)+mrt_V8*(m12-m11); nread = neighborList[n+4*Np]; dist[nread] = fq; // q = 7 - //fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jx+jy)+0.025*(m4+m6)+mrt_V7*m9+mrt_V11*m10+mrt_V8*m11+mrt_V12*m12+0.25*m13+0.125*(m16-m17) + 0.08333333333*(Fx+Fy); - fq = mrt_V1*Den+mrt_V9*m1+mrt_V10*m2+0.1*(jx+jy)+0.025*(m4+m6)+mrt_V7*m9+mrt_V11*m10+mrt_V8*m11+mrt_V12*m12+0.25*m13+0.125*(m16-m17) - +0.027777777777777776*(1. - 0.5*rlx)*(Fx*(3. - (3.*ux)/porosity + (9.*(ux + uy))/porosity) + Fy*(3. 
- (3.*uy)/porosity + (9.*(ux + uy))/porosity) + - Fz*(0. - (3.*uz)/porosity)); + fq = mrt_V1*Den+mrt_V9*m1+mrt_V10*m2+0.1*(jx+jy)+0.025*(m4+m6)+mrt_V7*m9+mrt_V11*m10+mrt_V8*m11+mrt_V12*m12+0.25*m13+0.125*(m16-m17); nread = neighborList[n+7*Np]; dist[nread] = fq; // q = 8 - //fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2-0.1*(jx+jy)-0.025*(m4+m6) +mrt_V7*m9+mrt_V11*m10+mrt_V8*m11+mrt_V12*m12+0.25*m13+0.125*(m17-m16) - 0.08333333333*(Fx+Fy); - fq = mrt_V1*Den+mrt_V9*m1+mrt_V10*m2-0.1*(jx+jy)-0.025*(m4+m6) +mrt_V7*m9+mrt_V11*m10+mrt_V8*m11+mrt_V12*m12+0.25*m13+0.125*(m17-m16) - +0.027777777777777776*(1. - 0.5*rlx)*(Fx*(-3. - (3.*ux)/porosity - (9.*(-ux - uy))/porosity) + Fy*(-3. - (9.*(-ux - uy))/porosity - (3.*uy)/porosity) + - Fz*(0. - (3.*uz)/porosity)); + fq = mrt_V1*Den+mrt_V9*m1+mrt_V10*m2-0.1*(jx+jy)-0.025*(m4+m6) +mrt_V7*m9+mrt_V11*m10+mrt_V8*m11+mrt_V12*m12+0.25*m13+0.125*(m17-m16); nread = neighborList[n+6*Np]; dist[nread] = fq; // q = 9 - //fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jx-jy)+0.025*(m4-m6)+mrt_V7*m9+mrt_V11*m10+mrt_V8*m11+mrt_V12*m12-0.25*m13+0.125*(m16+m17) + 0.08333333333*(Fx-Fy); - fq = mrt_V1*Den+mrt_V9*m1+mrt_V10*m2+0.1*(jx-jy)+0.025*(m4-m6)+mrt_V7*m9+mrt_V11*m10+mrt_V8*m11+mrt_V12*m12-0.25*m13+0.125*(m16+m17) - +0.027777777777777776*(1. - 0.5*rlx)*(Fx*(3. - (3.*ux)/porosity + (9.*(ux - uy))/porosity) + Fy*(-3. - (9.*(ux - uy))/porosity - (3.*uy)/porosity) + - Fz*(0. - (3.*uz)/porosity)); + fq = mrt_V1*Den+mrt_V9*m1+mrt_V10*m2+0.1*(jx-jy)+0.025*(m4-m6)+mrt_V7*m9+mrt_V11*m10+mrt_V8*m11+mrt_V12*m12-0.25*m13+0.125*(m16+m17); nread = neighborList[n+9*Np]; dist[nread] = fq; // q = 10 - //fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jy-jx)+0.025*(m6-m4)+mrt_V7*m9+mrt_V11*m10+mrt_V8*m11+mrt_V12*m12-0.25*m13-0.125*(m16+m17)- 0.08333333333*(Fx-Fy); - fq = mrt_V1*Den+mrt_V9*m1+mrt_V10*m2+0.1*(jy-jx)+0.025*(m6-m4)+mrt_V7*m9+mrt_V11*m10+mrt_V8*m11+mrt_V12*m12-0.25*m13-0.125*(m16+m17) - +0.027777777777777776*(1. - 0.5*rlx)*(Fx*(-3. - (3.*ux)/porosity - (9.*(-ux + uy))/porosity) + Fy*(3. - (3.*uy)/porosity + (9.*(-ux + uy))/porosity) + - Fz*(0. - (3.*uz)/porosity)); + fq = mrt_V1*Den+mrt_V9*m1+mrt_V10*m2+0.1*(jy-jx)+0.025*(m6-m4)+mrt_V7*m9+mrt_V11*m10+mrt_V8*m11+mrt_V12*m12-0.25*m13-0.125*(m16+m17); nread = neighborList[n+8*Np]; dist[nread] = fq; // q = 11 - //fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jx+jz)+0.025*(m4+m8)+mrt_V7*m9+mrt_V11*m10-mrt_V8*m11-mrt_V12*m12+0.25*m15+0.125*(m18-m16) + 0.08333333333*(Fx+Fz); - fq = mrt_V1*Den+mrt_V9*m1+mrt_V10*m2+0.1*(jx+jz)+0.025*(m4+m8)+mrt_V7*m9+mrt_V11*m10-mrt_V8*m11-mrt_V12*m12+0.25*m15+0.125*(m18-m16) - +0.027777777777777776*(1. - 0.5*rlx)*(Fy*(0. - (3.*uy)/porosity) + Fx*(3. - (3.*ux)/porosity + (9.*(ux + uz))/porosity) + - Fz*(3. - (3.*uz)/porosity + (9.*(ux + uz))/porosity)); + fq = mrt_V1*Den+mrt_V9*m1+mrt_V10*m2+0.1*(jx+jz)+0.025*(m4+m8)+mrt_V7*m9+mrt_V11*m10-mrt_V8*m11-mrt_V12*m12+0.25*m15+0.125*(m18-m16); nread = neighborList[n+11*Np]; dist[nread] = fq; // q = 12 - //fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2-0.1*(jx+jz)-0.025*(m4+m8)+mrt_V7*m9+mrt_V11*m10-mrt_V8*m11-mrt_V12*m12+0.25*m15+0.125*(m16-m18) - 0.08333333333*(Fx+Fz); - fq = mrt_V1*Den+mrt_V9*m1+mrt_V10*m2-0.1*(jx+jz)-0.025*(m4+m8)+mrt_V7*m9+mrt_V11*m10-mrt_V8*m11-mrt_V12*m12+0.25*m15+0.125*(m16-m18) - +0.027777777777777776*(1. - 0.5*rlx)*(Fy*(0. - (3.*uy)/porosity) + Fx*(-3. - (3.*ux)/porosity - (9.*(-ux - uz))/porosity) + - Fz*(-3. 
- (9.*(-ux - uz))/porosity - (3.*uz)/porosity)); + fq = mrt_V1*Den+mrt_V9*m1+mrt_V10*m2-0.1*(jx+jz)-0.025*(m4+m8)+mrt_V7*m9+mrt_V11*m10-mrt_V8*m11-mrt_V12*m12+0.25*m15+0.125*(m16-m18); nread = neighborList[n+10*Np]; dist[nread]= fq; // q = 13 - //fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jx-jz)+0.025*(m4-m8)+mrt_V7*m9+mrt_V11*m10-mrt_V8*m11-mrt_V12*m12-0.25*m15-0.125*(m16+m18) + 0.08333333333*(Fx-Fz); - fq = mrt_V1*Den+mrt_V9*m1+mrt_V10*m2+0.1*(jx-jz)+0.025*(m4-m8)+mrt_V7*m9+mrt_V11*m10-mrt_V8*m11-mrt_V12*m12-0.25*m15-0.125*(m16+m18) - +0.027777777777777776*(1. - 0.5*rlx)*(Fy*(0. - (3.*uy)/porosity) + Fx*(3. - (3.*ux)/porosity + (9.*(ux - uz))/porosity) + - Fz*(-3. - (9.*(ux - uz))/porosity - (3.*uz)/porosity)); + fq = mrt_V1*Den+mrt_V9*m1+mrt_V10*m2+0.1*(jx-jz)+0.025*(m4-m8)+mrt_V7*m9+mrt_V11*m10-mrt_V8*m11-mrt_V12*m12-0.25*m15-0.125*(m16+m18); nread = neighborList[n+13*Np]; dist[nread] = fq; // q= 14 - //fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jz-jx)+0.025*(m8-m4)+mrt_V7*m9+mrt_V11*m10-mrt_V8*m11-mrt_V12*m12-0.25*m15+0.125*(m16+m18) - 0.08333333333*(Fx-Fz); - fq = mrt_V1*Den+mrt_V9*m1+mrt_V10*m2+0.1*(jz-jx)+0.025*(m8-m4)+mrt_V7*m9+mrt_V11*m10-mrt_V8*m11-mrt_V12*m12-0.25*m15+0.125*(m16+m18) - +0.027777777777777776*(1. - 0.5*rlx)*(Fy*(0. - (3.*uy)/porosity) + Fx*(-3. - (3.*ux)/porosity - (9.*(-ux + uz))/porosity) + - Fz*(3. - (3.*uz)/porosity + (9.*(-ux + uz))/porosity)); + fq = mrt_V1*Den+mrt_V9*m1+mrt_V10*m2+0.1*(jz-jx)+0.025*(m8-m4)+mrt_V7*m9+mrt_V11*m10-mrt_V8*m11-mrt_V12*m12-0.25*m15+0.125*(m16+m18); nread = neighborList[n+12*Np]; dist[nread] = fq; // q = 15 - //fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jy+jz)+0.025*(m6+m8)-mrt_V6*m9-mrt_V7*m10+0.25*m14+0.125*(m17-m18) + 0.08333333333*(Fy+Fz); - fq = mrt_V1*Den+mrt_V9*m1+mrt_V10*m2+0.1*(jy+jz)+0.025*(m6+m8)-mrt_V6*m9-mrt_V7*m10+0.25*m14+0.125*(m17-m18) - +0.027777777777777776*(1. - 0.5*rlx)*(Fx*(0. - (3.*ux)/porosity) + Fy*(3. - (3.*uy)/porosity + (9.*(uy + uz))/porosity) + - Fz*(3. - (3.*uz)/porosity + (9.*(uy + uz))/porosity)); + fq = mrt_V1*Den+mrt_V9*m1+mrt_V10*m2+0.1*(jy+jz)+0.025*(m6+m8)-mrt_V6*m9-mrt_V7*m10+0.25*m14+0.125*(m17-m18); nread = neighborList[n+15*Np]; dist[nread] = fq; // q = 16 - //fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2-0.1*(jy+jz)-0.025*(m6+m8)-mrt_V6*m9-mrt_V7*m10+0.25*m14+0.125*(m18-m17)- 0.08333333333*(Fy+Fz); - fq = mrt_V1*Den+mrt_V9*m1+mrt_V10*m2-0.1*(jy+jz)-0.025*(m6+m8)-mrt_V6*m9-mrt_V7*m10+0.25*m14+0.125*(m18-m17) - +0.027777777777777776*(1. - 0.5*rlx)*(Fx*(0. - (3.*ux)/porosity) + Fy*(-3. - (3.*uy)/porosity - (9.*(-uy - uz))/porosity) + - Fz*(-3. - (9.*(-uy - uz))/porosity - (3.*uz)/porosity)); + fq = mrt_V1*Den+mrt_V9*m1+mrt_V10*m2-0.1*(jy+jz)-0.025*(m6+m8)-mrt_V6*m9-mrt_V7*m10+0.25*m14+0.125*(m18-m17); nread = neighborList[n+14*Np]; dist[nread] = fq; // q = 17 - //fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jy-jz)+0.025*(m6-m8)-mrt_V6*m9-mrt_V7*m10-0.25*m14+0.125*(m17+m18) + 0.08333333333*(Fy-Fz); - fq = mrt_V1*Den+mrt_V9*m1+mrt_V10*m2+0.1*(jy-jz)+0.025*(m6-m8)-mrt_V6*m9-mrt_V7*m10-0.25*m14+0.125*(m17+m18) - +0.027777777777777776*(1. - 0.5*rlx)*(Fx*(0. - (3.*ux)/porosity) + Fy*(3. - (3.*uy)/porosity + (9.*(uy - uz))/porosity) + - Fz*(-3. 
- (9.*(uy - uz))/porosity - (3.*uz)/porosity)); + fq = mrt_V1*Den+mrt_V9*m1+mrt_V10*m2+0.1*(jy-jz)+0.025*(m6-m8)-mrt_V6*m9-mrt_V7*m10-0.25*m14+0.125*(m17+m18); nread = neighborList[n+17*Np]; dist[nread] = fq; // q = 18 - //fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jz-jy)+0.025*(m8-m6)-mrt_V6*m9-mrt_V7*m10-0.25*m14-0.125*(m17+m18) - 0.08333333333*(Fy-Fz); - fq = mrt_V1*Den+mrt_V9*m1+mrt_V10*m2+0.1*(jz-jy)+0.025*(m8-m6)-mrt_V6*m9-mrt_V7*m10-0.25*m14-0.125*(m17+m18) - +0.027777777777777776*(1. - 0.5*rlx)*(Fx*(0. - (3.*ux)/porosity) + Fy*(-3. - (3.*uy)/porosity - (9.*(-uy + uz))/porosity) + - Fz*(3. - (3.*uz)/porosity + (9.*(-uy + uz))/porosity)); + fq = mrt_V1*Den+mrt_V9*m1+mrt_V10*m2+0.1*(jz-jy)+0.025*(m8-m6)-mrt_V6*m9-mrt_V7*m10-0.25*m14-0.125*(m17+m18); nread = neighborList[n+16*Np]; dist[nread] = fq; //........................................................................ From 060bee7b4495324841b51d205e701fd05fb152fe Mon Sep 17 00:00:00 2001 From: Rex Zhe Li Date: Sat, 18 Jan 2020 22:52:04 -0500 Subject: [PATCH 019/121] GPU version of incompressible greysacle MRT model is ready --- cpu/Greyscale.cpp | 2 - gpu/Greyscale.cu | 1313 +++++++++++++++++++++++++++++++-------------- 2 files changed, 920 insertions(+), 395 deletions(-) diff --git a/cpu/Greyscale.cpp b/cpu/Greyscale.cpp index 95cf516b..f2be769e 100644 --- a/cpu/Greyscale.cpp +++ b/cpu/Greyscale.cpp @@ -391,7 +391,6 @@ extern "C" void ScaLBL_D3Q19_AAeven_Greyscale_IMRT(double *dist, int start, int double jx,jy,jz; // non-conserved moments double m1,m2,m4,m6,m8,m9,m10,m11,m12,m13,m14,m15,m16,m17,m18; - double m3,m5,m7; double fq; //double f0,f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,f11,f12,f13,f14,f15,f16,f17,f18; double GeoFun;//geometric function from Guo's PRE 66, 036304 (2002) @@ -851,7 +850,6 @@ extern "C" void ScaLBL_D3Q19_AAodd_Greyscale_IMRT(int *neighborList, double *dis double jx,jy,jz; // non-conserved moments double m1,m2,m4,m6,m8,m9,m10,m11,m12,m13,m14,m15,m16,m17,m18; - double m3,m5,m7; double fq; //double f0,f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,f11,f12,f13,f14,f15,f16,f17,f18; double GeoFun;//geometric function from Guo's PRE 66, 036304 (2002) diff --git a/gpu/Greyscale.cu b/gpu/Greyscale.cu index fdb0a462..5b8273fe 100644 --- a/gpu/Greyscale.cu +++ b/gpu/Greyscale.cu @@ -396,14 +396,17 @@ __global__ void dvc_ScaLBL_D3Q19_AAodd_Greyscale(int *neighborList, double *dist __global__ void dvc_ScaLBL_D3Q19_AAeven_Greyscale_IMRT(double *dist, int start, int finish, int Np, double rlx, double Gx, double Gy, double Gz, double *Poros,double *Perm, double *Velocity, double Den){ + int n; double vx,vy,vz,v_mag; double ux,uy,uz,u_mag; + double pressure;//defined for this incompressible model // conserved momemnts - double rho,jx,jy,jz; + double jx,jy,jz; // non-conserved moments double m1,m2,m4,m6,m8,m9,m10,m11,m12,m13,m14,m15,m16,m17,m18; - double m3,m5,m7; + double fq; + //double f0,f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,f11,f12,f13,f14,f15,f16,f17,f18; double GeoFun;//geometric function from Guo's PRE 66, 036304 (2002) double porosity; double perm;//voxel permeability @@ -413,6 +416,20 @@ __global__ void dvc_ScaLBL_D3Q19_AAeven_Greyscale_IMRT(double *dist, int start, double rlx_setA = rlx; double rlx_setB = 8.f*(2.f-rlx_setA)/(8.f-rlx_setA); + const double mrt_V1=0.05263157894736842; + const double mrt_V2=0.012531328320802; + const double mrt_V3=0.04761904761904762; + const double mrt_V4=0.004594820384294068; + const double mrt_V5=0.01587301587301587; + const double mrt_V6=0.0555555555555555555555555; + const double mrt_V7=0.02777777777777778; + 
const double mrt_V8=0.08333333333333333; + const double mrt_V9=0.003341687552213868; + const double mrt_V10=0.003968253968253968; + const double mrt_V11=0.01388888888888889; + const double mrt_V12=0.04166666666666666; + + int S = Np/NBLOCKS/NTHREADS + 1; for (int s=0; s 10Np => odd part of dist) + fq = dist[nread]; // reading the f1 data into register fq + pressure = fq; + m1 -= 11.0*fq; + m2 -= 4.0*fq; + jx = fq; + m4 = -4.0*fq; + m9 = 2.0*fq; + m10 = -4.0*fq; + + // q=2 + nread = neighborList[n+Np]; // neighbor 1 ( < 10Np => even part of dist) + fq = dist[nread]; // reading the f2 data into register fq + pressure += fq; + m1 -= 11.0*(fq); + m2 -= 4.0*(fq); + jx -= fq; + m4 += 4.0*(fq); + m9 += 2.0*(fq); + m10 -= 4.0*(fq); + + // q=3 + nread = neighborList[n+2*Np]; // neighbor 4 + fq = dist[nread]; + pressure += fq; + m1 -= 11.0*fq; + m2 -= 4.0*fq; + jy = fq; + m6 = -4.0*fq; + m9 -= fq; + m10 += 2.0*fq; + m11 = fq; + m12 = -2.0*fq; + + // q = 4 + nread = neighborList[n+3*Np]; // neighbor 3 + fq = dist[nread]; + pressure += fq; + m1 -= 11.0*fq; + m2 -= 4.0*fq; + jy -= fq; + m6 += 4.0*fq; + m9 -= fq; + m10 += 2.0*fq; + m11 += fq; + m12 -= 2.0*fq; + + // q=5 + nread = neighborList[n+4*Np]; + fq = dist[nread]; + pressure += fq; + m1 -= 11.0*fq; + m2 -= 4.0*fq; + jz = fq; + m8 = -4.0*fq; + m9 -= fq; + m10 += 2.0*fq; + m11 -= fq; + m12 += 2.0*fq; + + // q = 6 + nread = neighborList[n+5*Np]; + fq = dist[nread]; + pressure += fq; + m1 -= 11.0*fq; + m2 -= 4.0*fq; + jz -= fq; + m8 += 4.0*fq; + m9 -= fq; + m10 += 2.0*fq; + m11 -= fq; + m12 += 2.0*fq; + + // q=7 + nread = neighborList[n+6*Np]; + fq = dist[nread]; + pressure += fq; + m1 += 8.0*fq; + m2 += fq; + jx += fq; + m4 += fq; + jy += fq; + m6 += fq; + m9 += fq; + m10 += fq; + m11 += fq; + m12 += fq; + m13 = fq; + m16 = fq; + m17 = -fq; + + // q = 8 + nread = neighborList[n+7*Np]; + fq = dist[nread]; + pressure += fq; + m1 += 8.0*fq; + m2 += fq; + jx -= fq; + m4 -= fq; + jy -= fq; + m6 -= fq; + m9 += fq; + m10 += fq; + m11 += fq; + m12 += fq; + m13 += fq; + m16 -= fq; + m17 += fq; + + // q=9 + nread = neighborList[n+8*Np]; + fq = dist[nread]; + pressure += fq; + m1 += 8.0*fq; + m2 += fq; + jx += fq; + m4 += fq; + jy -= fq; + m6 -= fq; + m9 += fq; + m10 += fq; + m11 += fq; + m12 += fq; + m13 -= fq; + m16 += fq; + m17 += fq; + + // q = 10 + nread = neighborList[n+9*Np]; + fq = dist[nread]; + pressure += fq; + m1 += 8.0*fq; + m2 += fq; + jx -= fq; + m4 -= fq; + jy += fq; + m6 += fq; + m9 += fq; + m10 += fq; + m11 += fq; + m12 += fq; + m13 -= fq; + m16 -= fq; + m17 -= fq; + + // q=11 + nread = neighborList[n+10*Np]; + fq = dist[nread]; + pressure += fq; + m1 += 8.0*fq; + m2 += fq; + jx += fq; + m4 += fq; + jz += fq; + m8 += fq; + m9 += fq; + m10 += fq; + m11 -= fq; + m12 -= fq; + m15 = fq; + m16 -= fq; + m18 = fq; + + // q=12 + nread = neighborList[n+11*Np]; + fq = dist[nread]; + pressure += fq; + m1 += 8.0*fq; + m2 += fq; + jx -= fq; + m4 -= fq; + jz -= fq; + m8 -= fq; + m9 += fq; + m10 += fq; + m11 -= fq; + m12 -= fq; + m15 += fq; + m16 += fq; + m18 -= fq; + + // q=13 + nread = neighborList[n+12*Np]; + fq = dist[nread]; + pressure += fq; + m1 += 8.0*fq; + m2 += fq; + jx += fq; + m4 += fq; + jz -= fq; + m8 -= fq; + m9 += fq; + m10 += fq; + m11 -= fq; + m12 -= fq; + m15 -= fq; + m16 -= fq; + m18 -= fq; + + // q=14 + nread = neighborList[n+13*Np]; + fq = dist[nread]; + pressure += fq; + m1 += 8.0*fq; + m2 += fq; + jx -= fq; + m4 -= fq; + jz += fq; + m8 += fq; + m9 += fq; + m10 += fq; + m11 -= fq; + m12 -= fq; + m15 -= fq; + m16 += fq; + m18 += fq; + 
+ // q=15 + nread = neighborList[n+14*Np]; + fq = dist[nread]; + pressure += fq; + m1 += 8.0*fq; + m2 += fq; + jy += fq; + m6 += fq; + jz += fq; + m8 += fq; + m9 -= 2.0*fq; + m10 -= 2.0*fq; + m14 = fq; + m17 += fq; + m18 -= fq; + + // q=16 + nread = neighborList[n+15*Np]; + fq = dist[nread]; + pressure += fq; + m1 += 8.0*fq; + m2 += fq; + jy -= fq; + m6 -= fq; + jz -= fq; + m8 -= fq; + m9 -= 2.0*fq; + m10 -= 2.0*fq; + m14 += fq; + m17 -= fq; + m18 += fq; + + // q=17 + nread = neighborList[n+16*Np]; + fq = dist[nread]; + pressure += fq; + m1 += 8.0*fq; + m2 += fq; + jy += fq; + m6 += fq; + jz -= fq; + m8 -= fq; + m9 -= 2.0*fq; + m10 -= 2.0*fq; + m14 -= fq; + m17 += fq; + m18 += fq; + + // q=18 + nread = neighborList[n+17*Np]; + fq = dist[nread]; + pressure += fq; + m1 += 8.0*fq; + m2 += fq; + jy -= fq; + m6 -= fq; + jz += fq; + m8 += fq; + m9 -= 2.0*fq; + m10 -= 2.0*fq; + m14 -= fq; + m17 -= fq; + m18 -= fq; + //---------------------------------------------------------------------// + + porosity = Poros[n]; + perm = Perm[n]; + + c0 = 0.5*(1.0+porosity*0.5*mu/perm); + if (porosity==1.0) c0 = 0.5;//i.e. apparent pore nodes + GeoFun = 1.75/sqrt(150.0*porosity*porosity*porosity); + c1 = porosity*0.5*GeoFun/sqrt(perm); + if (porosity==1.0) c1 = 0.0;//i.e. apparent pore nodes + + vx = jx/Den+0.5*porosity*Gx; + vy = jy/Den+0.5*porosity*Gy; + vz = jz/Den+0.5*porosity*Gz; + v_mag=sqrt(vx*vx+vy*vy+vz*vz); + ux = vx/(c0+sqrt(c0*c0+c1*v_mag)); + uy = vy/(c0+sqrt(c0*c0+c1*v_mag)); + uz = vz/(c0+sqrt(c0*c0+c1*v_mag)); + u_mag=sqrt(ux*ux+uy*uy+uz*uz); + + //Update the total force to include linear (Darcy) and nonlinear (Forchheimer) drags due to the porous medium + Fx = Den*(-porosity*mu/perm*ux - porosity*GeoFun/sqrt(perm)*u_mag*ux + porosity*Gx); + Fy = Den*(-porosity*mu/perm*uy - porosity*GeoFun/sqrt(perm)*u_mag*uy + porosity*Gy); + Fz = Den*(-porosity*mu/perm*uz - porosity*GeoFun/sqrt(perm)*u_mag*uz + porosity*Gz); + if (porosity==1.0){ + Fx=Den*Gx; + Fy=Den*Gy; + Fz=Den*Gz; + } + + //Calculate pressure for Incompressible-MRT model + pressure=0.5/porosity*(pressure-0.5*Den*u_mag*u_mag/porosity); + + //..............carry out relaxation process............................................... 
+ m1 = m1 + rlx_setA*((-30*Den+19*(ux*ux+uy*uy+uz*uz)/porosity + 57*pressure*porosity) - m1) + + (1-0.5*rlx_setA)*38*(Fx*ux+Fy*uy+Fz*uz)/porosity; + m2 = m2 + rlx_setA*((12*Den - 5.5*(ux*ux+uy*uy+uz*uz)/porosity-27*pressure*porosity) - m2) + + (1-0.5*rlx_setA)*11*(-Fx*ux-Fy*uy-Fz*uz)/porosity; + jx = jx + Fx; + m4 = m4 + rlx_setB*((-0.6666666666666666*ux*Den) - m4) + + (1-0.5*rlx_setB)*(-0.6666666666666666*Fx); + jy = jy + Fy; + m6 = m6 + rlx_setB*((-0.6666666666666666*uy*Den) - m6) + + (1-0.5*rlx_setB)*(-0.6666666666666666*Fy); + jz = jz + Fz; + m8 = m8 + rlx_setB*((-0.6666666666666666*uz*Den) - m8) + + (1-0.5*rlx_setB)*(-0.6666666666666666*Fz); + m9 = m9 + rlx_setA*((Den*(2*ux*ux-uy*uy-uz*uz)/porosity) - m9) + + (1-0.5*rlx_setA)*(4*Fx*ux-2*Fy*uy-2*Fz*uz)/porosity; + m10 = m10 + rlx_setA*(-0.5*Den*((2*ux*ux-uy*uy-uz*uz)/porosity)- m10) + + (1-0.5*rlx_setA)*(-2*Fx*ux+Fy*uy+Fz*uz)/porosity; + m11 = m11 + rlx_setA*((Den*(uy*uy-uz*uz)/porosity) - m11) + + (1-0.5*rlx_setA)*(2*Fy*uy-2*Fz*uz)/porosity; + m12 = m12 + rlx_setA*(-0.5*(Den*(uy*uy-uz*uz)/porosity)- m12) + + (1-0.5*rlx_setA)*(-Fy*uy+Fz*uz)/porosity; + m13 = m13 + rlx_setA*((Den*ux*uy/porosity) - m13) + + (1-0.5*rlx_setA)*(Fy*ux+Fx*uy)/porosity; + m14 = m14 + rlx_setA*((Den*uy*uz/porosity) - m14) + + (1-0.5*rlx_setA)*(Fz*uy+Fy*uz)/porosity; + m15 = m15 + rlx_setA*((Den*ux*uz/porosity) - m15) + + (1-0.5*rlx_setA)*(Fz*ux+Fx*uz)/porosity; + m16 = m16 + rlx_setB*( - m16); + m17 = m17 + rlx_setB*( - m17); + m18 = m18 + rlx_setB*( - m18); + //....................................................................................................... + + //.................inverse transformation...................................................... + // q=0 + fq = mrt_V1*Den-mrt_V2*m1+mrt_V3*m2; + dist[n] = fq; + + // q = 1 + fq = mrt_V1*Den-mrt_V4*m1-mrt_V5*m2+0.1*(jx-m4)+mrt_V6*(m9-m10); + nread = neighborList[n+Np]; + dist[nread] = fq; + + // q=2 + fq = mrt_V1*Den-mrt_V4*m1-mrt_V5*m2+0.1*(m4-jx)+mrt_V6*(m9-m10); + nread = neighborList[n]; + dist[nread] = fq; + + // q = 3 + fq = mrt_V1*Den-mrt_V4*m1-mrt_V5*m2+0.1*(jy-m6)+mrt_V7*(m10-m9)+mrt_V8*(m11-m12); + nread = neighborList[n+3*Np]; + dist[nread] = fq; + + // q = 4 + fq = mrt_V1*Den-mrt_V4*m1-mrt_V5*m2+0.1*(m6-jy)+mrt_V7*(m10-m9)+mrt_V8*(m11-m12); + nread = neighborList[n+2*Np]; + dist[nread] = fq; + + // q = 5 + fq = mrt_V1*Den-mrt_V4*m1-mrt_V5*m2+0.1*(jz-m8)+mrt_V7*(m10-m9)+mrt_V8*(m12-m11); + nread = neighborList[n+5*Np]; + dist[nread] = fq; + + // q = 6 + fq = mrt_V1*Den-mrt_V4*m1-mrt_V5*m2+0.1*(m8-jz)+mrt_V7*(m10-m9)+mrt_V8*(m12-m11); + nread = neighborList[n+4*Np]; + dist[nread] = fq; + + // q = 7 + fq = mrt_V1*Den+mrt_V9*m1+mrt_V10*m2+0.1*(jx+jy)+0.025*(m4+m6)+mrt_V7*m9+mrt_V11*m10+mrt_V8*m11+mrt_V12*m12+0.25*m13+0.125*(m16-m17); + nread = neighborList[n+7*Np]; + dist[nread] = fq; + + // q = 8 + fq = mrt_V1*Den+mrt_V9*m1+mrt_V10*m2-0.1*(jx+jy)-0.025*(m4+m6) +mrt_V7*m9+mrt_V11*m10+mrt_V8*m11+mrt_V12*m12+0.25*m13+0.125*(m17-m16); + nread = neighborList[n+6*Np]; + dist[nread] = fq; + + // q = 9 + fq = mrt_V1*Den+mrt_V9*m1+mrt_V10*m2+0.1*(jx-jy)+0.025*(m4-m6)+mrt_V7*m9+mrt_V11*m10+mrt_V8*m11+mrt_V12*m12-0.25*m13+0.125*(m16+m17); + nread = neighborList[n+9*Np]; + dist[nread] = fq; + + // q = 10 + fq = mrt_V1*Den+mrt_V9*m1+mrt_V10*m2+0.1*(jy-jx)+0.025*(m6-m4)+mrt_V7*m9+mrt_V11*m10+mrt_V8*m11+mrt_V12*m12-0.25*m13-0.125*(m16+m17); + nread = neighborList[n+8*Np]; + dist[nread] = fq; + + // q = 11 + fq = 
mrt_V1*Den+mrt_V9*m1+mrt_V10*m2+0.1*(jx+jz)+0.025*(m4+m8)+mrt_V7*m9+mrt_V11*m10-mrt_V8*m11-mrt_V12*m12+0.25*m15+0.125*(m18-m16); + nread = neighborList[n+11*Np]; + dist[nread] = fq; + + // q = 12 + fq = mrt_V1*Den+mrt_V9*m1+mrt_V10*m2-0.1*(jx+jz)-0.025*(m4+m8)+mrt_V7*m9+mrt_V11*m10-mrt_V8*m11-mrt_V12*m12+0.25*m15+0.125*(m16-m18); + nread = neighborList[n+10*Np]; + dist[nread]= fq; + + // q = 13 + fq = mrt_V1*Den+mrt_V9*m1+mrt_V10*m2+0.1*(jx-jz)+0.025*(m4-m8)+mrt_V7*m9+mrt_V11*m10-mrt_V8*m11-mrt_V12*m12-0.25*m15-0.125*(m16+m18); + nread = neighborList[n+13*Np]; + dist[nread] = fq; + + // q= 14 + fq = mrt_V1*Den+mrt_V9*m1+mrt_V10*m2+0.1*(jz-jx)+0.025*(m8-m4)+mrt_V7*m9+mrt_V11*m10-mrt_V8*m11-mrt_V12*m12-0.25*m15+0.125*(m16+m18); + nread = neighborList[n+12*Np]; + dist[nread] = fq; + + // q = 15 + fq = mrt_V1*Den+mrt_V9*m1+mrt_V10*m2+0.1*(jy+jz)+0.025*(m6+m8)-mrt_V6*m9-mrt_V7*m10+0.25*m14+0.125*(m17-m18); + nread = neighborList[n+15*Np]; + dist[nread] = fq; + + // q = 16 + fq = mrt_V1*Den+mrt_V9*m1+mrt_V10*m2-0.1*(jy+jz)-0.025*(m6+m8)-mrt_V6*m9-mrt_V7*m10+0.25*m14+0.125*(m18-m17); + nread = neighborList[n+14*Np]; + dist[nread] = fq; + + // q = 17 + fq = mrt_V1*Den+mrt_V9*m1+mrt_V10*m2+0.1*(jy-jz)+0.025*(m6-m8)-mrt_V6*m9-mrt_V7*m10-0.25*m14+0.125*(m17+m18); + nread = neighborList[n+17*Np]; + dist[nread] = fq; + + // q = 18 + fq = mrt_V1*Den+mrt_V9*m1+mrt_V10*m2+0.1*(jz-jy)+0.025*(m8-m6)-mrt_V6*m9-mrt_V7*m10-0.25*m14-0.125*(m17+m18); + nread = neighborList[n+16*Np]; + dist[nread] = fq; + //........................................................................ + + //Update velocity on device + Velocity[0*Np+n] = ux; + Velocity[1*Np+n] = uy; + Velocity[2*Np+n] = uz; + + } + } +} + extern "C" void ScaLBL_D3Q19_AAeven_Greyscale(double *dist, int start, int finish, int Np, double rlx, double Fx, double Fy, double Fz,double *Poros,double *Perm, double *Velocity){ @@ -873,6 +1378,7 @@ extern "C" void ScaLBL_D3Q19_AAeven_Greyscale(double *dist, int start, int finis } extern "C" void ScaLBL_D3Q19_AAodd_Greyscale(int *neighborList, double *dist, int start, int finish, int Np, double rlx, double Fx, double Fy, double Fz,double *Poros,double *Perm, double *Velocity){ + dvc_ScaLBL_D3Q19_AAodd_Greyscale<<>>(neighborList,dist,start,finish,Np,rlx,Fx,Fy,Fz,Poros,Perm,Velocity); cudaError_t err = cudaGetLastError(); @@ -880,3 +1386,24 @@ extern "C" void ScaLBL_D3Q19_AAodd_Greyscale(int *neighborList, double *dist, in printf("CUDA error in ScaLBL_D3Q19_AAodd_Greyscale: %s \n",cudaGetErrorString(err)); } } + +extern "C" void ScaLBL_D3Q19_AAeven_Greyscale_IMRT(double *dist, int start, int finish, int Np, double rlx, double Fx, double Fy, double Fz,double *Poros,double *Perm, double *Velocity,double Den){ + + dvc_ScaLBL_D3Q19_AAeven_Greyscale_IMRT<<>>(dist,start,finish,Np,rlx,Fx,Fy,Fz,Poros,Perm,Velocity,Den); + + cudaError_t err = cudaGetLastError(); + if (cudaSuccess != err){ + printf("CUDA error in ScaLBL_D3Q19_AAeven_Greyscale_IMRT: %s \n",cudaGetErrorString(err)); + } +} + +extern "C" void ScaLBL_D3Q19_AAodd_Greyscale_IMRT(int *neighborList, double *dist, int start, int finish, int Np, double rlx, double Fx, double Fy, double Fz,double *Poros,double *Perm, double *Velocity,double Den){ + + dvc_ScaLBL_D3Q19_AAodd_Greyscale_IMRT<<>>(neighborList,dist,start,finish,Np,rlx,Fx,Fy,Fz,Poros,Perm,Velocity,Den); + + cudaError_t err = cudaGetLastError(); + if (cudaSuccess != err){ + printf("CUDA error in ScaLBL_D3Q19_AAodd_Greyscale_IMRT: %s \n",cudaGetErrorString(err)); + } +} + From 
783d7ff7b2cf9027ab9f26d48fb5e5595c18fa81 Mon Sep 17 00:00:00 2001 From: Rex Zhe Li Date: Mon, 20 Jan 2020 13:17:03 -0500 Subject: [PATCH 020/121] remove the reserved labels (i.e. Label=1,2) which store some pre-defined voxel perm values that may accidentally overwrite use-defined labels --- models/GreyscaleModel.cpp | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/models/GreyscaleModel.cpp b/models/GreyscaleModel.cpp index 1cdae815..ac44c6d3 100644 --- a/models/GreyscaleModel.cpp +++ b/models/GreyscaleModel.cpp @@ -194,10 +194,6 @@ void ScaLBL_GreyscaleModel::AssignComponentLabels(double *Porosity, double *Perm //Mask->id[n] = 0; // set mask to zero since this is an immobile component } } - // fluid labels are reserved / negative labels are immobile - if (VALUE == 1) POROSITY=1.0; - else if (VALUE == 2) POROSITY=1.0; - else if (VALUE < 1) POROSITY = 0.0; int idx = Map(i,j,k); if (!(idx < 0)){ if (POROSITY<=0.0){ @@ -228,19 +224,13 @@ void ScaLBL_GreyscaleModel::AssignComponentLabels(double *Porosity, double *Perm //Mask->id[n] = 0; // set mask to zero since this is an immobile component } } - // Permeability of fluid labels are reserved - // NOTE: the voxel permeability of apparent pore nodes should be infinity - // TODO: Need to revise the PERMEABILITY of nodes whose VALUE=1 and 2 - if (VALUE == 1) PERMEABILITY=1.0; - else if (VALUE == 2) PERMEABILITY=1.0; - else if (VALUE < 1) PERMEABILITY = 0.0; int idx = Map(i,j,k); if (!(idx < 0)){ if (PERMEABILITY<=0.0){ ERROR("Error: Permeability for grey voxel must be > 0.0 ! \n"); } else{ - Permeability[idx] = PERMEABILITY; + Permeability[idx] = PERMEABILITY/Dm->voxel_length/Dm->voxel_length; } } } @@ -254,13 +244,15 @@ void ScaLBL_GreyscaleModel::AssignComponentLabels(double *Porosity, double *Perm for (int idx=0; idxComm, label_count[idx]); if (rank==0){ + printf("Image resolution: %.5g [um/voxel]\n",Dm->voxel_length); printf("Component labels: %lu \n",NLABELS); for (unsigned int idx=0; idxvoxel_length/Dm->voxel_length,volume_fraction); } } From fb33408a956bffbca4c20f8b6fc77f527686e1ee Mon Sep 17 00:00:00 2001 From: Rex Zhe Li Date: Mon, 20 Jan 2020 19:29:03 -0500 Subject: [PATCH 021/121] 1.disable debug write-out; 2. 
add a weighted porosity --- models/GreyscaleModel.cpp | 17 +++++++++++++---- models/GreyscaleModel.h | 1 + tests/lbpm_greyscale_simulator.cpp | 2 +- 3 files changed, 15 insertions(+), 5 deletions(-) diff --git a/models/GreyscaleModel.cpp b/models/GreyscaleModel.cpp index ac44c6d3..b1f6603e 100644 --- a/models/GreyscaleModel.cpp +++ b/models/GreyscaleModel.cpp @@ -8,7 +8,7 @@ color lattice boltzmann model #include ScaLBL_GreyscaleModel::ScaLBL_GreyscaleModel(int RANK, int NP, MPI_Comm COMM): -rank(RANK), nprocs(NP), Restart(0),timestep(0),timestepMax(0),tau(0),Den(0),Fx(0),Fy(0),Fz(0),flux(0),din(0),dout(0), +rank(RANK), nprocs(NP), Restart(0),timestep(0),timestepMax(0),tau(0),Den(0),Fx(0),Fy(0),Fz(0),flux(0),din(0),dout(0),GreyPorosity(0), Nx(0),Ny(0),Nz(0),N(0),Np(0),nprocx(0),nprocy(0),nprocz(0),BoundaryCondition(0),Lx(0),Ly(0),Lz(0),comm(COMM) { SignDist.resize(Nx,Ny,Nz); @@ -243,6 +243,13 @@ void ScaLBL_GreyscaleModel::AssignComponentLabels(double *Porosity, double *Perm for (int idx=0; idxComm, label_count[idx]); + //Initialize a weighted porosity after considering grey voxels + GreyPorosity=0.0; + for (unsigned int idx=0; idxvoxel_length); printf("Component labels: %lu \n",NLABELS); @@ -251,11 +258,12 @@ void ScaLBL_GreyscaleModel::AssignComponentLabels(double *Porosity, double *Perm POROSITY=PorosityList[idx]; PERMEABILITY=PermeabilityList[idx]; double volume_fraction = double(label_count_global[idx])/double((Nx-2)*(Ny-2)*(Nz-2)*nprocs); - printf(" label=%d, porosity=%.3g, permeability=%.3g [um^2] (=%.3g [voxel^2]), volume fraction=%.3g\n", + printf(" label=%d: porosity=%.3g, permeability=%.3g [um^2] (=%.3g [voxel^2]), volume fraction=%.3g\n", VALUE,POROSITY,PERMEABILITY,PERMEABILITY/Dm->voxel_length/Dm->voxel_length,volume_fraction); + printf(" effective porosity=%.3g\n",volume_fraction*POROSITY); } + printf("The weighted porosity, considering both open and grey voxels, is %.3g\n",GreyPorosity); } - } @@ -497,7 +505,8 @@ void ScaLBL_GreyscaleModel::Run(){ Hs=sumReduce( Dm->Comm, Hs); Xs=sumReduce( Dm->Comm, Xs); double h = Dm->voxel_length; - double absperm = h*h*mu*Mask->Porosity()*flow_rate / force_mag; + //double absperm = h*h*mu*Mask->Porosity()*flow_rate / force_mag; + double absperm = h*h*mu*GreyPorosity*flow_rate / force_mag; if (rank==0){ printf(" AbsPerm = %.5g [micron^2]\n",absperm); diff --git a/models/GreyscaleModel.h b/models/GreyscaleModel.h index ac939aed..b427218b 100644 --- a/models/GreyscaleModel.h +++ b/models/GreyscaleModel.h @@ -41,6 +41,7 @@ public: double Fx,Fy,Fz,flux; double din,dout; double dp;//solid particle diameter, unit in voxel + double GreyPorosity; int Nx,Ny,Nz,N,Np; int rank,nprocx,nprocy,nprocz,nprocs; diff --git a/tests/lbpm_greyscale_simulator.cpp b/tests/lbpm_greyscale_simulator.cpp index 61322d6d..ef253cd7 100644 --- a/tests/lbpm_greyscale_simulator.cpp +++ b/tests/lbpm_greyscale_simulator.cpp @@ -55,7 +55,7 @@ int main(int argc, char **argv) Greyscale.Initialize(); // initializing the model will set initial conditions for variables Greyscale.Run(); Greyscale.VelocityField(); - Greyscale.WriteDebug(); + //Greyscale.WriteDebug(); } // **************************************************** MPI_Barrier(comm); From e3afe1eba80700d65b2b00e8ab9b65354ff201a5 Mon Sep 17 00:00:00 2001 From: Rex Zhe Li Date: Tue, 21 Jan 2020 14:31:33 -0500 Subject: [PATCH 022/121] add a restart utitlity to the greyscale simulator --- models/GreyscaleModel.cpp | 95 ++++++++++++++++++++---------- tests/lbpm_greyscale_simulator.cpp | 2 +- 2 files changed, 65 insertions(+), 
32 deletions(-) diff --git a/models/GreyscaleModel.cpp b/models/GreyscaleModel.cpp index b1f6603e..06077780 100644 --- a/models/GreyscaleModel.cpp +++ b/models/GreyscaleModel.cpp @@ -1,5 +1,5 @@ /* -color lattice boltzmann model +Greyscale lattice boltzmann model */ #include "models/GreyscaleModel.h" #include "analysis/distance.h" @@ -7,6 +7,12 @@ color lattice boltzmann model #include #include +template +void DeleteArray( const TYPE *p ) +{ + delete [] p; +} + ScaLBL_GreyscaleModel::ScaLBL_GreyscaleModel(int RANK, int NP, MPI_Comm COMM): rank(RANK), nprocs(NP), Restart(0),timestep(0),timestepMax(0),tau(0),Den(0),Fx(0),Fy(0),Fz(0),flux(0),din(0),dout(0),GreyPorosity(0), Nx(0),Ny(0),Nz(0),N(0),Np(0),nprocx(0),nprocy(0),nprocz(0),BoundaryCondition(0),Lx(0),Ly(0),Lz(0),comm(COMM) @@ -117,6 +123,7 @@ void ScaLBL_GreyscaleModel::ReadInput(){ Mask->Decomp(Filename); } else{ + if (rank==0) printf("Filename of input image is not found, reading ID.0* instead."); Mask->ReadIDs(); } for (int i=0; iid[i]; // save what was read @@ -357,39 +364,23 @@ void ScaLBL_GreyscaleModel::Create(){ void ScaLBL_GreyscaleModel::Initialize(){ - if (rank==0) printf ("Initializing distributions \n"); ScaLBL_D3Q19_Init(fq, Np); - /* - * This function initializes model - */ + if (Restart == true){ if (rank==0){ - printf("Reading restart file! \n"); + printf("Initializing distributions from Restart! \n"); } - // Read in the restart file to CPU buffers - int *TmpMap; - TmpMap = new int[Np]; - - double *cDist; - cDist = new double[19*Np]; - ScaLBL_CopyToHost(TmpMap, dvcMap, Np*sizeof(int)); - - ifstream File(LocalRestartFile,ios::binary); - int idx; - double value; - for (int n=0; n cfq; + cfq = std::shared_ptr(new double[19*Np],DeleteArray); + FILE *File; + File=fopen(LocalRestartFile,"rb"); + fread(cfq.get(),sizeof(double),19*Np,File); + fclose(File); + // Copy the restart data to the GPU - ScaLBL_CopyToDevice(fq,cDist,19*Np*sizeof(double)); + ScaLBL_CopyToDevice(fq,cfq.get(),19*Np*sizeof(double)); ScaLBL_DeviceBarrier(); MPI_Barrier(comm); @@ -400,6 +391,21 @@ void ScaLBL_GreyscaleModel::Run(){ int nprocs=nprocx*nprocy*nprocz; const RankInfoStruct rank_info(rank,nprocx,nprocy,nprocz); + int analysis_interval = 1000; // number of timesteps in between in situ analysis + int visualization_interval = 1000; + int restart_interval = 10000; // number of timesteps in between in saving distributions for restart + if (analysis_db->keyExists( "analysis_interval" )){ + analysis_interval = analysis_db->getScalar( "analysis_interval" ); + } + if (analysis_db->keyExists( "visualization_interval" )){ + visualization_interval = analysis_db->getScalar( "visualization_interval" ); + } + if (analysis_db->keyExists( "restart_interval" )){ + restart_interval = analysis_db->getScalar( "restart_interval" ); + } + if (greyscale_db->keyExists( "timestep" )){ + timestep = greyscale_db->getScalar( "timestep" ); + } if (rank==0){ printf("********************************************************\n"); @@ -418,8 +424,7 @@ void ScaLBL_GreyscaleModel::Run(){ //************ MAIN ITERATION LOOP ***************************************/ PROFILE_START("Loop"); - //std::shared_ptr analysis_db; - timestep=0; + auto current_db = db->cloneDatabase(); double rlx = 1.0/tau; double error = 1.0; double flow_rate_previous = 0.0; @@ -443,7 +448,7 @@ void ScaLBL_GreyscaleModel::Run(){ ScaLBL_DeviceBarrier(); MPI_Barrier(comm); //************************************************************************/ - if (timestep%1000==0){ + if (timestep%analysis_interval==0){ 
//ScaLBL_D3Q19_Momentum(fq,Velocity, Np); //ScaLBL_DeviceBarrier(); MPI_Barrier(comm); ScaLBL_Comm->RegularLayout(Map,&Velocity[0],Velocity_x); @@ -518,7 +523,7 @@ void ScaLBL_GreyscaleModel::Run(){ WriteHeader=true; log_file = fopen("Permeability.csv","a"); if (WriteHeader) - fprintf(log_file,"timesteps Fx Fy Fz mu Vs As Hs Xs vax vay vaz absperm \n", + fprintf(log_file,"timestep Fx Fy Fz mu Vs As Hs Xs vax vay vaz AbsPerm \n", timestep,Fx,Fy,Fz,mu,h*h*h*Vs,h*h*As,h*Hs,Xs,vax,vay,vaz,absperm); fprintf(log_file,"%i %.8g %.8g %.8g %.8g %.8g %.8g %.8g %.8g %.8g %.8g %.8g %.8g\n",timestep, Fx, Fy, Fz, mu, @@ -526,7 +531,35 @@ void ScaLBL_GreyscaleModel::Run(){ fclose(log_file); } } + + if (timestep%visualization_interval==0){ + VelocityField(); + } + + if (timestep%restart_interval==0){ + //Use rank=0 write out Restart.db + if (rank==0) { + greyscale_db->putScalar("timestep",timestep); + greyscale_db->putScalar( "Restart", true ); + current_db->putDatabase("Greyscale", greyscale_db); + std::ofstream OutStream("Restart.db"); + current_db->print(OutStream, ""); + OutStream.close(); + + } + //Write out Restart data. + std::shared_ptr cfq; + cfq = std::shared_ptr(new double[19*Np],DeleteArray); + ScaLBL_CopyToHost(cfq.get(),fq,19*Np*sizeof(double));// Copy restart data to the CPU + + FILE *RESTARTFILE; + RESTARTFILE=fopen(LocalRestartFile,"wb"); + fwrite(cfq.get(),sizeof(double),19*Np,RESTARTFILE); + fclose(RESTARTFILE); + MPI_Barrier(comm); + } } + PROFILE_STOP("Loop"); PROFILE_SAVE("lbpm_greyscale_simulator",1); //************************************************************************ diff --git a/tests/lbpm_greyscale_simulator.cpp b/tests/lbpm_greyscale_simulator.cpp index ef253cd7..b7ed442e 100644 --- a/tests/lbpm_greyscale_simulator.cpp +++ b/tests/lbpm_greyscale_simulator.cpp @@ -54,7 +54,7 @@ int main(int argc, char **argv) Greyscale.Create(); // creating the model will create data structure to match the pore structure and allocate variables Greyscale.Initialize(); // initializing the model will set initial conditions for variables Greyscale.Run(); - Greyscale.VelocityField(); + //Greyscale.VelocityField(); //Greyscale.WriteDebug(); } // **************************************************** From 0372b9d1e8379ba0e2c3e4f14aeb6eece9946c77 Mon Sep 17 00:00:00 2001 From: Rex Zhe Li Date: Tue, 21 Jan 2020 23:24:10 -0500 Subject: [PATCH 023/121] save the work, update how flow_rate is computed in greyscale simulator --- models/GreyscaleModel.cpp | 36 ++++++++++++++++++++---------------- models/GreyscaleModel.h | 1 + 2 files changed, 21 insertions(+), 16 deletions(-) diff --git a/models/GreyscaleModel.cpp b/models/GreyscaleModel.cpp index 06077780..36f853b1 100644 --- a/models/GreyscaleModel.cpp +++ b/models/GreyscaleModel.cpp @@ -99,6 +99,7 @@ void ScaLBL_GreyscaleModel::SetDomain(){ Velocity_x.resize(Nx,Ny,Nz); Velocity_y.resize(Nx,Ny,Nz); Velocity_z.resize(Nx,Ny,Nz); + PorosityMap.resize(Nx,Ny,Nz); id = new signed char [N]; for (int i=0; iid[i] = 1; // initialize this way @@ -449,37 +450,40 @@ void ScaLBL_GreyscaleModel::Run(){ //************************************************************************/ if (timestep%analysis_interval==0){ - //ScaLBL_D3Q19_Momentum(fq,Velocity, Np); - //ScaLBL_DeviceBarrier(); MPI_Barrier(comm); ScaLBL_Comm->RegularLayout(Map,&Velocity[0],Velocity_x); ScaLBL_Comm->RegularLayout(Map,&Velocity[Np],Velocity_y); ScaLBL_Comm->RegularLayout(Map,&Velocity[2*Np],Velocity_z); + ScaLBL_Comm->RegularLayout(Map,Porosity,PorosityMap); double count_loc=0; double count; double 
vax,vay,vaz; - double vax_loc,vay_loc,vaz_loc; - vax_loc = vay_loc = vaz_loc = 0.f; + double px_loc,py_loc,pz_loc; + double px,py,pz; + double mass_loc,mass_glb; + + px_loc = py_loc = pz_loc = 0.f; + mass_loc = 0.f; for (int k=1; k 0){ - vax_loc += Velocity_x(i,j,k); - vay_loc += Velocity_y(i,j,k); - vaz_loc += Velocity_z(i,j,k); - count_loc+=1.0; + px_loc += Velocity_x(i,j,k)*Den*PorosityMap(i,j,k); + py_loc += Velocity_y(i,j,k)*Den*PorosityMap(i,j,k); + pz_loc += Velocity_z(i,j,k)*Den*PorosityMap(i,j,k); + mass_loc += Den*PorosityMap(i,j,k); } } } } - MPI_Allreduce(&vax_loc,&vax,1,MPI_DOUBLE,MPI_SUM,Mask->Comm); - MPI_Allreduce(&vay_loc,&vay,1,MPI_DOUBLE,MPI_SUM,Mask->Comm); - MPI_Allreduce(&vaz_loc,&vaz,1,MPI_DOUBLE,MPI_SUM,Mask->Comm); - MPI_Allreduce(&count_loc,&count,1,MPI_DOUBLE,MPI_SUM,Mask->Comm); + MPI_Allreduce(&px_loc, &px, 1,MPI_DOUBLE,MPI_SUM,Mask->Comm); + MPI_Allreduce(&py_loc, &py, 1,MPI_DOUBLE,MPI_SUM,Mask->Comm); + MPI_Allreduce(&pz_loc, &pz, 1,MPI_DOUBLE,MPI_SUM,Mask->Comm); + MPI_Allreduce(&mass_loc,&mass_glb,1,MPI_DOUBLE,MPI_SUM,Mask->Comm); - vax /= count; - vay /= count; - vaz /= count; + vax = px/mass_glb; + vay = py/mass_glb; + vaz = pz/mass_glb; double force_mag = sqrt(Fx*Fx+Fy*Fy+Fz*Fz); double dir_x = Fx/force_mag; @@ -492,7 +496,7 @@ void ScaLBL_GreyscaleModel::Run(){ dir_z = 1.0; force_mag = 1.0; } - double flow_rate = (vax*dir_x + vay*dir_y + vaz*dir_z); + double flow_rate = (px*dir_x + py*dir_y + pz*dir_z)/mass_glb; error = fabs(flow_rate - flow_rate_previous) / fabs(flow_rate); flow_rate_previous = flow_rate; diff --git a/models/GreyscaleModel.h b/models/GreyscaleModel.h index b427218b..d1399053 100644 --- a/models/GreyscaleModel.h +++ b/models/GreyscaleModel.h @@ -71,6 +71,7 @@ public: DoubleArray Velocity_x; DoubleArray Velocity_y; DoubleArray Velocity_z; + DoubleArray PorosityMap; private: MPI_Comm comm; From 2cee75ae977883539a860da7c1d595e495771d93 Mon Sep 17 00:00:00 2001 From: Mark Berrill Date: Thu, 21 Nov 2019 13:29:26 -0500 Subject: [PATCH 024/121] Copying halo when reading grid file --- models/ColorModel.cpp | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/models/ColorModel.cpp b/models/ColorModel.cpp index 69b5f485..ad0f6d66 100644 --- a/models/ColorModel.cpp +++ b/models/ColorModel.cpp @@ -4,6 +4,7 @@ color lattice boltzmann model #include "models/ColorModel.h" #include "analysis/distance.h" #include "analysis/morphology.h" +#include "common/Communication.h" #include "common/ReadMicroCT.h" #include #include @@ -191,8 +192,17 @@ void ScaLBL_ColorModel::ReadInput(){ IMAGE_INDEX++; } else if (domain_db->keyExists( "GridFile" )){ + // Read the local domain data auto input_id = readMicroCT( *domain_db, MPI_COMM_WORLD ); - for (int i=0; iid[i] = input_id(i); + // Fill the halo (assuming GCW of 1) + array size0 = { input_id.size(0), input_id.size(1), input_id.size(2) }; + ArraySize size1 = { Mask->Nx, Mask->Ny, Mask->Nz }; + ASSERT( size1[0] == size0[0]+2 && size1[1] == size0[1]+2 && size1[2] == size0[2]+2 ); + fillHalo fill( MPI_COMM_WORLD, Mask->rank_info, size0, { 1, 1, 1 }, 0, 1 ); + Array id_view; + id_view.viewRaw( size1, Mask->id ); + fill.copy( input_id, id_view ); + fill.fill( id_view ); } else if (domain_db->keyExists( "Filename" )){ auto Filename = domain_db->getScalar( "Filename" ); From 0006695d5f65d6f6a6853266b3f647a206f50336 Mon Sep 17 00:00:00 2001 From: Mark Berrill Date: Thu, 12 Dec 2019 13:58:51 -0500 Subject: [PATCH 025/121] Adding MPIFLAGS option --- cmake/libraries.cmake | 2 +- cmake/macros.cmake | 
10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/cmake/libraries.cmake b/cmake/libraries.cmake index 54d70b5d..ebc37f8f 100644 --- a/cmake/libraries.cmake +++ b/cmake/libraries.cmake @@ -77,7 +77,7 @@ MACRO( CONFIGURE_MPI ) ENDIF () ELSE () # Search for the MPI executable in the current directory - FIND_PROGRAM ( MPIEXEC NAMES mpiexec mpirun lamexec PATHS ${MPI_DIRECTORY}/bin NO_DEFAULT_PATH ) + FIND_PROGRAM( MPIEXEC NAMES mpiexec mpirun lamexec PATHS ${MPI_DIRECTORY}/bin NO_DEFAULT_PATH ) IF ( NOT MPIEXEC ) MESSAGE( FATAL_ERROR "Could not locate mpi executable" ) ENDIF() diff --git a/cmake/macros.cmake b/cmake/macros.cmake index 8791616c..d1c8dbe7 100644 --- a/cmake/macros.cmake +++ b/cmake/macros.cmake @@ -848,7 +848,7 @@ FUNCTION( ADD_${PROJ}_TEST EXEFILE ${ARGN} ) ADD_PROJ_PROVISIONAL_TEST( ${EXEFILE} ) CREATE_TEST_NAME( ${EXEFILE} ${ARGN} ) IF ( USE_MPI_FOR_SERIAL_TESTS ) - ADD_TEST( NAME ${TESTNAME} COMMAND ${MPIEXEC} "${MPIEXEC_NUMPROC_FLAG}" 1 $ ${ARGN} ) + ADD_TEST( NAME ${TESTNAME} COMMAND ${MPIEXEC} ${MPIFLAGS} "${MPIEXEC_NUMPROC_FLAG}" 1 $ ${ARGN} ) SET_PROPERTY( TEST ${TESTNAME} APPEND PROPERTY ENVIRONMENT OMPI_MCA_hwloc_base_binding_policy=none ) ELSE() ADD_TEST( NAME ${TESTNAME} COMMAND $ ${ARGN} ) @@ -877,7 +877,7 @@ FUNCTION( ADD_${PROJ}_WEEKLY_TEST EXEFILE PROCS ${ARGN} ) ELSEIF( ${PROCS} STREQUAL "1" ) CREATE_TEST_NAME( "${EXEFILE}_WEEKLY" ${ARGN} ) IF ( USE_MPI_FOR_SERIAL_TESTS ) - ADD_TEST( NAME ${TESTNAME} COMMAND ${MPIEXEC} "${MPIEXEC_NUMPROC_FLAG}" 1 $ ${ARGN} ) + ADD_TEST( NAME ${TESTNAME} COMMAND ${MPIEXEC} ${MPIFLAGS} "${MPIEXEC_NUMPROC_FLAG}" 1 $ ${ARGN} ) SET_PROPERTY( TEST ${TESTNAME} APPEND PROPERTY ENVIRONMENT OMPI_MCA_hwloc_base_binding_policy=none ) ELSE() ADD_TEST( NAME ${TESTNAME} COMMAND $ ${ARGN} ) @@ -909,7 +909,7 @@ FUNCTION( ADD_${PROJ}_TEST_PARALLEL EXEFILE PROCS ${ARGN} ) ELSEIF ( ${PROCS} GREATER ${TEST_MAX_PROCS} ) MESSAGE("Disabling test ${TESTNAME} (exceeds maximum number of processors ${TEST_MAX_PROCS})") ELSE() - ADD_TEST( NAME ${TESTNAME} COMMAND ${MPIEXEC} "${MPIEXEC_NUMPROC_FLAG}" ${PROCS} $ ${ARGN} ) + ADD_TEST( NAME ${TESTNAME} COMMAND ${MPIEXEC} ${MPIFLAGS} "${MPIEXEC_NUMPROC_FLAG}" ${PROCS} $ ${ARGN} ) SET_PROPERTY( TEST ${TESTNAME} APPEND PROPERTY ENVIRONMENT OMPI_MCA_hwloc_base_binding_policy=none ) SET_TESTS_PROPERTIES( ${TESTNAME} PROPERTIES FAIL_REGULAR_EXPRESSION "${TEST_FAIL_REGULAR_EXPRESSION}" PROCESSORS ${PROCS} ) ADD_RESOURCE_LOCK( ${TESTNAME} ${EXEFILE} ${ARGN} ) @@ -930,7 +930,7 @@ MACRO( ADD_${PROJ}_TEST_THREAD_MPI EXEFILE PROCS THREADS ${ARGN} ) SET_TESTS_PROPERTIES ( ${TESTNAME} PROPERTIES FAIL_REGULAR_EXPRESSION "${TEST_FAIL_REGULAR_EXPRESSION}" PROCESSORS ${TOT_PROCS} ) ADD_RESOURCE_LOCK( ${TESTNAME} ${EXEFILE} ${ARGN} ) ELSEIF ( USE_MPI OR USE_EXT_MPI ) - ADD_TEST( NAME ${TESTNAME} COMMAND ${MPIEXEC} "${MPIEXEC_NUMPROC_FLAG}" ${PROCS} $ ${ARGN} ) + ADD_TEST( NAME ${TESTNAME} COMMAND ${MPIEXEC} ${MPIFLAGS} "${MPIEXEC_NUMPROC_FLAG}" ${PROCS} $ ${ARGN} ) SET_PROPERTY( TEST ${TESTNAME} APPEND PROPERTY ENVIRONMENT OMPI_MCA_hwloc_base_binding_policy=none ) SET_TESTS_PROPERTIES ( ${TESTNAME} PROPERTIES FAIL_REGULAR_EXPRESSION "${TEST_FAIL_REGULAR_EXPRESSION}" PROCESSORS ${TOT_PROCS} ) ADD_RESOURCE_LOCK( ${TESTNAME} ${EXEFILE} ${ARGN} ) @@ -966,7 +966,7 @@ FUNCTION( ADD_${PROJ}_EXAMPLE EXEFILE PROCS ${ARGN} ) ADD_TEST( NAME ${TESTNAME} COMMAND $ ${ARGN} ) ELSEIF ( USE_EXT_MPI AND NOT (${PROCS} GREATER ${TEST_MAX_PROCS}) ) CREATE_TEST_NAME( "example--${EXEFILE}_${PROCS}procs" ${ARGN} ) - ADD_TEST( 
NAME ${TESTNAME} COMMAND ${MPIEXEC} "${MPIEXEC_NUMPROC_FLAG}" ${PROCS} $ ${ARGN} ) + ADD_TEST( NAME ${TESTNAME} COMMAND ${MPIEXEC} ${MPIFLAGS} "${MPIEXEC_NUMPROC_FLAG}" ${PROCS} $ ${ARGN} ) SET_PROPERTY( TEST ${TESTNAME} APPEND PROPERTY ENVIRONMENT OMPI_MCA_hwloc_base_binding_policy=none ) ENDIF() SET_TESTS_PROPERTIES( ${TESTNAME} PROPERTIES FAIL_REGULAR_EXPRESSION "${TEST_FAIL_REGULAR_EXPRESSION}" PROCESSORS ${PROCS} ) From 3c854fd002c02650e1400a44ea705dd1c84d9810 Mon Sep 17 00:00:00 2001 From: Mark Berrill Date: Thu, 2 Jan 2020 13:23:51 -0500 Subject: [PATCH 026/121] Updating StackTrace and improving performance converting uCT data --- StackTrace/StackTrace.cpp | 26 ++++++-- StackTrace/StackTrace.h | 11 ++++ StackTrace/Utilities.cpp | 58 ++++++++++++++--- StackTrace/Utilities.h | 18 +++++ StackTrace/string_view.h | 2 +- analysis/runAnalysis.cpp | 11 ++-- common/Communication.hpp | 12 ++-- common/ReadMicroCT.cpp | 37 +++++------ common/Utilities.cpp | 116 ++++++++++++++++++++++++++++++++- common/Utilities.h | 31 +++++++++ tests/lbpm_color_simulator.cpp | 70 ++++++++++---------- 11 files changed, 303 insertions(+), 89 deletions(-) diff --git a/StackTrace/StackTrace.cpp b/StackTrace/StackTrace.cpp index e9292990..55a24352 100644 --- a/StackTrace/StackTrace.cpp +++ b/StackTrace/StackTrace.cpp @@ -7,6 +7,7 @@ #include #include +#include #include #include #include @@ -348,8 +349,11 @@ static inline int exec3( const char *cmd, FUNCTION &fun ) if ( buffer[0] != 0 ) fun( buffer ); } - auto status = pclose( pipe ); - int code = WEXITSTATUS( status ); + int code = pclose( pipe ); + if ( errno == ECHILD ) { + errno = 0; + code = 0; + } std::this_thread::yield(); // Allow any signals to process resetSignal( SIGCHLD ); // Clear child exited return code; @@ -1741,7 +1745,7 @@ std::vector StackTrace::defaultSignalsToCatch() * Set the signal handlers * ****************************************************************************/ static std::function abort_fun; -static StackTrace::abort_error rethrow() +StackTrace::abort_error rethrow() { StackTrace::abort_error error; #ifdef USE_LINUX @@ -1775,14 +1779,14 @@ static StackTrace::abort_error rethrow() } return error; } -static void term_func_abort( int sig ) +void StackTrace::terminateFunctionSignal( int sig ) { StackTrace::abort_error err; err.type = StackTrace::terminateType::signal; err.signal = sig; err.bytes = StackTrace::Utilities::getMemoryUsage(); err.stack = StackTrace::backtrace(); - err.stackType = StackTrace::printStackType::global; + err.stackType = StackTrace::getDefaultStackType(); abort_fun( err ); } static bool signals_set[256] = { false }; @@ -1829,7 +1833,7 @@ void StackTrace::setErrorHandler( std::function allSignalsToCatch(); @@ -289,6 +293,13 @@ multi_stack_info generateFromString( const std::vector &str ); multi_stack_info generateFromString( const std::string &str ); +//! Set default stack type +void setDefaultStackType( StackTrace::printStackType ); + +//! 
Get default stack type +StackTrace::printStackType getDefaultStackType(); + + } // namespace StackTrace diff --git a/StackTrace/Utilities.cpp b/StackTrace/Utilities.cpp index 734a0056..11f05777 100644 --- a/StackTrace/Utilities.cpp +++ b/StackTrace/Utilities.cpp @@ -8,8 +8,10 @@ #include #include #include +#include #include #include +#include #ifdef USE_MPI #include "mpi.h" @@ -19,6 +21,10 @@ #include "MemoryApp.h" #endif +#ifdef USE_GCOV +extern "C" void __gcov_flush( void ); +#endif + #define perr std::cerr @@ -65,6 +71,12 @@ // clang-format on +#ifdef __GNUC__ +#define USE_ABI +#include +#endif + + namespace StackTrace { @@ -96,13 +108,12 @@ inline size_t findfirst( const std::vector &X, TYPE Y ) /**************************************************************************** * Function to terminate the program * ****************************************************************************/ -static bool abort_throwException = false; -static printStackType abort_stackType = printStackType::global; -static int force_exit = 0; +static bool abort_throwException = false; +static int force_exit = 0; void Utilities::setAbortBehavior( bool throwException, int stackType ) { abort_throwException = throwException; - abort_stackType = static_cast( stackType ); + StackTrace::setDefaultStackType( static_cast( stackType ) ); } void Utilities::abort( const std::string &message, const std::string &filename, const int line ) { @@ -112,16 +123,28 @@ void Utilities::abort( const std::string &message, const std::string &filename, err.type = terminateType::abort; err.line = line; err.bytes = Utilities::getMemoryUsage(); - err.stackType = abort_stackType; + err.stackType = StackTrace::getDefaultStackType(); err.stack = StackTrace::backtrace(); throw err; } -static void terminate( const StackTrace::abort_error &err ) +static std::mutex terminate_mutex; +static inline void callAbort() { +#ifdef USE_GCOV + __gcov_flush(); +#endif + terminate_mutex.unlock(); + std::abort(); +} +void Utilities::terminate( const StackTrace::abort_error &err ) +{ + // Lock mutex to ensure multiple threads do not try to abort simultaneously + terminate_mutex.lock(); + // Clear the error handlers clearErrorHandler(); // Print the message and abort if ( force_exit > 1 ) { - std::abort(); + callAbort(); } else if ( !abort_throwException ) { // Use MPI_abort (will terminate all processes) force_exit = 2; @@ -135,10 +158,11 @@ static void terminate( const StackTrace::abort_error &err ) MPI_Abort( MPI_COMM_WORLD, -1 ); } #endif - std::abort(); + callAbort(); } else { perr << err.what(); - std::abort(); + perr.flush(); + callAbort(); } } @@ -149,7 +173,7 @@ static void terminate( const StackTrace::abort_error &err ) static void setTerminateErrorHandler() { // Set the terminate routine for runtime errors - StackTrace::setErrorHandler( terminate ); + StackTrace::setErrorHandler( Utilities::terminate ); } void Utilities::setErrorHandlers() { @@ -293,4 +317,18 @@ std::string Utilities::exec( const string_view &cmd, int &exit_code ) } +/**************************************************************************** + * Get the type name * + ****************************************************************************/ +std::string Utilities::getTypeName( const std::type_info &id ) +{ + std::string name = id.name(); +#if defined( USE_ABI ) + int status; + name = abi::__cxa_demangle( name.c_str(), 0, 0, &status ); +#endif + return name; +} + + } // namespace StackTrace diff --git a/StackTrace/Utilities.h b/StackTrace/Utilities.h index 10ed9085..83c8d7aa 
100644 --- a/StackTrace/Utilities.h +++ b/StackTrace/Utilities.h @@ -4,6 +4,7 @@ #include #include #include +#include #include "StackTrace/StackTrace.h" #include "StackTrace/string_view.h" @@ -28,9 +29,14 @@ void abort( const std::string &message, const std::string &filename, const int l void setAbortBehavior( bool throwException, int stackType = 2 ); +//! Function to terminate the application +void terminate( const StackTrace::abort_error &err ); + + //! Function to set the error handlers void setErrorHandlers(); + //! Function to clear the error handlers void clearErrorHandlers(); @@ -92,6 +98,18 @@ void cause_segfault(); std::string exec( const StackTrace::string_view &cmd, int &exit_code ); +//! Return the hopefully demangled name of the given type +std::string getTypeName( const std::type_info &id ); + + +//! Return the hopefully demangled name of the given type +template +inline std::string getTypeName() +{ + return getTypeName( typeid( TYPE ) ); +} + + } // namespace Utilities } // namespace StackTrace diff --git a/StackTrace/string_view.h b/StackTrace/string_view.h index d83d1f24..ee729f63 100644 --- a/StackTrace/string_view.h +++ b/StackTrace/string_view.h @@ -119,7 +119,7 @@ public: int result = 0; for ( int i = 0; i < N && result == 0; i++ ) if ( d_data[i] != other[i] ) - result = d_data[i] < other[i] ? -i : i; + result = d_data[i] < other[i] ? -( i + 1 ) : ( i + 1 ); if ( result == 0 ) result = size() == other.size() ? 0 : size() < other.size() ? -1 : 1; return result; diff --git a/analysis/runAnalysis.cpp b/analysis/runAnalysis.cpp index caa03b1b..6c76f58b 100644 --- a/analysis/runAnalysis.cpp +++ b/analysis/runAnalysis.cpp @@ -767,6 +767,8 @@ void runAnalysis::run(int timestep, std::shared_ptr input_db, TwoPhase double *Pressure, double *Velocity, double *fq, double *Den) { int N = d_N[0]*d_N[1]*d_N[2]; + NULL_USE( N ); + NULL_USE( Phi ); auto db = input_db->getDatabase( "Analysis" ); //int timestep = db->getWithDefault( "timestep", 0 ); @@ -937,8 +939,6 @@ void runAnalysis::run(int timestep, std::shared_ptr input_db, TwoPhase ******************************************************************/ void runAnalysis::basic(int timestep, std::shared_ptr input_db, SubPhase &Averages, const double *Phi, double *Pressure, double *Velocity, double *fq, double *Den) { - int N = d_N[0]*d_N[1]*d_N[2]; - // Check which analysis steps we need to perform auto color_db = input_db->getDatabase( "Color" ); auto vis_db = input_db->getDatabase( "Visualization" ); @@ -954,7 +954,7 @@ void runAnalysis::basic(int timestep, std::shared_ptr input_db, SubPha finish(); } - PROFILE_START("run"); + PROFILE_START("basic"); // Copy the appropriate variables to the host (so we can spawn new threads) ScaLBL_DeviceBarrier(); @@ -983,7 +983,6 @@ void runAnalysis::basic(int timestep, std::shared_ptr input_db, SubPha } PROFILE_STOP("Copy data to host"); - PROFILE_START("run",1); // Spawn threads to do the analysis work //if (timestep%d_restart_interval==0){ // if ( matches(type,AnalysisType::ComputeAverages) ) { @@ -1036,12 +1035,11 @@ void runAnalysis::basic(int timestep, std::shared_ptr input_db, SubPha d_wait_vis = d_tpool.add_work(work); } - PROFILE_STOP("run"); + PROFILE_STOP("basic"); } void runAnalysis::WriteVisData(int timestep, std::shared_ptr input_db, SubPhase &Averages, const double *Phi, double *Pressure, double *Velocity, double *fq, double *Den) { - int N = d_N[0]*d_N[1]*d_N[2]; auto color_db = input_db->getDatabase( "Color" ); auto vis_db = input_db->getDatabase( "Visualization" ); //int timestep = 
color_db->getWithDefault( "timestep", 0 ); @@ -1068,7 +1066,6 @@ void runAnalysis::WriteVisData(int timestep, std::shared_ptr input_db, d_wait_vis = d_tpool.add_work(work2); //Averages.WriteVis = false; - // } PROFILE_STOP("write vis"); } diff --git a/common/Communication.hpp b/common/Communication.hpp index cb9f3f18..33fed3a7 100644 --- a/common/Communication.hpp +++ b/common/Communication.hpp @@ -44,9 +44,9 @@ Array redistribute( const RankInfoStruct& src_rank, const Array& src if ( !src_data.empty() ) { int i1[3] = { src_size[0] * src_rank.ix, src_size[1] * src_rank.jy, src_size[2] * src_rank.kz }; int i2[3] = { i1[0] + src_size[0] - 1, i1[1] + src_size[1] - 1, i1[2] + src_size[2] - 1 }; - for ( size_t i=0; i redistribute( const RankInfoStruct& src_rank, const Array& src Array dst_data( dst_size[0], dst_size[1], dst_size[2] ); int i1[3] = { dst_size[0] * dst_rank.ix, dst_size[1] * dst_rank.jy, dst_size[2] * dst_rank.kz }; int i2[3] = { i1[0] + dst_size[0] - 1, i1[1] + dst_size[1] - 1, i1[2] + dst_size[2] - 1 }; - for ( size_t i=0; i readMicroCT( const Database& domain, MPI_Comm comm ) auto n = domain.getVector( "n" ); int rank = comm_rank(MPI_COMM_WORLD); auto nproc = domain.getVector( "nproc" ); - auto ReadValues = domain.getVector( "ReadValues" ); - auto WriteValues = domain.getVector( "WriteValues" ); RankInfoStruct rankInfo( rank, nproc[0], nproc[1], nproc[2] ); // Determine the largest file number to get @@ -95,29 +93,26 @@ Array readMicroCT( const Database& domain, MPI_Comm comm ) ERROR( "Invalid name for first file" ); } data = readMicroCT( filename ); - - // Relabel the data - for (int k = 0; k<1024; k++){ - for (int j = 0; j<1024; j++){ - for (int i = 0; i<1024; i++){ - //n = k*Nfx*Nfy + j*Nfx + i; - //char locval = loc_id[n]; - char locval = data(i,j,k); - for (int idx=0; idx( "ReadValues" ); + auto WriteValues = domain.getVector( "WriteValues" ); + ASSERT( ReadValues.size() == WriteValues.size() ); + int readMaxValue = 0; + for ( auto v : ReadValues ) + readMaxValue = std::max( data.max()+1, v ); + std::vector map( readMaxValue + 1, -1 ); + for ( size_t i=0; i= 0 && t <= readMaxValue ); + data(i) = map[t]; + } + return data; } diff --git a/common/Utilities.cpp b/common/Utilities.cpp index f6d810af..1cf764be 100644 --- a/common/Utilities.cpp +++ b/common/Utilities.cpp @@ -1,10 +1,116 @@ #include "common/Utilities.h" +#include "StackTrace/StackTrace.h" +#include "StackTrace/ErrorHandlers.h" + +#ifdef USE_TIMER +#include "MemoryApp.h" +#include "ProfilerApp.h" +#endif + +#ifdef USE_MPI +#include "mpi.h" +#endif -#include #include +#include +#include -// Factor a number into it's prime factors +// Mutex for Utility functions +static std::mutex Utilities_mutex; + + +/**************************************************************************** + * Function to perform the default startup/shutdown sequences * + ****************************************************************************/ +void Utilities::startup( int argc, char **argv ) +{ + NULL_USE( argc ); + NULL_USE( argv ); + // Disable OpenMP + Utilities::setenv( "OMP_NUM_THREADS", "1" ); + Utilities::setenv( "MKL_NUM_THREADS", "1" ); + // Start MPI +#ifdef USE_MPI + int provided; + MPI_Init_thread( &argc, &argv, MPI_THREAD_MULTIPLE, &provided ); + if ( provided < MPI_THREAD_MULTIPLE ) { + int rank; + MPI_Comm_rank( MPI_COMM_WORLD, &rank ); + if ( rank == 0 ) + std::cerr << "Warning: Failed to start MPI with necessary thread support, thread support will be disabled" << std::endl; + } + StackTrace::globalCallStackInitialize( 
MPI_COMM_WORLD ); +#endif + // Set the error handlers + Utilities::setAbortBehavior( true, 3 ); + Utilities::setErrorHandlers(); +} +void Utilities::shutdown() +{ + // Clear the error handlers + Utilities::clearErrorHandlers(); + StackTrace::clearSignals(); + StackTrace::clearSymbols(); + int rank = 0; +#ifdef USE_MPI + MPI_Comm_rank( MPI_COMM_WORLD, &rank ); + StackTrace::globalCallStackFinalize(); + MPI_Barrier( MPI_COMM_WORLD ); + MPI_Finalize(); +#endif +#ifdef USE_TIMER + PROFILE_DISABLE(); + auto memory = MemoryApp::getMemoryStats(); + if ( rank == 0 && memory.N_new > memory.N_delete ) + MemoryApp::print( std::cout ); +#endif +} + + +/**************************************************************************** + * Function to set an environemental variable * + ****************************************************************************/ +void Utilities::setenv( const std::string &name, const std::string &value ) +{ + Utilities_mutex.lock(); +#if defined( USE_LINUX ) || defined( USE_MAC ) + bool pass = false; + if ( !value.empty() ) + pass = ::setenv( name.data(), value.data(), 1 ) == 0; + else + pass = ::unsetenv( name.data() ) == 0; +#elif defined( USE_WINDOWS ) + bool pass = SetEnvironmentVariable( name.data(), value.data() ) != 0; +#else +#error Unknown OS +#endif + Utilities_mutex.unlock(); + if ( !pass ) { + char msg[1024]; + if ( !value.empty() ) + sprintf( + msg, "Error setting enviornmental variable: %s=%s\n", name.data(), value.data() ); + else + sprintf( msg, "Error clearing enviornmental variable: %s\n", name.data() ); + ERROR( msg ); + } +} +std::string Utilities::getenv( const std::string &name ) +{ + std::string var; + Utilities_mutex.lock(); + auto tmp = std::getenv( name.data() ); + if ( tmp ) + var = std::string( tmp ); + Utilities_mutex.unlock(); + return var; +} + + +/**************************************************************************** + * Factor a number into it's prime factors * + ****************************************************************************/ std::vector Utilities::factor(size_t number) { if ( number<=3 ) @@ -54,9 +160,13 @@ std::vector Utilities::factor(size_t number) } -// Dummy function to prevent compiler from optimizing away variable +/**************************************************************************** + * Dummy function to prevent compiler from optimizing away variable * + ****************************************************************************/ void Utilities::nullUse( void* data ) { NULL_USE(data); } + + diff --git a/common/Utilities.h b/common/Utilities.h index 90cb4008..da579966 100644 --- a/common/Utilities.h +++ b/common/Utilities.h @@ -25,6 +25,37 @@ using StackTrace::Utilities::sleep_ms; using StackTrace::Utilities::sleep_s; +/*! + * \brief Start MPI, error handlers + * \details This routine will peform the default startup sequence + * \param argc argc from main + * \param argv argv from main + */ +void startup( int argc, char **argv ); + +/*! + * \brief Stop MPI, error handlers + * \details This routine will peform the default shutdown sequence to match startup + */ +void shutdown(); + + +/*! + * Get an environmental variable + * @param name The name of the environmental variable + * @return The value of the enviornmental variable + */ +std::string getenv( const std::string &name ); + + +/*! + * Set an environmental variable + * @param name The name of the environmental variable + * @param value The value to set + */ +void setenv( const std::string &name, const std::string &value ); + + //! 
std::string version of sprintf inline std::string stringf( const char *format, ... ); diff --git a/tests/lbpm_color_simulator.cpp b/tests/lbpm_color_simulator.cpp index e8e675e2..1f63c653 100644 --- a/tests/lbpm_color_simulator.cpp +++ b/tests/lbpm_color_simulator.cpp @@ -7,6 +7,7 @@ #include #include "models/ColorModel.h" +#include "common/Utilities.h" //#define WRE_SURFACES @@ -15,7 +16,6 @@ * James E. McClure 2013-2014 */ -using namespace std; //************************************************************************* // Implementation of Two-Phase Immiscible LBM using CUDA @@ -23,27 +23,26 @@ using namespace std; int main(int argc, char **argv) { - // Initialize MPI - int provided_thread_support = -1; - MPI_Init_thread(&argc,&argv,MPI_THREAD_MULTIPLE,&provided_thread_support); - MPI_Comm comm; - MPI_Comm_dup(MPI_COMM_WORLD,&comm); - int rank = comm_rank(comm); - int nprocs = comm_size(comm); - if ( rank==0 && provided_thread_support Date: Wed, 22 Jan 2020 12:01:29 -0500 Subject: [PATCH 027/121] Fixing compile warnings --- IO/netcdf.cpp | 1 + analysis/SubPhase.cpp | 10 ++-- analysis/SubPhase.h | 2 +- analysis/TwoPhase.cpp | 7 ++- analysis/dcel.cpp | 6 ++- analysis/morphology.cpp | 18 +------- analysis/uCT.cpp | 2 + cmake/libraries.cmake | 4 ++ common/Domain.cpp | 42 ++++++++--------- common/Domain.h | 10 ++-- common/MPI_Helpers.h | 6 +++ common/ScaLBL.cpp | 25 +++++----- models/ColorModel.cpp | 53 ++++++++++----------- models/DFHModel.cpp | 1 - models/MRTModel.cpp | 4 -- tests/GenerateSphereTest.cpp | 5 +- tests/TestBubbleDFH.cpp | 4 +- tests/TestColorGradDFH.cpp | 6 +-- tests/TestCommD3Q19.cpp | 27 +++-------- tests/TestFluxBC.cpp | 20 ++------ tests/TestForceD3Q19.cpp | 14 ++---- tests/TestInterfaceSpeed.cpp | 6 +-- tests/TestMap.cpp | 6 +-- tests/TestMassConservationD3Q7.cpp | 25 ++-------- tests/TestNetcdf.cpp | 1 - tests/TestSubphase.cpp | 6 +-- tests/TestTopo3D.cpp | 3 +- tests/TestTorus.cpp | 7 ++- tests/TestTorusEvolve.cpp | 10 ++-- tests/lbpm_morph_pp.cpp | 8 ++-- tests/lbpm_morphdrain_pp.cpp | 31 +++++-------- tests/lbpm_morphopen_pp.cpp | 26 ++++------- tests/lbpm_permeability_simulator.cpp | 9 +--- tests/lbpm_refine_pp.cpp | 29 ++++++------ tests/lbpm_serial_decomp.cpp | 39 +++++++--------- tests/lbpm_uCT_pp.cpp | 66 +++++++++++++-------------- tests/pmmc_cylinder.cpp | 10 ---- 37 files changed, 215 insertions(+), 334 deletions(-) diff --git a/IO/netcdf.cpp b/IO/netcdf.cpp index e355c344..b36bb6d6 100644 --- a/IO/netcdf.cpp +++ b/IO/netcdf.cpp @@ -189,6 +189,7 @@ std::vector getAttDim( int fid, const std::string& att ) { std::vector dim(1,0); int err = nc_inq_attlen( fid, NC_GLOBAL, att.c_str(), dim.data() ); + CHECK_NC_ERR( err ); return dim; } std::vector getVarNames( int fid ) diff --git a/analysis/SubPhase.cpp b/analysis/SubPhase.cpp index 0848ded1..76541ffd 100644 --- a/analysis/SubPhase.cpp +++ b/analysis/SubPhase.cpp @@ -169,7 +169,6 @@ void SubPhase::Basic(){ nb.reset(); wb.reset(); - double nA,nB; double count_w = 0.0; double count_n = 0.0; @@ -297,8 +296,8 @@ void SubPhase::Basic(){ double saturation=gwb.V/(gwb.V + gnb.V); double water_flow_rate=gwb.V*(gwb.Px*dir_x + gwb.Py*dir_y + gwb.Pz*dir_z)/gwb.M / Dm->Volume; double not_water_flow_rate=gnb.V*(gnb.Px*dir_x + gnb.Py*dir_y + gnb.Pz*dir_z)/gnb.M/ Dm->Volume; - double total_flow_rate = water_flow_rate + not_water_flow_rate; - double fractional_flow= water_flow_rate / total_flow_rate; + //double total_flow_rate = water_flow_rate + not_water_flow_rate; + //double fractional_flow = water_flow_rate / total_flow_rate; 
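// Note on the lines that follow: the effective permeability is a Darcy-form estimate,
// k_eff ~ (kinematic viscosity) * (phase-averaged flow rate) / (driving force magnitude),
// with the voxel length squared (h*h) converting the lattice-unit value to physical units.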
double h = Dm->voxel_length; double krn = h*h*nu_n*not_water_flow_rate / force_mag ; @@ -697,7 +696,8 @@ void SubPhase::Full(){ } -void SubPhase::AggregateLabels(char *FILENAME){ +void SubPhase::AggregateLabels( const std::string& filename ) +{ int nx = Dm->Nx; int ny = Dm->Ny; @@ -721,7 +721,7 @@ void SubPhase::AggregateLabels(char *FILENAME){ } MPI_Barrier(Dm->Comm); - Dm->AggregateLabels(FILENAME); + Dm->AggregateLabels( filename ); } diff --git a/analysis/SubPhase.h b/analysis/SubPhase.h index 683fc46a..71b87ef0 100644 --- a/analysis/SubPhase.h +++ b/analysis/SubPhase.h @@ -101,7 +101,7 @@ public: void Basic(); void Full(); void Write(int time); - void AggregateLabels(char *FILENAME); + void AggregateLabels( const std::string& filename ); private: FILE *TIMELOG; diff --git a/analysis/TwoPhase.cpp b/analysis/TwoPhase.cpp index 9b87daef..9b2e5fd8 100644 --- a/analysis/TwoPhase.cpp +++ b/analysis/TwoPhase.cpp @@ -204,6 +204,7 @@ TwoPhase::~TwoPhase() void TwoPhase::ColorToSignedDistance(double Beta, DoubleArray &ColorData, DoubleArray &DistData) { + NULL_USE( Beta ); /*double factor,temp,value; factor=0.5/Beta; // Initialize to -1,1 (segmentation) @@ -627,8 +628,8 @@ void TwoPhase::ComputeLocal() void TwoPhase::AssignComponentLabels() { - int LabelNWP=1; - int LabelWP=2; + //int LabelNWP=1; + //int LabelWP=2; // NOTE: labeling the wetting phase components is tricky! One sandstone media had over 800,000 components // NumberComponents_WP = ComputeGlobalPhaseComponent(Dm->Nx-2,Dm->Ny-2,Dm->Nz-2,Dm->rank_info,PhaseID,LabelWP,Label_WP); // treat all wetting phase is connected @@ -1172,6 +1173,8 @@ void TwoPhase::Reduce() void TwoPhase::NonDimensionalize(double D, double viscosity, double IFT) { + NULL_USE( viscosity ); + NULL_USE( IFT ); awn_global *= D; ans_global *= D; ans_global *= D; diff --git a/analysis/dcel.cpp b/analysis/dcel.cpp index 4c7be292..ca21c0e6 100644 --- a/analysis/dcel.cpp +++ b/analysis/dcel.cpp @@ -352,6 +352,8 @@ double DECL::EdgeAngle(int edge) void Isosurface(DoubleArray &A, const double &v) { + NULL_USE( v ); + Point P,Q; Point PlaceHolder; Point C0,C1,C2,C3,C4,C5,C6,C7; @@ -562,7 +564,7 @@ void Isosurface(DoubleArray &A, const double &v) if (P.z == 1.0 && Q.z == 1.0) HalfEdge[idx_edge][3] = -6; // ghost twin for z=1 face } // Find all the angles - for (int idx=0; idx int nx = Dm->Nx; int ny = Dm->Ny; int nz = Dm->Nz; - int iproc = Dm->iproc(); - int jproc = Dm->jproc(); - int kproc = Dm->kproc(); int nprocx = Dm->nprocx(); int nprocy = Dm->nprocy(); int nprocz = Dm->nprocz(); @@ -122,7 +119,6 @@ double MorphOpen(DoubleArray &SignDist, signed char *id, std::shared_ptr int sendtag,recvtag; sendtag = recvtag = 7; - int x,y,z; int ii,jj,kk; int Nx = nx; int Ny = ny; @@ -336,9 +332,6 @@ double MorphDrain(DoubleArray &SignDist, signed char *id, std::shared_ptrNx; int ny = Dm->Ny; int nz = Dm->Nz; - int iproc = Dm->iproc(); - int jproc = Dm->jproc(); - int kproc = Dm->kproc(); int nprocx = Dm->nprocx(); int nprocy = Dm->nprocy(); int nprocz = Dm->nprocz(); @@ -427,7 +420,6 @@ double MorphDrain(DoubleArray &SignDist, signed char *id, std::shared_ptr &id, std::shared_ptr Dm, double TargetGrowth){ - +double MorphGrow(DoubleArray &BoundaryDist, DoubleArray &Dist, Array &id, std::shared_ptr Dm, double TargetGrowth) +{ int Nx = Dm->Nx; int Ny = Dm->Ny; int Nz = Dm->Nz; - int iproc = Dm->iproc(); - int jproc = Dm->jproc(); - int kproc = Dm->kproc(); - int nprocx = Dm->nprocx(); - int nprocy = Dm->nprocy(); - int nprocz = Dm->nprocz(); int rank = Dm->rank(); double count=0.0; diff 
--git a/analysis/uCT.cpp b/analysis/uCT.cpp index 6a327432..912f8e85 100644 --- a/analysis/uCT.cpp +++ b/analysis/uCT.cpp @@ -157,6 +157,7 @@ void solve( const Array& VOL, Array& Mean, Array& ID, // int depth = 5; // float sigsq=0.1; int nlm_count = NLM3D( MultiScaleSmooth, Mean, Dist, NonLocalMean, depth, sigsq); + NULL_USE( nlm_count ); fillFloat.fill(NonLocalMean); } @@ -201,6 +202,7 @@ void refine( const Array& Dist_coarse, // int depth = 3; // float sigsq = 0.1; int nlm_count = NLM3D( MultiScaleSmooth, Mean, Dist, NonLocalMean, depth, sigsq); + NULL_USE( nlm_count ); fillFloat.fill(NonLocalMean); segment( NonLocalMean, ID, 0.001 ); for (size_t i=0; i db ) INSIST(nprocs == nproc[0]*nproc[1]*nproc[2],"Fatal error in processor count!"); } -void Domain::Decomp(std::string Filename) +void Domain::Decomp( const std::string& Filename ) { //....................................................................... // Reading the domain information file @@ -251,7 +254,6 @@ void Domain::Decomp(std::string Filename) int nprocs, nprocx, nprocy, nprocz, nx, ny, nz; int64_t global_Nx,global_Ny,global_Nz; int64_t i,j,k,n; - int BC=0; int64_t xStart,yStart,zStart; int checkerSize; //int inlet_layers_x, inlet_layers_y, inlet_layers_z; @@ -331,7 +333,7 @@ void Domain::Decomp(std::string Filename) if (RANK==0){ printf("Input media: %s\n",Filename.c_str()); printf("Relabeling %lu values\n",ReadValues.size()); - for (int idx=0; idxkeyExists( "image_sequence" )){ auto ImageList = color_db->getVector( "image_sequence"); int IMAGE_INDEX = color_db->getWithDefault( "image_index", 0 ); - int IMAGE_COUNT = ImageList.size(); std::string first_image = ImageList[IMAGE_INDEX]; Mask->Decomp(first_image); IMAGE_INDEX++; @@ -195,9 +194,9 @@ void ScaLBL_ColorModel::ReadInput(){ // Read the local domain data auto input_id = readMicroCT( *domain_db, MPI_COMM_WORLD ); // Fill the halo (assuming GCW of 1) - array size0 = { input_id.size(0), input_id.size(1), input_id.size(2) }; - ArraySize size1 = { Mask->Nx, Mask->Ny, Mask->Nz }; - ASSERT( size1[0] == size0[0]+2 && size1[1] == size0[1]+2 && size1[2] == size0[2]+2 ); + array size0 = { (int) input_id.size(0), (int) input_id.size(1), (int) input_id.size(2) }; + ArraySize size1 = { (size_t) Mask->Nx, (size_t) Mask->Ny, (size_t) Mask->Nz }; + ASSERT( (int) size1[0] == size0[0]+2 && (int) size1[1] == size0[1]+2 && (int) size1[2] == size0[2]+2 ); fillHalo fill( MPI_COMM_WORLD, Mask->rank_info, size0, { 1, 1, 1 }, 0, 1 ); Array id_view; id_view.viewRaw( size1, Mask->id ); @@ -216,7 +215,6 @@ void ScaLBL_ColorModel::ReadInput(){ // Generate the signed distance map // Initialize the domain and communication Array id_solid(Nx,Ny,Nz); - int count = 0; // Solve for the position of the solid phase for (int k=0;kSDs(i,j,k) = 2.0*double(id_solid(i,j,k))-1.0; } @@ -266,7 +263,7 @@ void ScaLBL_ColorModel::AssignComponentLabels(double *phase) double label_count_global[NLABELS]; // Assign the labels - for (int idx=0; idxid[i] = Mask->id[i]; - for (int idx=0; idxComm, label_count[idx]); + for (size_t idx=0; idxComm, label_count[idx]); if (rank==0){ printf("Component labels: %lu \n",NLABELS); @@ -373,16 +371,16 @@ void ScaLBL_ColorModel::Create(){ } // check that TmpMap is valid for (int idx=0; idxLastExterior(); idx++){ - int n = TmpMap[idx]; + auto n = TmpMap[idx]; if (n > Nx*Ny*Nz){ - printf("Bad value! idx=%i \n"); + printf("Bad value! 
idx=%i \n", n); TmpMap[idx] = Nx*Ny*Nz-1; } } for (int idx=ScaLBL_Comm->FirstInterior(); idxLastInterior(); idx++){ - int n = TmpMap[idx]; - if (n > Nx*Ny*Nz){ - printf("Bad value! idx=%i \n"); + auto n = TmpMap[idx]; + if ( n > Nx*Ny*Nz ){ + printf("Bad value! idx=%i \n",n); TmpMap[idx] = Nx*Ny*Nz-1; } } @@ -553,8 +551,9 @@ void ScaLBL_ColorModel::Run(){ } if (color_db->keyExists( "residual_endpoint_threshold" )){ - RESIDUAL_ENDPOINT_THRESHOLD = color_db->getScalar( "residual_endpoint_threshold" ); + RESIDUAL_ENDPOINT_THRESHOLD = color_db->getScalar( "residual_endpoint_threshold" ); } + NULL_USE( RESIDUAL_ENDPOINT_THRESHOLD ); if (color_db->keyExists( "noise_threshold" )){ NOISE_THRESHOLD = color_db->getScalar( "noise_threshold" ); USE_BUMP_RATE = true; @@ -874,7 +873,7 @@ void ScaLBL_ColorModel::Run(){ WriteHeader=true; kr_log_file = fopen("relperm.csv","a"); if (WriteHeader) - fprintf(kr_log_file,"timesteps sat.water eff.perm.oil eff.perm.water eff.perm.oil.connected eff.perm.water.connected eff.perm.oil.disconnected eff.perm.water.disconnected cap.pressure cap.pressure.connected pressure.drop Ca M\n",CURRENT_STEADY_TIMESTEPS,current_saturation,kAeff,kBeff,pAB,viscous_pressure_drop,Ca,Mobility); + fprintf(kr_log_file,"timesteps sat.water eff.perm.oil eff.perm.water eff.perm.oil.connected eff.perm.water.connected eff.perm.oil.disconnected eff.perm.water.disconnected cap.pressure cap.pressure.connected pressure.drop Ca M\n"); fprintf(kr_log_file,"%i %.5g %.5g %.5g %.5g %.5g %.5g %.5g %.5g %.5g %.5g %.5g %.5g\n",CURRENT_STEADY_TIMESTEPS,current_saturation,kAeff,kBeff,kAeff_connected,kBeff_connected,kAeff_disconnected,kBeff_disconnected,pAB,pAB_connected,viscous_pressure_drop,Ca,Mobility); fclose(kr_log_file); @@ -937,7 +936,7 @@ void ScaLBL_ColorModel::Run(){ else if (USE_SEED){ delta_volume = volA*Dm->Volume - initial_volume; CURRENT_MORPH_TIMESTEPS += analysis_interval; - double massChange = SeedPhaseField(seed_water); + //double massChange = SeedPhaseField(seed_water); if (rank==0) printf("***Seed water in oil %f, volume change %f / %f ***\n", seed_water, delta_volume, delta_volume_target); } else if (USE_MORPHOPEN_OIL){ @@ -1010,7 +1009,6 @@ void ScaLBL_ColorModel::Run(){ double ScaLBL_ColorModel::ImageInit(std::string Filename){ - bool suppress = false; if (rank==0) printf("Re-initializing fluids from file: %s \n", Filename.c_str()); Mask->Decomp(Filename); for (int i=0; iid[i]; // save what was read @@ -1080,10 +1078,9 @@ double ScaLBL_ColorModel::MorphOpenConnected(double target_volume_change){ ComputeGlobalBlobIDs(nx-2,ny-2,nz-2,Dm->rank_info,phase,Averages->SDs,vF,vS,phase_label,Dm->Comm); MPI_Barrier(Dm->Comm); - int count_oil=0; - int count_connected=0; - int count_porespace=0; - int count_water=0; + long long count_connected=0; + long long count_porespace=0; + long long count_water=0; for (int k=1; kComm, count); + double volume_initial = sumReduce( Dm->Comm, count); /* sprintf(LocalRankFilename,"phi_initial.%05i.raw",rank); FILE *INPUT = fopen(LocalRankFilename,"wb"); @@ -1352,16 +1348,16 @@ double ScaLBL_ColorModel::MorphInit(const double beta, const double target_delta } } } - volume_connected = sumReduce( Dm->Comm, count); + double volume_connected = sumReduce( Dm->Comm, count); second_biggest = sumReduce( Dm->Comm, second_biggest); - int reach_x, reach_y, reach_z; + /*int reach_x, reach_y, reach_z; for (int k=0; k phase_distance CalcDist(phase_distance,phase_id,*Dm); @@ -1417,7 +1413,6 @@ double ScaLBL_ColorModel::MorphInit(const double beta, const double target_delta for 
(int k=0; kSDs(i,j,k) > 0.f){ if (d < 3.f){ @@ -1441,7 +1436,7 @@ double ScaLBL_ColorModel::MorphInit(const double beta, const double target_delta } } } - volume_final= sumReduce( Dm->Comm, count); + double volume_final= sumReduce( Dm->Comm, count); delta_volume = (volume_final-volume_initial); if (rank == 0) printf("MorphInit: change fluid volume fraction by %f \n", delta_volume/volume_initial); diff --git a/models/DFHModel.cpp b/models/DFHModel.cpp index 7c7898de..4eb03bea 100644 --- a/models/DFHModel.cpp +++ b/models/DFHModel.cpp @@ -114,7 +114,6 @@ void ScaLBL_DFHModel::SetDomain(){ } void ScaLBL_DFHModel::ReadInput(){ - size_t readID; //....................................................................... if (rank == 0) printf("Read input media... \n"); //....................................................................... diff --git a/models/MRTModel.cpp b/models/MRTModel.cpp index cd52aa2f..9ba733ae 100644 --- a/models/MRTModel.cpp +++ b/models/MRTModel.cpp @@ -94,7 +94,6 @@ void ScaLBL_MRTModel::SetDomain(){ void ScaLBL_MRTModel::ReadInput(){ int rank=Dm->rank(); - size_t readID; //....................................................................... //....................................................................... Mask->ReadIDs(); @@ -106,7 +105,6 @@ void ScaLBL_MRTModel::ReadInput(){ // Generate the signed distance map // Initialize the domain and communication Array id_solid(Nx,Ny,Nz); - int count = 0; // Solve for the position of the solid phase for (int k=0;kgetScalar( "nspheres"); //printf("Set domain \n"); - int BoundaryCondition=1; + //int BoundaryCondition=1; //Nz += 2; //Nx = Ny = Nz; // Cubic domain int N = Nx*Ny*Nz; @@ -396,7 +396,7 @@ int main(int argc, char **argv) int sum = 0; double sum_local; double iVol_global = 1.0/(1.0*(Nx-2)*(Ny-2)*(Nz-2)*nprocs); - double porosity, pore_vol; + double porosity; //........................................................................... DoubleArray SignDist(Nx,Ny,Nz); //....................................................................... @@ -450,7 +450,6 @@ int main(int argc, char **argv) } } sum=0; - pore_vol = 0.0; for ( k=1;kRegularLayout(Map,Phi,PhaseField); FILE *OUTFILE; - sprintf(LocalRankFilename,"Phase.raw",rank); + sprintf(LocalRankFilename,"Phase.raw"); OUTFILE = fopen(LocalRankFilename,"wb"); fwrite(PhaseField.data(),8,N,OUTFILE); fclose(OUTFILE); diff --git a/tests/TestColorGradDFH.cpp b/tests/TestColorGradDFH.cpp index 2b0abf32..d6376d82 100644 --- a/tests/TestColorGradDFH.cpp +++ b/tests/TestColorGradDFH.cpp @@ -53,9 +53,6 @@ int main(int argc, char **argv) int Nx = db->getVector( "n" )[0]; int Ny = db->getVector( "n" )[1]; int Nz = db->getVector( "n" )[2]; - int nprocx = db->getVector( "nproc" )[0]; - int nprocy = db->getVector( "nproc" )[1]; - int nprocz = db->getVector( "nproc" )[2]; if (rank==0){ printf("********************************************************\n"); @@ -64,7 +61,7 @@ int main(int argc, char **argv) } // Get the rank info - std::shared_ptr Dm(new Domain(db,comm)); + auto Dm = std::make_shared(db,comm); Nx += 2; Ny += 2; Nz += 2; @@ -111,7 +108,6 @@ int main(int argc, char **argv) MPI_Barrier(comm); //......................device distributions................................. 
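// The pointer conversions in the hunks above, replacing std::shared_ptr<T> x(new T(...)) with
// auto x = std::make_shared<T>(...), are behavior-preserving: make_shared allocates the object and
// its control block in one step and avoids repeating the type name, which is why the same pattern
// recurs across these test drivers.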
- int dist_mem_size = Np*sizeof(double); int neighborSize=18*Np*sizeof(int); if (rank==0) printf ("Allocating distributions \n"); int *NeighborList; diff --git a/tests/TestCommD3Q19.cpp b/tests/TestCommD3Q19.cpp index 0c3988c8..e1fa821f 100644 --- a/tests/TestCommD3Q19.cpp +++ b/tests/TestCommD3Q19.cpp @@ -49,7 +49,7 @@ extern void GlobalFlipScaLBL_D3Q19_Init(double *dist, IntArray Map, int Np, int {1,1,0},{-1,-1,0},{1,-1,0},{-1,1,0},{1,0,1},{-1,0,-1},{1,0,-1},{-1,0,1}, {0,1,1},{0,-1,-1},{0,1,-1},{0,-1,1}}; - int q,i,j,k,n,N; + int q,i,j,k; int Cqx,Cqy,Cqz; // Discrete velocity int x,y,z; // Global indices int xn,yn,zn; // Global indices of neighbor @@ -59,8 +59,6 @@ extern void GlobalFlipScaLBL_D3Q19_Init(double *dist, IntArray Map, int Np, int Y = Ny*nprocy; Z = Nz*nprocz; NULL_USE(Z); - N = (Nx+2)*(Ny+2)*(Nz+2); // size of the array including halo - for (k=0; kid[n] = id[n]; } @@ -270,7 +255,7 @@ int main(int argc, char **argv) for (k=1;kgetVector( "n" )[0]; int Ny = db->getVector( "n" )[1]; int Nz = db->getVector( "n" )[2]; - int nprocx = db->getVector( "nproc" )[0]; - int nprocy = db->getVector( "nproc" )[1]; - int nprocz = db->getVector( "nproc" )[2]; - std::shared_ptr Dm(new Domain(db,comm)); + auto Dm = std::make_shared(db,comm); Nx += 2; Ny+=2; Nz += 2; Nx = Ny = Nz; // Cubic domain @@ -55,8 +48,7 @@ int main (int argc, char **argv) //....................................................................... // Assign the phase ID //....................................................................... - char *id; - id = new char[N]; + auto id = new char[N]; for (k=0;k 1e-12){ error = 1; printf(" Inlet error %f \n",err); @@ -185,7 +175,7 @@ int main (int argc, char **argv) // Consider a larger number of timesteps and simulate flow double Fx, Fy, Fz; double tau = 1.0; - double mu=(tau-0.5)/3.0; + //double mu=(tau-0.5)/3.0; double rlx_setA=1.0/tau; double rlx_setB = 8.f*(2.f-rlx_setA)/(8.f-rlx_setA); dout=1.f; diff --git a/tests/TestForceD3Q19.cpp b/tests/TestForceD3Q19.cpp index 65453122..b8f88aae 100644 --- a/tests/TestForceD3Q19.cpp +++ b/tests/TestForceD3Q19.cpp @@ -457,24 +457,16 @@ int main (int argc, char **argv) double *x = new double[1]; ASSERT(x!=NULL); } - - // set the error code - // Note: the error code should be consistent across all processors - int error = 0; int Np = 1; - int Q = 9; + //int Q = 9; double Fx = 1.0; double Fy = 1.0; double Fz = 1.0; - double *dist; - double * Velocity; - - dist = new double [19*Np]; - Velocity = new double [3*Np]; - + auto dist = new double [19*Np]; + //auto Velocity = new double [3*Np for (int n=0; ngetVector( "n" )[1]; int Nz = domain_db->getVector( "n" )[2]; - std::shared_ptr Dm(new Domain(domain_db,comm)); + auto Dm = std::make_shared(domain_db,comm); Nx+=2; Ny+=2; Nz+=2; @@ -44,7 +44,7 @@ int main (int argc, char *argv[]) Dm->CommInit(); - std::shared_ptr Averages(new TwoPhase(Dm)); + auto Averages = std::make_shared(Dm); int timestep=0; double Cx,Cy,Cz; diff --git a/tests/TestMap.cpp b/tests/TestMap.cpp index 3e56cdf9..a47c0d9e 100644 --- a/tests/TestMap.cpp +++ b/tests/TestMap.cpp @@ -56,11 +56,7 @@ int main(int argc, char **argv) int Nx = db->getVector( "n" )[0]; int Ny = db->getVector( "n" )[1]; int Nz = db->getVector( "n" )[2]; - int nprocx = db->getVector( "nproc" )[0]; - int nprocy = db->getVector( "nproc" )[1]; - int nprocz = db->getVector( "nproc" )[2]; - - std::shared_ptr Dm(new Domain(db,comm)); + auto Dm = std::make_shared(db,comm); Nx += 2; Ny += 2; diff --git a/tests/TestMassConservationD3Q7.cpp 
b/tests/TestMassConservationD3Q7.cpp index d6f4243d..bbfe8cae 100644 --- a/tests/TestMassConservationD3Q7.cpp +++ b/tests/TestMassConservationD3Q7.cpp @@ -66,9 +66,6 @@ inline void InitializeBubble(ScaLBL_ColorModel &ColorModel, double BubbleRadius) int main(int argc, char **argv) { - //***************************************** - // ***** MPI STUFF **************** - //***************************************** // Initialize MPI int rank,nprocs; MPI_Init(&argc,&argv); @@ -76,19 +73,6 @@ int main(int argc, char **argv) MPI_Comm_rank(comm,&rank); MPI_Comm_size(comm,&nprocs); // parallel domain size (# of sub-domains) - int nprocx,nprocy,nprocz; - int iproc,jproc,kproc; - int sendtag,recvtag; - //***************************************** - // MPI ranks for all 18 neighbors - //********************************** - int rank_x,rank_y,rank_z,rank_X,rank_Y,rank_Z; - int rank_xy,rank_XY,rank_xY,rank_Xy; - int rank_xz,rank_XZ,rank_xZ,rank_Xz; - int rank_yz,rank_YZ,rank_yZ,rank_Yz; - //********************************** - MPI_Request req1[18],req2[18]; - MPI_Status stat1[18],stat2[18]; if (rank == 0){ printf("********************************************************\n"); @@ -110,7 +94,6 @@ int main(int argc, char **argv) Ny = CM.Ny; Nz = CM.Nz; N = Nx*Ny*Nz; - int dist_mem_size = N*sizeof(double); //CM.ReadInput(); double radius=0.4*double(Nx); @@ -142,11 +125,9 @@ int main(int argc, char **argv) CM.Run(); int D3Q7[7][3]={{0,0,0},{1,0,0},{-1,0,0},{0,1,0},{0,-1,0},{0,0,1},{0,0,-1}}; // Compare and make sure mass is conserved at every lattice site - double *Error; - Error = new double [N]; - double *A_q, *B_q; - A_q = new double [7*Np]; - B_q = new double [7*Np]; + auto Error = new double[N]; + auto A_q = new double[7*Np]; + //auto B_q = new double[7*Np]; bool CleanCheck = true; double original,final, sum_q; double total_mass_A_0 = 0.0; diff --git a/tests/TestNetcdf.cpp b/tests/TestNetcdf.cpp index 7b6bae02..5ea5139f 100644 --- a/tests/TestNetcdf.cpp +++ b/tests/TestNetcdf.cpp @@ -14,7 +14,6 @@ void load( const std::string& ); void test_NETCDF( UnitTest& ut ) { const int rank = comm_rank( MPI_COMM_WORLD ); - const int size = comm_size( MPI_COMM_WORLD ); int nprocx = 2; int nprocy = 2; int nprocz = 2; diff --git a/tests/TestSubphase.cpp b/tests/TestSubphase.cpp index 8eb479bc..fd6383be 100644 --- a/tests/TestSubphase.cpp +++ b/tests/TestSubphase.cpp @@ -60,13 +60,11 @@ int main(int argc, char **argv) } // Get the rank info - std::shared_ptr Dm(new Domain(db,comm)); - // const RankInfoStruct rank_info(rank,nprocx,nprocy,nprocz); - std::shared_ptr Averages(new SubPhase(Dm)); + auto Dm = std::make_shared(db,comm); + auto Averages = std::make_shared(Dm); Nx += 2; Ny += 2; Nz += 2; - int N = Nx*Ny*Nz; //....................................................................... for ( k=1;k Dm(new Domain(db,comm)); + auto Dm = std::make_shared(db,comm); Nx += 2; Ny += 2; Nz += 2; - int N = Nx*Ny*Nz; //....................................................................... for ( k=1;k Dm(new Domain(db,comm)); + auto Dm = std::make_shared(db,comm); // const RankInfoStruct rank_info(rank,nprocx,nprocy,nprocz); - std::shared_ptr Averages(new TwoPhase(Dm)); + auto Averages = std::make_shared(Dm); Nx += 2; Ny += 2; Nz += 2; - int N = Nx*Ny*Nz; //....................................................................... 
for ( k=1;kUpdateSolid(); diff --git a/tests/TestTorusEvolve.cpp b/tests/TestTorusEvolve.cpp index dedec45d..1a65d268 100644 --- a/tests/TestTorusEvolve.cpp +++ b/tests/TestTorusEvolve.cpp @@ -60,12 +60,11 @@ int main(int argc, char **argv) } // Get the rank info - std::shared_ptr Dm(new Domain(db,comm)); + auto Dm = std::make_shared(db,comm); Nx += 2; Ny += 2; Nz += 2; - int N = Nx*Ny*Nz; //....................................................................... for ( k=1;kAggregateLabels(FILENAME); + auto filename2 = READFILE + ".morph.raw"; + if (rank==0) printf("Writing file to: %s \n", filename2.c_str()); + Mask->AggregateLabels(filename2); } MPI_Barrier(comm); diff --git a/tests/lbpm_morphdrain_pp.cpp b/tests/lbpm_morphdrain_pp.cpp index 4994e081..8d73b1e4 100644 --- a/tests/lbpm_morphdrain_pp.cpp +++ b/tests/lbpm_morphdrain_pp.cpp @@ -32,10 +32,7 @@ int main(int argc, char **argv) //....................................................................... // Reading the domain information file //....................................................................... - int n, nprocx, nprocy, nprocz, nx, ny, nz; - char LocalRankString[8]; char LocalRankFilename[40]; - char FILENAME[128]; string filename; double SW,Rcrit_new; @@ -43,8 +40,10 @@ int main(int argc, char **argv) filename=argv[1]; Rcrit_new=0.f; //SW=strtod(argv[2],NULL); - } - else ERROR("No input database provided\n"); + } else { + ERROR("No input database provided\n"); + } + NULL_USE( Rcrit_new ); // read the input database auto db = std::make_shared( filename ); auto domain_db = db->getDatabase( "Domain" ); @@ -62,19 +61,16 @@ int main(int argc, char **argv) if (rank==0) printf("Performing morphological opening with target saturation %f \n", SW); // GenerateResidual(id,nx,ny,nz,Saturation); - nx = size[0]; - ny = size[1]; - nz = size[2]; - nprocx = nproc[0]; - nprocy = nproc[1]; - nprocz = nproc[2]; + int nx = size[0]; + int ny = size[1]; + int nz = size[2]; - int N = (nx+2)*(ny+2)*(nz+2); + size_t N = (nx+2)*(ny+2)*(nz+2); std::shared_ptr Dm (new Domain(domain_db,comm)); std::shared_ptr Mask (new Domain(domain_db,comm)); // std::shared_ptr Dm (new Domain(nx,ny,nz,rank,nprocx,nprocy,nprocz,Lx,Ly,Lz,BC)); - for (n=0; nid[n]=1; + for (size_t n=0; nid[n]=1; Dm->CommInit(); signed char *id; @@ -116,7 +112,6 @@ int main(int argc, char **argv) for (int k=0;kAggregateLabels(FILENAME); + auto filename2 = READFILE + ".morphdrain.raw"; + if (rank==0) printf("Writing file to: %s \n", filename2.data() ); + Mask->AggregateLabels( filename2 ); } MPI_Barrier(comm); diff --git a/tests/lbpm_morphopen_pp.cpp b/tests/lbpm_morphopen_pp.cpp index 48106a97..f8819348 100644 --- a/tests/lbpm_morphopen_pp.cpp +++ b/tests/lbpm_morphopen_pp.cpp @@ -32,10 +32,7 @@ int main(int argc, char **argv) //....................................................................... // Reading the domain information file //....................................................................... 
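// NULL_USE(Rcrit_new) above (and again in the next tool) marks a variable that is declared but no
// longer consumed, so the compiler does not warn about it; this is the recurring mechanism used
// throughout this patch to silence unused-variable warnings without deleting the declaration.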
- int n, nprocx, nprocy, nprocz, nx, ny, nz; - char LocalRankString[8]; char LocalRankFilename[40]; - char FILENAME[128]; string filename; double SW,Rcrit_new; @@ -45,6 +42,7 @@ int main(int argc, char **argv) //SW=strtod(argv[2],NULL); } else ERROR("No input database provided\n"); + NULL_USE( Rcrit_new ); // read the input database auto db = std::make_shared( filename ); auto domain_db = db->getDatabase( "Domain" ); @@ -69,19 +67,16 @@ int main(int argc, char **argv) if (rank==0) printf("Performing morphological opening with target saturation %f \n", SW); // GenerateResidual(id,nx,ny,nz,Saturation); - nx = size[0]; - ny = size[1]; - nz = size[2]; - nprocx = nproc[0]; - nprocy = nproc[1]; - nprocz = nproc[2]; + int nx = size[0]; + int ny = size[1]; + int nz = size[2]; - int N = (nx+2)*(ny+2)*(nz+2); + size_t N = (nx+2)*(ny+2)*(nz+2); std::shared_ptr Dm (new Domain(domain_db,comm)); std::shared_ptr Mask (new Domain(domain_db,comm)); // std::shared_ptr Dm (new Domain(nx,ny,nz,rank,nprocx,nprocy,nprocz,Lx,Ly,Lz,BC)); - for (n=0; nid[n]=1; + for (size_t n=0; nid[n]=1; Dm->CommInit(); signed char *id; @@ -119,7 +114,6 @@ int main(int argc, char **argv) for (int k=0;kAggregateLabels(FILENAME); + auto filename2 = READFILE + ".morphopen.raw"; + if (rank==0) printf("Writing file to: %s \n", filename2.data()); + Mask->AggregateLabels(filename2); } MPI_Barrier(comm); diff --git a/tests/lbpm_permeability_simulator.cpp b/tests/lbpm_permeability_simulator.cpp index 4fb5bbac..dbcfb96b 100644 --- a/tests/lbpm_permeability_simulator.cpp +++ b/tests/lbpm_permeability_simulator.cpp @@ -23,9 +23,6 @@ using namespace std; int main(int argc, char **argv) { - //***************************************** - // ***** MPI STUFF **************** - //***************************************** // Initialize MPI int rank,nprocs; MPI_Init(&argc,&argv); @@ -33,10 +30,6 @@ int main(int argc, char **argv) MPI_Comm_rank(comm,&rank); MPI_Comm_size(comm,&nprocs); { - // parallel domain size (# of sub-domains) - int nprocx,nprocy,nprocz; - int iproc,jproc,kproc; - if (rank == 0){ printf("********************************************************\n"); printf("Running Single Phase Permeability Calculation \n"); @@ -44,10 +37,10 @@ int main(int argc, char **argv) } // Initialize compute device int device=ScaLBL_SetDevice(rank); + NULL_USE( device ); ScaLBL_DeviceBarrier(); MPI_Barrier(comm); - ScaLBL_MRTModel MRT(rank,nprocs,comm); auto filename = argv[1]; MRT.ReadParams(filename); diff --git a/tests/lbpm_refine_pp.cpp b/tests/lbpm_refine_pp.cpp index 4b903d78..d90dbb04 100644 --- a/tests/lbpm_refine_pp.cpp +++ b/tests/lbpm_refine_pp.cpp @@ -26,10 +26,9 @@ int main(int argc, char **argv) //....................................................................... // Reading the domain information file //....................................................................... 
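// The int -> size_t changes above (the array length N and the initialization loop index) keep the
// index type consistent with the size expression and prevent (nx+2)*(ny+2)*(nz+2) from overflowing
// int on very large domains, in line with this patch's warning cleanup.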
- int nprocx, nprocy, nprocz, nx, ny, nz, nspheres; double Lx, Ly, Lz; + Lx = Ly = Lz = 1.0; int i,j,k,n; - int BC=0; string filename; if (argc > 1){ @@ -47,12 +46,12 @@ int main(int argc, char **argv) auto ReadValues = domain_db->getVector( "ReadValues" ); auto WriteValues = domain_db->getVector( "WriteValues" ); - nx = size[0]; - ny = size[1]; - nz = size[2]; - nprocx = nproc[0]; - nprocy = nproc[1]; - nprocz = nproc[2]; + int nx = size[0]; + int ny = size[1]; + int nz = size[2]; + int nprocx = nproc[0]; + int nprocy = nproc[1]; + int nprocz = nproc[2]; // Check that the number of processors >= the number of ranks if ( rank==0 ) { @@ -66,10 +65,9 @@ int main(int argc, char **argv) char LocalRankFilename[40]; - int rnx,rny,rnz; - rnx=2*nx; - rny=2*ny; - rnz=2*nz; + int rnx=2*nx; + int rny=2*ny; + int rnz=2*nz; if (rank==0) printf("Refining mesh to %i x %i x %i \n",rnx,rny,rnz); @@ -128,13 +126,12 @@ int main(int argc, char **argv) } } - int ri,rj,rk,rn; //refined mesh indices //char *RefineLabel; //RefineLabel = new char [rnx*rny*rnz]; Array RefineLabel(rnx,rny,rnz); - for (rk=1; rkgetVector( "L" ); auto size = domain_db->getVector( "n" ); auto nproc = domain_db->getVector( "nproc" ); - int BoundaryCondition = domain_db->getScalar( "BC" ); + //int BoundaryCondition = domain_db->getScalar( "BC" ); int nx = size[0]; int ny = size[1]; int nz = size[2]; @@ -91,10 +91,10 @@ int main(int argc, char **argv) printf("Number of MPI ranks required: %i \n", nprocx*nprocy*nprocz); printf("Number of MPI ranks used: %i \n", nprocs); printf("Full domain size: %i x %i x %i \n",nx*nprocx,ny*nprocy,nz*nprocz); - printf("target value = %f \n",target); - printf("background value = %f \n",background); - printf("cylinder center = %i, %i, %i \n",center[0],center[1],center[2]); - printf("cylinder radius = %f \n",CylRad); + printf("target value = %f \n",target); + printf("background value = %f \n",background); + printf("cylinder center = %i, %i, %i \n",center[0],center[1],center[2]); + printf("cylinder radius = %f \n",CylRad); } if ( nprocs < nprocx*nprocy*nprocz ){ ERROR("Insufficient number of processors"); @@ -196,19 +196,19 @@ int main(int argc, char **argv) filter_src( *Dm[0], LOCVOL[0] ); // Set up the mask to be distance to cylinder (crop outside cylinder) - if (rank==0) printf("Cropping with cylinder: %i, %i, %i, radius=%f \n",Dm[0]->nprocx()*Nx[0],Dm[0]->nprocy()*Ny[0],Dm[0]->nprocz()*Nz[0],CylRad); + if (rank==0) printf("Cropping with cylinder: %i, %i, %i, radius=%f \n",Dm[0]->nprocx()*Nx[0],Dm[0]->nprocy()*Ny[0],Dm[0]->nprocz()*Nz[0],CylRad); for (int k=0;kiproc()*Nx[0]+i-1); - float y= float (Dm[0]->jproc()*Ny[0]+j-1); - float z= float(Dm[0]->kproc()*Nz[0]+k-1); - float cx = float(center[0] - offset[0]); - float cy = float(center[1] - offset[1]); - float cz = float(center[2] - offset[2]); + //float x= float(Dm[0]->iproc()*Nx[0]+i-1); + float y= float (Dm[0]->jproc()*Ny[0]+j-1); + float z= float(Dm[0]->kproc()*Nz[0]+k-1); + //float cx = float(center[0] - offset[0]); + float cy = float(center[1] - offset[1]); + float cz = float(center[2] - offset[2]); // distance from the center line MASK(i,j,k) = sqrt((z-cz)*(z-cz) + (y-cy)*(y-cy)); - //if (sqrt(((z-cz)*(z-cz) + (y-cy)*(y-cy)) ) > CylRad) LOCVOL[0](i,j,k)=background; + //if (sqrt(((z-cz)*(z-cz) + (y-cy)*(y-cy)) ) > CylRad) LOCVOL[0](i,j,k)=background; } } } @@ -219,18 +219,18 @@ int main(int argc, char **argv) float THRESHOLD=0.5*(target+background); float mean_plus=0; float mean_minus=0; - float min_value = LOCVOL[0](0); - float max_value = 
LOCVOL[0](0); + float min_value = LOCVOL[0](0); + float max_value = LOCVOL[0](0); int count_plus=0; int count_minus=0; for (int k=1;k 0){ // direction to background / target is the same if (fabs(tmp-target) > fabs(tmp-background)) tmp=background; // tmp closer to background @@ -241,20 +241,20 @@ int main(int argc, char **argv) mean_plus += tmp; count_plus++; } - else { + else { mean_minus += tmp; count_minus++; } - if (tmp < min_value) min_value = tmp; - if (tmp > max_value) max_value = tmp; - } + if (tmp < min_value) min_value = tmp; + if (tmp > max_value) max_value = tmp; + } } } } - count_plus=sumReduce( Dm[0]->Comm, count_plus); - count_minus=sumReduce( Dm[0]->Comm, count_minus); - if (rank==0) printf("minimum value=%f, max value=%f \n",min_value,max_value); - if (rank==0) printf("plus=%i, minus=%i \n",count_plus,count_minus); + count_plus=sumReduce( Dm[0]->Comm, count_plus); + count_minus=sumReduce( Dm[0]->Comm, count_minus); + if (rank==0) printf("minimum value=%f, max value=%f \n",min_value,max_value); + if (rank==0) printf("plus=%i, minus=%i \n",count_plus,count_minus); ASSERT( count_plus > 0 && count_minus > 0 ); MPI_Barrier(comm); mean_plus = sumReduce( Dm[0]->Comm, mean_plus ) / count_plus; @@ -262,25 +262,25 @@ int main(int argc, char **argv) MPI_Barrier(comm); if (rank==0) printf(" Region 1 mean (+): %f, Region 2 mean (-): %f \n",mean_plus, mean_minus); - //if (rank==0) printf("Scale the input data (size = %i) \n",LOCVOL[0].length()); + //if (rank==0) printf("Scale the input data (size = %i) \n",LOCVOL[0].length()); for (size_t i=0; i CylRad ){ - LOCVOL[0](i)=background; + if ( MASK(i) > CylRad ){ + LOCVOL[0](i)=background; } if ( LOCVOL[0](i) >= THRESHOLD ) { auto tmp = LOCVOL[0](i)/ mean_plus; LOCVOL[0](i) = std::min( tmp, 1.0f ); } - else { + else { auto tmp = -LOCVOL[0](i)/mean_minus; LOCVOL[0](i) = std::max( tmp, -1.0f ); } - //LOCVOL[0](i) = MASK(i); + //LOCVOL[0](i) = MASK(i); } // Fill the source data for the coarse meshes - if (rank==0) printf("Coarsen the mesh for N_levels=%i \n",N_levels); - MPI_Barrier(comm); + if (rank==0) printf("Coarsen the mesh for N_levels=%i \n",N_levels); + MPI_Barrier(comm); PROFILE_START("CoarsenMesh"); for (int i=1; i filter(ratio[0],ratio[1],ratio[2]); diff --git a/tests/pmmc_cylinder.cpp b/tests/pmmc_cylinder.cpp index a573fee3..3c5e25b6 100644 --- a/tests/pmmc_cylinder.cpp +++ b/tests/pmmc_cylinder.cpp @@ -11,17 +11,11 @@ int main (int argc, char **argv) { - // printf("Radius = %s \n,"RADIUS); - int SIZE = N*N*N; int Nx,Ny,Nz; Nx = Ny = Nz = N; int i,j,k,p,q,r; -// double *Solid; // cylinder -// double *Phase; // region of the cylinder -// Solid = new double [SIZE]; -// Phase = new double [SIZE]; DoubleArray SignDist(Nx,Ny,Nz); DoubleArray Phase(Nx,Ny,Nz); double fluid_isovalue = 0.0; @@ -36,9 +30,6 @@ int main (int argc, char **argv) //........................................................................... double awn,ans,aws,lwns,nwp_volume; double As; - double dEs,dAwn,dAns; // Global surface energy (calculated by rank=0) - double awn_global,ans_global,aws_global,lwns_global,nwp_volume_global; - double As_global; // bool add=1; // Set to false if any corners contain nw-phase ( F > fluid_isovalue) int cube[8][3] = {{0,0,0},{1,0,0},{0,1,0},{1,1,0},{0,0,1},{1,0,1},{0,1,1},{1,1,1}}; // cube corners // int count_in=0,count_out=0; @@ -75,7 +66,6 @@ int main (int argc, char **argv) int n_local_nws_pts; int c; - int newton_steps = 0; //........................................................................... 
int ncubes = (Nx-2)*(Ny-2)*(Nz-2); // Exclude the "upper" halo IntArray cubeList(3,ncubes); From acb2d30454421c4a7516f7b05189fac34d671f71 Mon Sep 17 00:00:00 2001 From: Mark Berrill Date: Wed, 22 Jan 2020 12:19:04 -0500 Subject: [PATCH 028/121] Fixing compile warnings --- cpu/D3Q19.cpp | 515 +++++++++++++++++++--------------------- cpu/dfh.cpp | 33 ++- tests/TestBubbleDFH.cpp | 34 +-- 3 files changed, 267 insertions(+), 315 deletions(-) diff --git a/cpu/D3Q19.cpp b/cpu/D3Q19.cpp index 2af59883..6b858d61 100644 --- a/cpu/D3Q19.cpp +++ b/cpu/D3Q19.cpp @@ -243,8 +243,6 @@ extern "C" double ScaLBL_D3Q19_Flux_BC_z(double *disteven, double *distodd, doub // odd distributions in disteven and even distributions in distodd. int n,N; // distributions - double f0,f1,f2,f3,f4,f5,f6,f7,f8,f9; - double f10,f11,f12,f13,f14,f15,f16,f17,f18; double din = 0.f; N = Nx*Ny*Nz; @@ -256,26 +254,26 @@ extern "C" double ScaLBL_D3Q19_Flux_BC_z(double *disteven, double *distodd, doub // Read distributions from "opposite" memory convention //........................................................................ //........................................................................ - f1 = distodd[n]; - f3 = distodd[N+n]; - f5 = distodd[2*N+n]; - f7 = distodd[3*N+n]; - f9 = distodd[4*N+n]; - f11 = distodd[5*N+n]; - f13 = distodd[6*N+n]; - f15 = distodd[7*N+n]; - f17 = distodd[8*N+n]; + double f1 = distodd[n]; + double f3 = distodd[N+n]; + //double f5 = distodd[2*N+n]; + double f7 = distodd[3*N+n]; + double f9 = distodd[4*N+n]; + //double f11 = distodd[5*N+n]; + double f13 = distodd[6*N+n]; + //double f15 = distodd[7*N+n]; + double f17 = distodd[8*N+n]; //........................................................................ - f0 = disteven[n]; - f2 = disteven[N+n]; - f4 = disteven[2*N+n]; - f6 = disteven[3*N+n]; - f8 = disteven[4*N+n]; - f10 = disteven[5*N+n]; - f12 = disteven[6*N+n]; - f14 = disteven[7*N+n]; - f16 = disteven[8*N+n]; - f18 = disteven[9*N+n]; + double f0 = disteven[n]; + double f2 = disteven[N+n]; + double f4 = disteven[2*N+n]; + double f6 = disteven[3*N+n]; + double f8 = disteven[4*N+n]; + double f10 = disteven[5*N+n]; + double f12 = disteven[6*N+n]; + //double f14 = disteven[7*N+n]; + double f16 = disteven[8*N+n]; + //double f18 = disteven[9*N+n]; //................................................... 
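// The five reads commented out above are the distributions with a +z velocity component
// (f5, f11, f14, f15, f18); at this z-face flux boundary they are the unknown incoming populations
// and do not enter the computation of the inlet density din, so dropping the reads removes
// unused-variable warnings without changing the result.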
// Determine the outlet flow velocity @@ -288,59 +286,58 @@ extern "C" double ScaLBL_D3Q19_Flux_BC_z(double *disteven, double *distodd, doub } extern "C" double ScaLBL_D3Q19_AAodd_Flux_BC_z(int *d_neighborList, int *list, double *dist, double flux, - double area, int count, int Np){ + double area, int count, int Np) +{ int idx, n; int nread; // distributions - double f0,f1,f2,f3,f4,f5,f6,f7,f8,f9; - double f10,f11,f12,f13,f14,f15,f16,f17,f18; double factor = 1.f/(area); double sum = 0.f; for (idx=0; idx 0.f){ nA = 1.0; nB = 0.f; @@ -60,15 +59,13 @@ extern "C" void ScaLBL_DFH_Init(double *Phi, double *Den, double *Aq, double *Bq // LBM based on density functional hydrodynamics extern "C" void ScaLBL_D3Q19_AAeven_DFH(int *neighborList, double *dist, double *Aq, double *Bq, double *Den, double *Phi, double *Gradient, double *SolidForce, double rhoA, double rhoB, double tauA, double tauB, double alpha, double beta, - double Fx, double Fy, double Fz, int start, int finish, int Np){ - - int ijk,nn,n; + double Fx, double Fy, double Fz, int start, int finish, int Np) +{ double fq; // conserved momemnts double rho,jx,jy,jz; // non-conserved moments double m1,m2,m4,m6,m8,m9,m10,m11,m12,m13,m14,m15,m16,m17,m18; - double m3,m5,m7; double nA,nB; // number density double a1,b1,a2,b2,nAB,delta; double C,nx,ny,nz; //color gradient magnitude and direction @@ -586,7 +583,7 @@ extern "C" void ScaLBL_D3Q19_AAodd_DFH(int *neighborList, double *dist, double * double *Phi, double *Gradient, double *SolidForce, double rhoA, double rhoB, double tauA, double tauB, double alpha, double beta, double Fx, double Fy, double Fz, int start, int finish, int Np){ - int n,nn,ijk,nread; + int nread; int nr1,nr2,nr3,nr4,nr5,nr6; int nr7,nr8,nr9,nr10; int nr11,nr12,nr13,nr14; @@ -596,7 +593,6 @@ extern "C" void ScaLBL_D3Q19_AAodd_DFH(int *neighborList, double *dist, double * double rho,jx,jy,jz; // non-conserved moments double m1,m2,m4,m6,m8,m9,m10,m11,m12,m13,m14,m15,m16,m17,m18; - double m3,m5,m7; double nA,nB; // number density double a1,b1,a2,b2,nAB,delta; double C,nx,ny,nz; //color gradient magnitude and direction @@ -1182,12 +1178,12 @@ extern "C" void ScaLBL_D3Q19_AAodd_DFH(int *neighborList, double *dist, double * } extern "C" void ScaLBL_D3Q7_AAodd_DFH(int *neighborList, double *Aq, double *Bq, - double *Den, double *Phi, int start, int finish, int Np){ - - int idx,n,nread; - double fq,nA,nB; + double *Den, double *Phi, int start, int finish, int Np) +{ for (int n=start; n Dm(new Domain(domain_db,comm)); + auto Dm = std::make_shared(domain_db,comm); for (int i=0; iNx*Dm->Ny*Dm->Nz; i++) Dm->id[i] = 1; - std::shared_ptr Averages( new TwoPhase(Dm) ); + auto Averages = std::make_shared(Dm); // TwoPhase Averages(Dm); Dm->CommInit(); // Mask that excludes the solid phase - std::shared_ptr Mask(new Domain(domain_db,comm)); + auto Mask = std::make_shared(domain_db,comm); MPI_Barrier(comm); Nx+=2; Ny+=2; Nz += 2; @@ -191,8 +186,7 @@ int main(int argc, char **argv) // printf("Local File Name = %s \n",LocalRankFilename); // .......... READ THE INPUT FILE ....................................... // char value; - char *id; - id = new char[N]; + auto id = new char[N]; double sum; //........................................................................... if (rank == 0) cout << "Setting up bubble..." 
<< endl; @@ -244,19 +238,17 @@ int main(int argc, char **argv) // Initialize communication structures in averaging domain for (i=0; iNx*Mask->Ny*Mask->Nz; i++) Mask->id[i] = id[i]; Mask->CommInit(); - double *PhaseLabel; - PhaseLabel = new double[N]; + auto PhaseLabel = new double[N]; //........................................................................... if (rank==0) printf ("Create ScaLBL_Communicator \n"); // Create a communicator for the device (will use optimized layout) - std::shared_ptr ScaLBL_Comm(new ScaLBL_Communicator(Mask)); + auto ScaLBL_Comm = std::make_shared(Mask); int Npad=(Np/16 + 2)*16; if (rank==0) printf ("Set up memory efficient layout Npad=%i \n",Npad); - int *neighborList; IntArray Map(Nx,Ny,Nz); - neighborList= new int[18*Npad]; + auto neighborList= new int[18*Npad]; Np = ScaLBL_Comm->MemoryOptimizedLayoutAA(Map,neighborList,Mask->id,Np); MPI_Barrier(comm); @@ -515,9 +507,8 @@ int main(int argc, char **argv) // Copy back final phase indicator field and convert to regular layout DoubleArray PhaseField(Nx,Ny,Nz); ScaLBL_Comm->RegularLayout(Map,Phi,PhaseField); - FILE *OUTFILE; sprintf(LocalRankFilename,"Phase.raw"); - OUTFILE = fopen(LocalRankFilename,"wb"); + auto OUTFILE = fopen(LocalRankFilename,"wb"); fwrite(PhaseField.data(),8,N,OUTFILE); fclose(OUTFILE); @@ -535,9 +526,8 @@ int main(int argc, char **argv) } } } - FILE *GFILE; sprintf(LocalRankFilename,"Gradient.raw"); - GFILE = fopen(LocalRankFilename,"wb"); + auto GFILE = fopen(LocalRankFilename,"wb"); fwrite(GradNorm.data(),8,N,GFILE); fclose(GFILE); @@ -545,14 +535,12 @@ int main(int argc, char **argv) DoubleArray Rho2(Nx,Ny,Nz); ScaLBL_Comm->RegularLayout(Map,&Den[0],Rho1); ScaLBL_Comm->RegularLayout(Map,&Den[Np],Rho2); - FILE *RFILE1; sprintf(LocalRankFilename,"Rho1.raw"); - RFILE1 = fopen(LocalRankFilename,"wb"); + auto RFILE1 = fopen(LocalRankFilename,"wb"); fwrite(Rho1.data(),8,N,RFILE1); fclose(RFILE1); - FILE *RFILE2; sprintf(LocalRankFilename,"Rho2.raw"); - RFILE2 = fopen(LocalRankFilename,"wb"); + auto RFILE2 = fopen(LocalRankFilename,"wb"); fwrite(Rho2.data(),8,N,RFILE2); fclose(RFILE2); From d1f714a82e32be3e43d24446e5ff375453aff0be Mon Sep 17 00:00:00 2001 From: Mark Berrill Date: Tue, 28 Jan 2020 08:51:32 -0500 Subject: [PATCH 029/121] Adding MPI wrapper class --- CMakeLists.txt | 344 +- IO/MeshDatabase.cpp | 145 +- IO/MeshDatabase.h | 4 +- IO/PIO.cpp | 12 +- IO/PackData.cpp | 105 + IO/PackData.h | 78 + common/MPI_Helpers.hpp => IO/PackData.hpp | 9 +- IO/Writer.cpp | 20 +- IO/Writer.h | 4 +- IO/netcdf.cpp | 12 +- IO/netcdf.h | 4 +- IO/silo.cpp | 2 +- IO/silo.h | 2 +- IO/silo.hpp | 2 +- analysis/Minkowski.cpp | 16 +- analysis/Minkowski.h | 2 +- analysis/SubPhase.cpp | 154 +- analysis/SubPhase.h | 2 +- analysis/TwoPhase.cpp | 83 +- analysis/TwoPhase.h | 2 +- analysis/analysis.cpp | 83 +- analysis/analysis.h | 8 +- analysis/distance.cpp | 2 +- analysis/morphology.cpp | 106 +- analysis/runAnalysis.cpp | 49 +- analysis/runAnalysis.h | 8 +- analysis/uCT.cpp | 11 +- cmake/FindHIP.cmake | 579 ++++ common/Communication.h | 198 +- common/Communication.hpp | 53 +- common/Domain.cpp | 225 +- common/Domain.h | 6 +- common/MPI.I | 1143 +++++++ common/MPI.cpp | 3758 +++++++++++++++++++++ common/MPI.h | 1152 +++++++ common/MPI_Helpers.cpp | 266 -- common/MPI_Helpers.h | 239 -- common/ReadMicroCT.cpp | 4 +- common/ReadMicroCT.h | 3 +- common/ScaLBL.cpp | 226 +- common/ScaLBL.h | 3 +- common/SpherePack.cpp | 1 - common/SpherePack.h | 1 - common/UnitTest.cpp | 211 +- common/UnitTest.h | 71 +- 
common/UtilityMacros.h | 28 +- cpu/BGK.cpp | 5 +- cpu/Color.cpp | 51 +- cpu/exe/lb2_Color_mpi.cpp | 2 +- cpu/exe/lb2_Color_wia_mpi_bubble.cpp | 2 +- models/ColorModel.cpp | 57 +- models/ColorModel.h | 6 +- models/DFHModel.cpp | 35 +- models/DFHModel.h | 6 +- models/MRTModel.cpp | 49 +- models/MRTModel.h | 6 +- tests/BlobAnalyzeParallel.cpp | 21 +- tests/BlobIdentifyParallel.cpp | 9 +- tests/ColorToBinary.cpp | 9 +- tests/ComponentLabel.cpp | 9 +- tests/GenerateSphereTest.cpp | 75 +- tests/TestBlobAnalyze.cpp | 17 +- tests/TestBlobIdentify.cpp | 37 +- tests/TestBlobIdentifyCorners.cpp | 5 +- tests/TestBubble.cpp | 46 +- tests/TestBubbleDFH.cpp | 32 +- tests/TestColorBubble.cpp | 14 +- tests/TestColorGrad.cpp | 24 +- tests/TestColorGradDFH.cpp | 18 +- tests/TestColorMassBounceback.cpp | 32 +- tests/TestColorSquareTube.cpp | 14 +- tests/TestCommD3Q19.cpp | 23 +- tests/TestDatabase.cpp | 9 +- tests/TestFluxBC.cpp | 18 +- tests/TestForceD3Q19.cpp | 7 +- tests/TestForceMoments.cpp | 30 +- tests/TestInterfaceSpeed.cpp | 32 +- tests/TestMRT.cpp | 38 +- tests/TestMap.cpp | 17 +- tests/TestMassConservationD3Q7.cpp | 11 +- tests/TestMicroCTReader.cpp | 10 +- tests/TestMomentsD3Q19.cpp | 9 +- tests/TestNetcdf.cpp | 10 +- tests/TestPoiseuille.cpp | 18 +- tests/TestPressVel.cpp | 23 +- tests/TestSegDist.cpp | 13 +- tests/TestSubphase.cpp | 9 +- tests/TestTopo3D.cpp | 9 +- tests/TestTorus.cpp | 9 +- tests/TestTorusEvolve.cpp | 9 +- tests/TestTwoPhase.cpp | 11 +- tests/TestWriter.cpp | 21 +- tests/convertIO.cpp | 15 +- tests/hello_world.cpp | 11 +- tests/lb2_CMT_wia.cpp | 2 +- tests/lb2_Color_blob_wia_mpi.cpp | 48 +- tests/lbpm_BGK_simulator.cpp | 33 +- tests/lbpm_captube_pp.cpp | 16 +- tests/lbpm_color_macro_simulator.cpp | 36 +- tests/lbpm_color_simulator.cpp | 14 +- tests/lbpm_dfh_simulator.cpp | 12 +- tests/lbpm_disc_pp.cpp | 24 +- tests/lbpm_inkbottle_pp.cpp | 20 +- tests/lbpm_juanes_bench_disc_pp.cpp | 26 +- tests/lbpm_minkowski_scalar.cpp | 23 +- tests/lbpm_morph_pp.cpp | 22 +- tests/lbpm_morphdrain_pp.cpp | 12 +- tests/lbpm_morphopen_pp.cpp | 12 +- tests/lbpm_nondarcy_simulator.cpp | 28 +- tests/lbpm_nonnewtonian_simulator.cpp | 83 +- tests/lbpm_nonnewtonian_simulator.h | 40 +- tests/lbpm_permeability_simulator.cpp | 13 +- tests/lbpm_plates_pp.cpp | 20 +- tests/lbpm_porenetwork_pp.cpp | 20 +- tests/lbpm_random_pp.cpp | 13 +- tests/lbpm_refine_pp.cpp | 9 +- tests/lbpm_segmented_decomp.cpp | 20 +- tests/lbpm_segmented_pp.cpp | 9 +- tests/lbpm_sphere_pp.cpp | 18 +- tests/lbpm_squaretube_pp.cpp | 20 +- tests/lbpm_uCT_maskfilter.cpp | 16 +- tests/lbpm_uCT_pp.cpp | 37 +- tests/testCommunication.cpp | 34 +- tests/test_dcel_minkowski.cpp | 8 +- tests/test_dcel_tri_normal.cpp | 4 +- 125 files changed, 8530 insertions(+), 2541 deletions(-) create mode 100644 IO/PackData.cpp create mode 100644 IO/PackData.h rename common/MPI_Helpers.hpp => IO/PackData.hpp (95%) create mode 100644 cmake/FindHIP.cmake create mode 100644 common/MPI.I create mode 100644 common/MPI.cpp create mode 100644 common/MPI.h delete mode 100644 common/MPI_Helpers.cpp delete mode 100644 common/MPI_Helpers.h diff --git a/CMakeLists.txt b/CMakeLists.txt index acc2c2dc..1e7eeaea 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,170 +1,174 @@ -# Set some CMake properties -CMAKE_MINIMUM_REQUIRED( VERSION 3.9 ) - - -MESSAGE("====================") -MESSAGE("Configuring LBPM-WIA") -MESSAGE("====================") - - -# Set the project name -SET( PROJ LBPM ) # Set the project name for CMake -SET( LBPM_LIB lbpm-wia ) # Set the final library 
name -SET( LBPM_INC ) # Set an optional subfolder for includes (e.g. include/name/...) -SET( TEST_MAX_PROCS 16 ) - - -# Initialize the project -PROJECT( ${PROJ} LANGUAGES CXX ) - - -# Prevent users from building in place -IF ("${CMAKE_CURRENT_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_BINARY_DIR}" ) - MESSAGE( FATAL_ERROR "Building code in place is a bad idea" ) -ENDIF() - - -# Set the default C++ standard -SET( CMAKE_CXX_EXTENSIONS OFF ) -IF ( NOT CMAKE_CXX_STANDARD ) - IF ( CXX_STD ) - MESSAGE( FATAL_ERROR "CXX_STD is obsolete, please set CMAKE_CXX_STANDARD" ) - ENDIF() - SET( CMAKE_CXX_STANDARD 14 ) -ENDIF() -IF ( ( "${CMAKE_CXX_STANDARD}" GREATER "90" ) OR ( "${CMAKE_CXX_STANDARD}" LESS "14" ) ) - MESSAGE( FATAL_ERROR "C++14 or newer required" ) -ENDIF() - - -# Set source/install paths -SET( ${PROJ}_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}" ) -SET( ${PROJ}_BUILD_DIR "${CMAKE_CURRENT_BINARY_DIR}" ) -IF( ${PROJ}_INSTALL_DIR ) - SET( ${PROJ}_INSTALL_DIR "${${PROJ}_INSTALL_DIR}" ) -ELSEIF( PREFIX ) - SET( ${PROJ}_INSTALL_DIR "${PREFIX}" ) -ELSEIF( NOT ${PROJ}_INSTALL_DIR ) - SET( ${PROJ}_INSTALL_DIR "${CMAKE_CURRENT_BINARY_DIR}" ) -ENDIF() -INCLUDE_DIRECTORIES( "${${PROJ}_INSTALL_DIR}/include" ) -SET( CMAKE_MODULE_PATH ${${PROJ}_SOURCE_DIR} ${${PROJ}_SOURCE_DIR}/cmake ) - - -# Include macros -INCLUDE( "${CMAKE_CURRENT_SOURCE_DIR}/cmake/macros.cmake" ) -INCLUDE( "${CMAKE_CURRENT_SOURCE_DIR}/cmake/libraries.cmake" ) -INCLUDE( "${CMAKE_CURRENT_SOURCE_DIR}/cmake/LBPM-macros.cmake" ) - - -# Check if we are only compiling docs -CHECK_ENABLE_FLAG( ONLY_BUILD_DOCS 0 ) - - -# Set testing paramaters -SET( DROP_METHOD "http" ) -SET( DROP_SITE "" ) -SET( DROP_LOCATION "/CDash/submit.php?project=LBPM-WIA" ) -SET( TRIGGER_SITE "" ) -SET( DROP_SITE_CDASH TRUE ) -ENABLE_TESTING() -INCLUDE( CTest ) - - -# Check the compile mode and compile flags -IF ( NOT ONLY_BUILD_DOCS ) - CONFIGURE_SYSTEM() -ENDIF() - - -# Add some directories to include -INCLUDE_DIRECTORIES( "${${PROJ}_INSTALL_DIR}/include" ) - - -# Create the target for documentation -ADD_CUSTOM_TARGET( doc ) -ADD_CUSTOM_TARGET( latex_docs ) -CHECK_ENABLE_FLAG( USE_DOXYGEN 1 ) -CHECK_ENABLE_FLAG( USE_LATEX 1 ) -FILE( MAKE_DIRECTORY "${${PROJ}_INSTALL_DIR}/doc" ) -IF ( USE_DOXYGEN ) - SET( DOXYFILE_LATEX YES ) - SET( DOXYFILE_IN "${${PROJ}_SOURCE_DIR}/doxygen/Doxyfile.in" ) - SET( DOXY_HEADER_FILE "${${PROJ}_SOURCE_DIR}/doxygen/html/header.html" ) - SET( DOXY_FOOTER_FILE "${${PROJ}_SOURCE_DIR}/doxygen/html/footer.html" ) - SET( DOXYFILE_OUTPUT_DIR "${${PROJ}_INSTALL_DIR}/doc" ) - SET( DOXYFILE_SRC_HTML_DIR "${${PROJ}_SOURCE_DIR}/doxygen/html" ) - SET( DOXYFILE_SOURCE_DIR "${${PROJ}_SOURCE_DIR}" ) - SET( REL_PACKAGE_HTML "" ) - SET( DOXYGEN_MACROS "" ) - MESSAGE("DOXYGEN_MACROS = ${DOXYGEN_MACROS}") - INCLUDE( "${${PROJ}_SOURCE_DIR}/cmake/UseDoxygen.cmake" ) - IF ( DOXYGEN_FOUND ) - ADD_DEPENDENCIES( doxygen latex_docs ) - ADD_DEPENDENCIES( doc latex_docs doxygen ) - ELSE() - SET( USE_DOXYGEN 0 ) - ENDIF() -ENDIF() - - -# Create custom targets for build-test, check, and distclean -ADD_CUSTOM_TARGET( build-test ) -ADD_CUSTOM_TARGET( build-examples ) -ADD_CUSTOM_TARGET( check COMMAND make test ) -ADD_DISTCLEAN( analysis null_timer tests liblbpm-wia.* cpu gpu example common IO threadpool StackTrace ) - - -# Check for CUDA -CHECK_ENABLE_FLAG( USE_CUDA 0 ) -NULL_USE( CMAKE_CUDA_FLAGS ) -IF ( USE_CUDA ) - ADD_DEFINITIONS( -DUSE_CUDA ) - ENABLE_LANGUAGE( CUDA ) -ENDIF() - - -# Configure external packages -IF ( NOT ONLY_BUILD_DOCS ) - CONFIGURE_MPI() # MPI must be 
before other libraries - CONFIGURE_MIC() - CONFIGURE_NETCDF() - CONFIGURE_SILO() - CONFIGURE_LBPM() - CONFIGURE_TIMER( 0 "${${PROJ}_INSTALL_DIR}/null_timer" ) - CONFIGURE_LINE_COVERAGE() - # Set the external library link list - SET( EXTERNAL_LIBS ${EXTERNAL_LIBS} ${TIMER_LIBS} ) -ENDIF() - - - -# Macro to create 1,2,4 processor tests -MACRO( ADD_LBPM_TEST_1_2_4 EXENAME ${ARGN} ) - ADD_LBPM_TEST( ${EXENAME} ${ARGN} ) - ADD_LBPM_TEST_PARALLEL( ${EXENAME} 2 ${ARGN} ) - ADD_LBPM_TEST_PARALLEL( ${EXENAME} 4 ${ARGN} ) -ENDMACRO() - - -# Add the src directories -IF ( NOT ONLY_BUILD_DOCS ) - BEGIN_PACKAGE_CONFIG( lbpm-wia-library ) - ADD_PACKAGE_SUBDIRECTORY( common ) - ADD_PACKAGE_SUBDIRECTORY( analysis ) - ADD_PACKAGE_SUBDIRECTORY( IO ) - ADD_PACKAGE_SUBDIRECTORY( threadpool ) - ADD_PACKAGE_SUBDIRECTORY( StackTrace ) - ADD_PACKAGE_SUBDIRECTORY( models ) - IF ( USE_CUDA ) - ADD_PACKAGE_SUBDIRECTORY( gpu ) - ELSE() - ADD_PACKAGE_SUBDIRECTORY( cpu ) - ENDIF() - INSTALL_LBPM_TARGET( lbpm-wia-library ) - ADD_SUBDIRECTORY( tests ) - ADD_SUBDIRECTORY( example ) - #ADD_SUBDIRECTORY( workflows ) - INSTALL_PROJ_LIB() -ENDIF() - +# Set some CMake properties +CMAKE_MINIMUM_REQUIRED( VERSION 3.9 ) + + +MESSAGE("====================") +MESSAGE("Configuring LBPM-WIA") +MESSAGE("====================") + + +# Set the project name +SET( PROJ LBPM ) # Set the project name for CMake +SET( LBPM_LIB lbpm-wia ) # Set the final library name +SET( LBPM_INC ) # Set an optional subfolder for includes (e.g. include/name/...) +SET( TEST_MAX_PROCS 16 ) + + +# Initialize the project +PROJECT( ${PROJ} LANGUAGES CXX ) + + +# Prevent users from building in place +IF ("${CMAKE_CURRENT_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_BINARY_DIR}" ) + MESSAGE( FATAL_ERROR "Building code in place is a bad idea" ) +ENDIF() + + +# Set the default C++ standard +SET( CMAKE_CXX_EXTENSIONS OFF ) +IF ( NOT CMAKE_CXX_STANDARD ) + IF ( CXX_STD ) + MESSAGE( FATAL_ERROR "CXX_STD is obsolete, please set CMAKE_CXX_STANDARD" ) + ENDIF() + SET( CMAKE_CXX_STANDARD 14 ) +ENDIF() +IF ( ( "${CMAKE_CXX_STANDARD}" GREATER "90" ) OR ( "${CMAKE_CXX_STANDARD}" LESS "14" ) ) + MESSAGE( FATAL_ERROR "C++14 or newer required" ) +ENDIF() + + +# Set source/install paths +SET( ${PROJ}_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}" ) +SET( ${PROJ}_BUILD_DIR "${CMAKE_CURRENT_BINARY_DIR}" ) +IF( ${PROJ}_INSTALL_DIR ) + SET( ${PROJ}_INSTALL_DIR "${${PROJ}_INSTALL_DIR}" ) +ELSEIF( PREFIX ) + SET( ${PROJ}_INSTALL_DIR "${PREFIX}" ) +ELSEIF( NOT ${PROJ}_INSTALL_DIR ) + SET( ${PROJ}_INSTALL_DIR "${CMAKE_CURRENT_BINARY_DIR}" ) +ENDIF() +INCLUDE_DIRECTORIES( "${${PROJ}_INSTALL_DIR}/include" ) +SET( CMAKE_MODULE_PATH ${${PROJ}_SOURCE_DIR} ${${PROJ}_SOURCE_DIR}/cmake ) + + +# Include macros +INCLUDE( "${CMAKE_CURRENT_SOURCE_DIR}/cmake/macros.cmake" ) +INCLUDE( "${CMAKE_CURRENT_SOURCE_DIR}/cmake/libraries.cmake" ) +INCLUDE( "${CMAKE_CURRENT_SOURCE_DIR}/cmake/LBPM-macros.cmake" ) + + +# Check if we are only compiling docs +CHECK_ENABLE_FLAG( ONLY_BUILD_DOCS 0 ) + + +# Set testing paramaters +SET( DROP_METHOD "http" ) +SET( DROP_SITE "" ) +SET( DROP_LOCATION "/CDash/submit.php?project=LBPM-WIA" ) +SET( TRIGGER_SITE "" ) +SET( DROP_SITE_CDASH TRUE ) +ENABLE_TESTING() +INCLUDE( CTest ) + + +# Check the compile mode and compile flags +IF ( NOT ONLY_BUILD_DOCS ) + CONFIGURE_SYSTEM() +ENDIF() + + +# Add some directories to include +INCLUDE_DIRECTORIES( "${${PROJ}_INSTALL_DIR}/include" ) + + +# Create the target for documentation +ADD_CUSTOM_TARGET( doc ) +ADD_CUSTOM_TARGET( latex_docs ) 
+CHECK_ENABLE_FLAG( USE_DOXYGEN 1 ) +CHECK_ENABLE_FLAG( USE_LATEX 1 ) +FILE( MAKE_DIRECTORY "${${PROJ}_INSTALL_DIR}/doc" ) +IF ( USE_DOXYGEN ) + SET( DOXYFILE_LATEX YES ) + SET( DOXYFILE_IN "${${PROJ}_SOURCE_DIR}/doxygen/Doxyfile.in" ) + SET( DOXY_HEADER_FILE "${${PROJ}_SOURCE_DIR}/doxygen/html/header.html" ) + SET( DOXY_FOOTER_FILE "${${PROJ}_SOURCE_DIR}/doxygen/html/footer.html" ) + SET( DOXYFILE_OUTPUT_DIR "${${PROJ}_INSTALL_DIR}/doc" ) + SET( DOXYFILE_SRC_HTML_DIR "${${PROJ}_SOURCE_DIR}/doxygen/html" ) + SET( DOXYFILE_SOURCE_DIR "${${PROJ}_SOURCE_DIR}" ) + SET( REL_PACKAGE_HTML "" ) + SET( DOXYGEN_MACROS "" ) + MESSAGE("DOXYGEN_MACROS = ${DOXYGEN_MACROS}") + INCLUDE( "${${PROJ}_SOURCE_DIR}/cmake/UseDoxygen.cmake" ) + IF ( DOXYGEN_FOUND ) + ADD_DEPENDENCIES( doxygen latex_docs ) + ADD_DEPENDENCIES( doc latex_docs doxygen ) + ELSE() + SET( USE_DOXYGEN 0 ) + ENDIF() +ENDIF() + + +# Create custom targets for build-test, check, and distclean +ADD_CUSTOM_TARGET( build-test ) +ADD_CUSTOM_TARGET( build-examples ) +ADD_CUSTOM_TARGET( check COMMAND make test ) +ADD_DISTCLEAN( analysis null_timer tests liblbpm-wia.* cpu gpu example common IO threadpool StackTrace ) + + +# Check for CUDA +CHECK_ENABLE_FLAG( USE_CUDA 0 ) +CHECK_ENABLE_FLAG( USE_HIP 0 ) +NULL_USE( CMAKE_CUDA_FLAGS ) +IF ( USE_CUDA ) + ADD_DEFINITIONS( -DUSE_CUDA ) + ENABLE_LANGUAGE( CUDA ) +ELSEIF ( USE_HIP ) + FIND_PACKAGE( HIP ) + MESSAGE( FATAL_ERROR "STOP" ) +ENDIF() + + +# Configure external packages +IF ( NOT ONLY_BUILD_DOCS ) + CONFIGURE_MPI() # MPI must be before other libraries + CONFIGURE_MIC() + CONFIGURE_NETCDF() + CONFIGURE_SILO() + CONFIGURE_LBPM() + CONFIGURE_TIMER( 0 "${${PROJ}_INSTALL_DIR}/null_timer" ) + CONFIGURE_LINE_COVERAGE() + # Set the external library link list + SET( EXTERNAL_LIBS ${EXTERNAL_LIBS} ${TIMER_LIBS} ) +ENDIF() + + + +# Macro to create 1,2,4 processor tests +MACRO( ADD_LBPM_TEST_1_2_4 EXENAME ${ARGN} ) + ADD_LBPM_TEST( ${EXENAME} ${ARGN} ) + ADD_LBPM_TEST_PARALLEL( ${EXENAME} 2 ${ARGN} ) + ADD_LBPM_TEST_PARALLEL( ${EXENAME} 4 ${ARGN} ) +ENDMACRO() + + +# Add the src directories +IF ( NOT ONLY_BUILD_DOCS ) + BEGIN_PACKAGE_CONFIG( lbpm-wia-library ) + ADD_PACKAGE_SUBDIRECTORY( common ) + ADD_PACKAGE_SUBDIRECTORY( analysis ) + ADD_PACKAGE_SUBDIRECTORY( IO ) + ADD_PACKAGE_SUBDIRECTORY( threadpool ) + ADD_PACKAGE_SUBDIRECTORY( StackTrace ) + ADD_PACKAGE_SUBDIRECTORY( models ) + IF ( USE_CUDA ) + ADD_PACKAGE_SUBDIRECTORY( gpu ) + ELSE() + ADD_PACKAGE_SUBDIRECTORY( cpu ) + ENDIF() + INSTALL_LBPM_TARGET( lbpm-wia-library ) + ADD_SUBDIRECTORY( tests ) + ADD_SUBDIRECTORY( example ) + #ADD_SUBDIRECTORY( workflows ) + INSTALL_PROJ_LIB() +ENDIF() + diff --git a/IO/MeshDatabase.cpp b/IO/MeshDatabase.cpp index 1fad9231..2c03ddde 100644 --- a/IO/MeshDatabase.cpp +++ b/IO/MeshDatabase.cpp @@ -1,7 +1,8 @@ #include "IO/MeshDatabase.h" #include "IO/Mesh.h" +#include "IO/PackData.h" #include "IO/IOHelpers.h" -#include "common/MPI_Helpers.h" +#include "common/MPI.h" #include "common/Utilities.h" #include @@ -13,8 +14,6 @@ -/**************************************************** -****************************************************/ // MeshType template<> size_t packsize( const IO::MeshType& rhs ) @@ -247,80 +246,76 @@ void DatabaseEntry::read( const std::string& line ) // Gather the mesh databases from all processors inline int tod( int N ) { return (N+7)/sizeof(double); } -std::vector gatherAll( const std::vector& meshes, MPI_Comm comm ) +std::vector gatherAll( const std::vector& meshes, const Utilities::MPI& comm ) { 
- #ifdef USE_MPI - PROFILE_START("gatherAll"); - PROFILE_START("gatherAll-pack",2); - int size = MPI_WORLD_SIZE(); - // First pack the mesh data to local buffers - int localsize = 0; - for (size_t i=0; i data; - pos = 0; - while ( pos < globalsize ) { - MeshDatabase tmp; - unpack(tmp,(char*)&globalbuf[pos]); - pos += tod(packsize(tmp)); - std::map::iterator it = data.find(tmp.name); - if ( it==data.end() ) { - data[tmp.name] = tmp; - } else { - for (size_t i=0; isecond.domains.push_back(tmp.domains[i]); - for (size_t i=0; isecond.variables.push_back(tmp.variables[i]); - it->second.variable_data.insert(tmp.variable_data.begin(),tmp.variable_data.end()); - } - } - for (std::map::iterator it=data.begin(); it!=data.end(); ++it) { - // Get the unique variables - std::set data2(it->second.variables.begin(),it->second.variables.end()); - it->second.variables = std::vector(data2.begin(),data2.end()); - } - // Free temporary memory - delete [] localbuf; - delete [] recvsize; - delete [] disp; - delete [] globalbuf; - // Return the results - std::vector data2(data.size()); - size_t i=0; - for (std::map::iterator it=data.begin(); it!=data.end(); ++it, ++i) - data2[i] = it->second; - PROFILE_STOP("gatherAll-unpack",2); - PROFILE_STOP("gatherAll"); - return data2; - #else + if ( comm.getSize() == 1 ) return meshes; - #endif + PROFILE_START("gatherAll"); + PROFILE_START("gatherAll-pack",2); + int size = comm.getSize(); + // First pack the mesh data to local buffers + int localsize = 0; + for (size_t i=0; i data; + pos = 0; + while ( pos < globalsize ) { + MeshDatabase tmp; + unpack(tmp,(char*)&globalbuf[pos]); + pos += tod(packsize(tmp)); + std::map::iterator it = data.find(tmp.name); + if ( it==data.end() ) { + data[tmp.name] = tmp; + } else { + for (size_t i=0; isecond.domains.push_back(tmp.domains[i]); + for (size_t i=0; isecond.variables.push_back(tmp.variables[i]); + it->second.variable_data.insert(tmp.variable_data.begin(),tmp.variable_data.end()); + } + } + for (auto it=data.begin(); it!=data.end(); ++it) { + // Get the unique variables + std::set data2(it->second.variables.begin(),it->second.variables.end()); + it->second.variables = std::vector(data2.begin(),data2.end()); + } + // Free temporary memory + delete [] localbuf; + delete [] disp; + delete [] globalbuf; + // Return the results + std::vector data2(data.size()); + size_t i=0; + for (std::map::iterator it=data.begin(); it!=data.end(); ++it, ++i) + data2[i] = it->second; + PROFILE_STOP("gatherAll-unpack",2); + PROFILE_STOP("gatherAll"); + return data2; } diff --git a/IO/MeshDatabase.h b/IO/MeshDatabase.h index 9f544925..8e501624 100644 --- a/IO/MeshDatabase.h +++ b/IO/MeshDatabase.h @@ -2,7 +2,7 @@ #define MeshDatabase_INC #include "IO/Mesh.h" -#include "common/MPI_Helpers.h" +#include "common/MPI.h" #include #include @@ -70,7 +70,7 @@ public: //! Gather the mesh databases from all processors -std::vector gatherAll( const std::vector& meshes, MPI_Comm comm ); +std::vector gatherAll( const std::vector& meshes, const Utilities::MPI& comm ); //! 
Write the mesh databases to a file diff --git a/IO/PIO.cpp b/IO/PIO.cpp index 6c6ece2d..3c2f3934 100644 --- a/IO/PIO.cpp +++ b/IO/PIO.cpp @@ -1,6 +1,6 @@ #include "IO/PIO.h" #include "common/Utilities.h" -#include "common/MPI_Helpers.h" +#include "common/MPI.h" #include #include @@ -36,10 +36,7 @@ static void shutdownFilestream( ) } void Utilities::logOnlyNodeZero( const std::string &filename ) { - int rank = 0; - #ifdef USE_MPI - MPI_Comm_rank( MPI_COMM_WORLD, &rank ); - #endif + int rank = ::Utilities::MPI( MPI_COMM_WORLD ).getRank(); if ( rank == 0 ) logAllNodes(filename,true); } @@ -54,10 +51,7 @@ void Utilities::logAllNodes( const std::string &filename, bool singleStream ) // Open the log stream and redirect output std::string full_filename = filename; if ( !singleStream ) { - int rank = 0; - #ifdef USE_MPI - MPI_Comm_rank( MPI_COMM_WORLD, &rank ); - #endif + int rank = ::Utilities::MPI( MPI_COMM_WORLD ).getRank(); char tmp[100]; sprintf(tmp,".%04i",rank); full_filename += std::string(tmp); diff --git a/IO/PackData.cpp b/IO/PackData.cpp new file mode 100644 index 00000000..f10d9ca7 --- /dev/null +++ b/IO/PackData.cpp @@ -0,0 +1,105 @@ +#include "IO/PackData.h" + +#include + + +/******************************************************** +* Concrete implimentations for packing/unpacking * +********************************************************/ +// unsigned char +template<> +size_t packsize( const unsigned char& rhs ) +{ + return sizeof(unsigned char); +} +template<> +void pack( const unsigned char& rhs, char *buffer ) +{ + memcpy(buffer,&rhs,sizeof(unsigned char)); +} +template<> +void unpack( unsigned char& data, const char *buffer ) +{ + memcpy(&data,buffer,sizeof(unsigned char)); +} +// char +template<> +size_t packsize( const char& rhs ) +{ + return sizeof(char); +} +template<> +void pack( const char& rhs, char *buffer ) +{ + memcpy(buffer,&rhs,sizeof(char)); +} +template<> +void unpack( char& data, const char *buffer ) +{ + memcpy(&data,buffer,sizeof(char)); +} +// int +template<> +size_t packsize( const int& rhs ) +{ + return sizeof(int); +} +template<> +void pack( const int& rhs, char *buffer ) +{ + memcpy(buffer,&rhs,sizeof(int)); +} +template<> +void unpack( int& data, const char *buffer ) +{ + memcpy(&data,buffer,sizeof(int)); +} +// unsigned int +template<> +size_t packsize( const unsigned int& rhs ) +{ + return sizeof(unsigned int); +} +template<> +void pack( const unsigned int& rhs, char *buffer ) +{ + memcpy(buffer,&rhs,sizeof(int)); +} +template<> +void unpack( unsigned int& data, const char *buffer ) +{ + memcpy(&data,buffer,sizeof(int)); +} +// size_t +template<> +size_t packsize( const size_t& rhs ) +{ + return sizeof(size_t); +} +template<> +void pack( const size_t& rhs, char *buffer ) +{ + memcpy(buffer,&rhs,sizeof(size_t)); +} +template<> +void unpack( size_t& data, const char *buffer ) +{ + memcpy(&data,buffer,sizeof(size_t)); +} +// std::string +template<> +size_t packsize( const std::string& rhs ) +{ + return rhs.size()+1; +} +template<> +void pack( const std::string& rhs, char *buffer ) +{ + memcpy(buffer,rhs.c_str(),rhs.size()+1); +} +template<> +void unpack( std::string& data, const char *buffer ) +{ + data = std::string(buffer); +} + diff --git a/IO/PackData.h b/IO/PackData.h new file mode 100644 index 00000000..85326c0b --- /dev/null +++ b/IO/PackData.h @@ -0,0 +1,78 @@ +// This file contains unctions to pack/unpack data structures +#ifndef included_PackData +#define included_PackData + +#include +#include +#include + + +//! 
Template function to return the buffer size required to pack a class +template +size_t packsize( const TYPE& rhs ); + +//! Template function to pack a class to a buffer +template +void pack( const TYPE& rhs, char *buffer ); + +//! Template function to unpack a class from a buffer +template +void unpack( TYPE& data, const char *buffer ); + + +//! Template function to return the buffer size required to pack a std::vector +template +size_t packsize( const std::vector& rhs ); + +//! Template function to pack a class to a buffer +template +void pack( const std::vector& rhs, char *buffer ); + +//! Template function to pack a class to a buffer +template +void unpack( std::vector& data, const char *buffer ); + + +//! Template function to return the buffer size required to pack a std::pair +template +size_t packsize( const std::pair& rhs ); + +//! Template function to pack a class to a buffer +template +void pack( const std::pair& rhs, char *buffer ); + +//! Template function to pack a class to a buffer +template +void unpack( std::pair& data, const char *buffer ); + + +//! Template function to return the buffer size required to pack a std::map +template +size_t packsize( const std::map& rhs ); + +//! Template function to pack a class to a buffer +template +void pack( const std::map& rhs, char *buffer ); + +//! Template function to pack a class to a buffer +template +void unpack( std::map& data, const char *buffer ); + + +//! Template function to return the buffer size required to pack a std::set +template +size_t packsize( const std::set& rhs ); + +//! Template function to pack a class to a buffer +template +void pack( const std::set& rhs, char *buffer ); + +//! Template function to pack a class to a buffer +template +void unpack( std::set& data, const char *buffer ); + + +#include "IO/PackData.hpp" + +#endif + diff --git a/common/MPI_Helpers.hpp b/IO/PackData.hpp similarity index 95% rename from common/MPI_Helpers.hpp rename to IO/PackData.hpp index 85261cf1..006cdf73 100644 --- a/common/MPI_Helpers.hpp +++ b/IO/PackData.hpp @@ -1,8 +1,9 @@ -// This file contains wrappers for MPI routines and functions to pack/unpack data structures -#ifndef MPI_WRAPPERS_HPP -#define MPI_WRAPPERS_HPP +// This file functions to pack/unpack data structures +#ifndef included_PackData_hpp +#define included_PackData_hpp + +#include "IO/PackData.h" -#include "common/MPI_Helpers.h" #include #include #include diff --git a/IO/Writer.cpp b/IO/Writer.cpp index 6581ad42..61c333af 100644 --- a/IO/Writer.cpp +++ b/IO/Writer.cpp @@ -2,7 +2,7 @@ #include "IO/MeshDatabase.h" #include "IO/IOHelpers.h" #include "IO/silo.h" -#include "common/MPI_Helpers.h" +#include "common/MPI.h" #include "common/Utilities.h" #include @@ -36,7 +36,7 @@ void IO::initialize( const std::string& path, const std::string& format, bool ap global_IO_format = Format::SILO; else ERROR("Unknown format"); - int rank = comm_rank(MPI_COMM_WORLD); + int rank = Utilities::MPI(MPI_COMM_WORLD).getRank(); if ( !append && rank==0 ) { mkdir(path.c_str(),S_IRWXU|S_IRGRP); std::string filename; @@ -55,7 +55,7 @@ void IO::initialize( const std::string& path, const std::string& format, bool ap // Write the mesh data in the original format static std::vector writeMeshesOrigFormat( const std::vector& meshData, const std::string& path ) { - int rank = MPI_WORLD_RANK(); + int rank = Utilities::MPI(MPI_COMM_WORLD).getRank(); std::vector meshes_written; for (size_t i=0; i writeMeshesOrigFormat( const std::vector& meshes_written, cons static std::vector writeMeshesNewFormat( 
const std::vector& meshData, const std::string& path, int format ) { - int rank = MPI_WORLD_RANK(); + int rank = Utilities::MPI(MPI_COMM_WORLD).getRank(); std::vector meshes_written; char filename[100], fullpath[200]; sprintf(filename,"%05i",rank); @@ -419,7 +419,7 @@ static std::vector writeMeshesSilo( const std::vector& meshData, const std::string& path, int format ) { #ifdef USE_SILO - int rank = MPI_WORLD_RANK(); + int rank = Utilities::MPI(MPI_COMM_WORLD).getRank(); std::vector meshes_written; char filename[100], fullpath[200]; sprintf(filename,"%05i.silo",rank); @@ -441,12 +441,12 @@ static std::vector writeMeshesSilo( /**************************************************** * Write the mesh data * ****************************************************/ -void IO::writeData( const std::string& subdir, const std::vector& meshData, MPI_Comm comm ) +void IO::writeData( const std::string& subdir, const std::vector& meshData, const Utilities::MPI& comm ) { if ( global_IO_path.empty() ) IO::initialize( ); PROFILE_START("writeData"); - int rank = comm_rank(comm); + int rank = Utilities::MPI(MPI_COMM_WORLD).getRank(); // Check the meshData before writing for ( const auto& data : meshData ) { if ( !data.check() ) @@ -457,7 +457,7 @@ void IO::writeData( const std::string& subdir, const std::vector meshes_written; if ( global_IO_format == Format::OLD ) { diff --git a/IO/Writer.h b/IO/Writer.h index 710fa0d8..dfc22db8 100644 --- a/IO/Writer.h +++ b/IO/Writer.h @@ -34,7 +34,7 @@ void initialize( const std::string& path="", const std::string& format="silo", b * @param[in] meshData The data to write * @param[in] comm The comm to use for writing (usually MPI_COMM_WORLD or a dup thereof) */ -void writeData( const std::string& subdir, const std::vector& meshData, MPI_Comm comm ); +void writeData( const std::string& subdir, const std::vector& meshData, const Utilities::MPI& comm ); /*! 
@@ -44,7 +44,7 @@ void writeData( const std::string& subdir, const std::vector * @param[in] meshData The data to write * @param[in] comm The comm to use for writing (usually MPI_COMM_WORLD or a dup thereof) */ -inline void writeData( int timestep, const std::vector& meshData, MPI_Comm comm ) +inline void writeData( int timestep, const std::vector& meshData, const Utilities::MPI& comm ) { char subdir[100]; sprintf(subdir,"vis%03i",timestep); diff --git a/IO/netcdf.cpp b/IO/netcdf.cpp index b36bb6d6..e061579a 100644 --- a/IO/netcdf.cpp +++ b/IO/netcdf.cpp @@ -1,6 +1,6 @@ #include "IO/netcdf.h" #include "common/Utilities.h" -#include "common/MPI_Helpers.h" +#include "common/MPI.h" #include "ProfilerApp.h" @@ -116,7 +116,7 @@ std::string VariableTypeName( VariableType type ) /**************************************************** * Open/close a file * ****************************************************/ -int open( const std::string& filename, FileMode mode, MPI_Comm comm ) +int open( const std::string& filename, FileMode mode, const Utilities::MPI& comm ) { int fid = 0; if ( comm == MPI_COMM_NULL ) { @@ -134,13 +134,13 @@ int open( const std::string& filename, FileMode mode, MPI_Comm comm ) } } else { if ( mode == READ ) { - int err = nc_open_par( filename.c_str(), NC_MPIPOSIX, comm, MPI_INFO_NULL, &fid ); + int err = nc_open_par( filename.c_str(), NC_MPIPOSIX, comm.getCommunicator(), MPI_INFO_NULL, &fid ); CHECK_NC_ERR( err ); } else if ( mode == WRITE ) { - int err = nc_open_par( filename.c_str(), NC_WRITE|NC_MPIPOSIX, comm, MPI_INFO_NULL, &fid ); + int err = nc_open_par( filename.c_str(), NC_WRITE|NC_MPIPOSIX, comm.getCommunicator(), MPI_INFO_NULL, &fid ); CHECK_NC_ERR( err ); } else if ( mode == CREATE ) { - int err = nc_create_par( filename.c_str(), NC_NETCDF4|NC_MPIIO, comm, MPI_INFO_NULL, &fid ); + int err = nc_create_par( filename.c_str(), NC_NETCDF4|NC_MPIIO, comm.getCommunicator(), MPI_INFO_NULL, &fid ); CHECK_NC_ERR( err ); } else { ERROR("Unknown file mode"); @@ -375,7 +375,7 @@ Array getVar( int fid, const std::string& var, const std::vector& sta std::vector var_size = getVarDim( fid, var ); for (int d=0; d<(int)var_size.size(); d++) { if ( start[d]<0 || start[d]+stride[d]*(count[d]-1)>(int)var_size[d] ) { - int rank = comm_rank(MPI_COMM_WORLD); + int rank = Utilities::MPI(MPI_COMM_WORLD).getRank(); char tmp[1000]; sprintf(tmp,"%i: Range exceeded array dimension:\n" " start[%i]=%i, count[%i]=%i, stride[%i]=%i, var_size[%i]=%i", diff --git a/IO/netcdf.h b/IO/netcdf.h index 657747bf..b4559e51 100644 --- a/IO/netcdf.h +++ b/IO/netcdf.h @@ -5,7 +5,7 @@ #include #include "common/Array.h" -#include "common/MPI_Helpers.h" +#include "common/MPI.h" #include "common/Communication.h" @@ -32,7 +32,7 @@ std::string VariableTypeName( VariableType type ); * @param mode Open the file for reading or writing * @param comm MPI communicator to use (MPI_COMM_WORLD: don't use parallel netcdf) */ -int open( const std::string& filename, FileMode mode, MPI_Comm comm=MPI_COMM_NULL ); +int open( const std::string& filename, FileMode mode, const Utilities::MPI& comm=MPI_COMM_NULL ); /*! 
diff --git a/IO/silo.cpp b/IO/silo.cpp index eece8583..ddf3646a 100644 --- a/IO/silo.cpp +++ b/IO/silo.cpp @@ -1,6 +1,6 @@ #include "IO/silo.h" #include "common/Utilities.h" -#include "common/MPI_Helpers.h" +#include "common/MPI.h" #include "ProfilerApp.h" diff --git a/IO/silo.h b/IO/silo.h index 4c7081e5..339a5c34 100644 --- a/IO/silo.h +++ b/IO/silo.h @@ -6,7 +6,7 @@ #include #include "common/Array.h" -#include "common/MPI_Helpers.h" +#include "common/MPI.h" #include "common/Communication.h" diff --git a/IO/silo.hpp b/IO/silo.hpp index 312f32d8..35852004 100644 --- a/IO/silo.hpp +++ b/IO/silo.hpp @@ -3,7 +3,7 @@ #include "IO/silo.h" #include "common/Utilities.h" -#include "common/MPI_Helpers.h" +#include "common/MPI.h" #include "ProfilerApp.h" diff --git a/analysis/Minkowski.cpp b/analysis/Minkowski.cpp index faac6142..3e3fb35e 100644 --- a/analysis/Minkowski.cpp +++ b/analysis/Minkowski.cpp @@ -4,7 +4,7 @@ #include "common/Domain.h" #include "common/Communication.h" #include "common/Utilities.h" -#include "common/MPI_Helpers.h" +#include "common/MPI.h" #include "IO/MeshDatabase.h" #include "IO/Reader.h" #include "IO/Writer.h" @@ -109,13 +109,13 @@ void Minkowski::ComputeScalar(const DoubleArray& Field, const double isovalue) // convert X for 2D manifold to 3D object Xi *= 0.5; - MPI_Barrier(Dm->Comm); + Dm->Comm.barrier(); // Phase averages - MPI_Allreduce(&Vi,&Vi_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm); - MPI_Allreduce(&Xi,&Xi_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm); - MPI_Allreduce(&Ai,&Ai_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm); - MPI_Allreduce(&Ji,&Ji_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm); - MPI_Barrier(Dm->Comm); + Vi_global = Dm->Comm.sumReduce( Vi ); + Xi_global = Dm->Comm.sumReduce( Xi ); + Ai_global = Dm->Comm.sumReduce( Ai ); + Ji_global = Dm->Comm.sumReduce( Ji ); + Dm->Comm.barrier(); PROFILE_STOP("ComputeScalar"); } @@ -168,7 +168,7 @@ int Minkowski::MeasureConnectedPathway(){ double vF=0.0; n_connected_components = ComputeGlobalBlobIDs(Nx-2,Ny-2,Nz-2,Dm->rank_info,distance,distance,vF,vF,label,Dm->Comm); // int n_connected_components = ComputeGlobalPhaseComponent(Nx-2,Ny-2,Nz-2,Dm->rank_info,const IntArray &PhaseID, int &VALUE, BlobIDArray &GlobalBlobID, Dm->Comm ) - MPI_Barrier(Dm->Comm); + Dm->Comm.barrier(); for (int k=0; kComm, wb.V); - gnb.V=sumReduce( Dm->Comm, nb.V); - gwb.M=sumReduce( Dm->Comm, wb.M); - gnb.M=sumReduce( Dm->Comm, nb.M); - gwb.Px=sumReduce( Dm->Comm, wb.Px); - gwb.Py=sumReduce( Dm->Comm, wb.Py); - gwb.Pz=sumReduce( Dm->Comm, wb.Pz); - gnb.Px=sumReduce( Dm->Comm, nb.Px); - gnb.Py=sumReduce( Dm->Comm, nb.Py); - gnb.Pz=sumReduce( Dm->Comm, nb.Pz); + gwb.V = Dm->Comm.sumReduce( wb.V); + gnb.V = Dm->Comm.sumReduce( nb.V); + gwb.M = Dm->Comm.sumReduce( wb.M); + gnb.M = Dm->Comm.sumReduce( nb.M); + gwb.Px = Dm->Comm.sumReduce( wb.Px); + gwb.Py = Dm->Comm.sumReduce( wb.Py); + gwb.Pz = Dm->Comm.sumReduce( wb.Pz); + gnb.Px = Dm->Comm.sumReduce( nb.Px); + gnb.Py = Dm->Comm.sumReduce( nb.Py); + gnb.Pz = Dm->Comm.sumReduce( nb.Pz); - count_w=sumReduce( Dm->Comm, count_w); - count_n=sumReduce( Dm->Comm, count_n); + count_w = Dm->Comm.sumReduce( count_w); + count_n = Dm->Comm.sumReduce( count_n); if (count_w > 0.0) - gwb.p=sumReduce( Dm->Comm, wb.p) / count_w; + gwb.p = Dm->Comm.sumReduce(wb.p) / count_w; else gwb.p = 0.0; if (count_n > 0.0) - gnb.p=sumReduce( Dm->Comm, nb.p) / count_n; + gnb.p = Dm->Comm.sumReduce( nb.p) / count_n; else gnb.p = 0.0; @@ -444,14 +444,14 @@ void SubPhase::Full(){ nd.X -= nc.X; // compute global entities - gnc.V=sumReduce( Dm->Comm, nc.V); 
- gnc.A=sumReduce( Dm->Comm, nc.A); - gnc.H=sumReduce( Dm->Comm, nc.H); - gnc.X=sumReduce( Dm->Comm, nc.X); - gnd.V=sumReduce( Dm->Comm, nd.V); - gnd.A=sumReduce( Dm->Comm, nd.A); - gnd.H=sumReduce( Dm->Comm, nd.H); - gnd.X=sumReduce( Dm->Comm, nd.X); + gnc.V = Dm->Comm.sumReduce( nc.V ); + gnc.A = Dm->Comm.sumReduce( nc.A ); + gnc.H = Dm->Comm.sumReduce( nc.H ); + gnc.X = Dm->Comm.sumReduce( nc.X ); + gnd.V = Dm->Comm.sumReduce( nd.V ); + gnd.A = Dm->Comm.sumReduce( nd.A ); + gnd.H = Dm->Comm.sumReduce( nd.H ); + gnd.X = Dm->Comm.sumReduce( nd.X ); gnd.Nc = nd.Nc; // wetting for (k=0; kComm, wc.V); - gwc.A=sumReduce( Dm->Comm, wc.A); - gwc.H=sumReduce( Dm->Comm, wc.H); - gwc.X=sumReduce( Dm->Comm, wc.X); - gwd.V=sumReduce( Dm->Comm, wd.V); - gwd.A=sumReduce( Dm->Comm, wd.A); - gwd.H=sumReduce( Dm->Comm, wd.H); - gwd.X=sumReduce( Dm->Comm, wd.X); + gwc.V = Dm->Comm.sumReduce( wc.V ); + gwc.A = Dm->Comm.sumReduce( wc.A ); + gwc.H = Dm->Comm.sumReduce( wc.H ); + gwc.X = Dm->Comm.sumReduce( wc.X ); + gwd.V = Dm->Comm.sumReduce( wd.V ); + gwd.A = Dm->Comm.sumReduce( wd.A ); + gwd.H = Dm->Comm.sumReduce( wd.H ); + gwd.X = Dm->Comm.sumReduce( wd.X ); gwd.Nc = wd.Nc; /* Set up geometric analysis of interface region */ @@ -526,20 +526,20 @@ void SubPhase::Full(){ iwn.A = morph_i->A(); iwn.H = morph_i->H(); iwn.X = morph_i->X(); - giwn.V=sumReduce( Dm->Comm, iwn.V); - giwn.A=sumReduce( Dm->Comm, iwn.A); - giwn.H=sumReduce( Dm->Comm, iwn.H); - giwn.X=sumReduce( Dm->Comm, iwn.X); + giwn.V = Dm->Comm.sumReduce( iwn.V ); + giwn.A = Dm->Comm.sumReduce( iwn.A ); + giwn.H = Dm->Comm.sumReduce( iwn.H ); + giwn.X = Dm->Comm.sumReduce( iwn.X ); // measure only the connected part iwnc.Nc = morph_i->MeasureConnectedPathway(); iwnc.V = morph_i->V(); iwnc.A = morph_i->A(); iwnc.H = morph_i->H(); iwnc.X = morph_i->X(); - giwnc.V=sumReduce( Dm->Comm, iwnc.V); - giwnc.A=sumReduce( Dm->Comm, iwnc.A); - giwnc.H=sumReduce( Dm->Comm, iwnc.H); - giwnc.X=sumReduce( Dm->Comm, iwnc.X); + giwnc.V = Dm->Comm.sumReduce( iwnc.V ); + giwnc.A = Dm->Comm.sumReduce( iwnc.A ); + giwnc.H = Dm->Comm.sumReduce( iwnc.H ); + giwnc.X = Dm->Comm.sumReduce( iwnc.X ); giwnc.Nc = iwnc.Nc; double vol_nc_bulk = 0.0; @@ -630,46 +630,46 @@ void SubPhase::Full(){ } } - gnd.M=sumReduce( Dm->Comm, nd.M); - gnd.Px=sumReduce( Dm->Comm, nd.Px); - gnd.Py=sumReduce( Dm->Comm, nd.Py); - gnd.Pz=sumReduce( Dm->Comm, nd.Pz); - gnd.K=sumReduce( Dm->Comm, nd.K); + gnd.M = Dm->Comm.sumReduce( nd.M ); + gnd.Px = Dm->Comm.sumReduce( nd.Px ); + gnd.Py = Dm->Comm.sumReduce( nd.Py ); + gnd.Pz = Dm->Comm.sumReduce( nd.Pz ); + gnd.K = Dm->Comm.sumReduce( nd.K ); - gwd.M=sumReduce( Dm->Comm, wd.M); - gwd.Px=sumReduce( Dm->Comm, wd.Px); - gwd.Py=sumReduce( Dm->Comm, wd.Py); - gwd.Pz=sumReduce( Dm->Comm, wd.Pz); - gwd.K=sumReduce( Dm->Comm, wd.K); + gwd.M = Dm->Comm.sumReduce( wd.M ); + gwd.Px = Dm->Comm.sumReduce( wd.Px ); + gwd.Py = Dm->Comm.sumReduce( wd.Py ); + gwd.Pz = Dm->Comm.sumReduce( wd.Pz ); + gwd.K = Dm->Comm.sumReduce( wd.K ); - gnc.M=sumReduce( Dm->Comm, nc.M); - gnc.Px=sumReduce( Dm->Comm, nc.Px); - gnc.Py=sumReduce( Dm->Comm, nc.Py); - gnc.Pz=sumReduce( Dm->Comm, nc.Pz); - gnc.K=sumReduce( Dm->Comm, nc.K); + gnc.M = Dm->Comm.sumReduce( nc.M ); + gnc.Px = Dm->Comm.sumReduce( nc.Px ); + gnc.Py = Dm->Comm.sumReduce( nc.Py ); + gnc.Pz = Dm->Comm.sumReduce( nc.Pz ); + gnc.K = Dm->Comm.sumReduce( nc.K ); - gwc.M=sumReduce( Dm->Comm, wc.M); - gwc.Px=sumReduce( Dm->Comm, wc.Px); - gwc.Py=sumReduce( Dm->Comm, wc.Py); - gwc.Pz=sumReduce( Dm->Comm, wc.Pz); - 
gwc.K=sumReduce( Dm->Comm, wc.K); + gwc.M = Dm->Comm.sumReduce( wc.M ); + gwc.Px = Dm->Comm.sumReduce( wc.Px ); + gwc.Py = Dm->Comm.sumReduce( wc.Py ); + gwc.Pz = Dm->Comm.sumReduce( wc.Pz ); + gwc.K = Dm->Comm.sumReduce( wc.K ); - giwn.Mn=sumReduce( Dm->Comm, iwn.Mn); - giwn.Pnx=sumReduce( Dm->Comm, iwn.Pnx); - giwn.Pny=sumReduce( Dm->Comm, iwn.Pny); - giwn.Pnz=sumReduce( Dm->Comm, iwn.Pnz); - giwn.Kn=sumReduce( Dm->Comm, iwn.Kn); - giwn.Mw=sumReduce( Dm->Comm, iwn.Mw); - giwn.Pwx=sumReduce( Dm->Comm, iwn.Pwx); - giwn.Pwy=sumReduce( Dm->Comm, iwn.Pwy); - giwn.Pwz=sumReduce( Dm->Comm, iwn.Pwz); - giwn.Kw=sumReduce( Dm->Comm, iwn.Kw); + giwn.Mn = Dm->Comm.sumReduce( iwn.Mn ); + giwn.Pnx = Dm->Comm.sumReduce( iwn.Pnx ); + giwn.Pny = Dm->Comm.sumReduce( iwn.Pny ); + giwn.Pnz = Dm->Comm.sumReduce( iwn.Pnz ); + giwn.Kn = Dm->Comm.sumReduce( iwn.Kn ); + giwn.Mw = Dm->Comm.sumReduce( iwn.Mw ); + giwn.Pwx = Dm->Comm.sumReduce( iwn.Pwx ); + giwn.Pwy = Dm->Comm.sumReduce( iwn.Pwy ); + giwn.Pwz = Dm->Comm.sumReduce( iwn.Pwz ); + giwn.Kw = Dm->Comm.sumReduce( iwn.Kw ); // pressure averaging - gnc.p=sumReduce( Dm->Comm, nc.p); - gnd.p=sumReduce( Dm->Comm, nd.p); - gwc.p=sumReduce( Dm->Comm, wc.p); - gwd.p=sumReduce( Dm->Comm, wd.p); + gnc.p = Dm->Comm.sumReduce( nc.p ); + gnd.p = Dm->Comm.sumReduce( nd.p ); + gwc.p = Dm->Comm.sumReduce( wc.p ); + gwd.p = Dm->Comm.sumReduce( wd.p ); if (vol_wc_bulk > 0.0) wc.p = wc.p /vol_wc_bulk; @@ -680,10 +680,10 @@ void SubPhase::Full(){ if (vol_nd_bulk > 0.0) nd.p = nd.p /vol_nd_bulk; - vol_wc_bulk=sumReduce( Dm->Comm, vol_wc_bulk); - vol_wd_bulk=sumReduce( Dm->Comm, vol_wd_bulk); - vol_nc_bulk=sumReduce( Dm->Comm, vol_nc_bulk); - vol_nd_bulk=sumReduce( Dm->Comm, vol_nd_bulk); + vol_wc_bulk = Dm->Comm.sumReduce( vol_wc_bulk ); + vol_wd_bulk = Dm->Comm.sumReduce( vol_wd_bulk ); + vol_nc_bulk = Dm->Comm.sumReduce( vol_nc_bulk ); + vol_nd_bulk = Dm->Comm.sumReduce( vol_nd_bulk ); if (vol_wc_bulk > 0.0) gwc.p = gwc.p /vol_wc_bulk; @@ -719,7 +719,7 @@ void SubPhase::AggregateLabels( const std::string& filename ) } } } - MPI_Barrier(Dm->Comm); + Dm->Comm.barrier(); Dm->AggregateLabels( filename ); diff --git a/analysis/SubPhase.h b/analysis/SubPhase.h index 71b87ef0..691c654f 100644 --- a/analysis/SubPhase.h +++ b/analysis/SubPhase.h @@ -12,7 +12,7 @@ #include "analysis/distance.h" #include "analysis/Minkowski.h" #include "common/Utilities.h" -#include "common/MPI_Helpers.h" +#include "common/MPI.h" #include "IO/MeshDatabase.h" #include "IO/Reader.h" #include "IO/Writer.h" diff --git a/analysis/TwoPhase.cpp b/analysis/TwoPhase.cpp index 9b2e5fd8..ea136758 100644 --- a/analysis/TwoPhase.cpp +++ b/analysis/TwoPhase.cpp @@ -5,7 +5,7 @@ #include "common/Domain.h" #include "common/Communication.h" #include "common/Utilities.h" -#include "common/MPI_Helpers.h" +#include "common/MPI.h" #include "IO/MeshDatabase.h" #include "IO/Reader.h" #include "IO/Writer.h" @@ -882,7 +882,7 @@ void TwoPhase::ComponentAverages() } } - MPI_Barrier(Dm->Comm); + Dm->Comm.barrier(); if (Dm->rank()==0){ printf("Component averages computed locally -- reducing result... 
\n"); } @@ -895,8 +895,8 @@ void TwoPhase::ComponentAverages() for (int idx=0; idxComm); - MPI_Allreduce(ComponentAverages_NWP.data(),RecvBuffer.data(),BLOB_AVG_COUNT*NumberComponents_NWP, MPI_DOUBLE,MPI_SUM,Dm->Comm); + Dm->Comm.barrier(); + Dm->Comm.sumReduce(ComponentAverages_NWP.data(),RecvBuffer.data(),BLOB_AVG_COUNT*NumberComponents_NWP); // MPI_Reduce(ComponentAverages_NWP.data(),RecvBuffer.data(),BLOB_AVG_COUNT,MPI_DOUBLE,MPI_SUM,0,Dm->Comm); if (Dm->rank()==0){ @@ -993,9 +993,9 @@ void TwoPhase::ComponentAverages() // reduce the wetting phase averages for (int b=0; bComm); + Dm->Comm.barrier(); // MPI_Allreduce(&ComponentAverages_WP(0,b),RecvBuffer.data(),BLOB_AVG_COUNT,MPI_DOUBLE,MPI_SUM,Dm->Comm); - MPI_Reduce(&ComponentAverages_WP(0,b),RecvBuffer.data(),BLOB_AVG_COUNT,MPI_DOUBLE,MPI_SUM,0,Dm->Comm); + Dm->Comm.sumReduce(&ComponentAverages_WP(0,b),RecvBuffer.data(),BLOB_AVG_COUNT); for (int idx=0; idxComm); - MPI_Allreduce(&nwp_volume,&nwp_volume_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm); - MPI_Allreduce(&wp_volume,&wp_volume_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm); - MPI_Allreduce(&awn,&awn_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm); - MPI_Allreduce(&ans,&ans_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm); - MPI_Allreduce(&aws,&aws_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm); - MPI_Allreduce(&lwns,&lwns_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm); - MPI_Allreduce(&As,&As_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm); - MPI_Allreduce(&Jwn,&Jwn_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm); - MPI_Allreduce(&Kwn,&Kwn_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm); - MPI_Allreduce(&KGwns,&KGwns_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm); - MPI_Allreduce(&KNwns,&KNwns_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm); - MPI_Allreduce(&efawns,&efawns_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm); - MPI_Allreduce(&wwndnw,&wwndnw_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm); - MPI_Allreduce(&wwnsdnwn,&wwnsdnwn_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm); - MPI_Allreduce(&Jwnwwndnw,&Jwnwwndnw_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm); + Dm->Comm.barrier(); + nwp_volume_global = Dm->Comm.sumReduce( nwp_volume ); + wp_volume_global = Dm->Comm.sumReduce( wp_volume ); + awn_global = Dm->Comm.sumReduce( awn ); + ans_global = Dm->Comm.sumReduce( ans ); + aws_global = Dm->Comm.sumReduce( aws ); + lwns_global = Dm->Comm.sumReduce( lwns ); + As_global = Dm->Comm.sumReduce( As ); + Jwn_global = Dm->Comm.sumReduce( Jwn ); + Kwn_global = Dm->Comm.sumReduce( Kwn ); + KGwns_global = Dm->Comm.sumReduce( KGwns ); + KNwns_global = Dm->Comm.sumReduce( KNwns ); + efawns_global = Dm->Comm.sumReduce( efawns ); + wwndnw_global = Dm->Comm.sumReduce( wwndnw ); + wwnsdnwn_global = Dm->Comm.sumReduce( wwnsdnwn ); + Jwnwwndnw_global = Dm->Comm.sumReduce( Jwnwwndnw ); // Phase averages - MPI_Allreduce(&vol_w,&vol_w_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm); - MPI_Allreduce(&vol_n,&vol_n_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm); - MPI_Allreduce(&paw,&paw_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm); - MPI_Allreduce(&pan,&pan_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm); - MPI_Allreduce(&vaw(0),&vaw_global(0),3,MPI_DOUBLE,MPI_SUM,Dm->Comm); - MPI_Allreduce(&van(0),&van_global(0),3,MPI_DOUBLE,MPI_SUM,Dm->Comm); - MPI_Allreduce(&vawn(0),&vawn_global(0),3,MPI_DOUBLE,MPI_SUM,Dm->Comm); - MPI_Allreduce(&vawns(0),&vawns_global(0),3,MPI_DOUBLE,MPI_SUM,Dm->Comm); - MPI_Allreduce(&Gwn(0),&Gwn_global(0),6,MPI_DOUBLE,MPI_SUM,Dm->Comm); - MPI_Allreduce(&Gns(0),&Gns_global(0),6,MPI_DOUBLE,MPI_SUM,Dm->Comm); - MPI_Allreduce(&Gws(0),&Gws_global(0),6,MPI_DOUBLE,MPI_SUM,Dm->Comm); - MPI_Allreduce(&trawn,&trawn_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm); - 
MPI_Allreduce(&trJwn,&trJwn_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm); - MPI_Allreduce(&trRwn,&trRwn_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm); - MPI_Allreduce(&euler,&euler_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm); - MPI_Allreduce(&An,&An_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm); - MPI_Allreduce(&Jn,&Jn_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm); - MPI_Allreduce(&Kn,&Kn_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm); - - MPI_Barrier(Dm->Comm); + vol_w_global = Dm->Comm.sumReduce( vol_w ); + vol_n_global = Dm->Comm.sumReduce( vol_n ); + paw_global = Dm->Comm.sumReduce( paw ); + pan_global = Dm->Comm.sumReduce( pan ); + vaw_global(0) = Dm->Comm.sumReduce( vaw(0) ); + van_global(0) = Dm->Comm.sumReduce( van(0) ); + vawn_global(0) = Dm->Comm.sumReduce( vawn(0) ); + vawns_global(0) = Dm->Comm.sumReduce( vawns(0) ); + Gwn_global(0) = Dm->Comm.sumReduce( Gwn(0) ); + Gns_global(0) = Dm->Comm.sumReduce( Gns(0) ); + Gws_global(0) = Dm->Comm.sumReduce( Gws(0) ); + trawn_global = Dm->Comm.sumReduce( trawn ); + trJwn_global = Dm->Comm.sumReduce( trJwn ); + trRwn_global = Dm->Comm.sumReduce( trRwn ); + euler_global = Dm->Comm.sumReduce( euler ); + An_global = Dm->Comm.sumReduce( An ); + Jn_global = Dm->Comm.sumReduce( Jn ); + Kn_global = Dm->Comm.sumReduce( Kn ); + Dm->Comm.barrier(); // Normalize the phase averages // (density of both components = 1.0) diff --git a/analysis/TwoPhase.h b/analysis/TwoPhase.h index fddd04e8..4d500a89 100644 --- a/analysis/TwoPhase.h +++ b/analysis/TwoPhase.h @@ -12,7 +12,7 @@ #include "common/Domain.h" #include "common/Communication.h" #include "common/Utilities.h" -#include "common/MPI_Helpers.h" +#include "common/MPI.h" #include "IO/MeshDatabase.h" #include "IO/Reader.h" #include "IO/Writer.h" diff --git a/analysis/analysis.cpp b/analysis/analysis.cpp index 7587f3c5..4298750e 100644 --- a/analysis/analysis.cpp +++ b/analysis/analysis.cpp @@ -188,7 +188,7 @@ int ComputeLocalPhaseComponent(const IntArray &PhaseID, int &VALUE, BlobIDArray /****************************************************************** * Reorder the global blob ids * ******************************************************************/ -static int ReorderBlobIDs2( BlobIDArray& ID, int N_blobs, int ngx, int ngy, int ngz, MPI_Comm comm ) +static int ReorderBlobIDs2( BlobIDArray& ID, int N_blobs, int ngx, int ngy, int ngz, const Utilities::MPI& comm ) { if ( N_blobs==0 ) return 0; @@ -212,7 +212,7 @@ static int ReorderBlobIDs2( BlobIDArray& ID, int N_blobs, int ngx, int ngy, int } } ASSERT(max_id > map1(N_blobs); int N_blobs2 = 0; for (int i=0; i& N_recv, int64_t *send_buf, std::vector& recv_buf, std::map& remote_map, - MPI_Comm comm ) + const Utilities::MPI& comm ) { std::vector send_req(neighbors.size()); std::vector recv_req(neighbors.size()); - std::vector status(neighbors.size()); - std::map::const_iterator it = map.begin(); + auto it = map.begin(); ASSERT(N_send==(int)map.size()); for (size_t i=0; ifirst; send_buf[2*i+1] = it->second.new_id; } for (size_t i=0; ifirst] = it->second.new_id; } for (size_t i=0; i& remote_map, @@ -304,18 +303,18 @@ static bool updateLocalIds( const std::map& remote_map, return changed; } static int LocalToGlobalIDs( int nx, int ny, int nz, const RankInfoStruct& rank_info, - int nblobs, BlobIDArray& IDs, MPI_Comm comm ) + int nblobs, BlobIDArray& IDs, const Utilities::MPI& comm ) { PROFILE_START("LocalToGlobalIDs",1); const int rank = rank_info.rank[1][1][1]; - int nprocs = comm_size(comm); + int nprocs = comm.getSize(); const int ngx = (IDs.size(0)-nx)/2; const int ngy = (IDs.size(1)-ny)/2; const int 
ngz = (IDs.size(2)-nz)/2; // Get the number of blobs for each rank std::vector N_blobs(nprocs,0); PROFILE_START("LocalToGlobalIDs-Allgather",1); - MPI_Allgather(&nblobs,1,MPI_INT,getPtr(N_blobs),1,MPI_INT,comm); + comm.allGather(nblobs,getPtr(N_blobs)); PROFILE_STOP("LocalToGlobalIDs-Allgather",1); int64_t N_blobs_tot = 0; int offset = 0; @@ -363,13 +362,12 @@ static int LocalToGlobalIDs( int nx, int ny, int nz, const RankInfoStruct& rank_ std::vector N_recv(neighbors.size(),0); std::vector send_req(neighbors.size()); std::vector recv_req(neighbors.size()); - std::vector status(neighbors.size()); for (size_t i=0; i recv_buf(neighbors.size()); @@ -398,8 +396,7 @@ static int LocalToGlobalIDs( int nx, int ny, int nz, const RankInfoStruct& rank_ bool changed = updateLocalIds( remote_map, map ); // Check if we are finished int test = changed ? 1:0; - int result = 0; - MPI_Allreduce(&test,&result,1,MPI_INT,MPI_SUM,comm); + int result = comm.sumReduce( test ); if ( result==0 ) break; } @@ -435,7 +432,7 @@ static int LocalToGlobalIDs( int nx, int ny, int nz, const RankInfoStruct& rank_ } int ComputeGlobalBlobIDs( int nx, int ny, int nz, const RankInfoStruct& rank_info, const DoubleArray& Phase, const DoubleArray& SignDist, double vF, double vS, - BlobIDArray& GlobalBlobID, MPI_Comm comm ) + BlobIDArray& GlobalBlobID, const Utilities::MPI& comm ) { PROFILE_START("ComputeGlobalBlobIDs"); // First compute the local ids @@ -446,7 +443,7 @@ int ComputeGlobalBlobIDs( int nx, int ny, int nz, const RankInfoStruct& rank_inf return nglobal; } int ComputeGlobalPhaseComponent( int nx, int ny, int nz, const RankInfoStruct& rank_info, - const IntArray &PhaseID, int &VALUE, BlobIDArray &GlobalBlobID, MPI_Comm comm ) + const IntArray &PhaseID, int &VALUE, BlobIDArray &GlobalBlobID, const Utilities::MPI& comm ) { PROFILE_START("ComputeGlobalPhaseComponent"); // First compute the local ids @@ -462,37 +459,27 @@ int ComputeGlobalPhaseComponent( int nx, int ny, int nz, const RankInfoStruct& r * Compute the mapping of blob ids between timesteps * ******************************************************************/ typedef std::map > map_type; -template inline MPI_Datatype getMPIType(); -template<> inline MPI_Datatype getMPIType() { return MPI_INT; } -template<> inline MPI_Datatype getMPIType() { - if ( sizeof(int64_t)==sizeof(long int) ) - return MPI_LONG; - else if ( sizeof(int64_t)==sizeof(double) ) - return MPI_DOUBLE; -} template -void gatherSet( std::set& set, MPI_Comm comm ) +void gatherSet( std::set& set, const Utilities::MPI& comm ) { - int nprocs = comm_size(comm); - MPI_Datatype type = getMPIType(); + int nprocs = comm.getSize(); std::vector send_data(set.begin(),set.end()); int send_count = send_data.size(); std::vector recv_count(nprocs,0), recv_disp(nprocs,0); - MPI_Allgather(&send_count,1,MPI_INT,getPtr(recv_count),1,MPI_INT,comm); + comm.allGather( send_count, getPtr(recv_count) ); for (int i=1; i recv_data(recv_disp[nprocs-1]+recv_count[nprocs-1]); - MPI_Allgatherv(getPtr(send_data),send_count,type, - getPtr(recv_data),getPtr(recv_count),getPtr(recv_disp),type,comm); + comm.allGather( getPtr(send_data), send_count, getPtr(recv_data), + getPtr(recv_count), getPtr(recv_disp), true ); for (size_t i=0; i(); + int nprocs = comm.getSize(); std::vector send_data; - for (map_type::const_iterator it=src_map.begin(); it!=src_map.end(); ++it) { + for (auto it=src_map.begin(); it!=src_map.end(); ++it) { int id = it->first; const std::map& src_ids = it->second; send_data.push_back(id); @@ -505,21 +492,21 @@ void 
gatherSrcIDMap( map_type& src_map, MPI_Comm comm ) } int send_count = send_data.size(); std::vector recv_count(nprocs,0), recv_disp(nprocs,0); - MPI_Allgather(&send_count,1,MPI_INT,getPtr(recv_count),1,MPI_INT,comm); + comm.allGather(send_count,getPtr(recv_count)); for (int i=1; i recv_data(recv_disp[nprocs-1]+recv_count[nprocs-1]); - MPI_Allgatherv(getPtr(send_data),send_count,type, - getPtr(recv_data),getPtr(recv_count),getPtr(recv_disp),type,comm); + comm.allGather(getPtr(send_data),send_count, + getPtr(recv_data),getPtr(recv_count),getPtr(recv_disp),true); size_t i=0; src_map.clear(); while ( i < recv_data.size() ) { BlobIDType id = recv_data[i]; size_t count = recv_data[i+1]; i += 2; - std::map& src_ids = src_map[id]; + auto& src_ids = src_map[id]; for (size_t j=0; j::iterator it = src_ids.find(recv_data[i]); + auto it = src_ids.find(recv_data[i]); if ( it == src_ids.end() ) src_ids.insert(std::pair(recv_data[i],recv_data[i+1])); else @@ -538,7 +525,7 @@ void addSrcDstIDs( BlobIDType src_id, map_type& src_map, map_type& dst_map, } } ID_map_struct computeIDMap( int nx, int ny, int nz, - const BlobIDArray& ID1, const BlobIDArray& ID2, MPI_Comm comm ) + const BlobIDArray& ID1, const BlobIDArray& ID2, const Utilities::MPI& comm ) { ASSERT(ID1.size()==ID2.size()); PROFILE_START("computeIDMap"); @@ -780,7 +767,7 @@ void renumberIDs( const std::vector& new_ids, BlobIDArray& IDs ) ******************************************************************/ void writeIDMap( const ID_map_struct& map, long long int timestep, const std::string& filename ) { - int rank = MPI_WORLD_RANK(); + int rank = Utilities::MPI( MPI_COMM_WORLD ).getRank(); if ( rank!=0 ) return; bool empty = map.created.empty() && map.destroyed.empty() && diff --git a/analysis/analysis.h b/analysis/analysis.h index 2ce531b1..ec377995 100644 --- a/analysis/analysis.h +++ b/analysis/analysis.h @@ -58,7 +58,7 @@ int ComputeLocalPhaseComponent( const IntArray &PhaseID, int &VALUE, IntArray &C */ int ComputeGlobalBlobIDs( int nx, int ny, int nz, const RankInfoStruct& rank_info, const DoubleArray& Phase, const DoubleArray& SignDist, double vF, double vS, - BlobIDArray& GlobalBlobID, MPI_Comm comm ); + BlobIDArray& GlobalBlobID, const Utilities::MPI& comm ); /*! @@ -75,7 +75,7 @@ int ComputeGlobalBlobIDs( int nx, int ny, int nz, const RankInfoStruct& rank_inf * @return Return the number of components in the specified phase */ int ComputeGlobalPhaseComponent( int nx, int ny, int nz, const RankInfoStruct& rank_info, - const IntArray &PhaseID, int &VALUE, BlobIDArray &GlobalBlobID, MPI_Comm comm ); + const IntArray &PhaseID, int &VALUE, BlobIDArray &GlobalBlobID, const Utilities::MPI& comm ); /*! @@ -87,7 +87,7 @@ int ComputeGlobalPhaseComponent( int nx, int ny, int nz, const RankInfoStruct& r * @param[in] nz Number of elements in the z-direction * @param[in/out] ID The ids of the blobs */ -void ReorderBlobIDs( BlobIDArray& ID, MPI_Comm comm ); +void ReorderBlobIDs( BlobIDArray& ID, const Utilities::MPI& comm ); typedef std::pair > BlobIDSplitStruct; @@ -120,7 +120,7 @@ struct ID_map_struct { * @param[in] ID1 The blob ids at the first timestep * @param[in] ID2 The blob ids at the second timestep */ -ID_map_struct computeIDMap( int nx, int ny, int nz, const BlobIDArray& ID1, const BlobIDArray& ID2, MPI_Comm comm ); +ID_map_struct computeIDMap( int nx, int ny, int nz, const BlobIDArray& ID1, const BlobIDArray& ID2, const Utilities::MPI& comm ); /*! 
diff --git a/analysis/distance.cpp b/analysis/distance.cpp index e297b435..9c605e1e 100644 --- a/analysis/distance.cpp +++ b/analysis/distance.cpp @@ -176,7 +176,7 @@ void CalcVecDist( Array &d, const Array &ID0, const Domain &Dm, // Update distance double err = calcVecUpdateInterior( d, dx[0], dx[1], dx[2] ); // Check if we are finished - err = maxReduce( Dm.Comm, err ); + err = Dm.Comm.maxReduce( err ); if ( err < tol ) break; } diff --git a/analysis/morphology.cpp b/analysis/morphology.cpp index 05278313..a65cb237 100644 --- a/analysis/morphology.cpp +++ b/analysis/morphology.cpp @@ -58,11 +58,11 @@ double MorphOpen(DoubleArray &SignDist, signed char *id, std::shared_ptr } } } - MPI_Barrier(Dm->Comm); + Dm->Comm.barrier(); // total Global is the number of nodes in the pore-space - MPI_Allreduce(&count,&totalGlobal,1,MPI_DOUBLE,MPI_SUM,Dm->Comm); - MPI_Allreduce(&maxdist,&maxdistGlobal,1,MPI_DOUBLE,MPI_MAX,Dm->Comm); + totalGlobal = Dm->Comm.sumReduce( count ); + maxdistGlobal = Dm->Comm.sumReduce( maxdist ); double volume=double(nprocx*nprocy*nprocz)*double(nx-2)*double(ny-2)*double(nz-2); double volume_fraction=totalGlobal/volume; if (rank==0) printf("Volume fraction for morphological opening: %f \n",volume_fraction); @@ -133,7 +133,6 @@ double MorphOpen(DoubleArray &SignDist, signed char *id, std::shared_ptr double deltaR=0.05; // amount to change the radius in voxel units double Rcrit_old; - double GlobalNumber = 1.f; int imin,jmin,kmin,imax,jmax,kmax; if (ErodeLabel == 1){ @@ -203,41 +202,41 @@ double MorphOpen(DoubleArray &SignDist, signed char *id, std::shared_ptr PackID(Dm->sendList_YZ, Dm->sendCount_YZ ,sendID_YZ, id); //...................................................................................... MPI_Sendrecv(sendID_x,Dm->sendCount_x,MPI_CHAR,Dm->rank_x(),sendtag, - recvID_X,Dm->recvCount_X,MPI_CHAR,Dm->rank_X(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); + recvID_X,Dm->recvCount_X,MPI_CHAR,Dm->rank_X(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); MPI_Sendrecv(sendID_X,Dm->sendCount_X,MPI_CHAR,Dm->rank_X(),sendtag, - recvID_x,Dm->recvCount_x,MPI_CHAR,Dm->rank_x(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); + recvID_x,Dm->recvCount_x,MPI_CHAR,Dm->rank_x(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); MPI_Sendrecv(sendID_y,Dm->sendCount_y,MPI_CHAR,Dm->rank_y(),sendtag, - recvID_Y,Dm->recvCount_Y,MPI_CHAR,Dm->rank_Y(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); + recvID_Y,Dm->recvCount_Y,MPI_CHAR,Dm->rank_Y(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); MPI_Sendrecv(sendID_Y,Dm->sendCount_Y,MPI_CHAR,Dm->rank_Y(),sendtag, - recvID_y,Dm->recvCount_y,MPI_CHAR,Dm->rank_y(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); + recvID_y,Dm->recvCount_y,MPI_CHAR,Dm->rank_y(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); MPI_Sendrecv(sendID_z,Dm->sendCount_z,MPI_CHAR,Dm->rank_z(),sendtag, - recvID_Z,Dm->recvCount_Z,MPI_CHAR,Dm->rank_Z(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); + recvID_Z,Dm->recvCount_Z,MPI_CHAR,Dm->rank_Z(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); MPI_Sendrecv(sendID_Z,Dm->sendCount_Z,MPI_CHAR,Dm->rank_Z(),sendtag, - recvID_z,Dm->recvCount_z,MPI_CHAR,Dm->rank_z(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); + recvID_z,Dm->recvCount_z,MPI_CHAR,Dm->rank_z(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); MPI_Sendrecv(sendID_xy,Dm->sendCount_xy,MPI_CHAR,Dm->rank_xy(),sendtag, - recvID_XY,Dm->recvCount_XY,MPI_CHAR,Dm->rank_XY(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); + 
recvID_XY,Dm->recvCount_XY,MPI_CHAR,Dm->rank_XY(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); MPI_Sendrecv(sendID_XY,Dm->sendCount_XY,MPI_CHAR,Dm->rank_XY(),sendtag, - recvID_xy,Dm->recvCount_xy,MPI_CHAR,Dm->rank_xy(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); + recvID_xy,Dm->recvCount_xy,MPI_CHAR,Dm->rank_xy(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); MPI_Sendrecv(sendID_Xy,Dm->sendCount_Xy,MPI_CHAR,Dm->rank_Xy(),sendtag, - recvID_xY,Dm->recvCount_xY,MPI_CHAR,Dm->rank_xY(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); + recvID_xY,Dm->recvCount_xY,MPI_CHAR,Dm->rank_xY(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); MPI_Sendrecv(sendID_xY,Dm->sendCount_xY,MPI_CHAR,Dm->rank_xY(),sendtag, - recvID_Xy,Dm->recvCount_Xy,MPI_CHAR,Dm->rank_Xy(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); + recvID_Xy,Dm->recvCount_Xy,MPI_CHAR,Dm->rank_Xy(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); MPI_Sendrecv(sendID_xz,Dm->sendCount_xz,MPI_CHAR,Dm->rank_xz(),sendtag, - recvID_XZ,Dm->recvCount_XZ,MPI_CHAR,Dm->rank_XZ(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); + recvID_XZ,Dm->recvCount_XZ,MPI_CHAR,Dm->rank_XZ(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); MPI_Sendrecv(sendID_XZ,Dm->sendCount_XZ,MPI_CHAR,Dm->rank_XZ(),sendtag, - recvID_xz,Dm->recvCount_xz,MPI_CHAR,Dm->rank_xz(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); + recvID_xz,Dm->recvCount_xz,MPI_CHAR,Dm->rank_xz(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); MPI_Sendrecv(sendID_Xz,Dm->sendCount_Xz,MPI_CHAR,Dm->rank_Xz(),sendtag, - recvID_xZ,Dm->recvCount_xZ,MPI_CHAR,Dm->rank_xZ(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); + recvID_xZ,Dm->recvCount_xZ,MPI_CHAR,Dm->rank_xZ(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); MPI_Sendrecv(sendID_xZ,Dm->sendCount_xZ,MPI_CHAR,Dm->rank_xZ(),sendtag, - recvID_Xz,Dm->recvCount_Xz,MPI_CHAR,Dm->rank_Xz(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); + recvID_Xz,Dm->recvCount_Xz,MPI_CHAR,Dm->rank_Xz(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); MPI_Sendrecv(sendID_yz,Dm->sendCount_yz,MPI_CHAR,Dm->rank_yz(),sendtag, - recvID_YZ,Dm->recvCount_YZ,MPI_CHAR,Dm->rank_YZ(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); + recvID_YZ,Dm->recvCount_YZ,MPI_CHAR,Dm->rank_YZ(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); MPI_Sendrecv(sendID_YZ,Dm->sendCount_YZ,MPI_CHAR,Dm->rank_YZ(),sendtag, - recvID_yz,Dm->recvCount_yz,MPI_CHAR,Dm->rank_yz(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); + recvID_yz,Dm->recvCount_yz,MPI_CHAR,Dm->rank_yz(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); MPI_Sendrecv(sendID_Yz,Dm->sendCount_Yz,MPI_CHAR,Dm->rank_Yz(),sendtag, - recvID_yZ,Dm->recvCount_yZ,MPI_CHAR,Dm->rank_yZ(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); + recvID_yZ,Dm->recvCount_yZ,MPI_CHAR,Dm->rank_yZ(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); MPI_Sendrecv(sendID_yZ,Dm->sendCount_yZ,MPI_CHAR,Dm->rank_yZ(),sendtag, - recvID_Yz,Dm->recvCount_Yz,MPI_CHAR,Dm->rank_Yz(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); + recvID_Yz,Dm->recvCount_Yz,MPI_CHAR,Dm->rank_Yz(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); //...................................................................................... UnpackID(Dm->recvList_x, Dm->recvCount_x ,recvID_x, id); UnpackID(Dm->recvList_X, Dm->recvCount_X ,recvID_X, id); @@ -259,7 +258,7 @@ double MorphOpen(DoubleArray &SignDist, signed char *id, std::shared_ptr UnpackID(Dm->recvList_YZ, Dm->recvCount_YZ ,recvID_YZ, id); //...................................................................................... 
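// --- Editor's usage sketch (not part of the patch) ---------------------------------
// The halo exchanges in morphology.cpp keep the raw MPI_Sendrecv calls and only unwrap
// the communicator with getCommunicator().  A reduced single-face sketch of that interop
// pattern, assuming only getCommunicator() and barrier() from the wrapper; the buffer
// names, counts, and ranks are placeholders.
#include "common/MPI.h"
#include <mpi.h>

void halo_exchange_example( const Utilities::MPI& comm,
                            char *sendbuf, int sendCount, int rank_send,
                            char *recvbuf, int recvCount, int rank_recv, int tag )
{
    // The wrapper still owns the communicator; raw MPI routines receive it via getCommunicator().
    MPI_Sendrecv( sendbuf, sendCount, MPI_CHAR, rank_send, tag,
                  recvbuf, recvCount, MPI_CHAR, rank_recv, tag,
                  comm.getCommunicator(), MPI_STATUS_IGNORE );
    comm.barrier();
}
// ------------------------------------------------------------------------------------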
- MPI_Allreduce(&LocalNumber,&GlobalNumber,1,MPI_DOUBLE,MPI_SUM,Dm->Comm); + //double GlobalNumber = Dm->Comm.sumReduce( LocalNumber ); count = 0.f; for (int k=1; k } } } - MPI_Allreduce(&count,&countGlobal,1,MPI_DOUBLE,MPI_SUM,Dm->Comm); + countGlobal = Dm->Comm.sumReduce( count ); void_fraction_new = countGlobal/totalGlobal; void_fraction_diff_new = abs(void_fraction_new-VoidFraction); /* if (rank==0){ @@ -360,11 +359,11 @@ double MorphDrain(DoubleArray &SignDist, signed char *id, std::shared_ptrComm); + Dm->Comm.barrier(); // total Global is the number of nodes in the pore-space - MPI_Allreduce(&count,&totalGlobal,1,MPI_DOUBLE,MPI_SUM,Dm->Comm); - MPI_Allreduce(&maxdist,&maxdistGlobal,1,MPI_DOUBLE,MPI_MAX,Dm->Comm); + totalGlobal = Dm->Comm.sumReduce( count ); + maxdistGlobal = Dm->Comm.sumReduce( maxdist ); double volume=double(nprocx*nprocy*nprocz)*double(nx-2)*double(ny-2)*double(nz-2); double volume_fraction=totalGlobal/volume; if (rank==0) printf("Volume fraction for morphological opening: %f \n",volume_fraction); @@ -434,7 +433,6 @@ double MorphDrain(DoubleArray &SignDist, signed char *id, std::shared_ptrComm); + Dm->Comm.barrier(); FILE *DRAIN = fopen("morphdrain.csv","w"); @@ -509,41 +507,41 @@ double MorphDrain(DoubleArray &SignDist, signed char *id, std::shared_ptrsendList_YZ, Dm->sendCount_YZ ,sendID_YZ, id); //...................................................................................... MPI_Sendrecv(sendID_x,Dm->sendCount_x,MPI_CHAR,Dm->rank_x(),sendtag, - recvID_X,Dm->recvCount_X,MPI_CHAR,Dm->rank_X(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); + recvID_X,Dm->recvCount_X,MPI_CHAR,Dm->rank_X(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); MPI_Sendrecv(sendID_X,Dm->sendCount_X,MPI_CHAR,Dm->rank_X(),sendtag, - recvID_x,Dm->recvCount_x,MPI_CHAR,Dm->rank_x(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); + recvID_x,Dm->recvCount_x,MPI_CHAR,Dm->rank_x(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); MPI_Sendrecv(sendID_y,Dm->sendCount_y,MPI_CHAR,Dm->rank_y(),sendtag, - recvID_Y,Dm->recvCount_Y,MPI_CHAR,Dm->rank_Y(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); + recvID_Y,Dm->recvCount_Y,MPI_CHAR,Dm->rank_Y(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); MPI_Sendrecv(sendID_Y,Dm->sendCount_Y,MPI_CHAR,Dm->rank_Y(),sendtag, - recvID_y,Dm->recvCount_y,MPI_CHAR,Dm->rank_y(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); + recvID_y,Dm->recvCount_y,MPI_CHAR,Dm->rank_y(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); MPI_Sendrecv(sendID_z,Dm->sendCount_z,MPI_CHAR,Dm->rank_z(),sendtag, - recvID_Z,Dm->recvCount_Z,MPI_CHAR,Dm->rank_Z(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); + recvID_Z,Dm->recvCount_Z,MPI_CHAR,Dm->rank_Z(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); MPI_Sendrecv(sendID_Z,Dm->sendCount_Z,MPI_CHAR,Dm->rank_Z(),sendtag, - recvID_z,Dm->recvCount_z,MPI_CHAR,Dm->rank_z(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); + recvID_z,Dm->recvCount_z,MPI_CHAR,Dm->rank_z(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); MPI_Sendrecv(sendID_xy,Dm->sendCount_xy,MPI_CHAR,Dm->rank_xy(),sendtag, - recvID_XY,Dm->recvCount_XY,MPI_CHAR,Dm->rank_XY(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); + recvID_XY,Dm->recvCount_XY,MPI_CHAR,Dm->rank_XY(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); MPI_Sendrecv(sendID_XY,Dm->sendCount_XY,MPI_CHAR,Dm->rank_XY(),sendtag, - recvID_xy,Dm->recvCount_xy,MPI_CHAR,Dm->rank_xy(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); + recvID_xy,Dm->recvCount_xy,MPI_CHAR,Dm->rank_xy(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); 
MPI_Sendrecv(sendID_Xy,Dm->sendCount_Xy,MPI_CHAR,Dm->rank_Xy(),sendtag, - recvID_xY,Dm->recvCount_xY,MPI_CHAR,Dm->rank_xY(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); + recvID_xY,Dm->recvCount_xY,MPI_CHAR,Dm->rank_xY(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); MPI_Sendrecv(sendID_xY,Dm->sendCount_xY,MPI_CHAR,Dm->rank_xY(),sendtag, - recvID_Xy,Dm->recvCount_Xy,MPI_CHAR,Dm->rank_Xy(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); + recvID_Xy,Dm->recvCount_Xy,MPI_CHAR,Dm->rank_Xy(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); MPI_Sendrecv(sendID_xz,Dm->sendCount_xz,MPI_CHAR,Dm->rank_xz(),sendtag, - recvID_XZ,Dm->recvCount_XZ,MPI_CHAR,Dm->rank_XZ(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); + recvID_XZ,Dm->recvCount_XZ,MPI_CHAR,Dm->rank_XZ(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); MPI_Sendrecv(sendID_XZ,Dm->sendCount_XZ,MPI_CHAR,Dm->rank_XZ(),sendtag, - recvID_xz,Dm->recvCount_xz,MPI_CHAR,Dm->rank_xz(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); + recvID_xz,Dm->recvCount_xz,MPI_CHAR,Dm->rank_xz(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); MPI_Sendrecv(sendID_Xz,Dm->sendCount_Xz,MPI_CHAR,Dm->rank_Xz(),sendtag, - recvID_xZ,Dm->recvCount_xZ,MPI_CHAR,Dm->rank_xZ(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); + recvID_xZ,Dm->recvCount_xZ,MPI_CHAR,Dm->rank_xZ(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); MPI_Sendrecv(sendID_xZ,Dm->sendCount_xZ,MPI_CHAR,Dm->rank_xZ(),sendtag, - recvID_Xz,Dm->recvCount_Xz,MPI_CHAR,Dm->rank_Xz(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); + recvID_Xz,Dm->recvCount_Xz,MPI_CHAR,Dm->rank_Xz(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); MPI_Sendrecv(sendID_yz,Dm->sendCount_yz,MPI_CHAR,Dm->rank_yz(),sendtag, - recvID_YZ,Dm->recvCount_YZ,MPI_CHAR,Dm->rank_YZ(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); + recvID_YZ,Dm->recvCount_YZ,MPI_CHAR,Dm->rank_YZ(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); MPI_Sendrecv(sendID_YZ,Dm->sendCount_YZ,MPI_CHAR,Dm->rank_YZ(),sendtag, - recvID_yz,Dm->recvCount_yz,MPI_CHAR,Dm->rank_yz(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); + recvID_yz,Dm->recvCount_yz,MPI_CHAR,Dm->rank_yz(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); MPI_Sendrecv(sendID_Yz,Dm->sendCount_Yz,MPI_CHAR,Dm->rank_Yz(),sendtag, - recvID_yZ,Dm->recvCount_yZ,MPI_CHAR,Dm->rank_yZ(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); + recvID_yZ,Dm->recvCount_yZ,MPI_CHAR,Dm->rank_yZ(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); MPI_Sendrecv(sendID_yZ,Dm->sendCount_yZ,MPI_CHAR,Dm->rank_yZ(),sendtag, - recvID_Yz,Dm->recvCount_Yz,MPI_CHAR,Dm->rank_Yz(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); + recvID_Yz,Dm->recvCount_Yz,MPI_CHAR,Dm->rank_Yz(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); //...................................................................................... UnpackID(Dm->recvList_x, Dm->recvCount_x ,recvID_x, id); UnpackID(Dm->recvList_X, Dm->recvCount_X ,recvID_X, id); @@ -564,7 +562,7 @@ double MorphDrain(DoubleArray &SignDist, signed char *id, std::shared_ptrrecvList_yZ, Dm->recvCount_yZ ,recvID_yZ, id); UnpackID(Dm->recvList_YZ, Dm->recvCount_YZ ,recvID_YZ, id); //...................................................................................... 
- MPI_Allreduce(&LocalNumber,&GlobalNumber,1,MPI_DOUBLE,MPI_SUM,Dm->Comm); + // double GlobalNumber = Dm->Comm.sumReduce( LocalNumber ); for (int k=0; krank_info,phase,SignDist,vF,vS,phase_label,Dm->Comm); - MPI_Barrier(Dm->Comm); + Dm->Comm.barrier(); for (int k=0; kComm); + countGlobal = Dm->Comm.sumReduce( count ); void_fraction_new = countGlobal/totalGlobal; void_fraction_diff_new = abs(void_fraction_new-VoidFraction); if (rank==0){ @@ -702,7 +700,7 @@ double MorphGrow(DoubleArray &BoundaryDist, DoubleArray &Dist, Array &id, } } } - double count_original=sumReduce( Dm->Comm, count); + double count_original = Dm->Comm.sumReduce( count); // Estimate morph_delta double morph_delta = 0.0; @@ -732,8 +730,8 @@ double MorphGrow(DoubleArray &BoundaryDist, DoubleArray &Dist, Array &id, } } } - count=sumReduce( Dm->Comm, count); - MAX_DISPLACEMENT = maxReduce( Dm->Comm, MAX_DISPLACEMENT); + count = Dm->Comm.sumReduce( count ); + MAX_DISPLACEMENT = Dm->Comm.maxReduce( MAX_DISPLACEMENT ); GrowthEstimate = count - count_original; ERROR = fabs((GrowthEstimate-TargetGrowth) /TargetGrowth); @@ -776,7 +774,7 @@ double MorphGrow(DoubleArray &BoundaryDist, DoubleArray &Dist, Array &id, } } } - count=sumReduce( Dm->Comm, count); + count = Dm->Comm.sumReduce( count ); return count; } diff --git a/analysis/runAnalysis.cpp b/analysis/runAnalysis.cpp index 6c76f58b..89451c7b 100644 --- a/analysis/runAnalysis.cpp +++ b/analysis/runAnalysis.cpp @@ -3,7 +3,7 @@ #include "analysis/analysis.h" #include "common/Array.h" #include "common/Communication.h" -#include "common/MPI_Helpers.h" +#include "common/MPI.h" #include "common/ScaLBL.h" #include "models/ColorModel.h" @@ -462,7 +462,7 @@ private: /****************************************************************** * MPI comm wrapper for use with analysis * ******************************************************************/ -runAnalysis::commWrapper::commWrapper( int tag_, MPI_Comm comm_, runAnalysis* analysis_ ): +runAnalysis::commWrapper::commWrapper( int tag_, const Utilities::MPI& comm_, runAnalysis* analysis_ ): comm(comm_), tag(tag_), analysis(analysis_) @@ -479,7 +479,7 @@ runAnalysis::commWrapper::~commWrapper() { if ( tag == -1 ) return; - MPI_Barrier( comm ); + comm.barrier(); analysis->d_comm_used[tag] = false; } runAnalysis::commWrapper runAnalysis::getComm( ) @@ -496,10 +496,10 @@ runAnalysis::commWrapper runAnalysis::getComm( ) if ( tag == -1 ) ERROR("Unable to get comm"); } - MPI_Bcast( &tag, 1, MPI_INT, 0, d_comm ); + tag = d_comm.bcast( tag, 0 ); d_comm_used[tag] = true; - if ( d_comms[tag] == MPI_COMM_NULL ) - MPI_Comm_dup( MPI_COMM_WORLD, &d_comms[tag] ); + if ( d_comms[tag].isNull() ) + d_comms[tag] = d_comm.dup(); return commWrapper(tag,d_comms[tag],this); } @@ -507,14 +507,20 @@ runAnalysis::commWrapper runAnalysis::getComm( ) /****************************************************************** * Constructor/Destructors * ******************************************************************/ -runAnalysis::runAnalysis(std::shared_ptr input_db, const RankInfoStruct& rank_info, std::shared_ptr ScaLBL_Comm, std::shared_ptr Dm, - int Np, bool Regular, IntArray Map ): - d_Np( Np ), - d_regular ( Regular), - d_rank_info( rank_info ), - d_Map( Map ), - d_fillData(Dm->Comm,Dm->rank_info,{Dm->Nx-2,Dm->Ny-2,Dm->Nz-2},{1,1,1},0,1), - d_ScaLBL_Comm( ScaLBL_Comm) +runAnalysis::runAnalysis( std::shared_ptr input_db, + const RankInfoStruct& rank_info, + std::shared_ptr ScaLBL_Comm, + std::shared_ptr Dm, + int Np, + bool Regular, + IntArray Map ): + d_Np( Np ), + 
d_regular ( Regular), + d_rank_info( rank_info ), + d_Map( Map ), + d_fillData(Dm->Comm,Dm->rank_info,{Dm->Nx-2,Dm->Ny-2,Dm->Nz-2},{1,1,1},0,1), + d_comm( Utilities::MPI( MPI_COMM_WORLD ).dup() ), + d_ScaLBL_Comm( ScaLBL_Comm) { auto db = input_db->getDatabase( "Analysis" ); @@ -552,7 +558,7 @@ runAnalysis::runAnalysis(std::shared_ptr input_db, const RankInfoStruc d_restartFile = restart_file + "." + rankString; - d_rank = MPI_WORLD_RANK(); + d_rank = d_comm.getRank(); writeIDMap(ID_map_struct(),0,id_map_filename); // Initialize IO for silo IO::initialize("","silo","false"); @@ -621,11 +627,8 @@ runAnalysis::runAnalysis(std::shared_ptr input_db, const RankInfoStruc // Initialize the comms - MPI_Comm_dup(MPI_COMM_WORLD,&d_comm); - for (int i=0; i<1024; i++) { - d_comms[i] = MPI_COMM_NULL; + for (int i=0; i<1024; i++) d_comm_used[i] = false; - } // Initialize the threads int N_threads = db->getWithDefault( "N_threads", 4 ); auto method = db->getWithDefault( "load_balance", "default" ); @@ -635,12 +638,6 @@ runAnalysis::~runAnalysis( ) { // Finish processing analysis finish(); - // Clear internal data - MPI_Comm_free( &d_comm ); - for (int i=0; i<1024; i++) { - if ( d_comms[i] != MPI_COMM_NULL ) - MPI_Comm_free(&d_comms[i]); - } } void runAnalysis::finish( ) { @@ -654,7 +651,7 @@ void runAnalysis::finish( ) d_wait_subphase.reset(); d_wait_restart.reset(); // Syncronize - MPI_Barrier( d_comm ); + d_comm.barrier(); PROFILE_STOP("finish"); } diff --git a/analysis/runAnalysis.h b/analysis/runAnalysis.h index 0bf2f676..3c5bc7f0 100644 --- a/analysis/runAnalysis.h +++ b/analysis/runAnalysis.h @@ -68,10 +68,10 @@ public: class commWrapper { public: - MPI_Comm comm; + Utilities::MPI comm; int tag; runAnalysis *analysis; - commWrapper( int tag, MPI_Comm comm, runAnalysis *analysis ); + commWrapper( int tag, const Utilities::MPI& comm, runAnalysis *analysis ); commWrapper( ) = delete; commWrapper( const commWrapper &rhs ) = delete; commWrapper& operator=( const commWrapper &rhs ) = delete; @@ -100,8 +100,8 @@ private: std::vector d_meshData; fillHalo d_fillData; std::string d_restartFile; - MPI_Comm d_comm; - MPI_Comm d_comms[1024]; + Utilities::MPI d_comm; + Utilities::MPI d_comms[1024]; volatile bool d_comm_used[1024]; std::shared_ptr d_ScaLBL_Comm; diff --git a/analysis/uCT.cpp b/analysis/uCT.cpp index 912f8e85..28d677c1 100644 --- a/analysis/uCT.cpp +++ b/analysis/uCT.cpp @@ -228,8 +228,7 @@ void filter_final( Array& ID, Array& Dist, Array& Mean, Array& Dist1, Array& Dist2 ) { PROFILE_SCOPED(timer,"filter_final"); - int rank; - MPI_Comm_rank(Dm.Comm,&rank); + int rank = Dm.Comm.getRank(); int Nx = Dm.Nx-2; int Ny = Dm.Ny-2; int Nz = Dm.Nz-2; @@ -242,7 +241,7 @@ void filter_final( Array& ID, Array& Dist, float tmp = 0; for (size_t i=0; i(Dist0.length()) ); const float dx1 = 0.3*tmp; const float dx2 = 1.05*dx1; if (rank==0) @@ -285,7 +284,7 @@ void filter_final( Array& ID, Array& Dist, Phase.fill(1); ComputeGlobalBlobIDs( Nx, Ny, Nz, Dm.rank_info, Phase, SignDist, 0, 0, GlobalBlobID, Dm.Comm ); fillInt.fill(GlobalBlobID); - int N_blobs = maxReduce(Dm.Comm,GlobalBlobID.max()+1); + int N_blobs = Dm.Comm.maxReduce(GlobalBlobID.max()+1); std::vector mean(N_blobs,0); std::vector count(N_blobs,0); for (int k=1; k<=Nz; k++) { @@ -321,8 +320,8 @@ void filter_final( Array& ID, Array& Dist, } } } - mean = sumReduce(Dm.Comm,mean); - count = sumReduce(Dm.Comm,count); + mean = Dm.Comm.sumReduce(mean); + count = Dm.Comm.sumReduce(count); for (size_t i=0; i -o ") +set(CMAKE_HIP_CREATE_SHARED_MODULE 
"${HIP_HIPCC_CMAKE_LINKER_HELPER} ${HCC_PATH} -o -shared" ) +set(CMAKE_HIP_LINK_EXECUTABLE "${HIP_HIPCC_CMAKE_LINKER_HELPER} ${HCC_PATH} -o ") + +############################################################################### +# FIND: HIP and associated helper binaries +############################################################################### +# HIP is supported on Linux only +if(UNIX AND NOT APPLE AND NOT CYGWIN) + # Search for HIP installation + if(NOT HIP_ROOT_DIR) + # Search in user specified path first + find_path( + HIP_ROOT_DIR + NAMES hipconfig + PATHS + ENV ROCM_PATH + ENV HIP_PATH + PATH_SUFFIXES bin + DOC "HIP installed location" + NO_DEFAULT_PATH + ) + # Now search in default path + find_path( + HIP_ROOT_DIR + NAMES hipconfig + PATHS + /opt/rocm + /opt/rocm/hip + PATH_SUFFIXES bin + DOC "HIP installed location" + ) + + # Check if we found HIP installation + if(HIP_ROOT_DIR) + # If so, fix the path + string(REGEX REPLACE "[/\\\\]?bin[64]*[/\\\\]?$" "" HIP_ROOT_DIR ${HIP_ROOT_DIR}) + # And push it back to the cache + set(HIP_ROOT_DIR ${HIP_ROOT_DIR} CACHE PATH "HIP installed location" FORCE) + endif() + if(NOT EXISTS ${HIP_ROOT_DIR}) + if(HIP_FIND_REQUIRED) + message(FATAL_ERROR "Specify HIP_ROOT_DIR") + elseif(NOT HIP_FIND_QUIETLY) + message("HIP_ROOT_DIR not found or specified") + endif() + endif() + endif() + + # Find HIPCC executable + find_program( + HIP_HIPCC_EXECUTABLE + NAMES hipcc + PATHS + "${HIP_ROOT_DIR}" + ENV ROCM_PATH + ENV HIP_PATH + /opt/rocm + /opt/rocm/hip + PATH_SUFFIXES bin + NO_DEFAULT_PATH + ) + if(NOT HIP_HIPCC_EXECUTABLE) + # Now search in default paths + find_program(HIP_HIPCC_EXECUTABLE hipcc) + endif() + mark_as_advanced(HIP_HIPCC_EXECUTABLE) + + # Find HIPCONFIG executable + find_program( + HIP_HIPCONFIG_EXECUTABLE + NAMES hipconfig + PATHS + "${HIP_ROOT_DIR}" + ENV ROCM_PATH + ENV HIP_PATH + /opt/rocm + /opt/rocm/hip + PATH_SUFFIXES bin + NO_DEFAULT_PATH + ) + if(NOT HIP_HIPCONFIG_EXECUTABLE) + # Now search in default paths + find_program(HIP_HIPCONFIG_EXECUTABLE hipconfig) + endif() + mark_as_advanced(HIP_HIPCONFIG_EXECUTABLE) + + # Find HIPCC_CMAKE_LINKER_HELPER executable + find_program( + HIP_HIPCC_CMAKE_LINKER_HELPER + NAMES hipcc_cmake_linker_helper + PATHS + "${HIP_ROOT_DIR}" + ENV ROCM_PATH + ENV HIP_PATH + /opt/rocm + /opt/rocm/hip + PATH_SUFFIXES bin + NO_DEFAULT_PATH + ) + if(NOT HIP_HIPCC_CMAKE_LINKER_HELPER) + # Now search in default paths + find_program(HIP_HIPCC_CMAKE_LINKER_HELPER hipcc_cmake_linker_helper) + endif() + mark_as_advanced(HIP_HIPCC_CMAKE_LINKER_HELPER) + + if(HIP_HIPCONFIG_EXECUTABLE AND NOT HIP_VERSION) + # Compute the version + execute_process( + COMMAND ${HIP_HIPCONFIG_EXECUTABLE} --version + OUTPUT_VARIABLE _hip_version + ERROR_VARIABLE _hip_error + OUTPUT_STRIP_TRAILING_WHITESPACE + ERROR_STRIP_TRAILING_WHITESPACE + ) + if(NOT _hip_error) + set(HIP_VERSION ${_hip_version} CACHE STRING "Version of HIP as computed from hipcc") + else() + set(HIP_VERSION "0.0.0" CACHE STRING "Version of HIP as computed by FindHIP()") + endif() + mark_as_advanced(HIP_VERSION) + endif() + if(HIP_VERSION) + string(REPLACE "." 
";" _hip_version_list "${HIP_VERSION}") + list(GET _hip_version_list 0 HIP_VERSION_MAJOR) + list(GET _hip_version_list 1 HIP_VERSION_MINOR) + list(GET _hip_version_list 2 HIP_VERSION_PATCH) + set(HIP_VERSION_STRING "${HIP_VERSION}") + endif() + + if(HIP_HIPCONFIG_EXECUTABLE AND NOT HIP_PLATFORM) + # Compute the platform + execute_process( + COMMAND ${HIP_HIPCONFIG_EXECUTABLE} --platform + OUTPUT_VARIABLE _hip_platform + OUTPUT_STRIP_TRAILING_WHITESPACE + ) + set(HIP_PLATFORM ${_hip_platform} CACHE STRING "HIP platform as computed by hipconfig") + mark_as_advanced(HIP_PLATFORM) + endif() +endif() + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args( + HIP + REQUIRED_VARS + HIP_ROOT_DIR + HIP_HIPCC_EXECUTABLE + HIP_HIPCONFIG_EXECUTABLE + HIP_PLATFORM + VERSION_VAR HIP_VERSION + ) + +############################################################################### +# MACRO: Locate helper files +############################################################################### +macro(HIP_FIND_HELPER_FILE _name _extension) + set(_hip_full_name "${_name}.${_extension}") + get_filename_component(CMAKE_CURRENT_LIST_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH) + set(HIP_${_name} "${CMAKE_CURRENT_LIST_DIR}/FindHIP/${_hip_full_name}") + if(NOT EXISTS "${HIP_${_name}}") + set(error_message "${_hip_full_name} not found in ${CMAKE_CURRENT_LIST_DIR}/FindHIP") + if(HIP_FIND_REQUIRED) + message(FATAL_ERROR "${error_message}") + else() + if(NOT HIP_FIND_QUIETLY) + message(STATUS "${error_message}") + endif() + endif() + endif() + # Set this variable as internal, so the user isn't bugged with it. + set(HIP_${_name} ${HIP_${_name}} CACHE INTERNAL "Location of ${_full_name}" FORCE) +endmacro() + +############################################################################### +hip_find_helper_file(run_make2cmake cmake) +hip_find_helper_file(run_hipcc cmake) +############################################################################### + +############################################################################### +# MACRO: Reset compiler flags +############################################################################### +macro(HIP_RESET_FLAGS) + unset(HIP_HIPCC_FLAGS) + unset(HIP_HCC_FLAGS) + unset(HIP_NVCC_FLAGS) + foreach(config ${_hip_configuration_types}) + string(TOUPPER ${config} config_upper) + unset(HIP_HIPCC_FLAGS_${config_upper}) + unset(HIP_HCC_FLAGS_${config_upper}) + unset(HIP_NVCC_FLAGS_${config_upper}) + endforeach() +endmacro() + +############################################################################### +# MACRO: Separate the options from the sources +############################################################################### +macro(HIP_GET_SOURCES_AND_OPTIONS _sources _cmake_options _hipcc_options _hcc_options _nvcc_options) + set(${_sources}) + set(${_cmake_options}) + set(${_hipcc_options}) + set(${_hcc_options}) + set(${_nvcc_options}) + set(_hipcc_found_options FALSE) + set(_hcc_found_options FALSE) + set(_nvcc_found_options FALSE) + foreach(arg ${ARGN}) + if("x${arg}" STREQUAL "xHIPCC_OPTIONS") + set(_hipcc_found_options TRUE) + set(_hcc_found_options FALSE) + set(_nvcc_found_options FALSE) + elseif("x${arg}" STREQUAL "xHCC_OPTIONS") + set(_hipcc_found_options FALSE) + set(_hcc_found_options TRUE) + set(_nvcc_found_options FALSE) + elseif("x${arg}" STREQUAL "xNVCC_OPTIONS") + set(_hipcc_found_options FALSE) + set(_hcc_found_options FALSE) + set(_nvcc_found_options TRUE) + elseif( + "x${arg}" STREQUAL "xEXCLUDE_FROM_ALL" OR + "x${arg}" STREQUAL "xSTATIC" OR + 
"x${arg}" STREQUAL "xSHARED" OR + "x${arg}" STREQUAL "xMODULE" + ) + list(APPEND ${_cmake_options} ${arg}) + else() + if(_hipcc_found_options) + list(APPEND ${_hipcc_options} ${arg}) + elseif(_hcc_found_options) + list(APPEND ${_hcc_options} ${arg}) + elseif(_nvcc_found_options) + list(APPEND ${_nvcc_options} ${arg}) + else() + # Assume this is a file + list(APPEND ${_sources} ${arg}) + endif() + endif() + endforeach() +endmacro() + +############################################################################### +# MACRO: Add include directories to pass to the hipcc command +############################################################################### +set(HIP_HIPCC_INCLUDE_ARGS_USER "") +macro(HIP_INCLUDE_DIRECTORIES) + foreach(dir ${ARGN}) + list(APPEND HIP_HIPCC_INCLUDE_ARGS_USER $<$:-I${dir}>) + endforeach() +endmacro() + +############################################################################### +# FUNCTION: Helper to avoid clashes of files with the same basename but different paths +############################################################################### +function(HIP_COMPUTE_BUILD_PATH path build_path) + # Convert to cmake style paths + file(TO_CMAKE_PATH "${path}" bpath) + if(IS_ABSOLUTE "${bpath}") + string(FIND "${bpath}" "${CMAKE_CURRENT_BINARY_DIR}" _binary_dir_pos) + if(_binary_dir_pos EQUAL 0) + file(RELATIVE_PATH bpath "${CMAKE_CURRENT_BINARY_DIR}" "${bpath}") + else() + file(RELATIVE_PATH bpath "${CMAKE_CURRENT_SOURCE_DIR}" "${bpath}") + endif() + endif() + + # Remove leading / + string(REGEX REPLACE "^[/]+" "" bpath "${bpath}") + # Avoid absolute paths by removing ':' + string(REPLACE ":" "_" bpath "${bpath}") + # Avoid relative paths that go up the tree + string(REPLACE "../" "__/" bpath "${bpath}") + # Avoid spaces + string(REPLACE " " "_" bpath "${bpath}") + # Strip off the filename + get_filename_component(bpath "${bpath}" PATH) + + set(${build_path} "${bpath}" PARENT_SCOPE) +endfunction() + +############################################################################### +# MACRO: Parse OPTIONS from ARGN & set variables prefixed by _option_prefix +############################################################################### +macro(HIP_PARSE_HIPCC_OPTIONS _option_prefix) + set(_hip_found_config) + foreach(arg ${ARGN}) + # Determine if we are dealing with a per-configuration flag + foreach(config ${_hip_configuration_types}) + string(TOUPPER ${config} config_upper) + if(arg STREQUAL "${config_upper}") + set(_hip_found_config _${arg}) + # Clear arg to prevent it from being processed anymore + set(arg) + endif() + endforeach() + if(arg) + list(APPEND ${_option_prefix}${_hip_found_config} "${arg}") + endif() + endforeach() +endmacro() + +############################################################################### +# MACRO: Try and include dependency file if it exists +############################################################################### +macro(HIP_INCLUDE_HIPCC_DEPENDENCIES dependency_file) + set(HIP_HIPCC_DEPEND) + set(HIP_HIPCC_DEPEND_REGENERATE FALSE) + + # Create the dependency file if it doesn't exist + if(NOT EXISTS ${dependency_file}) + file(WRITE ${dependency_file} "# Generated by: FindHIP.cmake. 
Do not edit.\n") + endif() + # Include the dependency file + include(${dependency_file}) + + # Verify the existence of all the included files + if(HIP_HIPCC_DEPEND) + foreach(f ${HIP_HIPCC_DEPEND}) + if(NOT EXISTS ${f}) + # If they aren't there, regenerate the file again + set(HIP_HIPCC_DEPEND_REGENERATE TRUE) + endif() + endforeach() + else() + # No dependencies, so regenerate the file + set(HIP_HIPCC_DEPEND_REGENERATE TRUE) + endif() + + # Regenerate the dependency file if needed + if(HIP_HIPCC_DEPEND_REGENERATE) + set(HIP_HIPCC_DEPEND ${dependency_file}) + file(WRITE ${dependency_file} "# Generated by: FindHIP.cmake. Do not edit.\n") + endif() +endmacro() + +############################################################################### +# MACRO: Prepare cmake commands for the target +############################################################################### +macro(HIP_PREPARE_TARGET_COMMANDS _target _format _generated_files _source_files) + set(_hip_flags "") + string(TOUPPER "${CMAKE_BUILD_TYPE}" _hip_build_configuration) + if(HIP_HOST_COMPILATION_CPP) + set(HIP_C_OR_CXX CXX) + else() + set(HIP_C_OR_CXX C) + endif() + set(generated_extension ${CMAKE_${HIP_C_OR_CXX}_OUTPUT_EXTENSION}) + + # Initialize list of includes with those specified by the user. Append with + # ones specified to cmake directly. + set(HIP_HIPCC_INCLUDE_ARGS ${HIP_HIPCC_INCLUDE_ARGS_USER}) + + # Add the include directories + set(include_directories_generator "$") + list(APPEND HIP_HIPCC_INCLUDE_ARGS "$<$:-I$>") + + get_directory_property(_hip_include_directories INCLUDE_DIRECTORIES) + list(REMOVE_DUPLICATES _hip_include_directories) + if(_hip_include_directories) + foreach(dir ${_hip_include_directories}) + list(APPEND HIP_HIPCC_INCLUDE_ARGS $<$:-I${dir}>) + endforeach() + endif() + + HIP_GET_SOURCES_AND_OPTIONS(_hip_sources _hip_cmake_options _hipcc_options _hcc_options _nvcc_options ${ARGN}) + HIP_PARSE_HIPCC_OPTIONS(HIP_HIPCC_FLAGS ${_hipcc_options}) + HIP_PARSE_HIPCC_OPTIONS(HIP_HCC_FLAGS ${_hcc_options}) + HIP_PARSE_HIPCC_OPTIONS(HIP_NVCC_FLAGS ${_nvcc_options}) + + # Add the compile definitions + set(compile_definition_generator "$") + list(APPEND HIP_HIPCC_FLAGS "$<$:-D$>") + + # Check if we are building shared library. 
+ set(_hip_build_shared_libs FALSE) + list(FIND _hip_cmake_options SHARED _hip_found_SHARED) + list(FIND _hip_cmake_options MODULE _hip_found_MODULE) + if(_hip_found_SHARED GREATER -1 OR _hip_found_MODULE GREATER -1) + set(_hip_build_shared_libs TRUE) + endif() + list(FIND _hip_cmake_options STATIC _hip_found_STATIC) + if(_hip_found_STATIC GREATER -1) + set(_hip_build_shared_libs FALSE) + endif() + + # If we are building a shared library, add extra flags to HIP_HIPCC_FLAGS + if(_hip_build_shared_libs) + list(APPEND HIP_HCC_FLAGS "-fPIC") + list(APPEND HIP_NVCC_FLAGS "--shared -Xcompiler '-fPIC'") + endif() + + # Set host compiler + set(HIP_HOST_COMPILER "${CMAKE_${HIP_C_OR_CXX}_COMPILER}") + + # Set compiler flags + set(_HIP_HOST_FLAGS "set(CMAKE_HOST_FLAGS ${CMAKE_${HIP_C_OR_CXX}_FLAGS})") + set(_HIP_HIPCC_FLAGS "set(HIP_HIPCC_FLAGS ${HIP_HIPCC_FLAGS})") + set(_HIP_HCC_FLAGS "set(HIP_HCC_FLAGS ${HIP_HCC_FLAGS})") + set(_HIP_NVCC_FLAGS "set(HIP_NVCC_FLAGS ${HIP_NVCC_FLAGS})") + foreach(config ${_hip_configuration_types}) + string(TOUPPER ${config} config_upper) + set(_HIP_HOST_FLAGS "${_HIP_HOST_FLAGS}\nset(CMAKE_HOST_FLAGS_${config_upper} ${CMAKE_${HIP_C_OR_CXX}_FLAGS_${config_upper}})") + set(_HIP_HIPCC_FLAGS "${_HIP_HIPCC_FLAGS}\nset(HIP_HIPCC_FLAGS_${config_upper} ${HIP_HIPCC_FLAGS_${config_upper}})") + set(_HIP_HCC_FLAGS "${_HIP_HCC_FLAGS}\nset(HIP_HCC_FLAGS_${config_upper} ${HIP_HCC_FLAGS_${config_upper}})") + set(_HIP_NVCC_FLAGS "${_HIP_NVCC_FLAGS}\nset(HIP_NVCC_FLAGS_${config_upper} ${HIP_NVCC_FLAGS_${config_upper}})") + endforeach() + + # Reset the output variable + set(_hip_generated_files "") + set(_hip_source_files "") + + # Iterate over all arguments and create custom commands for all source files + foreach(file ${ARGN}) + # Ignore any file marked as a HEADER_FILE_ONLY + get_source_file_property(_is_header ${file} HEADER_FILE_ONLY) + # Allow per source file overrides of the format. Also allows compiling non .cu files. 
+ get_source_file_property(_hip_source_format ${file} HIP_SOURCE_PROPERTY_FORMAT) + if((${file} MATCHES "\\.cu$" OR _hip_source_format) AND NOT _is_header) + set(host_flag FALSE) + else() + set(host_flag TRUE) + endif() + + if(NOT host_flag) + # Determine output directory + HIP_COMPUTE_BUILD_PATH("${file}" hip_build_path) + set(hip_compile_output_dir "${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/${_target}.dir/${hip_build_path}") + + get_filename_component(basename ${file} NAME) + set(generated_file_path "${hip_compile_output_dir}/${CMAKE_CFG_INTDIR}") + set(generated_file_basename "${_target}_generated_${basename}${generated_extension}") + + # Set file names + set(generated_file "${generated_file_path}/${generated_file_basename}") + set(cmake_dependency_file "${hip_compile_output_dir}/${generated_file_basename}.depend") + set(custom_target_script_pregen "${hip_compile_output_dir}/${generated_file_basename}.cmake.pre-gen") + set(custom_target_script "${hip_compile_output_dir}/${generated_file_basename}.cmake") + + # Set properties for object files + set_source_files_properties("${generated_file}" + PROPERTIES + EXTERNAL_OBJECT true # This is an object file not to be compiled, but only be linked + ) + + # Don't add CMAKE_CURRENT_SOURCE_DIR if the path is already an absolute path + get_filename_component(file_path "${file}" PATH) + if(IS_ABSOLUTE "${file_path}") + set(source_file "${file}") + else() + set(source_file "${CMAKE_CURRENT_SOURCE_DIR}/${file}") + endif() + + # Bring in the dependencies + HIP_INCLUDE_HIPCC_DEPENDENCIES(${cmake_dependency_file}) + + # Configure the build script + configure_file("${HIP_run_hipcc}" "${custom_target_script_pregen}" @ONLY) + file(GENERATE + OUTPUT "${custom_target_script}" + INPUT "${custom_target_script_pregen}" + ) + set(main_dep DEPENDS ${source_file}) + if(CMAKE_GENERATOR MATCHES "Makefiles") + set(verbose_output "$(VERBOSE)") + elseif(HIP_VERBOSE_BUILD) + set(verbose_output ON) + else() + set(verbose_output OFF) + endif() + + # Create up the comment string + file(RELATIVE_PATH generated_file_relative_path "${CMAKE_BINARY_DIR}" "${generated_file}") + set(hip_build_comment_string "Building HIPCC object ${generated_file_relative_path}") + + # Build the generated file and dependency file + add_custom_command( + OUTPUT ${generated_file} + # These output files depend on the source_file and the contents of cmake_dependency_file + ${main_dep} + DEPENDS ${HIP_HIPCC_DEPEND} + DEPENDS ${custom_target_script} + # Make sure the output directory exists before trying to write to it. 
+ COMMAND ${CMAKE_COMMAND} -E make_directory "${generated_file_path}" + COMMAND ${CMAKE_COMMAND} ARGS + -D verbose:BOOL=${verbose_output} + -D build_configuration:STRING=${_hip_build_configuration} + -D "generated_file:STRING=${generated_file}" + -P "${custom_target_script}" + WORKING_DIRECTORY "${hip_compile_output_dir}" + COMMENT "${hip_build_comment_string}" + ) + + # Make sure the build system knows the file is generated + set_source_files_properties(${generated_file} PROPERTIES GENERATED TRUE) + list(APPEND _hip_generated_files ${generated_file}) + list(APPEND _hip_source_files ${file}) + endif() + endforeach() + + # Set the return parameter + set(${_generated_files} ${_hip_generated_files}) + set(${_source_files} ${_hip_source_files}) +endmacro() + +############################################################################### +# HIP_ADD_EXECUTABLE +############################################################################### +macro(HIP_ADD_EXECUTABLE hip_target) + # Separate the sources from the options + HIP_GET_SOURCES_AND_OPTIONS(_sources _cmake_options _hipcc_options _hcc_options _nvcc_options ${ARGN}) + HIP_PREPARE_TARGET_COMMANDS(${hip_target} OBJ _generated_files _source_files ${_sources} HIPCC_OPTIONS ${_hipcc_options} HCC_OPTIONS ${_hcc_options} NVCC_OPTIONS ${_nvcc_options}) + if(_source_files) + list(REMOVE_ITEM _sources ${_source_files}) + endif() + if("x${HCC_HOME}" STREQUAL "x") + set(HCC_HOME "/opt/rocm/hcc") + endif() + set(CMAKE_HIP_LINK_EXECUTABLE "${HIP_HIPCC_CMAKE_LINKER_HELPER} ${HCC_HOME} -o ") + add_executable(${hip_target} ${_cmake_options} ${_generated_files} ${_sources}) + set_target_properties(${hip_target} PROPERTIES LINKER_LANGUAGE HIP) +endmacro() + +############################################################################### +# HIP_ADD_LIBRARY +############################################################################### +macro(HIP_ADD_LIBRARY hip_target) + # Separate the sources from the options + HIP_GET_SOURCES_AND_OPTIONS(_sources _cmake_options _hipcc_options _hcc_options _nvcc_options ${ARGN}) + HIP_PREPARE_TARGET_COMMANDS(${hip_target} OBJ _generated_files _source_files ${_sources} ${_cmake_options} HIPCC_OPTIONS ${_hipcc_options} HCC_OPTIONS ${_hcc_options} NVCC_OPTIONS ${_nvcc_options}) + if(_source_files) + list(REMOVE_ITEM _sources ${_source_files}) + endif() + add_library(${hip_target} ${_cmake_options} ${_generated_files} ${_sources}) + set_target_properties(${hip_target} PROPERTIES LINKER_LANGUAGE ${HIP_C_OR_CXX}) +endmacro() + +# vim: ts=4:sw=4:expandtab:smartindent diff --git a/common/Communication.h b/common/Communication.h index 7819a0bb..7c2f8d08 100644 --- a/common/Communication.h +++ b/common/Communication.h @@ -1,7 +1,7 @@ #ifndef COMMUNICATION_H_INC #define COMMUNICATION_H_INC -#include "common/MPI_Helpers.h" +#include "common/MPI.h" #include "common/Utilities.h" #include "common/Array.h" @@ -38,7 +38,7 @@ struct RankInfoStruct { //! Redistribute domain data (dst may be smaller than the src) template Array redistribute( const RankInfoStruct& src_rank, const Array& src_data, - const RankInfoStruct& dst_rank, std::array dst_size, MPI_Comm comm ); + const RankInfoStruct& dst_rank, std::array dst_size, const Utilities::MPI& comm ); /*! 
@@ -59,7 +59,7 @@ public: * @param[in] fill Fill {faces,edges,corners} * @param[in] periodic Periodic dimensions */ - fillHalo( MPI_Comm comm, const RankInfoStruct& info, + fillHalo( const Utilities::MPI& comm, const RankInfoStruct& info, std::array n, std::array ng, int tag, int depth, std::array fill = {true,true,true}, std::array periodic = {true,true,true} ); @@ -83,7 +83,7 @@ public: private: - MPI_Comm comm; + Utilities::MPI comm; RankInfoStruct info; std::array n, ng; int depth; @@ -93,8 +93,6 @@ private: TYPE *mem; TYPE *send[3][3][3], *recv[3][3][3]; MPI_Request send_req[3][3][3], recv_req[3][3][3]; - size_t N_type; - MPI_Datatype datatype; fillHalo(); // Private empty constructor fillHalo(const fillHalo&); // Private copy constructor fillHalo& operator=(const fillHalo&); // Private assignment operator @@ -136,7 +134,7 @@ void InitializeRanks( const int rank, const int nprocx, const int nprocy, const //*************************************************************************************** -inline void CommunicateSendRecvCounts( MPI_Comm Communicator, int sendtag, int recvtag, +inline void CommunicateSendRecvCounts( const Utilities::MPI& Communicator, int sendtag, int recvtag, int rank_x, int rank_y, int rank_z, int rank_X, int rank_Y, int rank_Z, int rank_xy, int rank_XY, int rank_xY, int rank_Xy, @@ -155,53 +153,53 @@ inline void CommunicateSendRecvCounts( MPI_Comm Communicator, int sendtag, int r { MPI_Request req1[18], req2[18]; MPI_Status stat1[18],stat2[18]; - MPI_Isend(&sendCount_x, 1,MPI_INT,rank_x,sendtag+0,Communicator,&req1[0]); - MPI_Irecv(&recvCount_X, 1,MPI_INT,rank_X,recvtag+0,Communicator,&req2[0]); - MPI_Isend(&sendCount_X, 1,MPI_INT,rank_X,sendtag+1,Communicator,&req1[1]); - MPI_Irecv(&recvCount_x, 1,MPI_INT,rank_x,recvtag+1,Communicator,&req2[1]); - MPI_Isend(&sendCount_y, 1,MPI_INT,rank_y,sendtag+2,Communicator,&req1[2]); - MPI_Irecv(&recvCount_Y, 1,MPI_INT,rank_Y,recvtag+2,Communicator,&req2[2]); - MPI_Isend(&sendCount_Y, 1,MPI_INT,rank_Y,sendtag+3,Communicator,&req1[3]); - MPI_Irecv(&recvCount_y, 1,MPI_INT,rank_y,recvtag+3,Communicator,&req2[3]); - MPI_Isend(&sendCount_z, 1,MPI_INT,rank_z,sendtag+4,Communicator,&req1[4]); - MPI_Irecv(&recvCount_Z, 1,MPI_INT,rank_Z,recvtag+4,Communicator,&req2[4]); - MPI_Isend(&sendCount_Z, 1,MPI_INT,rank_Z,sendtag+5,Communicator,&req1[5]); - MPI_Irecv(&recvCount_z, 1,MPI_INT,rank_z,recvtag+5,Communicator,&req2[5]); + MPI_Isend(&sendCount_x, 1,MPI_INT,rank_x,sendtag+0,Communicator.getCommunicator(),&req1[0]); + MPI_Irecv(&recvCount_X, 1,MPI_INT,rank_X,recvtag+0,Communicator.getCommunicator(),&req2[0]); + MPI_Isend(&sendCount_X, 1,MPI_INT,rank_X,sendtag+1,Communicator.getCommunicator(),&req1[1]); + MPI_Irecv(&recvCount_x, 1,MPI_INT,rank_x,recvtag+1,Communicator.getCommunicator(),&req2[1]); + MPI_Isend(&sendCount_y, 1,MPI_INT,rank_y,sendtag+2,Communicator.getCommunicator(),&req1[2]); + MPI_Irecv(&recvCount_Y, 1,MPI_INT,rank_Y,recvtag+2,Communicator.getCommunicator(),&req2[2]); + MPI_Isend(&sendCount_Y, 1,MPI_INT,rank_Y,sendtag+3,Communicator.getCommunicator(),&req1[3]); + MPI_Irecv(&recvCount_y, 1,MPI_INT,rank_y,recvtag+3,Communicator.getCommunicator(),&req2[3]); + MPI_Isend(&sendCount_z, 1,MPI_INT,rank_z,sendtag+4,Communicator.getCommunicator(),&req1[4]); + MPI_Irecv(&recvCount_Z, 1,MPI_INT,rank_Z,recvtag+4,Communicator.getCommunicator(),&req2[4]); + MPI_Isend(&sendCount_Z, 1,MPI_INT,rank_Z,sendtag+5,Communicator.getCommunicator(),&req1[5]); + MPI_Irecv(&recvCount_z, 
1,MPI_INT,rank_z,recvtag+5,Communicator.getCommunicator(),&req2[5]); - MPI_Isend(&sendCount_xy, 1,MPI_INT,rank_xy,sendtag+6,Communicator,&req1[6]); - MPI_Irecv(&recvCount_XY, 1,MPI_INT,rank_XY,recvtag+6,Communicator,&req2[6]); - MPI_Isend(&sendCount_XY, 1,MPI_INT,rank_XY,sendtag+7,Communicator,&req1[7]); - MPI_Irecv(&recvCount_xy, 1,MPI_INT,rank_xy,recvtag+7,Communicator,&req2[7]); - MPI_Isend(&sendCount_Xy, 1,MPI_INT,rank_Xy,sendtag+8,Communicator,&req1[8]); - MPI_Irecv(&recvCount_xY, 1,MPI_INT,rank_xY,recvtag+8,Communicator,&req2[8]); - MPI_Isend(&sendCount_xY, 1,MPI_INT,rank_xY,sendtag+9,Communicator,&req1[9]); - MPI_Irecv(&recvCount_Xy, 1,MPI_INT,rank_Xy,recvtag+9,Communicator,&req2[9]); + MPI_Isend(&sendCount_xy, 1,MPI_INT,rank_xy,sendtag+6,Communicator.getCommunicator(),&req1[6]); + MPI_Irecv(&recvCount_XY, 1,MPI_INT,rank_XY,recvtag+6,Communicator.getCommunicator(),&req2[6]); + MPI_Isend(&sendCount_XY, 1,MPI_INT,rank_XY,sendtag+7,Communicator.getCommunicator(),&req1[7]); + MPI_Irecv(&recvCount_xy, 1,MPI_INT,rank_xy,recvtag+7,Communicator.getCommunicator(),&req2[7]); + MPI_Isend(&sendCount_Xy, 1,MPI_INT,rank_Xy,sendtag+8,Communicator.getCommunicator(),&req1[8]); + MPI_Irecv(&recvCount_xY, 1,MPI_INT,rank_xY,recvtag+8,Communicator.getCommunicator(),&req2[8]); + MPI_Isend(&sendCount_xY, 1,MPI_INT,rank_xY,sendtag+9,Communicator.getCommunicator(),&req1[9]); + MPI_Irecv(&recvCount_Xy, 1,MPI_INT,rank_Xy,recvtag+9,Communicator.getCommunicator(),&req2[9]); - MPI_Isend(&sendCount_xz, 1,MPI_INT,rank_xz,sendtag+10,Communicator,&req1[10]); - MPI_Irecv(&recvCount_XZ, 1,MPI_INT,rank_XZ,recvtag+10,Communicator,&req2[10]); - MPI_Isend(&sendCount_XZ, 1,MPI_INT,rank_XZ,sendtag+11,Communicator,&req1[11]); - MPI_Irecv(&recvCount_xz, 1,MPI_INT,rank_xz,recvtag+11,Communicator,&req2[11]); - MPI_Isend(&sendCount_Xz, 1,MPI_INT,rank_Xz,sendtag+12,Communicator,&req1[12]); - MPI_Irecv(&recvCount_xZ, 1,MPI_INT,rank_xZ,recvtag+12,Communicator,&req2[12]); - MPI_Isend(&sendCount_xZ, 1,MPI_INT,rank_xZ,sendtag+13,Communicator,&req1[13]); - MPI_Irecv(&recvCount_Xz, 1,MPI_INT,rank_Xz,recvtag+13,Communicator,&req2[13]); + MPI_Isend(&sendCount_xz, 1,MPI_INT,rank_xz,sendtag+10,Communicator.getCommunicator(),&req1[10]); + MPI_Irecv(&recvCount_XZ, 1,MPI_INT,rank_XZ,recvtag+10,Communicator.getCommunicator(),&req2[10]); + MPI_Isend(&sendCount_XZ, 1,MPI_INT,rank_XZ,sendtag+11,Communicator.getCommunicator(),&req1[11]); + MPI_Irecv(&recvCount_xz, 1,MPI_INT,rank_xz,recvtag+11,Communicator.getCommunicator(),&req2[11]); + MPI_Isend(&sendCount_Xz, 1,MPI_INT,rank_Xz,sendtag+12,Communicator.getCommunicator(),&req1[12]); + MPI_Irecv(&recvCount_xZ, 1,MPI_INT,rank_xZ,recvtag+12,Communicator.getCommunicator(),&req2[12]); + MPI_Isend(&sendCount_xZ, 1,MPI_INT,rank_xZ,sendtag+13,Communicator.getCommunicator(),&req1[13]); + MPI_Irecv(&recvCount_Xz, 1,MPI_INT,rank_Xz,recvtag+13,Communicator.getCommunicator(),&req2[13]); - MPI_Isend(&sendCount_yz, 1,MPI_INT,rank_yz,sendtag+14,Communicator,&req1[14]); - MPI_Irecv(&recvCount_YZ, 1,MPI_INT,rank_YZ,recvtag+14,Communicator,&req2[14]); - MPI_Isend(&sendCount_YZ, 1,MPI_INT,rank_YZ,sendtag+15,Communicator,&req1[15]); - MPI_Irecv(&recvCount_yz, 1,MPI_INT,rank_yz,recvtag+15,Communicator,&req2[15]); - MPI_Isend(&sendCount_Yz, 1,MPI_INT,rank_Yz,sendtag+16,Communicator,&req1[16]); - MPI_Irecv(&recvCount_yZ, 1,MPI_INT,rank_yZ,recvtag+16,Communicator,&req2[16]); - MPI_Isend(&sendCount_yZ, 1,MPI_INT,rank_yZ,sendtag+17,Communicator,&req1[17]); - MPI_Irecv(&recvCount_Yz, 
1,MPI_INT,rank_Yz,recvtag+17,Communicator,&req2[17]); + MPI_Isend(&sendCount_yz, 1,MPI_INT,rank_yz,sendtag+14,Communicator.getCommunicator(),&req1[14]); + MPI_Irecv(&recvCount_YZ, 1,MPI_INT,rank_YZ,recvtag+14,Communicator.getCommunicator(),&req2[14]); + MPI_Isend(&sendCount_YZ, 1,MPI_INT,rank_YZ,sendtag+15,Communicator.getCommunicator(),&req1[15]); + MPI_Irecv(&recvCount_yz, 1,MPI_INT,rank_yz,recvtag+15,Communicator.getCommunicator(),&req2[15]); + MPI_Isend(&sendCount_Yz, 1,MPI_INT,rank_Yz,sendtag+16,Communicator.getCommunicator(),&req1[16]); + MPI_Irecv(&recvCount_yZ, 1,MPI_INT,rank_yZ,recvtag+16,Communicator.getCommunicator(),&req2[16]); + MPI_Isend(&sendCount_yZ, 1,MPI_INT,rank_yZ,sendtag+17,Communicator.getCommunicator(),&req1[17]); + MPI_Irecv(&recvCount_Yz, 1,MPI_INT,rank_Yz,recvtag+17,Communicator.getCommunicator(),&req2[17]); MPI_Waitall(18,req1,stat1); MPI_Waitall(18,req2,stat2); - MPI_Barrier(Communicator); + Communicator.barrier(); } //*************************************************************************************** -inline void CommunicateRecvLists( MPI_Comm Communicator, int sendtag, int recvtag, +inline void CommunicateRecvLists( const Utilities::MPI& Communicator, int sendtag, int recvtag, int *sendList_x, int *sendList_y, int *sendList_z, int *sendList_X, int *sendList_Y, int *sendList_Z, int *sendList_xy, int *sendList_XY, int *sendList_xY, int *sendList_Xy, int *sendList_xz, int *sendList_XZ, int *sendList_xZ, int *sendList_Xz, @@ -223,52 +221,52 @@ inline void CommunicateRecvLists( MPI_Comm Communicator, int sendtag, int recvta { MPI_Request req1[18], req2[18]; MPI_Status stat1[18],stat2[18]; - MPI_Isend(sendList_x, sendCount_x,MPI_INT,rank_x,sendtag,Communicator,&req1[0]); - MPI_Irecv(recvList_X, recvCount_X,MPI_INT,rank_X,recvtag,Communicator,&req2[0]); - MPI_Isend(sendList_X, sendCount_X,MPI_INT,rank_X,sendtag,Communicator,&req1[1]); - MPI_Irecv(recvList_x, recvCount_x,MPI_INT,rank_x,recvtag,Communicator,&req2[1]); - MPI_Isend(sendList_y, sendCount_y,MPI_INT,rank_y,sendtag,Communicator,&req1[2]); - MPI_Irecv(recvList_Y, recvCount_Y,MPI_INT,rank_Y,recvtag,Communicator,&req2[2]); - MPI_Isend(sendList_Y, sendCount_Y,MPI_INT,rank_Y,sendtag,Communicator,&req1[3]); - MPI_Irecv(recvList_y, recvCount_y,MPI_INT,rank_y,recvtag,Communicator,&req2[3]); - MPI_Isend(sendList_z, sendCount_z,MPI_INT,rank_z,sendtag,Communicator,&req1[4]); - MPI_Irecv(recvList_Z, recvCount_Z,MPI_INT,rank_Z,recvtag,Communicator,&req2[4]); - MPI_Isend(sendList_Z, sendCount_Z,MPI_INT,rank_Z,sendtag,Communicator,&req1[5]); - MPI_Irecv(recvList_z, recvCount_z,MPI_INT,rank_z,recvtag,Communicator,&req2[5]); + MPI_Isend(sendList_x, sendCount_x,MPI_INT,rank_x,sendtag,Communicator.getCommunicator(),&req1[0]); + MPI_Irecv(recvList_X, recvCount_X,MPI_INT,rank_X,recvtag,Communicator.getCommunicator(),&req2[0]); + MPI_Isend(sendList_X, sendCount_X,MPI_INT,rank_X,sendtag,Communicator.getCommunicator(),&req1[1]); + MPI_Irecv(recvList_x, recvCount_x,MPI_INT,rank_x,recvtag,Communicator.getCommunicator(),&req2[1]); + MPI_Isend(sendList_y, sendCount_y,MPI_INT,rank_y,sendtag,Communicator.getCommunicator(),&req1[2]); + MPI_Irecv(recvList_Y, recvCount_Y,MPI_INT,rank_Y,recvtag,Communicator.getCommunicator(),&req2[2]); + MPI_Isend(sendList_Y, sendCount_Y,MPI_INT,rank_Y,sendtag,Communicator.getCommunicator(),&req1[3]); + MPI_Irecv(recvList_y, recvCount_y,MPI_INT,rank_y,recvtag,Communicator.getCommunicator(),&req2[3]); + MPI_Isend(sendList_z, sendCount_z,MPI_INT,rank_z,sendtag,Communicator.getCommunicator(),&req1[4]); + 
MPI_Irecv(recvList_Z, recvCount_Z,MPI_INT,rank_Z,recvtag,Communicator.getCommunicator(),&req2[4]); + MPI_Isend(sendList_Z, sendCount_Z,MPI_INT,rank_Z,sendtag,Communicator.getCommunicator(),&req1[5]); + MPI_Irecv(recvList_z, recvCount_z,MPI_INT,rank_z,recvtag,Communicator.getCommunicator(),&req2[5]); - MPI_Isend(sendList_xy, sendCount_xy,MPI_INT,rank_xy,sendtag,Communicator,&req1[6]); - MPI_Irecv(recvList_XY, recvCount_XY,MPI_INT,rank_XY,recvtag,Communicator,&req2[6]); - MPI_Isend(sendList_XY, sendCount_XY,MPI_INT,rank_XY,sendtag,Communicator,&req1[7]); - MPI_Irecv(recvList_xy, recvCount_xy,MPI_INT,rank_xy,recvtag,Communicator,&req2[7]); - MPI_Isend(sendList_Xy, sendCount_Xy,MPI_INT,rank_Xy,sendtag,Communicator,&req1[8]); - MPI_Irecv(recvList_xY, recvCount_xY,MPI_INT,rank_xY,recvtag,Communicator,&req2[8]); - MPI_Isend(sendList_xY, sendCount_xY,MPI_INT,rank_xY,sendtag,Communicator,&req1[9]); - MPI_Irecv(recvList_Xy, recvCount_Xy,MPI_INT,rank_Xy,recvtag,Communicator,&req2[9]); + MPI_Isend(sendList_xy, sendCount_xy,MPI_INT,rank_xy,sendtag,Communicator.getCommunicator(),&req1[6]); + MPI_Irecv(recvList_XY, recvCount_XY,MPI_INT,rank_XY,recvtag,Communicator.getCommunicator(),&req2[6]); + MPI_Isend(sendList_XY, sendCount_XY,MPI_INT,rank_XY,sendtag,Communicator.getCommunicator(),&req1[7]); + MPI_Irecv(recvList_xy, recvCount_xy,MPI_INT,rank_xy,recvtag,Communicator.getCommunicator(),&req2[7]); + MPI_Isend(sendList_Xy, sendCount_Xy,MPI_INT,rank_Xy,sendtag,Communicator.getCommunicator(),&req1[8]); + MPI_Irecv(recvList_xY, recvCount_xY,MPI_INT,rank_xY,recvtag,Communicator.getCommunicator(),&req2[8]); + MPI_Isend(sendList_xY, sendCount_xY,MPI_INT,rank_xY,sendtag,Communicator.getCommunicator(),&req1[9]); + MPI_Irecv(recvList_Xy, recvCount_Xy,MPI_INT,rank_Xy,recvtag,Communicator.getCommunicator(),&req2[9]); - MPI_Isend(sendList_xz, sendCount_xz,MPI_INT,rank_xz,sendtag,Communicator,&req1[10]); - MPI_Irecv(recvList_XZ, recvCount_XZ,MPI_INT,rank_XZ,recvtag,Communicator,&req2[10]); - MPI_Isend(sendList_XZ, sendCount_XZ,MPI_INT,rank_XZ,sendtag,Communicator,&req1[11]); - MPI_Irecv(recvList_xz, recvCount_xz,MPI_INT,rank_xz,recvtag,Communicator,&req2[11]); - MPI_Isend(sendList_Xz, sendCount_Xz,MPI_INT,rank_Xz,sendtag,Communicator,&req1[12]); - MPI_Irecv(recvList_xZ, recvCount_xZ,MPI_INT,rank_xZ,recvtag,Communicator,&req2[12]); - MPI_Isend(sendList_xZ, sendCount_xZ,MPI_INT,rank_xZ,sendtag,Communicator,&req1[13]); - MPI_Irecv(recvList_Xz, recvCount_Xz,MPI_INT,rank_Xz,recvtag,Communicator,&req2[13]); + MPI_Isend(sendList_xz, sendCount_xz,MPI_INT,rank_xz,sendtag,Communicator.getCommunicator(),&req1[10]); + MPI_Irecv(recvList_XZ, recvCount_XZ,MPI_INT,rank_XZ,recvtag,Communicator.getCommunicator(),&req2[10]); + MPI_Isend(sendList_XZ, sendCount_XZ,MPI_INT,rank_XZ,sendtag,Communicator.getCommunicator(),&req1[11]); + MPI_Irecv(recvList_xz, recvCount_xz,MPI_INT,rank_xz,recvtag,Communicator.getCommunicator(),&req2[11]); + MPI_Isend(sendList_Xz, sendCount_Xz,MPI_INT,rank_Xz,sendtag,Communicator.getCommunicator(),&req1[12]); + MPI_Irecv(recvList_xZ, recvCount_xZ,MPI_INT,rank_xZ,recvtag,Communicator.getCommunicator(),&req2[12]); + MPI_Isend(sendList_xZ, sendCount_xZ,MPI_INT,rank_xZ,sendtag,Communicator.getCommunicator(),&req1[13]); + MPI_Irecv(recvList_Xz, recvCount_Xz,MPI_INT,rank_Xz,recvtag,Communicator.getCommunicator(),&req2[13]); - MPI_Isend(sendList_yz, sendCount_yz,MPI_INT,rank_yz,sendtag,Communicator,&req1[14]); - MPI_Irecv(recvList_YZ, recvCount_YZ,MPI_INT,rank_YZ,recvtag,Communicator,&req2[14]); - 
MPI_Isend(sendList_YZ, sendCount_YZ,MPI_INT,rank_YZ,sendtag,Communicator,&req1[15]); - MPI_Irecv(recvList_yz, recvCount_yz,MPI_INT,rank_yz,recvtag,Communicator,&req2[15]); - MPI_Isend(sendList_Yz, sendCount_Yz,MPI_INT,rank_Yz,sendtag,Communicator,&req1[16]); - MPI_Irecv(recvList_yZ, recvCount_yZ,MPI_INT,rank_yZ,recvtag,Communicator,&req2[16]); - MPI_Isend(sendList_yZ, sendCount_yZ,MPI_INT,rank_yZ,sendtag,Communicator,&req1[17]); - MPI_Irecv(recvList_Yz, recvCount_Yz,MPI_INT,rank_Yz,recvtag,Communicator,&req2[17]); + MPI_Isend(sendList_yz, sendCount_yz,MPI_INT,rank_yz,sendtag,Communicator.getCommunicator(),&req1[14]); + MPI_Irecv(recvList_YZ, recvCount_YZ,MPI_INT,rank_YZ,recvtag,Communicator.getCommunicator(),&req2[14]); + MPI_Isend(sendList_YZ, sendCount_YZ,MPI_INT,rank_YZ,sendtag,Communicator.getCommunicator(),&req1[15]); + MPI_Irecv(recvList_yz, recvCount_yz,MPI_INT,rank_yz,recvtag,Communicator.getCommunicator(),&req2[15]); + MPI_Isend(sendList_Yz, sendCount_Yz,MPI_INT,rank_Yz,sendtag,Communicator.getCommunicator(),&req1[16]); + MPI_Irecv(recvList_yZ, recvCount_yZ,MPI_INT,rank_yZ,recvtag,Communicator.getCommunicator(),&req2[16]); + MPI_Isend(sendList_yZ, sendCount_yZ,MPI_INT,rank_yZ,sendtag,Communicator.getCommunicator(),&req1[17]); + MPI_Irecv(recvList_Yz, recvCount_Yz,MPI_INT,rank_Yz,recvtag,Communicator.getCommunicator(),&req2[17]); MPI_Waitall(18,req1,stat1); MPI_Waitall(18,req2,stat2); } //*************************************************************************************** -inline void CommunicateMeshHalo(DoubleArray &Mesh, MPI_Comm Communicator, +inline void CommunicateMeshHalo(DoubleArray &Mesh, const Utilities::MPI& Communicator, double *sendbuf_x,double *sendbuf_y,double *sendbuf_z,double *sendbuf_X,double *sendbuf_Y,double *sendbuf_Z, double *sendbuf_xy,double *sendbuf_XY,double *sendbuf_xY,double *sendbuf_Xy, double *sendbuf_xz,double *sendbuf_XZ,double *sendbuf_xZ,double *sendbuf_Xz, @@ -319,41 +317,41 @@ inline void CommunicateMeshHalo(DoubleArray &Mesh, MPI_Comm Communicator, PackMeshData(sendList_YZ, sendCount_YZ ,sendbuf_YZ, MeshData); //...................................................................................... 
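// --- Editor's usage sketch (not part of the patch) ---------------------------------
// CommunicateSendRecvCounts / CommunicateRecvLists above repeat the same non-blocking
// handshake for all 18 neighbours.  A two-neighbour sketch of that pattern, assuming
// only getCommunicator() and barrier() from the wrapper; neighbour ranks, tags, and
// counts are placeholders.
#include "common/MPI.h"
#include <mpi.h>

void exchange_counts_example( const Utilities::MPI& comm, int rank_lo, int rank_hi,
                              int sendCount_lo, int sendCount_hi,
                              int& recvCount_lo, int& recvCount_hi, int tag )
{
    MPI_Request req1[2], req2[2];
    // Post the sends and the matching receives, then wait on both sets.
    MPI_Isend( &sendCount_lo, 1, MPI_INT, rank_lo, tag+0, comm.getCommunicator(), &req1[0] );
    MPI_Irecv( &recvCount_hi, 1, MPI_INT, rank_hi, tag+0, comm.getCommunicator(), &req2[0] );
    MPI_Isend( &sendCount_hi, 1, MPI_INT, rank_hi, tag+1, comm.getCommunicator(), &req1[1] );
    MPI_Irecv( &recvCount_lo, 1, MPI_INT, rank_lo, tag+1, comm.getCommunicator(), &req2[1] );
    MPI_Waitall( 2, req1, MPI_STATUSES_IGNORE );
    MPI_Waitall( 2, req2, MPI_STATUSES_IGNORE );
    comm.barrier();
}
// ------------------------------------------------------------------------------------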
MPI_Sendrecv(sendbuf_x,sendCount_x,MPI_DOUBLE,rank_x,sendtag, - recvbuf_X,recvCount_X,MPI_DOUBLE,rank_X,recvtag,Communicator,MPI_STATUS_IGNORE); + recvbuf_X,recvCount_X,MPI_DOUBLE,rank_X,recvtag,Communicator.getCommunicator(),MPI_STATUS_IGNORE); MPI_Sendrecv(sendbuf_X,sendCount_X,MPI_DOUBLE,rank_X,sendtag, - recvbuf_x,recvCount_x,MPI_DOUBLE,rank_x,recvtag,Communicator,MPI_STATUS_IGNORE); + recvbuf_x,recvCount_x,MPI_DOUBLE,rank_x,recvtag,Communicator.getCommunicator(),MPI_STATUS_IGNORE); MPI_Sendrecv(sendbuf_y,sendCount_y,MPI_DOUBLE,rank_y,sendtag, - recvbuf_Y,recvCount_Y,MPI_DOUBLE,rank_Y,recvtag,Communicator,MPI_STATUS_IGNORE); + recvbuf_Y,recvCount_Y,MPI_DOUBLE,rank_Y,recvtag,Communicator.getCommunicator(),MPI_STATUS_IGNORE); MPI_Sendrecv(sendbuf_Y,sendCount_Y,MPI_DOUBLE,rank_Y,sendtag, - recvbuf_y,recvCount_y,MPI_DOUBLE,rank_y,recvtag,Communicator,MPI_STATUS_IGNORE); + recvbuf_y,recvCount_y,MPI_DOUBLE,rank_y,recvtag,Communicator.getCommunicator(),MPI_STATUS_IGNORE); MPI_Sendrecv(sendbuf_z,sendCount_z,MPI_DOUBLE,rank_z,sendtag, - recvbuf_Z,recvCount_Z,MPI_DOUBLE,rank_Z,recvtag,Communicator,MPI_STATUS_IGNORE); + recvbuf_Z,recvCount_Z,MPI_DOUBLE,rank_Z,recvtag,Communicator.getCommunicator(),MPI_STATUS_IGNORE); MPI_Sendrecv(sendbuf_Z,sendCount_Z,MPI_DOUBLE,rank_Z,sendtag, - recvbuf_z,recvCount_z,MPI_DOUBLE,rank_z,recvtag,Communicator,MPI_STATUS_IGNORE); + recvbuf_z,recvCount_z,MPI_DOUBLE,rank_z,recvtag,Communicator.getCommunicator(),MPI_STATUS_IGNORE); MPI_Sendrecv(sendbuf_xy,sendCount_xy,MPI_DOUBLE,rank_xy,sendtag, - recvbuf_XY,recvCount_XY,MPI_DOUBLE,rank_XY,recvtag,Communicator,MPI_STATUS_IGNORE); + recvbuf_XY,recvCount_XY,MPI_DOUBLE,rank_XY,recvtag,Communicator.getCommunicator(),MPI_STATUS_IGNORE); MPI_Sendrecv(sendbuf_XY,sendCount_XY,MPI_DOUBLE,rank_XY,sendtag, - recvbuf_xy,recvCount_xy,MPI_DOUBLE,rank_xy,recvtag,Communicator,MPI_STATUS_IGNORE); + recvbuf_xy,recvCount_xy,MPI_DOUBLE,rank_xy,recvtag,Communicator.getCommunicator(),MPI_STATUS_IGNORE); MPI_Sendrecv(sendbuf_Xy,sendCount_Xy,MPI_DOUBLE,rank_Xy,sendtag, - recvbuf_xY,recvCount_xY,MPI_DOUBLE,rank_xY,recvtag,Communicator,MPI_STATUS_IGNORE); + recvbuf_xY,recvCount_xY,MPI_DOUBLE,rank_xY,recvtag,Communicator.getCommunicator(),MPI_STATUS_IGNORE); MPI_Sendrecv(sendbuf_xY,sendCount_xY,MPI_DOUBLE,rank_xY,sendtag, - recvbuf_Xy,recvCount_Xy,MPI_DOUBLE,rank_Xy,recvtag,Communicator,MPI_STATUS_IGNORE); + recvbuf_Xy,recvCount_Xy,MPI_DOUBLE,rank_Xy,recvtag,Communicator.getCommunicator(),MPI_STATUS_IGNORE); MPI_Sendrecv(sendbuf_xz,sendCount_xz,MPI_DOUBLE,rank_xz,sendtag, - recvbuf_XZ,recvCount_XZ,MPI_DOUBLE,rank_XZ,recvtag,Communicator,MPI_STATUS_IGNORE); + recvbuf_XZ,recvCount_XZ,MPI_DOUBLE,rank_XZ,recvtag,Communicator.getCommunicator(),MPI_STATUS_IGNORE); MPI_Sendrecv(sendbuf_XZ,sendCount_XZ,MPI_DOUBLE,rank_XZ,sendtag, - recvbuf_xz,recvCount_xz,MPI_DOUBLE,rank_xz,recvtag,Communicator,MPI_STATUS_IGNORE); + recvbuf_xz,recvCount_xz,MPI_DOUBLE,rank_xz,recvtag,Communicator.getCommunicator(),MPI_STATUS_IGNORE); MPI_Sendrecv(sendbuf_Xz,sendCount_Xz,MPI_DOUBLE,rank_Xz,sendtag, - recvbuf_xZ,recvCount_xZ,MPI_DOUBLE,rank_xZ,recvtag,Communicator,MPI_STATUS_IGNORE); + recvbuf_xZ,recvCount_xZ,MPI_DOUBLE,rank_xZ,recvtag,Communicator.getCommunicator(),MPI_STATUS_IGNORE); MPI_Sendrecv(sendbuf_xZ,sendCount_xZ,MPI_DOUBLE,rank_xZ,sendtag, - recvbuf_Xz,recvCount_Xz,MPI_DOUBLE,rank_Xz,recvtag,Communicator,MPI_STATUS_IGNORE); + recvbuf_Xz,recvCount_Xz,MPI_DOUBLE,rank_Xz,recvtag,Communicator.getCommunicator(),MPI_STATUS_IGNORE); 
MPI_Sendrecv(sendbuf_yz,sendCount_yz,MPI_DOUBLE,rank_yz,sendtag, - recvbuf_YZ,recvCount_YZ,MPI_DOUBLE,rank_YZ,recvtag,Communicator,MPI_STATUS_IGNORE); + recvbuf_YZ,recvCount_YZ,MPI_DOUBLE,rank_YZ,recvtag,Communicator.getCommunicator(),MPI_STATUS_IGNORE); MPI_Sendrecv(sendbuf_YZ,sendCount_YZ,MPI_DOUBLE,rank_YZ,sendtag, - recvbuf_yz,recvCount_yz,MPI_DOUBLE,rank_yz,recvtag,Communicator,MPI_STATUS_IGNORE); + recvbuf_yz,recvCount_yz,MPI_DOUBLE,rank_yz,recvtag,Communicator.getCommunicator(),MPI_STATUS_IGNORE); MPI_Sendrecv(sendbuf_Yz,sendCount_Yz,MPI_DOUBLE,rank_Yz,sendtag, - recvbuf_yZ,recvCount_yZ,MPI_DOUBLE,rank_yZ,recvtag,Communicator,MPI_STATUS_IGNORE); + recvbuf_yZ,recvCount_yZ,MPI_DOUBLE,rank_yZ,recvtag,Communicator.getCommunicator(),MPI_STATUS_IGNORE); MPI_Sendrecv(sendbuf_yZ,sendCount_yZ,MPI_DOUBLE,rank_yZ,sendtag, - recvbuf_Yz,recvCount_Yz,MPI_DOUBLE,rank_Yz,recvtag,Communicator,MPI_STATUS_IGNORE); + recvbuf_Yz,recvCount_Yz,MPI_DOUBLE,rank_Yz,recvtag,Communicator.getCommunicator(),MPI_STATUS_IGNORE); //........................................................................................ UnpackMeshData(recvList_x, recvCount_x ,recvbuf_x, MeshData); UnpackMeshData(recvList_X, recvCount_X ,recvbuf_X, MeshData); diff --git a/common/Communication.hpp b/common/Communication.hpp index 33fed3a7..ca310ea5 100644 --- a/common/Communication.hpp +++ b/common/Communication.hpp @@ -2,9 +2,8 @@ #define COMMUNICATION_HPP_INC #include "common/Communication.h" -#include "common/MPI_Helpers.h" +#include "common/MPI.h" #include "common/Utilities.h" -//#include "ProfilerApp.h" /******************************************************** @@ -12,17 +11,19 @@ ********************************************************/ template Array redistribute( const RankInfoStruct& src_rank, const Array& src_data, - const RankInfoStruct& dst_rank, std::array dst_size, MPI_Comm comm ) + const RankInfoStruct& dst_rank, std::array dst_size, const Utilities::MPI& comm ) { -#ifdef USE_MPI + if ( comm.getSize() == 1 ) { + return src_data.subset( { 0, (size_t) dst_size[0]-1, 0, (size_t) dst_size[1]-1, 0, (size_t) dst_size[2]-1 } ); + } // Get the src size std::array src_size; int size0[3] = { (int) src_data.size(0), (int) src_data.size(1), (int) src_data.size(2) }; - MPI_Allreduce( size0, src_size.data(), 3, MPI_INT, MPI_MAX, comm ); + comm.maxReduce( size0, src_size.data(), 3 ); if ( !src_data.empty() ) ASSERT( src_size[0] == size0[0] && src_size[1] == size0[1] && src_size[2] == size0[2] ); // Check that dst_size matches on all ranks - MPI_Allreduce( dst_size.data(), size0, 3, MPI_INT, MPI_MAX, comm ); + comm.maxReduce( dst_size.data(), size0, 3 ); ASSERT( dst_size[0] == size0[0] && dst_size[1] == size0[1] && dst_size[2] == size0[2] ); // Function to get overlap range auto calcOverlap = []( int i1[3], int i2[3], int j1[3], int j2[3] ) { @@ -60,7 +61,7 @@ Array redistribute( const RankInfoStruct& src_rank, const Array& src } std::vector send_request( send_rank.size() ); for (size_t i=0; i dst_data( dst_size[0], dst_size[1], dst_size[2] ); int i1[3] = { dst_size[0] * dst_rank.ix, dst_size[1] * dst_rank.jy, dst_size[2] * dst_rank.kz }; @@ -75,17 +76,14 @@ Array redistribute( const RankInfoStruct& src_rank, const Array& src continue; int rank = src_rank.getRankForBlock(i,j,k); Array data( index[1] - index[0] + 1, index[3] - index[2] + 1, index[5] - index[4] + 1 ); - MPI_Recv( data.data(), sizeof(TYPE)*data.length(), MPI_BYTE, rank, 5462, comm, MPI_STATUS_IGNORE ); + comm.recv( data.data(), data.length(), rank, 5462 ); 
dst_data.copySubset( index, data ); } } } // Free data - MPI_Waitall( send_request.size(), send_request.data(), MPI_STATUSES_IGNORE ); + comm.waitAll( send_request.size(), send_request.data() ); return dst_data; -#else - return src_data.subset( { 0, dst_size[0]-1, 0, dst_size[1]-1, 0, dst_size[2]-1 ); -#endif } @@ -94,27 +92,11 @@ Array redistribute( const RankInfoStruct& src_rank, const Array& src * Structure to fill halo cells * ********************************************************/ template -fillHalo::fillHalo( MPI_Comm comm_, const RankInfoStruct& info_, +fillHalo::fillHalo( const Utilities::MPI& comm_, const RankInfoStruct& info_, std::array n_, std::array ng_, int tag0, int depth_, std::array fill, std::array periodic ): comm(comm_), info(info_), n(n_), ng(ng_), depth(depth_) { - if ( std::is_same() ) { - N_type = 1; - datatype = MPI_DOUBLE; - } else if ( std::is_same() ) { - N_type = 1; - datatype = MPI_FLOAT; - } else if ( sizeof(TYPE)%sizeof(double)==0 ) { - N_type = sizeof(TYPE) / sizeof(double); - datatype = MPI_DOUBLE; - } else if ( sizeof(TYPE)%sizeof(float)==0 ) { - N_type = sizeof(TYPE) / sizeof(float); - datatype = MPI_FLOAT; - } else { - N_type = sizeof(TYPE); - datatype = MPI_BYTE; - } // Set the fill pattern memset(fill_pattern,0,sizeof(fill_pattern)); if ( fill[0] ) { @@ -251,8 +233,8 @@ void fillHalo::fill( Array& data ) for (int k=0; k<3; k++) { if ( !fill_pattern[i][j][k] ) continue; - MPI_Irecv( recv[i][j][k], N_type*depth2*N_send_recv[i][j][k], datatype, - info.rank[i][j][k], tag[2-i][2-j][2-k], comm, &recv_req[i][j][k] ); + recv_req[i][j][k] = comm.Irecv( recv[i][j][k], depth2*N_send_recv[i][j][k], + info.rank[i][j][k], tag[2-i][2-j][2-k] ); } } } @@ -263,19 +245,18 @@ void fillHalo::fill( Array& data ) if ( !fill_pattern[i][j][k] ) continue; pack( data, i-1, j-1, k-1, send[i][j][k] ); - MPI_Isend( send[i][j][k], N_type*depth2*N_send_recv[i][j][k], datatype, - info.rank[i][j][k], tag[i][j][k], comm, &send_req[i][j][k] ); + send_req[i][j][k] = comm.Isend( send[i][j][k], depth2*N_send_recv[i][j][k], + info.rank[i][j][k], tag[i][j][k] ); } } } // Recv the dst data and unpack (we recive in reverse order to match the sends) - MPI_Status status; for (int i=2; i>=0; i--) { for (int j=2; j>=0; j--) { for (int k=2; k>=0; k--) { if ( !fill_pattern[i][j][k] ) continue; - MPI_Wait(&recv_req[i][j][k],&status); + comm.wait( recv_req[i][j][k] ); unpack( data, i-1, j-1, k-1, recv[i][j][k] ); } } @@ -286,7 +267,7 @@ void fillHalo::fill( Array& data ) for (int k=0; k<3; k++) { if ( !fill_pattern[i][j][k] ) continue; - MPI_Wait(&send_req[i][j][k],&status); + comm.wait( send_req[i][j][k] ); } } } diff --git a/common/Domain.cpp b/common/Domain.cpp index a4959508..58ca099b 100644 --- a/common/Domain.cpp +++ b/common/Domain.cpp @@ -12,7 +12,7 @@ #include "common/Domain.h" #include "common/Array.h" #include "common/Utilities.h" -#include "common/MPI_Helpers.h" +#include "common/MPI.h" #include "common/Communication.h" // Inline function to read line without a return argument @@ -62,11 +62,10 @@ Domain::Domain( int nx, int ny, int nz, int rnk, int npx, int npy, int npz, NULL_USE( npy ); NULL_USE( npz ); // set up the neighbor ranks - int myrank; - MPI_Comm_rank( Comm, &myrank ); + int myrank = Comm.getRank(); rank_info = RankInfoStruct( myrank, rank_info.nx, rank_info.ny, rank_info.nz ); - MPI_Barrier(Comm); + Comm.barrier(); auto db = std::make_shared( ); db->putScalar( "BC", BC ); @@ -76,10 +75,9 @@ Domain::Domain( int nx, int ny, int nz, int rnk, int npx, int npy, int npz, 
db->putVector( "L", { lx, ly, lz } ); initialize( db ); } -Domain::Domain( std::shared_ptr db, MPI_Comm Communicator): +Domain::Domain( std::shared_ptr db, const Utilities::MPI& Communicator): database(db), Nx(0), Ny(0), Nz(0), Lx(0), Ly(0), Lz(0), Volume(0), BoundaryCondition(0), - Comm(MPI_COMM_NULL), inlet_layers_x(0), inlet_layers_y(0), inlet_layers_z(0), outlet_layers_x(0), outlet_layers_y(0), outlet_layers_z(0), inlet_layers_phase(1),outlet_layers_phase(2), @@ -109,14 +107,13 @@ Domain::Domain( std::shared_ptr db, MPI_Comm Communicator): recvData_xY(NULL), recvData_yZ(NULL), recvData_Xz(NULL), recvData_XY(NULL), recvData_YZ(NULL), recvData_XZ(NULL), id(NULL) { - MPI_Comm_dup(Communicator,&Comm); + Comm = Communicator.dup(); // set up the neighbor ranks - int myrank; - MPI_Comm_rank( Comm, &myrank ); + int myrank = Comm.getRank(); initialize( db ); rank_info = RankInfoStruct( myrank, rank_info.nx, rank_info.ny, rank_info.nz ); - MPI_Barrier(Comm); + Comm.barrier(); } Domain::~Domain() @@ -165,10 +162,6 @@ Domain::~Domain() delete [] recvData_yZ; delete [] recvData_Yz; delete [] recvData_YZ; // Free id delete [] id; - // Free the communicator - if ( Comm != MPI_COMM_WORLD && Comm != MPI_COMM_NULL ) { - MPI_Comm_free(&Comm); - } } void Domain::initialize( std::shared_ptr db ) @@ -219,8 +212,7 @@ void Domain::initialize( std::shared_ptr db ) Ny = ny+2; Nz = nz+2; // Initialize ranks - int myrank; - MPI_Comm_rank( Comm, &myrank ); + int myrank = Comm.getRank(); rank_info = RankInfoStruct(myrank,nproc[0],nproc[1],nproc[2]); // inlet layers only apply to lower part of domain if (rank_info.ix > 0) inlet_layers_x = 0; @@ -239,8 +231,7 @@ void Domain::initialize( std::shared_ptr db ) id = new signed char[N]; memset(id,0,N); BoundaryCondition = d_db->getScalar("BC"); - int nprocs; - MPI_Comm_size( Comm, &nprocs ); + int nprocs = Comm.getSize(); INSIST(nprocs == nproc[0]*nproc[1]*nproc[2],"Fatal error in processor count!"); } @@ -569,7 +560,7 @@ void Domain::Decomp( const std::string& Filename ) } else{ //printf("Sending data to process %i \n", rnk); - MPI_Send(loc_id,N,MPI_CHAR,rnk,15,Comm); + Comm.send(loc_id,N,rnk,15); } // Write the data for this rank data sprintf(LocalRankFilename,"ID.%05i",rnk+rank_offset); @@ -584,9 +575,9 @@ void Domain::Decomp( const std::string& Filename ) else{ // Recieve the subdomain from rank = 0 //printf("Ready to recieve data %i at process %i \n", N,rank); - MPI_Recv(id,N,MPI_CHAR,0,15,Comm,MPI_STATUS_IGNORE); + Comm.recv(id,N,0,15); } - MPI_Barrier(Comm); + Comm.barrier(); } void Domain::AggregateLabels( const std::string& filename ){ @@ -625,7 +616,7 @@ void Domain::AggregateLabels( const std::string& filename ){ } } } - MPI_Barrier(Comm); + Comm.barrier(); // populate the FullID if (rank() == 0){ @@ -651,7 +642,7 @@ void Domain::AggregateLabels( const std::string& filename ){ ipx = (rnk - ipz*npx*npy - ipy*npx); //printf("ipx=%i ipy=%i ipz=%i\n", ipx, ipy, ipz); int tag = 15+rnk; - MPI_Recv(LocalID,local_size,MPI_CHAR,rnk,tag,Comm,MPI_STATUS_IGNORE); + Comm.recv(LocalID,local_size,rnk,tag); for (int k=1; k db, MPI_Comm Communicator); + Domain( std::shared_ptr db, const Utilities::MPI& Communicator); //! 
Obsolete constructor Domain( int nx, int ny, int nz, int rnk, int npx, int npy, int npz, @@ -116,7 +116,7 @@ public: // Public variables (need to create accessors instead) double porosity; RankInfoStruct rank_info; - MPI_Comm Comm; // MPI Communicator for this domain + Utilities::MPI Comm; // MPI Communicator for this domain int BoundaryCondition; diff --git a/common/MPI.I b/common/MPI.I new file mode 100644 index 00000000..8cbc9c09 --- /dev/null +++ b/common/MPI.I @@ -0,0 +1,1143 @@ +// This file contains the default instantiations for templated operations +// Note: Intel compilers need definitions before all default instantions to compile correctly +#ifndef included_MPI_I +#define included_MPI_I + +#include "common/Utilities.h" + +#include + + +#define MPI_CLASS MPI +#define MPI_CLASS_ERROR ERROR +#define MPI_CLASS_ASSERT ASSERT + +#undef NULL_USE +#define NULL_USE( variable ) \ + do { \ + if ( 0 ) { \ + auto static t = (char *) &variable; \ + t++; \ + } \ + } while ( 0 ) + + +namespace Utilities { + + +// Function to test if a type is a std::pair +template +struct is_pair : std::false_type { +}; +template +struct is_pair> : std::true_type { +}; + + +// Function to test if a type can be passed by MPI +template +constexpr typename std::enable_if::value,bool>::type + is_mpi_copyable() +{ + return true; +} +template +constexpr typename std::enable_if::value&&is_pair::value,bool>::type + is_mpi_copyable() +{ + return is_mpi_copyable() && is_mpi_copyable(); +} +template +constexpr typename std::enable_if::value&&!is_pair::value,bool>::type + is_mpi_copyable() +{ + return false; +} + + +/************************************************************************ + * sumReduce * + ************************************************************************/ +template +inline TYPE MPI_CLASS::sumReduce( const TYPE value ) const +{ + if ( comm_size > 1 ) { + TYPE tmp = value; + call_sumReduce( &tmp, 1 ); + return tmp; + } else { + return value; + } +} +template +inline void MPI_CLASS::sumReduce( TYPE *x, const int n ) const +{ + if ( comm_size > 1 ) + call_sumReduce( x, n ); +} +template +inline void MPI_CLASS::sumReduce( const TYPE *x, TYPE *y, const int n ) const +{ + if ( comm_size > 1 ) { + call_sumReduce( x, y, n ); + } else { + for ( int i = 0; i < n; i++ ) + y[i] = x[i]; + } +} +// Define specializations of call_sumReduce(TYPE*, const int) +#if defined( USE_MPI ) || defined( USE_EXT_MPI ) +template<> +void MPI_CLASS::call_sumReduce( unsigned char *, const int ) const; +template<> +void MPI_CLASS::call_sumReduce( char *, const int ) const; +template<> +void MPI_CLASS::call_sumReduce( unsigned int *, const int ) const; +template<> +void MPI_CLASS::call_sumReduce( int *, const int ) const; +template<> +void MPI_CLASS::call_sumReduce( unsigned long int *, const int ) const; +template<> +void MPI_CLASS::call_sumReduce( long int *, const int ) const; +template<> +void MPI_CLASS::call_sumReduce( size_t *, const int ) const; +template<> +void MPI_CLASS::call_sumReduce( float *, const int ) const; +template<> +void MPI_CLASS::call_sumReduce( double *, const int ) const; +template<> +void MPI_CLASS::call_sumReduce>( std::complex *, const int ) const; +#endif +// Default instantiations of call_sumReduce(TYPE*, const int) +template +void MPI_CLASS::call_sumReduce( TYPE *, const int ) const +{ + char message[200]; + sprintf( message, "Default instantion of sumReduce in parallel is not supported (%s)", + typeid( TYPE ).name() ); + MPI_CLASS_ERROR( message ); +} +// Define specializations of call_sumReduce(const 
TYPE*, TYPE*, const int) +#if defined( USE_MPI ) || defined( USE_EXT_MPI ) +template<> +void MPI_CLASS::call_sumReduce( + const unsigned char *, unsigned char *, const int ) const; +template<> +void MPI_CLASS::call_sumReduce( const char *, char *, const int ) const; +template<> +void MPI_CLASS::call_sumReduce( + const unsigned int *, unsigned int *, const int ) const; +template<> +void MPI_CLASS::call_sumReduce( const int *, int *, const int ) const; +template<> +void MPI_CLASS::call_sumReduce( + const unsigned long int *, unsigned long int *, const int ) const; +template<> +void MPI_CLASS::call_sumReduce( const long int *, long int *, const int ) const; +template<> +void MPI_CLASS::call_sumReduce( const size_t *, size_t *, const int ) const; +template<> +void MPI_CLASS::call_sumReduce( const float *, float *, const int ) const; +template<> +void MPI_CLASS::call_sumReduce( const double *, double *, const int ) const; +template<> +void MPI_CLASS::call_sumReduce>( + const std::complex *, std::complex *, const int ) const; +#endif +// Default instantiations of call_sumReduce(const TYPE*, TYPE*, const int) +template +void MPI_CLASS::call_sumReduce( const TYPE *x, TYPE *y, const int n ) const +{ + NULL_USE( x ); + NULL_USE( y ); + NULL_USE( n ); + char message[200]; + sprintf( message, "Default instantion of sumReduce in parallel is not supported (%s)", + typeid( TYPE ).name() ); + MPI_CLASS_ERROR( message ); +} + + +/************************************************************************ + * minReduce * + ************************************************************************/ +template +inline TYPE MPI_CLASS::minReduce( const TYPE value ) const +{ + if ( comm_size > 1 ) { + TYPE tmp = value; + call_minReduce( &tmp, 1, nullptr ); + return tmp; + } else { + return value; + } +} +template +inline void MPI_CLASS::minReduce( TYPE *x, const int n, int *rank_of_min ) const +{ + if ( comm_size > 1 ) { + call_minReduce( x, n, rank_of_min ); + } else { + if ( rank_of_min != nullptr ) { + for ( int i = 0; i < n; i++ ) + rank_of_min[i] = 0; + } + } +} +template +inline void MPI_CLASS::minReduce( const TYPE *x, TYPE *y, const int n, int *rank_of_min ) const +{ + if ( comm_size > 1 ) { + call_minReduce( x, y, n, rank_of_min ); + } else { + for ( int i = 0; i < n; i++ ) { + y[i] = x[i]; + if ( rank_of_min != nullptr ) + rank_of_min[i] = 0; + } + } +} +// Define specializations of call_minReduce(TYPE*, const int, int*) +#if defined( USE_MPI ) || defined( USE_EXT_MPI ) +template<> +void MPI_CLASS::call_minReduce( unsigned char *, const int, int * ) const; +template<> +void MPI_CLASS::call_minReduce( char *, const int, int * ) const; +template<> +void MPI_CLASS::call_minReduce( unsigned int *, const int, int * ) const; +template<> +void MPI_CLASS::call_minReduce( int *, const int, int * ) const; +template<> +void MPI_CLASS::call_minReduce( unsigned long int *, const int, int * ) const; +template<> +void MPI_CLASS::call_minReduce( long int *, const int, int * ) const; +template<> +void MPI_CLASS::call_minReduce( + unsigned long long int *, const int, int * ) const; +template<> +void MPI_CLASS::call_minReduce( long long int *, const int, int * ) const; +template<> +void MPI_CLASS::call_minReduce( size_t *, const int, int * ) const; +template<> +void MPI_CLASS::call_minReduce( float *, const int, int * ) const; +template<> +void MPI_CLASS::call_minReduce( double *, const int, int * ) const; +#endif +// Default instantiations of call_minReduce(TYPE*, const int, int*) +template +void MPI_CLASS::call_minReduce( 
TYPE *, const int, int * ) const +{ + char message[200]; + sprintf( message, "Default instantion of minReduce in parallel is not supported (%s)", + typeid( TYPE ).name() ); + MPI_CLASS_ERROR( message ); +} +// Define specializations of call_minReduce(const TYPE*, TYPE*, const int, int*) +#if defined( USE_MPI ) || defined( USE_EXT_MPI ) +template<> +void MPI_CLASS::call_minReduce( + const unsigned char *, unsigned char *, const int, int * ) const; +template<> +void MPI_CLASS::call_minReduce( const char *, char *, const int, int * ) const; +template<> +void MPI_CLASS::call_minReduce( + const unsigned int *, unsigned int *, const int, int * ) const; +template<> +void MPI_CLASS::call_minReduce( const int *, int *, const int, int * ) const; +template<> +void MPI_CLASS::call_minReduce( + const unsigned long int *, unsigned long int *, const int, int * ) const; +template<> +void MPI_CLASS::call_minReduce( const long int *, long int *, const int, int * ) const; +template<> +void MPI_CLASS::call_minReduce( + const unsigned long long int *, unsigned long long int *, const int, int * ) const; +template<> +void MPI_CLASS::call_minReduce( + const long long int *, long long int *, const int, int * ) const; +template<> +void MPI_CLASS::call_minReduce( const size_t *, size_t *, const int, int * ) const; +template<> +void MPI_CLASS::call_minReduce( const float *, float *, const int, int * ) const; +template<> +void MPI_CLASS::call_minReduce( const double *, double *, const int, int * ) const; +#endif +// Default instantiations of call_minReduce(const TYPE*, TYPE*, const int, int*) +template +void MPI_CLASS::call_minReduce( const TYPE *, TYPE *, const int, int * ) const +{ + char message[200]; + sprintf( message, "Default instantion of minReduce in parallel is not supported (%s)", + typeid( TYPE ).name() ); + MPI_CLASS_ERROR( message ); +} + + +/************************************************************************ + * maxReduce * + ************************************************************************/ +template +inline TYPE MPI_CLASS::maxReduce( const TYPE value ) const +{ + if ( comm_size > 1 ) { + TYPE tmp = value; + call_maxReduce( &tmp, 1, nullptr ); + return tmp; + } else { + return value; + } +} +template +inline void MPI_CLASS::maxReduce( TYPE *x, const int n, int *rank_of_max ) const +{ + if ( comm_size > 1 ) { + call_maxReduce( x, n, rank_of_max ); + } else { + if ( rank_of_max != nullptr ) { + for ( int i = 0; i < n; i++ ) + rank_of_max[i] = 0; + } + } +} +template +inline void MPI_CLASS::maxReduce( const TYPE *x, TYPE *y, const int n, int *rank_of_max ) const +{ + if ( comm_size > 1 ) { + call_maxReduce( x, y, n, rank_of_max ); + } else { + for ( int i = 0; i < n; i++ ) { + y[i] = x[i]; + if ( rank_of_max != nullptr ) + rank_of_max[i] = 0; + } + } +} +// Define specializations of call_maxReduce(TYPE*, const int, int*) +#if defined( USE_MPI ) || defined( USE_EXT_MPI ) +template<> +void MPI_CLASS::call_maxReduce( unsigned char *, const int, int * ) const; +template<> +void MPI_CLASS::call_maxReduce( char *, const int, int * ) const; +template<> +void MPI_CLASS::call_maxReduce( unsigned int *, const int, int * ) const; +template<> +void MPI_CLASS::call_maxReduce( int *, const int, int * ) const; +template<> +void MPI_CLASS::call_maxReduce( unsigned long int *, const int, int * ) const; +template<> +void MPI_CLASS::call_maxReduce( long int *, const int, int * ) const; +template<> +void MPI_CLASS::call_maxReduce( + unsigned long long int *, const int, int * ) const; +template<> +void 
MPI_CLASS::call_maxReduce( long long int *, const int, int * ) const; +template<> +void MPI_CLASS::call_maxReduce( size_t *, const int, int * ) const; +template<> +void MPI_CLASS::call_maxReduce( float *, const int, int * ) const; +template<> +void MPI_CLASS::call_maxReduce( double *, const int, int * ) const; +#endif +// Default instantiations of call_maxReduce(TYPE*, const int, int*) +template +void MPI_CLASS::call_maxReduce( TYPE *, const int, int * ) const +{ + char message[200]; + sprintf( message, "Default instantion of maxReduce in parallel is not supported (%s)", + typeid( TYPE ).name() ); + MPI_CLASS_ERROR( message ); +} +// Define specializations of call_maxReduce(const TYPE*, TYPE*, const int, int*) +#if defined( USE_MPI ) || defined( USE_EXT_MPI ) +template<> +void MPI_CLASS::call_maxReduce( + const unsigned char *, unsigned char *, const int, int * ) const; +template<> +void MPI_CLASS::call_maxReduce( const char *, char *, const int, int * ) const; +template<> +void MPI_CLASS::call_maxReduce( + const unsigned int *, unsigned int *, const int, int * ) const; +template<> +void MPI_CLASS::call_maxReduce( const int *, int *, const int, int * ) const; +template<> +void MPI_CLASS::call_maxReduce( + const unsigned long int *, unsigned long int *, const int, int * ) const; +template<> +void MPI_CLASS::call_maxReduce( const long int *, long int *, const int, int * ) const; +template<> +void MPI_CLASS::call_maxReduce( + const unsigned long long int *, unsigned long long int *, const int, int * ) const; +template<> +void MPI_CLASS::call_maxReduce( + const long long int *, long long int *, const int, int * ) const; +template<> +void MPI_CLASS::call_maxReduce( const size_t *, size_t *, const int, int * ) const; +template<> +void MPI_CLASS::call_maxReduce( const float *, float *, const int, int * ) const; +template<> +void MPI_CLASS::call_maxReduce( const double *, double *, const int, int * ) const; +#endif +// Default instantiations of call_maxReduce(const TYPE*, TYPE*, const int, int*) +template +void MPI_CLASS::call_maxReduce( const TYPE *, TYPE *, const int, int * ) const +{ + char message[200]; + sprintf( message, "Default instantion of maxReduce in parallel is not supported (%s)", + typeid( TYPE ).name() ); + MPI_CLASS_ERROR( message ); +} + + +/************************************************************************ + * bcast * + ************************************************************************/ +// Define specializations of bcast(TYPE*, const int, const int) +#if defined( USE_MPI ) || defined( USE_EXT_MPI ) +template<> +void MPI_CLASS::call_bcast( unsigned char *, const int, const int ) const; +template<> +void MPI_CLASS::call_bcast( char *, const int, const int ) const; +template<> +void MPI_CLASS::call_bcast( unsigned int *, const int, const int ) const; +template<> +void MPI_CLASS::call_bcast( int *, const int, const int ) const; +template<> +void MPI_CLASS::call_bcast( float *, const int, const int ) const; +template<> +void MPI_CLASS::call_bcast( double *, const int, const int ) const; +#else +template<> +void MPI_CLASS::call_bcast( char *, const int, const int ) const; +#endif +// Default instantiations of bcast(TYPE*, const int, const int) +template +void MPI_CLASS::call_bcast( TYPE *x, const int n, const int root ) const +{ + static_assert( is_mpi_copyable(), "Object is not trivially copyable" ); + call_bcast( (char *) x, (int) n * sizeof( TYPE ), root ); +} +// Specialization of bcast for std::string +template<> +inline std::string MPI_CLASS::bcast( const 
std::string &value, const int root ) const +{ + if ( comm_size == 1 ) + return value; + int length = static_cast( value.size() ); + call_bcast( &length, 1, root ); + if ( length == 0 ) + return std::string(); + char *str = new char[length + 1]; + if ( root == comm_rank ) { + for ( int i = 0; i < length; i++ ) + str[i] = value[i]; + } + call_bcast( str, length, root ); + str[length] = 0; + std::string result( str ); + delete[] str; + return result; +} +template<> +inline void MPI_CLASS::bcast( std::string *, const int, const int ) const +{ + MPI_CLASS_ERROR( "Cannot bcast an array of strings" ); +} +// Default implimentation of bcast +template +inline TYPE MPI_CLASS::bcast( const TYPE &value, const int root ) const +{ + if ( root >= comm_size ) + MPI_CLASS_ERROR( "root cannot be >= size in bcast" ); + if ( comm_size > 1 ) { + TYPE tmp = value; + call_bcast( &tmp, 1, root ); + return tmp; + } else { + return value; + } +} +template +inline void MPI_CLASS::bcast( TYPE *x, const int n, const int root ) const +{ + if ( root >= comm_size ) + MPI_CLASS_ERROR( "root cannot be >= size in bcast" ); + if ( comm_size > 1 ) + call_bcast( x, n, root ); +} + + +/************************************************************************ + * send * + ************************************************************************/ +// Define specializations of send(const TYPE*, const int, const int, int) +#if defined( USE_MPI ) || defined( USE_EXT_MPI ) +template<> +void MPI_CLASS::send( const char *, const int, const int, int ) const; +template<> +void MPI_CLASS::send( const int *, int, const int, int ) const; +template<> +void MPI_CLASS::send( const float *, const int, const int, int ) const; +template<> +void MPI_CLASS::send( const double *, const int, const int, int ) const; +#else +template<> +void MPI_CLASS::send( const char *, const int, const int, int ) const; +#endif +// Default instantiations of send(const TYPE*, const int, const int, int) +template +inline void MPI_CLASS::send( + const TYPE *buf, const int length, const int recv_proc_number, int tag ) const +{ + static_assert( is_mpi_copyable(), "Object is not trivially copyable" ); + send( (const char *) buf, length * sizeof( TYPE ), recv_proc_number, tag ); +} + + +/************************************************************************ + * Isend * + ************************************************************************/ +// Define specializations of Isend(const TYPE*, const int, const int, const int) +#if defined( USE_MPI ) || defined( USE_EXT_MPI ) +template<> +MPI_Request MPI_CLASS::Isend( const char *, const int, const int, const int ) const; +template<> +MPI_Request MPI_CLASS::Isend( const int *, int, const int, const int ) const; +template<> +MPI_Request MPI_CLASS::Isend( const float *, const int, const int, const int ) const; +template<> +MPI_Request MPI_CLASS::Isend( const double *, const int, const int, const int ) const; +#else +template<> +MPI_Request MPI_CLASS::Isend( const char *, const int, const int, const int ) const; +#endif +// Default instantiations of Isend(const TYPE*, const int, const int, const int) +template +inline MPI_Request MPI_CLASS::Isend( + const TYPE *buf, const int length, const int recv_proc_number, const int tag ) const +{ + static_assert( is_mpi_copyable(), "Object is not trivially copyable" ); + return Isend( (const char *) buf, length * sizeof( TYPE ), recv_proc_number, tag ); +} + + +/************************************************************************ + * recv * + 
************************************************************************/ +// Define specializations of recv(TYPE*, int&, const int, const bool, int) +#if defined( USE_MPI ) || defined( USE_EXT_MPI ) +template<> +void MPI_CLASS::recv( char *, int &, const int, const bool, int ) const; +template<> +void MPI_CLASS::recv( int *, int &, const int, const bool, int ) const; +template<> +void MPI_CLASS::recv( float *, int &, const int, const bool, int ) const; +template<> +void MPI_CLASS::recv( double *, int &, const int, const bool, int ) const; +#else +template<> +void MPI_CLASS::recv( char *, int &, const int, const bool, int ) const; +#endif +// Default instantiations of recv(TYPE*, int&, const int, const bool, int) +template +inline void MPI_CLASS::recv( + TYPE *buf, int &length, const int send_proc_number, const bool get_length, int tag ) const +{ + static_assert( is_mpi_copyable(), "Object is not trivially copyable" ); + int size = length * sizeof( TYPE ); + recv( (char *) buf, size, send_proc_number, get_length, tag ); + if ( get_length ) { + MPI_CLASS_ASSERT( size % sizeof( TYPE ) == 0 ); + length = size / sizeof( TYPE ); + } +} + + +/************************************************************************ + * Irecv * + ************************************************************************/ +// Define specializations of recv(TYPE*, int&, const int, const bool, int) +#if defined( USE_MPI ) || defined( USE_EXT_MPI ) +template<> +MPI_Request MPI_CLASS::Irecv( char *, const int, const int, const int ) const; +template<> +MPI_Request MPI_CLASS::Irecv( int *, const int, const int, const int ) const; +template<> +MPI_Request MPI_CLASS::Irecv( float *, const int, const int, const int ) const; +template<> +MPI_Request MPI_CLASS::Irecv( double *, const int, const int, const int ) const; +#else +template<> +MPI_Request MPI_CLASS::Irecv( char *, const int, const int, const int ) const; +#endif +// Default instantiations of recv(TYPE*, int&, const int, const bool, int) +template +inline MPI_Request MPI_CLASS::Irecv( + TYPE *buf, const int length, const int send_proc, const int tag ) const +{ + static_assert( is_mpi_copyable(), "Object is not trivially copyable" ); + return Irecv( (char *) buf, length * sizeof( TYPE ), send_proc, tag ); +} + + +/************************************************************************ + * allGather * + ************************************************************************/ +template +std::vector MPI_CLASS::allGather( const TYPE &x ) const +{ + static_assert( is_mpi_copyable(), "Object is not trivially copyable" ); + if ( getSize() <= 1 ) + return std::vector( 1, x ); + std::vector data( getSize() ); + allGather( x, data.data() ); + return data; +} +template +std::vector MPI_CLASS::allGather( const std::vector &x ) const +{ + static_assert( is_mpi_copyable(), "Object is not trivially copyable" ); + if ( getSize() <= 1 ) + return x; + std::vector count = allGather( x.size() ); + std::vector disp( getSize(), 0 ); + size_t N = count[0]; + for ( size_t i = 1; i < count.size(); i++ ) { + disp[i] = disp[i - 1] + count[i - 1]; + N += count[i]; + } + std::vector data( N ); + allGather( x.data(), x.size(), data.data(), count.data(), disp.data(), true ); + return data; +} +// Specialization of MPI_CLASS::allGather for std::string +template<> +inline void MPI_CLASS::allGather( const std::string &x_in, std::string *x_out ) const +{ + // Get the bytes recvied per processor + std::vector recv_cnt( comm_size, 0 ); + allGather( (int) x_in.size() + 1, &recv_cnt[0] ); + std::vector 
recv_disp( comm_size, 0 ); + for ( int i = 1; i < comm_size; i++ ) + recv_disp[i] = recv_disp[i - 1] + recv_cnt[i - 1]; + // Call the vector form of allGather for the char arrays + char *recv_data = new char[recv_disp[comm_size - 1] + recv_cnt[comm_size - 1]]; + allGather( + x_in.c_str(), (int) x_in.size() + 1, recv_data, &recv_cnt[0], &recv_disp[0], true ); + for ( int i = 0; i < comm_size; i++ ) + x_out[i] = std::string( &recv_data[recv_disp[i]] ); + delete[] recv_data; +} +// Default instantiation of MPI_CLASS::allGather +template +inline void MPI_CLASS::allGather( const TYPE &x_in, TYPE *x_out ) const +{ + static_assert( is_mpi_copyable(), "Object is not trivially copyable" ); + if ( comm_size > 1 ) { + // We can use the vector form of allGather with a char array to ge the data we want + call_allGather( x_in, x_out ); + } else { + // Single processor case + x_out[0] = x_in; + } +} +// Specialization of MPI_CLASS::allGather for std::string +template<> +inline int MPI_CLASS::allGather( + const std::string *, const int, std::string *, int *, int *, bool ) const +{ + MPI_CLASS_ERROR( "Cannot allGather an array of strings" ); + return 0; +} +// Define specializations of call_allGather(const TYPE, TYPE*) +#if defined( USE_MPI ) || defined( USE_EXT_MPI ) +template<> +void MPI_CLASS::call_allGather( const unsigned char &, unsigned char * ) const; +template<> +void MPI_CLASS::call_allGather( const char &, char * ) const; +template<> +void MPI_CLASS::call_allGather( const unsigned int &, unsigned int * ) const; +template<> +void MPI_CLASS::call_allGather( const int &, int * ) const; +template<> +void MPI_CLASS::call_allGather( + const unsigned long int &, unsigned long int * ) const; +template<> +void MPI_CLASS::call_allGather( const long int &, long int * ) const; +template<> +void MPI_CLASS::call_allGather( const float &, float * ) const; +template<> +void MPI_CLASS::call_allGather( const double &, double * ) const; +#endif +// Default instantiation of MPI_CLASS::allGather +template +int MPI_CLASS::allGather( const TYPE *send_data, const int send_cnt, TYPE *recv_data, int *recv_cnt, + int *recv_disp, bool known_recv ) const +{ + // Check the inputs + if ( known_recv && ( recv_cnt == nullptr || recv_disp == nullptr ) ) + MPI_CLASS_ERROR( "Error calling allGather" ); + // Check if we are dealing with a single processor + if ( comm_size == 1 ) { + if ( send_data == nullptr && send_cnt > 0 ) { + MPI_CLASS_ERROR( "send_data is null" ); + } else if ( !known_recv ) { + // We do not know the recieved sizes + for ( int i = 0; i < send_cnt; i++ ) + recv_data[i] = send_data[i]; + if ( recv_cnt != nullptr ) + recv_cnt[0] = send_cnt; + if ( recv_disp != nullptr ) + recv_disp[0] = 0; + } else { + // We know the recieved sizes + for ( int i = 0; i < send_cnt; i++ ) + recv_data[i + recv_disp[0]] = send_data[i]; + } + return send_cnt; + } + // Get the sizes of the recieved data (if necessary) + int *recv_cnt2 = recv_cnt; + int *recv_disp2 = recv_disp; + if ( !known_recv ) { + if ( recv_cnt == nullptr ) + recv_cnt2 = new int[comm_size]; + if ( recv_disp == nullptr ) + recv_disp2 = new int[comm_size]; + call_allGather( send_cnt, recv_cnt2 ); + recv_disp2[0] = 0; + for ( int i = 1; i < comm_size; i++ ) + recv_disp2[i] = recv_disp2[i - 1] + recv_cnt2[i - 1]; + } + int N_recv = 0; + for ( int i = 0; i < comm_size; i++ ) + N_recv += recv_cnt2[i]; + // Send/recv the data + call_allGather( send_data, send_cnt, recv_data, recv_cnt2, recv_disp2 ); + // Delete any temporary memory + if ( recv_cnt == nullptr ) + delete[] 
recv_cnt2; + if ( recv_disp == nullptr ) + delete[] recv_disp2; + return N_recv; +} +// Default instantiations of call_allGather(const TYPE, TYPE*) +template +void MPI_CLASS::call_allGather( const TYPE &x_in, TYPE *x_out ) const +{ + static_assert( is_mpi_copyable(), "Object is not trivially copyable" ); + allGather( (const char *) &x_in, (int) sizeof( TYPE ), (char *) x_out ); +} +// Define specializations of call_allGather(const TYPE*, int, TYPE*, int*, int*) +#if defined( USE_MPI ) || defined( USE_EXT_MPI ) +template<> +void MPI_CLASS::call_allGather( + const unsigned char *, int, unsigned char *, int *, int * ) const; +template<> +void MPI_CLASS::call_allGather( const char *, int, char *, int *, int * ) const; +template<> +void MPI_CLASS::call_allGather( + const unsigned int *, int, unsigned int *, int *, int * ) const; +template<> +void MPI_CLASS::call_allGather( const int *, int, int *, int *, int * ) const; +template<> +void MPI_CLASS::call_allGather( + const unsigned long int *, int, unsigned long int *, int *, int * ) const; +template<> +void MPI_CLASS::call_allGather( const long int *, int, long int *, int *, int * ) const; +template<> +void MPI_CLASS::call_allGather( const float *, int, float *, int *, int * ) const; +template<> +void MPI_CLASS::call_allGather( const double *, int, double *, int *, int * ) const; +#else +template<> +void MPI_CLASS::call_allGather( const char *, int, char *, int *, int * ) const; +#endif +// Default instantiations of int call_allGather(const TYPE*, int, TYPE*, int*) +template +void MPI_CLASS::call_allGather( + const TYPE *x_in, int size_in, TYPE *x_out, int *size_out, int *disp_out ) const +{ + int *size2 = new int[comm_size]; + int *disp2 = new int[comm_size]; + for ( int i = 0; i < comm_size; i++ ) { + size2[i] = size_out[i] * sizeof( TYPE ); + disp2[i] = disp_out[i] * sizeof( TYPE ); + } + static_assert( is_mpi_copyable(), "Object is not trivially copyable" ); + call_allGather( + (const char *) x_in, (int) size_in * sizeof( TYPE ), (char *) x_out, size2, disp2 ); + delete[] size2; + delete[] disp2; +} + + +/************************************************************************ + * setGather * + ************************************************************************/ +template +inline void MPI_CLASS::setGather( std::set &set ) const +{ + std::vector send_buf( set.begin(), set.end() ); + std::vector recv_cnt( this->comm_size, 0 ); + this->allGather( (int) send_buf.size(), &recv_cnt[0] ); + std::vector recv_disp( this->comm_size, 0 ); + for ( int i = 1; i < this->comm_size; i++ ) + recv_disp[i] = recv_disp[i - 1] + recv_cnt[i - 1]; + size_t N_recv_tot = 0; + for ( int i = 0; i < this->comm_size; i++ ) + N_recv_tot += recv_cnt[i]; + if ( N_recv_tot == 0 ) + return; + std::vector recv_buf( N_recv_tot ); + TYPE *send_data = nullptr; + if ( send_buf.size() > 0 ) { + send_data = &send_buf[0]; + } + TYPE *recv_data = &recv_buf[0]; + static_assert( is_mpi_copyable(), "Object is not trivially copyable" ); + this->allGather( + send_data, (int) send_buf.size(), recv_data, &recv_cnt[0], &recv_disp[0], true ); + for ( size_t i = 0; i < recv_buf.size(); i++ ) + set.insert( recv_buf[i] ); +} + + +/************************************************************************ + * mapGather * + ************************************************************************/ +template +inline void MPI_CLASS::mapGather( std::map &map ) const +{ + std::vector send_id; + std::vector send_data; + send_id.reserve( map.size() ); + send_data.reserve( map.size() ); + for ( auto it 
= map.begin(); it != map.end(); ++it ) { + send_id.push_back( it->first ); + send_data.push_back( it->second ); + } + int send_size = (int) send_id.size(); + std::vector recv_cnt( this->comm_size, 0 ); + this->allGather( send_size, &recv_cnt[0] ); + std::vector recv_disp( this->comm_size, 0 ); + for ( int i = 1; i < this->comm_size; i++ ) + recv_disp[i] = recv_disp[i - 1] + recv_cnt[i - 1]; + size_t N_recv_tot = 0; + for ( int i = 0; i < this->comm_size; i++ ) + N_recv_tot += recv_cnt[i]; + if ( N_recv_tot == 0 ) + return; + std::vector recv_id( N_recv_tot ); + std::vector recv_data( N_recv_tot ); + KEY *send_data1 = nullptr; + DATA *send_data2 = nullptr; + if ( send_id.size() > 0 ) { + send_data1 = &send_id[0]; + send_data2 = &send_data[0]; + } + static_assert( is_mpi_copyable(), "Object is not trivially copyable" ); + this->allGather( send_data1, send_size, &recv_id[0], &recv_cnt[0], &recv_disp[0], true ); + this->allGather( + send_data2, send_size, &recv_data[0], &recv_cnt[0], &recv_disp[0], true ); + map = std::map(); + for ( size_t i = 0; i < N_recv_tot; i++ ) + map.insert( std::pair( recv_id[i], recv_data[i] ) ); +} + + +/************************************************************************ + * sumScan * + ************************************************************************/ +template +inline void MPI_CLASS::sumScan( const TYPE *x, TYPE *y, const int n ) const +{ + if ( comm_size > 1 ) { + call_sumScan( x, y, n ); + } else { + for ( int i = 0; i < n; i++ ) + y[i] = x[i]; + } +} +// Define specializations of call_sumScan(const TYPE*, TYPE*, int) +#if defined( USE_MPI ) || defined( USE_EXT_MPI ) +template<> +void MPI_CLASS::call_sumScan( const unsigned char *, unsigned char *, int ) const; +template<> +void MPI_CLASS::call_sumScan( const char *, char *, int ) const; +template<> +void MPI_CLASS::call_sumScan( const unsigned int *, unsigned int *, int ) const; +template<> +void MPI_CLASS::call_sumScan( const int *, int *, int ) const; +template<> +void MPI_CLASS::call_sumScan( + const unsigned long int *, unsigned long int *, int ) const; +template<> +void MPI_CLASS::call_sumScan( const long int *, long int *, int ) const; +template<> +void MPI_CLASS::call_sumScan( const size_t *, size_t *, int ) const; +template<> +void MPI_CLASS::call_sumScan( const float *, float *, int ) const; +template<> +void MPI_CLASS::call_sumScan( const double *, double *, int ) const; +template<> +void MPI_CLASS::call_sumScan>( + const std::complex *, std::complex *, int ) const; +#endif +// Default instantiations of call_sumScan(const TYPE*, TYPE*, int) +template +void MPI_CLASS::call_sumScan( const TYPE *, TYPE *, int ) const +{ + char message[200]; + sprintf( message, "Default instantion of sumScan in parallel is not supported (%s)", + typeid( TYPE ).name() ); + MPI_CLASS_ERROR( message ); +} + + +/************************************************************************ + * minScan * + ************************************************************************/ +template +inline void MPI_CLASS::minScan( const TYPE *x, TYPE *y, const int n ) const +{ + if ( comm_size > 1 ) { + call_minScan( x, y, n ); + } else { + for ( int i = 0; i < n; i++ ) + y[i] = x[i]; + } +} +// Define specializations of call_minScan(const TYPE*, TYPE*, int) +#if defined( USE_MPI ) || defined( USE_EXT_MPI ) +template<> +void MPI_CLASS::call_minScan( const unsigned char *, unsigned char *, int ) const; +template<> +void MPI_CLASS::call_minScan( const char *, char *, int ) const; +template<> +void MPI_CLASS::call_minScan( const 
unsigned int *, unsigned int *, int ) const; +template<> +void MPI_CLASS::call_minScan( const int *, int *, int ) const; +template<> +void MPI_CLASS::call_minScan( + const unsigned long int *, unsigned long int *, int ) const; +template<> +void MPI_CLASS::call_minScan( const long int *, long int *, int ) const; +template<> +void MPI_CLASS::call_minScan( const size_t *, size_t *, int ) const; +template<> +void MPI_CLASS::call_minScan( const float *, float *, int ) const; +template<> +void MPI_CLASS::call_minScan( const double *, double *, int ) const; +#endif +// Default instantiations of call_minScan(const TYPE*, TYPE*, int) +template +void MPI_CLASS::call_minScan( const TYPE *, TYPE *, int ) const +{ + char message[200]; + sprintf( message, "Default instantion of minScan in parallel is not supported (%s)", + typeid( TYPE ).name() ); + MPI_CLASS_ERROR( message ); +} + + +/************************************************************************ + * maxScan * + ************************************************************************/ +template +inline void MPI_CLASS::maxScan( const TYPE *x, TYPE *y, const int n ) const +{ + if ( comm_size > 1 ) { + call_maxScan( x, y, n ); + } else { + for ( int i = 0; i < n; i++ ) + y[i] = x[i]; + } +} +// Define specializations of call_maxScan(const TYPE*, TYPE*, int) +#if defined( USE_MPI ) || defined( USE_EXT_MPI ) +template<> +void MPI_CLASS::call_maxScan( const unsigned char *, unsigned char *, int ) const; +template<> +void MPI_CLASS::call_maxScan( const char *, char *, int ) const; +template<> +void MPI_CLASS::call_maxScan( const unsigned int *, unsigned int *, int ) const; +template<> +void MPI_CLASS::call_maxScan( const int *, int *, int ) const; +template<> +void MPI_CLASS::call_maxScan( + const unsigned long int *, unsigned long int *, int ) const; +template<> +void MPI_CLASS::call_maxScan( const long int *, long int *, int ) const; +template<> +void MPI_CLASS::call_maxScan( const size_t *, size_t *, int ) const; +template<> +void MPI_CLASS::call_maxScan( const float *, float *, int ) const; +template<> +void MPI_CLASS::call_maxScan( const double *, double *, int ) const; +#endif +// Default instantiations of call_maxScan(const TYPE*, TYPE*, int) +template +void MPI_CLASS::call_maxScan( const TYPE *, TYPE *, int ) const +{ + char message[200]; + sprintf( message, "Default instantion of maxReduce in parallel is not supported (%s)", + typeid( TYPE ).name() ); + MPI_CLASS_ERROR( message ); +} + + +/************************************************************************ + * allToAll * + ************************************************************************/ +// Define specializations of allToAll(const int n, const char*, char* ) +#if defined( USE_MPI ) || defined( USE_EXT_MPI ) +template<> +void MPI_CLASS::allToAll( + const int n, const unsigned char *, unsigned char * ) const; +template<> +void MPI_CLASS::allToAll( const int n, const char *, char * ) const; +template<> +void MPI_CLASS::allToAll( const int n, const unsigned int *, unsigned int * ) const; +template<> +void MPI_CLASS::allToAll( const int n, const int *, int * ) const; +template<> +void MPI_CLASS::allToAll( + const int n, const unsigned long int *, unsigned long int * ) const; +template<> +void MPI_CLASS::allToAll( const int n, const long int *, long int * ) const; +template<> +void MPI_CLASS::allToAll( const int n, const float *, float * ) const; +template<> +void MPI_CLASS::allToAll( const int n, const double *, double * ) const; +#endif +// Default instantiations of allToAll(const 
int n, const char*, char* ) +#if defined( USE_MPI ) || defined( USE_EXT_MPI ) +template +void MPI_CLASS::allToAll( const int n, const TYPE *send_data, TYPE *recv_data ) const +{ + static_assert( is_mpi_copyable(), "Object is not trivially copyable" ); + allToAll( n * sizeof( TYPE ), (char *) send_data, (char *) recv_data ); +} +#else +template +void MPI_CLASS::allToAll( const int n, const TYPE *send_data, TYPE *recv_data ) const +{ + if ( comm_size != 1 ) + MPI_CLASS_ERROR( "Invalid size for allToAll" ); + for ( int i = 0; i < n; i++ ) + recv_data[i] = send_data[i]; +} +#endif + + +/************************************************************************ + * allToAll * + ************************************************************************/ +template +int MPI_CLASS::allToAll( const TYPE *send_data, const int send_cnt[], const int send_disp[], + TYPE *recv_data, int *recv_cnt, int *recv_disp, bool known_recv ) const +{ + int N_recieved = 0; + if ( comm_size == 1 ) { + // Special case for single-processor communicators + if ( known_recv ) { + if ( recv_cnt[0] != send_cnt[0] && send_cnt[0] > 0 ) + MPI_CLASS_ERROR( "Single processor send/recv are different sizes" ); + } else { + if ( recv_cnt != nullptr ) + recv_cnt[0] = send_cnt[0]; + if ( recv_disp != nullptr ) + recv_disp[0] = send_disp[0]; + } + for ( int i = 0; i < send_cnt[0]; i++ ) + recv_data[i + recv_disp[0]] = send_data[i + send_disp[0]]; + N_recieved = send_cnt[0]; + } else if ( known_recv ) { + // The recieve sizes are known + MPI_CLASS_ASSERT( recv_cnt != nullptr && recv_disp != nullptr ); + call_allToAll( send_data, send_cnt, send_disp, recv_data, recv_cnt, recv_disp ); + for ( int i = 0; i < comm_size; i++ ) + N_recieved += recv_cnt[i]; + } else { + // The recieve sizes are not known, we need to communicate that information first + int *recv_cnt2 = recv_cnt; + int *recv_disp2 = recv_disp; + if ( recv_cnt == nullptr ) + recv_cnt2 = new int[comm_size]; + if ( recv_disp == nullptr ) + recv_disp2 = new int[comm_size]; + // Communicate the size we will be recieving from each processor + allToAll( 1, send_cnt, recv_cnt2 ); + recv_disp2[0] = 0; + for ( int i = 1; i < comm_size; i++ ) + recv_disp2[i] = recv_disp2[i - 1] + recv_cnt2[i - 1]; + // Send the data + call_allToAll( send_data, send_cnt, send_disp, recv_data, recv_cnt2, recv_disp2 ); + for ( int i = 0; i < comm_size; i++ ) + N_recieved += recv_cnt2[i]; + if ( recv_cnt == nullptr ) + delete[] recv_cnt2; + if ( recv_disp == nullptr ) + delete[] recv_disp2; + } + return N_recieved; +} +// Define specializations of call_allToAll +#if defined( USE_MPI ) || defined( USE_EXT_MPI ) +template<> +void MPI_CLASS::call_allToAll( const unsigned char *, const int *, const int *, + unsigned char *, const int *, const int * ) const; +template<> +void MPI_CLASS::call_allToAll( + const char *, const int *, const int *, char *, const int *, const int * ) const; +template<> +void MPI_CLASS::call_allToAll( const unsigned int *, const int *, const int *, + unsigned int *, const int *, const int * ) const; +template<> +void MPI_CLASS::call_allToAll( + const int *, const int *, const int *, int *, const int *, const int * ) const; +template<> +void MPI_CLASS::call_allToAll( const unsigned long int *, const int *, + const int *, unsigned long int *, const int *, const int * ) const; +template<> +void MPI_CLASS::call_allToAll( + const long int *, const int *, const int *, long int *, const int *, const int * ) const; +template<> +void MPI_CLASS::call_allToAll( + const float *, const int *, const int 
*, float *, const int *, const int * ) const; +template<> +void MPI_CLASS::call_allToAll( + const double *, const int *, const int *, double *, const int *, const int * ) const; +#else +template<> +void MPI_CLASS::call_allToAll( + const char *, const int *, const int *, char *, const int *, const int * ) const; +#endif +// Default instantiations of call_allToAll +template +void MPI_CLASS::call_allToAll( const TYPE *send_data, const int send_cnt[], const int send_disp[], + TYPE *recv_data, const int *recv_cnt, const int *recv_disp ) const +{ + int *send_cnt2 = new int[comm_size]; + int *recv_cnt2 = new int[comm_size]; + int *send_disp2 = new int[comm_size]; + int *recv_disp2 = new int[comm_size]; + for ( int i = 0; i < comm_size; i++ ) { + send_cnt2[i] = send_cnt[i] * sizeof( TYPE ); + send_disp2[i] = send_disp[i] * sizeof( TYPE ); + recv_cnt2[i] = recv_cnt[i] * sizeof( TYPE ); + recv_disp2[i] = recv_disp[i] * sizeof( TYPE ); + } + static_assert( is_mpi_copyable(), "Object is not trivially copyable" ); + call_allToAll( + (char *) send_data, send_cnt2, send_disp2, (char *) recv_data, recv_cnt2, recv_disp2 ); + delete[] send_cnt2; + delete[] recv_cnt2; + delete[] send_disp2; + delete[] recv_disp2; +} + + +} // namespace Utilities + +#endif diff --git a/common/MPI.cpp b/common/MPI.cpp new file mode 100644 index 00000000..d20c1af2 --- /dev/null +++ b/common/MPI.cpp @@ -0,0 +1,3758 @@ +// This file impliments a wrapper class for MPI functions + +#include "common/MPI.h" +#include "common/Utilities.h" + +#include "ProfilerApp.h" +#include "StackTrace/ErrorHandlers.h" +#include "StackTrace/StackTrace.h" + +// Include all other headers +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +// Include OS specific headers +#undef USE_WINDOWS +#undef USE_LINUX +#undef USE_MAC +#if defined( WIN32 ) || defined( _WIN32 ) || defined( WIN64 ) || defined( _WIN64 ) +// We are using windows +#define USE_WINDOWS +#include +#include +#define sched_yield() Sleep( 0 ) +#elif defined( __APPLE__ ) +// Using MAC +#define USE_MAC +#include +#elif defined( __linux ) || defined( __unix ) || defined( __posix ) +// We are using linux +#define USE_LINUX +#include +#include +#else +#error Unknown OS +#endif + + +// Convience defines +#define MPI_ERROR ERROR +#define MPI_ASSERT ASSERT +#define MPI_INSIST INSIST +#define MPI_WARNING WARNING +#define MPI_CLASS_COMM_NULL MPI_COMM_NULL +#define MPI_CLASS_COMM_SELF MPI_COMM_SELF +#define MPI_CLASS_COMM_WORLD MPI_COMM_WORLD + + +// Global variable to track create new unique comms (dup and split) +#ifndef USE_MPI +MPI_Comm uniqueGlobalComm = 11; +#endif + + +#if defined( USE_SAMRAI ) && defined( USE_PETSC ) && !defined( USE_MPI ) +int MPI_REQUEST_NULL = 3; +int MPI_ERR_IN_STATUS = 4; +#endif + + +namespace Utilities { + + +// Some special structs to work with MPI +#ifdef USE_MPI +struct IntIntStruct { + int j; + int i; +}; +struct LongIntStruct { + long int j; + int i; +}; +struct FloatIntStruct { + float f; + int i; +}; +struct DoubleIntStruct { + double d; + int i; +}; +#endif + + +// Initialized the static member variables +volatile unsigned int MPI_CLASS::N_MPI_Comm_created = 0; +volatile unsigned int MPI_CLASS::N_MPI_Comm_destroyed = 0; +short MPI_CLASS::profile_level = 127; + + +// Define a type for use with size_t +#ifdef USE_MPI +static MPI_Datatype MPI_SIZE_T = 0x0; +static MPI_Datatype getSizeTDataType() +{ + int size_int, size_long, size_longlong, size_longlong2; + MPI_Type_size( MPI_UNSIGNED, &size_int ); + 
MPI_Type_size( MPI_UNSIGNED_LONG, &size_long ); + MPI_Type_size( MPI_UNSIGNED_LONG_LONG, &size_longlong ); + MPI_Type_size( MPI_LONG_LONG_INT, &size_longlong2 ); + if ( sizeof( size_t ) == size_int ) { + return MPI_UNSIGNED; + } else if ( sizeof( size_t ) == size_long ) { + return MPI_UNSIGNED_LONG; + } else if ( sizeof( size_t ) == size_longlong ) { + return MPI_UNSIGNED_LONG_LONG; + } else if ( sizeof( size_t ) == size_longlong2 ) { + MPI_WARNING( "Using signed long long datatype for size_t in MPI" ); + return MPI_LONG_LONG_INT; // Note: this is not unsigned + } else { + MPI_ERROR( "No suitable datatype found" ); + } + return 0; +} +#endif + + +// Static data for asyncronous communication without MPI +// Note: these routines may not be thread-safe yet +#ifndef USE_MPI +static const int mpi_max_tag = 0x003FFFFF; +struct Isendrecv_struct { + const char *data; // Pointer to data + int status; // Status: 1-sending, 2-recieving +}; +std::map global_isendrecv_list; +static MPI_Request getRequest( MPI_Comm comm, int tag ) +{ + MPI_ASSERT( tag >= 0 && tag <= mpi_max_tag ); + // Use hashing function: 2^64*0.5*(sqrt(5)-1) + uint64_t a = static_cast( comm ) * 0x9E3779B97F4A7C15; + uint64_t b = static_cast( tag ) * 0x9E3779B97F4A7C15; + uint64_t hash = a ^ b; + MPI_Request request; + memcpy( &request, &hash, sizeof( MPI_Request ) ); + return request; +} +#endif + + +// Check the mpi error code +#ifdef USE_MPI +inline void check_MPI( int error ) +{ + if ( error != MPI_SUCCESS ) + MPI_ERROR( "Error calling MPI routine" ); +} +#endif + + +/****************************************************************** + * Some helper functions to convert between signed/unsigned types * + ******************************************************************/ +DISABLE_WARNINGS +static inline constexpr unsigned int offset_int() +{ + return ~static_cast( std::numeric_limits::min() ) + 1; +} +static inline constexpr unsigned long int offset_long() +{ + return ~static_cast( std::numeric_limits::min() ) + 1; +} +static inline constexpr unsigned long long int offset_long_long() +{ + return ~static_cast( std::numeric_limits::min() ) + 1; +} +ENABLE_WARNINGS +static inline unsigned int signed_to_unsigned( int x ) +{ + const auto offset = offset_int(); + return ( x >= 0 ) ? static_cast( x ) + offset : + offset - static_cast( -x ); +} +static inline unsigned long int signed_to_unsigned( long int x ) +{ + const auto offset = offset_long(); + return ( x >= 0 ) ? static_cast( x ) + offset : + offset - static_cast( -x ); +} +static inline unsigned long long int signed_to_unsigned( long long int x ) +{ + const auto offset = offset_long_long(); + return ( x >= 0 ) ? static_cast( x ) + offset : + offset - static_cast( -x ); +} +static inline int unsigned_to_signed( unsigned int x ) +{ + const auto offset = offset_int(); + return ( x >= offset ) ? static_cast( x - offset ) : -static_cast( offset - x ); +} +static inline long int unsigned_to_signed( unsigned long int x ) +{ + const auto offset = offset_long(); + return ( x >= offset ) ? static_cast( x - offset ) : + -static_cast( offset - x ); +} +static inline long long int unsigned_to_signed( unsigned long long int x ) +{ + const auto offset = offset_long_long(); + return ( x >= offset ) ? 
static_cast( x - offset ) : + -static_cast( offset - x ); +} + + +/************************************************************************ + * Get the MPI version * + ************************************************************************/ +std::array MPI_CLASS::version() +{ +#ifdef USE_MPI + int MPI_version; + int MPI_subversion; + MPI_Get_version( &MPI_version, &MPI_subversion ); + return { MPI_version, MPI_subversion }; +#else + return { 0, 0 }; +#endif +} +std::string MPI_CLASS::info() +{ +#ifdef USE_MPI +#if MPI_VERSION >= 3 + int MPI_version_length = 0; + char MPI_version_string[MPI_MAX_LIBRARY_VERSION_STRING]; + MPI_Get_library_version( MPI_version_string, &MPI_version_length ); + if ( MPI_version_length > 0 ) { + std::string MPI_info( MPI_version_string, MPI_version_length ); + size_t pos = MPI_info.find( '\n' ); + while ( pos != std::string::npos ) { + MPI_info.insert( pos + 1, " " ); + pos = MPI_info.find( '\n', pos + 1 ); + } + return MPI_info; + } +#endif + auto tmp = version(); + return std::to_string( tmp[0] ) + "." + std::to_string( tmp[0] ); +#else + return std::string(); +#endif +} + + +/************************************************************************ + * Functions to get/set the process affinities * + ************************************************************************/ +int MPI_CLASS::getNumberOfProcessors() { return std::thread::hardware_concurrency(); } +std::vector MPI_CLASS::getProcessAffinity() +{ + std::vector procs; +#ifdef USE_LINUX + cpu_set_t mask; + int error = sched_getaffinity( getpid(), sizeof( cpu_set_t ), &mask ); + if ( error != 0 ) + MPI_ERROR( "Error getting process affinity" ); + for ( int i = 0; i < (int) sizeof( cpu_set_t ) * CHAR_BIT; i++ ) { + if ( CPU_ISSET( i, &mask ) ) + procs.push_back( i ); + } +#elif defined( USE_MAC ) + // MAC does not support getting or setting the affinity + printf( "Warning: MAC does not support getting the process affinity\n" ); + procs.clear(); +#elif defined( USE_WINDOWS ) + HANDLE hProc = GetCurrentProcess(); + size_t procMask; + size_t sysMask; + PDWORD_PTR procMaskPtr = reinterpret_cast( &procMask ); + PDWORD_PTR sysMaskPtr = reinterpret_cast( &sysMask ); + GetProcessAffinityMask( hProc, procMaskPtr, sysMaskPtr ); + for ( int i = 0; i < (int) sizeof( size_t ) * CHAR_BIT; i++ ) { + if ( ( procMask & 0x1 ) != 0 ) + procs.push_back( i ); + procMask >>= 1; + } +#else +#error Unknown OS +#endif + return procs; +} +void MPI_CLASS::setProcessAffinity( const std::vector &procs ) +{ +#ifdef USE_LINUX + cpu_set_t mask; + CPU_ZERO( &mask ); + for ( auto cpu : procs ) + CPU_SET( cpu, &mask ); + int error = sched_setaffinity( getpid(), sizeof( cpu_set_t ), &mask ); + if ( error != 0 ) + MPI_ERROR( "Error setting process affinity" ); +#elif defined( USE_MAC ) + // MAC does not support getting or setting the affinity + NULL_USE( procs ); +#elif defined( USE_WINDOWS ) + DWORD mask = 0; + for ( size_t i = 0; i < procs.size(); i++ ) + mask |= ( (DWORD) 1 ) << procs[i]; + HANDLE hProc = GetCurrentProcess(); + SetProcessAffinityMask( hProc, mask ); +#else +#error Unknown OS +#endif +} + + +/************************************************************************ + * Function to check if MPI is active * + ************************************************************************/ +bool MPI_CLASS::MPI_active() +{ +#ifdef USE_MPI + int initialized = 0, finalized = 0; + MPI_Initialized( &initialized ); + MPI_Finalized( &finalized ); + return initialized != 0 && finalized == 0; +#else + return true; +#endif +} 
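As a usage sketch (not part of the patch): the static query helpers above can be called before any point-to-point or collective communication is issued. The snippet below assumes the wrapper class is exposed as Utilities::MPI (the MPI_CLASS macro) through common/MPI.h.

    #include "common/MPI.h"
    #include <cstdio>

    // Report which MPI runtime the wrapper sees, but only if MPI is initialized
    // and not yet finalized.
    void reportMpiEnvironment()
    {
        if ( !Utilities::MPI::MPI_active() )
            return;
        auto ver = Utilities::MPI::version();   // { major, minor }
        std::printf( "MPI %d.%d\n%s\n", ver[0], ver[1], Utilities::MPI::info().c_str() );
    }
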
+MPI_CLASS::ThreadSupport MPI_CLASS::queryThreadSupport() +{ +#ifdef USE_MPI + int provided = 0; + MPI_Query_thread( &provided ); + if ( provided == MPI_THREAD_SINGLE ) + return ThreadSupport::SINGLE; + if ( provided == MPI_THREAD_FUNNELED ) + return ThreadSupport::FUNNELED; + if ( provided == MPI_THREAD_SERIALIZED ) + return ThreadSupport::SERIALIZED; + if ( provided == MPI_THREAD_MULTIPLE ) + return ThreadSupport::MULTIPLE; + return ThreadSupport::SINGLE; +#else + return ThreadSupport::MULTIPLE; +#endif +} + + +/************************************************************************ + * Function to perform a load balance of the given processes * + ************************************************************************/ +void MPI_CLASS::balanceProcesses( const MPI_CLASS &globalComm, const int method, + const std::vector &procs, const int N_min_in, const int N_max_in ) +{ + // Build the list of processors to use + std::vector cpus = procs; + if ( cpus.empty() ) { + for ( int i = 0; i < getNumberOfProcessors(); i++ ) + cpus.push_back( i ); + } + // Handle the "easy cases" + if ( method == 1 ) { + // Trivial case where we do not need any communication + setProcessAffinity( cpus ); + return; + } + // Get the sub-communicator for the current node + MPI_CLASS nodeComm = globalComm.splitByNode(); + int N_min = std::min( std::max( N_min_in, 1 ), cpus.size() ); + int N_max = N_max_in; + if ( N_max == -1 ) + N_max = cpus.size(); + N_max = std::min( N_max, cpus.size() ); + MPI_ASSERT( N_max >= N_min ); + // Perform the load balance within the node + if ( method == 2 ) { + int N_proc = cpus.size() / nodeComm.getSize(); + N_proc = std::max( N_proc, N_min ); + N_proc = std::min( N_proc, N_max ); + std::vector cpus2( N_proc, -1 ); + for ( int i = 0; i < N_proc; i++ ) + cpus2[i] = cpus[( nodeComm.getRank() * N_proc + i ) % cpus.size()]; + setProcessAffinity( cpus2 ); + } else { + MPI_ERROR( "Unknown method for load balance" ); + } +} + + +/************************************************************************ + * Empty constructor * + ************************************************************************/ +MPI_CLASS::MPI_CLASS() +{ +// Initialize the data members to a defaul communicator of self +#ifdef USE_MPI + communicator = MPI_COMM_NULL; + d_maxTag = 0x7FFFFFFF; +#else + communicator = MPI_CLASS_COMM_NULL; + d_maxTag = mpi_max_tag; +#endif + d_ranks = nullptr; + d_count = nullptr; + d_manage = false; + comm_rank = 0; + comm_size = 1; + d_isNull = true; + d_currentTag = nullptr; + d_call_abort = true; + tmp_alignment = -1; +} + + +/************************************************************************ + * Empty deconstructor * + ************************************************************************/ +MPI_CLASS::~MPI_CLASS() { reset(); } +void MPI_CLASS::reset() +{ + // Decrement the count if used + int count = -1; + if ( d_count != nullptr ) + count = --( *d_count ); + if ( count == 0 ) { + // We are holding that last reference to the MPI_Comm object, we need to free it + if ( d_manage ) { +#ifdef USE_MPI + MPI_Comm_set_errhandler( communicator, MPI_ERRORS_ARE_FATAL ); + int err = MPI_Comm_free( &communicator ); + if ( err != MPI_SUCCESS ) + MPI_ERROR( "Problem free'ing MPI_Comm object" ); + communicator = MPI_CLASS_COMM_NULL; + ++N_MPI_Comm_destroyed; +#endif + } + if ( d_ranks != nullptr ) + delete[] d_ranks; + delete d_count; + } + if ( d_currentTag == nullptr ) { + // No tag index + } else if ( d_currentTag[1] > 1 ) { + --( d_currentTag[1] ); + } else { + delete[] d_currentTag; + } + 
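+    // Finally, clear the members so the object no longer references a communicator.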
d_manage = false; + d_count = nullptr; + d_ranks = nullptr; + comm_rank = 0; + comm_size = 1; + d_maxTag = 0; + d_isNull = true; + d_currentTag = nullptr; + d_call_abort = true; +} + + +/************************************************************************ + * Copy constructors * + ************************************************************************/ +MPI_CLASS::MPI_CLASS( const MPI_CLASS &comm ) + : communicator( comm.communicator ), + d_isNull( comm.d_isNull ), + d_manage( comm.d_manage ), + comm_rank( comm.comm_rank ), + comm_size( comm.comm_size ), + d_ranks( comm.d_ranks ), + d_maxTag( comm.d_maxTag ), + d_currentTag( comm.d_currentTag ) +{ + // Initialize the data members to the existing comm object + if ( d_currentTag != nullptr ) + ++d_currentTag[1]; + d_call_abort = comm.d_call_abort; + // Set and increment the count + d_count = comm.d_count; + if ( d_count != nullptr ) + ++( *d_count ); + tmp_alignment = -1; +} +MPI_CLASS::MPI_CLASS( MPI_CLASS &&rhs ) : MPI_CLASS() +{ + std::swap( communicator, rhs.communicator ); + std::swap( d_isNull, rhs.d_isNull ); + std::swap( d_manage, rhs.d_manage ); + std::swap( d_call_abort, rhs.d_call_abort ); + std::swap( profile_level, rhs.profile_level ); + std::swap( comm_rank, rhs.comm_rank ); + std::swap( comm_size, rhs.comm_size ); + std::swap( d_ranks, rhs.d_ranks ); + std::swap( d_maxTag, rhs.d_maxTag ); + std::swap( d_currentTag, rhs.d_currentTag ); + std::swap( d_count, rhs.d_count ); + std::swap( tmp_alignment, rhs.tmp_alignment ); +} + + +/************************************************************************ + * Assignment operators * + ************************************************************************/ +MPI_CLASS &MPI_CLASS::operator=( const MPI_CLASS &comm ) +{ + if ( this == &comm ) // protect against invalid self-assignment + return *this; + // Destroy the previous object + this->reset(); + // Initialize the data members to the existing object + this->communicator = comm.communicator; + this->comm_rank = comm.comm_rank; + this->comm_size = comm.comm_size; + this->d_ranks = comm.d_ranks; + this->d_isNull = comm.d_isNull; + this->d_manage = comm.d_manage; + this->d_maxTag = comm.d_maxTag; + this->d_call_abort = comm.d_call_abort; + this->d_currentTag = comm.d_currentTag; + if ( this->d_currentTag != nullptr ) + ++( this->d_currentTag[1] ); + // Set and increment the count + this->d_count = comm.d_count; + if ( this->d_count != nullptr ) + ++( *d_count ); + this->tmp_alignment = -1; + return *this; +} +MPI_CLASS &MPI_CLASS::operator=( MPI_CLASS &&rhs ) +{ + if ( this == &rhs ) // protect against invalid self-assignment + return *this; + std::swap( communicator, rhs.communicator ); + std::swap( d_isNull, rhs.d_isNull ); + std::swap( d_manage, rhs.d_manage ); + std::swap( d_call_abort, rhs.d_call_abort ); + std::swap( profile_level, rhs.profile_level ); + std::swap( comm_rank, rhs.comm_rank ); + std::swap( comm_size, rhs.comm_size ); + std::swap( d_ranks, rhs.d_ranks ); + std::swap( d_maxTag, rhs.d_maxTag ); + std::swap( d_currentTag, rhs.d_currentTag ); + std::swap( d_count, rhs.d_count ); + std::swap( tmp_alignment, rhs.tmp_alignment ); + return *this; +} + + +/************************************************************************ + * Constructor from existing MPI communicator * + ************************************************************************/ +int d_global_currentTag_world1[2] = { 1, 1 }; +int d_global_currentTag_world2[2] = { 1, 1 }; +int d_global_currentTag_self[2] = { 1, 1 }; +#ifdef USE_MPI +std::atomic_int 
d_global_count_world1 = { 1 }; +std::atomic_int d_global_count_world2 = { 1 }; +std::atomic_int d_global_count_self = { 1 }; +#endif +MPI_CLASS::MPI_CLASS( MPI_Comm comm, bool manage ) +{ + d_count = nullptr; + d_ranks = nullptr; + d_manage = false; + tmp_alignment = -1; + // Check if we are using our version of comm_world + if ( comm == MPI_CLASS_COMM_WORLD ) { + communicator = MPI_COMM_WORLD; + } else if ( comm == MPI_CLASS_COMM_SELF ) { + communicator = MPI_COMM_SELF; + } else if ( comm == MPI_CLASS_COMM_NULL ) { + communicator = MPI_COMM_NULL; + } else { + communicator = comm; + } +#ifdef USE_MPI + // We are using MPI, use the MPI communicator to initialize the data + if ( communicator != MPI_COMM_NULL ) { + // Set the MPI_SIZE_T datatype if it has not been set + if ( MPI_SIZE_T == 0x0 ) + MPI_SIZE_T = getSizeTDataType(); + // Attach the error handler + StackTrace::setMPIErrorHandler( communicator ); + // Get the communicator properties + MPI_Comm_rank( communicator, &comm_rank ); + MPI_Comm_size( communicator, &comm_size ); + int flag, *val; + int ierr = MPI_Comm_get_attr( communicator, MPI_TAG_UB, &val, &flag ); + MPI_ASSERT( ierr == MPI_SUCCESS ); + if ( flag == 0 ) { + d_maxTag = 0x7FFFFFFF; // The tag is not a valid attribute (set to 2^31-1) + } else { + d_maxTag = *val; + if ( d_maxTag < 0 ) { + d_maxTag = 0x7FFFFFFF; + } // The maximum tag is > a signed int (set to 2^31-1) + MPI_INSIST( d_maxTag >= 0x7FFF, "maximum tag size is < MPI standard" ); + } + } else { + comm_rank = 1; + comm_size = 0; + d_maxTag = 0x7FFFFFFF; + } + d_isNull = communicator == MPI_COMM_NULL; + if ( manage && communicator != MPI_COMM_NULL && communicator != MPI_COMM_SELF && + communicator != MPI_COMM_WORLD ) + d_manage = true; + // Create the count (Note: we do not need to worry about thread safety) + if ( communicator == MPI_CLASS_COMM_WORLD ) { + d_count = &d_global_count_world1; + ++( *d_count ); + } else if ( communicator == MPI_COMM_WORLD ) { + d_count = &d_global_count_world2; + ++( *d_count ); + } else if ( communicator == MPI_COMM_SELF ) { + d_count = &d_global_count_self; + ++( *d_count ); + } else if ( communicator == MPI_COMM_NULL ) { + d_count = nullptr; + } else { + d_count = new std::atomic_int; + *d_count = 1; + } + if ( d_manage ) + ++N_MPI_Comm_created; + // Create d_ranks + if ( comm_size > 1 ) { + d_ranks = new int[comm_size]; + d_ranks[0] = -1; + } +#else + // We are not using MPI, intialize based on the communicator + NULL_USE( manage ); + comm_rank = 0; + comm_size = 1; + d_maxTag = mpi_max_tag; + d_isNull = communicator == MPI_COMM_NULL; + if ( d_isNull ) + comm_size = 0; +#endif + if ( communicator == MPI_CLASS_COMM_WORLD ) { + d_currentTag = d_global_currentTag_world1; + ++( this->d_currentTag[1] ); + } else if ( communicator == MPI_COMM_WORLD ) { + d_currentTag = d_global_currentTag_world2; + ++( this->d_currentTag[1] ); + } else if ( communicator == MPI_COMM_SELF ) { + d_currentTag = d_global_currentTag_self; + ++( this->d_currentTag[1] ); + } else if ( communicator == MPI_COMM_NULL ) { + d_currentTag = nullptr; + } else { + d_currentTag = new int[2]; + d_currentTag[0] = ( d_maxTag <= 0x10000 ) ? 
1 : 0x1FFF; + d_currentTag[1] = 1; + } + d_call_abort = true; +} + + +/************************************************************************ + * Return the ranks of the communicator in the global comm * + ************************************************************************/ +std::vector MPI_CLASS::globalRanks() const +{ + // Get my global rank if it has not been set + static int myGlobalRank = -1; + if ( myGlobalRank == -1 ) { +#ifdef USE_MPI + if ( MPI_active() ) + MPI_Comm_rank( MPI_CLASS_COMM_WORLD, &myGlobalRank ); +#else + myGlobalRank = 0; +#endif + } + // Check if we are dealing with a serial or null communicator + if ( comm_size == 1 ) + return std::vector( 1, myGlobalRank ); + if ( d_ranks == nullptr || communicator == MPI_COMM_NULL ) + return std::vector(); + // Fill d_ranks if necessary + if ( d_ranks[0] == -1 ) { + if ( communicator == MPI_CLASS_COMM_WORLD ) { + for ( int i = 0; i < comm_size; i++ ) + d_ranks[i] = i; + } else { + + MPI_ASSERT( myGlobalRank != -1 ); + this->allGather( myGlobalRank, d_ranks ); + } + } + // Return d_ranks + return std::vector( d_ranks, d_ranks + comm_size ); +} + + +/************************************************************************ + * Generate a random number * + ************************************************************************/ +size_t MPI_CLASS::rand() const +{ + size_t val = 0; + if ( getRank() == 0 ) { + static std::random_device rd; + static std::mt19937 gen( rd() ); + static std::uniform_int_distribution dist; + val = dist( gen ); + } + val = bcast( val, 0 ); + return val; +} + + +/************************************************************************ + * Intersect two communicators * + ************************************************************************/ +#ifdef USE_MPI +static inline void MPI_Group_free2( MPI_Group *group ) +{ + if ( *group != MPI_GROUP_EMPTY ) { + // MPICH is fine with free'ing an empty group, OpenMPI crashes + MPI_Group_free( group ); + } +} +MPI_CLASS MPI_CLASS::intersect( const MPI_CLASS &comm1, const MPI_CLASS &comm2 ) +{ + MPI_Group group1 = MPI_GROUP_EMPTY, group2 = MPI_GROUP_EMPTY; + if ( !comm1.isNull() ) { + MPI_Group_free2( &group1 ); + MPI_Comm_group( comm1.communicator, &group1 ); + } + if ( !comm2.isNull() ) { + MPI_Group_free2( &group2 ); + MPI_Comm_group( comm2.communicator, &group2 ); + } + MPI_Group group12; + MPI_Group_intersection( group1, group2, &group12 ); + int compare1, compare2; + MPI_Group_compare( group1, group12, &compare1 ); + MPI_Group_compare( group2, group12, &compare2 ); + MPI_CLASS new_comm( MPI_CLASS_COMM_NULL ); + int size; + MPI_Group_size( group12, &size ); + if ( compare1 != MPI_UNEQUAL && size != 0 ) { + // The intersection matches comm1 + new_comm = comm1; + } else if ( compare2 != MPI_UNEQUAL && size != 0 ) { + // The intersection matches comm2 + new_comm = comm2; + } else if ( comm1.isNull() ) { + // comm1 is null, we can return safely (comm1 is needed for communication) + } else { + // The intersection is smaller than comm1 or comm2 + // Check if the new comm is nullptr for all processors + int max_size = 0; + MPI_Allreduce( &size, &max_size, 1, MPI_INT, MPI_MAX, comm1.communicator ); + if ( max_size == 0 ) { + // We are dealing with completely disjoint sets + new_comm = MPI_CLASS( MPI_CLASS_COMM_NULL, false ); + } else { + // Create the new comm + // Note: OpenMPI crashes if the intersection group is EMPTY for any processors + // We will set it to SELF for the EMPTY processors, then create a nullptr comm later + if ( group12 == MPI_GROUP_EMPTY ) { + 
MPI_Group_free2( &group12 ); + MPI_Comm_group( MPI_COMM_SELF, &group12 ); + } + MPI_Comm new_MPI_comm; + MPI_Comm_create( comm1.communicator, group12, &new_MPI_comm ); + if ( size > 0 ) { + // This is the valid case where we create a new intersection comm + new_comm = MPI_CLASS( new_MPI_comm, true ); + } else { + // We actually want a null comm for this communicator + new_comm = MPI_CLASS( MPI_CLASS_COMM_NULL, false ); + MPI_Comm_free( &new_MPI_comm ); + } + } + } + MPI_Group_free2( &group1 ); + MPI_Group_free2( &group2 ); + MPI_Group_free2( &group12 ); + return new_comm; +} +#else +MPI_CLASS MPI_CLASS::intersect( const MPI_CLASS &comm1, const MPI_CLASS &comm2 ) +{ + if ( comm1.isNull() || comm2.isNull() ) + return MPI_CLASS( MPI_CLASS_COMM_NULL, false ); + MPI_ASSERT( comm1.comm_size == 1 && comm2.comm_size == 1 ); + return comm1; +} +#endif + + +/************************************************************************ + * Split a comm * + ************************************************************************/ +MPI_CLASS MPI_CLASS::split( int color, int key ) const +{ + if ( d_isNull ) { + return MPI_CLASS( MPI_CLASS_COMM_NULL ); + } else if ( comm_size == 1 ) { + if ( color == -1 ) + return MPI_CLASS( MPI_CLASS_COMM_NULL ); + return dup(); + } + MPI_Comm new_MPI_comm = MPI_CLASS_COMM_NULL; +#ifdef USE_MPI + // USE MPI to split the communicator + if ( color == -1 ) { + check_MPI( MPI_Comm_split( communicator, MPI_UNDEFINED, key, &new_MPI_comm ) ); + } else { + check_MPI( MPI_Comm_split( communicator, color, key, &new_MPI_comm ) ); + } +#endif + // Create the new object + NULL_USE( key ); + MPI_CLASS new_comm( new_MPI_comm, true ); + new_comm.d_call_abort = d_call_abort; + return new_comm; +} +MPI_CLASS MPI_CLASS::splitByNode( int key ) const +{ + // Check if we are dealing with a single processor (trivial case) + if ( comm_size == 1 ) + return this->split( 0, 0 ); + // Get the node name + std::string name = MPI_CLASS::getNodeName(); + // Gather the names from all ranks + std::vector list( comm_size ); + allGather( name, &list[0] ); + // Create the colors + std::vector color( comm_size, -1 ); + color[0] = 0; + for ( int i = 1; i < comm_size; i++ ) { + const std::string tmp1 = list[i]; + for ( int j = 0; j < i; j++ ) { + const std::string tmp2 = list[j]; + if ( tmp1 == tmp2 ) { + color[i] = color[j]; + break; + } + color[i] = color[i - 1] + 1; + } + } + MPI_CLASS new_comm = this->split( color[comm_rank], key ); + return new_comm; +} + + +/************************************************************************ + * Duplicate an exisiting comm object * + ************************************************************************/ +MPI_CLASS MPI_CLASS::dup() const +{ + if ( d_isNull ) + return MPI_CLASS( MPI_CLASS_COMM_NULL ); + MPI_Comm new_MPI_comm = communicator; +#if defined( USE_MPI ) || defined( USE_PETSC ) + // USE MPI to duplicate the communicator + MPI_Comm_dup( communicator, &new_MPI_comm ); +#else + new_MPI_comm = uniqueGlobalComm; + uniqueGlobalComm++; +#endif + // Create the new comm object + MPI_CLASS new_comm( new_MPI_comm, true ); + new_comm.d_isNull = d_isNull; + new_comm.d_call_abort = d_call_abort; + return new_comm; +} + + +/************************************************************************ + * Get the node name * + ************************************************************************/ +std::string MPI_CLASS::getNodeName() +{ +#ifdef USE_MPI + int length; + char name[MPI_MAX_PROCESSOR_NAME + 1]; + memset( name, 0, MPI_MAX_PROCESSOR_NAME + 1 ); + 
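+    // Query the processor (node) name; the buffer was zeroed above, so the result
+    // is null-terminated even if the MPI library does not pad it.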
MPI_Get_processor_name( name, &length ); + return std::string( name ); +#else + return "Node0"; +#endif +} + + +/************************************************************************ + * Overload operator == * + ************************************************************************/ +bool MPI_CLASS::operator==( const MPI_CLASS &comm ) const +{ + return communicator == comm.communicator; +} + + +/************************************************************************ + * Overload operator != * + ************************************************************************/ +bool MPI_CLASS::operator!=( const MPI_CLASS &comm ) const +{ + return communicator != comm.communicator; +} + + +/************************************************************************ + * Overload operator < * + ************************************************************************/ +bool MPI_CLASS::operator<( const MPI_CLASS &comm ) const +{ + MPI_ASSERT( !this->d_isNull && !comm.d_isNull ); + bool flag = true; + // First check if either communicator is NULL + if ( this->d_isNull ) + return false; + if ( comm.d_isNull ) + flag = false; + // Use compare to check if the comms are equal + if ( compare( comm ) != 0 ) + return false; + // Check that the size of the other communicator is > the current communicator size + if ( comm_size >= comm.comm_size ) + flag = false; +// Check the union of the communicator groups +// this is < comm iff this group is a subgroup of comm's group +#ifdef USE_MPI + MPI_Group group1 = MPI_GROUP_EMPTY, group2 = MPI_GROUP_EMPTY, group12 = MPI_GROUP_EMPTY; + if ( !d_isNull ) + MPI_Comm_group( communicator, &group1 ); + if ( !comm.d_isNull ) + MPI_Comm_group( comm.communicator, &group2 ); + MPI_Group_union( group1, group2, &group12 ); + int compare; + MPI_Group_compare( group2, group12, &compare ); + if ( compare == MPI_UNEQUAL ) + flag = false; + MPI_Group_free( &group1 ); + MPI_Group_free( &group2 ); + MPI_Group_free( &group12 ); +#endif + // Perform a global reduce of the flag (equivalent to all operation) + return allReduce( flag ); +} + + +/************************************************************************ + * Overload operator <= * + ************************************************************************/ +bool MPI_CLASS::operator<=( const MPI_CLASS &comm ) const +{ + MPI_ASSERT( !this->d_isNull && !comm.d_isNull ); + bool flag = true; + // First check if either communicator is NULL + if ( this->d_isNull ) + return false; + if ( comm.d_isNull ) + flag = false; +#ifdef USE_MPI + int world_size = 0; + MPI_Comm_size( MPI_COMM_WORLD, &world_size ); + if ( comm.getSize() == world_size ) + return true; + if ( getSize() == 1 && !comm.d_isNull ) + return true; +#endif + // Use compare to check if the comms are equal + if ( compare( comm ) != 0 ) + return true; + // Check that the size of the other communicator is > the current communicator size + // this is <= comm iff this group is a subgroup of comm's group + if ( comm_size > comm.comm_size ) + flag = false; +// Check the unnion of the communicator groups +#ifdef USE_MPI + MPI_Group group1, group2, group12; + MPI_Comm_group( communicator, &group1 ); + MPI_Comm_group( comm.communicator, &group2 ); + MPI_Group_union( group1, group2, &group12 ); + int compare; + MPI_Group_compare( group2, group12, &compare ); + if ( compare == MPI_UNEQUAL ) + flag = false; + MPI_Group_free( &group1 ); + MPI_Group_free( &group2 ); + MPI_Group_free( &group12 ); +#endif + // Perform a global reduce of the flag (equivalent to all operation) + return 
allReduce( flag ); +} + + +/************************************************************************ + * Overload operator > * + ************************************************************************/ +bool MPI_CLASS::operator>( const MPI_CLASS &comm ) const +{ + bool flag = true; + // First check if either communicator is NULL + if ( this->d_isNull ) + return false; + if ( comm.d_isNull ) + flag = false; + // Use compare to check if the comms are equal + if ( compare( comm ) != 0 ) + return false; + // Check that the size of the other communicator is > the current communicator size + if ( comm_size <= comm.comm_size ) + flag = false; +// Check the unnion of the communicator groups +// this is > comm iff comm's group is a subgroup of this group +#ifdef USE_MPI + MPI_Group group1 = MPI_GROUP_EMPTY, group2 = MPI_GROUP_EMPTY, group12 = MPI_GROUP_EMPTY; + if ( !d_isNull ) + MPI_Comm_group( communicator, &group1 ); + if ( !comm.d_isNull ) + MPI_Comm_group( comm.communicator, &group2 ); + MPI_Group_union( group1, group2, &group12 ); + int compare; + MPI_Group_compare( group1, group12, &compare ); + if ( compare == MPI_UNEQUAL ) + flag = false; + MPI_Group_free( &group1 ); + MPI_Group_free( &group2 ); + MPI_Group_free( &group12 ); +#endif + // Perform a global reduce of the flag (equivalent to all operation) + return allReduce( flag ); +} + + +/************************************************************************ + * Overload operator >= * + ************************************************************************/ +bool MPI_CLASS::operator>=( const MPI_CLASS &comm ) const +{ + bool flag = true; + // First check if either communicator is NULL + if ( this->d_isNull ) + return false; + if ( comm.d_isNull ) + flag = false; +#ifdef USE_MPI + int world_size = 0; + MPI_Comm_size( MPI_COMM_WORLD, &world_size ); + if ( getSize() == world_size ) + return true; + if ( comm.getSize() == 1 && !comm.d_isNull ) + return true; +#endif + // Use compare to check if the comms are equal + if ( compare( comm ) != 0 ) + return true; + // Check that the size of the other communicator is > the current communicator size + if ( comm_size < comm.comm_size ) + flag = false; +// Check the unnion of the communicator groups +// this is >= comm iff comm's group is a subgroup of this group +#ifdef USE_MPI + MPI_Group group1 = MPI_GROUP_EMPTY, group2 = MPI_GROUP_EMPTY, group12 = MPI_GROUP_EMPTY; + if ( !d_isNull ) + MPI_Comm_group( communicator, &group1 ); + if ( !comm.d_isNull ) + MPI_Comm_group( comm.communicator, &group2 ); + MPI_Group_union( group1, group2, &group12 ); + int compare; + MPI_Group_compare( group1, group12, &compare ); + if ( compare == MPI_UNEQUAL ) + flag = false; + MPI_Group_free( &group1 ); + MPI_Group_free( &group2 ); + MPI_Group_free( &group12 ); +#endif + // Perform a global reduce of the flag (equivalent to all operation) + return allReduce( flag ); +} + + +/************************************************************************ + * Compare two comm objects * + ************************************************************************/ +int MPI_CLASS::compare( const MPI_CLASS &comm ) const +{ + if ( communicator == comm.communicator ) + return 1; +#ifdef USE_MPI + if ( d_isNull || comm.d_isNull ) + return 0; + int result; + check_MPI( MPI_Comm_compare( communicator, comm.communicator, &result ) ); + if ( result == MPI_IDENT ) + return 2; + else if ( result == MPI_CONGRUENT ) + return 3; + else if ( result == MPI_SIMILAR ) + return 4; + else if ( result == MPI_UNEQUAL ) + return 0; + MPI_ERROR( 
"Unknown results from comm compare" ); +#else + if ( comm.communicator == MPI_COMM_NULL || communicator == MPI_COMM_NULL ) + return 0; + else + return 3; +#endif + return 0; +} + + +/************************************************************************ + * Abort the program. * + ************************************************************************/ +void MPI_CLASS::setCallAbortInSerialInsteadOfExit( bool flag ) { d_call_abort = flag; } +void MPI_CLASS::abort() const +{ +#ifdef USE_MPI + MPI_Comm comm = communicator; + if ( comm == MPI_COMM_NULL ) + comm = MPI_COMM_WORLD; + if ( !MPI_active() ) { + // MPI is not availible + exit( -1 ); + } else if ( comm_size > 1 ) { + MPI_Abort( comm, -1 ); + } else if ( d_call_abort ) { + MPI_Abort( comm, -1 ); + } else { + exit( -1 ); + } +#else + exit( -1 ); +#endif +} + + +/************************************************************************ + * newTag * + ************************************************************************/ +int MPI_CLASS::newTag() +{ +#ifdef USE_MPI + // Syncronize the processes to ensure all ranks enter this call + // Needed so the count will match + barrier(); + // Return and increment the tag + int tag = ( *d_currentTag )++; + MPI_INSIST( tag <= d_maxTag, "Maximum number of tags exceeded\n" ); + return tag; +#else + static int globalCurrentTag = 1; + return globalCurrentTag++; +#endif +} + + +/************************************************************************ + * allReduce * + ************************************************************************/ +bool MPI_CLASS::allReduce( const bool value ) const +{ + bool ret = value; + if ( comm_size > 1 ) { +#ifdef USE_MPI + MPI_Allreduce( + (void *) &value, (void *) &ret, 1, MPI_UNSIGNED_CHAR, MPI_MIN, communicator ); +#else + MPI_ERROR( "This shouldn't be possible" ); +#endif + } + return ret; +} + + +/************************************************************************ + * anyReduce * + ************************************************************************/ +bool MPI_CLASS::anyReduce( const bool value ) const +{ + bool ret = value; + if ( comm_size > 1 ) { +#ifdef USE_MPI + MPI_Allreduce( + (void *) &value, (void *) &ret, 1, MPI_UNSIGNED_CHAR, MPI_MAX, communicator ); +#else + MPI_ERROR( "This shouldn't be possible" ); +#endif + } + return ret; +} + + +/************************************************************************ + * call_sumReduce * + * Note: these specializations are only called when using MPI. 
* + ************************************************************************/ +#ifdef USE_MPI +// unsigned char +template<> +void MPI_CLASS::call_sumReduce( + const unsigned char *send, unsigned char *recv, const int n ) const +{ + PROFILE_START( "sumReduce1", profile_level ); + MPI_Allreduce( (void *) send, (void *) recv, n, MPI_UNSIGNED_CHAR, MPI_SUM, communicator ); + PROFILE_STOP( "sumReduce1", profile_level ); +} +template<> +void MPI_CLASS::call_sumReduce( unsigned char *x, const int n ) const +{ + PROFILE_START( "sumReduce2", profile_level ); + auto send = x; + auto recv = new unsigned char[n]; + MPI_Allreduce( send, recv, n, MPI_UNSIGNED_CHAR, MPI_SUM, communicator ); + for ( int i = 0; i < n; i++ ) + x[i] = recv[i]; + delete[] recv; + PROFILE_STOP( "sumReduce2", profile_level ); +} +// char +template<> +void MPI_CLASS::call_sumReduce( const char *send, char *recv, const int n ) const +{ + PROFILE_START( "sumReduce1", profile_level ); + MPI_Allreduce( (void *) send, (void *) recv, n, MPI_SIGNED_CHAR, MPI_SUM, communicator ); + PROFILE_STOP( "sumReduce1", profile_level ); +} +template<> +void MPI_CLASS::call_sumReduce( char *x, const int n ) const +{ + PROFILE_START( "sumReduce2", profile_level ); + auto send = x; + auto recv = new char[n]; + MPI_Allreduce( send, recv, n, MPI_SIGNED_CHAR, MPI_SUM, communicator ); + for ( int i = 0; i < n; i++ ) + x[i] = recv[i]; + delete[] recv; + PROFILE_STOP( "sumReduce2", profile_level ); +} +// unsigned int +template<> +void MPI_CLASS::call_sumReduce( + const unsigned int *send, unsigned int *recv, const int n ) const +{ + PROFILE_START( "sumReduce1", profile_level ); + MPI_Allreduce( (void *) send, (void *) recv, n, MPI_UNSIGNED, MPI_SUM, communicator ); + PROFILE_STOP( "sumReduce1", profile_level ); +} +template<> +void MPI_CLASS::call_sumReduce( unsigned int *x, const int n ) const +{ + PROFILE_START( "sumReduce2", profile_level ); + auto send = x; + auto recv = new unsigned int[n]; + MPI_Allreduce( send, recv, n, MPI_UNSIGNED, MPI_SUM, communicator ); + for ( int i = 0; i < n; i++ ) + x[i] = recv[i]; + delete[] recv; + PROFILE_STOP( "sumReduce2", profile_level ); +} +// int +template<> +void MPI_CLASS::call_sumReduce( const int *send, int *recv, const int n ) const +{ + PROFILE_START( "sumReduce1", profile_level ); + MPI_Allreduce( (void *) send, (void *) recv, n, MPI_INT, MPI_SUM, communicator ); + PROFILE_STOP( "sumReduce1", profile_level ); +} +template<> +void MPI_CLASS::call_sumReduce( int *x, const int n ) const +{ + PROFILE_START( "sumReduce2", profile_level ); + auto send = x; + auto recv = new int[n]; + MPI_Allreduce( send, recv, n, MPI_INT, MPI_SUM, communicator ); + for ( int i = 0; i < n; i++ ) + x[i] = recv[i]; + delete[] recv; + PROFILE_STOP( "sumReduce2", profile_level ); +} +// long int +template<> +void MPI_CLASS::call_sumReduce( const long int *send, long int *recv, const int n ) const +{ + PROFILE_START( "sumReduce1", profile_level ); + MPI_Allreduce( (void *) send, (void *) recv, n, MPI_LONG, MPI_SUM, communicator ); + PROFILE_STOP( "sumReduce1", profile_level ); +} +template<> +void MPI_CLASS::call_sumReduce( long int *x, const int n ) const +{ + PROFILE_START( "sumReduce2", profile_level ); + auto send = x; + auto recv = new long int[n]; + MPI_Allreduce( send, recv, n, MPI_LONG, MPI_SUM, communicator ); + for ( int i = 0; i < n; i++ ) + x[i] = recv[i]; + delete[] recv; + PROFILE_STOP( "sumReduce2", profile_level ); +} +// unsigned long int +template<> +void MPI_CLASS::call_sumReduce( + const unsigned long *send, 
unsigned long *recv, const int n ) const +{ + PROFILE_START( "sumReduce1", profile_level ); + MPI_Allreduce( (void *) send, (void *) recv, n, MPI_UNSIGNED_LONG, MPI_SUM, communicator ); + PROFILE_STOP( "sumReduce1", profile_level ); +} +template<> +void MPI_CLASS::call_sumReduce( unsigned long *x, const int n ) const +{ + PROFILE_START( "sumReduce2", profile_level ); + auto send = x; + auto recv = new unsigned long int[n]; + MPI_Allreduce( send, recv, n, MPI_UNSIGNED_LONG, MPI_SUM, communicator ); + for ( int i = 0; i < n; i++ ) + x[i] = recv[i]; + delete[] recv; + PROFILE_STOP( "sumReduce2", profile_level ); +} +// size_t +#ifdef USE_WINDOWS +template<> +void MPI_CLASS::call_sumReduce( const size_t *send, size_t *recv, const int n ) const +{ + MPI_ASSERT( MPI_SIZE_T != 0 ); + PROFILE_START( "sumReduce1", profile_level ); + MPI_Allreduce( (void *) send, (void *) recv, n, MPI_SIZE_T, MPI_SUM, communicator ); + PROFILE_STOP( "sumReduce1", profile_level ); +} +template<> +void MPI_CLASS::call_sumReduce( size_t *x, const int n ) const +{ + MPI_ASSERT( MPI_SIZE_T != 0 ); + PROFILE_START( "sumReduce2", profile_level ); + auto send = x; + auto recv = new size_t[n]; + MPI_Allreduce( (void *) send, (void *) recv, n, MPI_SIZE_T, MPI_SUM, communicator ); + for ( int i = 0; i < n; i++ ) + x[i] = recv[i]; + delete[] recv; + PROFILE_STOP( "sumReduce2", profile_level ); +} +#endif +// float +template<> +void MPI_CLASS::call_sumReduce( const float *send, float *recv, const int n ) const +{ + PROFILE_START( "sumReduce1", profile_level ); + MPI_Allreduce( (void *) send, (void *) recv, n, MPI_FLOAT, MPI_SUM, communicator ); + PROFILE_STOP( "sumReduce1", profile_level ); +} +template<> +void MPI_CLASS::call_sumReduce( float *x, const int n ) const +{ + PROFILE_START( "sumReduce2", profile_level ); + auto send = x; + auto recv = new float[n]; + MPI_Allreduce( send, recv, n, MPI_FLOAT, MPI_SUM, communicator ); + for ( int i = 0; i < n; i++ ) + x[i] = recv[i]; + delete[] recv; + PROFILE_STOP( "sumReduce2", profile_level ); +} +// double +template<> +void MPI_CLASS::call_sumReduce( const double *send, double *recv, const int n ) const +{ + PROFILE_START( "sumReduce1", profile_level ); + MPI_Allreduce( (void *) send, (void *) recv, n, MPI_DOUBLE, MPI_SUM, communicator ); + PROFILE_STOP( "sumReduce1", profile_level ); +} +template<> +void MPI_CLASS::call_sumReduce( double *x, const int n ) const +{ + PROFILE_START( "sumReduce2", profile_level ); + auto send = x; + auto recv = new double[n]; + MPI_Allreduce( send, recv, n, MPI_DOUBLE, MPI_SUM, communicator ); + for ( int i = 0; i < n; i++ ) + x[i] = recv[i]; + delete[] recv; + PROFILE_STOP( "sumReduce2", profile_level ); +} +// std::complex +template<> +void MPI_CLASS::call_sumReduce>( + const std::complex *x, std::complex *y, const int n ) const +{ + PROFILE_START( "sumReduce1", profile_level ); + auto send = new double[2 * n]; + auto recv = new double[2 * n]; + for ( int i = 0; i < n; i++ ) { + send[2 * i + 0] = real( x[i] ); + send[2 * i + 1] = imag( x[i] ); + } + MPI_Allreduce( (void *) send, (void *) recv, 2 * n, MPI_DOUBLE, MPI_SUM, communicator ); + for ( int i = 0; i < n; i++ ) + y[i] = std::complex( recv[2 * i + 0], recv[2 * i + 1] ); + delete[] send; + delete[] recv; + PROFILE_STOP( "sumReduce1", profile_level ); +} +template<> +void MPI_CLASS::call_sumReduce>( std::complex *x, const int n ) const +{ + PROFILE_START( "sumReduce2", profile_level ); + auto send = new double[2 * n]; + auto recv = new double[2 * n]; + for ( int i = 0; i < n; i++ ) { + send[2 * 
i + 0] = real( x[i] ); + send[2 * i + 1] = imag( x[i] ); + } + MPI_Allreduce( send, recv, 2 * n, MPI_DOUBLE, MPI_SUM, communicator ); + for ( int i = 0; i < n; i++ ) + x[i] = std::complex( recv[2 * i + 0], recv[2 * i + 1] ); + delete[] send; + delete[] recv; + PROFILE_STOP( "sumReduce2", profile_level ); +} +#endif + + +/************************************************************************ + * call_minReduce * + * Note: these specializations are only called when using MPI. * + ************************************************************************/ +#ifdef USE_MPI +// unsigned char +template<> +void MPI_CLASS::call_minReduce( + const unsigned char *send, unsigned char *recv, const int n, int *comm_rank_of_min ) const +{ + if ( comm_rank_of_min == nullptr ) { + PROFILE_START( "minReduce1", profile_level ); + MPI_Allreduce( (void *) send, (void *) recv, n, MPI_UNSIGNED_CHAR, MPI_MIN, communicator ); + PROFILE_STOP( "minReduce1", profile_level ); + } else { + auto tmp = new int[n]; + for ( int i = 0; i < n; i++ ) + tmp[i] = send[i]; + call_minReduce( tmp, n, comm_rank_of_min ); + for ( int i = 0; i < n; i++ ) + recv[i] = static_cast( tmp[i] ); + delete[] tmp; + } +} +template<> +void MPI_CLASS::call_minReduce( + unsigned char *x, const int n, int *comm_rank_of_min ) const +{ + if ( comm_rank_of_min == nullptr ) { + PROFILE_START( "minReduce2", profile_level ); + auto send = x; + auto recv = new unsigned char[n]; + MPI_Allreduce( send, recv, n, MPI_UNSIGNED_CHAR, MPI_MIN, communicator ); + for ( int i = 0; i < n; i++ ) + x[i] = recv[i]; + delete[] recv; + PROFILE_STOP( "minReduce2", profile_level ); + } else { + auto tmp = new int[n]; + for ( int i = 0; i < n; i++ ) + tmp[i] = x[i]; + call_minReduce( tmp, n, comm_rank_of_min ); + for ( int i = 0; i < n; i++ ) + x[i] = static_cast( tmp[i] ); + delete[] tmp; + } +} +// char +template<> +void MPI_CLASS::call_minReduce( + const char *send, char *recv, const int n, int *comm_rank_of_min ) const +{ + if ( comm_rank_of_min == nullptr ) { + PROFILE_START( "minReduce1", profile_level ); + MPI_Allreduce( (void *) send, (void *) recv, n, MPI_SIGNED_CHAR, MPI_MIN, communicator ); + PROFILE_STOP( "minReduce1", profile_level ); + } else { + auto tmp = new int[n]; + for ( int i = 0; i < n; i++ ) + tmp[i] = send[i]; + call_minReduce( tmp, n, comm_rank_of_min ); + for ( int i = 0; i < n; i++ ) + recv[i] = static_cast( tmp[i] ); + delete[] tmp; + } +} +template<> +void MPI_CLASS::call_minReduce( char *x, const int n, int *comm_rank_of_min ) const +{ + if ( comm_rank_of_min == nullptr ) { + PROFILE_START( "minReduce2", profile_level ); + auto send = x; + auto recv = new char[n]; + MPI_Allreduce( send, recv, n, MPI_SIGNED_CHAR, MPI_MIN, communicator ); + for ( int i = 0; i < n; i++ ) + x[i] = recv[i]; + delete[] recv; + PROFILE_STOP( "minReduce2", profile_level ); + } else { + auto tmp = new int[n]; + for ( int i = 0; i < n; i++ ) + tmp[i] = x[i]; + call_minReduce( tmp, n, comm_rank_of_min ); + for ( int i = 0; i < n; i++ ) + x[i] = static_cast( tmp[i] ); + delete[] tmp; + } +} +// unsigned int +template<> +void MPI_CLASS::call_minReduce( + const unsigned int *send, unsigned int *recv, const int n, int *comm_rank_of_min ) const +{ + if ( comm_rank_of_min == nullptr ) { + PROFILE_START( "minReduce1", profile_level ); + MPI_Allreduce( (void *) send, (void *) recv, n, MPI_UNSIGNED, MPI_MIN, communicator ); + PROFILE_STOP( "minReduce1", profile_level ); + } else { + auto tmp = new int[n]; + for ( int i = 0; i < n; i++ ) + tmp[i] = unsigned_to_signed( send[i] ); + 
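+        // Rank tracking requested: reduce in signed space because MPI provides no
+        // unsigned MINLOC pair type; unsigned_to_signed/signed_to_unsigned are
+        // order-preserving shifts, so the location of the minimum is unchanged.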
call_minReduce( tmp, n, comm_rank_of_min ); + for ( int i = 0; i < n; i++ ) + recv[i] = signed_to_unsigned( tmp[i] ); + delete[] tmp; + } +} +template<> +void MPI_CLASS::call_minReduce( + unsigned int *x, const int n, int *comm_rank_of_min ) const +{ + if ( comm_rank_of_min == nullptr ) { + PROFILE_START( "minReduce2", profile_level ); + auto send = x; + auto recv = new unsigned int[n]; + MPI_Allreduce( send, recv, n, MPI_UNSIGNED, MPI_MIN, communicator ); + for ( int i = 0; i < n; i++ ) + x[i] = recv[i]; + delete[] recv; + PROFILE_STOP( "minReduce2", profile_level ); + } else { + auto tmp = new int[n]; + for ( int i = 0; i < n; i++ ) + tmp[i] = unsigned_to_signed( x[i] ); + call_minReduce( tmp, n, comm_rank_of_min ); + for ( int i = 0; i < n; i++ ) + x[i] = signed_to_unsigned( tmp[i] ); + delete[] tmp; + } +} +// int +template<> +void MPI_CLASS::call_minReduce( + const int *x, int *y, const int n, int *comm_rank_of_min ) const +{ + PROFILE_START( "minReduce1", profile_level ); + if ( comm_rank_of_min == nullptr ) { + MPI_Allreduce( (void *) x, (void *) y, n, MPI_INT, MPI_MIN, communicator ); + } else { + auto recv = new IntIntStruct[n]; + auto send = new IntIntStruct[n]; + for ( int i = 0; i < n; ++i ) { + send[i].j = x[i]; + send[i].i = comm_rank; + } + MPI_Allreduce( send, recv, n, MPI_2INT, MPI_MINLOC, communicator ); + for ( int i = 0; i < n; ++i ) { + y[i] = recv[i].j; + comm_rank_of_min[i] = recv[i].i; + } + delete[] recv; + delete[] send; + } + PROFILE_STOP( "minReduce1", profile_level ); +} +template<> +void MPI_CLASS::call_minReduce( int *x, const int n, int *comm_rank_of_min ) const +{ + PROFILE_START( "minReduce2", profile_level ); + if ( comm_rank_of_min == nullptr ) { + auto send = x; + auto recv = new int[n]; + MPI_Allreduce( send, recv, n, MPI_INT, MPI_MIN, communicator ); + for ( int i = 0; i < n; i++ ) + x[i] = recv[i]; + delete[] recv; + } else { + auto recv = new IntIntStruct[n]; + auto send = new IntIntStruct[n]; + for ( int i = 0; i < n; ++i ) { + send[i].j = x[i]; + send[i].i = comm_rank; + } + MPI_Allreduce( send, recv, n, MPI_2INT, MPI_MINLOC, communicator ); + for ( int i = 0; i < n; ++i ) { + x[i] = recv[i].j; + comm_rank_of_min[i] = recv[i].i; + } + delete[] recv; + delete[] send; + } + PROFILE_STOP( "minReduce2", profile_level ); +} +// unsigned long int +template<> +void MPI_CLASS::call_minReduce( const unsigned long int *send, + unsigned long int *recv, const int n, int *comm_rank_of_min ) const +{ + if ( comm_rank_of_min == nullptr ) { + PROFILE_START( "minReduce1", profile_level ); + MPI_Allreduce( (void *) send, (void *) recv, n, MPI_UNSIGNED_LONG, MPI_MIN, communicator ); + PROFILE_STOP( "minReduce1", profile_level ); + } else { + auto tmp = new long int[n]; + for ( int i = 0; i < n; i++ ) + tmp[i] = unsigned_to_signed( send[i] ); + call_minReduce( tmp, n, comm_rank_of_min ); + for ( int i = 0; i < n; i++ ) + recv[i] = signed_to_unsigned( tmp[i] ); + delete[] tmp; + } +} +template<> +void MPI_CLASS::call_minReduce( + unsigned long int *x, const int n, int *comm_rank_of_min ) const +{ + if ( comm_rank_of_min == nullptr ) { + PROFILE_START( "minReduce2", profile_level ); + auto send = x; + auto recv = new unsigned long int[n]; + MPI_Allreduce( send, recv, n, MPI_UNSIGNED_LONG, MPI_MIN, communicator ); + for ( int i = 0; i < n; i++ ) + x[i] = recv[i]; + delete[] recv; + PROFILE_STOP( "minReduce2", profile_level ); + } else { + auto tmp = new long int[n]; + for ( int i = 0; i < n; i++ ) + tmp[i] = unsigned_to_signed( x[i] ); + call_minReduce( tmp, n, 
comm_rank_of_min ); + for ( int i = 0; i < n; i++ ) + x[i] = signed_to_unsigned( tmp[i] ); + delete[] tmp; + } +} +// long int +template<> +void MPI_CLASS::call_minReduce( + const long int *x, long int *y, const int n, int *comm_rank_of_min ) const +{ + PROFILE_START( "minReduce1", profile_level ); + if ( comm_rank_of_min == nullptr ) { + MPI_Allreduce( (void *) x, (void *) y, n, MPI_LONG, MPI_MIN, communicator ); + } else { + auto recv = new LongIntStruct[n]; + auto send = new LongIntStruct[n]; + for ( int i = 0; i < n; ++i ) { + send[i].j = x[i]; + send[i].i = comm_rank; + } + MPI_Allreduce( send, recv, n, MPI_LONG_INT, MPI_MINLOC, communicator ); + for ( int i = 0; i < n; ++i ) { + y[i] = recv[i].j; + comm_rank_of_min[i] = recv[i].i; + } + delete[] recv; + delete[] send; + } + PROFILE_STOP( "minReduce1", profile_level ); +} +template<> +void MPI_CLASS::call_minReduce( long int *x, const int n, int *comm_rank_of_min ) const +{ + PROFILE_START( "minReduce2", profile_level ); + if ( comm_rank_of_min == nullptr ) { + auto send = x; + auto recv = new long int[n]; + MPI_Allreduce( send, recv, n, MPI_LONG, MPI_MIN, communicator ); + for ( long int i = 0; i < n; i++ ) + x[i] = recv[i]; + delete[] recv; + } else { + auto recv = new LongIntStruct[n]; + auto send = new LongIntStruct[n]; + for ( int i = 0; i < n; ++i ) { + send[i].j = x[i]; + send[i].i = comm_rank; + } + MPI_Allreduce( send, recv, n, MPI_LONG_INT, MPI_MINLOC, communicator ); + for ( int i = 0; i < n; ++i ) { + x[i] = recv[i].j; + comm_rank_of_min[i] = recv[i].i; + } + delete[] recv; + delete[] send; + } + PROFILE_STOP( "minReduce2", profile_level ); +} +// unsigned long long int +template<> +void MPI_CLASS::call_minReduce( const unsigned long long int *send, + unsigned long long int *recv, const int n, int *comm_rank_of_min ) const +{ + PROFILE_START( "minReduce1", profile_level ); + if ( comm_rank_of_min == nullptr ) { + auto x = new long long int[n]; + auto y = new long long int[n]; + for ( int i = 0; i < n; i++ ) + x[i] = unsigned_to_signed( send[i] ); + MPI_Allreduce( (void *) x, (void *) y, n, MPI_LONG_LONG_INT, MPI_MIN, communicator ); + for ( int i = 0; i < n; i++ ) + recv[i] = signed_to_unsigned( y[i] ); + delete[] x; + delete[] y; + } else { + printf( "minReduce will use double\n" ); + auto tmp = new double[n]; + for ( int i = 0; i < n; i++ ) + tmp[i] = static_cast( send[i] ); + call_minReduce( tmp, n, comm_rank_of_min ); + for ( int i = 0; i < n; i++ ) + recv[i] = static_cast( tmp[i] ); + delete[] tmp; + } + PROFILE_STOP( "minReduce1", profile_level ); +} +template<> +void MPI_CLASS::call_minReduce( + unsigned long long int *x, const int n, int *comm_rank_of_min ) const +{ + auto recv = new unsigned long long int[n]; + call_minReduce( x, recv, n, comm_rank_of_min ); + for ( int i = 0; i < n; i++ ) + x[i] = recv[i]; + delete[] recv; +} +// long long int +template<> +void MPI_CLASS::call_minReduce( + const long long int *x, long long int *y, const int n, int *comm_rank_of_min ) const +{ + PROFILE_START( "minReduce1", profile_level ); + if ( comm_rank_of_min == nullptr ) { + MPI_Allreduce( (void *) x, (void *) y, n, MPI_LONG_LONG_INT, MPI_MIN, communicator ); + } else { + printf( "minReduce will use double\n" ); + auto tmp = new double[n]; + for ( int i = 0; i < n; i++ ) + tmp[i] = static_cast( x[i] ); + call_minReduce( tmp, n, comm_rank_of_min ); + for ( int i = 0; i < n; i++ ) + y[i] = static_cast( tmp[i] ); + delete[] tmp; + } + PROFILE_STOP( "minReduce1", profile_level ); +} +template<> +void MPI_CLASS::call_minReduce( + 
long long int *x, const int n, int *comm_rank_of_min ) const
+{
+    auto recv = new long long int[n];
+    call_minReduce( x, recv, n, comm_rank_of_min );
+    for ( int i = 0; i < n; i++ )
+        x[i] = recv[i];
+    delete[] recv;
+}
+// float
+template<>
+void MPI_CLASS::call_minReduce(
+    const float *x, float *y, const int n, int *comm_rank_of_min ) const
+{
+    PROFILE_START( "minReduce1", profile_level );
+    if ( comm_rank_of_min == nullptr ) {
+        MPI_Allreduce( (void *) x, (void *) y, n, MPI_FLOAT, MPI_MIN, communicator );
+    } else {
+        auto recv = new FloatIntStruct[n];
+        auto send = new FloatIntStruct[n];
+        for ( int i = 0; i < n; ++i ) {
+            send[i].f = x[i];
+            send[i].i = comm_rank;
+        }
+        MPI_Allreduce( send, recv, n, MPI_FLOAT_INT, MPI_MINLOC, communicator );
+        for ( int i = 0; i < n; ++i ) {
+            y[i] = recv[i].f;
+            comm_rank_of_min[i] = recv[i].i;
+        }
+        delete[] recv;
+        delete[] send;
+    }
+    PROFILE_STOP( "minReduce1", profile_level );
+}
+template<>
+void MPI_CLASS::call_minReduce( float *x, const int n, int *comm_rank_of_min ) const
+{
+    PROFILE_START( "minReduce2", profile_level );
+    if ( comm_rank_of_min == nullptr ) {
+        auto send = x;
+        auto recv = new float[n];
+        MPI_Allreduce( send, recv, n, MPI_FLOAT, MPI_MIN, communicator );
+        for ( int i = 0; i < n; i++ )
+            x[i] = recv[i];
+        delete[] recv;
+    } else {
+        auto recv = new FloatIntStruct[n];
+        auto send = new FloatIntStruct[n];
+        for ( int i = 0; i < n; ++i ) {
+            send[i].f = x[i];
+            send[i].i = comm_rank;
+        }
+        MPI_Allreduce( send, recv, n, MPI_FLOAT_INT, MPI_MINLOC, communicator );
+        for ( int i = 0; i < n; ++i ) {
+            x[i] = recv[i].f;
+            comm_rank_of_min[i] = recv[i].i;
+        }
+        delete[] recv;
+        delete[] send;
+    }
+    PROFILE_STOP( "minReduce2", profile_level );
+}
+// double
+template<>
+void MPI_CLASS::call_minReduce(
+    const double *x, double *y, const int n, int *comm_rank_of_min ) const
+{
+    PROFILE_START( "minReduce1", profile_level );
+    if ( comm_rank_of_min == nullptr ) {
+        MPI_Allreduce( (void *) x, (void *) y, n, MPI_DOUBLE, MPI_MIN, communicator );
+    } else {
+        auto recv = new DoubleIntStruct[n];
+        auto send = new DoubleIntStruct[n];
+        for ( int i = 0; i < n; ++i ) {
+            send[i].d = x[i];
+            send[i].i = comm_rank;
+        }
+        MPI_Allreduce( send, recv, n, MPI_DOUBLE_INT, MPI_MINLOC, communicator );
+        for ( int i = 0; i < n; ++i ) {
+            y[i] = recv[i].d;
+            comm_rank_of_min[i] = recv[i].i;
+        }
+        delete[] recv;
+        delete[] send;
+    }
+    PROFILE_STOP( "minReduce1", profile_level );
+}
+template<>
+void MPI_CLASS::call_minReduce( double *x, const int n, int *comm_rank_of_min ) const
+{
+    PROFILE_START( "minReduce2", profile_level );
+    if ( comm_rank_of_min == nullptr ) {
+        auto send = x;
+        auto recv = new double[n];
+        MPI_Allreduce( send, recv, n, MPI_DOUBLE, MPI_MIN, communicator );
+        for ( int i = 0; i < n; i++ )
+            x[i] = recv[i];
+        delete[] recv;
+    } else {
+        auto recv = new DoubleIntStruct[n];
+        auto send = new DoubleIntStruct[n];
+        for ( int i = 0; i < n; ++i ) {
+            send[i].d = x[i];
+            send[i].i = comm_rank;
+        }
+        MPI_Allreduce( send, recv, n, MPI_DOUBLE_INT, MPI_MINLOC, communicator );
+        for ( int i = 0; i < n; ++i ) {
+            x[i] = recv[i].d;
+            comm_rank_of_min[i] = recv[i].i;
+        }
+        delete[] recv;
+        delete[] send;
+    }
+    PROFILE_STOP( "minReduce2", profile_level );
+}
+#endif
+
+
+/************************************************************************
+ * call_maxReduce                                                       *
+ * Note: these specializations are only called when using MPI.
* + ************************************************************************/ +#ifdef USE_MPI +// unsigned char +template<> +void MPI_CLASS::call_maxReduce( + const unsigned char *send, unsigned char *recv, const int n, int *comm_rank_of_max ) const +{ + if ( comm_rank_of_max == nullptr ) { + PROFILE_START( "maxReduce1", profile_level ); + MPI_Allreduce( (void *) send, (void *) recv, n, MPI_UNSIGNED_CHAR, MPI_MAX, communicator ); + PROFILE_STOP( "maxReduce1", profile_level ); + } else { + auto tmp = new int[n]; + for ( int i = 0; i < n; i++ ) + tmp[i] = send[i]; + call_maxReduce( tmp, n, comm_rank_of_max ); + for ( int i = 0; i < n; i++ ) + recv[i] = static_cast( tmp[i] ); + delete[] tmp; + } +} +template<> +void MPI_CLASS::call_maxReduce( + unsigned char *x, const int n, int *comm_rank_of_max ) const +{ + if ( comm_rank_of_max == nullptr ) { + PROFILE_START( "maxReduce2", profile_level ); + auto send = x; + auto recv = new unsigned char[n]; + MPI_Allreduce( send, recv, n, MPI_UNSIGNED_CHAR, MPI_MAX, communicator ); + for ( int i = 0; i < n; i++ ) + x[i] = recv[i]; + delete[] recv; + PROFILE_STOP( "maxReduce2", profile_level ); + } else { + auto tmp = new int[n]; + for ( int i = 0; i < n; i++ ) + tmp[i] = x[i]; + call_maxReduce( tmp, n, comm_rank_of_max ); + for ( int i = 0; i < n; i++ ) + x[i] = static_cast( tmp[i] ); + delete[] tmp; + } +} +// char +template<> +void MPI_CLASS::call_maxReduce( + const char *send, char *recv, const int n, int *comm_rank_of_max ) const +{ + if ( comm_rank_of_max == nullptr ) { + PROFILE_START( "maxReduce1", profile_level ); + MPI_Allreduce( (void *) send, (void *) recv, n, MPI_SIGNED_CHAR, MPI_MAX, communicator ); + PROFILE_STOP( "maxReduce1", profile_level ); + } else { + auto tmp = new int[n]; + for ( int i = 0; i < n; i++ ) + tmp[i] = send[i]; + call_maxReduce( tmp, n, comm_rank_of_max ); + for ( int i = 0; i < n; i++ ) + recv[i] = static_cast( tmp[i] ); + delete[] tmp; + } +} +template<> +void MPI_CLASS::call_maxReduce( char *x, const int n, int *comm_rank_of_max ) const +{ + if ( comm_rank_of_max == nullptr ) { + PROFILE_START( "maxReduce2", profile_level ); + auto send = x; + auto recv = new char[n]; + MPI_Allreduce( send, recv, n, MPI_SIGNED_CHAR, MPI_MAX, communicator ); + for ( int i = 0; i < n; i++ ) + x[i] = recv[i]; + delete[] recv; + PROFILE_STOP( "maxReduce2", profile_level ); + } else { + auto tmp = new int[n]; + for ( int i = 0; i < n; i++ ) + tmp[i] = x[i]; + call_maxReduce( tmp, n, comm_rank_of_max ); + for ( int i = 0; i < n; i++ ) + x[i] = static_cast( tmp[i] ); + delete[] tmp; + } +} +// unsigned int +template<> +void MPI_CLASS::call_maxReduce( + const unsigned int *send, unsigned int *recv, const int n, int *comm_rank_of_max ) const +{ + if ( comm_rank_of_max == nullptr ) { + PROFILE_START( "maxReduce1", profile_level ); + MPI_Allreduce( (void *) send, (void *) recv, n, MPI_UNSIGNED, MPI_MAX, communicator ); + PROFILE_STOP( "maxReduce1", profile_level ); + } else { + auto tmp = new int[n]; + for ( int i = 0; i < n; i++ ) + tmp[i] = unsigned_to_signed( send[i] ); + call_maxReduce( tmp, n, comm_rank_of_max ); + for ( int i = 0; i < n; i++ ) + recv[i] = signed_to_unsigned( tmp[i] ); + delete[] tmp; + } +} +template<> +void MPI_CLASS::call_maxReduce( + unsigned int *x, const int n, int *comm_rank_of_max ) const +{ + if ( comm_rank_of_max == nullptr ) { + PROFILE_START( "maxReduce2", profile_level ); + auto send = x; + auto recv = new unsigned int[n]; + MPI_Allreduce( send, recv, n, MPI_UNSIGNED, MPI_MAX, communicator ); + for ( int i = 0; 
i < n; i++ ) + x[i] = recv[i]; + delete[] recv; + PROFILE_STOP( "maxReduce2", profile_level ); + } else { + auto tmp = new int[n]; + for ( int i = 0; i < n; i++ ) + tmp[i] = unsigned_to_signed( x[i] ); + call_maxReduce( tmp, n, comm_rank_of_max ); + for ( int i = 0; i < n; i++ ) + x[i] = signed_to_unsigned( tmp[i] ); + delete[] tmp; + } +} +// int +template<> +void MPI_CLASS::call_maxReduce( + const int *x, int *y, const int n, int *comm_rank_of_max ) const +{ + PROFILE_START( "maxReduce1", profile_level ); + if ( comm_rank_of_max == nullptr ) { + MPI_Allreduce( (void *) x, (void *) y, n, MPI_INT, MPI_MAX, communicator ); + } else { + auto recv = new IntIntStruct[n]; + auto send = new IntIntStruct[n]; + for ( int i = 0; i < n; ++i ) { + send[i].j = x[i]; + send[i].i = comm_rank; + } + MPI_Allreduce( send, recv, n, MPI_2INT, MPI_MAXLOC, communicator ); + for ( int i = 0; i < n; ++i ) { + y[i] = recv[i].j; + comm_rank_of_max[i] = recv[i].i; + } + delete[] recv; + delete[] send; + } + PROFILE_STOP( "maxReduce1", profile_level ); +} +template<> +void MPI_CLASS::call_maxReduce( int *x, const int n, int *comm_rank_of_max ) const +{ + PROFILE_START( "maxReduce2", profile_level ); + if ( comm_rank_of_max == nullptr ) { + int *send = x; + auto recv = new int[n]; + MPI_Allreduce( send, recv, n, MPI_INT, MPI_MAX, communicator ); + for ( int i = 0; i < n; i++ ) + x[i] = recv[i]; + delete[] recv; + } else { + auto recv = new IntIntStruct[n]; + auto send = new IntIntStruct[n]; + for ( int i = 0; i < n; ++i ) { + send[i].j = x[i]; + send[i].i = comm_rank; + } + MPI_Allreduce( send, recv, n, MPI_2INT, MPI_MAXLOC, communicator ); + for ( int i = 0; i < n; ++i ) { + x[i] = recv[i].j; + comm_rank_of_max[i] = recv[i].i; + } + delete[] recv; + delete[] send; + } + PROFILE_STOP( "maxReduce2", profile_level ); +} +// long int +template<> +void MPI_CLASS::call_maxReduce( + const long int *x, long int *y, const int n, int *comm_rank_of_max ) const +{ + PROFILE_START( "maxReduce1", profile_level ); + if ( comm_rank_of_max == nullptr ) { + MPI_Allreduce( (void *) x, (void *) y, n, MPI_LONG, MPI_MAX, communicator ); + } else { + auto recv = new LongIntStruct[n]; + auto send = new LongIntStruct[n]; + for ( int i = 0; i < n; ++i ) { + send[i].j = x[i]; + send[i].i = comm_rank; + } + MPI_Allreduce( send, recv, n, MPI_LONG_INT, MPI_MAXLOC, communicator ); + for ( int i = 0; i < n; ++i ) { + y[i] = recv[i].j; + comm_rank_of_max[i] = recv[i].i; + } + delete[] recv; + delete[] send; + } + PROFILE_STOP( "maxReduce1", profile_level ); +} +template<> +void MPI_CLASS::call_maxReduce( long int *x, const int n, int *comm_rank_of_max ) const +{ + PROFILE_START( "maxReduce2", profile_level ); + if ( comm_rank_of_max == nullptr ) { + auto send = x; + auto recv = new long int[n]; + MPI_Allreduce( send, recv, n, MPI_LONG, MPI_MAX, communicator ); + for ( int i = 0; i < n; i++ ) + x[i] = recv[i]; + delete[] recv; + } else { + auto recv = new LongIntStruct[n]; + auto send = new LongIntStruct[n]; + for ( int i = 0; i < n; ++i ) { + send[i].j = x[i]; + send[i].i = comm_rank; + } + MPI_Allreduce( send, recv, n, MPI_LONG_INT, MPI_MAXLOC, communicator ); + for ( int i = 0; i < n; ++i ) { + x[i] = recv[i].j; + comm_rank_of_max[i] = recv[i].i; + } + delete[] recv; + delete[] send; + } + PROFILE_STOP( "maxReduce2", profile_level ); +} +// unsigned long int +template<> +void MPI_CLASS::call_maxReduce( const unsigned long int *send, + unsigned long int *recv, const int n, int *comm_rank_of_max ) const +{ + if ( comm_rank_of_max == nullptr ) { + 
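+        // No rank tracking requested: reduce directly using the native unsigned type.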
PROFILE_START( "maxReduce1", profile_level ); + MPI_Allreduce( (void *) send, (void *) recv, n, MPI_UNSIGNED_LONG, MPI_MAX, communicator ); + PROFILE_STOP( "maxReduce1", profile_level ); + } else { + auto tmp = new long int[n]; + for ( int i = 0; i < n; i++ ) + tmp[i] = unsigned_to_signed( send[i] ); + call_maxReduce( tmp, n, comm_rank_of_max ); + for ( int i = 0; i < n; i++ ) + recv[i] = signed_to_unsigned( tmp[i] ); + delete[] tmp; + } +} +template<> +void MPI_CLASS::call_maxReduce( + unsigned long int *x, const int n, int *comm_rank_of_max ) const +{ + if ( comm_rank_of_max == nullptr ) { + PROFILE_START( "maxReduce2", profile_level ); + auto send = x; + auto recv = new unsigned long int[n]; + MPI_Allreduce( send, recv, n, MPI_UNSIGNED_LONG, MPI_MAX, communicator ); + for ( int i = 0; i < n; i++ ) + x[i] = recv[i]; + delete[] recv; + PROFILE_STOP( "maxReduce2", profile_level ); + } else { + auto tmp = new long int[n]; + for ( int i = 0; i < n; i++ ) + tmp[i] = unsigned_to_signed( x[i] ); + call_maxReduce( tmp, n, comm_rank_of_max ); + for ( int i = 0; i < n; i++ ) + x[i] = signed_to_unsigned( tmp[i] ); + delete[] tmp; + } +} +// unsigned long long int +template<> +void MPI_CLASS::call_maxReduce( const unsigned long long int *send, + unsigned long long int *recv, const int n, int *comm_rank_of_max ) const +{ + PROFILE_START( "maxReduce1", profile_level ); + if ( comm_rank_of_max == nullptr ) { + auto x = new long long int[n]; + auto y = new long long int[n]; + for ( int i = 0; i < n; i++ ) + x[i] = unsigned_to_signed( send[i] ); + MPI_Allreduce( (void *) x, (void *) y, n, MPI_LONG_LONG_INT, MPI_MAX, communicator ); + for ( int i = 0; i < n; i++ ) + recv[i] = signed_to_unsigned( y[i] ); + delete[] x; + delete[] y; + } else { + printf( "maxReduce will use double\n" ); + auto tmp = new double[n]; + for ( int i = 0; i < n; i++ ) + tmp[i] = static_cast( send[i] ); + call_maxReduce( tmp, n, comm_rank_of_max ); + for ( int i = 0; i < n; i++ ) + recv[i] = static_cast( tmp[i] ); + delete[] tmp; + } + PROFILE_STOP( "maxReduce1", profile_level ); +} +template<> +void MPI_CLASS::call_maxReduce( + unsigned long long int *x, const int n, int *comm_rank_of_max ) const +{ + auto recv = new unsigned long long int[n]; + call_maxReduce( x, recv, n, comm_rank_of_max ); + for ( int i = 0; i < n; i++ ) + x[i] = recv[i]; + delete[] recv; +} +// long long int +template<> +void MPI_CLASS::call_maxReduce( + const long long int *x, long long int *y, const int n, int *comm_rank_of_max ) const +{ + PROFILE_START( "maxReduce1", profile_level ); + if ( comm_rank_of_max == nullptr ) { + MPI_Allreduce( (void *) x, (void *) y, n, MPI_LONG_LONG_INT, MPI_MAX, communicator ); + } else { + printf( "maxReduce will use double\n" ); + auto tmp = new double[n]; + for ( int i = 0; i < n; i++ ) + tmp[i] = static_cast( x[i] ); + call_maxReduce( tmp, n, comm_rank_of_max ); + for ( int i = 0; i < n; i++ ) + y[i] = static_cast( tmp[i] ); + delete[] tmp; + } + PROFILE_STOP( "maxReduce1", profile_level ); +} +template<> +void MPI_CLASS::call_maxReduce( + long long int *x, const int n, int *comm_rank_of_max ) const +{ + auto recv = new long long int[n]; + call_maxReduce( x, recv, n, comm_rank_of_max ); + for ( int i = 0; i < n; i++ ) + x[i] = signed_to_unsigned( recv[i] ); + delete[] recv; +} +// float +template<> +void MPI_CLASS::call_maxReduce( + const float *x, float *y, const int n, int *comm_rank_of_max ) const +{ + PROFILE_START( "maxReduce1", profile_level ); + if ( comm_rank_of_max == nullptr ) { + MPI_Allreduce( (void *) x, 
(void *) y, n, MPI_FLOAT, MPI_MAX, communicator ); + } else { + auto recv = new FloatIntStruct[n]; + auto send = new FloatIntStruct[n]; + for ( int i = 0; i < n; ++i ) { + send[i].f = x[i]; + send[i].i = comm_rank; + } + MPI_Allreduce( send, recv, n, MPI_FLOAT_INT, MPI_MAXLOC, communicator ); + for ( int i = 0; i < n; ++i ) { + y[i] = recv[i].f; + comm_rank_of_max[i] = recv[i].i; + } + delete[] recv; + delete[] send; + } + PROFILE_STOP( "maxReduce1", profile_level ); +} +template<> +void MPI_CLASS::call_maxReduce( float *x, const int n, int *comm_rank_of_max ) const +{ + PROFILE_START( "maxReduce2", profile_level ); + if ( comm_rank_of_max == nullptr ) { + auto send = x; + auto recv = new float[n]; + MPI_Allreduce( send, recv, n, MPI_FLOAT, MPI_MAX, communicator ); + for ( int i = 0; i < n; i++ ) + x[i] = recv[i]; + delete[] recv; + } else { + auto recv = new FloatIntStruct[n]; + auto send = new FloatIntStruct[n]; + for ( int i = 0; i < n; ++i ) { + send[i].f = x[i]; + send[i].i = comm_rank; + } + MPI_Allreduce( send, recv, n, MPI_FLOAT_INT, MPI_MAXLOC, communicator ); + for ( int i = 0; i < n; ++i ) { + x[i] = recv[i].f; + comm_rank_of_max[i] = recv[i].i; + } + delete[] recv; + delete[] send; + } + PROFILE_STOP( "maxReduce2", profile_level ); +} +// double +template<> +void MPI_CLASS::call_maxReduce( + const double *x, double *y, const int n, int *comm_rank_of_max ) const +{ + PROFILE_START( "maxReduce1", profile_level ); + if ( comm_rank_of_max == nullptr ) { + MPI_Allreduce( (void *) x, (void *) y, n, MPI_DOUBLE, MPI_MAX, communicator ); + } else { + auto recv = new DoubleIntStruct[n]; + auto send = new DoubleIntStruct[n]; + for ( int i = 0; i < n; ++i ) { + send[i].d = x[i]; + send[i].i = comm_rank; + } + MPI_Allreduce( send, recv, n, MPI_DOUBLE_INT, MPI_MAXLOC, communicator ); + for ( int i = 0; i < n; ++i ) { + y[i] = recv[i].d; + comm_rank_of_max[i] = recv[i].i; + } + delete[] recv; + delete[] send; + } + PROFILE_STOP( "maxReduce1", profile_level ); +} +template<> +void MPI_CLASS::call_maxReduce( double *x, const int n, int *comm_rank_of_max ) const +{ + PROFILE_START( "maxReduce2", profile_level ); + if ( comm_rank_of_max == nullptr ) { + auto send = x; + auto recv = new double[n]; + MPI_Allreduce( send, recv, n, MPI_DOUBLE, MPI_MAX, communicator ); + for ( int i = 0; i < n; i++ ) + x[i] = recv[i]; + delete[] recv; + } else { + auto recv = new DoubleIntStruct[n]; + auto send = new DoubleIntStruct[n]; + for ( int i = 0; i < n; ++i ) { + send[i].d = x[i]; + send[i].i = comm_rank; + } + MPI_Allreduce( send, recv, n, MPI_DOUBLE_INT, MPI_MAXLOC, communicator ); + for ( int i = 0; i < n; ++i ) { + x[i] = recv[i].d; + comm_rank_of_max[i] = recv[i].i; + } + delete[] recv; + delete[] send; + } + PROFILE_STOP( "maxReduce2", profile_level ); +} +#endif + + +/************************************************************************ + * bcast * + * Note: these specializations are only called when using MPI. 
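The rank-of-max branches above rely on MPI's paired value/rank reductions (MPI_2INT, MPI_LONG_INT, MPI_FLOAT_INT, MPI_DOUBLE_INT with MPI_MAXLOC). As a point of reference, a minimal standalone sketch of that pattern is shown here; it is separate from the patch itself and the per-rank value is purely illustrative:

// Standalone illustration of the MPI_MAXLOC pattern used by the specializations above.
#include <mpi.h>
#include <cstdio>

int main( int argc, char **argv )
{
    MPI_Init( &argc, &argv );
    int rank;
    MPI_Comm_rank( MPI_COMM_WORLD, &rank );
    struct { double d; int i; } in, out;   // layout matches MPI_DOUBLE_INT
    in.d = 1.0 * rank * rank;              // per-rank value (illustrative only)
    in.i = rank;                           // rank carried through the reduction
    MPI_Allreduce( &in, &out, 1, MPI_DOUBLE_INT, MPI_MAXLOC, MPI_COMM_WORLD );
    if ( rank == 0 )
        printf( "max = %f on rank %d\n", out.d, out.i );
    MPI_Finalize();
    return 0;
}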
* + ************************************************************************/ +#ifdef USE_MPI +// char +template<> +void MPI_CLASS::call_bcast( unsigned char *x, const int n, const int root ) const +{ + PROFILE_START( "bcast", profile_level ); + MPI_Bcast( x, n, MPI_UNSIGNED_CHAR, root, communicator ); + PROFILE_STOP( "bcast", profile_level ); +} +template<> +void MPI_CLASS::call_bcast( char *x, const int n, const int root ) const +{ + PROFILE_START( "bcast", profile_level ); + MPI_Bcast( x, n, MPI_CHAR, root, communicator ); + PROFILE_STOP( "bcast", profile_level ); +} +// int +template<> +void MPI_CLASS::call_bcast( unsigned int *x, const int n, const int root ) const +{ + PROFILE_START( "bcast", profile_level ); + MPI_Bcast( x, n, MPI_UNSIGNED, root, communicator ); + PROFILE_STOP( "bcast", profile_level ); +} +template<> +void MPI_CLASS::call_bcast( int *x, const int n, const int root ) const +{ + PROFILE_START( "bcast", profile_level ); + MPI_Bcast( x, n, MPI_INT, root, communicator ); + PROFILE_STOP( "bcast", profile_level ); +} +// float +template<> +void MPI_CLASS::call_bcast( float *x, const int n, const int root ) const +{ + PROFILE_START( "bcast", profile_level ); + MPI_Bcast( x, n, MPI_FLOAT, root, communicator ); + PROFILE_STOP( "bcast", profile_level ); +} +// double +template<> +void MPI_CLASS::call_bcast( double *x, const int n, const int root ) const +{ + PROFILE_START( "bcast", profile_level ); + MPI_Bcast( x, n, MPI_DOUBLE, root, communicator ); + PROFILE_STOP( "bcast", profile_level ); +} +#else +// We need a concrete instantiation of bcast(x,n,root); +template<> +void MPI_CLASS::call_bcast( char *, const int, const int ) const +{ +} +#endif + + +/************************************************************************ + * Perform a global barrier across all processors. * + ************************************************************************/ +void MPI_CLASS::barrier() const +{ +#ifdef USE_MPI + MPI_Barrier( communicator ); +#endif +} + + +/************************************************************************ + * Send data array to another processor. * + * Note: these specializations are only called when using MPI. * + ************************************************************************/ +#ifdef USE_MPI +// char +template<> +void MPI_CLASS::send( + const char *buf, const int length, const int recv_proc_number, int tag ) const +{ + // Set the tag to 0 if it is < 0 + tag = ( tag >= 0 ) ? tag : 0; + MPI_INSIST( tag <= d_maxTag, "Maximum tag value exceeded" ); + // Send the data + PROFILE_START( "send", profile_level ); + MPI_Send( (void *) buf, length, MPI_CHAR, recv_proc_number, tag, communicator ); + PROFILE_STOP( "send", profile_level ); +} +// int +template<> +void MPI_CLASS::send( + const int *buf, const int length, const int recv_proc_number, int tag ) const +{ + // Set the tag to 0 if it is < 0 + tag = ( tag >= 0 ) ? tag : 0; + MPI_INSIST( tag <= d_maxTag, "Maximum tag value exceeded" ); + // Send the data + PROFILE_START( "send", profile_level ); + MPI_Send( (void *) buf, length, MPI_INT, recv_proc_number, tag, communicator ); + PROFILE_STOP( "send", profile_level ); +} +// float +template<> +void MPI_CLASS::send( + const float *buf, const int length, const int recv_proc_number, int tag ) const +{ + // Set the tag to 0 if it is < 0 + tag = ( tag >= 0 ) ? 
tag : 0;
+    MPI_INSIST( tag <= d_maxTag, "Maximum tag value exceeded" );
+    // Send the data
+    PROFILE_START( "send", profile_level );
+    MPI_Send( (void *) buf, length, MPI_FLOAT, recv_proc_number, tag, communicator );
+    PROFILE_STOP( "send", profile_level );
+}
+// double
+template<>
+void MPI_CLASS::send(
+    const double *buf, const int length, const int recv_proc_number, int tag ) const
+{
+    // Set the tag to 0 if it is < 0
+    tag = ( tag >= 0 ) ? tag : 0;
+    MPI_INSIST( tag <= d_maxTag, "Maximum tag value exceeded" );
+    // Send the data
+    PROFILE_START( "send", profile_level );
+    MPI_Send( (void *) buf, length, MPI_DOUBLE, recv_proc_number, tag, communicator );
+    PROFILE_STOP( "send", profile_level );
+}
+#else
+// We need a concrete instantiation of send for use without MPI
+template<>
+void MPI_CLASS::send( const char *buf, const int length, const int, int tag ) const
+{
+    MPI_INSIST( tag <= d_maxTag, "Maximum tag value exceeded" );
+    MPI_INSIST( tag >= 0, "tag must be >= 0" );
+    PROFILE_START( "send", profile_level );
+    auto id = getRequest( communicator, tag );
+    auto it = global_isendrecv_list.find( id );
+    MPI_INSIST( it != global_isendrecv_list.end(),
+        "send must be paired with a previous call to irecv in serial" );
+    MPI_ASSERT( it->second.status == 2 );
+    memcpy( (char *) it->second.data, buf, length );
+    global_isendrecv_list.erase( it );
+    PROFILE_STOP( "send", profile_level );
+}
+#endif
+
+
+/************************************************************************
+ * Non-blocking send data array to another processor.                    *
+ * Note: these specializations are only called when using MPI.           *
+ ************************************************************************/
+#ifdef USE_MPI
+// char
+template<>
+MPI_Request MPI_CLASS::Isend(
+    const char *buf, const int length, const int recv_proc, const int tag ) const
+{
+    MPI_INSIST( tag <= d_maxTag, "Maximum tag value exceeded" );
+    MPI_INSIST( tag >= 0, "tag must be >= 0" );
+    MPI_Request request;
+    PROFILE_START( "Isend", profile_level );
+    MPI_Isend( (void *) buf, length, MPI_CHAR, recv_proc, tag, communicator, &request );
+    PROFILE_STOP( "Isend", profile_level );
+    return request;
+}
+// int
+template<>
+MPI_Request MPI_CLASS::Isend(
+    const int *buf, const int length, const int recv_proc, const int tag ) const
+{
+    MPI_INSIST( tag <= d_maxTag, "Maximum tag value exceeded" );
+    MPI_INSIST( tag >= 0, "tag must be >= 0" );
+    MPI_Request request;
+    PROFILE_START( "Isend", profile_level );
+    MPI_Isend( (void *) buf, length, MPI_INT, recv_proc, tag, communicator, &request );
+    PROFILE_STOP( "Isend", profile_level );
+    return request;
+}
+// float
+template<>
+MPI_Request MPI_CLASS::Isend(
+    const float *buf, const int length, const int recv_proc, const int tag ) const
+{
+    MPI_INSIST( tag <= d_maxTag, "Maximum tag value exceeded" );
+    MPI_INSIST( tag >= 0, "tag must be >= 0" );
+    MPI_Request request;
+    PROFILE_START( "Isend", profile_level );
+    MPI_Isend( (void *) buf, length, MPI_FLOAT, recv_proc, tag, communicator, &request );
+    PROFILE_STOP( "Isend", profile_level );
+    return request;
+}
+// double
+template<>
+MPI_Request MPI_CLASS::Isend(
+    const double *buf, const int length, const int recv_proc, const int tag ) const
+{
+    MPI_INSIST( tag <= d_maxTag, "Maximum tag value exceeded" );
+    MPI_INSIST( tag >= 0, "tag must be >= 0" );
+    MPI_Request request;
+    PROFILE_START( "Isend", profile_level );
+    MPI_Isend( (void *) buf, length, MPI_DOUBLE, recv_proc, tag, communicator, &request );
+    PROFILE_STOP( "Isend", profile_level );
+    return
request; +} +#else +// We need a concrete instantiation of send for use without mpi +template<> +MPI_Request MPI_CLASS::Isend( + const char *buf, const int length, const int, const int tag ) const +{ + MPI_INSIST( tag <= d_maxTag, "Maximum tag value exceeded" ); + MPI_INSIST( tag >= 0, "tag must be >= 0" ); + PROFILE_START( "Isend", profile_level ); + auto id = getRequest( communicator, tag ); + auto it = global_isendrecv_list.find( id ); + if ( it == global_isendrecv_list.end() ) { + // We are calling isend first + Isendrecv_struct data; + data.data = buf; + data.status = 1; + global_isendrecv_list.insert( std::pair( id, data ) ); + } else { + // We called irecv first + MPI_ASSERT( it->second.status == 2 ); + memcpy( (char *) it->second.data, buf, length ); + global_isendrecv_list.erase( it ); + } + PROFILE_STOP( "Isend", profile_level ); + return id; +} +#endif + + +/************************************************************************ + * Send byte array to another processor. * + ************************************************************************/ +void MPI_CLASS::sendBytes( + const void *buf, const int number_bytes, const int recv_proc_number, int tag ) const +{ + MPI_INSIST( tag <= d_maxTag, "Maximum tag value exceeded" ); + MPI_INSIST( tag >= 0, "tag must be >= 0" ); + send( (const char *) buf, number_bytes, recv_proc_number, tag ); +} + + +/************************************************************************ + * Non-blocking send byte array to another processor. * + ************************************************************************/ +MPI_Request MPI_CLASS::IsendBytes( + const void *buf, const int number_bytes, const int recv_proc, const int tag ) const +{ + MPI_INSIST( tag <= d_maxTag, "Maximum tag value exceeded" ); + MPI_INSIST( tag >= 0, "tag must be >= 0" ); + return Isend( (const char *) buf, number_bytes, recv_proc, tag ); +} + + +/************************************************************************ + * Recieve data array to another processor. * + * Note: these specializations are only called when using MPI. * + ************************************************************************/ +#ifdef USE_MPI +// char +template<> +void MPI_CLASS::recv( + char *buf, int &length, const int send_proc_number, const bool get_length, int tag ) const +{ + // Set the tag to 0 if it is < 0 + tag = ( tag >= 0 ) ? tag : 0; + MPI_INSIST( tag <= d_maxTag, "Maximum tag value exceeded" ); + PROFILE_START( "recv", profile_level ); + // Get the recieve length if necessary + if ( get_length ) { + int bytes = this->probe( send_proc_number, tag ); + int recv_length = bytes / sizeof( char ); + MPI_INSIST( length >= recv_length, "Recived length is larger than allocated array" ); + length = recv_length; + } + // Send the data + MPI_Status status; + MPI_Recv( (void *) buf, length, MPI_CHAR, send_proc_number, tag, communicator, &status ); + PROFILE_STOP( "recv", profile_level ); +} +// int +template<> +void MPI_CLASS::recv( + int *buf, int &length, const int send_proc_number, const bool get_length, int tag ) const +{ + // Set the tag to 0 if it is < 0 + tag = ( tag >= 0 ) ? 
tag : 0; + MPI_INSIST( tag <= d_maxTag, "Maximum tag value exceeded" ); + PROFILE_START( "recv", profile_level ); + // Get the recieve length if necessary + if ( get_length ) { + int bytes = this->probe( send_proc_number, tag ); + int recv_length = bytes / sizeof( int ); + MPI_INSIST( length >= recv_length, "Recived length is larger than allocated array" ); + length = recv_length; + } + // Send the data + MPI_Status status; + MPI_Recv( (void *) buf, length, MPI_INT, send_proc_number, tag, communicator, &status ); + PROFILE_STOP( "recv", profile_level ); +} +// float +template<> +void MPI_CLASS::recv( + float *buf, int &length, const int send_proc_number, const bool get_length, int tag ) const +{ + // Set the tag to 0 if it is < 0 + tag = ( tag >= 0 ) ? tag : 0; + MPI_INSIST( tag <= d_maxTag, "Maximum tag value exceeded" ); + PROFILE_START( "recv", profile_level ); + // Get the recieve length if necessary + if ( get_length ) { + int bytes = this->probe( send_proc_number, tag ); + int recv_length = bytes / sizeof( float ); + MPI_INSIST( length >= recv_length, "Recived length is larger than allocated array" ); + length = recv_length; + } + // Send the data + MPI_Status status; + MPI_Recv( (void *) buf, length, MPI_FLOAT, send_proc_number, tag, communicator, &status ); + PROFILE_STOP( "recv", profile_level ); +} +// double +template<> +void MPI_CLASS::recv( + double *buf, int &length, const int send_proc_number, const bool get_length, int tag ) const +{ + // Set the tag to 0 if it is < 0 + tag = ( tag >= 0 ) ? tag : 0; + MPI_INSIST( tag <= d_maxTag, "Maximum tag value exceeded" ); + PROFILE_START( "recv", profile_level ); + // Get the recieve length if necessary + if ( get_length ) { + int bytes = this->probe( send_proc_number, tag ); + int recv_length = bytes / sizeof( double ); + MPI_INSIST( length >= recv_length, "Recived length is larger than allocated array" ); + length = recv_length; + } + // Send the data + MPI_Status status; + MPI_Recv( (void *) buf, length, MPI_DOUBLE, send_proc_number, tag, communicator, &status ); + PROFILE_STOP( "recv", profile_level ); +} +#else +// We need a concrete instantiation of recv for use without mpi +template<> +void MPI_CLASS::recv( char *buf, int &length, const int, const bool, int tag ) const +{ + MPI_INSIST( tag <= d_maxTag, "Maximum tag value exceeded" ); + MPI_INSIST( tag >= 0, "tag must be >= 0" ); + PROFILE_START( "recv", profile_level ); + auto id = getRequest( communicator, tag ); + auto it = global_isendrecv_list.find( id ); + MPI_INSIST( it != global_isendrecv_list.end(), + "recv must be paired with a previous call to isend in serial" ); + MPI_ASSERT( it->second.status == 1 ); + memcpy( buf, it->second.data, length ); + global_isendrecv_list.erase( it ); + PROFILE_STOP( "recv", profile_level ); +} +#endif + + +/************************************************************************ + * Non-blocking recieve data array to another processor. * + * Note: these specializations are only called when using MPI. 
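A short usage sketch for the blocking send/recv specializations above. Here `comm` stands for a Utilities::MPI instance spanning at least two ranks; getRank() and the example function name are assumptions (the accessor is declared elsewhere in MPI.h, not in this hunk):

#include <vector>
#include "common/MPI.h"

// Hypothetical helper: rank 0 sends 100 doubles to rank 1, which sizes the
// receive by probing (get_length = true) as implemented in the recv code above.
void example_send_recv( const Utilities::MPI &comm )
{
    std::vector<double> data( 100, 3.14 );
    const int tag = 17;
    if ( comm.getRank() == 0 ) {
        comm.send( data.data(), 100, 1, tag );       // blocking send to rank 1
    } else if ( comm.getRank() == 1 ) {
        int length = 100;                             // capacity of the receive buffer
        // get_length=true probes first and shrinks `length` to the actual message size
        comm.recv( data.data(), length, 0, true, tag );
    }
}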
* + ************************************************************************/ +#ifdef USE_MPI +// char +template<> +MPI_Request MPI_CLASS::Irecv( + char *buf, const int length, const int send_proc, const int tag ) const +{ + MPI_INSIST( tag <= d_maxTag, "Maximum tag value exceeded" ); + MPI_INSIST( tag >= 0, "tag must be >= 0" ); + MPI_Request request; + PROFILE_START( "Irecv", profile_level ); + MPI_Irecv( (void *) buf, length, MPI_CHAR, send_proc, tag, communicator, &request ); + PROFILE_STOP( "Irecv", profile_level ); + return request; +} +// int +template<> +MPI_Request MPI_CLASS::Irecv( + int *buf, const int length, const int send_proc, const int tag ) const +{ + MPI_INSIST( tag <= d_maxTag, "Maximum tag value exceeded" ); + MPI_INSIST( tag >= 0, "tag must be >= 0" ); + MPI_Request request; + PROFILE_START( "Irecv", profile_level ); + MPI_Irecv( (void *) buf, length, MPI_INT, send_proc, tag, communicator, &request ); + PROFILE_STOP( "Irecv", profile_level ); + return request; +} +// float +template<> +MPI_Request MPI_CLASS::Irecv( + float *buf, const int length, const int send_proc, const int tag ) const +{ + MPI_INSIST( tag <= d_maxTag, "Maximum tag value exceeded" ); + MPI_INSIST( tag >= 0, "tag must be >= 0" ); + MPI_Request request; + PROFILE_START( "Irecv", profile_level ); + MPI_Irecv( (void *) buf, length, MPI_FLOAT, send_proc, tag, communicator, &request ); + PROFILE_STOP( "Irecv", profile_level ); + return request; +} +// double +template<> +MPI_Request MPI_CLASS::Irecv( + double *buf, const int length, const int send_proc, const int tag ) const +{ + MPI_INSIST( tag <= d_maxTag, "Maximum tag value exceeded" ); + MPI_INSIST( tag >= 0, "tag must be >= 0" ); + MPI_Request request; + PROFILE_START( "Irecv", profile_level ); + MPI_Irecv( (void *) buf, length, MPI_DOUBLE, send_proc, tag, communicator, &request ); + PROFILE_STOP( "Irecv", profile_level ); + return request; +} +#else +// We need a concrete instantiation of irecv for use without mpi +template<> +MPI_Request MPI_CLASS::Irecv( char *buf, const int length, const int, const int tag ) const +{ + MPI_INSIST( tag <= d_maxTag, "Maximum tag value exceeded" ); + MPI_INSIST( tag >= 0, "tag must be >= 0" ); + PROFILE_START( "Irecv", profile_level ); + auto id = getRequest( communicator, tag ); + auto it = global_isendrecv_list.find( id ); + if ( it == global_isendrecv_list.end() ) { + // We are calling Irecv first + Isendrecv_struct data; + data.data = buf; + data.status = 2; + global_isendrecv_list.insert( std::pair( id, data ) ); + } else { + // We called Isend first + MPI_ASSERT( it->second.status == 1 ); + memcpy( buf, it->second.data, length ); + global_isendrecv_list.erase( it ); + } + PROFILE_STOP( "Irecv", profile_level ); + return id; +} +#endif + + +/************************************************************************ + * Recieve byte array to another processor. * + ************************************************************************/ +void MPI_CLASS::recvBytes( void *buf, int &number_bytes, const int send_proc, int tag ) const +{ + recv( (char *) buf, number_bytes, send_proc, false, tag ); +} + + +/************************************************************************ + * Recieve byte array to another processor. 
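A hedged sketch of the non-blocking pattern the Isend/Irecv specializations support, paired with waitAll() defined further down in this file; getRank(), getSize(), and the helper name are assumptions rather than part of the patch:

#include <vector>
#include "common/MPI.h"

// Hypothetical ring shift: receive from the previous rank, send to the next one.
void example_ring_exchange( Utilities::MPI &comm )
{
    const int n = 64, tag = 3;
    std::vector<int> out( n, comm.getRank() ), in( n );
    const int next = ( comm.getRank() + 1 ) % comm.getSize();
    const int prev = ( comm.getRank() + comm.getSize() - 1 ) % comm.getSize();
    MPI_Request req[2];
    req[0] = comm.Irecv( in.data(), n, prev, tag );   // post the receive first
    req[1] = comm.Isend( out.data(), n, next, tag );
    comm.waitAll( 2, req );                           // wait functions appear later in this file
}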
* + ************************************************************************/ +MPI_Request MPI_CLASS::IrecvBytes( + void *buf, const int number_bytes, const int send_proc, const int tag ) const +{ + MPI_INSIST( tag <= d_maxTag, "Maximum tag value exceeded" ); + MPI_INSIST( tag >= 0, "tag must be >= 0" ); + return Irecv( (char *) buf, number_bytes, send_proc, tag ); +} + + +/************************************************************************ + * allGather * + * Note: these specializations are only called when using MPI. * + ************************************************************************/ +#ifdef USE_MPI +// unsigned char +template<> +void MPI_CLASS::call_allGather( + const unsigned char &x_in, unsigned char *x_out ) const +{ + PROFILE_START( "allGather", profile_level ); + MPI_Allgather( + (void *) &x_in, 1, MPI_UNSIGNED_CHAR, (void *) x_out, 1, MPI_UNSIGNED_CHAR, communicator ); + PROFILE_STOP( "allGather", profile_level ); +} +template<> +void MPI_CLASS::call_allGather( const unsigned char *x_in, int size_in, + unsigned char *x_out, int *size_out, int *disp_out ) const +{ + PROFILE_START( "allGatherv", profile_level ); + MPI_Allgatherv( (void *) x_in, size_in, MPI_CHAR, (void *) x_out, size_out, disp_out, MPI_CHAR, + communicator ); + PROFILE_STOP( "allGatherv", profile_level ); +} +// char +template<> +void MPI_CLASS::call_allGather( const char &x_in, char *x_out ) const +{ + PROFILE_START( "allGather", profile_level ); + MPI_Allgather( (void *) &x_in, 1, MPI_CHAR, (void *) x_out, 1, MPI_CHAR, communicator ); + PROFILE_STOP( "allGather", profile_level ); +} +template<> +void MPI_CLASS::call_allGather( + const char *x_in, int size_in, char *x_out, int *size_out, int *disp_out ) const +{ + PROFILE_START( "allGatherv", profile_level ); + MPI_Allgatherv( (void *) x_in, size_in, MPI_CHAR, (void *) x_out, size_out, disp_out, MPI_CHAR, + communicator ); + PROFILE_STOP( "allGatherv", profile_level ); +} +// unsigned int +template<> +void MPI_CLASS::call_allGather( const unsigned int &x_in, unsigned int *x_out ) const +{ + PROFILE_START( "allGather", profile_level ); + MPI_Allgather( (void *) &x_in, 1, MPI_UNSIGNED, (void *) x_out, 1, MPI_UNSIGNED, communicator ); + PROFILE_STOP( "allGather", profile_level ); +} +template<> +void MPI_CLASS::call_allGather( + const unsigned int *x_in, int size_in, unsigned int *x_out, int *size_out, int *disp_out ) const +{ + PROFILE_START( "allGatherv", profile_level ); + MPI_Allgatherv( (void *) x_in, size_in, MPI_UNSIGNED, (void *) x_out, size_out, disp_out, + MPI_UNSIGNED, communicator ); + PROFILE_STOP( "allGatherv", profile_level ); +} +// int +template<> +void MPI_CLASS::call_allGather( const int &x_in, int *x_out ) const +{ + PROFILE_START( "allGather", profile_level ); + MPI_Allgather( (void *) &x_in, 1, MPI_INT, (void *) x_out, 1, MPI_INT, communicator ); + PROFILE_STOP( "allGather", profile_level ); +} +template<> +void MPI_CLASS::call_allGather( + const int *x_in, int size_in, int *x_out, int *size_out, int *disp_out ) const +{ + PROFILE_START( "allGatherv", profile_level ); + MPI_Allgatherv( (void *) x_in, size_in, MPI_INT, (void *) x_out, size_out, disp_out, MPI_INT, + communicator ); + PROFILE_STOP( "allGatherv", profile_level ); +} +// unsigned long int +template<> +void MPI_CLASS::call_allGather( + const unsigned long int &x_in, unsigned long int *x_out ) const +{ + PROFILE_START( "allGather", profile_level ); + MPI_Allgather( + (void *) &x_in, 1, MPI_UNSIGNED_LONG, (void *) x_out, 1, MPI_UNSIGNED_LONG, communicator ); + 
PROFILE_STOP( "allGather", profile_level ); +} +template<> +void MPI_CLASS::call_allGather( const unsigned long int *x_in, int size_in, + unsigned long int *x_out, int *size_out, int *disp_out ) const +{ + PROFILE_START( "allGatherv", profile_level ); + MPI_Allgatherv( (void *) x_in, size_in, MPI_UNSIGNED_LONG, (void *) x_out, size_out, disp_out, + MPI_UNSIGNED_LONG, communicator ); + PROFILE_STOP( "allGatherv", profile_level ); +} +// long int +template<> +void MPI_CLASS::call_allGather( const long int &x_in, long int *x_out ) const +{ + PROFILE_START( "allGather", profile_level ); + MPI_Allgather( (void *) &x_in, 1, MPI_LONG, (void *) x_out, 1, MPI_LONG, communicator ); + PROFILE_STOP( "allGather", profile_level ); +} +template<> +void MPI_CLASS::call_allGather( + const long int *x_in, int size_in, long int *x_out, int *size_out, int *disp_out ) const +{ + PROFILE_START( "allGatherv", profile_level ); + MPI_Allgatherv( (void *) x_in, size_in, MPI_LONG, (void *) x_out, size_out, disp_out, MPI_LONG, + communicator ); + PROFILE_STOP( "allGatherv", profile_level ); +} +// float +template<> +void MPI_CLASS::call_allGather( const float &x_in, float *x_out ) const +{ + PROFILE_START( "allGather", profile_level ); + MPI_Allgather( (void *) &x_in, 1, MPI_FLOAT, (void *) x_out, 1, MPI_FLOAT, communicator ); + PROFILE_STOP( "allGather", profile_level ); +} +template<> +void MPI_CLASS::call_allGather( + const float *x_in, int size_in, float *x_out, int *size_out, int *disp_out ) const +{ + PROFILE_START( "allGatherv", profile_level ); + MPI_Allgatherv( (void *) x_in, size_in, MPI_FLOAT, (void *) x_out, size_out, disp_out, + MPI_FLOAT, communicator ); + PROFILE_STOP( "allGatherv", profile_level ); +} +// double +template<> +void MPI_CLASS::call_allGather( const double &x_in, double *x_out ) const +{ + PROFILE_START( "allGather", profile_level ); + MPI_Allgather( (void *) &x_in, 1, MPI_DOUBLE, (void *) x_out, 1, MPI_DOUBLE, communicator ); + PROFILE_STOP( "allGather", profile_level ); +} +template<> +void MPI_CLASS::call_allGather( + const double *x_in, int size_in, double *x_out, int *size_out, int *disp_out ) const +{ + PROFILE_START( "allGatherv", profile_level ); + MPI_Allgatherv( (void *) x_in, size_in, MPI_DOUBLE, (void *) x_out, size_out, disp_out, + MPI_DOUBLE, communicator ); + PROFILE_STOP( "allGatherv", profile_level ); +} +#else +// We need a concrete instantiation of call_allGather(x_in,size_in,x_out,size_out) +template<> +void MPI_CLASS::call_allGather( const char *, int, char *, int *, int * ) const +{ + MPI_ERROR( "Internal error in communicator (allGather) " ); +} +#endif + + +/************************************************************************ + * allToAll * + * Note: these specializations are only called when using MPI. 
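The variable-size gathers above expect the caller to supply per-rank counts (size_out) and displacements (disp_out). The following standalone sketch shows how those arrays are normally built with raw MPI calls rather than the wrapper, assuming a double payload:

#include <mpi.h>
#include <vector>

// Gather the per-rank counts first, then form displacements as a prefix sum.
std::vector<double> example_allgatherv( const std::vector<double> &x_local )
{
    int nprocs;
    MPI_Comm_size( MPI_COMM_WORLD, &nprocs );
    int n_local = static_cast<int>( x_local.size() );
    std::vector<int> size_out( nprocs ), disp_out( nprocs, 0 );
    MPI_Allgather( &n_local, 1, MPI_INT, size_out.data(), 1, MPI_INT, MPI_COMM_WORLD );
    for ( int i = 1; i < nprocs; i++ )
        disp_out[i] = disp_out[i - 1] + size_out[i - 1];          // prefix sum of counts
    std::vector<double> x_all( disp_out[nprocs - 1] + size_out[nprocs - 1] );
    MPI_Allgatherv( x_local.data(), n_local, MPI_DOUBLE, x_all.data(),
        size_out.data(), disp_out.data(), MPI_DOUBLE, MPI_COMM_WORLD );
    return x_all;
}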
* + ************************************************************************/ +#ifdef USE_MPI +template<> +void MPI_CLASS::allToAll( + const int n, const unsigned char *send, unsigned char *recv ) const +{ + PROFILE_START( "allToAll", profile_level ); + MPI_Alltoall( + (void *) send, n, MPI_UNSIGNED_CHAR, (void *) recv, n, MPI_UNSIGNED_CHAR, communicator ); + PROFILE_STOP( "allToAll", profile_level ); +} +template<> +void MPI_CLASS::allToAll( const int n, const char *send, char *recv ) const +{ + PROFILE_START( "allToAll", profile_level ); + MPI_Alltoall( (void *) send, n, MPI_CHAR, (void *) recv, n, MPI_CHAR, communicator ); + PROFILE_STOP( "allToAll", profile_level ); +} +template<> +void MPI_CLASS::allToAll( + const int n, const unsigned int *send, unsigned int *recv ) const +{ + PROFILE_START( "allToAll", profile_level ); + MPI_Alltoall( (void *) send, n, MPI_UNSIGNED, (void *) recv, n, MPI_UNSIGNED, communicator ); + PROFILE_STOP( "allToAll", profile_level ); +} +template<> +void MPI_CLASS::allToAll( const int n, const int *send, int *recv ) const +{ + PROFILE_START( "allToAll", profile_level ); + MPI_Alltoall( (void *) send, n, MPI_INT, (void *) recv, n, MPI_INT, communicator ); + PROFILE_STOP( "allToAll", profile_level ); +} +template<> +void MPI_CLASS::allToAll( + const int n, const unsigned long int *send, unsigned long int *recv ) const +{ + PROFILE_START( "allToAll", profile_level ); + MPI_Alltoall( + (void *) send, n, MPI_UNSIGNED_LONG, (void *) recv, n, MPI_UNSIGNED_LONG, communicator ); + PROFILE_STOP( "allToAll", profile_level ); +} +template<> +void MPI_CLASS::allToAll( const int n, const long int *send, long int *recv ) const +{ + PROFILE_START( "allToAll", profile_level ); + MPI_Alltoall( (void *) send, n, MPI_LONG, (void *) recv, n, MPI_LONG, communicator ); + PROFILE_STOP( "allToAll", profile_level ); +} +template<> +void MPI_CLASS::allToAll( const int n, const float *send, float *recv ) const +{ + PROFILE_START( "allToAll", profile_level ); + MPI_Alltoall( (void *) send, n, MPI_FLOAT, (void *) recv, n, MPI_FLOAT, communicator ); + PROFILE_STOP( "allToAll", profile_level ); +} +template<> +void MPI_CLASS::allToAll( const int n, const double *send, double *recv ) const +{ + PROFILE_START( "allToAll", profile_level ); + MPI_Alltoall( (void *) send, n, MPI_DOUBLE, (void *) recv, n, MPI_DOUBLE, communicator ); + PROFILE_STOP( "allToAll", profile_level ); +} +#endif + + +/************************************************************************ + * call_allToAll * + * Note: these specializations are only called when using MPI. 
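For the fixed-size allToAll specializations above, each rank contributes n entries for every destination, so both buffers hold n times the communicator size. A brief usage sketch follows; getRank()/getSize() and the helper name are assumed accessors not shown in this hunk:

#include <vector>
#include "common/MPI.h"

// Hypothetical helper: after the call, block i of recv holds the n values sent by rank i.
void example_alltoall( const Utilities::MPI &comm )
{
    const int n = 4;
    std::vector<double> send( n * comm.getSize(), double( comm.getRank() ) );
    std::vector<double> recv( n * comm.getSize() );
    comm.allToAll( n, send.data(), recv.data() );
}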
* + ************************************************************************/ +#ifdef USE_MPI +// unsigned char +template<> +void MPI_CLASS::call_allToAll( const unsigned char *send_data, const int send_cnt[], + const int send_disp[], unsigned char *recv_data, const int *recv_cnt, + const int *recv_disp ) const +{ + PROFILE_START( "allToAllv", profile_level ); + MPI_Alltoallv( (void *) send_data, (int *) send_cnt, (int *) send_disp, MPI_UNSIGNED_CHAR, + (void *) recv_data, (int *) recv_cnt, (int *) recv_disp, MPI_UNSIGNED_CHAR, communicator ); + PROFILE_STOP( "allToAllv", profile_level ); +} +// char +template<> +void MPI_CLASS::call_allToAll( const char *send_data, const int send_cnt[], + const int send_disp[], char *recv_data, const int *recv_cnt, const int *recv_disp ) const +{ + PROFILE_START( "allToAllv", profile_level ); + MPI_Alltoallv( (void *) send_data, (int *) send_cnt, (int *) send_disp, MPI_CHAR, + (void *) recv_data, (int *) recv_cnt, (int *) recv_disp, MPI_CHAR, communicator ); + PROFILE_STOP( "allToAllv", profile_level ); +} +// unsigned int +template<> +void MPI_CLASS::call_allToAll( const unsigned int *send_data, const int send_cnt[], + const int send_disp[], unsigned int *recv_data, const int *recv_cnt, + const int *recv_disp ) const +{ + PROFILE_START( "allToAllv", profile_level ); + MPI_Alltoallv( (void *) send_data, (int *) send_cnt, (int *) send_disp, MPI_UNSIGNED, + (void *) recv_data, (int *) recv_cnt, (int *) recv_disp, MPI_UNSIGNED, communicator ); + PROFILE_STOP( "allToAllv", profile_level ); +} +// int +template<> +void MPI_CLASS::call_allToAll( const int *send_data, const int send_cnt[], + const int send_disp[], int *recv_data, const int *recv_cnt, const int *recv_disp ) const +{ + PROFILE_START( "allToAllv", profile_level ); + MPI_Alltoallv( (void *) send_data, (int *) send_cnt, (int *) send_disp, MPI_INT, + (void *) recv_data, (int *) recv_cnt, (int *) recv_disp, MPI_INT, communicator ); + PROFILE_STOP( "allToAllv", profile_level ); +} +// unsigned long int +template<> +void MPI_CLASS::call_allToAll( const unsigned long int *send_data, + const int send_cnt[], const int send_disp[], unsigned long int *recv_data, const int *recv_cnt, + const int *recv_disp ) const +{ + PROFILE_START( "allToAllv", profile_level ); + MPI_Alltoallv( (void *) send_data, (int *) send_cnt, (int *) send_disp, MPI_UNSIGNED_LONG, + (void *) recv_data, (int *) recv_cnt, (int *) recv_disp, MPI_UNSIGNED_LONG, communicator ); + PROFILE_STOP( "allToAllv", profile_level ); +} +// long int +template<> +void MPI_CLASS::call_allToAll( const long int *send_data, const int send_cnt[], + const int send_disp[], long int *recv_data, const int *recv_cnt, const int *recv_disp ) const +{ + PROFILE_START( "allToAllv", profile_level ); + MPI_Alltoallv( (void *) send_data, (int *) send_cnt, (int *) send_disp, MPI_LONG, + (void *) recv_data, (int *) recv_cnt, (int *) recv_disp, MPI_LONG, communicator ); + PROFILE_STOP( "allToAllv", profile_level ); +} +// float +template<> +void MPI_CLASS::call_allToAll( const float *send_data, const int send_cnt[], + const int send_disp[], float *recv_data, const int *recv_cnt, const int *recv_disp ) const +{ + PROFILE_START( "allToAllv", profile_level ); + MPI_Alltoallv( (void *) send_data, (int *) send_cnt, (int *) send_disp, MPI_FLOAT, + (void *) recv_data, (int *) recv_cnt, (int *) recv_disp, MPI_FLOAT, communicator ); + PROFILE_STOP( "allToAllv", profile_level ); +} +// double +template<> +void MPI_CLASS::call_allToAll( const double *send_data, const int send_cnt[], 
+ const int send_disp[], double *recv_data, const int *recv_cnt, const int *recv_disp ) const +{ + PROFILE_START( "allToAllv", profile_level ); + MPI_Alltoallv( (void *) send_data, (int *) send_cnt, (int *) send_disp, MPI_DOUBLE, + (void *) recv_data, (int *) recv_cnt, (int *) recv_disp, MPI_DOUBLE, communicator ); + PROFILE_STOP( "allToAllv", profile_level ); +} +#else +// Default instatiation of unsigned char +template<> +void MPI_CLASS::call_allToAll( + const char *, const int[], const int[], char *, const int *, const int * ) const +{ + MPI_ERROR( "Should not reach this point" ); +} +#endif + + +/************************************************************************ + * call_sumScan * + * Note: these specializations are only called when using MPI. * + ************************************************************************/ +#ifdef USE_MPI +// unsigned char +template<> +void MPI_CLASS::call_sumScan( + const unsigned char *send, unsigned char *recv, int n ) const +{ + PROFILE_START( "sumScan", profile_level ); + MPI_Scan( (void *) send, (void *) recv, n, MPI_UNSIGNED_CHAR, MPI_SUM, communicator ); + PROFILE_STOP( "sumScan", profile_level ); +} +// char +template<> +void MPI_CLASS::call_sumScan( const char *send, char *recv, int n ) const +{ + PROFILE_START( "sumScan", profile_level ); + MPI_Scan( (void *) send, (void *) recv, n, MPI_SIGNED_CHAR, MPI_SUM, communicator ); + PROFILE_STOP( "sumScan", profile_level ); +} +// unsigned int +template<> +void MPI_CLASS::call_sumScan( + const unsigned int *send, unsigned int *recv, int n ) const +{ + PROFILE_START( "sumScan", profile_level ); + MPI_Scan( (void *) send, (void *) recv, n, MPI_UNSIGNED, MPI_SUM, communicator ); + PROFILE_STOP( "sumScan", profile_level ); +} +// int +template<> +void MPI_CLASS::call_sumScan( const int *send, int *recv, int n ) const +{ + PROFILE_START( "sumScan", profile_level ); + MPI_Scan( (void *) send, (void *) recv, n, MPI_INT, MPI_SUM, communicator ); + PROFILE_STOP( "sumScan", profile_level ); +} +// long int +template<> +void MPI_CLASS::call_sumScan( const long int *send, long int *recv, int n ) const +{ + PROFILE_START( "sumScan", profile_level ); + MPI_Scan( (void *) send, (void *) recv, n, MPI_LONG, MPI_SUM, communicator ); + PROFILE_STOP( "sumScan", profile_level ); +} +// unsigned long int +template<> +void MPI_CLASS::call_sumScan( + const unsigned long *send, unsigned long *recv, int n ) const +{ + PROFILE_START( "sumScan", profile_level ); + MPI_Scan( (void *) send, (void *) recv, n, MPI_UNSIGNED_LONG, MPI_SUM, communicator ); + PROFILE_STOP( "sumScan", profile_level ); +} +// size_t +#ifdef USE_WINDOWS +template<> +void MPI_CLASS::call_sumScan( const size_t *send, size_t *recv, int n ) const +{ + MPI_ASSERT( MPI_SIZE_T != 0 ); + PROFILE_START( "sumScan", profile_level ); + MPI_Scan( (void *) send, (void *) recv, n, MPI_SIZE_T, MPI_SUM, communicator ); + PROFILE_STOP( "sumScan", profile_level ); +} +#endif +// float +template<> +void MPI_CLASS::call_sumScan( const float *send, float *recv, int n ) const +{ + PROFILE_START( "sumScan", profile_level ); + MPI_Scan( (void *) send, (void *) recv, n, MPI_FLOAT, MPI_SUM, communicator ); + PROFILE_STOP( "sumScan", profile_level ); +} +// double +template<> +void MPI_CLASS::call_sumScan( const double *send, double *recv, int n ) const +{ + PROFILE_START( "sumScan", profile_level ); + MPI_Scan( (void *) send, (void *) recv, n, MPI_DOUBLE, MPI_SUM, communicator ); + PROFILE_STOP( "sumScan", profile_level ); +} +// std::complex +template<> +void 
MPI_CLASS::call_sumScan>( + const std::complex *x, std::complex *y, int n ) const +{ + auto send = new double[2 * n]; + auto recv = new double[2 * n]; + for ( int i = 0; i < n; i++ ) { + send[2 * i + 0] = real( x[i] ); + send[2 * i + 1] = imag( x[i] ); + } + MPI_Scan( (void *) send, (void *) recv, 2 * n, MPI_DOUBLE, MPI_SUM, communicator ); + for ( int i = 0; i < n; i++ ) + y[i] = std::complex( recv[2 * i + 0], recv[2 * i + 1] ); + delete[] send; + delete[] recv; +} +#endif + + +/************************************************************************ + * call_minScan * + * Note: these specializations are only called when using MPI. * + ************************************************************************/ +#ifdef USE_MPI +// unsigned char +template<> +void MPI_CLASS::call_minScan( + const unsigned char *send, unsigned char *recv, int n ) const +{ + PROFILE_START( "minScan", profile_level ); + MPI_Scan( (void *) send, (void *) recv, n, MPI_UNSIGNED_CHAR, MPI_MIN, communicator ); + PROFILE_STOP( "minScan", profile_level ); +} +// char +template<> +void MPI_CLASS::call_minScan( const char *send, char *recv, int n ) const +{ + PROFILE_START( "minScan", profile_level ); + MPI_Scan( (void *) send, (void *) recv, n, MPI_SIGNED_CHAR, MPI_MIN, communicator ); + PROFILE_STOP( "minScan", profile_level ); +} +// unsigned int +template<> +void MPI_CLASS::call_minScan( + const unsigned int *send, unsigned int *recv, int n ) const +{ + PROFILE_START( "minScan", profile_level ); + MPI_Scan( (void *) send, (void *) recv, n, MPI_UNSIGNED, MPI_MIN, communicator ); + PROFILE_STOP( "minScan", profile_level ); +} +// int +template<> +void MPI_CLASS::call_minScan( const int *send, int *recv, int n ) const +{ + PROFILE_START( "minScan", profile_level ); + MPI_Scan( (void *) send, (void *) recv, n, MPI_INT, MPI_MIN, communicator ); + PROFILE_STOP( "minScan", profile_level ); +} +// unsigned long int +template<> +void MPI_CLASS::call_minScan( + const unsigned long int *send, unsigned long int *recv, int n ) const +{ + PROFILE_START( "minScan", profile_level ); + MPI_Scan( (void *) send, (void *) recv, n, MPI_UNSIGNED_LONG, MPI_MIN, communicator ); + PROFILE_STOP( "minScan", profile_level ); +} +// long int +template<> +void MPI_CLASS::call_minScan( const long int *send, long int *recv, int n ) const +{ + PROFILE_START( "minScan", profile_level ); + MPI_Scan( (void *) send, (void *) recv, n, MPI_LONG, MPI_MIN, communicator ); + PROFILE_STOP( "minScan", profile_level ); +} +// size_t +#ifdef USE_WINDOWS +template<> +void MPI_CLASS::call_minScan( const size_t *send, size_t *recv, int n ) const +{ + MPI_ASSERT( MPI_SIZE_T != 0 ); + PROFILE_START( "minScan", profile_level ); + MPI_Scan( (void *) send, (void *) recv, n, MPI_SIZE_T, MPI_MIN, communicator ); + PROFILE_STOP( "minScan", profile_level ); +} +#endif +// float +template<> +void MPI_CLASS::call_minScan( const float *send, float *recv, int n ) const +{ + PROFILE_START( "minScan", profile_level ); + MPI_Scan( (void *) send, (void *) recv, n, MPI_FLOAT, MPI_MIN, communicator ); + PROFILE_STOP( "minScan", profile_level ); +} +// double +template<> +void MPI_CLASS::call_minScan( const double *send, double *recv, int n ) const +{ + PROFILE_START( "minScan", profile_level ); + MPI_Scan( (void *) send, (void *) recv, n, MPI_DOUBLE, MPI_MIN, communicator ); + PROFILE_STOP( "minScan", profile_level ); +} +#endif + + +/************************************************************************ + * call_maxScan * + * Note: these specializations are only called when using 
MPI. *
+ ************************************************************************/
+#ifdef USE_MPI
+// unsigned char
+template<>
+void MPI_CLASS::call_maxScan(
+    const unsigned char *send, unsigned char *recv, int n ) const
+{
+    PROFILE_START( "maxScan", profile_level );
+    MPI_Scan( (void *) send, (void *) recv, n, MPI_UNSIGNED_CHAR, MPI_MAX, communicator );
+    PROFILE_STOP( "maxScan", profile_level );
+}
+// char
+template<>
+void MPI_CLASS::call_maxScan( const char *send, char *recv, int n ) const
+{
+    PROFILE_START( "maxScan", profile_level );
+    MPI_Scan( (void *) send, (void *) recv, n, MPI_SIGNED_CHAR, MPI_MAX, communicator );
+    PROFILE_STOP( "maxScan", profile_level );
+}
+// unsigned int
+template<>
+void MPI_CLASS::call_maxScan(
+    const unsigned int *send, unsigned int *recv, int n ) const
+{
+    PROFILE_START( "maxScan", profile_level );
+    MPI_Scan( (void *) send, (void *) recv, n, MPI_UNSIGNED, MPI_MAX, communicator );
+    PROFILE_STOP( "maxScan", profile_level );
+}
+// int
+template<>
+void MPI_CLASS::call_maxScan( const int *send, int *recv, int n ) const
+{
+    PROFILE_START( "maxScan", profile_level );
+    MPI_Scan( (void *) send, (void *) recv, n, MPI_INT, MPI_MAX, communicator );
+    PROFILE_STOP( "maxScan", profile_level );
+}
+// long int
+template<>
+void MPI_CLASS::call_maxScan( const long int *send, long int *recv, int n ) const
+{
+    PROFILE_START( "maxScan", profile_level );
+    MPI_Scan( (void *) send, (void *) recv, n, MPI_LONG, MPI_MAX, communicator );
+    PROFILE_STOP( "maxScan", profile_level );
+}
+// unsigned long int
+template<>
+void MPI_CLASS::call_maxScan(
+    const unsigned long int *send, unsigned long int *recv, int n ) const
+{
+    PROFILE_START( "maxScan", profile_level );
+    MPI_Scan( (void *) send, (void *) recv, n, MPI_UNSIGNED_LONG, MPI_MAX, communicator );
+    PROFILE_STOP( "maxScan", profile_level );
+}
+// size_t
+#ifdef USE_WINDOWS
+template<>
+void MPI_CLASS::call_maxScan( const size_t *send, size_t *recv, int n ) const
+{
+    MPI_ASSERT( MPI_SIZE_T != 0 );
+    PROFILE_START( "maxScan", profile_level );
+    MPI_Scan( (void *) send, (void *) recv, n, MPI_SIZE_T, MPI_MAX, communicator );
+    PROFILE_STOP( "maxScan", profile_level );
+}
+#endif
+// float
+template<>
+void MPI_CLASS::call_maxScan( const float *send, float *recv, int n ) const
+{
+    PROFILE_START( "maxScan", profile_level );
+    MPI_Scan( (void *) send, (void *) recv, n, MPI_FLOAT, MPI_MAX, communicator );
+    PROFILE_STOP( "maxScan", profile_level );
+}
+// double
+template<>
+void MPI_CLASS::call_maxScan( const double *send, double *recv, int n ) const
+{
+    PROFILE_START( "maxScan", profile_level );
+    MPI_Scan( (void *) send, (void *) recv, n, MPI_DOUBLE, MPI_MAX, communicator );
+    PROFILE_STOP( "maxScan", profile_level );
+}
+#endif
+
+
+/************************************************************************
+ * Communicate ranks for communication                                   *
+ ************************************************************************/
+std::vector<int> MPI_CLASS::commRanks( const std::vector<int> &ranks ) const
+{
+#ifdef USE_MPI
+    // Get a byte array with the ranks to communicate
+    auto data1 = new char[comm_size];
+    auto data2 = new char[comm_size];
+    memset( data1, 0, comm_size );
+    memset( data2, 0, comm_size );
+    for ( auto &rank : ranks )
+        data1[rank] = 1;
+    MPI_Alltoall( data1, 1, MPI_CHAR, data2, 1, MPI_CHAR, communicator );
+    int N = 0;
+    for ( int i = 0; i < comm_size; i++ )
+        N += data2[i];
+    std::vector<int> ranks_out;
+    ranks_out.reserve( N );
+    for ( int i = 0; i < comm_size; i++ ) {
+        if ( data2[i] )
+
ranks_out.push_back( i ); + } + delete[] data1; + delete[] data2; + return ranks_out; +#else + return ranks; +#endif +} + + +/************************************************************************ + * Wait functions * + ************************************************************************/ +#ifdef USE_MPI +void MPI_CLASS::wait( MPI_Request request ) +{ + PROFILE_START( "wait", profile_level ); + MPI_Status status; + int flag = 0; + int err = MPI_Test( &request, &flag, &status ); + MPI_ASSERT( err == MPI_SUCCESS ); // Check that the first call is valid + while ( !flag ) { + // Put the current thread to sleep to allow other threads to run + sched_yield(); + // Check if the request has finished + MPI_Test( &request, &flag, &status ); + } + PROFILE_STOP( "wait", profile_level ); +} +int MPI_CLASS::waitAny( int count, MPI_Request *request ) +{ + if ( count == 0 ) + return -1; + PROFILE_START( "waitAny", profile_level ); + int index = -1; + int flag = 0; + auto status = new MPI_Status[count]; + int err = MPI_Testany( count, request, &index, &flag, status ); + MPI_ASSERT( err == MPI_SUCCESS ); // Check that the first call is valid + while ( !flag ) { + // Put the current thread to sleep to allow other threads to run + sched_yield(); + // Check if the request has finished + MPI_Testany( count, request, &index, &flag, status ); + } + MPI_ASSERT( index >= 0 ); // Check that the index is valid + delete[] status; + PROFILE_STOP( "waitAny", profile_level ); + return index; +} +void MPI_CLASS::waitAll( int count, MPI_Request *request ) +{ + if ( count == 0 ) + return; + PROFILE_START( "waitAll", profile_level ); + int flag = 0; + auto status = new MPI_Status[count]; + int err = MPI_Testall( count, request, &flag, status ); + MPI_ASSERT( err == MPI_SUCCESS ); // Check that the first call is valid + while ( !flag ) { + // Put the current thread to sleep to allow other threads to run + sched_yield(); + // Check if the request has finished + MPI_Testall( count, request, &flag, status ); + } + PROFILE_STOP( "waitAll", profile_level ); + delete[] status; +} +std::vector MPI_CLASS::waitSome( int count, MPI_Request *request ) +{ + if ( count == 0 ) + return std::vector(); + PROFILE_START( "waitSome", profile_level ); + std::vector indicies( count, -1 ); + auto *status = new MPI_Status[count]; + int outcount = 0; + int err = MPI_Testsome( count, request, &outcount, &indicies[0], status ); + MPI_ASSERT( err == MPI_SUCCESS ); // Check that the first call is valid + MPI_ASSERT( outcount != MPI_UNDEFINED ); // Check that the first call is valid + while ( outcount == 0 ) { + // Put the current thread to sleep to allow other threads to run + sched_yield(); + // Check if the request has finished + MPI_Testsome( count, request, &outcount, &indicies[0], status ); + } + indicies.resize( outcount ); + delete[] status; + PROFILE_STOP( "waitSome", profile_level ); + return indicies; +} +#else +void MPI_CLASS::wait( MPI_Request request ) +{ + PROFILE_START( "wait", profile_level ); + while ( 1 ) { + // Check if the request is in our list + if ( global_isendrecv_list.find( request ) == global_isendrecv_list.end() ) + break; + // Put the current thread to sleep to allow other threads to run + sched_yield(); + } + PROFILE_STOP( "wait", profile_level ); +} +int MPI_CLASS::waitAny( int count, MPI_Request *request ) +{ + if ( count == 0 ) + return -1; + PROFILE_START( "waitAny", profile_level ); + int index = 0; + while ( 1 ) { + // Check if the request is in our list + bool found_any = false; + for ( int i = 0; i < count; i++ 
) { + if ( global_isendrecv_list.find( request[i] ) == global_isendrecv_list.end() ) { + found_any = true; + index = i; + } + } + if ( found_any ) + break; + // Put the current thread to sleep to allow other threads to run + sched_yield(); + } + PROFILE_STOP( "waitAny", profile_level ); + return index; +} +void MPI_CLASS::waitAll( int count, MPI_Request *request ) +{ + if ( count == 0 ) + return; + PROFILE_START( "waitAll", profile_level ); + while ( 1 ) { + // Check if the request is in our list + bool found_all = true; + for ( int i = 0; i < count; i++ ) { + if ( global_isendrecv_list.find( request[i] ) != global_isendrecv_list.end() ) + found_all = false; + } + if ( found_all ) + break; + // Put the current thread to sleep to allow other threads to run + sched_yield(); + } + PROFILE_STOP( "waitAll", profile_level ); +} +std::vector MPI_CLASS::waitSome( int count, MPI_Request *request ) +{ + if ( count == 0 ) + return std::vector(); + PROFILE_START( "waitSome", profile_level ); + std::vector indicies; + while ( 1 ) { + // Check if the request is in our list + for ( int i = 0; i < count; i++ ) { + if ( global_isendrecv_list.find( request[i] ) == global_isendrecv_list.end() ) + indicies.push_back( i ); + } + if ( !indicies.empty() ) + break; + // Put the current thread to sleep to allow other threads to run + sched_yield(); + } + PROFILE_STOP( "waitSome", profile_level ); + return indicies; +} +#endif + + +/************************************************************************ + * Probe functions * + ************************************************************************/ +#ifdef USE_MPI +int MPI_CLASS::Iprobe( int source, int tag ) const +{ + MPI_INSIST( tag <= d_maxTag, "Maximum tag value exceeded" ); + MPI_INSIST( tag >= 0, "tag must be >= 0" ); + MPI_Status status; + int flag = 0; + MPI_Iprobe( source, tag, communicator, &flag, &status ); + if ( flag == 0 ) + return -1; + int count; + MPI_Get_count( &status, MPI_BYTE, &count ); + MPI_ASSERT( count >= 0 ); + return count; +} +int MPI_CLASS::probe( int source, int tag ) const +{ + MPI_INSIST( tag <= d_maxTag, "Maximum tag value exceeded" ); + MPI_INSIST( tag >= 0, "tag must be >= 0" ); + MPI_Status status; + MPI_Probe( source, tag, communicator, &status ); + int count; + MPI_Get_count( &status, MPI_BYTE, &count ); + MPI_ASSERT( count >= 0 ); + return count; +} +#else +int MPI_CLASS::Iprobe( int, int ) const +{ + MPI_ERROR( "Not implimented for serial codes (Iprobe)" ); + return 0; +} +int MPI_CLASS::probe( int, int ) const +{ + MPI_ERROR( "Not implimented for serial codes (probe)" ); + return 0; +} +#endif + + +/************************************************************************ + * Timer functions * + ************************************************************************/ +#ifdef USE_MPI +double MPI_CLASS::time() { return MPI_Wtime(); } +double MPI_CLASS::tick() { return MPI_Wtick(); } +#else +double MPI_CLASS::time() +{ + auto t = std::chrono::system_clock::now(); + auto ns = std::chrono::duration_cast( t.time_since_epoch() ); + return 1e-9 * ns.count(); +} +double MPI_CLASS::tick() +{ + auto period = std::chrono::system_clock::period(); + return static_cast( period.num ) / static_cast( period.den ); +} +#endif + + +/************************************************************************ + * Serialize a block of code across MPI processes * + ************************************************************************/ +void MPI_CLASS::serializeStart() +{ +#ifdef USE_MPI + using namespace std::chrono_literals; + if ( comm_rank == 
0 ) { + // Start rank 0 immediately + } else { + // Wait for a message from the previous rank + MPI_Request request; + MPI_Status status; + int flag = false, buf = 0; + MPI_Irecv( &buf, 1, MPI_INT, comm_rank - 1, 5627, MPI_COMM_WORLD, &request ); + while ( !flag ) { + MPI_Test( &request, &flag, &status ); + std::this_thread::sleep_for( 50ms ); + } + } +#endif +} +void MPI_CLASS::serializeStop() +{ +#ifdef USE_MPI + using namespace std::chrono_literals; + if ( comm_rank < comm_size - 1 ) { + // Send flag to next rank + MPI_Send( &comm_rank, 1, MPI_INT, comm_rank + 1, 5627, MPI_COMM_WORLD ); + // Wait for final finished flag + int flag = false, buf = 0; + MPI_Request request; + MPI_Status status; + MPI_Irecv( &buf, 1, MPI_INT, comm_size - 1, 5627, MPI_COMM_WORLD, &request ); + while ( !flag ) { + MPI_Test( &request, &flag, &status ); + std::this_thread::sleep_for( 50ms ); + } + } else { + // Send final flag to all ranks + for ( int i = 0; i < comm_size - 1; i++ ) + MPI_Send( &comm_rank, 1, MPI_INT, i, 5627, MPI_COMM_WORLD ); + } +#endif +} + + +/**************************************************************************** + * Function to start/stop MPI * + ****************************************************************************/ +#ifdef USE_EXT_MPI +static bool called_MPI_Init = false; +#endif +bool MPI_CLASS::MPI_Active() +{ +#ifdef USE_EXT_MPI + int MPI_initialized, MPI_finialized; + MPI_Initialized( &MPI_initialized ); + MPI_Finalized( &MPI_finialized ); + return MPI_initialized != 0 && MPI_finialized == 0; +#else + return false; +#endif +} +void MPI_CLASS::start_MPI( int argc, char *argv[], int profile_level ) +{ + changeProfileLevel( profile_level ); + NULL_USE( argc ); + NULL_USE( argv ); +#ifdef USE_EXT_MPI + if ( MPI_Active() ) { + called_MPI_Init = false; + } else { + int provided; + int result = MPI_Init_thread( &argc, &argv, MPI_THREAD_MULTIPLE, &provided ); + if ( result != MPI_SUCCESS ) + MPI_ERROR( "Unable to initialize MPI" ); + if ( provided < MPI_THREAD_MULTIPLE ) + std::cerr << "Warning: Failed to start MPI with MPI_THREAD_MULTIPLE\n"; + called_MPI_Init = true; + } +#endif +} +void MPI_CLASS::stop_MPI() +{ +#ifdef USE_EXT_MPI + int finalized; + MPI_Finalized( &finalized ); + if ( called_MPI_Init && !finalized ) { + MPI_Barrier( MPI_COMM_WORLD ); + MPI_Finalize(); + called_MPI_Init = true; + } +#endif +} + + +} // namespace Utilities + diff --git a/common/MPI.h b/common/MPI.h new file mode 100644 index 00000000..e3fd3e13 --- /dev/null +++ b/common/MPI.h @@ -0,0 +1,1152 @@ +// This file includes a wrapper class for MPI functions +// Note this is a modified version of the MPI class for the Advanced Multi-Physics Package +// Used with permission + +/* + +Copyright (c) 2012 UT-Battelle, LLC + +All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: +Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. +Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. +Collection of administrative costs for redistribution of the source code or binary form is allowed. However, collection of a royalty or other fee in excess of good faith amount for cost recovery for such redistribution is prohibited. 
+ +*/ + +#ifndef included_LBPM_MPI +#define included_LBPM_MPI + + +#include +#include +#include +#include +#include +#include +#include + + +// Include mpi.h (or define MPI objects) +// clang-format off +#ifdef USE_MPI + #include "mpi.h" +#else + typedef int MPI_Comm; + typedef int MPI_Request; + typedef int MPI_Status; + typedef void *MPI_Errhandler; + enum MPI_TYPES { MPI_INT, MPI_FLOAT, MPI_DOUBLE }; + #define MPI_COMM_WORLD ( (MPI_Comm) 0xF4000010 ) + #define MPI_COMM_SELF ( (MPI_Comm) 0xF4000001 ) + #define MPI_COMM_NULL ( (MPI_Comm) 0xF4000000 ) +#endif +// clang-format on + + +namespace Utilities { + + +/** + * \class MPI + * + * @brief Provides C++ wrapper around MPI routines. + * + * Class MPI groups common MPI routines into one globally-accessible + * location. It provides small, simple routines that are common in MPI code. + * In some cases, the calling syntax has been simplified for convenience. + * Moreover, there is no reason to include the preprocessor ifdef/endif + * guards around these calls, since the MPI libraries are not called in + * these routines if the MPI libraries are not being used (e.g., when + * writing serial code). + * Note: Many of the communication routines are templated on type. When using + * unknown types the reduce calls will fail, the send and gather calls should + * succeed provided that the size of the data type object is a fixed size on + * all processors. sizeof(type) must be the same for all elements and processors. + */ +class MPI final +{ +public: + enum class ThreadSupport : int { SINGLE, FUNNELED, SERIALIZED, MULTIPLE }; + +public: // Constructors + /** + *\brief Is MPI active + *\details This returns true if MPI is initailized and not finalized + */ + static bool MPI_active(); + + /** + *\brief Empty constructor + *\details This creates an empty constructor that does not contain an MPI communicator. + */ + MPI(); + + + //! Empty destructor + ~MPI(); + + + /** + * \brief Constructor from existing MPI communicator + * \details This constructor creates a new communicator from an existing MPI communicator. + * This does not create a new internal MPI_Comm, but uses the existing comm. + * Note that by default, this will not free the MPI_Comm object and the user is + * responsible + * for free'ing the MPI_Comm when it is no longer used. This behavior is controlled by the + * optional manage argument. + * \param comm Existing MPI communicator + * \param manage Do we want to manage the comm (free the MPI_Comm when this object leaves + * scope) + */ + MPI( MPI_Comm comm, bool manage = false ); + + + /** + * \brief Constructor from existing communicator + * \details This constructor creates a new communicator from an existing communicator. + * This does not create a new internal MPI_Comm, but uses the existing comm. + * \param comm Existing communicator + */ + MPI( const MPI &comm ); + + + /*! + * Move constructor + * @param rhs Communicator to copy + */ + MPI( MPI &&rhs ); + + + /** + * \brief Assignment operator + * \details This operator overloads the assignment to correctly copy an communicator + * \param comm Existing MPI object + */ + MPI &operator=( const MPI &comm ); + + + /*! + * Move assignment operator + * @param rhs Communicator to copy + */ + MPI &operator=( MPI &&rhs ); + + + /** + * \brief Reset the object + * \details This resets the object to the empty state without an MPI_Comm + */ + void reset(); + + +public: // Member functions + /** + * \brief Get the node name + * \details This function returns a unique name for each node. 
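A small usage sketch for the constructors documented above, assuming the wrapper is used as declared here; wrapping an existing MPI_Comm does not transfer ownership unless manage is set:

// Sketch only: barrier() is defined in common/MPI.cpp and is safe with or without MPI.
Utilities::MPI comm( MPI_COMM_WORLD );   // uses MPI_COMM_WORLD, never frees it (manage=false)
Utilities::MPI copy = comm;              // copies refer to the same underlying communicator
comm.barrier();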
+ * It is a wrapper for MPI_Get_processor_name. + */ + static std::string getNodeName(); + + + //! Function to return the number of processors available + static int getNumberOfProcessors(); + + + //! Function to return the affinity of the current process + static std::vector getProcessAffinity(); + + + //! Function to set the affinity of the current process + static void setProcessAffinity( const std::vector &procs ); + + + /** + * \brief Load balance the processes within a node + * \details This function will redistribute the processes within a node using the + * process affinities to achieve the desired load balance. + * Note: this is a global operation on the given comm, and it is STRONGLY + * recommended to use COMM_WORLD. + * \param comm The communicator to use (Default is COMM_WORLD) + * \param method The desired load balance method to use: + * 1: Adjust the affinities so all processes share the given processors. + * This effectively allows the OS to handle the load balancing + * by migrating the processes as necessary. This is recommended + * for most users and use cases. (default) + * 2: Adjust the affinities so that the fewest number of processes overlap. + * This will try to give each process a unique set of processors while + * ensuring that each process has at least N_min processes. + * \param procs An optional list of processors to use. By default, setting this to an + * empty vector will use all available processors on the given node. + * \param N_min The minimum number of processors for any process (-1 indicates all available + * processors). + * \param N_max The maximum number of processors for any process (-1 indicates all available + * processors). + * + */ + static void balanceProcesses( const MPI &comm = MPI( MPI_COMM_WORLD ), const int method = 1, + const std::vector &procs = std::vector(), const int N_min = 1, + const int N_max = -1 ); + + + //! Query the level of thread support + static ThreadSupport queryThreadSupport(); + + + /** + * \brief Generate a random number + * \details This generates a random number that is consistent across the comm + */ + size_t rand() const; + + + /** + * \brief Split an existing communicator + * \details This creates a new communicator by splitting an existing communicator. + * See MPI_Comm_split for information on how the underlying split will occur. + * Note: the underlying MPI_Comm object will be free'd automatically when it is no longer + * used by any MPI objects. + * \param color Control of subset assignment (nonnegative integer). + * Processes with the same color are in the same new communicator . + * -1: processor will not be a member of any object (NULL object will be returned) + * \param key Control of rank assignment (integer). + * Note that, for a fixed color, the keys need not be unique. The processes will + * be sorted + * in ascending order according to this key, then all the processes in a given + * color will + * have the relative rank order as they did in their parent group. (See + * MPI_Comm_split) + */ + MPI split( int color, int key = -1 ) const; + + + /** + * \brief Split an existing communicator by node + * \details This creates a new communicator by splitting an existing communicator + * by the node. This will result in a separate MPI_Comm for each physical node. + * Internally this will use MPI_Get_processor_name to identify the nodes. + * Note: the underlying MPI_Comm object will be free'd automatically when it is no longer + * used by any MPI objects) + * \param key Control of rank assignment (integer). 
+ * Note that, for a fixed color, the keys need not be unique. The processes will + * be sorted + * in ascending order according to this key, then all the processes in a given + * color will + * have the relative rank order as they did in their parent group. (See + * MPI_Comm_split) + */ + MPI splitByNode( int key = -1 ) const; + + + /** + * \brief Duplicate an existing communicator + * \details This creates a new communicator by duplicating an existing communicator. + * The resulting communicator will exist over the same processes, but have a different + * context. + * Note: the underlying MPI_Comm object will be free'd automatically when it is no longer + * used by any MPI objects. + */ + MPI dup() const; + + + /** + * \brief Create a communicator from the intersection of two communicators + * \details This creates a new communicator by intersecting two existing communicators. + * Any processors that do not contain the both communicators will receive a NULL communicator. + * There are 3 possible cases: + * The communicators are disjoint (a null communicator will be returned on all processors). + * One communicator is a sub communicator of another. This will require communication on + * the smaller communicator only. + * The communicators partially overlap. This will require communication on the first + * communicator. + */ + static MPI intersect( const MPI &comm1, const MPI &comm2 ); + + + /** + * Check if the current communicator is NULL + */ + bool isNull() const { return d_isNull; } + + + /** + * \brief Return the global ranks for the comm + * \details This returns a vector which contains the global ranks for each + * member of the communicator. The global ranks are defined according to WORLD comm. + */ + std::vector globalRanks() const; + + + /** + * Get the current MPI communicator. + * Note: The underlying MPI_Comm object may be free'd by the object when it is no + * longer used by any communicators. If the user has made a copy using the + * getCommunicator routine, then it may be free'd without user knowledge. The + * user is responsible for checking if the communicator is valid, or keeping a + * copy of the communicator that provided the MPI_Communicator. + */ + const MPI_Comm &getCommunicator() const { return communicator; } + + + /** + * \brief Overload operator == + * \details Overload operator comm1 == comm2. Two MPI objects are == if they share the same + * communicator. + * Note: this is a local operation. + */ + bool operator==( const MPI & ) const; + + + /** + * \brief Overload operator != + * \details Overload operator comm1 != comm2. Two MPI objects are != if they + * do not share the same communicator. + * Note: this is a local operation. + */ + bool operator!=( const MPI & ) const; + + + /** + * \brief Overload operator < + * \details Overload operator comm1 < comm2. One MPI object is < another iff all the + * processors in the first object are also in the second. Additionally, the second + * object must contain at least one processor that is not in the first object. + * This is a collective operation, based on the first communicator. + * As a result all processors on the first communicator will return the same value, + * while any processors that are not on the first communicator will return an unknown value. + * Additionally, all processors on the first object MUST call this routine and will be + * synchronized through this call (there is an internalallReduce). 
+ */ + bool operator<( const MPI & ) const; + + + /** + * \brief Overload operator <= + * \details Overload operator comm1 <= comm2. One MPI object is <= another iff all the + * processors in the first object are also in the second. This is a collective operation, + * based on the first communicator. As a result all processors on the first communicator + * will return the same value, while any processors that are not on the first communicator + * will return an unknown value. Additionally, all processors on the first object MUST + * call this routine and will be synchronized through this call (there is an internal + * allReduce). + */ + bool operator<=( const MPI & ) const; + + + /** + * \brief Overload operator > + * \details Overload operator comm1 > comm2. One MPI object is > another iff all the + * processors in the second object are also in the first. Additionally, the first object + * must contain at least one processor that is not in the second object. + * This is a collective operation, based on the first communicator. + * As a result all processors on the first communicator will return the same value, + * while any processors that are not on the first communicator will return an unknown value. + * Additionally, all processors on the first object MUST call this routine and will be + * synchronized through this call (there is an internal allReduce). + */ + bool operator>( const MPI & ) const; + + + /** + * \brief Overload operator >= + * \details Overload operator comm1 >= comm2. One MPI object is > another iff all the + * processors in the second object are also in the first. Additionally, the first object + * must contain at least one processor that is not in the second object. + * This is a collective operation, based on the first communicator. + * As a result all processors on the first communicator will return the same value, while any + * processors that are not on the first communicator will return an unknown value. + * Additionally, all processors on the first object MUST call this routine and will be + * synchronized through this call (there is an internal allReduce). + */ + bool operator>=( const MPI & ) const; + + + /** + * \brief Compare to another communicator + * \details This compares the current communicator to another communicator. + * This returns 1 if the two communicators are equal (they share the same MPI communicator), + * 2 if the contexts and groups are the same, 3 if different contexts but identical groups, + * 4 if different contexts but similar groups, and 0 otherwise. + * Note: this is a local operation. + */ + int compare( const MPI & ) const; + + + /** + * Return the processor rank (identifier) from 0 through the number of + * processors minus one. + */ + int getRank() const { return comm_rank; } + + + /** + * Return the number of processors. + */ + int getSize() const { return comm_size; } + + + /** + * Return the maximum tag + */ + int maxTag() const { return d_maxTag; } + + + /** + * \brief Return a new tag + * \details This routine will return an unused tag for communication. + * Note that this tag may match a user tag, but this function will + * not return two duplicate tags. This is a global operation. + */ + int newTag(); + + + /** + * Call MPI_Abort or exit depending on whether running with one or more + * processes and value set by function above, if called. The default is + * to call exit(-1) if running with one processor and to call MPI_Abort() + * otherwise. This function avoids having to guard abort calls in + * application code. 
+ */ + void abort() const; + + + /** + * Set boolean flag indicating whether exit or abort is called when running + * with one processor. Calling this function influences the behavior of + * calls to abort(). By default, the flag is true meaning that + * abort() will be called. Passing false means exit(-1) will be called. + */ + void setCallAbortInSerialInsteadOfExit( bool flag = true ); + + + /** + * \brief Boolean all reduce + * \details This function performs a boolean all reduce across all processors. + * It returns true iff all processor are true; + * \param value The input value for the all reduce + */ + bool allReduce( const bool value ) const; + + + /** + * \brief Boolean any reduce + * \details This function performs a boolean any reduce across all processors. + * It returns true if any processor is true; + * \param value The input value for the all reduce + */ + bool anyReduce( const bool value ) const; + + + /** + * \brief Sum Reduce + * \details This function performs a sum all reduce across all processor. + * It returns the sum across all processors; + * \param value The input value for the all reduce + */ + template + type sumReduce( const type value ) const; + + + /** + * \brief Sum Reduce + * \details Perform an array sum Reduce across all nodes. Each + * processor contributes an array of values, and the + * element-wise sum is returned in the same array. + * \param x The input/output array for the reduce + * \param n The number of values in the array (must match on all nodes) + */ + template + void sumReduce( type *x, const int n = 1 ) const; + + + /** + * \brief Sum Reduce + * \details Perform an array sum Reduce across all nodes. Each + * processor contributes an array of values, and the + * element-wise sum is returned in the same array. + * \param x The input array for the reduce + * \param y The output array for the reduce + * \param n The number of values in the array (must match on all nodes) + */ + template + void sumReduce( const type *x, type *y, const int n = 1 ) const; + + + /** + * \brief Min Reduce + * \details This function performs a min all reduce across all processor. + * It returns the minimum value across all processors; + * \param value The input value for the all reduce + */ + template + type minReduce( const type value ) const; + + + /** + * \brief Sum Reduce + * \details Perform an array min Reduce across all nodes. Each + * processor contributes an array of values, and the + * element-wise minimum is returned in the same array. + * + * If a 'rank_of_min' argument is provided, it will set the array to the + * rank of process holding the minimum value. Like the double argument, + * the size of the supplied 'rank_of_min' array should be n. + * \param x The input/output array for the reduce + * \param n The number of values in the array (must match on all nodes) + * \param rank_of_min Optional array indicating the rank of the processor containing the + * minimum value + */ + template + void minReduce( type *x, const int n = 1, int *rank_of_min = nullptr ) const; + + + /** + * \brief Sum Reduce + * \details Perform an array min Reduce across all nodes. Each + * processor contributes an array of values, and the + * element-wise minimum is returned in the same array. + * + * If a 'rank_of_min' argument is provided, it will set the array to the + * rank of process holding the minimum value. Like the double argument, + * the size of the supplied 'rank_of_min' array should be n. 
+ * \param x The input array for the reduce + * \param y The output array for the reduce + * \param n The number of values in the array (must match on all nodes) + * \param rank_of_min Optional array indicating the rank of the processor containing the + * minimum value + */ + template + void minReduce( const type *x, type *y, const int n = 1, int *rank_of_min = nullptr ) const; + + + /** + * \brief Max Reduce + * \details This function performs a max all reduce across all processor. + * It returns the maximum value across all processors; + * \param value The input value for the all reduce + */ + template + type maxReduce( const type value ) const; + + + /** + * \brief Sum Reduce + * \details Perform an array max Reduce across all nodes. Each + * processor contributes an array of values, and the + * element-wise maximum is returned in the same array. + * + * If a 'rank_of_min' argument is provided, it will set the array to the + * rank of process holding the minimum value. Like the double argument, + * the size of the supplied 'rank_of_min' array should be n. + * \param x The input/output array for the reduce + * \param n The number of values in the array (must match on all nodes) + * \param rank_of_max Optional array indicating the rank of the processor containing the + * minimum value + */ + template + void maxReduce( type *x, const int n = 1, int *rank_of_max = nullptr ) const; + + + /** + * \brief Sum Reduce + * \details Perform an array max Reduce across all nodes. Each + * processor contributes an array of values, and the + * element-wise maximum is returned in the same array. + * + * If a 'rank_of_min' argument is provided, it will set the array to the + * rank of process holding the minimum value. Like the double argument, + * the size of the supplied 'rank_of_min' array should be n. + * \param x The input array for the reduce + * \param y The output array for the reduce + * \param n The number of values in the array (must match on all nodes) + * \param rank_of_max Optional array indicating the rank of the processor containing the + * minimum value + */ + template + void maxReduce( const type *x, type *y, const int n = 1, int *rank_of_max = nullptr ) const; + + + /** + * \brief Scan Sum Reduce + * \details Computes the sum scan (partial reductions) of data on a collection of processes. + * See MPI_Scan for more information. + * \param x The input array for the scan + * \param y The output array for the scan + * \param n The number of values in the array (must match on all nodes) + */ + template + void sumScan( const type *x, type *y, const int n = 1 ) const; + + + /** + * \brief Scan Min Reduce + * \details Computes the min scan (partial reductions) of data on a collection of processes. + * See MPI_Scan for more information. + * \param x The input array for the scan + * \param y The output array for the scan + * \param n The number of values in the array (must match on all nodes) + */ + template + void minScan( const type *x, type *y, const int n = 1 ) const; + + + /** + * \brief Scan Max Reduce + * \details Computes the max scan (partial reductions) of data on a collection of processes. + * See MPI_Scan for more information. 
+ * \param x The input array for the scan + * \param y The output array for the scan + * \param n The number of values in the array (must match on all nodes) + */ + template + void maxScan( const type *x, type *y, const int n = 1 ) const; + + + /** + * \brief Broadcast + * \details This function broadcasts a value from root to all processors + * \param value The input value for the broadcast. + * \param root The processor performing the broadcast + */ + template + type bcast( const type &value, const int root ) const; + + + /** + * \brief Broadcast + * \details This function broadcasts an array from root to all processors + * \param value The input/output array for the broadcast + * \param n The number of values in the array (must match on all nodes) + * \param root The processor performing the broadcast + */ + template + void bcast( type *value, const int n, const int root ) const; + + + /** + * Perform a global barrier across all processors. + */ + void barrier() const; + + + /*! + * @brief This function sends an MPI message with an array to another processor. + * + * If the receiving processor knows in advance the length + * of the array, use "send_length = false;" otherwise, + * this processor will first send the length of the array, + * then send the data. This call must be paired with a + * matching call to recv. + * + * @param buf Pointer to array buffer with length integers. + * @param length Number of integers in buf that we want to send. + * @param recv Receiving processor number. + * @param tag Optional integer argument specifying an integer tag + * to be sent with this message. Default tag is 0. + * The matching recv must share this tag. + */ + template + void send( const type *buf, const int length, const int recv, int tag = 0 ) const; + + + /*! + * @brief This function sends an MPI message with an array of bytes + * (MPI_BYTES) to receiving_proc_number. + * + * This call must be paired with a matching call to recvBytes. + * + * @param buf Void pointer to an array of number_bytes bytes to send. + * @param N_bytes Integer number of bytes to send. + * @param recv Receiving processor number. + * @param tag Optional integer argument specifying an integer tag + * to be sent with this message. Default tag is 0. + * The matching recv must share this tag. + */ + void sendBytes( const void *buf, const int N_bytes, const int recv, int tag = 0 ) const; + + + /*! + * @brief This function sends an MPI message with an array + * to another processor using a non-blocking call. + * The receiving processor must know the length of the array. + * This call must be paired with a matching call to Irecv. + * + * @param buf Pointer to array buffer with length integers. + * @param length Number of integers in buf that we want to send. + * @param recv_proc Receiving processor number. + * @param tag Integer argument specifying an integer tag + * to be sent with this message. + */ + template + MPI_Request Isend( + const type *buf, const int length, const int recv_proc, const int tag ) const; + + + /*! + * @brief This function sends an MPI message with an array of bytes + * (MPI_BYTES) to receiving_proc_number using a non-blocking call. + * The receiving processor must know the number of bytes to receive. + * This call must be paired with a matching call to IrecvBytes. + * + * @param buf Void pointer to an array of number_bytes bytes to send. + * @param N_bytes Integer number of bytes to send. + * @param recv_proc Receiving processor number. 
+ * @param tag Integer argument specifying an integer tag + * to be sent with this message. + */ + MPI_Request IsendBytes( + const void *buf, const int N_bytes, const int recv_proc, const int tag ) const; + + + /*! + * @brief This function receives an MPI message with a data + * array from another processor. + * + * If this processor knows in advance the length of the array, + * use "get_length = false;" otherwise we will get the return size. + * This call must be paired with a matching call to send. + * + * @param buf Pointer to integer array buffer with capacity of length integers. + * @param length If get_length==true: The number of elements to be received, otherwise + * the maximum number of values that can be stored in buf. + * On output the number of received elements. + * @param send Processor number of sender. + * @param tag Optional integer argument specifying a tag which must be matched + * by the tag of the incoming message. Default tag is 0. + */ + template + inline void recv( type *buf, int length, const int send, int tag ) const + { + int length2 = length; + recv( buf, length2, send, false, tag ); + } + + + /*! + * @brief This function receives an MPI message with a data + * array from another processor. + * + * If this processor knows in advance the length of the array, + * use "get_length = false;" otherwise we will get the return size. + * This call must be paired with a matching call to send. + * + * @param buf Pointer to integer array buffer with capacity of length integers. + * @param length If get_length==true: The number of elements to be received, otherwise + * the maximum number of values that can be stored in buf. + * On output the number of received elements. + * @param send Processor number of sender. + * @param get_length Optional boolean argument specifying if we first + * need to check the message size to get the size of the array. + * Default value is true. + * @param tag Optional integer argument specifying a tag which must be matched + * by the tag of the incoming message. Default tag is 0. + */ + template + void recv( type *buf, int &length, const int send, const bool get_length, int tag ) const; + + + /*! + * @brief This function receives an MPI message with an array of + * max size number_bytes (MPI_BYTES) from any processor. + * + * This call must be paired with a matching call to sendBytes. + * + * @param buf Void pointer to a buffer of size number_bytes bytes. + * @param N_bytes Integer number specifying size of buf in bytes. + * @param send Integer number specifying size of buf in bytes. + * @param tag Optional integer argument specifying a tag which + * must be matched by the tag of the incoming message. Default + * tag is 0. + */ + void recvBytes( void *buf, int &N_bytes, const int send, int tag = 0 ) const; + + + /*! + * @brief This function receives an MPI message with a data + * array from another processor using a non-blocking call. + * + * @param buf Pointer to integer array buffer with capacity of length integers. + * @param length Maximum number of values that can be stored in buf. + * @param send_proc Processor number of sender. + * @param tag Optional integer argument specifying a tag which must + * be matched by the tag of the incoming message. + */ + template + MPI_Request Irecv( type *buf, const int length, const int send_proc, const int tag ) const; + + + /*! + * @brief This function receives an MPI message with an array of + * max size number_bytes (MPI_BYTES) from any processor. 
+ * + * This call must be paired with a matching call to sendBytes. + * + * @param buf Void pointer to a buffer of size number_bytes bytes. + * @param N_bytes Integer number specifying size of buf in bytes. + * @param send_proc Processor number of sender. + * @param tag Integer argument specifying a tag which must + * be matched by the tag of the incoming message. + */ + MPI_Request IrecvBytes( + void *buf, const int N_bytes, const int send_proc, const int tag ) const; + + + /*! + * Each processor sends every other processor a single value. + * @param[in] x Input value for allGather + * @return Output array for allGather + */ + template + std::vector allGather( const type &x ) const; + + + /*! + * Each processor sends every other processor an array + * @param[in] x Input array for allGather + * @return Output array for allGather + */ + template + std::vector allGather( const std::vector &x_in ) const; + + + /*! + * Each processor sends every other processor a single value. + * The x_out array should be preallocated to a length equal + * to the number of processors. + * @param x_in Input value for allGather + * @param x_out Output array for allGather (must be preallocated to the size of the + * communicator) + */ + template + void allGather( const type &x_in, type *x_out ) const; + + + /*! + * Each processor sends an array of data to all other processors. + * Each processor receives the values from all processors and gathers them + * to a single array. If successful, the total number of received + * elements will be returned. + * @param send_data Input array + * @param send_cnt The number of values to send + * @param recv_data Output array of received values + * @param recv_cnt The number of values to receive from each processor (N). + * If known, this should be provided as an input. Otherwise + * it is an optional output that will return the number of + * received values from each processor. + * @param recv_disp The displacement (relative to the start of the array) + * from which to store the data received from processor i. + * If known, this should be provided as an input. Otherwise + * it is an optional output that will return the starting location + * (relative to the start of the array) for the received data from + * processor i. + * @param known_recv Are the received counts and displacements known. + * If the received sizes are known, then they must be provided, + * and an extra communication step is not necessary. If the received + * sizes are not known, then an extra communication step will occur + * internally + * and the sizes and displacements will be returned (if desired). + */ + template + int allGather( const type *send_data, const int send_cnt, type *recv_data, + int *recv_cnt = nullptr, int *recv_disp = nullptr, bool known_recv = false ) const; + + + /*! + * This function combines sets from different processors to create a single master set + * @param set Input/Output std::set for the gather. + */ + template + void setGather( std::set &set ) const; + + + /*! + * This function combines std::maps from different processors to create a single master std::map + * If two or more ranks share the same key, the lowest rank will be used + * @param map Input/Output std::map for the gather. + */ + template + void mapGather( std::map &map ) const; + + + /*! + * Each processor sends an array of n values to each processor. + * Each processor sends an array of n values to each processor. 
+ * The jth block of data is sent from processor i to processor j and placed + * in the ith block on the receiving processor. In the variable + * description, N is the size of the communicator. Note that this is a + * blocking global communication. + * @param n The number of elements in each data block to send. + * @param send_data Input array (nxN) + * @param recv_data Output array of received values (nxN) + */ + template + void allToAll( const int n, const type *send_data, type *recv_data ) const; + + + /*! + * Each processor sends an array of data to the different processors. + * Each processor may send any size array to any processor. In the variable + * description, N is the size of the communicator. Note that this is a + * blocking global communication. If successful, the total number of received + * elements will be returned. + * @param send_data Input array + * @param send_cnt The number of values to send to each processor (N) + * @param send_disp The displacement (relative to the start of the array) + * from which to send to processor i + * @param recv_data Output array of received values + * @param recv_cnt The number of values to receive from each processor (N). + * If known, this should be provided as an input. Otherwise + * it is an optional output that will return the number of + * received values from each processor. + * @param recv_disp The displacement (relative to the start of the array) + * from which to send to processor i. + * If known, this should be provided as an input. Otherwise + * it is an optional output that will return the starting location + * (relative to the start of the array) for the received data from + * processor i. + * @param known_recv Are the received counts and displacements known. + * If the received sizes are known, then they must be provided, + * and an extra communication step is not necessary. If the received + * sizes are not know, then an extra communication step will occur + * internally + * and the sizes and displacements will be returned (if desired). + */ + template + int allToAll( const type *send_data, const int send_cnt[], const int send_disp[], + type *recv_data, int *recv_cnt = nullptr, int *recv_disp = nullptr, + bool known_recv = false ) const; + + + /*! + * \brief Send a list of proccesor ids to communicate + * \details This function communicates a list of proccesors to communicate. + * Given a list of ranks that we want to send/receieve data to/from, this routine + * will communicate that set to the other ranks returning the list of processors + * that want to communication with the current rank. + * Note: this routine will involved global communication + * \param ranks List of ranks that the current rank wants to communicate with + * \return List of ranks that want to communicate with the current processor + */ + std::vector commRanks( const std::vector &ranks ) const; + + + /*! + * \brief Wait for a communication to finish + * \details Wait for a communication to finish. + * Note: this does not require a communicator. + * \param request Communication request to wait for (returned for Isend or Irecv) + */ + static void wait( MPI_Request request ); + + + /*! + * \brief Wait for any communication to finish. + * \details This function waits for any of the given communication requests to finish. + * It returns the index of the communication request that finished. + * Note: this does not require a communicator. 
+ * \param count Number of communications to check + * \param request Array of communication requests to wait for (returned for Isend or Irecv) + */ + static int waitAny( int count, MPI_Request *request ); + + + /*! + * \brief Wait for all communications to finish. + * \details This function waits for all of the given communication requests to finish. + * Note: this does not require a communicator. + * \param count Number of communications to check + * \param request Array of communication requests to wait for (returned for Isend or Irecv) + */ + static void waitAll( int count, MPI_Request *request ); + + + /*! + * \brief Wait for some communications to finish. + * \details This function waits for one (or more) communications to finish. + * It returns an array of the indicies that have finished. + * Note: this does not require a communicator. + * \param count Number of communications to check + * \param request Array of communication requests to wait for (returned for Isend or Irecv) + */ + static std::vector waitSome( int count, MPI_Request *request ); + + + /*! + * \brief Nonblocking test for a message + * \details This function performs a non-blocking test for a message. + * It will return the number of bytes in the message if a message with + * the specified source and tag (on the current communicator) is available. + * Otherwise it will return -1. + * \param source source rank (-1: any source) + * \param tag tag (-1: any tag) + */ + int Iprobe( int source = -1, int tag = -1 ) const; + + + /*! + * \brief Blocking test for a message + * \details This function performs a blocking test for a message. + * It will return the number of bytes in the message when a message with + * the specified source and tag (on the current communicator) is available + * \param source source rank (-1: any source) + * \param tag tag (-1: any tag) + */ + int probe( int source = -1, int tag = -1 ) const; + + + /*! + * \brief Start a serial region + * \details This function will serialize MPI processes so that they run + * one at a time. A call to serializeStart must be followed by a call + * to serializeStop after the commands to be executed. + * Note: the ranks will be run in order. + */ + void serializeStart(); + + + /*! + * \brief Stop a serial region + * \details Stop a serial region. See serializeStart for more information. + */ + void serializeStop(); + + + /*! + * \brief Elapsed time + * \details This function returns the elapsed time on the calling processor + * since an arbitrary point in the past (seconds). It is a wrapper to MPI_Wtime. + * See "tick" for the timer resolution in seconds. + * The time may or may not be synchronized across processors depending on the MPI + * implementation. Refer to MPI documentation for the desired platform for more information. + */ + static double time(); + + + /*! + * \brief Timer resolution + * \details This function returns the timer resolution used by "time" + */ + static double tick(); + + + /*! + * \brief Change the level of the internal timers + * \details This function changes the level of the timers used to profile MPI + * \param level New level of the timers + */ + static void changeProfileLevel( int level ) { profile_level = level; } + + + //! Return the total number of MPI_Comm objects that have been created + static size_t MPI_Comm_created() { return N_MPI_Comm_created; } + + //! Return the total number of MPI_Comm objects that have been destroyed + static size_t MPI_Comm_destroyed() { return N_MPI_Comm_destroyed; } + + //! 
Return details about MPI + static std::string info(); + + //! Return the MPI version number { major, minor } + static std::array version(); + + //! Check if MPI is active + static bool MPI_Active(); + + //! Start MPI + static void start_MPI( int argc_in, char *argv_in[], int profile_level = 0 ); + + //! Stop MPI + static void stop_MPI(); + + +private: // Private helper functions for templated MPI operations; + template + void call_sumReduce( type *x, const int n = 1 ) const; + template + void call_sumReduce( const type *x, type *y, const int n = 1 ) const; + template + void call_minReduce( type *x, const int n = 1, int *rank_of_min = nullptr ) const; + template + void call_minReduce( + const type *x, type *y, const int n = 1, int *rank_of_min = nullptr ) const; + template + void call_maxReduce( type *x, const int n = 1, int *rank_of_max = nullptr ) const; + template + void call_maxReduce( + const type *x, type *y, const int n = 1, int *rank_of_max = nullptr ) const; + template + void call_bcast( type *x, const int n, const int root ) const; + template + void call_allGather( const type &x_in, type *x_out ) const; + template + void call_allGather( + const type *x_in, int size_in, type *x_out, int *size_out, int *disp_out ) const; + template + void call_sumScan( const type *x, type *y, int n = 1 ) const; + template + void call_minScan( const type *x, type *y, int n = 1 ) const; + template + void call_maxScan( const type *x, type *y, int n = 1 ) const; + template + void call_allToAll( const type *send_data, const int send_cnt[], const int send_disp[], + type *recv_data, const int *recv_cnt, const int *recv_disp ) const; + + +private: // data members + // The internal MPI communicator + MPI_Comm communicator; + + // Is the communicator NULL + bool d_isNull; + + // Do we want to manage this communicator + bool d_manage; + + // Do we want to call MPI_abort instead of exit + bool d_call_abort; + + // The level for the profiles of MPI + static short profile_level; + + // The rank and size of the communicator + int comm_rank, comm_size; + + // The ranks of the comm in the global comm + mutable int *volatile d_ranks; + + // Some attributes + int d_maxTag; + int *volatile d_currentTag; + + /* How many objects share the same underlying MPI communicator. + * When the count goes to 0, the MPI comm will be free'd (assuming it was created + * by an communicator). This may not be perfect, but is likely to be good enough. + * Note that for thread safety, any access to this variable should be blocked for thread safety. + * The value of count MUST be volatile to ensure the correct value is always used. + */ + std::atomic_int *volatile d_count; + + // Add a variable for data alignment (necessary for some Intel builds) + double tmp_alignment; + + /* We want to keep track of how many MPI_Comm objects we have created over time. + * Like the count, for thread safety this should be blocked, however the most likely error + * caused by not blocking is a slight error in the MPI count. Since this is just for reference + * we do not need to block (recognizing that the value may not be 100% accurate). 
+ */ + static volatile unsigned int N_MPI_Comm_created; + static volatile unsigned int N_MPI_Comm_destroyed; +}; + + +} // namespace Utilities + + +// Include the default instantiations +// \cond HIDDEN_SYMBOLS +#include "common/MPI.I" +// \endcond + + +#endif diff --git a/common/MPI_Helpers.cpp b/common/MPI_Helpers.cpp deleted file mode 100644 index 736a2f02..00000000 --- a/common/MPI_Helpers.cpp +++ /dev/null @@ -1,266 +0,0 @@ -#include "common/MPI_Helpers.h" -#include "common/Utilities.h" - - -/******************************************************** -* Return the MPI data type * -********************************************************/ -template<> MPI_Datatype getMPItype() { - return MPI_CHAR; -} -template<> MPI_Datatype getMPItype() { - return MPI_UNSIGNED_CHAR; -} -template<> MPI_Datatype getMPItype() { - return MPI_INT; -} -template<> MPI_Datatype getMPItype() { - return MPI_LONG; -} -template<> MPI_Datatype getMPItype() { - return MPI_UNSIGNED_LONG; -} -template<> MPI_Datatype getMPItype() { - return MPI_LONG_LONG; -} -template<> MPI_Datatype getMPItype() { - return MPI_FLOAT; -} -template<> MPI_Datatype getMPItype() { - return MPI_DOUBLE; -} - - -/******************************************************** -* Concrete implimentations for packing/unpacking * -********************************************************/ -// unsigned char -template<> -size_t packsize( const unsigned char& ) -{ - return sizeof(unsigned char); -} -template<> -void pack( const unsigned char& rhs, char *buffer ) -{ - memcpy(buffer,&rhs,sizeof(unsigned char)); -} -template<> -void unpack( unsigned char& data, const char *buffer ) -{ - memcpy(&data,buffer,sizeof(unsigned char)); -} -// char -template<> -size_t packsize( const char& ) -{ - return sizeof(char); -} -template<> -void pack( const char& rhs, char *buffer ) -{ - memcpy(buffer,&rhs,sizeof(char)); -} -template<> -void unpack( char& data, const char *buffer ) -{ - memcpy(&data,buffer,sizeof(char)); -} -// int -template<> -size_t packsize( const int& ) -{ - return sizeof(int); -} -template<> -void pack( const int& rhs, char *buffer ) -{ - memcpy(buffer,&rhs,sizeof(int)); -} -template<> -void unpack( int& data, const char *buffer ) -{ - memcpy(&data,buffer,sizeof(int)); -} -// unsigned int -template<> -size_t packsize( const unsigned int& ) -{ - return sizeof(unsigned int); -} -template<> -void pack( const unsigned int& rhs, char *buffer ) -{ - memcpy(buffer,&rhs,sizeof(int)); -} -template<> -void unpack( unsigned int& data, const char *buffer ) -{ - memcpy(&data,buffer,sizeof(int)); -} -// size_t -template<> -size_t packsize( const size_t& ) -{ - return sizeof(size_t); -} -template<> -void pack( const size_t& rhs, char *buffer ) -{ - memcpy(buffer,&rhs,sizeof(size_t)); -} -template<> -void unpack( size_t& data, const char *buffer ) -{ - memcpy(&data,buffer,sizeof(size_t)); -} -// std::string -template<> -size_t packsize( const std::string& rhs ) -{ - return rhs.size()+1; -} -template<> -void pack( const std::string& rhs, char *buffer ) -{ - memcpy(buffer,rhs.c_str(),rhs.size()+1); -} -template<> -void unpack( std::string& data, const char *buffer ) -{ - data = std::string(buffer); -} - - -/******************************************************** -* Fake MPI routines * -********************************************************/ -#ifndef USE_MPI -int MPI_Init(int*,char***) -{ - return 0; -} -int MPI_Init_thread(int*,char***, int required, int *provided ) -{ - *provided = required; - return 0; -} -int MPI_Finalize() -{ - return 0; -} -int MPI_Comm_size( 
MPI_Comm, int *size ) -{ - *size = 1; - return 0; -} -int MPI_Comm_rank( MPI_Comm, int *rank ) -{ - *rank = 0; - return 0; -} -int MPI_Barrier( MPI_Comm ) -{ - return 0; -} -int MPI_Waitall( int, MPI_Request[], MPI_Status[] ) -{ - return 0; -} -int MPI_Wait( MPI_Request*, MPI_Status* ) -{ - return 0; -} -int MPI_Bcast( void *buffer, int count, MPI_Datatype datatype, int root, MPI_Comm comm ) -{ - return 0; -} -int MPI_Send(const void *buf, int count, MPI_Datatype datatype, int dest, int tag, - MPI_Comm comm) -{ - ERROR("Not implimented yet"); - return 0; -} -int MPI_Recv(void *buf, int count, MPI_Datatype datatype, int source, int tag, - MPI_Comm comm, MPI_Status *status) -{ - ERROR("Not implimented yet"); - return 0; -} -int MPI_Isend(const void *buf, int count, MPI_Datatype datatype, int dest, int tag, - MPI_Comm comm, MPI_Request *request) -{ - ERROR("Not implimented yet"); - return 0; -} -int MPI_Irecv(void *buf, int count, MPI_Datatype datatype, int source, - int tag, MPI_Comm comm, MPI_Request *request) -{ - ERROR("Not implimented yet"); - return 0; -} -int MPI_Allreduce(const void *sendbuf, void *recvbuf, int count, - MPI_Datatype datatype, MPI_Op op, MPI_Comm comm) -{ - ERROR("Not implimented yet"); - return 0; -} -int MPI_Allgather(const void *sendbuf, int sendcount, MPI_Datatype sendtype, - void *recvbuf, int recvcount, MPI_Datatype recvtype, - MPI_Comm comm) -{ - ERROR("Not implimented yet"); - return 0; -} -int MPI_Allgatherv(const void *sendbuf, int sendcount, MPI_Datatype sendtype, - void *recvbuf, const int *recvcounts, const int *displs, - MPI_Datatype recvtype, MPI_Comm comm) -{ - ERROR("Not implimented yet"); - return 0; -} -int MPI_Sendrecv(const void *sendbuf, int sendcount, MPI_Datatype sendtype, - int dest, int sendtag, - void *recvbuf, int recvcount, MPI_Datatype recvtype, - int source, int recvtag, - MPI_Comm comm, MPI_Status *status) -{ - ERROR("Not implimented yet"); - return 0; -} -int MPI_Reduce(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, - MPI_Op op, int root, MPI_Comm comm) -{ - ERROR("Not implimented yet"); - return 0; -} -int MPI_Comm_group(MPI_Comm comm, MPI_Group *group) -{ - ERROR("Not implimented yet"); - return 0; -} -int MPI_Comm_create(MPI_Comm comm, MPI_Group group, MPI_Comm *newcomm) -{ - ERROR("Not implimented yet"); - return 0; -} -int MPI_Comm_dup(MPI_Comm comm, MPI_Comm *newcomm) -{ - *newcomm = comm; - return 0; -} -double MPI_Wtime( void ) -{ - return 0.0; -} -int MPI_Comm_free(MPI_Comm *group) -{ - return 0; -} -int MPI_Group_free(MPI_Group *group) -{ - return 0; -} -#endif - - diff --git a/common/MPI_Helpers.h b/common/MPI_Helpers.h deleted file mode 100644 index 1d20318e..00000000 --- a/common/MPI_Helpers.h +++ /dev/null @@ -1,239 +0,0 @@ -// This file contains wrappers for MPI routines and functions to pack/unpack data structures -#ifndef MPI_WRAPPERS_INC -#define MPI_WRAPPERS_INC - -#include -#include -#include -#include - -#ifdef USE_MPI - // Inlcude MPI - #include "mpi.h" -#else - // Create fake MPI types - typedef int MPI_Comm; - typedef int MPI_Request; - typedef int MPI_Status; - #define MPI_COMM_WORLD 0 - #define MPI_COMM_SELF 0 - #define MPI_COMM_NULL -1 - #define MPI_GROUP_NULL -2 - #define MPI_STATUS_IGNORE NULL - enum MPI_Datatype { MPI_LOGICAL, MPI_CHAR, MPI_UNSIGNED_CHAR, MPI_INT, - MPI_UNSIGNED, MPI_LONG, MPI_UNSIGNED_LONG, MPI_LONG_LONG, MPI_FLOAT, MPI_DOUBLE }; - enum MPI_Op { MPI_MIN, MPI_MAX, MPI_SUM }; - typedef int MPI_Group; - #define MPI_THREAD_SINGLE 0 - #define MPI_THREAD_FUNNELED 1 - 
#define MPI_THREAD_SERIALIZED 2 - #define MPI_THREAD_MULTIPLE 3 - // Fake MPI functions - int MPI_Init(int*,char***); - int MPI_Init_thread( int *argc, char ***argv, int required, int *provided ); - int MPI_Finalize(); - int MPI_Comm_size( MPI_Comm, int *size ); - int MPI_Comm_rank( MPI_Comm, int *rank ); - int MPI_Barrier(MPI_Comm); - int MPI_Wait(MPI_Request*,MPI_Status*); - int MPI_Waitall(int,MPI_Request[],MPI_Status[]); - int MPI_Bcast(void*,int,MPI_Datatype,int,MPI_Comm); - int MPI_Send(const void *buf, int count, MPI_Datatype datatype, int dest, int tag, - MPI_Comm comm); - int MPI_Recv(void *buf, int count, MPI_Datatype datatype, int source, int tag, - MPI_Comm comm, MPI_Status *status); - int MPI_Isend(const void *buf, int count, MPI_Datatype datatype, int dest, int tag, - MPI_Comm comm, MPI_Request *request); - int MPI_Irecv(void *buf, int count, MPI_Datatype datatype, int source, - int tag, MPI_Comm comm, MPI_Request *request); - int MPI_Allreduce(const void *sendbuf, void *recvbuf, int count, - MPI_Datatype datatype, MPI_Op op, MPI_Comm comm); - int MPI_Allgather(const void *sendbuf, int sendcount, MPI_Datatype sendtype, - void *recvbuf, int recvcount, MPI_Datatype recvtype, - MPI_Comm comm); - int MPI_Allgatherv(const void *sendbuf, int sendcount, MPI_Datatype sendtype, - void *recvbuf, const int *recvcounts, const int *displs, - MPI_Datatype recvtype, MPI_Comm comm); - int MPI_Sendrecv(const void *sendbuf, int sendcount, MPI_Datatype sendtype, - int dest, int sendtag, - void *recvbuf, int recvcount, MPI_Datatype recvtype, - int source, int recvtag, - MPI_Comm comm, MPI_Status *status); - int MPI_Reduce(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, - MPI_Op op, int root, MPI_Comm comm); - double MPI_Wtime( void ); - int MPI_Comm_group(MPI_Comm comm, MPI_Group *group); - int MPI_Comm_create(MPI_Comm comm, MPI_Group group, MPI_Comm *newcomm); - int MPI_Comm_free(MPI_Comm *group); - int MPI_Group_free(MPI_Group *group); - int MPI_Comm_dup(MPI_Comm comm, MPI_Comm *newcomm); -#endif - - -//! Get the size of the MPI_Comm -// Note: this is a thread and interrupt safe function -inline int comm_size( MPI_Comm comm ) { - int size = 1; - MPI_Comm_size( comm, &size ); - return size; -} - - -//! Get the rank of the MPI_Comm -// Note: this is a thread and interrupt safe function -inline int comm_rank( MPI_Comm comm ) { - int rank = 1; - MPI_Comm_rank( comm, &rank ); - return rank; -} - - -//! Get the size of MPI_COMM_WORLD -inline int MPI_WORLD_SIZE( ) { - return comm_size( MPI_COMM_WORLD ); -} - -//! Get the size of MPI_COMM_WORLD -inline int MPI_WORLD_RANK( ) { - return comm_rank( MPI_COMM_WORLD ); -} - -//! Return the appropriate MPI datatype for a class -template -MPI_Datatype getMPItype(); - - -//! Template function to return the buffer size required to pack a class -template -size_t packsize( const TYPE& rhs ); - -//! Template function to pack a class to a buffer -template -void pack( const TYPE& rhs, char *buffer ); - -//! Template function to unpack a class from a buffer -template -void unpack( TYPE& data, const char *buffer ); - - -//! Template function to return the buffer size required to pack a std::vector -template -size_t packsize( const std::vector& rhs ); - -//! Template function to pack a class to a buffer -template -void pack( const std::vector& rhs, char *buffer ); - -//! Template function to pack a class to a buffer -template -void unpack( std::vector& data, const char *buffer ); - - -//! 
Template function to return the buffer size required to pack a std::pair -template -size_t packsize( const std::pair& rhs ); - -//! Template function to pack a class to a buffer -template -void pack( const std::pair& rhs, char *buffer ); - -//! Template function to pack a class to a buffer -template -void unpack( std::pair& data, const char *buffer ); - - -//! Template function to return the buffer size required to pack a std::map -template -size_t packsize( const std::map& rhs ); - -//! Template function to pack a class to a buffer -template -void pack( const std::map& rhs, char *buffer ); - -//! Template function to pack a class to a buffer -template -void unpack( std::map& data, const char *buffer ); - - -//! Template function to return the buffer size required to pack a std::set -template -size_t packsize( const std::set& rhs ); - -//! Template function to pack a class to a buffer -template -void pack( const std::set& rhs, char *buffer ); - -//! Template function to pack a class to a buffer -template -void unpack( std::set& data, const char *buffer ); - - - -// Helper functions -inline double sumReduce( MPI_Comm comm, double x ) -{ - double y = 0; - MPI_Allreduce(&x,&y,1,MPI_DOUBLE,MPI_SUM,comm); - return y; -} -inline float sumReduce( MPI_Comm comm, float x ) -{ - float y = 0; - MPI_Allreduce(&x,&y,1,MPI_FLOAT,MPI_SUM,comm); - return y; -} -inline int sumReduce( MPI_Comm comm, int x ) -{ - int y = 0; - MPI_Allreduce(&x,&y,1,MPI_INT,MPI_SUM,comm); - return y; -} -inline long long sumReduce( MPI_Comm comm, long long x ) -{ - long long y = 0; - MPI_Allreduce(&x,&y,1,MPI_LONG_LONG,MPI_SUM,comm); - return y; -} -inline bool sumReduce( MPI_Comm comm, bool x ) -{ - int y = sumReduce( comm, x?1:0 ); - return y>0; -} -inline std::vector sumReduce( MPI_Comm comm, const std::vector& x ) -{ - auto y = x; - MPI_Allreduce(x.data(),y.data(),x.size(),MPI_FLOAT,MPI_SUM,comm); - return y; -} -inline std::vector sumReduce( MPI_Comm comm, const std::vector& x ) -{ - auto y = x; - MPI_Allreduce(x.data(),y.data(),x.size(),MPI_INT,MPI_SUM,comm); - return y; -} -inline double maxReduce( MPI_Comm comm, double x ) -{ - double y = 0; - MPI_Allreduce(&x,&y,1,MPI_DOUBLE,MPI_MAX,comm); - return y; -} -inline float maxReduce( MPI_Comm comm, float x ) -{ - float y = 0; - MPI_Allreduce(&x,&y,1,MPI_FLOAT,MPI_MAX,comm); - return y; -} -inline int maxReduce( MPI_Comm comm, int x ) -{ - int y = 0; - MPI_Allreduce(&x,&y,1,MPI_INT,MPI_MAX,comm); - return y; -} - - -#endif - - -#include "common/MPI_Helpers.hpp" - - diff --git a/common/ReadMicroCT.cpp b/common/ReadMicroCT.cpp index 79ef241e..2209e712 100644 --- a/common/ReadMicroCT.cpp +++ b/common/ReadMicroCT.cpp @@ -64,11 +64,11 @@ Array readMicroCT( const std::string& filename ) // Read the compressed micro CT data and distribute -Array readMicroCT( const Database& domain, MPI_Comm comm ) +Array readMicroCT( const Database& domain, const Utilities::MPI& comm ) { // Get the local problem info auto n = domain.getVector( "n" ); - int rank = comm_rank(MPI_COMM_WORLD); + int rank = comm.getRank(); auto nproc = domain.getVector( "nproc" ); RankInfoStruct rankInfo( rank, nproc[0], nproc[1], nproc[2] ); diff --git a/common/ReadMicroCT.h b/common/ReadMicroCT.h index f232740e..c8acc379 100644 --- a/common/ReadMicroCT.h +++ b/common/ReadMicroCT.h @@ -5,11 +5,12 @@ #include "common/Array.h" #include "common/Communication.h" #include "common/Database.h" +#include "common/MPI.h" Array readMicroCT( const std::string& filename ); -Array readMicroCT( const Database& domain, MPI_Comm comm 
); +Array readMicroCT( const Database& domain, const Utilities::MPI& comm ); #endif diff --git a/common/ScaLBL.cpp b/common/ScaLBL.cpp index e8a75994..6f2966e7 100644 --- a/common/ScaLBL.cpp +++ b/common/ScaLBL.cpp @@ -5,9 +5,7 @@ ScaLBL_Communicator::ScaLBL_Communicator(std::shared_ptr Dm){ Lock=false; // unlock the communicator //...................................................................................... // Create a separate copy of the communicator for the device - //MPI_Comm_group(Dm->Comm,&Group); - //MPI_Comm_create(Dm->Comm,Group,&MPI_COMM_SCALBL); - MPI_Comm_dup(Dm->Comm,&MPI_COMM_SCALBL); + MPI_COMM_SCALBL = Dm->Comm.dup(); //...................................................................................... // Copy the domain size and communication information directly from Dm Nx = Dm->Nx; @@ -215,7 +213,7 @@ ScaLBL_Communicator::ScaLBL_Communicator(std::shared_ptr Dm){ ScaLBL_CopyToZeroCopy(dvcRecvList_Yz,Dm->recvList_Yz,recvCount_Yz*sizeof(int)); //...................................................................................... - MPI_Barrier(MPI_COMM_SCALBL); + MPI_COMM_SCALBL.barrier(); //................................................................................... // Set up the recieve distribution lists @@ -288,7 +286,7 @@ ScaLBL_Communicator::ScaLBL_Communicator(std::shared_ptr Dm){ //................................................................................... //...................................................................................... - MPI_Barrier(MPI_COMM_SCALBL); + MPI_COMM_SCALBL.barrier(); ScaLBL_DeviceBarrier(); //...................................................................................... SendCount = sendCount_x+sendCount_X+sendCount_y+sendCount_Y+sendCount_z+sendCount_Z+ @@ -869,8 +867,8 @@ void ScaLBL_Communicator::SendD3Q19AA(double *dist){ ScaLBL_D3Q19_Pack(12,dvcSendList_x,3*sendCount_x,sendCount_x,sendbuf_x,dist,N); ScaLBL_D3Q19_Pack(14,dvcSendList_x,4*sendCount_x,sendCount_x,sendbuf_x,dist,N); - MPI_Isend(sendbuf_x, 5*sendCount_x,MPI_DOUBLE,rank_x,sendtag,MPI_COMM_SCALBL,&req1[0]); - MPI_Irecv(recvbuf_X, 5*recvCount_X,MPI_DOUBLE,rank_X,recvtag,MPI_COMM_SCALBL,&req2[0]); + req1[0] = MPI_COMM_SCALBL.Isend(sendbuf_x, 5*sendCount_x,rank_x,sendtag); + req2[0] = MPI_COMM_SCALBL.Irecv(recvbuf_X, 5*recvCount_X,rank_X,recvtag); //...Packing for X face(1,7,9,11,13)................................ ScaLBL_D3Q19_Pack(1,dvcSendList_X,0,sendCount_X,sendbuf_X,dist,N); ScaLBL_D3Q19_Pack(7,dvcSendList_X,sendCount_X,sendCount_X,sendbuf_X,dist,N); @@ -878,8 +876,8 @@ void ScaLBL_Communicator::SendD3Q19AA(double *dist){ ScaLBL_D3Q19_Pack(11,dvcSendList_X,3*sendCount_X,sendCount_X,sendbuf_X,dist,N); ScaLBL_D3Q19_Pack(13,dvcSendList_X,4*sendCount_X,sendCount_X,sendbuf_X,dist,N); - MPI_Isend(sendbuf_X, 5*sendCount_X,MPI_DOUBLE,rank_X,sendtag,MPI_COMM_SCALBL,&req1[1]); - MPI_Irecv(recvbuf_x, 5*recvCount_x,MPI_DOUBLE,rank_x,recvtag,MPI_COMM_SCALBL,&req2[1]); + req1[1] = MPI_COMM_SCALBL.Isend(sendbuf_X, 5*sendCount_X,rank_X,sendtag); + req2[1] = MPI_COMM_SCALBL.Irecv(recvbuf_x, 5*recvCount_x,rank_x,recvtag); //...Packing for y face(4,8,9,16,18)................................. 
ScaLBL_D3Q19_Pack(4,dvcSendList_y,0,sendCount_y,sendbuf_y,dist,N); ScaLBL_D3Q19_Pack(8,dvcSendList_y,sendCount_y,sendCount_y,sendbuf_y,dist,N); @@ -887,8 +885,8 @@ void ScaLBL_Communicator::SendD3Q19AA(double *dist){ ScaLBL_D3Q19_Pack(16,dvcSendList_y,3*sendCount_y,sendCount_y,sendbuf_y,dist,N); ScaLBL_D3Q19_Pack(18,dvcSendList_y,4*sendCount_y,sendCount_y,sendbuf_y,dist,N); - MPI_Isend(sendbuf_y, 5*sendCount_y,MPI_DOUBLE,rank_y,sendtag,MPI_COMM_SCALBL,&req1[2]); - MPI_Irecv(recvbuf_Y, 5*recvCount_Y,MPI_DOUBLE,rank_Y,recvtag,MPI_COMM_SCALBL,&req2[2]); + req1[2] = MPI_COMM_SCALBL.Isend(sendbuf_y, 5*sendCount_y,rank_y,sendtag); + req2[2] = MPI_COMM_SCALBL.Irecv(recvbuf_Y, 5*recvCount_Y,rank_Y,recvtag); //...Packing for Y face(3,7,10,15,17)................................. ScaLBL_D3Q19_Pack(3,dvcSendList_Y,0,sendCount_Y,sendbuf_Y,dist,N); ScaLBL_D3Q19_Pack(7,dvcSendList_Y,sendCount_Y,sendCount_Y,sendbuf_Y,dist,N); @@ -896,8 +894,8 @@ void ScaLBL_Communicator::SendD3Q19AA(double *dist){ ScaLBL_D3Q19_Pack(15,dvcSendList_Y,3*sendCount_Y,sendCount_Y,sendbuf_Y,dist,N); ScaLBL_D3Q19_Pack(17,dvcSendList_Y,4*sendCount_Y,sendCount_Y,sendbuf_Y,dist,N); - MPI_Isend(sendbuf_Y, 5*sendCount_Y,MPI_DOUBLE,rank_Y,sendtag,MPI_COMM_SCALBL,&req1[3]); - MPI_Irecv(recvbuf_y, 5*recvCount_y,MPI_DOUBLE,rank_y,recvtag,MPI_COMM_SCALBL,&req2[3]); + req1[3] = MPI_COMM_SCALBL.Isend(sendbuf_Y, 5*sendCount_Y,rank_Y,sendtag); + req2[3] = MPI_COMM_SCALBL.Irecv(recvbuf_y, 5*recvCount_y,rank_y,recvtag); //...Packing for z face(6,12,13,16,17)................................ ScaLBL_D3Q19_Pack(6,dvcSendList_z,0,sendCount_z,sendbuf_z,dist,N); ScaLBL_D3Q19_Pack(12,dvcSendList_z,sendCount_z,sendCount_z,sendbuf_z,dist,N); @@ -905,8 +903,8 @@ void ScaLBL_Communicator::SendD3Q19AA(double *dist){ ScaLBL_D3Q19_Pack(16,dvcSendList_z,3*sendCount_z,sendCount_z,sendbuf_z,dist,N); ScaLBL_D3Q19_Pack(17,dvcSendList_z,4*sendCount_z,sendCount_z,sendbuf_z,dist,N); - MPI_Isend(sendbuf_z, 5*sendCount_z,MPI_DOUBLE,rank_z,sendtag,MPI_COMM_SCALBL,&req1[4]); - MPI_Irecv(recvbuf_Z, 5*recvCount_Z,MPI_DOUBLE,rank_Z,recvtag,MPI_COMM_SCALBL,&req2[4]); + req1[4] = MPI_COMM_SCALBL.Isend(sendbuf_z, 5*sendCount_z,rank_z,sendtag); + req2[4] = MPI_COMM_SCALBL.Irecv(recvbuf_Z, 5*recvCount_Z,rank_Z,recvtag); //...Packing for Z face(5,11,14,15,18)................................ ScaLBL_D3Q19_Pack(5,dvcSendList_Z,0,sendCount_Z,sendbuf_Z,dist,N); @@ -915,57 +913,57 @@ void ScaLBL_Communicator::SendD3Q19AA(double *dist){ ScaLBL_D3Q19_Pack(15,dvcSendList_Z,3*sendCount_Z,sendCount_Z,sendbuf_Z,dist,N); ScaLBL_D3Q19_Pack(18,dvcSendList_Z,4*sendCount_Z,sendCount_Z,sendbuf_Z,dist,N); - MPI_Isend(sendbuf_Z, 5*sendCount_Z,MPI_DOUBLE,rank_Z,sendtag,MPI_COMM_SCALBL,&req1[5]); - MPI_Irecv(recvbuf_z, 5*recvCount_z,MPI_DOUBLE,rank_z,recvtag,MPI_COMM_SCALBL,&req2[5]); + req1[5] = MPI_COMM_SCALBL.Isend(sendbuf_Z, 5*sendCount_Z,rank_Z,sendtag); + req2[5] = MPI_COMM_SCALBL.Irecv(recvbuf_z, 5*recvCount_z,rank_z,recvtag); //...Pack the xy edge (8)................................ ScaLBL_D3Q19_Pack(8,dvcSendList_xy,0,sendCount_xy,sendbuf_xy,dist,N); - MPI_Isend(sendbuf_xy, sendCount_xy,MPI_DOUBLE,rank_xy,sendtag,MPI_COMM_SCALBL,&req1[6]); - MPI_Irecv(recvbuf_XY, recvCount_XY,MPI_DOUBLE,rank_XY,recvtag,MPI_COMM_SCALBL,&req2[6]); + req1[6] = MPI_COMM_SCALBL.Isend(sendbuf_xy, sendCount_xy,rank_xy,sendtag); + req2[6] = MPI_COMM_SCALBL.Irecv(recvbuf_XY, recvCount_XY,rank_XY,recvtag); //...Pack the Xy edge (9)................................ 
ScaLBL_D3Q19_Pack(9,dvcSendList_Xy,0,sendCount_Xy,sendbuf_Xy,dist,N); - MPI_Isend(sendbuf_Xy, sendCount_Xy,MPI_DOUBLE,rank_Xy,sendtag,MPI_COMM_SCALBL,&req1[8]); - MPI_Irecv(recvbuf_xY, recvCount_xY,MPI_DOUBLE,rank_xY,recvtag,MPI_COMM_SCALBL,&req2[8]); + req1[8] = MPI_COMM_SCALBL.Isend(sendbuf_Xy, sendCount_Xy,rank_Xy,sendtag); + req2[8] = MPI_COMM_SCALBL.Irecv(recvbuf_xY, recvCount_xY,rank_xY,recvtag); //...Pack the xY edge (10)................................ ScaLBL_D3Q19_Pack(10,dvcSendList_xY,0,sendCount_xY,sendbuf_xY,dist,N); - MPI_Isend(sendbuf_xY, sendCount_xY,MPI_DOUBLE,rank_xY,sendtag,MPI_COMM_SCALBL,&req1[9]); - MPI_Irecv(recvbuf_Xy, recvCount_Xy,MPI_DOUBLE,rank_Xy,recvtag,MPI_COMM_SCALBL,&req2[9]); + req1[9] = MPI_COMM_SCALBL.Isend(sendbuf_xY, sendCount_xY,rank_xY,sendtag); + req2[9] = MPI_COMM_SCALBL.Irecv(recvbuf_Xy, recvCount_Xy,rank_Xy,recvtag); //...Pack the XY edge (7)................................ ScaLBL_D3Q19_Pack(7,dvcSendList_XY,0,sendCount_XY,sendbuf_XY,dist,N); - MPI_Isend(sendbuf_XY, sendCount_XY,MPI_DOUBLE,rank_XY,sendtag,MPI_COMM_SCALBL,&req1[7]); - MPI_Irecv(recvbuf_xy, recvCount_xy,MPI_DOUBLE,rank_xy,recvtag,MPI_COMM_SCALBL,&req2[7]); + req1[7] = MPI_COMM_SCALBL.Isend(sendbuf_XY, sendCount_XY,rank_XY,sendtag); + req2[7] = MPI_COMM_SCALBL.Irecv(recvbuf_xy, recvCount_xy,rank_xy,recvtag); //...Pack the xz edge (12)................................ ScaLBL_D3Q19_Pack(12,dvcSendList_xz,0,sendCount_xz,sendbuf_xz,dist,N); - MPI_Isend(sendbuf_xz, sendCount_xz,MPI_DOUBLE,rank_xz,sendtag,MPI_COMM_SCALBL,&req1[10]); - MPI_Irecv(recvbuf_XZ, recvCount_XZ,MPI_DOUBLE,rank_XZ,recvtag,MPI_COMM_SCALBL,&req2[10]); + req1[10] = MPI_COMM_SCALBL.Isend(sendbuf_xz, sendCount_xz,rank_xz,sendtag); + req2[10] = MPI_COMM_SCALBL.Irecv(recvbuf_XZ, recvCount_XZ,rank_XZ,recvtag); //...Pack the xZ edge (14)................................ ScaLBL_D3Q19_Pack(14,dvcSendList_xZ,0,sendCount_xZ,sendbuf_xZ,dist,N); - MPI_Isend(sendbuf_xZ, sendCount_xZ,MPI_DOUBLE,rank_xZ,sendtag,MPI_COMM_SCALBL,&req1[13]); - MPI_Irecv(recvbuf_Xz, recvCount_Xz,MPI_DOUBLE,rank_Xz,recvtag,MPI_COMM_SCALBL,&req2[13]); + req1[13] = MPI_COMM_SCALBL.Isend(sendbuf_xZ, sendCount_xZ,rank_xZ,sendtag); + req2[13] = MPI_COMM_SCALBL.Irecv(recvbuf_Xz, recvCount_Xz,rank_Xz,recvtag); //...Pack the Xz edge (13)................................ ScaLBL_D3Q19_Pack(13,dvcSendList_Xz,0,sendCount_Xz,sendbuf_Xz,dist,N); - MPI_Isend(sendbuf_Xz, sendCount_Xz,MPI_DOUBLE,rank_Xz,sendtag,MPI_COMM_SCALBL,&req1[12]); - MPI_Irecv(recvbuf_xZ, recvCount_xZ,MPI_DOUBLE,rank_xZ,recvtag,MPI_COMM_SCALBL,&req2[12]); + req1[12] = MPI_COMM_SCALBL.Isend(sendbuf_Xz, sendCount_Xz,rank_Xz,sendtag); + req2[12] = MPI_COMM_SCALBL.Irecv(recvbuf_xZ, recvCount_xZ,rank_xZ,recvtag); //...Pack the XZ edge (11)................................ ScaLBL_D3Q19_Pack(11,dvcSendList_XZ,0,sendCount_XZ,sendbuf_XZ,dist,N); - MPI_Isend(sendbuf_XZ, sendCount_XZ,MPI_DOUBLE,rank_XZ,sendtag,MPI_COMM_SCALBL,&req1[11]); - MPI_Irecv(recvbuf_xz, recvCount_xz,MPI_DOUBLE,rank_xz,recvtag,MPI_COMM_SCALBL,&req2[11]); + req1[11] = MPI_COMM_SCALBL.Isend(sendbuf_XZ, sendCount_XZ,rank_XZ,sendtag); + req2[11] = MPI_COMM_SCALBL.Irecv(recvbuf_xz, recvCount_xz,rank_xz,recvtag); //...Pack the yz edge (16)................................ 
ScaLBL_D3Q19_Pack(16,dvcSendList_yz,0,sendCount_yz,sendbuf_yz,dist,N); - MPI_Isend(sendbuf_yz, sendCount_yz,MPI_DOUBLE,rank_yz,sendtag,MPI_COMM_SCALBL,&req1[14]); - MPI_Irecv(recvbuf_YZ, recvCount_YZ,MPI_DOUBLE,rank_YZ,recvtag,MPI_COMM_SCALBL,&req2[14]); + req1[14] = MPI_COMM_SCALBL.Isend(sendbuf_yz, sendCount_yz,rank_yz,sendtag); + req2[14] = MPI_COMM_SCALBL.Irecv(recvbuf_YZ, recvCount_YZ,rank_YZ,recvtag); //...Pack the yZ edge (18)................................ ScaLBL_D3Q19_Pack(18,dvcSendList_yZ,0,sendCount_yZ,sendbuf_yZ,dist,N); - MPI_Isend(sendbuf_yZ, sendCount_yZ,MPI_DOUBLE,rank_yZ,sendtag,MPI_COMM_SCALBL,&req1[17]); - MPI_Irecv(recvbuf_Yz, recvCount_Yz,MPI_DOUBLE,rank_Yz,recvtag,MPI_COMM_SCALBL,&req2[17]); + req1[17] = MPI_COMM_SCALBL.Isend(sendbuf_yZ, sendCount_yZ,rank_yZ,sendtag); + req2[17] = MPI_COMM_SCALBL.Irecv(recvbuf_Yz, recvCount_Yz,rank_Yz,recvtag); //...Pack the Yz edge (17)................................ ScaLBL_D3Q19_Pack(17,dvcSendList_Yz,0,sendCount_Yz,sendbuf_Yz,dist,N); - MPI_Isend(sendbuf_Yz, sendCount_Yz,MPI_DOUBLE,rank_Yz,sendtag,MPI_COMM_SCALBL,&req1[16]); - MPI_Irecv(recvbuf_yZ, recvCount_yZ,MPI_DOUBLE,rank_yZ,recvtag,MPI_COMM_SCALBL,&req2[16]); + req1[16] = MPI_COMM_SCALBL.Isend(sendbuf_Yz, sendCount_Yz,rank_Yz,sendtag); + req2[16] = MPI_COMM_SCALBL.Irecv(recvbuf_yZ, recvCount_yZ,rank_yZ,recvtag); //...Pack the YZ edge (15)................................ ScaLBL_D3Q19_Pack(15,dvcSendList_YZ,0,sendCount_YZ,sendbuf_YZ,dist,N); - MPI_Isend(sendbuf_YZ, sendCount_YZ,MPI_DOUBLE,rank_YZ,sendtag,MPI_COMM_SCALBL,&req1[15]); - MPI_Irecv(recvbuf_yz, recvCount_yz,MPI_DOUBLE,rank_yz,recvtag,MPI_COMM_SCALBL,&req2[15]); + req1[15] = MPI_COMM_SCALBL.Isend(sendbuf_YZ, sendCount_YZ,rank_YZ,sendtag); + req2[15] = MPI_COMM_SCALBL.Irecv(recvbuf_yz, recvCount_yz,rank_yz,recvtag); //................................................................................... } @@ -975,8 +973,8 @@ void ScaLBL_Communicator::RecvD3Q19AA(double *dist){ // NOTE: the center distribution f0 must NOT be at the start of feven, provide offset to start of f2 //................................................................................... // Wait for completion of D3Q19 communication - MPI_Waitall(18,req1,stat1); - MPI_Waitall(18,req2,stat2); + MPI_COMM_SCALBL.waitAll(18,req1); + MPI_COMM_SCALBL.waitAll(18,req2); ScaLBL_DeviceBarrier(); //................................................................................... @@ -1059,8 +1057,8 @@ void ScaLBL_Communicator::RecvGrad(double *phi, double *grad){ // Recieves halo and incorporates into D3Q19 based stencil gradient computation //................................................................................... // Wait for completion of D3Q19 communication - MPI_Waitall(18,req1,stat1); - MPI_Waitall(18,req2,stat2); + MPI_COMM_SCALBL.waitAll(18,req1); + MPI_COMM_SCALBL.waitAll(18,req2); ScaLBL_DeviceBarrier(); //................................................................................... 
@@ -1153,36 +1151,36 @@ void ScaLBL_Communicator::BiSendD3Q7AA(double *Aq, double *Bq){ ScaLBL_D3Q19_Pack(2,dvcSendList_x,0,sendCount_x,sendbuf_x,Aq,N); ScaLBL_D3Q19_Pack(2,dvcSendList_x,sendCount_x,sendCount_x,sendbuf_x,Bq,N); - MPI_Isend(sendbuf_x, 2*sendCount_x,MPI_DOUBLE,rank_x,sendtag,MPI_COMM_SCALBL,&req1[0]); - MPI_Irecv(recvbuf_X, 2*recvCount_X,MPI_DOUBLE,rank_X,recvtag,MPI_COMM_SCALBL,&req2[0]); + req1[0] = MPI_COMM_SCALBL.Isend(sendbuf_x, 2*sendCount_x,rank_x,sendtag); + req2[0] = MPI_COMM_SCALBL.Irecv(recvbuf_X, 2*recvCount_X,rank_X,recvtag); //...Packing for X face(1,7,9,11,13)................................ ScaLBL_D3Q19_Pack(1,dvcSendList_X,0,sendCount_X,sendbuf_X,Aq,N); ScaLBL_D3Q19_Pack(1,dvcSendList_X,sendCount_X,sendCount_X,sendbuf_X,Bq,N); - MPI_Isend(sendbuf_X, 2*sendCount_X,MPI_DOUBLE,rank_X,sendtag,MPI_COMM_SCALBL,&req1[1]); - MPI_Irecv(recvbuf_x, 2*recvCount_x,MPI_DOUBLE,rank_x,recvtag,MPI_COMM_SCALBL,&req2[1]); + req1[1] = MPI_COMM_SCALBL.Isend(sendbuf_X, 2*sendCount_X,rank_X,sendtag); + req2[1] = MPI_COMM_SCALBL.Irecv(recvbuf_x, 2*recvCount_x,rank_x,recvtag); //...Packing for y face(4,8,9,16,18)................................. ScaLBL_D3Q19_Pack(4,dvcSendList_y,0,sendCount_y,sendbuf_y,Aq,N); ScaLBL_D3Q19_Pack(4,dvcSendList_y,sendCount_y,sendCount_y,sendbuf_y,Bq,N); - MPI_Isend(sendbuf_y, 2*sendCount_y,MPI_DOUBLE,rank_y,sendtag,MPI_COMM_SCALBL,&req1[2]); - MPI_Irecv(recvbuf_Y, 2*recvCount_Y,MPI_DOUBLE,rank_Y,recvtag,MPI_COMM_SCALBL,&req2[2]); + req1[2] = MPI_COMM_SCALBL.Isend(sendbuf_y, 2*sendCount_y,rank_y,sendtag); + req2[2] = MPI_COMM_SCALBL.Irecv(recvbuf_Y, 2*recvCount_Y,rank_Y,recvtag); //...Packing for Y face(3,7,10,15,17)................................. ScaLBL_D3Q19_Pack(3,dvcSendList_Y,0,sendCount_Y,sendbuf_Y,Aq,N); ScaLBL_D3Q19_Pack(3,dvcSendList_Y,sendCount_Y,sendCount_Y,sendbuf_Y,Bq,N); - MPI_Isend(sendbuf_Y, 2*sendCount_Y,MPI_DOUBLE,rank_Y,sendtag,MPI_COMM_SCALBL,&req1[3]); - MPI_Irecv(recvbuf_y, 2*recvCount_y,MPI_DOUBLE,rank_y,recvtag,MPI_COMM_SCALBL,&req2[3]); + req1[3] = MPI_COMM_SCALBL.Isend(sendbuf_Y, 2*sendCount_Y,rank_Y,sendtag); + req2[3] = MPI_COMM_SCALBL.Irecv(recvbuf_y, 2*recvCount_y,rank_y,recvtag); //...Packing for z face(6,12,13,16,17)................................ ScaLBL_D3Q19_Pack(6,dvcSendList_z,0,sendCount_z,sendbuf_z,Aq,N); ScaLBL_D3Q19_Pack(6,dvcSendList_z,sendCount_z,sendCount_z,sendbuf_z,Bq,N); - MPI_Isend(sendbuf_z, 2*sendCount_z,MPI_DOUBLE,rank_z,sendtag,MPI_COMM_SCALBL,&req1[4]); - MPI_Irecv(recvbuf_Z, 2*recvCount_Z,MPI_DOUBLE,rank_Z,recvtag,MPI_COMM_SCALBL,&req2[4]); + req1[4] = MPI_COMM_SCALBL.Isend(sendbuf_z, 2*sendCount_z,rank_z,sendtag); + req2[4] = MPI_COMM_SCALBL.Irecv(recvbuf_Z, 2*recvCount_Z,rank_Z,recvtag); //...Packing for Z face(5,11,14,15,18)................................ ScaLBL_D3Q19_Pack(5,dvcSendList_Z,0,sendCount_Z,sendbuf_Z,Aq,N); @@ -1190,8 +1188,8 @@ void ScaLBL_Communicator::BiSendD3Q7AA(double *Aq, double *Bq){ //................................................................................... 
// Send all the distributions - MPI_Isend(sendbuf_Z, 2*sendCount_Z,MPI_DOUBLE,rank_Z,sendtag,MPI_COMM_SCALBL,&req1[5]); - MPI_Irecv(recvbuf_z, 2*recvCount_z,MPI_DOUBLE,rank_z,recvtag,MPI_COMM_SCALBL,&req2[5]); + req1[5] = MPI_COMM_SCALBL.Isend(sendbuf_Z, 2*sendCount_Z,rank_Z,sendtag); + req2[5] = MPI_COMM_SCALBL.Irecv(recvbuf_z, 2*recvCount_z,rank_z,recvtag); } @@ -1201,8 +1199,8 @@ void ScaLBL_Communicator::BiRecvD3Q7AA(double *Aq, double *Bq){ // NOTE: the center distribution f0 must NOT be at the start of feven, provide offset to start of f2 //................................................................................... // Wait for completion of D3Q19 communication - MPI_Waitall(6,req1,stat1); - MPI_Waitall(6,req2,stat2); + MPI_COMM_SCALBL.waitAll(6,req1); + MPI_COMM_SCALBL.waitAll(6,req2); ScaLBL_DeviceBarrier(); //................................................................................... @@ -1293,18 +1291,18 @@ void ScaLBL_Communicator::TriSendD3Q7AA(double *Aq, double *Bq, double *Cq){ //................................................................................... // Send all the distributions - MPI_Isend(sendbuf_x, 3*sendCount_x,MPI_DOUBLE,rank_x,sendtag,MPI_COMM_SCALBL,&req1[0]); - MPI_Irecv(recvbuf_X, 3*recvCount_X,MPI_DOUBLE,rank_X,recvtag,MPI_COMM_SCALBL,&req2[0]); - MPI_Isend(sendbuf_X, 3*sendCount_X,MPI_DOUBLE,rank_X,sendtag,MPI_COMM_SCALBL,&req1[1]); - MPI_Irecv(recvbuf_x, 3*recvCount_x,MPI_DOUBLE,rank_x,recvtag,MPI_COMM_SCALBL,&req2[1]); - MPI_Isend(sendbuf_y, 3*sendCount_y,MPI_DOUBLE,rank_y,sendtag,MPI_COMM_SCALBL,&req1[2]); - MPI_Irecv(recvbuf_Y, 3*recvCount_Y,MPI_DOUBLE,rank_Y,recvtag,MPI_COMM_SCALBL,&req2[2]); - MPI_Isend(sendbuf_Y, 3*sendCount_Y,MPI_DOUBLE,rank_Y,sendtag,MPI_COMM_SCALBL,&req1[3]); - MPI_Irecv(recvbuf_y, 3*recvCount_y,MPI_DOUBLE,rank_y,recvtag,MPI_COMM_SCALBL,&req2[3]); - MPI_Isend(sendbuf_z, 3*sendCount_z,MPI_DOUBLE,rank_z,sendtag,MPI_COMM_SCALBL,&req1[4]); - MPI_Irecv(recvbuf_Z, 3*recvCount_Z,MPI_DOUBLE,rank_Z,recvtag,MPI_COMM_SCALBL,&req2[4]); - MPI_Isend(sendbuf_Z, 3*sendCount_Z,MPI_DOUBLE,rank_Z,sendtag,MPI_COMM_SCALBL,&req1[5]); - MPI_Irecv(recvbuf_z, 3*recvCount_z,MPI_DOUBLE,rank_z,recvtag,MPI_COMM_SCALBL,&req2[5]); + req1[0] = MPI_COMM_SCALBL.Isend(sendbuf_x, 3*sendCount_x,rank_x,sendtag); + req2[0] = MPI_COMM_SCALBL.Irecv(recvbuf_X, 3*recvCount_X,rank_X,recvtag); + req1[1] = MPI_COMM_SCALBL.Isend(sendbuf_X, 3*sendCount_X,rank_X,sendtag); + req2[1] = MPI_COMM_SCALBL.Irecv(recvbuf_x, 3*recvCount_x,rank_x,recvtag); + req1[2] = MPI_COMM_SCALBL.Isend(sendbuf_y, 3*sendCount_y,rank_y,sendtag); + req2[2] = MPI_COMM_SCALBL.Irecv(recvbuf_Y, 3*recvCount_Y,rank_Y,recvtag); + req1[3] = MPI_COMM_SCALBL.Isend(sendbuf_Y, 3*sendCount_Y,rank_Y,sendtag); + req2[3] = MPI_COMM_SCALBL.Irecv(recvbuf_y, 3*recvCount_y,rank_y,recvtag); + req1[4] = MPI_COMM_SCALBL.Isend(sendbuf_z, 3*sendCount_z,rank_z,sendtag); + req2[4] = MPI_COMM_SCALBL.Irecv(recvbuf_Z, 3*recvCount_Z,rank_Z,recvtag); + req1[5] = MPI_COMM_SCALBL.Isend(sendbuf_Z, 3*sendCount_Z,rank_Z,sendtag); + req2[5] = MPI_COMM_SCALBL.Irecv(recvbuf_z, 3*recvCount_z,rank_z,recvtag); } @@ -1314,8 +1312,8 @@ void ScaLBL_Communicator::TriRecvD3Q7AA(double *Aq, double *Bq, double *Cq){ // NOTE: the center distribution f0 must NOT be at the start of feven, provide offset to start of f2 //................................................................................... 
// Wait for completion of D3Q19 communication - MPI_Waitall(6,req1,stat1); - MPI_Waitall(6,req2,stat2); + MPI_COMM_SCALBL.waitAll(6,req1); + MPI_COMM_SCALBL.waitAll(6,req2); ScaLBL_DeviceBarrier(); //................................................................................... @@ -1409,49 +1407,49 @@ void ScaLBL_Communicator::SendHalo(double *data){ // Send / Recv all the phase indcator field values //................................................................................... - MPI_Isend(sendbuf_x, sendCount_x,MPI_DOUBLE,rank_x,sendtag,MPI_COMM_SCALBL,&req1[0]); - MPI_Irecv(recvbuf_X, recvCount_X,MPI_DOUBLE,rank_X,recvtag,MPI_COMM_SCALBL,&req2[0]); - MPI_Isend(sendbuf_X, sendCount_X,MPI_DOUBLE,rank_X,sendtag,MPI_COMM_SCALBL,&req1[1]); - MPI_Irecv(recvbuf_x, recvCount_x,MPI_DOUBLE,rank_x,recvtag,MPI_COMM_SCALBL,&req2[1]); - MPI_Isend(sendbuf_y, sendCount_y,MPI_DOUBLE,rank_y,sendtag,MPI_COMM_SCALBL,&req1[2]); - MPI_Irecv(recvbuf_Y, recvCount_Y,MPI_DOUBLE,rank_Y,recvtag,MPI_COMM_SCALBL,&req2[2]); - MPI_Isend(sendbuf_Y, sendCount_Y,MPI_DOUBLE,rank_Y,sendtag,MPI_COMM_SCALBL,&req1[3]); - MPI_Irecv(recvbuf_y, recvCount_y,MPI_DOUBLE,rank_y,recvtag,MPI_COMM_SCALBL,&req2[3]); - MPI_Isend(sendbuf_z, sendCount_z,MPI_DOUBLE,rank_z,sendtag,MPI_COMM_SCALBL,&req1[4]); - MPI_Irecv(recvbuf_Z, recvCount_Z,MPI_DOUBLE,rank_Z,recvtag,MPI_COMM_SCALBL,&req2[4]); - MPI_Isend(sendbuf_Z, sendCount_Z,MPI_DOUBLE,rank_Z,sendtag,MPI_COMM_SCALBL,&req1[5]); - MPI_Irecv(recvbuf_z, recvCount_z,MPI_DOUBLE,rank_z,recvtag,MPI_COMM_SCALBL,&req2[5]); - MPI_Isend(sendbuf_xy, sendCount_xy,MPI_DOUBLE,rank_xy,sendtag,MPI_COMM_SCALBL,&req1[6]); - MPI_Irecv(recvbuf_XY, recvCount_XY,MPI_DOUBLE,rank_XY,recvtag,MPI_COMM_SCALBL,&req2[6]); - MPI_Isend(sendbuf_XY, sendCount_XY,MPI_DOUBLE,rank_XY,sendtag,MPI_COMM_SCALBL,&req1[7]); - MPI_Irecv(recvbuf_xy, recvCount_xy,MPI_DOUBLE,rank_xy,recvtag,MPI_COMM_SCALBL,&req2[7]); - MPI_Isend(sendbuf_Xy, sendCount_Xy,MPI_DOUBLE,rank_Xy,sendtag,MPI_COMM_SCALBL,&req1[8]); - MPI_Irecv(recvbuf_xY, recvCount_xY,MPI_DOUBLE,rank_xY,recvtag,MPI_COMM_SCALBL,&req2[8]); - MPI_Isend(sendbuf_xY, sendCount_xY,MPI_DOUBLE,rank_xY,sendtag,MPI_COMM_SCALBL,&req1[9]); - MPI_Irecv(recvbuf_Xy, recvCount_Xy,MPI_DOUBLE,rank_Xy,recvtag,MPI_COMM_SCALBL,&req2[9]); - MPI_Isend(sendbuf_xz, sendCount_xz,MPI_DOUBLE,rank_xz,sendtag,MPI_COMM_SCALBL,&req1[10]); - MPI_Irecv(recvbuf_XZ, recvCount_XZ,MPI_DOUBLE,rank_XZ,recvtag,MPI_COMM_SCALBL,&req2[10]); - MPI_Isend(sendbuf_XZ, sendCount_XZ,MPI_DOUBLE,rank_XZ,sendtag,MPI_COMM_SCALBL,&req1[11]); - MPI_Irecv(recvbuf_xz, recvCount_xz,MPI_DOUBLE,rank_xz,recvtag,MPI_COMM_SCALBL,&req2[11]); - MPI_Isend(sendbuf_Xz, sendCount_Xz,MPI_DOUBLE,rank_Xz,sendtag,MPI_COMM_SCALBL,&req1[12]); - MPI_Irecv(recvbuf_xZ, recvCount_xZ,MPI_DOUBLE,rank_xZ,recvtag,MPI_COMM_SCALBL,&req2[12]); - MPI_Isend(sendbuf_xZ, sendCount_xZ,MPI_DOUBLE,rank_xZ,sendtag,MPI_COMM_SCALBL,&req1[13]); - MPI_Irecv(recvbuf_Xz, recvCount_Xz,MPI_DOUBLE,rank_Xz,recvtag,MPI_COMM_SCALBL,&req2[13]); - MPI_Isend(sendbuf_yz, sendCount_yz,MPI_DOUBLE,rank_yz,sendtag,MPI_COMM_SCALBL,&req1[14]); - MPI_Irecv(recvbuf_YZ, recvCount_YZ,MPI_DOUBLE,rank_YZ,recvtag,MPI_COMM_SCALBL,&req2[14]); - MPI_Isend(sendbuf_YZ, sendCount_YZ,MPI_DOUBLE,rank_YZ,sendtag,MPI_COMM_SCALBL,&req1[15]); - MPI_Irecv(recvbuf_yz, recvCount_yz,MPI_DOUBLE,rank_yz,recvtag,MPI_COMM_SCALBL,&req2[15]); - MPI_Isend(sendbuf_Yz, sendCount_Yz,MPI_DOUBLE,rank_Yz,sendtag,MPI_COMM_SCALBL,&req1[16]); - MPI_Irecv(recvbuf_yZ, 
recvCount_yZ,MPI_DOUBLE,rank_yZ,recvtag,MPI_COMM_SCALBL,&req2[16]); - MPI_Isend(sendbuf_yZ, sendCount_yZ,MPI_DOUBLE,rank_yZ,sendtag,MPI_COMM_SCALBL,&req1[17]); - MPI_Irecv(recvbuf_Yz, recvCount_Yz,MPI_DOUBLE,rank_Yz,recvtag,MPI_COMM_SCALBL,&req2[17]); + req1[0] = MPI_COMM_SCALBL.Isend(sendbuf_x, sendCount_x,rank_x,sendtag); + req2[0] = MPI_COMM_SCALBL.Irecv(recvbuf_X, recvCount_X,rank_X,recvtag); + req1[1] = MPI_COMM_SCALBL.Isend(sendbuf_X, sendCount_X,rank_X,sendtag); + req2[1] = MPI_COMM_SCALBL.Irecv(recvbuf_x, recvCount_x,rank_x,recvtag); + req1[2] = MPI_COMM_SCALBL.Isend(sendbuf_y, sendCount_y,rank_y,sendtag); + req2[2] = MPI_COMM_SCALBL.Irecv(recvbuf_Y, recvCount_Y,rank_Y,recvtag); + req1[3] = MPI_COMM_SCALBL.Isend(sendbuf_Y, sendCount_Y,rank_Y,sendtag); + req2[3] = MPI_COMM_SCALBL.Irecv(recvbuf_y, recvCount_y,rank_y,recvtag); + req1[4] = MPI_COMM_SCALBL.Isend(sendbuf_z, sendCount_z,rank_z,sendtag); + req2[4] = MPI_COMM_SCALBL.Irecv(recvbuf_Z, recvCount_Z,rank_Z,recvtag); + req1[5] = MPI_COMM_SCALBL.Isend(sendbuf_Z, sendCount_Z,rank_Z,sendtag); + req2[5] = MPI_COMM_SCALBL.Irecv(recvbuf_z, recvCount_z,rank_z,recvtag); + req1[6] = MPI_COMM_SCALBL.Isend(sendbuf_xy, sendCount_xy,rank_xy,sendtag); + req2[6] = MPI_COMM_SCALBL.Irecv(recvbuf_XY, recvCount_XY,rank_XY,recvtag); + req1[7] = MPI_COMM_SCALBL.Isend(sendbuf_XY, sendCount_XY,rank_XY,sendtag); + req2[7] = MPI_COMM_SCALBL.Irecv(recvbuf_xy, recvCount_xy,rank_xy,recvtag); + req1[8] = MPI_COMM_SCALBL.Isend(sendbuf_Xy, sendCount_Xy,rank_Xy,sendtag); + req2[8] = MPI_COMM_SCALBL.Irecv(recvbuf_xY, recvCount_xY,rank_xY,recvtag); + req1[9] = MPI_COMM_SCALBL.Isend(sendbuf_xY, sendCount_xY,rank_xY,sendtag); + req2[9] = MPI_COMM_SCALBL.Irecv(recvbuf_Xy, recvCount_Xy,rank_Xy,recvtag); + req1[10] = MPI_COMM_SCALBL.Isend(sendbuf_xz, sendCount_xz,rank_xz,sendtag); + req2[10] = MPI_COMM_SCALBL.Irecv(recvbuf_XZ, recvCount_XZ,rank_XZ,recvtag); + req1[11] = MPI_COMM_SCALBL.Isend(sendbuf_XZ, sendCount_XZ,rank_XZ,sendtag); + req2[11] = MPI_COMM_SCALBL.Irecv(recvbuf_xz, recvCount_xz,rank_xz,recvtag); + req1[12] = MPI_COMM_SCALBL.Isend(sendbuf_Xz, sendCount_Xz,rank_Xz,sendtag); + req2[12] = MPI_COMM_SCALBL.Irecv(recvbuf_xZ, recvCount_xZ,rank_xZ,recvtag); + req1[13] = MPI_COMM_SCALBL.Isend(sendbuf_xZ, sendCount_xZ,rank_xZ,sendtag); + req2[13] = MPI_COMM_SCALBL.Irecv(recvbuf_Xz, recvCount_Xz,rank_Xz,recvtag); + req1[14] = MPI_COMM_SCALBL.Isend(sendbuf_yz, sendCount_yz,rank_yz,sendtag); + req2[14] = MPI_COMM_SCALBL.Irecv(recvbuf_YZ, recvCount_YZ,rank_YZ,recvtag); + req1[15] = MPI_COMM_SCALBL.Isend(sendbuf_YZ, sendCount_YZ,rank_YZ,sendtag); + req2[15] = MPI_COMM_SCALBL.Irecv(recvbuf_yz, recvCount_yz,rank_yz,recvtag); + req1[16] = MPI_COMM_SCALBL.Isend(sendbuf_Yz, sendCount_Yz,rank_Yz,sendtag); + req2[16] = MPI_COMM_SCALBL.Irecv(recvbuf_yZ, recvCount_yZ,rank_yZ,recvtag); + req1[17] = MPI_COMM_SCALBL.Isend(sendbuf_yZ, sendCount_yZ,rank_yZ,sendtag); + req2[17] = MPI_COMM_SCALBL.Irecv(recvbuf_Yz, recvCount_Yz,rank_Yz,recvtag); //................................................................................... } void ScaLBL_Communicator::RecvHalo(double *data){ //................................................................................... - MPI_Waitall(18,req1,stat1); - MPI_Waitall(18,req2,stat2); + MPI_COMM_SCALBL.waitAll(18,req1); + MPI_COMM_SCALBL.waitAll(18,req2); ScaLBL_DeviceBarrier(); //................................................................................... 
//................................................................................... @@ -1564,7 +1562,7 @@ double ScaLBL_Communicator::D3Q19_Flux_BC_z(int *neighborList, double *fq, doubl LocInletArea = double(sendCount_z); else LocInletArea = 0.f; - MPI_Allreduce(&LocInletArea,&InletArea,1,MPI_DOUBLE,MPI_SUM,MPI_COMM_SCALBL); + InletArea = MPI_COMM_SCALBL.sumReduce( LocInletArea ); //printf("Inlet area = %f \n", InletArea); // Set the flux BC @@ -1573,7 +1571,7 @@ double ScaLBL_Communicator::D3Q19_Flux_BC_z(int *neighborList, double *fq, doubl if (kproc == 0) locsum = ScaLBL_D3Q19_AAeven_Flux_BC_z(dvcSendList_z, fq, flux, InletArea, sendCount_z, N); - MPI_Allreduce(&locsum,&sum,1,MPI_DOUBLE,MPI_SUM,MPI_COMM_SCALBL); + sum = MPI_COMM_SCALBL.sumReduce( locsum ); din = flux/InletArea + sum; //if (rank==0) printf("computed din (even) =%f \n",din); if (kproc == 0) @@ -1583,7 +1581,7 @@ double ScaLBL_Communicator::D3Q19_Flux_BC_z(int *neighborList, double *fq, doubl if (kproc == 0) locsum = ScaLBL_D3Q19_AAodd_Flux_BC_z(neighborList, dvcSendList_z, fq, flux, InletArea, sendCount_z, N); - MPI_Allreduce(&locsum,&sum,1,MPI_DOUBLE,MPI_SUM,MPI_COMM_SCALBL); + sum = MPI_COMM_SCALBL.sumReduce( locsum ); din = flux/InletArea + sum; //if (rank==0) printf("computed din (odd)=%f \n",din); diff --git a/common/ScaLBL.h b/common/ScaLBL.h index a50ab7ed..78896d3f 100644 --- a/common/ScaLBL.h +++ b/common/ScaLBL.h @@ -207,9 +207,8 @@ private: // Give the object it's own MPI communicator RankInfoStruct rank_info; MPI_Group Group; // Group of processors associated with this domain - MPI_Comm MPI_COMM_SCALBL; // MPI Communicator for this domain + Utilities::MPI MPI_COMM_SCALBL; // MPI Communicator for this domain MPI_Request req1[18],req2[18]; - MPI_Status stat1[18],stat2[18]; //...................................................................................... // MPI ranks for all 18 neighbors //...................................................................................... 
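Editor's note: the hunks above apply one mechanical substitution throughout ScaLBL.cpp and ScaLBL.h: raw MPI_Isend/MPI_Irecv/MPI_Waitall and MPI_Allreduce calls on MPI_COMM_SCALBL become calls on the Utilities::MPI wrapper, whose Isend/Irecv return the MPI_Request directly and whose waitAll/sumReduce hide the MPI_Status arrays and datatype arguments. The sketch below restates that pattern in isolation, assuming only the wrapper methods that appear in this patch (Isend, Irecv, waitAll, sumReduce); the exchangePair helper and its buffer/rank names are illustrative and not part of the repository.

#include "common/MPI.h"

// Post a non-blocking send/receive pair and wait for both to complete,
// mirroring the per-face exchanges in SendD3Q19AA/RecvD3Q19AA above.
void exchangePair(Utilities::MPI &comm,
                  double *sendbuf, int sendCount, int rank_send,
                  double *recvbuf, int recvCount, int rank_recv,
                  int sendtag, int recvtag)
{
    MPI_Request req[2];
    req[0] = comm.Isend(sendbuf, sendCount, rank_send, sendtag); // post the send
    req[1] = comm.Irecv(recvbuf, recvCount, rank_recv, recvtag); // post the matching receive
    comm.waitAll(2, req); // completes both requests; no MPI_Status bookkeeping needed
}

// Reductions follow the same style, e.g. replacing
//   MPI_Allreduce(&loc, &glob, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_SCALBL);
// with
//   double glob = comm.sumReduce(loc);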
diff --git a/common/SpherePack.cpp b/common/SpherePack.cpp index a7246b72..18057653 100644 --- a/common/SpherePack.cpp +++ b/common/SpherePack.cpp @@ -9,7 +9,6 @@ #include "common/Array.h" #include "common/Utilities.h" -#include "common/MPI_Helpers.h" #include "common/Communication.h" #include "common/Database.h" #include "common/SpherePack.h" diff --git a/common/SpherePack.h b/common/SpherePack.h index 5075b289..56284a40 100644 --- a/common/SpherePack.h +++ b/common/SpherePack.h @@ -12,7 +12,6 @@ #include "common/Array.h" #include "common/Utilities.h" -#include "common/MPI_Helpers.h" #include "common/Communication.h" #include "common/Database.h" diff --git a/common/UnitTest.cpp b/common/UnitTest.cpp index b995fa68..aeb9026e 100755 --- a/common/UnitTest.cpp +++ b/common/UnitTest.cpp @@ -14,44 +14,49 @@ /******************************************************************** * Constructor/Destructor * ********************************************************************/ -UnitTest::UnitTest() +UnitTest::UnitTest() : d_verbose( false ), d_comm( MPI_COMM_SELF ) { -#ifdef USE_MPI - comm = MPI_COMM_WORLD; -#endif + if ( Utilities::MPI::MPI_active() ) + d_comm = MPI_COMM_WORLD; } UnitTest::~UnitTest() { reset(); } void UnitTest::reset() { - mutex.lock(); + d_mutex.lock(); // Clear the data forcing a reallocation - std::vector().swap( pass_messages ); - std::vector().swap( fail_messages ); - std::vector().swap( expected_fail_messages ); - mutex.unlock(); + std::vector().swap( d_pass ); + std::vector().swap( d_fail ); + std::vector().swap( d_expected ); + d_mutex.unlock(); } /******************************************************************** * Add a pass, fail, expected failure message in a thread-safe way * ********************************************************************/ -void UnitTest::passes( const std::string &in ) +void UnitTest::passes( std::string in ) { - mutex.lock(); - pass_messages.push_back( in ); - mutex.unlock(); + d_mutex.lock(); + if ( d_verbose ) + printf( "UnitTest: %i passes: %s\n", d_comm.getRank(), in.data() ); + d_pass.emplace_back( std::move( in ) ); + d_mutex.unlock(); } -void UnitTest::failure( const std::string &in ) +void UnitTest::failure( std::string in ) { - mutex.lock(); - fail_messages.push_back( in ); - mutex.unlock(); + d_mutex.lock(); + if ( d_verbose ) + printf( "UnitTest: %i failed: %s\n", d_comm.getRank(), in.data() ); + d_fail.emplace_back( std::move( in ) ); + d_mutex.unlock(); } -void UnitTest::expected_failure( const std::string &in ) +void UnitTest::expected_failure( std::string in ) { - mutex.lock(); - expected_fail_messages.push_back( in ); - mutex.unlock(); + d_mutex.lock(); + if ( d_verbose ) + printf( "UnitTest: %i expected_failure: %s\n", d_comm.getRank(), in.data() ); + d_expected.emplace_back( std::move( in ) ); + d_mutex.unlock(); } @@ -59,23 +64,6 @@ void UnitTest::expected_failure( const std::string &in ) * Print a global report * * Note: only rank 0 will print, all messages will be aggregated * ********************************************************************/ -inline std::vector UnitTest::allGather( int value ) const -{ - int size = getSize(); - std::vector data( size, value ); -#ifdef USE_MPI - if ( size > 1 ) - MPI_Allgather( &value, 1, MPI_INT, data.data(), 1, MPI_INT, comm ); -#endif - return data; -} -inline void UnitTest::barrier() const -{ -#ifdef USE_MPI - if ( getSize() > 1 ) - MPI_Barrier( comm ); -#endif -} static inline void print_messages( const std::vector> &messages ) { if ( messages.size() > 1 ) { @@ -93,28 +81,27 @@ 
static inline void print_messages( const std::vector> & } void UnitTest::report( const int level0 ) const { - mutex.lock(); - int size = getSize(); - int rank = getRank(); + d_mutex.lock(); + int size = d_comm.getSize(); + int rank = d_comm.getRank(); + // Give all processors a chance to print any remaining messages + d_comm.barrier(); + Utilities::sleep_ms( 10 ); // Broadcast the print level from rank 0 - int level = level0; -#ifdef USE_MPI - if ( getSize() > 1 ) - MPI_Bcast( &level, 1, MPI_INT, 0, comm ); -#endif + int level = d_comm.bcast( level0, 0 ); if ( level < 0 || level > 2 ) ERROR( "Invalid print level" ); // Perform a global all gather to get the number of failures per processor - auto N_pass = allGather( pass_messages.size() ); - auto N_fail = allGather( fail_messages.size() ); - auto N_expected_fail = allGather( expected_fail_messages.size() ); - int N_pass_tot = 0; - int N_fail_tot = 0; - int N_expected_fail_tot = 0; + auto N_pass = d_comm.allGather( d_pass.size() ); + auto N_fail = d_comm.allGather( d_fail.size() ); + auto N_expected = d_comm.allGather( d_expected.size() ); + int N_pass_tot = 0; + int N_fail_tot = 0; + int N_expected_tot = 0; for ( int i = 0; i < size; i++ ) { N_pass_tot += N_pass[i]; N_fail_tot += N_fail[i]; - N_expected_fail_tot += N_expected_fail[i]; + N_expected_tot += N_expected[i]; } // Send all messages to rank 0 (if needed) std::vector> pass_messages_rank( size ); @@ -122,13 +109,13 @@ void UnitTest::report( const int level0 ) const std::vector> expected_fail_rank( size ); // Get the pass messages if ( ( level == 1 && N_pass_tot <= 20 ) || level == 2 ) - pass_messages_rank = UnitTest::gatherMessages( pass_messages, 1 ); + pass_messages_rank = UnitTest::gatherMessages( d_pass, 1 ); // Get the fail messages if ( level == 1 || level == 2 ) - fail_messages_rank = UnitTest::gatherMessages( fail_messages, 2 ); + fail_messages_rank = UnitTest::gatherMessages( d_fail, 2 ); // Get the expected_fail messages - if ( ( level == 1 && N_expected_fail_tot <= 50 ) || level == 2 ) - expected_fail_rank = UnitTest::gatherMessages( expected_fail_messages, 2 ); + if ( ( level == 1 && N_expected_tot <= 50 ) || level == 2 ) + expected_fail_rank = UnitTest::gatherMessages( d_expected, 2 ); // Print the results of all messages (only rank 0 will print) if ( rank == 0 ) { pout << std::endl; @@ -174,31 +161,31 @@ void UnitTest::report( const int level0 ) const pout << std::endl; // Print the tests that expected failed pout << "Tests expected failed" << std::endl; - if ( level == 0 || ( level == 1 && N_expected_fail_tot > 50 ) ) { + if ( level == 0 || ( level == 1 && N_expected_tot > 50 ) ) { // We want to print a summary if ( size > 8 ) { // Print 1 summary for all processors printp( " %i tests expected failed (use report level 2 for more detail)\n", - N_expected_fail_tot ); + N_expected_tot ); } else { // Print a summary for each processor for ( int i = 0; i < size; i++ ) printp( " %i tests expected failed (proc %i) (use report level 2 for more " "detail)\n", - N_expected_fail[i], i ); + N_expected[i], i ); } } else { // We want to print all messages for ( int i = 0; i < size; i++ ) - ASSERT( (int) expected_fail_rank[i].size() == N_expected_fail[i] ); + ASSERT( (int) expected_fail_rank[i].size() == N_expected[i] ); print_messages( expected_fail_rank ); } pout << std::endl; } // Add a barrier to synchronize all processors (rank 0 is much slower) - barrier(); + d_comm.barrier(); Utilities::sleep_ms( 10 ); // Need a brief pause to allow any printing to finish - mutex.unlock(); + 
d_mutex.unlock(); } @@ -208,8 +195,8 @@ void UnitTest::report( const int level0 ) const std::vector> UnitTest::gatherMessages( const std::vector &local_messages, int tag ) const { - const int rank = getRank(); - const int size = getSize(); + const int rank = d_comm.getRank(); + const int size = d_comm.getSize(); std::vector> messages( size ); if ( rank == 0 ) { // Rank 0 should receive all messages @@ -233,7 +220,6 @@ std::vector> UnitTest::gatherMessages( void UnitTest::pack_message_stream( const std::vector &messages, const int rank, const int tag ) const { -#ifdef USE_MPI // Get the size of the messages auto N_messages = (int) messages.size(); auto *msg_size = new int[N_messages]; @@ -254,18 +240,11 @@ void UnitTest::pack_message_stream( k += msg_size[i]; } // Send the message stream (using a non-blocking send) - MPI_Request request; - MPI_Isend( data, size_data, MPI_CHAR, rank, tag, comm, &request ); + auto request = d_comm.Isend( data, size_data, rank, tag ); // Wait for the communication to send and free the temporary memory - MPI_Status status; - MPI_Wait( &request, &status ); + d_comm.wait( request ); delete[] data; delete[] msg_size; -#else - NULL_USE( messages ); - NULL_USE( rank ); - NULL_USE( tag ); -#endif } @@ -274,20 +253,15 @@ void UnitTest::pack_message_stream( ********************************************************************/ std::vector UnitTest::unpack_message_stream( const int rank, const int tag ) const { -#ifdef USE_MPI // Probe the message to get the message size - MPI_Status status; - MPI_Probe( rank, tag, comm, &status ); - int size_data = -1; - MPI_Get_count( &status, MPI_BYTE, &size_data ); + int size_data = d_comm.probe( rank, tag ); ASSERT( size_data >= 0 ); // Allocate memory to receive the data auto *data = new char[size_data]; // receive the data (using a non-blocking receive) - MPI_Request request; - MPI_Irecv( data, size_data, MPI_CHAR, rank, tag, comm, &request ); + auto request = d_comm.Irecv( data, size_data, rank, tag ); // Wait for the communication to be received - MPI_Wait( &request, &status ); + d_comm.wait( request ); // Unpack the message stream int N_messages = 0; memcpy( &N_messages, data, sizeof( int ) ); @@ -303,77 +277,16 @@ std::vector UnitTest::unpack_message_stream( const int rank, const messages[i] = std::string( &data[k], msg_size[i] ); k += msg_size[i]; } + // Delete the temporary memory delete[] data; return messages; -#else - NULL_USE( rank ); - NULL_USE( tag ); - return std::vector(); -#endif } /******************************************************************** * Other functions * ********************************************************************/ -int UnitTest::getRank() const -{ - int rank = 0; -#ifdef USE_MPI - int flag = 0; - MPI_Initialized( &flag ); - if ( flag ) - MPI_Comm_rank( comm, &rank ); -#endif - return rank; -} -int UnitTest::getSize() const -{ - int size = 1; -#ifdef USE_MPI - int flag = 0; - MPI_Initialized( &flag ); - if ( flag ) - MPI_Comm_size( comm, &size ); -#endif - return size; -} -size_t UnitTest::NumPassGlobal() const -{ - size_t num = pass_messages.size(); -#ifdef USE_MPI - if ( getSize() > 1 ) { - auto send = static_cast( num ); - int sum = 0; - MPI_Allreduce( &send, &sum, 1, MPI_INT, MPI_SUM, comm ); - num = static_cast( sum ); - } -#endif - return num; -} -size_t UnitTest::NumFailGlobal() const -{ - size_t num = fail_messages.size(); -#ifdef USE_MPI - if ( getSize() > 1 ) { - auto send = static_cast( num ); - int sum = 0; - MPI_Allreduce( &send, &sum, 1, MPI_INT, MPI_SUM, comm ); - num = 
static_cast( sum ); - } -#endif - return num; -} -size_t UnitTest::NumExpectedFailGlobal() const -{ - size_t num = expected_fail_messages.size(); -#ifdef USE_MPI - if ( getSize() > 1 ) { - auto send = static_cast( num ); - int sum = 0; - MPI_Allreduce( &send, &sum, 1, MPI_INT, MPI_SUM, comm ); - num = static_cast( sum ); - } -#endif - return num; -} +size_t UnitTest::NumPassGlobal() const { return d_comm.sumReduce( d_pass.size() ); } +size_t UnitTest::NumFailGlobal() const { return d_comm.sumReduce( d_fail.size() ); } +size_t UnitTest::NumExpectedFailGlobal() const { return d_comm.sumReduce( d_expected.size() ); } + diff --git a/common/UnitTest.h b/common/UnitTest.h index 80503d19..9d452747 100755 --- a/common/UnitTest.h +++ b/common/UnitTest.h @@ -1,13 +1,11 @@ #ifndef included_UnitTest #define included_UnitTest +#include "common/MPI.h" + #include -#include #include #include -#ifdef USE_MPI -#include "mpi.h" -#endif /*! @@ -28,47 +26,47 @@ * \endcode */ -class UnitTest +class UnitTest final { public: //! Constructor UnitTest(); //! Destructor - virtual ~UnitTest(); + ~UnitTest(); + + // Copy constructor + UnitTest( const UnitTest & ) = delete; + + // Assignment operator + UnitTest &operator=( const UnitTest & ) = delete; //! Indicate a passed test (thread-safe) - virtual void passes( const std::string &in ); + void passes( std::string in ); //! Indicate a failed test (thread-safe) - virtual void failure( const std::string &in ); + void failure( std::string in ); //! Indicate an expected failed test (thread-safe) - virtual void expected_failure( const std::string &in ); + void expected_failure( std::string in ); //! Return the number of passed tests locally - virtual size_t NumPassLocal() const { return pass_messages.size(); } + inline size_t NumPassLocal() const { return d_pass.size(); } //! Return the number of failed tests locally - virtual size_t NumFailLocal() const { return fail_messages.size(); } + inline size_t NumFailLocal() const { return d_fail.size(); } //! Return the number of expected failed tests locally - virtual size_t NumExpectedFailLocal() const { return expected_fail_messages.size(); } + inline size_t NumExpectedFailLocal() const { return d_expected.size(); } //! Return the number of passed tests locally - virtual size_t NumPassGlobal() const; + size_t NumPassGlobal() const; //! Return the number of failed tests locally - virtual size_t NumFailGlobal() const; + size_t NumFailGlobal() const; //! Return the number of expected failed tests locally - virtual size_t NumExpectedFailGlobal() const; - - //! Return the rank of the current processor - int getRank() const; - - //! Return the number of processors - int getSize() const; + size_t NumExpectedFailGlobal() const; /*! * Print a report of the passed and failed tests. @@ -77,29 +75,28 @@ public: * to print correctly). * @param level Optional integer specifying the level of reporting (default: 1) * 0: Report the number of tests passed, failed, and expected failures. - * 1: Report the number of passed tests (if <=20) or the number passed - * otherwise, report all failures, report the number of expected - * failed tests (if <=50) or the number passed otherwise. + * 1: Report the passed tests (if <=20) or number passed, + * Report all failures, + * Report the expected failed tests (if <=50) or the number passed. * 2: Report all passed, failed, and expected failed tests. */ - virtual void report( const int level = 1 ) const; + void report( const int level = 1 ) const; //! 
Clear the messages void reset(); -protected: - std::vector pass_messages; - std::vector fail_messages; - std::vector expected_fail_messages; - mutable std::mutex mutex; -#ifdef USE_MPI - MPI_Comm comm; -#endif + //! Make the unit test operator verbose? + void verbose( bool verbose = true ) { d_verbose = verbose; } private: - // Make the copy constructor private - UnitTest( const UnitTest & ) {} + std::vector d_pass; + std::vector d_fail; + std::vector d_expected; + bool d_verbose; + mutable std::mutex d_mutex; + Utilities::MPI d_comm; +private: // Function to pack the messages into a single data stream and send to the given processor // Note: This function does not return until the message stream has been sent void pack_message_stream( @@ -109,9 +106,7 @@ private: // Note: This function does not return until the message stream has been received std::vector unpack_message_stream( const int rank, const int tag ) const; - // Helper functions - inline void barrier() const; - inline std::vector allGather( int value ) const; + // Gather the messages inline std::vector> gatherMessages( const std::vector &local_messages, int tag ) const; }; diff --git a/common/UtilityMacros.h b/common/UtilityMacros.h index bfac172f..2c374ef1 100644 --- a/common/UtilityMacros.h +++ b/common/UtilityMacros.h @@ -143,35 +143,43 @@ * Be sure to follow with ENABLE_WARNINGS */ // clang-format off -#ifdef DISABLE_WARNINGS - // Macros previously defined -#elif defined( USING_MSVC ) +#ifndef DISABLE_WARNINGS +#if defined( USING_MSVC ) #define DISABLE_WARNINGS __pragma( warning( push, 0 ) ) #define ENABLE_WARNINGS __pragma( warning( pop ) ) #elif defined( USING_CLANG ) #define DISABLE_WARNINGS \ - _Pragma( "clang diagnostic push" ) _Pragma( "clang diagnostic ignored \"-Wall\"" ) \ + _Pragma( "clang diagnostic push" ) \ + _Pragma( "clang diagnostic ignored \"-Wall\"" ) \ _Pragma( "clang diagnostic ignored \"-Wextra\"" ) \ _Pragma( "clang diagnostic ignored \"-Wunused-private-field\"" ) \ - _Pragma( "clang diagnostic ignored \"-Wmismatched-new-delete\"" ) + _Pragma( "clang diagnostic ignored \"-Wdeprecated-declarations\"" ) \ + _Pragma( "clang diagnostic ignored \"-Winteger-overflow\"" ) #define ENABLE_WARNINGS _Pragma( "clang diagnostic pop" ) #elif defined( USING_GCC ) - // Note: We cannot disable the -Wliteral-suffix message with this macro because the - // pragma command cannot suppress warnings from the C++ preprocessor. See gcc bug #53431. 
#define DISABLE_WARNINGS \ - _Pragma( "GCC diagnostic push" ) _Pragma( "GCC diagnostic ignored \"-Wall\"" ) \ + _Pragma( "GCC diagnostic push" ) \ + _Pragma( "GCC diagnostic ignored \"-Wpragmas\"" ) \ + _Pragma( "GCC diagnostic ignored \"-Wall\"" ) \ _Pragma( "GCC diagnostic ignored \"-Wextra\"" ) \ - _Pragma( "GCC diagnostic ignored \"-Wpragmas\"" ) \ + _Pragma( "GCC diagnostic ignored \"-Wpedantic\"" ) \ _Pragma( "GCC diagnostic ignored \"-Wunused-local-typedefs\"" ) \ _Pragma( "GCC diagnostic ignored \"-Woverloaded-virtual\"" ) \ _Pragma( "GCC diagnostic ignored \"-Wunused-parameter\"" ) \ - _Pragma( "GCC diagnostic ignored \"-Warray-bounds\"" ) \ + _Pragma( "GCC diagnostic ignored \"-Wdeprecated-declarations\"" ) \ + _Pragma( "GCC diagnostic ignored \"-Wvirtual-move-assign\"" ) \ + _Pragma( "GCC diagnostic ignored \"-Wunused-function\"" ) \ + _Pragma( "GCC diagnostic ignored \"-Woverflow\"" ) \ + _Pragma( "GCC diagnostic ignored \"-Wunused-variable\"" ) \ + _Pragma( "GCC diagnostic ignored \"-Wignored-qualifiers\"" ) \ + _Pragma( "GCC diagnostic ignored \"-Wenum-compare\"" ) \ _Pragma( "GCC diagnostic ignored \"-Wterminate\"" ) #define ENABLE_WARNINGS _Pragma( "GCC diagnostic pop" ) #else #define DISABLE_WARNINGS #define ENABLE_WARNINGS #endif +#endif // clang-format on diff --git a/cpu/BGK.cpp b/cpu/BGK.cpp index 436ab381..bccc5b77 100644 --- a/cpu/BGK.cpp +++ b/cpu/BGK.cpp @@ -1,5 +1,4 @@ extern "C" void ScaLBL_D3Q19_AAeven_BGK(double *dist, int start, int finish, int Np, double rlx, double Fx, double Fy, double Fz){ - int n; // conserved momemnts double rho,ux,uy,uz,uu; // non-conserved moments @@ -111,14 +110,12 @@ extern "C" void ScaLBL_D3Q19_AAeven_BGK(double *dist, int start, int finish, int } extern "C" void ScaLBL_D3Q19_AAodd_BGK(int *neighborList, double *dist, int start, int finish, int Np, double rlx, double Fx, double Fy, double Fz){ - int n; // conserved momemnts double rho,ux,uy,uz,uu; // non-conserved moments double f0,f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,f11,f12,f13,f14,f15,f16,f17,f18; int nr1,nr2,nr3,nr4,nr5,nr6,nr7,nr8,nr9,nr10,nr11,nr12,nr13,nr14,nr15,nr16,nr17,nr18; - int nread; for (int n=start; n 0 ){ // Get the density value (Streaming already performed) - Na = Den[n]; - Nb = Den[N+n]; + double Na = Den[n]; + double Nb = Den[N+n]; Phi[n] = (Na-Nb)/(Na+Nb); } } - //................................................................... 
} extern "C" void ScaLBL_SetSlice_z(double *Phi, double value, int Nx, int Ny, int Nz, int Slice){ - int n; - for (n=Slice*Nx*Ny; n<(Slice+1)*Nx*Ny; n++){ + for (int n=Slice*Nx*Ny; n<(Slice+1)*Nx*Ny; n++){ Phi[n] = value; } } @@ -1255,7 +1246,7 @@ extern "C" void ScaLBL_D3Q19_AAeven_Color(int *Map, double *dist, double *Aq, do double *Vel, double rhoA, double rhoB, double tauA, double tauB, double alpha, double beta, double Fx, double Fy, double Fz, int strideY, int strideZ, int start, int finish, int Np){ - int ijk,nn,n; + int ijk,nn; double fq; // conserved momemnts double rho,jx,jy,jz; @@ -1838,7 +1829,7 @@ extern "C" void ScaLBL_D3Q19_AAodd_Color(int *neighborList, int *Map, double *di double *Phi, double *Vel, double rhoA, double rhoB, double tauA, double tauB, double alpha, double beta, double Fx, double Fy, double Fz, int strideY, int strideZ, int start, int finish, int Np){ - int n,nn,ijk,nread; + int nn,ijk,nread; int nr1,nr2,nr3,nr4,nr5,nr6; int nr7,nr8,nr9,nr10; int nr11,nr12,nr13,nr14; @@ -2492,7 +2483,7 @@ extern "C" void ScaLBL_D3Q19_AAodd_Color(int *neighborList, int *Map, double *di extern "C" void ScaLBL_D3Q7_AAodd_PhaseField(int *neighborList, int *Map, double *Aq, double *Bq, double *Den, double *Phi, int start, int finish, int Np){ - int idx,n,nread; + int idx, nread; double fq,nA,nB; for (int n=start; n #include -ScaLBL_ColorModel::ScaLBL_ColorModel(int RANK, int NP, MPI_Comm COMM): +ScaLBL_ColorModel::ScaLBL_ColorModel(int RANK, int NP, const Utilities::MPI& COMM): rank(RANK), nprocs(NP), Restart(0),timestep(0),timestepMax(0),tauA(0),tauB(0),rhoA(0),rhoB(0),alpha(0),beta(0), Fx(0),Fy(0),Fz(0),flux(0),din(0),dout(0),inletA(0),inletB(0),outletA(0),outletB(0), Nx(0),Ny(0),Nz(0),N(0),Np(0),nprocx(0),nprocy(0),nprocz(0),BoundaryCondition(0),Lx(0),Ly(0),Lz(0),comm(COMM) @@ -167,9 +167,9 @@ void ScaLBL_ColorModel::SetDomain(){ for (int i=0; iid[i] = 1; // initialize this way //Averages = std::shared_ptr ( new TwoPhase(Dm) ); // TwoPhase analysis object Averages = std::shared_ptr ( new SubPhase(Dm) ); // TwoPhase analysis object - MPI_Barrier(comm); + comm.barrier(); Dm->CommInit(); - MPI_Barrier(comm); + comm.barrier(); // Read domain parameters rank = Dm->rank(); nprocx = Dm->nprocx(); @@ -292,7 +292,7 @@ void ScaLBL_ColorModel::AssignComponentLabels(double *phase) for (int i=0; iid[i] = Mask->id[i]; for (size_t idx=0; idxComm, label_count[idx]); + label_count_global[idx] = Dm->Comm.sumReduce( label_count[idx] ); if (rank==0){ printf("Component labels: %lu \n",NLABELS); @@ -333,7 +333,7 @@ void ScaLBL_ColorModel::Create(){ Map.resize(Nx,Ny,Nz); Map.fill(-2); auto neighborList= new int[18*Npad]; Np = ScaLBL_Comm->MemoryOptimizedLayoutAA(Map,neighborList,Mask->id,Np); - MPI_Barrier(comm); + comm.barrier(); //........................................................................... // MAIN VARIABLES ALLOCATED HERE @@ -465,7 +465,7 @@ void ScaLBL_ColorModel::Initialize(){ ScaLBL_CopyToDevice(Phi,cPhi,N*sizeof(double)); ScaLBL_DeviceBarrier(); - MPI_Barrier(comm); + comm.barrier(); } if (rank==0) printf ("Initializing phase field \n"); @@ -651,7 +651,7 @@ void ScaLBL_ColorModel::Run(){ //.......create and start timer............ double starttime,stoptime,cputime; ScaLBL_DeviceBarrier(); - MPI_Barrier(comm); + comm.barrier(); starttime = MPI_Wtime(); //......................................... 
@@ -700,7 +700,8 @@ void ScaLBL_ColorModel::Run(){ } ScaLBL_D3Q19_AAodd_Color(NeighborList, dvcMap, fq, Aq, Bq, Den, Phi, Velocity, rhoA, rhoB, tauA, tauB, alpha, beta, Fx, Fy, Fz, Nx, Nx*Ny, 0, ScaLBL_Comm->LastExterior(), Np); - ScaLBL_DeviceBarrier(); MPI_Barrier(comm); + ScaLBL_DeviceBarrier(); + comm.barrier(); // *************EVEN TIMESTEP************* timestep++; @@ -735,10 +736,10 @@ void ScaLBL_ColorModel::Run(){ } ScaLBL_D3Q19_AAeven_Color(dvcMap, fq, Aq, Bq, Den, Phi, Velocity, rhoA, rhoB, tauA, tauB, alpha, beta, Fx, Fy, Fz, Nx, Nx*Ny, 0, ScaLBL_Comm->LastExterior(), Np); - ScaLBL_DeviceBarrier(); MPI_Barrier(comm); + ScaLBL_DeviceBarrier(); + comm.barrier(); //************************************************************************ - MPI_Barrier(comm); PROFILE_STOP("Update"); if (rank==0 && timestep%analysis_interval == 0 && BoundaryCondition > 0){ @@ -979,7 +980,7 @@ void ScaLBL_ColorModel::Run(){ //morph_delta *= (-1.0); REVERSE_FLOW_DIRECTION = false; } - MPI_Barrier(comm); + comm.barrier(); } morph_timesteps += analysis_interval; } @@ -989,7 +990,7 @@ void ScaLBL_ColorModel::Run(){ PROFILE_SAVE("lbpm_color_simulator",1); //************************************************************************ ScaLBL_DeviceBarrier(); - MPI_Barrier(comm); + comm.barrier(); stoptime = MPI_Wtime(); if (rank==0) printf("-------------------------------------------------------------------\n"); // Compute the walltime per timestep @@ -1034,17 +1035,17 @@ double ScaLBL_ColorModel::ImageInit(std::string Filename){ } } - Count=sumReduce( Dm->Comm, Count); - PoreCount=sumReduce( Dm->Comm, PoreCount); + Count = Dm->Comm.sumReduce( Count ); + PoreCount = Dm->Comm.sumReduce( PoreCount ); if (rank==0) printf(" new saturation: %f (%f / %f) \n", Count / PoreCount, Count, PoreCount); ScaLBL_CopyToDevice(Phi, PhaseLabel, Nx*Ny*Nz*sizeof(double)); - MPI_Barrier(comm); + comm.barrier(); ScaLBL_D3Q19_Init(fq, Np); ScaLBL_PhaseField_Init(dvcMap, Phi, Den, Aq, Bq, 0, ScaLBL_Comm->LastExterior(), Np); ScaLBL_PhaseField_Init(dvcMap, Phi, Den, Aq, Bq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); - MPI_Barrier(comm); + comm.barrier(); ScaLBL_CopyToHost(Averages->Phi.data(),Phi,Nx*Ny*Nz*sizeof(double)); @@ -1076,7 +1077,7 @@ double ScaLBL_ColorModel::MorphOpenConnected(double target_volume_change){ BlobIDstruct new_index; double vF=0.0; double vS=0.0; ComputeGlobalBlobIDs(nx-2,ny-2,nz-2,Dm->rank_info,phase,Averages->SDs,vF,vS,phase_label,Dm->Comm); - MPI_Barrier(Dm->Comm); + Dm->Comm.barrier(); long long count_connected=0; long long count_porespace=0; @@ -1098,9 +1099,9 @@ double ScaLBL_ColorModel::MorphOpenConnected(double target_volume_change){ } } } - count_connected=sumReduce( Dm->Comm, count_connected); - count_porespace=sumReduce( Dm->Comm, count_porespace); - count_water=sumReduce( Dm->Comm, count_water); + count_connected = Dm->Comm.sumReduce( count_connected); + count_porespace = Dm->Comm.sumReduce( count_porespace); + count_water = Dm->Comm.sumReduce( count_water); for (int k=0; kComm, count_morphopen); + count_morphopen = Dm->Comm.sumReduce( count_morphopen); volume_change = double(count_morphopen - count_connected); if (rank==0) printf(" opening of connected oil %f \n",volume_change/count_connected); @@ -1278,8 +1279,8 @@ double ScaLBL_ColorModel::SeedPhaseField(const double seed_water_in_oil){ mass_loss += random_value*seed_water_in_oil; } - count= sumReduce( Dm->Comm, count); - mass_loss= sumReduce( Dm->Comm, mass_loss); + count = Dm->Comm.sumReduce( count ); + mass_loss = 
Dm->Comm.sumReduce( mass_loss ); if (rank == 0) printf("Remove mass %f from %f voxels \n",mass_loss,count); // Need to initialize Aq, Bq, Den, Phi directly @@ -1316,7 +1317,7 @@ double ScaLBL_ColorModel::MorphInit(const double beta, const double target_delta } } } - double volume_initial = sumReduce( Dm->Comm, count); + double volume_initial = Dm->Comm.sumReduce( count); /* sprintf(LocalRankFilename,"phi_initial.%05i.raw",rank); FILE *INPUT = fopen(LocalRankFilename,"wb"); @@ -1326,7 +1327,7 @@ double ScaLBL_ColorModel::MorphInit(const double beta, const double target_delta // 2. Identify connected components of phase field -> phase_label BlobIDstruct new_index; ComputeGlobalBlobIDs(Nx-2,Ny-2,Nz-2,rank_info,phase,Averages->SDs,vF,vS,phase_label,comm); - MPI_Barrier(comm); + comm.barrier(); // only operate on component "0" count = 0.0; @@ -1348,8 +1349,8 @@ double ScaLBL_ColorModel::MorphInit(const double beta, const double target_delta } } } - double volume_connected = sumReduce( Dm->Comm, count); - second_biggest = sumReduce( Dm->Comm, second_biggest); + double volume_connected = Dm->Comm.sumReduce( count ); + second_biggest = Dm->Comm.sumReduce( second_biggest ); /*int reach_x, reach_y, reach_z; for (int k=0; kComm, count); + double volume_final = Dm->Comm.sumReduce( count ); delta_volume = (volume_final-volume_initial); if (rank == 0) printf("MorphInit: change fluid volume fraction by %f \n", delta_volume/volume_initial); diff --git a/models/ColorModel.h b/models/ColorModel.h index a3b3a124..c52f04c3 100644 --- a/models/ColorModel.h +++ b/models/ColorModel.h @@ -12,13 +12,13 @@ Implementation of color lattice boltzmann model #include "common/Communication.h" #include "analysis/TwoPhase.h" #include "analysis/runAnalysis.h" -#include "common/MPI_Helpers.h" +#include "common/MPI.h" #include "ProfilerApp.h" #include "threadpool/thread_pool.h" class ScaLBL_ColorModel{ public: - ScaLBL_ColorModel(int RANK, int NP, MPI_Comm COMM); + ScaLBL_ColorModel(int RANK, int NP, const Utilities::MPI& COMM); ~ScaLBL_ColorModel(); // functions in they should be run @@ -68,7 +68,7 @@ public: double *Pressure; private: - MPI_Comm comm; + Utilities::MPI comm; int dist_mem_size; int neighborSize; diff --git a/models/DFHModel.cpp b/models/DFHModel.cpp index 4eb03bea..ced5853f 100644 --- a/models/DFHModel.cpp +++ b/models/DFHModel.cpp @@ -3,7 +3,7 @@ color lattice boltzmann model */ #include "models/DFHModel.h" -ScaLBL_DFHModel::ScaLBL_DFHModel(int RANK, int NP, MPI_Comm COMM): +ScaLBL_DFHModel::ScaLBL_DFHModel(int RANK, int NP, const Utilities::MPI& COMM): rank(RANK), nprocs(NP), Restart(0),timestep(0),timestepMax(0),tauA(0),tauB(0),rhoA(0),rhoB(0),alpha(0),beta(0), Fx(0),Fy(0),Fz(0),flux(0),din(0),dout(0),inletA(0),inletB(0),outletA(0),outletB(0), Nx(0),Ny(0),Nz(0),N(0),Np(0),nprocx(0),nprocy(0),nprocz(0),BoundaryCondition(0),Lx(0),Ly(0),Lz(0),comm(COMM) @@ -100,16 +100,16 @@ void ScaLBL_DFHModel::ReadParams(string filename){ } void ScaLBL_DFHModel::SetDomain(){ - Dm = std::shared_ptr(new Domain(domain_db,comm)); // full domain for analysis - Mask = std::shared_ptr(new Domain(domain_db,comm)); // mask domain removes immobile phases + Dm = std::make_shared(domain_db,comm); // full domain for analysis + Mask = std::make_shared(domain_db,comm); // mask domain removes immobile phases Nx+=2; Ny+=2; Nz += 2; N = Nx*Ny*Nz; id = new char [N]; - for (int i=0; iid[i] = 1; // initialize this way - Averages = std::shared_ptr ( new TwoPhase(Dm) ); // TwoPhase analysis object - MPI_Barrier(comm); + for (int i=0; iid[i] = 1; 
// initialize this way + Averages = std::make_shared( Dm ); // TwoPhase analysis object + comm.barrier(); Dm->CommInit(); - MPI_Barrier(comm); + comm.barrier(); rank = Dm->rank(); } @@ -131,7 +131,7 @@ void ScaLBL_DFHModel::ReadInput(){ sprintf(LocalRankString,"%05d",rank); sprintf(LocalRankFilename,"%s%s","SignDist.",LocalRankString); ReadBinaryFile(LocalRankFilename, Averages->SDs.data(), N); - MPI_Barrier(comm); + comm.barrier(); if (rank == 0) cout << "Domain set." << endl; } @@ -206,7 +206,7 @@ void ScaLBL_DFHModel::Create(){ Map.resize(Nx,Ny,Nz); Map.fill(-2); auto neighborList= new int[18*Npad]; Np = ScaLBL_Comm->MemoryOptimizedLayoutAA(Map,neighborList,Mask->id,Np); - MPI_Barrier(comm); + comm.barrier(); //........................................................................... // MAIN VARIABLES ALLOCATED HERE @@ -424,7 +424,7 @@ void ScaLBL_DFHModel::Initialize(){ } } } - MPI_Allreduce(&count_wet,&count_wet_global,1,MPI_DOUBLE,MPI_SUM,comm); + count_wet_global = comm.sumReduce( count_wet ); if (rank==0) printf("Wetting phase volume fraction =%f \n",count_wet_global/double(Nx*Ny*Nz*nprocs)); // initialize phi based on PhaseLabel (include solid component labels) ScaLBL_CopyToDevice(Phi, PhaseLabel, Np*sizeof(double)); @@ -446,7 +446,7 @@ void ScaLBL_DFHModel::Initialize(){ timestep=0; } } - MPI_Bcast(×tep,1,MPI_INT,0,comm); + comm.bcast(×tep,1,0); // Read in the restart file to CPU buffers double *cPhi = new double[Np]; double *cDist = new double[19*Np]; @@ -468,7 +468,7 @@ void ScaLBL_DFHModel::Initialize(){ ScaLBL_DeviceBarrier(); delete [] cPhi; delete [] cDist; - MPI_Barrier(comm); + comm.barrier(); } if (rank==0) printf ("Initializing phase field \n"); @@ -486,7 +486,7 @@ void ScaLBL_DFHModel::Run(){ //.......create and start timer............ double starttime,stoptime,cputime; ScaLBL_DeviceBarrier(); - MPI_Barrier(comm); + comm.barrier(); starttime = MPI_Wtime(); //......................................... 
//************ MAIN ITERATION LOOP ***************************************/ @@ -532,7 +532,8 @@ void ScaLBL_DFHModel::Run(){ } ScaLBL_D3Q19_AAodd_DFH(NeighborList, fq, Aq, Bq, Den, Phi, Gradient, SolidPotential, rhoA, rhoB, tauA, tauB, alpha, beta, Fx, Fy, Fz, 0, ScaLBL_Comm->LastExterior(), Np); - ScaLBL_DeviceBarrier(); MPI_Barrier(comm); + ScaLBL_DeviceBarrier(); + comm.barrier(); // *************EVEN TIMESTEP************* timestep++; @@ -568,9 +569,9 @@ void ScaLBL_DFHModel::Run(){ } ScaLBL_D3Q19_AAeven_DFH(NeighborList, fq, Aq, Bq, Den, Phi, Gradient, SolidPotential, rhoA, rhoB, tauA, tauB, alpha, beta, Fx, Fy, Fz, 0, ScaLBL_Comm->LastExterior(), Np); - ScaLBL_DeviceBarrier(); MPI_Barrier(comm); + ScaLBL_DeviceBarrier(); + comm.barrier(); //************************************************************************ - MPI_Barrier(comm); PROFILE_STOP("Update"); // Run the analysis @@ -581,7 +582,7 @@ void ScaLBL_DFHModel::Run(){ PROFILE_SAVE("lbpm_color_simulator",1); //************************************************************************ ScaLBL_DeviceBarrier(); - MPI_Barrier(comm); + comm.barrier(); stoptime = MPI_Wtime(); if (rank==0) printf("-------------------------------------------------------------------\n"); // Compute the walltime per timestep diff --git a/models/DFHModel.h b/models/DFHModel.h index 883ec6f8..00e6e6b3 100644 --- a/models/DFHModel.h +++ b/models/DFHModel.h @@ -12,13 +12,13 @@ Implementation of color lattice boltzmann model #include "common/Communication.h" #include "analysis/TwoPhase.h" #include "analysis/runAnalysis.h" -#include "common/MPI_Helpers.h" +#include "common/MPI.h" #include "ProfilerApp.h" #include "threadpool/thread_pool.h" class ScaLBL_DFHModel{ public: - ScaLBL_DFHModel(int RANK, int NP, MPI_Comm COMM); + ScaLBL_DFHModel(int RANK, int NP, const Utilities::MPI& COMM); ~ScaLBL_DFHModel(); // functions in they should be run @@ -66,7 +66,7 @@ public: double *Pressure; private: - MPI_Comm comm; + Utilities::MPI comm; int dist_mem_size; int neighborSize; diff --git a/models/MRTModel.cpp b/models/MRTModel.cpp index 9ba733ae..d9b8069d 100644 --- a/models/MRTModel.cpp +++ b/models/MRTModel.cpp @@ -4,7 +4,7 @@ #include "models/MRTModel.h" #include "analysis/distance.h" -ScaLBL_MRTModel::ScaLBL_MRTModel(int RANK, int NP, MPI_Comm COMM): +ScaLBL_MRTModel::ScaLBL_MRTModel(int RANK, int NP, const Utilities::MPI& COMM): rank(RANK), nprocs(NP), Restart(0),timestep(0),timestepMax(0),tau(0), Fx(0),Fy(0),Fz(0),flux(0),din(0),dout(0),mu(0), Nx(0),Ny(0),Nz(0),N(0),Np(0),nprocx(0),nprocy(0),nprocz(0),BoundaryCondition(0),Lx(0),Ly(0),Lz(0),comm(COMM) @@ -82,9 +82,9 @@ void ScaLBL_MRTModel::SetDomain(){ for (int i=0; iid[i] = 1; // initialize this way //Averages = std::shared_ptr ( new TwoPhase(Dm) ); // TwoPhase analysis object - MPI_Barrier(comm); + comm.barrier(); Dm->CommInit(); - MPI_Barrier(comm); + comm.barrier(); rank = Dm->rank(); nprocx = Dm->nprocx(); @@ -152,7 +152,7 @@ void ScaLBL_MRTModel::Create(){ Map.resize(Nx,Ny,Nz); Map.fill(-2); auto neighborList= new int[18*Npad]; Np = ScaLBL_Comm->MemoryOptimizedLayoutAA(Map,neighborList,Mask->id,Np); - MPI_Barrier(comm); + comm.barrier(); //........................................................................... // MAIN VARIABLES ALLOCATED HERE //........................................................................... 
@@ -171,7 +171,7 @@ void ScaLBL_MRTModel::Create(){ if (rank==0) printf ("Setting up device map and neighbor list \n"); // copy the neighbor list ScaLBL_CopyToDevice(NeighborList, neighborList, neighborSize); - MPI_Barrier(comm); + comm.barrier(); } @@ -206,7 +206,8 @@ void ScaLBL_MRTModel::Run(){ //.......create and start timer............ double starttime,stoptime,cputime; - ScaLBL_DeviceBarrier(); MPI_Barrier(comm); + ScaLBL_DeviceBarrier(); + comm.barrier(); starttime = MPI_Wtime(); if (rank==0) printf("Beginning AA timesteps, timestepMax = %i \n", timestepMax); if (rank==0) printf("********************************************************\n"); @@ -220,18 +221,21 @@ void ScaLBL_MRTModel::Run(){ ScaLBL_D3Q19_AAodd_MRT(NeighborList, fq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np, rlx_setA, rlx_setB, Fx, Fy, Fz); ScaLBL_Comm->RecvD3Q19AA(fq); //WRITE INTO OPPOSITE ScaLBL_D3Q19_AAodd_MRT(NeighborList, fq, 0, ScaLBL_Comm->LastExterior(), Np, rlx_setA, rlx_setB, Fx, Fy, Fz); - ScaLBL_DeviceBarrier(); MPI_Barrier(comm); + ScaLBL_DeviceBarrier(); + comm.barrier(); timestep++; ScaLBL_Comm->SendD3Q19AA(fq); //READ FORM NORMAL ScaLBL_D3Q19_AAeven_MRT(fq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np, rlx_setA, rlx_setB, Fx, Fy, Fz); ScaLBL_Comm->RecvD3Q19AA(fq); //WRITE INTO OPPOSITE ScaLBL_D3Q19_AAeven_MRT(fq, 0, ScaLBL_Comm->LastExterior(), Np, rlx_setA, rlx_setB, Fx, Fy, Fz); - ScaLBL_DeviceBarrier(); MPI_Barrier(comm); + ScaLBL_DeviceBarrier(); + comm.barrier(); //************************************************************************/ if (timestep%1000==0){ ScaLBL_D3Q19_Momentum(fq,Velocity, Np); - ScaLBL_DeviceBarrier(); MPI_Barrier(comm); + ScaLBL_DeviceBarrier(); + comm.barrier(); ScaLBL_Comm->RegularLayout(Map,&Velocity[0],Velocity_x); ScaLBL_Comm->RegularLayout(Map,&Velocity[Np],Velocity_y); ScaLBL_Comm->RegularLayout(Map,&Velocity[2*Np],Velocity_z); @@ -253,10 +257,10 @@ void ScaLBL_MRTModel::Run(){ } } } - MPI_Allreduce(&vax_loc,&vax,1,MPI_DOUBLE,MPI_SUM,Mask->Comm); - MPI_Allreduce(&vay_loc,&vay,1,MPI_DOUBLE,MPI_SUM,Mask->Comm); - MPI_Allreduce(&vaz_loc,&vaz,1,MPI_DOUBLE,MPI_SUM,Mask->Comm); - MPI_Allreduce(&count_loc,&count,1,MPI_DOUBLE,MPI_SUM,Mask->Comm); + vax = Mask->Comm.sumReduce( vax_loc ); + vay = Mask->Comm.sumReduce( vay_loc ); + vaz = Mask->Comm.sumReduce( vaz_loc ); + count = Mask->Comm.sumReduce( count_loc ); vax /= count; vay /= count; @@ -286,10 +290,10 @@ void ScaLBL_MRTModel::Run(){ double As = Morphology.A(); double Hs = Morphology.H(); double Xs = Morphology.X(); - Vs=sumReduce( Dm->Comm, Vs); - As=sumReduce( Dm->Comm, As); - Hs=sumReduce( Dm->Comm, Hs); - Xs=sumReduce( Dm->Comm, Xs); + Vs = Dm->Comm.sumReduce( Vs); + As = Dm->Comm.sumReduce( As); + Hs = Dm->Comm.sumReduce( Hs); + Xs = Dm->Comm.sumReduce( Xs); double h = Dm->voxel_length; double absperm = h*h*mu*Mask->Porosity()*flow_rate / force_mag; if (rank==0) { @@ -323,7 +327,8 @@ void ScaLBL_MRTModel::VelocityField(){ /* Minkowski Morphology(Mask); int SIZE=Np*sizeof(double); ScaLBL_D3Q19_Momentum(fq,Velocity, Np); - ScaLBL_DeviceBarrier(); MPI_Barrier(comm); + ScaLBL_DeviceBarrier();. 
+ comm.barrier(); ScaLBL_CopyToHost(&VELOCITY[0],&Velocity[0],3*SIZE); memcpy(Morphology.SDn.data(), Distance.data(), Nx*Ny*Nz*sizeof(double)); @@ -350,10 +355,10 @@ void ScaLBL_MRTModel::VelocityField(){ vaz_loc += VELOCITY[2*Np+n]; count_loc+=1.0; } - MPI_Allreduce(&vax_loc,&vax,1,MPI_DOUBLE,MPI_SUM,Mask->Comm); - MPI_Allreduce(&vay_loc,&vay,1,MPI_DOUBLE,MPI_SUM,Mask->Comm); - MPI_Allreduce(&vaz_loc,&vaz,1,MPI_DOUBLE,MPI_SUM,Mask->Comm); - MPI_Allreduce(&count_loc,&count,1,MPI_DOUBLE,MPI_SUM,Mask->Comm); + vax = Mask->Comm.sumReduce( vax_loc ); + vay = Mask->Comm.sumReduce( vay_loc ); + vaz = Mask->Comm.sumReduce( vaz_loc ); + count = Mask->Comm.sumReduce( count_loc ); vax /= count; vay /= count; diff --git a/models/MRTModel.h b/models/MRTModel.h index aa4ee1f0..7e23cc44 100644 --- a/models/MRTModel.h +++ b/models/MRTModel.h @@ -11,13 +11,13 @@ #include "common/ScaLBL.h" #include "common/Communication.h" -#include "common/MPI_Helpers.h" +#include "common/MPI.h" #include "analysis/Minkowski.h" #include "ProfilerApp.h" class ScaLBL_MRTModel{ public: - ScaLBL_MRTModel(int RANK, int NP, MPI_Comm COMM); + ScaLBL_MRTModel(int RANK, int NP, const Utilities::MPI& COMM); ~ScaLBL_MRTModel(); // functions in they should be run @@ -63,7 +63,7 @@ public: DoubleArray Velocity_y; DoubleArray Velocity_z; private: - MPI_Comm comm; + Utilities::MPI comm; // filenames char LocalRankString[8]; diff --git a/tests/BlobAnalyzeParallel.cpp b/tests/BlobAnalyzeParallel.cpp index c9e3f8fc..48e9e230 100644 --- a/tests/BlobAnalyzeParallel.cpp +++ b/tests/BlobAnalyzeParallel.cpp @@ -100,11 +100,10 @@ inline void WriteBlobStates(TwoPhase TCAT, double D, double porosity){ int main(int argc, char **argv) { // Initialize MPI - int rank, nprocs; MPI_Init(&argc,&argv); - MPI_Comm comm = MPI_COMM_WORLD; - MPI_Comm_rank(comm,&rank); - MPI_Comm_size(comm,&nprocs); + Utilities::MPI comm( MPI_COMM_WORLD ); + int rank = comm.getRank(); + int nprocs = comm.getSize(); Utilities::setAbortBehavior( true, true, true ); Utilities::setErrorHandlers(); PROFILE_ENABLE(0); @@ -137,7 +136,7 @@ int main(int argc, char **argv) domain >> Ly; domain >> Lz; } - MPI_Barrier(comm); + comm.barrier(); // Computational domain MPI_Bcast(&nx,1,MPI_INT,0,comm); MPI_Bcast(&ny,1,MPI_INT,0,comm); @@ -150,7 +149,7 @@ int main(int argc, char **argv) MPI_Bcast(&Ly,1,MPI_DOUBLE,0,comm); MPI_Bcast(&Lz,1,MPI_DOUBLE,0,comm); //................................................. - MPI_Barrier(comm); + comm.barrier(); // Check that the number of processors >= the number of ranks if ( rank==0 ) { @@ -209,7 +208,7 @@ int main(int argc, char **argv) // WriteLocalSolidID(LocalRankFilename, id, N); sprintf(LocalRankFilename,"%s%s","SignDist.",LocalRankString); ReadBinaryFile(LocalRankFilename, Averages.SDs.get(), N); - MPI_Barrier(comm); + comm.barrier(); if (rank == 0) cout << "Domain set." << endl; //....................................................................... //copies of data needed to perform checkpointing from cpu @@ -221,7 +220,7 @@ int main(int argc, char **argv) if (rank==0) printf("Reading restart file! \n"); // Read in the restart file to CPU buffers ReadCheckpoint(LocalRestartFile, Den, DistEven, DistOdd, N); - MPI_Barrier(comm); + comm.barrier(); //......................................................................... 
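The test drivers converge on one initialization pattern: construct the wrapper around MPI_COMM_WORLD right after MPI_Init and read rank and size through its accessors. Note that this file still broadcasts the domain parameters with raw MPI_Bcast calls whose last argument is comm, now a Utilities::MPI rather than an MPI_Comm; unless the wrapper converts implicitly, those calls will presumably move to comm.bcast(...) or comm.getCommunicator() as well. A minimal sketch of the pattern:

    #include "common/MPI.h"

    int main( int argc, char **argv )
    {
        MPI_Init( &argc, &argv );
        Utilities::MPI comm( MPI_COMM_WORLD );   // wrapper around the world communicator
        int rank   = comm.getRank();             // replaces MPI_Comm_rank
        int nprocs = comm.getSize();             // replaces MPI_Comm_size
        // ... read input on rank 0, broadcast, run the test ...
        comm.barrier();
        MPI_Finalize();
        return 0;
    }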
// Populate the arrays needed to perform averaging if (rank==0) printf("Populate arrays \n"); @@ -329,14 +328,14 @@ int main(int argc, char **argv) // BlobContainer Blobs; DoubleArray RecvBuffer(dimx); // MPI_Allreduce(&Averages.BlobAverages.get(),&Blobs.get(),1,MPI_DOUBLE,MPI_SUM,Dm.Comm); - MPI_Barrier(comm); + comm.barrier(); if (rank==0) printf("Number of components is %i \n",dimy); for (int b=0; b 0.0){ double Vn,pn,awn,ans,Jwn,Kwn,lwns,cwns,trawn,trJwn; @@ -482,7 +481,7 @@ int main(int argc, char **argv) fclose(BLOBS);*/ PROFILE_STOP("main"); PROFILE_SAVE("BlobIdentifyParallel",false); - MPI_Barrier(comm); + comm.barrier(); MPI_Finalize(); return 0; } diff --git a/tests/BlobIdentifyParallel.cpp b/tests/BlobIdentifyParallel.cpp index f93371cb..b8929a11 100644 --- a/tests/BlobIdentifyParallel.cpp +++ b/tests/BlobIdentifyParallel.cpp @@ -47,11 +47,10 @@ void readRankData( int proc, int nx, int ny, int nz, DoubleArray& Phase, DoubleA int main(int argc, char **argv) { // Initialize MPI - int rank, nprocs; MPI_Init(&argc,&argv); - MPI_Comm comm = MPI_COMM_WORLD; - MPI_Comm_rank(comm,&rank); - MPI_Comm_size(comm,&nprocs); + Utilities::MPI comm( MPI_COMM_WORLD ); + int rank = comm.getRank(); + int nprocs = comm.getSize(); #ifdef PROFILE PROFILE_ENABLE(0); PROFILE_DISABLE_TRACE(); @@ -129,7 +128,7 @@ int main(int argc, char **argv) PROFILE_STOP("main"); PROFILE_SAVE("BlobIdentifyParallel",false); #endif - MPI_Barrier(comm); + comm.barrier(); MPI_Finalize(); return 0; } diff --git a/tests/ColorToBinary.cpp b/tests/ColorToBinary.cpp index 7ac740bc..fae156d1 100644 --- a/tests/ColorToBinary.cpp +++ b/tests/ColorToBinary.cpp @@ -114,11 +114,10 @@ inline void ReadFromRank(char *FILENAME, DoubleArray &Phase, int nx, int ny, int int main(int argc, char **argv) { // Initialize MPI - int rank,nprocs; MPI_Init(&argc,&argv); - MPI_Comm comm = MPI_COMM_WORLD; - MPI_Comm_rank(comm,&rank); - MPI_Comm_size(comm,&nprocs); + Utilities::MPI comm( MPI_COMM_WORLD ); + int rank = comm.getRank(); + int nprocs = comm.getSize(); printf("----------------------------------------------------------\n"); printf("Creating single Binary file from restart (8-bit integer)\n"); @@ -276,7 +275,7 @@ int main(int argc, char **argv) */ // **************************************************** - MPI_Barrier(comm); + comm.barrier(); MPI_Finalize(); // **************************************************** } diff --git a/tests/ComponentLabel.cpp b/tests/ComponentLabel.cpp index 07ef6555..624ce8f4 100644 --- a/tests/ComponentLabel.cpp +++ b/tests/ComponentLabel.cpp @@ -119,11 +119,10 @@ inline void ReadFromRank(char *FILENAME, DoubleArray &Phase, DoubleArray &Pressu int main(int argc, char **argv) { // Initialize MPI - int rank,nprocs; MPI_Init(&argc,&argv); - MPI_Comm comm = MPI_COMM_WORLD; - MPI_Comm_rank(comm,&rank); - MPI_Comm_size(comm,&nprocs); + Utilities::MPI comm( MPI_COMM_WORLD ); + int rank = comm.getRank(); + int nprocs = comm.getSize(); printf("----------------------------------------------------------\n"); printf("COMPUTING TCAT ANALYSIS FOR NON-WETTING PHASE FEATURES \n"); @@ -433,7 +432,7 @@ int main(int argc, char **argv) fclose(DISTANCE); */ // **************************************************** - MPI_Barrier(comm); + comm.barrier(); MPI_Finalize(); // **************************************************** } diff --git a/tests/GenerateSphereTest.cpp b/tests/GenerateSphereTest.cpp index 53fc8746..5886be21 100644 --- a/tests/GenerateSphereTest.cpp +++ b/tests/GenerateSphereTest.cpp @@ -9,7 +9,7 @@ //#include 
"common/pmmc.h" #include "common/Domain.h" #include "common/SpherePack.h" -#include "common/MPI_Helpers.h" +#include "common/MPI.h" #include "common/Communication.h" /* @@ -70,8 +70,8 @@ inline void MorphOpen(DoubleArray SignDist, char *id, Domain &Dm, int nx, int ny } } // total Global is the number of nodes in the pore-space - MPI_Allreduce(&count,&totalGlobal,1,MPI_DOUBLE,MPI_SUM,Dm.Comm); - MPI_Allreduce(&maxdist,&maxdistGlobal,1,MPI_DOUBLE,MPI_MAX,Dm.Comm); + totalGlobal = Dm.Comm.sumReduce( count ); + maxdistGlobal = Dm.Comm.sumReduce( maxdist ); double volume=double(nprocx*nprocy*nprocz)*double(nx-2)*double(ny-2)*double(nz-2); double porosity=totalGlobal/volume; if (rank==0) printf("Media Porosity: %f \n",porosity); @@ -148,7 +148,6 @@ inline void MorphOpen(DoubleArray SignDist, char *id, Domain &Dm, int nx, int ny double Rcrit_old; double Rcrit_new; - double GlobalNumber = 1.f; int imin,jmin,kmin,imax,jmax,kmax; Rcrit_new = maxdistGlobal; @@ -215,41 +214,41 @@ inline void MorphOpen(DoubleArray SignDist, char *id, Domain &Dm, int nx, int ny PackID(Dm.sendList_YZ, Dm.sendCount_YZ ,sendID_YZ, id); //...................................................................................... MPI_Sendrecv(sendID_x,Dm.sendCount_x,MPI_CHAR,Dm.rank_x(),sendtag, - recvID_X,Dm.recvCount_X,MPI_CHAR,Dm.rank_X(),recvtag,Dm.Comm,MPI_STATUS_IGNORE); + recvID_X,Dm.recvCount_X,MPI_CHAR,Dm.rank_X(),recvtag,Dm.Comm.getCommunicator(),MPI_STATUS_IGNORE); MPI_Sendrecv(sendID_X,Dm.sendCount_X,MPI_CHAR,Dm.rank_X(),sendtag, - recvID_x,Dm.recvCount_x,MPI_CHAR,Dm.rank_x(),recvtag,Dm.Comm,MPI_STATUS_IGNORE); + recvID_x,Dm.recvCount_x,MPI_CHAR,Dm.rank_x(),recvtag,Dm.Comm.getCommunicator(),MPI_STATUS_IGNORE); MPI_Sendrecv(sendID_y,Dm.sendCount_y,MPI_CHAR,Dm.rank_y(),sendtag, - recvID_Y,Dm.recvCount_Y,MPI_CHAR,Dm.rank_Y(),recvtag,Dm.Comm,MPI_STATUS_IGNORE); + recvID_Y,Dm.recvCount_Y,MPI_CHAR,Dm.rank_Y(),recvtag,Dm.Comm.getCommunicator(),MPI_STATUS_IGNORE); MPI_Sendrecv(sendID_Y,Dm.sendCount_Y,MPI_CHAR,Dm.rank_Y(),sendtag, - recvID_y,Dm.recvCount_y,MPI_CHAR,Dm.rank_y(),recvtag,Dm.Comm,MPI_STATUS_IGNORE); + recvID_y,Dm.recvCount_y,MPI_CHAR,Dm.rank_y(),recvtag,Dm.Comm.getCommunicator(),MPI_STATUS_IGNORE); MPI_Sendrecv(sendID_z,Dm.sendCount_z,MPI_CHAR,Dm.rank_z(),sendtag, - recvID_Z,Dm.recvCount_Z,MPI_CHAR,Dm.rank_Z(),recvtag,Dm.Comm,MPI_STATUS_IGNORE); + recvID_Z,Dm.recvCount_Z,MPI_CHAR,Dm.rank_Z(),recvtag,Dm.Comm.getCommunicator(),MPI_STATUS_IGNORE); MPI_Sendrecv(sendID_Z,Dm.sendCount_Z,MPI_CHAR,Dm.rank_Z(),sendtag, - recvID_z,Dm.recvCount_z,MPI_CHAR,Dm.rank_z(),recvtag,Dm.Comm,MPI_STATUS_IGNORE); + recvID_z,Dm.recvCount_z,MPI_CHAR,Dm.rank_z(),recvtag,Dm.Comm.getCommunicator(),MPI_STATUS_IGNORE); MPI_Sendrecv(sendID_xy,Dm.sendCount_xy,MPI_CHAR,Dm.rank_xy(),sendtag, - recvID_XY,Dm.recvCount_XY,MPI_CHAR,Dm.rank_XY(),recvtag,Dm.Comm,MPI_STATUS_IGNORE); + recvID_XY,Dm.recvCount_XY,MPI_CHAR,Dm.rank_XY(),recvtag,Dm.Comm.getCommunicator(),MPI_STATUS_IGNORE); MPI_Sendrecv(sendID_XY,Dm.sendCount_XY,MPI_CHAR,Dm.rank_XY(),sendtag, - recvID_xy,Dm.recvCount_xy,MPI_CHAR,Dm.rank_xy(),recvtag,Dm.Comm,MPI_STATUS_IGNORE); + recvID_xy,Dm.recvCount_xy,MPI_CHAR,Dm.rank_xy(),recvtag,Dm.Comm.getCommunicator(),MPI_STATUS_IGNORE); MPI_Sendrecv(sendID_Xy,Dm.sendCount_Xy,MPI_CHAR,Dm.rank_Xy(),sendtag, - recvID_xY,Dm.recvCount_xY,MPI_CHAR,Dm.rank_xY(),recvtag,Dm.Comm,MPI_STATUS_IGNORE); + recvID_xY,Dm.recvCount_xY,MPI_CHAR,Dm.rank_xY(),recvtag,Dm.Comm.getCommunicator(),MPI_STATUS_IGNORE); 
MPI_Sendrecv(sendID_xY,Dm.sendCount_xY,MPI_CHAR,Dm.rank_xY(),sendtag, - recvID_Xy,Dm.recvCount_Xy,MPI_CHAR,Dm.rank_Xy(),recvtag,Dm.Comm,MPI_STATUS_IGNORE); + recvID_Xy,Dm.recvCount_Xy,MPI_CHAR,Dm.rank_Xy(),recvtag,Dm.Comm.getCommunicator(),MPI_STATUS_IGNORE); MPI_Sendrecv(sendID_xz,Dm.sendCount_xz,MPI_CHAR,Dm.rank_xz(),sendtag, - recvID_XZ,Dm.recvCount_XZ,MPI_CHAR,Dm.rank_XZ(),recvtag,Dm.Comm,MPI_STATUS_IGNORE); + recvID_XZ,Dm.recvCount_XZ,MPI_CHAR,Dm.rank_XZ(),recvtag,Dm.Comm.getCommunicator(),MPI_STATUS_IGNORE); MPI_Sendrecv(sendID_XZ,Dm.sendCount_XZ,MPI_CHAR,Dm.rank_XZ(),sendtag, - recvID_xz,Dm.recvCount_xz,MPI_CHAR,Dm.rank_xz(),recvtag,Dm.Comm,MPI_STATUS_IGNORE); + recvID_xz,Dm.recvCount_xz,MPI_CHAR,Dm.rank_xz(),recvtag,Dm.Comm.getCommunicator(),MPI_STATUS_IGNORE); MPI_Sendrecv(sendID_Xz,Dm.sendCount_Xz,MPI_CHAR,Dm.rank_Xz(),sendtag, - recvID_xZ,Dm.recvCount_xZ,MPI_CHAR,Dm.rank_xZ(),recvtag,Dm.Comm,MPI_STATUS_IGNORE); + recvID_xZ,Dm.recvCount_xZ,MPI_CHAR,Dm.rank_xZ(),recvtag,Dm.Comm.getCommunicator(),MPI_STATUS_IGNORE); MPI_Sendrecv(sendID_xZ,Dm.sendCount_xZ,MPI_CHAR,Dm.rank_xZ(),sendtag, - recvID_Xz,Dm.recvCount_Xz,MPI_CHAR,Dm.rank_Xz(),recvtag,Dm.Comm,MPI_STATUS_IGNORE); + recvID_Xz,Dm.recvCount_Xz,MPI_CHAR,Dm.rank_Xz(),recvtag,Dm.Comm.getCommunicator(),MPI_STATUS_IGNORE); MPI_Sendrecv(sendID_yz,Dm.sendCount_yz,MPI_CHAR,Dm.rank_yz(),sendtag, - recvID_YZ,Dm.recvCount_YZ,MPI_CHAR,Dm.rank_YZ(),recvtag,Dm.Comm,MPI_STATUS_IGNORE); + recvID_YZ,Dm.recvCount_YZ,MPI_CHAR,Dm.rank_YZ(),recvtag,Dm.Comm.getCommunicator(),MPI_STATUS_IGNORE); MPI_Sendrecv(sendID_YZ,Dm.sendCount_YZ,MPI_CHAR,Dm.rank_YZ(),sendtag, - recvID_yz,Dm.recvCount_yz,MPI_CHAR,Dm.rank_yz(),recvtag,Dm.Comm,MPI_STATUS_IGNORE); + recvID_yz,Dm.recvCount_yz,MPI_CHAR,Dm.rank_yz(),recvtag,Dm.Comm.getCommunicator(),MPI_STATUS_IGNORE); MPI_Sendrecv(sendID_Yz,Dm.sendCount_Yz,MPI_CHAR,Dm.rank_Yz(),sendtag, - recvID_yZ,Dm.recvCount_yZ,MPI_CHAR,Dm.rank_yZ(),recvtag,Dm.Comm,MPI_STATUS_IGNORE); + recvID_yZ,Dm.recvCount_yZ,MPI_CHAR,Dm.rank_yZ(),recvtag,Dm.Comm.getCommunicator(),MPI_STATUS_IGNORE); MPI_Sendrecv(sendID_yZ,Dm.sendCount_yZ,MPI_CHAR,Dm.rank_yZ(),sendtag, - recvID_Yz,Dm.recvCount_Yz,MPI_CHAR,Dm.rank_Yz(),recvtag,Dm.Comm,MPI_STATUS_IGNORE); + recvID_Yz,Dm.recvCount_Yz,MPI_CHAR,Dm.rank_Yz(),recvtag,Dm.Comm.getCommunicator(),MPI_STATUS_IGNORE); //...................................................................................... UnpackID(Dm.recvList_x, Dm.recvCount_x ,recvID_x, id); UnpackID(Dm.recvList_X, Dm.recvCount_X ,recvID_X, id); @@ -271,7 +270,7 @@ inline void MorphOpen(DoubleArray SignDist, char *id, Domain &Dm, int nx, int ny UnpackID(Dm.recvList_YZ, Dm.recvCount_YZ ,recvID_YZ, id); //...................................................................................... - MPI_Allreduce(&LocalNumber,&GlobalNumber,1,MPI_DOUBLE,MPI_SUM,Dm.Comm); + //double GlobalNumber = Dm.Comm.sumReduce( LocalNumber ); count = 0.f; for (int k=1; k= the number of ranks if ( rank==0 ) { @@ -254,14 +253,14 @@ int main(int argc, char **argv) cz[0]=0.25*Lz; cx[1]=0.75*Lz; cx[2]=0.25*Lz; cx[3]=0.25*Lz; rad[0]=rad[1]=rad[2]=rad[3]=0.1*Lx; - MPI_Barrier(comm); + comm.barrier(); // Broadcast the sphere packing to all processes MPI_Bcast(cx,nspheres,MPI_DOUBLE,0,comm); MPI_Bcast(cy,nspheres,MPI_DOUBLE,0,comm); MPI_Bcast(cz,nspheres,MPI_DOUBLE,0,comm); MPI_Bcast(rad,nspheres,MPI_DOUBLE,0,comm); //........................................................................... 
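Where raw point-to-point calls are kept, such as the halo exchange of ID labels above, the underlying MPI_Comm is pulled out of the wrapper with getCommunicator(). One exchange pair, condensed from the block above (the same form repeats for all 18 directions):

    MPI_Sendrecv( sendID_x, Dm.sendCount_x, MPI_CHAR, Dm.rank_x(), sendtag,
                  recvID_X, Dm.recvCount_X, MPI_CHAR, Dm.rank_X(), recvtag,
                  Dm.Comm.getCommunicator(), MPI_STATUS_IGNORE );   // raw handle obtained from the wrapper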
- MPI_Barrier(comm); + comm.barrier(); //....................................................................... SignedDistance(Averages.Phase.data(),nspheres,cx,cy,cz,rad,Lx,Ly,Lz,Nx,Ny,Nz, Dm->iproc(),Dm->jproc(),Dm->kproc(),Dm->nprocx(),Dm->nprocy(),Dm->nprocz()); @@ -317,7 +316,7 @@ int main(int argc, char **argv) delete [] rad; } // Limit scope so variables that contain communicators will free before MPI_Finialize - MPI_Barrier(comm); + comm.barrier(); MPI_Finalize(); return 0; } diff --git a/tests/TestBlobIdentify.cpp b/tests/TestBlobIdentify.cpp index ccfc6afc..7eb5c270 100644 --- a/tests/TestBlobIdentify.cpp +++ b/tests/TestBlobIdentify.cpp @@ -23,21 +23,19 @@ inline double rand2() // Test if all ranks agree on a value -bool allAgree( int x, MPI_Comm comm ) { +bool allAgree( int x, const Utilities::MPI& comm ) { int x2 = x; - MPI_Bcast(&x2,1,MPI_INT,0,comm); + comm.bcast(&x2,1,0); int diff = x==x2 ? 0:1; - int diff2 = 0; - MPI_Allreduce(&diff,&diff2,1,MPI_INT,MPI_SUM,comm); + int diff2 = comm.sumReduce( diff ); return diff2==0; } template -bool allAgree( const std::vector& x, MPI_Comm comm ) { +bool allAgree( const std::vector& x, const Utilities::MPI& comm ) { std::vector x2 = x; - MPI_Bcast(&x2[0],x.size()*sizeof(T)/sizeof(int),MPI_INT,0,comm); + comm.bcast(&x2[0],x.size()*sizeof(T)/sizeof(int),0); int diff = x==x2 ? 0:1; - int diff2 = 0; - MPI_Allreduce(&diff,&diff2,1,MPI_INT,MPI_SUM,comm); + int diff2 = comm.sumReduce( diff ); return diff2==0; } @@ -74,9 +72,9 @@ struct bubble_struct { // Create a random set of bubles -std::vector create_bubbles( int N_bubbles, double Lx, double Ly, double Lz, MPI_Comm comm ) +std::vector create_bubbles( int N_bubbles, double Lx, double Ly, double Lz, const Utilities::MPI& comm ) { - int rank = comm_rank(comm); + int rank = comm.getRank(); std::vector bubbles(N_bubbles); if ( rank == 0 ) { double R0 = 0.2*Lx*Ly*Lz/pow((double)N_bubbles,0.333); @@ -91,7 +89,7 @@ std::vector create_bubbles( int N_bubbles, double Lx, double Ly, } } size_t N_bytes = N_bubbles*sizeof(bubble_struct); - MPI_Bcast((char*)&bubbles[0],N_bytes,MPI_CHAR,0,comm); + comm.bcast((char*)&bubbles[0],N_bytes,0); return bubbles; } @@ -124,7 +122,7 @@ void fillBubbleData( const std::vector& bubbles, DoubleArray& Pha // Shift all of the data by the given number of cells -void shift_data( DoubleArray& data, int sx, int sy, int sz, const RankInfoStruct& rank_info, MPI_Comm comm ) +void shift_data( DoubleArray& data, int sx, int sy, int sz, const RankInfoStruct& rank_info, const Utilities::MPI& comm ) { int nx = data.size(0)-2; int ny = data.size(1)-2; @@ -154,11 +152,10 @@ void shift_data( DoubleArray& data, int sx, int sy, int sz, const RankInfoStruct int main(int argc, char **argv) { // Initialize MPI - int rank, nprocs; MPI_Init(&argc,&argv); - MPI_Comm comm = MPI_COMM_WORLD; - MPI_Comm_rank(comm,&rank); - MPI_Comm_size(comm,&nprocs); + Utilities::MPI comm( MPI_COMM_WORLD ); + int rank = comm.getRank(); + int nprocs = comm.getSize(); PROFILE_ENABLE(1); PROFILE_DISABLE_TRACE(); PROFILE_SYNCHRONIZE(); @@ -297,7 +294,7 @@ int main(int argc, char **argv) velocity[i].z = bubbles[i].radius*(2*rand2()-1); } } - MPI_Bcast((char*)&velocity[0],bubbles.size()*sizeof(Point),MPI_CHAR,0,comm); + comm.bcast((char*)&velocity[0],bubbles.size()*sizeof(Point),0); fillBubbleData( bubbles, Phase, SignDist, Lx, Ly, Lz, rank_info ); fillData.fill(Phase); fillData.fill(SignDist); @@ -391,8 +388,8 @@ int main(int argc, char **argv) printf("\n"); } } - MPI_Bcast(&N1,1,MPI_INT,0,comm); - 
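create_bubbles broadcasts the bubble array from rank 0 as raw bytes through the wrapper's bcast(buffer, count, root). That is only safe for plain-old-data types; a minimal sketch with an abbreviated struct layout (the real bubble_struct carries more fields, and comm is the Utilities::MPI object from main):

    #include <vector>

    struct bubble_struct { double x, y, z, radius; };      // POD: no pointers, safe to send byte-wise
    std::vector<bubble_struct> bubbles( N_bubbles );
    if ( comm.getRank() == 0 ) { /* fill bubbles with random centers and radii */ }
    size_t N_bytes = bubbles.size() * sizeof(bubble_struct);
    comm.bcast( (char*) &bubbles[0], N_bytes, 0 );          // every rank receives rank 0's bubbles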
MPI_Bcast(&N2,1,MPI_INT,0,comm); + comm.bcast(&N1,1,0); + comm.bcast(&N2,1,0); if ( N1!=nblobs || N2!=nblobs2 ) { if ( rank==0 ) printf("Error, blob ids do not map in moving bubble test (%i,%i,%i,%i)\n", @@ -412,7 +409,7 @@ int main(int argc, char **argv) // Finished PROFILE_STOP("main"); PROFILE_SAVE("TestBlobIdentify",false); - MPI_Barrier(comm); + comm.barrier(); MPI_Finalize(); return N_errors; } diff --git a/tests/TestBlobIdentifyCorners.cpp b/tests/TestBlobIdentifyCorners.cpp index 4795f610..904e52e0 100644 --- a/tests/TestBlobIdentifyCorners.cpp +++ b/tests/TestBlobIdentifyCorners.cpp @@ -18,10 +18,9 @@ int main(int argc, char **argv) { // Initialize MPI - int rank, nprocs; MPI_Init(&argc,&argv); - MPI_Comm_rank(MPI_COMM_WORLD,&rank); - MPI_Comm_size(MPI_COMM_WORLD,&nprocs); + int rank = comm.getRank(); + int nprocs = comm.getSize(); /*if ( nprocs != 8 ) { printf("This tests requires 8 processors\n"); return -1; diff --git a/tests/TestBubble.cpp b/tests/TestBubble.cpp index c03e5dea..e7e0ced8 100644 --- a/tests/TestBubble.cpp +++ b/tests/TestBubble.cpp @@ -7,7 +7,7 @@ #include "analysis/pmmc.h" #include "common/ScaLBL.h" -#include "common/MPI_Helpers.h" +#include "common/MPI.h" #include "common/Communication.h" #include "IO/Mesh.h" #include "IO/Writer.h" @@ -32,14 +32,15 @@ int main(int argc, char **argv) // Initialize MPI int provided_thread_support = -1; MPI_Init_thread(&argc,&argv,MPI_THREAD_MULTIPLE,&provided_thread_support); - MPI_Comm comm; - MPI_Comm_dup(MPI_COMM_WORLD,&comm); - int rank = comm_rank(comm); - int nprocs = comm_size(comm); - if ( rank==0 && provided_thread_support(domain_db,comm); - MPI_Barrier(comm); + comm.barrier(); Nx+=2; Ny+=2; Nz += 2; int N = Nx*Ny*Nz; @@ -250,7 +249,7 @@ int main(int argc, char **argv) IntArray Map(Nx,Ny,Nz); auto neighborList= new int[18*Npad]; Np = ScaLBL_Comm->MemoryOptimizedLayoutAA(Map,neighborList,Mask->id,Np); - MPI_Barrier(comm); + comm.barrier(); //........................................................................... // MAIN VARIABLES ALLOCATED HERE @@ -387,7 +386,7 @@ int main(int argc, char **argv) //.......create and start timer............ double starttime,stoptime,cputime; ScaLBL_DeviceBarrier(); - MPI_Barrier(comm); + comm.barrier(); starttime = MPI_Wtime(); //......................................... 
@@ -437,7 +436,7 @@ int main(int argc, char **argv) } ScaLBL_D3Q19_AAodd_DFH(NeighborList, fq, Aq, Bq, Den, Phi, Gradient, SolidPotential, rhoA, rhoB, tauA, tauB, alpha, beta, Fx, Fy, Fz, 0, ScaLBL_Comm->next, Np); - ScaLBL_DeviceBarrier(); MPI_Barrier(comm); + ScaLBL_DeviceBarrier(); comm.barrier(); // *************EVEN TIMESTEP************* timestep++; @@ -473,9 +472,9 @@ int main(int argc, char **argv) } ScaLBL_D3Q19_AAeven_DFH(NeighborList, fq, Aq, Bq, Den, Phi, Gradient, SolidPotential, rhoA, rhoB, tauA, tauB, alpha, beta, Fx, Fy, Fz, 0, ScaLBL_Comm->next, Np); - ScaLBL_DeviceBarrier(); MPI_Barrier(comm); + ScaLBL_DeviceBarrier(); comm.barrier(); //************************************************************************ - MPI_Barrier(comm); + comm.barrier(); PROFILE_STOP("Update"); // Run the analysis @@ -487,7 +486,7 @@ int main(int argc, char **argv) PROFILE_SAVE("lbpm_color_simulator",1); //************************************************************************ ScaLBL_DeviceBarrier(); - MPI_Barrier(comm); + comm.barrier(); stoptime = MPI_Wtime(); if (rank==0) printf("-------------------------------------------------------------------\n"); // Compute the walltime per timestep @@ -547,9 +546,8 @@ int main(int argc, char **argv) PROFILE_STOP("Main"); PROFILE_SAVE("lbpm_color_simulator",1); // **************************************************** - MPI_Barrier(comm); + comm.barrier(); } // Limit scope so variables that contain communicators will free before MPI_Finialize - MPI_Comm_free(&comm); MPI_Finalize(); return check; } diff --git a/tests/TestColorBubble.cpp b/tests/TestColorBubble.cpp index 0e6ea25a..1f42a71e 100644 --- a/tests/TestColorBubble.cpp +++ b/tests/TestColorBubble.cpp @@ -7,7 +7,7 @@ #include #include #include "common/ScaLBL.h" -#include "common/MPI_Helpers.h" +#include "common/MPI.h" #include "models/ColorModel.h" using namespace std; @@ -64,15 +64,11 @@ inline void InitializeBubble(ScaLBL_ColorModel &ColorModel, double BubbleRadius) //*************************************************************************************** int main(int argc, char **argv) { - //***************************************** - // ***** MPI STUFF **************** - //***************************************** // Initialize MPI - int rank,nprocs; MPI_Init(&argc,&argv); - MPI_Comm comm = MPI_COMM_WORLD; - MPI_Comm_rank(comm,&rank); - MPI_Comm_size(comm,&nprocs); + Utilities::MPI comm( MPI_COMM_WORLD ); + int rank = comm.getRank(); + int nprocs = comm.getSize(); int check=0; { if (rank == 0){ @@ -97,7 +93,7 @@ int main(int argc, char **argv) ColorModel.WriteDebug(); } // **************************************************** - MPI_Barrier(comm); + comm.barrier(); MPI_Finalize(); // **************************************************** diff --git a/tests/TestColorGrad.cpp b/tests/TestColorGrad.cpp index 5cd6d924..df1c1daf 100644 --- a/tests/TestColorGrad.cpp +++ b/tests/TestColorGrad.cpp @@ -7,7 +7,7 @@ #include #include #include "common/ScaLBL.h" -#include "common/MPI_Helpers.h" +#include "common/MPI.h" using namespace std; @@ -15,15 +15,11 @@ using namespace std; //*************************************************************************************** int main(int argc, char **argv) { - //***************************************** - // ***** MPI STUFF **************** - //***************************************** // Initialize MPI - int rank,nprocs; MPI_Init(&argc,&argv); - MPI_Comm comm = MPI_COMM_WORLD; - MPI_Comm_rank(comm,&rank); - MPI_Comm_size(comm,&nprocs); + Utilities::MPI comm( 
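With the wrapper holding the communicator handle, the explicit MPI_Comm_free at the end of TestBubble goes away; the existing "limit scope" comment still matters, since the wrapper is presumably released by its destructor before MPI_Finalize runs (MPI_COMM_WORLD itself is never freed). A sketch of the resulting teardown order:

    {
        Utilities::MPI comm( MPI_COMM_WORLD );
        // ... run the bubble test ...
        comm.barrier();
    }                  // wrapper (and anything else holding communicators) destroyed here
    MPI_Finalize();    // safe: no wrapper objects outlive this point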
MPI_COMM_WORLD ); + int rank = comm.getRank(); + int nprocs = comm.getSize(); int check; { // parallel domain size (# of sub-domains) @@ -116,7 +112,7 @@ int main(int argc, char **argv) } // ************************************************************** // Broadcast simulation parameters from rank 0 to all other procs - MPI_Barrier(comm); + comm.barrier(); //................................................. MPI_Bcast(&Nx,1,MPI_INT,0,comm); MPI_Bcast(&Ny,1,MPI_INT,0,comm); @@ -129,7 +125,7 @@ int main(int argc, char **argv) MPI_Bcast(&Ly,1,MPI_DOUBLE,0,comm); MPI_Bcast(&Lz,1,MPI_DOUBLE,0,comm); //................................................. - MPI_Barrier(comm); + comm.barrier(); // ************************************************************** // ************************************************************** @@ -146,7 +142,7 @@ int main(int argc, char **argv) printf("********************************************************\n"); } - MPI_Barrier(comm); + comm.barrier(); double iVol_global = 1.0/Nx/Ny/Nz/nprocx/nprocy/nprocz; int BoundaryCondition=0; @@ -175,7 +171,7 @@ int main(int argc, char **argv) } } Dm.CommInit(); - MPI_Barrier(comm); + comm.barrier(); if (rank == 0) cout << "Domain set." << endl; if (rank==0) printf ("Create ScaLBL_Communicator \n"); @@ -192,7 +188,7 @@ int main(int argc, char **argv) neighborList= new int[18*Np]; ScaLBL_Comm.MemoryOptimizedLayoutAA(Map,neighborList,Dm.id,Np); - MPI_Barrier(comm); + comm.barrier(); //......................device distributions................................. int dist_mem_size = Np*sizeof(double); @@ -260,7 +256,7 @@ int main(int argc, char **argv) } // **************************************************** - MPI_Barrier(comm); + comm.barrier(); MPI_Finalize(); // **************************************************** diff --git a/tests/TestColorGradDFH.cpp b/tests/TestColorGradDFH.cpp index d6376d82..b04aebce 100644 --- a/tests/TestColorGradDFH.cpp +++ b/tests/TestColorGradDFH.cpp @@ -7,7 +7,7 @@ #include #include #include "common/ScaLBL.h" -#include "common/MPI_Helpers.h" +#include "common/MPI.h" using namespace std; @@ -25,15 +25,11 @@ std::shared_ptr loadInputs( int nprocs ) //*************************************************************************************** int main(int argc, char **argv) { - //***************************************** - // ***** MPI STUFF **************** - //***************************************** // Initialize MPI - int rank,nprocs; MPI_Init(&argc,&argv); - MPI_Comm comm = MPI_COMM_WORLD; - MPI_Comm_rank(comm,&rank); - MPI_Comm_size(comm,&nprocs); + Utilities::MPI comm( MPI_COMM_WORLD ); + int rank = comm.getRank(); + int nprocs = comm.getSize(); int check=0; { // parallel domain size (# of sub-domains) @@ -82,7 +78,7 @@ int main(int argc, char **argv) } } Dm->CommInit(); - MPI_Barrier(comm); + comm.barrier(); if (rank == 0) cout << "Domain set." << endl; if (rank==0) printf ("Create ScaLBL_Communicator \n"); @@ -105,7 +101,7 @@ int main(int argc, char **argv) IntArray Map(Nx,Ny,Nz); neighborList= new int[18*Npad]; Np = ScaLBL_Comm->MemoryOptimizedLayoutAA(Map,neighborList,Dm->id,Np); - MPI_Barrier(comm); + comm.barrier(); //......................device distributions................................. 
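TestColorGrad, like several of the other preprocessing tests, still broadcasts its domain parameters with raw MPI_Bcast calls whose last argument is comm, now a Utilities::MPI rather than an MPI_Comm. Unless the wrapper converts implicitly to MPI_Comm, these presumably end up on the wrapper's bcast or on getCommunicator(); a sketch of both options:

    // Option 1: wrapper broadcast, using the bcast(buffer, count, root) form shown earlier in this patch
    comm.bcast( &Nx, 1, 0 );
    comm.bcast( &Ny, 1, 0 );
    comm.bcast( &Nz, 1, 0 );
    // Option 2: keep the raw call but hand it the underlying handle
    MPI_Bcast( &Nx, 1, MPI_INT, 0, comm.getCommunicator() );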
int neighborSize=18*Np*sizeof(int); @@ -211,7 +207,7 @@ int main(int argc, char **argv) } // **************************************************** - MPI_Barrier(comm); + comm.barrier(); MPI_Finalize(); // **************************************************** diff --git a/tests/TestColorMassBounceback.cpp b/tests/TestColorMassBounceback.cpp index c05c245e..78508f9b 100644 --- a/tests/TestColorMassBounceback.cpp +++ b/tests/TestColorMassBounceback.cpp @@ -7,7 +7,7 @@ #include #include #include "common/ScaLBL.h" -#include "common/MPI_Helpers.h" +#include "common/MPI.h" using namespace std; @@ -15,15 +15,11 @@ using namespace std; //*************************************************************************************** int main(int argc, char **argv) { - //***************************************** - // ***** MPI STUFF **************** - //***************************************** // Initialize MPI - int rank,nprocs; MPI_Init(&argc,&argv); - MPI_Comm comm = MPI_COMM_WORLD; - MPI_Comm_rank(comm,&rank); - MPI_Comm_size(comm,&nprocs); + Utilities::MPI comm( MPI_COMM_WORLD ); + int rank = comm.getRank(); + int nprocs = comm.getSize(); int check=0; { // parallel domain size (# of sub-domains) @@ -42,7 +38,7 @@ int main(int argc, char **argv) // Initialize compute device // int device=ScaLBL_SetDevice(rank); ScaLBL_DeviceBarrier(); - MPI_Barrier(comm); + comm.barrier(); Utilities::setErrorHandlers(); // Variables that specify the computational domain @@ -77,7 +73,7 @@ int main(int argc, char **argv) // Get the rank info const RankInfoStruct rank_info(rank,nprocx,nprocy,nprocz); - MPI_Barrier(comm); + comm.barrier(); if (nprocs != nprocx*nprocy*nprocz){ printf("nprocx = %i \n",nprocx); @@ -121,7 +117,7 @@ int main(int argc, char **argv) std::shared_ptr Dm(new Domain(domain_db,comm)); for (int i=0; iNx*Dm->Ny*Dm->Nz; i++) Dm->id[i] = 1; Dm->CommInit(); - MPI_Barrier(comm); + comm.barrier(); Nx+=2; Ny+=2; Nz += 2; int N = Nx*Ny*Nz; @@ -153,7 +149,7 @@ int main(int argc, char **argv) } } Dm->CommInit(); - MPI_Barrier(comm); + comm.barrier(); if (rank == 0) cout << "Domain set." << endl; if (rank==0) printf ("Create ScaLBL_Communicator \n"); @@ -170,7 +166,7 @@ int main(int argc, char **argv) Npad=Np+32; neighborList= new int[18*Npad]; Np=ScaLBL_Comm->MemoryOptimizedLayoutAA(Map,neighborList,Dm->id,Np); - MPI_Barrier(comm); + comm.barrier(); //......................device distributions................................. 
int dist_mem_size = Np*sizeof(double); @@ -272,7 +268,7 @@ int main(int argc, char **argv) ScaLBL_D3Q19_AAodd_DFH(NeighborList, fq, Aq, Bq, Den, Phi, Gradient, rhoA, rhoB, tauA, tauB, alpha, beta, Fx, Fy, Fz, 0, ScaLBL_Comm->LastExterior(), Np); - ScaLBL_DeviceBarrier(); MPI_Barrier(comm); + ScaLBL_DeviceBarrier(); comm.barrier(); timestep++; @@ -332,7 +328,7 @@ int main(int argc, char **argv) ScaLBL_Comm->RecvD3Q19AA(fq); //WRITE INTO OPPOSITE ScaLBL_D3Q19_AAeven_DFH(NeighborList, fq, Aq, Bq, Den, Phi, Gradient, rhoA, rhoB, tauA, tauB, alpha, beta, Fx, Fy, Fz, 0, ScaLBL_Comm->LastExterior(), Np); - ScaLBL_DeviceBarrier(); MPI_Barrier(comm); + ScaLBL_DeviceBarrier(); comm.barrier(); timestep++; //************************************************************************ printf("Check after even time \n"); @@ -415,7 +411,7 @@ int main(int argc, char **argv) ScaLBL_D3Q19_AAodd_DFH(NeighborList, fq, Aq, Bq, Den, Phi, Gradient, rhoA, rhoB, tauA, tauB, alpha, beta, Fx, Fy, Fz, 0, ScaLBL_Comm->LastExterior(), Np); - ScaLBL_DeviceBarrier(); MPI_Barrier(comm); + ScaLBL_DeviceBarrier(); comm.barrier(); timestep++; @@ -476,7 +472,7 @@ int main(int argc, char **argv) ScaLBL_Comm->RecvD3Q19AA(fq); //WRITE INTO OPPOSITE ScaLBL_D3Q19_AAeven_DFH(NeighborList, fq, Aq, Bq, Den, Phi, Gradient, rhoA, rhoB, tauA, tauB, alpha, beta, Fx, Fy, Fz, 0, ScaLBL_Comm->LastExterior(), Np); - ScaLBL_DeviceBarrier(); MPI_Barrier(comm); + ScaLBL_DeviceBarrier(); comm.barrier(); timestep++; //************************************************************************ printf("Check after even time \n"); @@ -523,7 +519,7 @@ int main(int argc, char **argv) } // **************************************************** - MPI_Barrier(comm); + comm.barrier(); MPI_Finalize(); // **************************************************** return check; diff --git a/tests/TestColorSquareTube.cpp b/tests/TestColorSquareTube.cpp index 9807f0e8..cf8a9566 100644 --- a/tests/TestColorSquareTube.cpp +++ b/tests/TestColorSquareTube.cpp @@ -7,7 +7,7 @@ #include #include #include "common/ScaLBL.h" -#include "common/MPI_Helpers.h" +#include "common/MPI.h" #include "models/ColorModel.h" std::shared_ptr loadInputs( int nprocs ) @@ -84,15 +84,11 @@ void InitializeSquareTube(ScaLBL_ColorModel &ColorModel){ //*************************************************************************************** int main(int argc, char **argv) { - //***************************************** - // ***** MPI STUFF **************** - //***************************************** // Initialize MPI - int rank,nprocs; MPI_Init(&argc,&argv); - MPI_Comm comm = MPI_COMM_WORLD; - MPI_Comm_rank(comm,&rank); - MPI_Comm_size(comm,&nprocs); + Utilities::MPI comm( MPI_COMM_WORLD ); + int rank = comm.getRank(); + int nprocs = comm.getSize(); int check=0; { if (rank == 0){ @@ -113,7 +109,7 @@ int main(int argc, char **argv) } // **************************************************** - MPI_Barrier(comm); + comm.barrier(); MPI_Finalize(); // **************************************************** diff --git a/tests/TestCommD3Q19.cpp b/tests/TestCommD3Q19.cpp index e1fa821f..d2799355 100644 --- a/tests/TestCommD3Q19.cpp +++ b/tests/TestCommD3Q19.cpp @@ -6,7 +6,7 @@ #include #include #include "common/ScaLBL.h" -#include "common/MPI_Helpers.h" +#include "common/MPI.h" using namespace std; @@ -164,11 +164,10 @@ inline void UnpackID(int *list, int count, char *recvbuf, char *ID){ int main(int argc, char **argv) { // Initialize MPI - int rank,nprocs; MPI_Init(&argc,&argv); - MPI_Comm comm = MPI_COMM_WORLD; - 
MPI_Comm_rank(comm,&rank); - MPI_Comm_size(comm,&nprocs); + Utilities::MPI comm( MPI_COMM_WORLD ); + int rank = comm.getRank(); + int nprocs = comm.getSize(); int check; { @@ -263,14 +262,14 @@ int main(int argc, char **argv) } } } - MPI_Allreduce(&sum_local,&sum,1,MPI_DOUBLE,MPI_SUM,comm); + sum = comm.sumReduce( sum_local ); double iVol_global=1.f/double((Nx-2)*(Ny-2)*(Nz-2)*nprocx*nprocy*nprocz); porosity = 1.0-sum*iVol_global; if (rank==0) printf("Media porosity = %f \n",porosity); //....................................................................... //........................................................................... - MPI_Barrier(comm); + comm.barrier(); if (rank == 0) cout << "Domain set." << endl; //........................................................................... @@ -285,7 +284,7 @@ int main(int argc, char **argv) IntArray Map(Nx,Ny,Nz); Map.fill(-2); Np = ScaLBL_Comm.MemoryOptimizedLayoutAA(Map,neighborList,Dm->id,Np); - MPI_Barrier(comm); + comm.barrier(); int neighborSize=18*Np*sizeof(int); //......................device distributions................................. dist_mem_size = Np*sizeof(double); @@ -355,7 +354,7 @@ int main(int argc, char **argv) GlobalFlipScaLBL_D3Q19_Init(fq_host, Map, Np, Nx-2, Ny-2, Nz-2, iproc,jproc,kproc,nprocx,nprocy,nprocz); ScaLBL_CopyToDevice(fq, fq_host, 19*dist_mem_size); ScaLBL_DeviceBarrier(); - MPI_Barrier(comm); + comm.barrier(); //************************************************************************* // First timestep ScaLBL_Comm.SendD3Q19AA(fq); //READ FROM NORMAL @@ -378,7 +377,7 @@ int main(int argc, char **argv) //.......create and start timer............ double starttime,stoptime,cputime; - MPI_Barrier(comm); + comm.barrier(); starttime = MPI_Wtime(); //......................................... @@ -398,7 +397,7 @@ int main(int argc, char **argv) //********************************************* ScaLBL_DeviceBarrier(); - MPI_Barrier(comm); + comm.barrier(); // Iteration completed! timestep++; //................................................................... 
@@ -427,7 +426,7 @@ int main(int argc, char **argv) if (rank==0) printf("Aggregated communication bandwidth = %f Gbit/sec \n",nprocs*ScaLBL_Comm.CommunicationCount*64*timestep/1e9); } // **************************************************** - MPI_Barrier(comm); + comm.barrier(); MPI_Finalize(); // **************************************************** diff --git a/tests/TestDatabase.cpp b/tests/TestDatabase.cpp index 00bf87e2..ced704e2 100644 --- a/tests/TestDatabase.cpp +++ b/tests/TestDatabase.cpp @@ -9,7 +9,7 @@ #include "common/UnitTest.h" #include "common/Utilities.h" -#include "common/MPI_Helpers.h" +#include "common/MPI.h" #include "common/Database.h" #include "ProfilerApp.h" @@ -17,11 +17,8 @@ // Main int main(int argc, char **argv) { - int rank,nprocs; MPI_Init(&argc,&argv); - MPI_Comm comm = MPI_COMM_WORLD; - MPI_Comm_rank(comm,&rank); - MPI_Comm_size(comm,&nprocs); + Utilities::MPI comm( MPI_COMM_WORLD ); Utilities::setAbortBehavior(true,2); Utilities::setErrorHandlers(); UnitTest ut; @@ -69,7 +66,7 @@ int main(int argc, char **argv) // Finished PROFILE_SAVE("TestDatabase",true); - MPI_Barrier(comm); + comm.barrier(); MPI_Finalize(); return err; } diff --git a/tests/TestFluxBC.cpp b/tests/TestFluxBC.cpp index 020bbd89..3e999715 100644 --- a/tests/TestFluxBC.cpp +++ b/tests/TestFluxBC.cpp @@ -1,5 +1,5 @@ #include -#include "common/MPI_Helpers.h" +#include "common/MPI.h" #include "common/Utilities.h" #include "common/ScaLBL.h" @@ -18,9 +18,9 @@ std::shared_ptr loadInputs( int nprocs ) int main (int argc, char **argv) { MPI_Init(&argc,&argv); - MPI_Comm comm = MPI_COMM_WORLD; - int rank = MPI_WORLD_RANK(); - int nprocs = MPI_WORLD_SIZE(); + Utilities::MPI comm( MPI_COMM_WORLD ); + int rank = comm.getRank(); + int nprocs = comm.getSize(); // set the error code // Note: the error code should be consistent across all processors @@ -89,7 +89,7 @@ int main (int argc, char **argv) neighborList= new int[18*Npad]; Np = ScaLBL_Comm->MemoryOptimizedLayoutAA(Map,neighborList,Dm->id,Np); - MPI_Barrier(comm); + comm.barrier(); //......................device distributions................................. 
int dist_mem_size = Np*sizeof(double); @@ -149,7 +149,7 @@ int main (int argc, char **argv) double *VEL; VEL= new double [3*Np]; int SIZE=3*Np*sizeof(double); - ScaLBL_DeviceBarrier(); MPI_Barrier(comm); + ScaLBL_DeviceBarrier(); comm.barrier(); ScaLBL_CopyToHost(&VEL[0],&dvc_vel[0],SIZE); double Q = 0.f; @@ -192,7 +192,7 @@ int main (int argc, char **argv) din = ScaLBL_Comm->D3Q19_Flux_BC_z(NeighborList, fq, flux, timestep); ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, fq, dout, timestep); ScaLBL_D3Q19_AAodd_MRT(NeighborList, fq, 0, ScaLBL_Comm->next, Np, rlx_setA, rlx_setB, Fx, Fy, Fz); - ScaLBL_DeviceBarrier(); MPI_Barrier(comm); + ScaLBL_DeviceBarrier(); comm.barrier(); timestep++; ScaLBL_Comm->SendD3Q19AA(fq); //READ FORM NORMAL @@ -201,7 +201,7 @@ int main (int argc, char **argv) din = ScaLBL_Comm->D3Q19_Flux_BC_z(NeighborList, fq, flux, timestep); ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, fq, dout, timestep); ScaLBL_D3Q19_AAeven_MRT(fq, 0, ScaLBL_Comm->next, Np, rlx_setA, rlx_setB, Fx, Fy, Fz); - ScaLBL_DeviceBarrier(); MPI_Barrier(comm); + ScaLBL_DeviceBarrier(); comm.barrier(); timestep++; //************************************************************************/ @@ -265,7 +265,7 @@ int main (int argc, char **argv) } // Finished - MPI_Barrier(comm); + comm.barrier(); MPI_Finalize(); return error; } diff --git a/tests/TestForceD3Q19.cpp b/tests/TestForceD3Q19.cpp index b8f88aae..f8569624 100644 --- a/tests/TestForceD3Q19.cpp +++ b/tests/TestForceD3Q19.cpp @@ -1,5 +1,5 @@ #include -#include "common/MPI_Helpers.h" +#include "common/MPI.h" #include "common/Utilities.h" #include @@ -443,8 +443,9 @@ inline void MRT_Transform(double *dist, int Np, double Fx, double Fy, double Fz) int main (int argc, char **argv) { MPI_Init(&argc,&argv); - int rank = MPI_WORLD_RANK(); - int nprocs = MPI_WORLD_SIZE(); + Utilities::MPI comm( MPI_COMM_WORLD ); + int rank = comm.getRank(); + int nprocs = comm.getSize(); for (int i=0; i #include #include "common/ScaLBL.h" -#include "common/MPI_Helpers.h" +#include "common/MPI.h" using namespace std; @@ -46,15 +46,11 @@ std::shared_ptr loadInputs( int nprocs ) //*************************************************************************************** int main(int argc, char **argv) { - //***************************************** - // ***** MPI STUFF **************** - //***************************************** // Initialize MPI - int rank,nprocs; MPI_Init(&argc,&argv); - MPI_Comm comm = MPI_COMM_WORLD; - MPI_Comm_rank(comm,&rank); - MPI_Comm_size(comm,&nprocs); + Utilities::MPI comm( MPI_COMM_WORLD ); + int rank = comm.getRank(); + int nprocs = comm.getSize(); int check=0; { // parallel domain size (# of sub-domains) @@ -98,7 +94,7 @@ int main(int argc, char **argv) printf("********************************************************\n"); } - MPI_Barrier(comm); + comm.barrier(); kproc = rank/(nprocx*nprocy); jproc = (rank-nprocx*nprocy*kproc)/nprocx; iproc = rank-nprocx*nprocy*kproc-nprocz*jproc; @@ -106,7 +102,7 @@ int main(int argc, char **argv) if (rank == 0) { printf("i,j,k proc=%d %d %d \n",iproc,jproc,kproc); } - MPI_Barrier(comm); + comm.barrier(); if (rank == 1){ printf("i,j,k proc=%d %d %d \n",iproc,jproc,kproc); printf("\n\n"); @@ -143,7 +139,7 @@ int main(int argc, char **argv) } } Dm->CommInit(); - MPI_Barrier(comm); + comm.barrier(); if (rank == 0) cout << "Domain set." 
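The TestFluxBC hunk keeps the ordering used throughout the patch when results are copied back for checking: finish the device work, synchronize the ranks, then copy to the host. A condensed sketch built only from the calls shown above:

    // ... device kernels produce the velocity in dvc_vel ...
    ScaLBL_DeviceBarrier();                                           // make sure device work has finished
    comm.barrier();                                                   // keep all ranks in step
    ScaLBL_CopyToHost( &VEL[0], &dvc_vel[0], 3*Np*sizeof(double) );   // now safe to read VEL on the host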
<< endl; int Np=0; // number of local pore nodes @@ -188,7 +184,7 @@ int main(int argc, char **argv) if (rank == 0) PrintNeighborList(neighborList,Np, rank); - MPI_Barrier(comm); + comm.barrier(); //......................device distributions................................. int dist_mem_size = Np*sizeof(double); @@ -213,13 +209,13 @@ int main(int argc, char **argv) //.......create and start timer............ double starttime,stoptime,cputime; - ScaLBL_DeviceBarrier(); MPI_Barrier(comm); + ScaLBL_DeviceBarrier(); comm.barrier(); starttime = MPI_Wtime(); /************ MAIN ITERATION LOOP (timing communications)***************************************/ //ScaLBL_Comm->SendD3Q19(dist, &dist[10*Np]); //ScaLBL_Comm->RecvD3Q19(dist, &dist[10*Np]); - ScaLBL_DeviceBarrier(); MPI_Barrier(comm); + ScaLBL_DeviceBarrier(); comm.barrier(); if (rank==0) printf("Beginning AA timesteps...\n"); if (rank==0) printf("********************************************************\n"); @@ -231,14 +227,14 @@ int main(int argc, char **argv) ScaLBL_D3Q19_AAodd_MRT(NeighborList, dist, ScaLBL_Comm->first_interior, ScaLBL_Comm->last_interior, Np, rlx_setA, rlx_setB, Fx, Fy, Fz); ScaLBL_Comm->RecvD3Q19AA(dist); //WRITE INTO OPPOSITE ScaLBL_D3Q19_AAodd_MRT(NeighborList, dist, 0, ScaLBL_Comm->next, Np, rlx_setA, rlx_setB, Fx, Fy, Fz); - ScaLBL_DeviceBarrier(); MPI_Barrier(comm); + ScaLBL_DeviceBarrier(); comm.barrier(); timestep++; ScaLBL_Comm->SendD3Q19AA(dist); //READ FORM NORMAL ScaLBL_D3Q19_AAeven_MRT(dist, ScaLBL_Comm->first_interior, ScaLBL_Comm->last_interior, Np, rlx_setA, rlx_setB, Fx, Fy, Fz); ScaLBL_Comm->RecvD3Q19AA(dist); //WRITE INTO OPPOSITE ScaLBL_D3Q19_AAeven_MRT(dist, 0, ScaLBL_Comm->next, Np, rlx_setA, rlx_setB, Fx, Fy, Fz); - ScaLBL_DeviceBarrier(); MPI_Barrier(comm); + ScaLBL_DeviceBarrier(); comm.barrier(); timestep++; //************************************************************************/ @@ -331,7 +327,7 @@ int main(int argc, char **argv) } // **************************************************** - MPI_Barrier(comm); + comm.barrier(); MPI_Finalize(); // **************************************************** diff --git a/tests/TestInterfaceSpeed.cpp b/tests/TestInterfaceSpeed.cpp index 40d53b47..d2c901df 100644 --- a/tests/TestInterfaceSpeed.cpp +++ b/tests/TestInterfaceSpeed.cpp @@ -2,7 +2,7 @@ #include #include "analysis/TwoPhase.h" -#include "common/MPI_Helpers.h" +#include "common/MPI.h" #include "common/Communication.h" #include "IO/Mesh.h" #include "IO/Writer.h" @@ -18,13 +18,9 @@ int main (int argc, char *argv[]) { // Initialize MPI - int rank,nprocs; MPI_Init(&argc,&argv); - MPI_Comm comm = MPI_COMM_WORLD; - MPI_Comm_rank(comm,&rank); - MPI_Comm_size(comm,&nprocs); - - int i,j,k; + Utilities::MPI comm( MPI_COMM_WORLD ); + int rank = comm.getRank(); // Load inputs string FILENAME = argv[1]; @@ -40,7 +36,7 @@ int main (int argc, char *argv[]) Nx+=2; Ny+=2; Nz+=2; - for (i=0; iid[i] = 1; + for (int i=0; iid[i] = 1; Dm->CommInit(); @@ -51,9 +47,9 @@ int main (int argc, char *argv[]) double dist1,dist2; Cx = Cy = Cz = N*0.5; - for (k=0; k #include #include "common/ScaLBL.h" -#include "common/MPI_Helpers.h" +#include "common/MPI.h" using namespace std; @@ -488,15 +488,11 @@ inline void UnpackID(int *list, int count, char *recvbuf, char *ID){ //*************************************************************************************** int main(int argc, char **argv) { - //***************************************** - // ***** MPI STUFF **************** - //***************************************** // 
Initialize MPI - int rank,nprocs; MPI_Init(&argc,&argv); - MPI_Comm comm = MPI_COMM_WORLD; - MPI_Comm_rank(comm,&rank); - MPI_Comm_size(comm,&nprocs); + Utilities::MPI comm( MPI_COMM_WORLD ); + int rank = comm.getRank(); + int nprocs = comm.getSize(); int check; { // parallel domain size (# of sub-domains) @@ -582,7 +578,7 @@ int main(int argc, char **argv) } // ************************************************************** // Broadcast simulation parameters from rank 0 to all other procs - MPI_Barrier(comm); + comm.barrier(); //................................................. MPI_Bcast(&Nx,1,MPI_INT,0,comm); MPI_Bcast(&Ny,1,MPI_INT,0,comm); @@ -595,7 +591,7 @@ int main(int argc, char **argv) MPI_Bcast(&Ly,1,MPI_DOUBLE,0,comm); MPI_Bcast(&Lz,1,MPI_DOUBLE,0,comm); //................................................. - MPI_Barrier(comm); + comm.barrier(); // ************************************************************** // ************************************************************** @@ -613,7 +609,7 @@ int main(int argc, char **argv) printf("********************************************************\n"); } - MPI_Barrier(comm); + comm.barrier(); kproc = rank/(nprocx*nprocy); jproc = (rank-nprocx*nprocy*kproc)/nprocx; iproc = rank-nprocx*nprocy*kproc-nprocz*jproc; @@ -621,7 +617,7 @@ int main(int argc, char **argv) if (rank == 0) { printf("i,j,k proc=%d %d %d \n",iproc,jproc,kproc); } - MPI_Barrier(comm); + comm.barrier(); if (rank == 1){ printf("i,j,k proc=%d %d %d \n",iproc,jproc,kproc); printf("\n\n"); @@ -650,7 +646,7 @@ int main(int argc, char **argv) fread(Dm.id,1,N,IDFILE); fclose(IDFILE); - MPI_Barrier(comm); + comm.barrier(); Dm.CommInit(); //....................................................................... @@ -671,12 +667,12 @@ int main(int argc, char **argv) } } } - MPI_Barrier(comm); + comm.barrier(); MPI_Allreduce(&sum_local,&sum,1,MPI_DOUBLE,MPI_SUM,comm); porosity = sum*iVol_global; if (rank==0) printf("Media porosity = %f \n",porosity); - MPI_Barrier(comm); + comm.barrier(); if (rank == 0) cout << "Domain set." << endl; if (rank==0) printf ("Create ScaLBL_Communicator \n"); @@ -706,7 +702,7 @@ int main(int argc, char **argv) neighborList= new int[18*Np]; ScaLBL_Comm.MemoryOptimizedLayoutAA(Map,neighborList,Dm.id,Np); - MPI_Barrier(comm); + comm.barrier(); //......................device distributions................................. int dist_mem_size = Np*sizeof(double); @@ -734,7 +730,7 @@ int main(int argc, char **argv) //.......create and start timer............ 
double starttime,stoptime,cputime; - ScaLBL_DeviceBarrier(); MPI_Barrier(comm); + ScaLBL_DeviceBarrier(); comm.barrier(); starttime = MPI_Wtime(); while (timestep < timesteps) { @@ -743,14 +739,14 @@ int main(int argc, char **argv) ScaLBL_D3Q19_AAodd_MRT(NeighborList, dist, ScaLBL_Comm.next, Np, Np, rlx_setA, rlx_setB, Fx, Fy, Fz); ScaLBL_Comm.RecvD3Q19AA(dist); //WRITE INTO OPPOSITE ScaLBL_D3Q19_AAodd_MRT(NeighborList, dist, 0, ScaLBL_Comm.next, Np, rlx_setA, rlx_setB, Fx, Fy, Fz); - ScaLBL_DeviceBarrier(); MPI_Barrier(comm); + ScaLBL_DeviceBarrier(); comm.barrier(); timestep++; ScaLBL_Comm.SendD3Q19AA(dist); //READ FORM NORMAL ScaLBL_D3Q19_AAeven_MRT(dist, ScaLBL_Comm.next, Np, Np, rlx_setA, rlx_setB, Fx, Fy, Fz); ScaLBL_Comm.RecvD3Q19AA(dist); //WRITE INTO OPPOSITE ScaLBL_D3Q19_AAeven_MRT(dist, 0, ScaLBL_Comm.next, Np, rlx_setA, rlx_setB, Fx, Fy, Fz); - ScaLBL_DeviceBarrier(); MPI_Barrier(comm); + ScaLBL_DeviceBarrier(); comm.barrier(); timestep++; //************************************************************************/ @@ -783,7 +779,7 @@ int main(int argc, char **argv) VEL= new double [3*Np]; int SIZE=3*Np*sizeof(double); ScaLBL_D3Q19_Momentum(dist,Velocity, Np); - ScaLBL_DeviceBarrier(); MPI_Barrier(comm); + ScaLBL_DeviceBarrier(); comm.barrier(); ScaLBL_CopyToHost(&VEL[0],&Velocity[0],SIZE); sum_local=0.f; @@ -805,7 +801,7 @@ int main(int argc, char **argv) } // **************************************************** - MPI_Barrier(comm); + comm.barrier(); MPI_Finalize(); // **************************************************** diff --git a/tests/TestMap.cpp b/tests/TestMap.cpp index a47c0d9e..f3010081 100644 --- a/tests/TestMap.cpp +++ b/tests/TestMap.cpp @@ -7,7 +7,7 @@ #include #include #include "common/ScaLBL.h" -#include "common/MPI_Helpers.h" +#include "common/MPI.h" using namespace std; @@ -26,15 +26,9 @@ std::shared_ptr loadInputs( int nprocs ) //*************************************************************************************** int main(int argc, char **argv) { - //***************************************** - // ***** MPI STUFF **************** - //***************************************** // Initialize MPI - int rank,nprocs; MPI_Init(&argc,&argv); - MPI_Comm comm = MPI_COMM_WORLD; - MPI_Comm_rank(comm,&rank); - MPI_Comm_size(comm,&nprocs); + Utilities::MPI comm( MPI_COMM_WORLD ); int check=0; { @@ -45,6 +39,7 @@ int main(int argc, char **argv) {1,0,1},{-1,0,-1},{1,0,-1},{-1,0,1}, {0,1,1},{0,-1,-1},{0,1,-1},{0,-1,1}}; + int rank = comm.getRank(); if (rank == 0){ printf("********************************************************\n"); printf("Running unit test: TestMap \n"); @@ -52,7 +47,7 @@ int main(int argc, char **argv) } // Load inputs - auto db = loadInputs( nprocs ); + auto db = loadInputs( comm.getSize() ); int Nx = db->getVector( "n" )[0]; int Ny = db->getVector( "n" )[1]; int Nz = db->getVector( "n" )[2]; @@ -94,7 +89,7 @@ int main(int argc, char **argv) neighborList= new int[18*Npad]; Np = ScaLBL_Comm->MemoryOptimizedLayoutAA(Map,neighborList,Dm->id,Np); - MPI_Barrier(comm); + comm.barrier(); // Check the neighborlist printf("Check neighborlist: exterior %i, first interior %i last interior %i \n",ScaLBL_Comm->LastExterior(),ScaLBL_Comm->FirstInterior(),ScaLBL_Comm->LastInterior()); @@ -197,7 +192,7 @@ int main(int argc, char **argv) } // **************************************************** - MPI_Barrier(comm); + comm.barrier(); MPI_Finalize(); // **************************************************** diff --git a/tests/TestMassConservationD3Q7.cpp 
b/tests/TestMassConservationD3Q7.cpp index bbfe8cae..68183cd2 100644 --- a/tests/TestMassConservationD3Q7.cpp +++ b/tests/TestMassConservationD3Q7.cpp @@ -8,7 +8,7 @@ #include #include "common/ScaLBL.h" -#include "common/MPI_Helpers.h" +#include "common/MPI.h" #include "models/ColorModel.h" inline void InitializeBubble(ScaLBL_ColorModel &ColorModel, double BubbleRadius){ @@ -67,11 +67,10 @@ inline void InitializeBubble(ScaLBL_ColorModel &ColorModel, double BubbleRadius) int main(int argc, char **argv) { // Initialize MPI - int rank,nprocs; MPI_Init(&argc,&argv); - MPI_Comm comm = MPI_COMM_WORLD; - MPI_Comm_rank(comm,&rank); - MPI_Comm_size(comm,&nprocs); + Utilities::MPI comm( MPI_COMM_WORLD ); + int rank = comm.getRank(); + int nprocs = comm.getSize(); // parallel domain size (# of sub-domains) if (rank == 0){ @@ -266,7 +265,7 @@ int main(int argc, char **argv) } } // **************************************************** - MPI_Barrier(comm); + comm.barrier(); MPI_Finalize(); // **************************************************** } diff --git a/tests/TestMicroCTReader.cpp b/tests/TestMicroCTReader.cpp index 4a4c6aac..9a54610c 100644 --- a/tests/TestMicroCTReader.cpp +++ b/tests/TestMicroCTReader.cpp @@ -1,6 +1,6 @@ // Test reading high-resolution files from the microct database -#include "common/MPI_Helpers.h" +#include "common/MPI.h" #include "common/UnitTest.h" #include "common/Database.h" #include "common/Domain.h" @@ -13,12 +13,14 @@ void testReadMicroCT( const std::string& filename, UnitTest& ut ) { + Utilities::MPI comm( MPI_COMM_WORLD ); + // Get the domain info auto db = std::make_shared( filename ); auto domain_db = db->getDatabase( "Domain" ); // Test reading microCT files - auto data = readMicroCT( *domain_db, MPI_COMM_WORLD ); + auto data = readMicroCT( *domain_db, comm ); // Check if we loaded the data correctly if ( data.size() == domain_db->getVector( "n" ) ) @@ -30,7 +32,7 @@ void testReadMicroCT( const std::string& filename, UnitTest& ut ) auto n = domain_db->getVector( "n" ); auto nproc = domain_db->getVector( "nproc" ); int N[3] = { n[0]*nproc[0], n[1]*nproc[1], n[2]*nproc[2] }; - int rank = comm_rank(MPI_COMM_WORLD); + int rank = comm.getRank(); RankInfoStruct rankInfo( rank, nproc[0], nproc[1], nproc[2] ); std::vector meshData( 1 ); auto Var = std::make_shared(); @@ -41,7 +43,7 @@ void testReadMicroCT( const std::string& filename, UnitTest& ut ) meshData[0].meshName = "grid"; meshData[0].mesh = std::make_shared(rankInfo,n[0],n[1],n[2],N[0],N[1],N[2]); meshData[0].vars.push_back(Var); - IO::writeData( 0, meshData, MPI_COMM_WORLD ); + IO::writeData( 0, meshData, comm ); } diff --git a/tests/TestMomentsD3Q19.cpp b/tests/TestMomentsD3Q19.cpp index b26d7bed..6bd3e8ff 100644 --- a/tests/TestMomentsD3Q19.cpp +++ b/tests/TestMomentsD3Q19.cpp @@ -1,5 +1,5 @@ #include -#include "common/MPI_Helpers.h" +#include "common/MPI.h" #include "common/Utilities.h" #include @@ -463,13 +463,14 @@ inline void MRT_Transform(double *dist, int Np) { int main (int argc, char **argv) { MPI_Init(&argc,&argv); - int rank = MPI_WORLD_RANK(); - int nprocs = MPI_WORLD_SIZE(); + Utilities::MPI comm( MPI_COMM_WORLD ); + int rank = comm.getRank(); + int nprocs = comm.getSize(); for (int i=0; i tmp = netcdf::getVar( fid, "tmp" ); @@ -95,7 +96,8 @@ int main(int argc, char **argv) { // Initialize MPI MPI_Init(&argc,&argv); - int rank = comm_rank(MPI_COMM_WORLD); + Utilities::MPI comm( MPI_COMM_WORLD ); + const int rank = comm.getRank(); UnitTest ut; PROFILE_START("Main"); diff --git 
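TestMicroCTReader now passes the wrapper object itself into readMicroCT and IO::writeData (and, elsewhere in this patch, into the Domain constructor), which suggests those interfaces are being changed to accept the wrapper in place of a raw MPI_Comm. The presumed direction of that change, with declarations that are assumptions rather than content of this hunk:

    // before:  ... readMicroCT( const Database& domain, MPI_Comm comm );
    // after:   ... readMicroCT( const Database& domain, const Utilities::MPI& comm );
    // and similarly for IO::writeData( int timestep, const std::vector<IO::MeshDataStruct>& meshData,
    //                                  const Utilities::MPI& comm );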
a/tests/TestPoiseuille.cpp b/tests/TestPoiseuille.cpp index e69507e1..744d292d 100644 --- a/tests/TestPoiseuille.cpp +++ b/tests/TestPoiseuille.cpp @@ -7,7 +7,7 @@ #include #include #include "common/ScaLBL.h" -#include "common/MPI_Helpers.h" +#include "common/MPI.h" #include "models/MRTModel.h" void ParallelPlates(ScaLBL_MRTModel &MRT){ @@ -47,15 +47,11 @@ void ParallelPlates(ScaLBL_MRTModel &MRT){ //*************************************************************************************** int main(int argc, char **argv) { - //***************************************** - // ***** MPI STUFF **************** - //***************************************** // Initialize MPI - int rank,nprocs; MPI_Init(&argc,&argv); - MPI_Comm comm = MPI_COMM_WORLD; - MPI_Comm_rank(comm,&rank); - MPI_Comm_size(comm,&nprocs); + Utilities::MPI comm( MPI_COMM_WORLD ); + int rank = comm.getRank(); + int nprocs = comm.getSize(); int check=0; { if (rank == 0){ @@ -77,7 +73,7 @@ int main(int argc, char **argv) int SIZE=MRT.Np*sizeof(double); ScaLBL_D3Q19_Momentum(MRT.fq,MRT.Velocity, MRT.Np); - ScaLBL_DeviceBarrier(); MPI_Barrier(comm); + ScaLBL_DeviceBarrier(); comm.barrier(); ScaLBL_CopyToHost(&Vz[0],&MRT.Velocity[0],3*SIZE); if (rank == 0) printf("Force: %f,%f,%f \n",MRT.Fx,MRT.Fy,MRT.Fz); @@ -91,7 +87,7 @@ int main(int argc, char **argv) j=Ny/2; k=Nz/2; if (rank == 0) printf("Channel width=%f \n",W); if (rank == 0) printf("ID flag vz analytical\n"); - MPI_Barrier(comm); + comm.barrier(); if (rank == 0) { for (i=0;i #include #include "common/ScaLBL.h" -#include "common/MPI_Helpers.h" +#include "common/MPI.h" //*************************************************************************************** int main(int argc, char **argv) { - //***************************************** - // ***** MPI STUFF **************** - //***************************************** // Initialize MPI - int rank,nprocs; MPI_Init(&argc,&argv); - MPI_Comm comm = MPI_COMM_WORLD; - MPI_Comm_rank(comm,&rank); - MPI_Comm_size(comm,&nprocs); + Utilities::MPI comm( MPI_COMM_WORLD ); + int rank = comm.getRank(); int check=0; { if (rank == 0){ @@ -50,7 +45,7 @@ int main(int argc, char **argv) printf("********************************************************\n"); } - MPI_Barrier(comm); + comm.barrier(); int kproc = rank/(nprocx*nprocy); int jproc = (rank-nprocx*nprocy*kproc)/nprocx; int iproc = rank-nprocx*nprocy*kproc-nprocz*jproc; @@ -58,7 +53,7 @@ int main(int argc, char **argv) if (rank == 0) { printf("i,j,k proc=%d %d %d \n",iproc,jproc,kproc); } - MPI_Barrier(comm); + comm.barrier(); if (rank == 1){ printf("i,j,k proc=%d %d %d \n",iproc,jproc,kproc); printf("\n\n"); @@ -102,11 +97,11 @@ int main(int argc, char **argv) } } } - MPI_Allreduce(&sum_local,&sum,1,MPI_DOUBLE,MPI_SUM,comm); + sum = comm.sumReduce( sum_local ); porosity = sum*iVol_global; if (rank==0) printf("Media porosity = %f \n",porosity); - MPI_Barrier(comm); + comm.barrier(); if (rank == 0) cout << "Domain set." << endl; if (rank==0) printf ("Create ScaLBL_Communicator \n"); @@ -133,7 +128,7 @@ int main(int argc, char **argv) IntArray Map(Nx,Ny,Nz); neighborList= new int[18*Npad]; Np = ScaLBL_Comm->MemoryOptimizedLayoutAA(Map,neighborList,Dm->id,Np); - MPI_Barrier(comm); + comm.barrier(); //......................device distributions................................. 
if (rank==0) printf ("Allocating distributions \n"); @@ -194,7 +189,7 @@ int main(int argc, char **argv) } } // **************************************************** - MPI_Barrier(comm); + comm.barrier(); MPI_Finalize(); // **************************************************** return check; diff --git a/tests/TestSegDist.cpp b/tests/TestSegDist.cpp index ece3222d..b5e23ec8 100644 --- a/tests/TestSegDist.cpp +++ b/tests/TestSegDist.cpp @@ -39,11 +39,10 @@ std::shared_ptr loadInputs( int nprocs ) int main(int argc, char **argv) { // Initialize MPI - int rank, nprocs; MPI_Init(&argc,&argv); - MPI_Comm comm = MPI_COMM_WORLD; - MPI_Comm_rank(comm,&rank); - MPI_Comm_size(comm,&nprocs); + Utilities::MPI comm( MPI_COMM_WORLD ); + int rank = comm.getRank(); + int nprocs = comm.getSize(); { @@ -98,7 +97,7 @@ int main(int argc, char **argv) } } - MPI_Barrier(comm); + comm.barrier(); if (rank==0) printf("Initialized! Converting to Signed Distance function \n"); double t1 = MPI_Wtime(); @@ -116,7 +115,7 @@ int main(int argc, char **argv) } } } - err = sumReduce( Dm.Comm, err ); + err = Dm.Comm.sumReduce( err ); err = sqrt( err / (nx*ny*nz*nprocs) ); if (rank==0) printf("Mean error %0.4f \n", err); @@ -142,7 +141,7 @@ int main(int argc, char **argv) IO::writeData( "testSegDist", data, MPI_COMM_WORLD ); } - MPI_Barrier(comm); + comm.barrier(); MPI_Finalize(); return 0; diff --git a/tests/TestSubphase.cpp b/tests/TestSubphase.cpp index fd6383be..9738812f 100644 --- a/tests/TestSubphase.cpp +++ b/tests/TestSubphase.cpp @@ -26,11 +26,10 @@ std::shared_ptr loadInputs( int nprocs ) int main(int argc, char **argv) { // Initialize MPI - int rank, nprocs; MPI_Init(&argc,&argv); - MPI_Comm comm = MPI_COMM_WORLD; - MPI_Comm_rank(comm,&rank); - MPI_Comm_size(comm,&nprocs); + Utilities::MPI comm( MPI_COMM_WORLD ); + int rank = comm.getRank(); + int nprocs = comm.getSize(); { // Limit scope so variables that contain communicators will free before MPI_Finialize if ( rank==0 ) { @@ -137,7 +136,7 @@ int main(int argc, char **argv) // Averages->Reduce(); } // Limit scope so variables that contain communicators will free before MPI_Finialize - MPI_Barrier(comm); + comm.barrier(); MPI_Finalize(); return 0; } diff --git a/tests/TestTopo3D.cpp b/tests/TestTopo3D.cpp index 8d00ef5a..948bb1d6 100644 --- a/tests/TestTopo3D.cpp +++ b/tests/TestTopo3D.cpp @@ -26,11 +26,10 @@ std::shared_ptr loadInputs( int nprocs ) int main(int argc, char **argv) { // Initialize MPI - int rank, nprocs; MPI_Init(&argc,&argv); - MPI_Comm comm = MPI_COMM_WORLD; - MPI_Comm_rank(comm,&rank); - MPI_Comm_size(comm,&nprocs); + Utilities::MPI comm( MPI_COMM_WORLD ); + int rank = comm.getRank(); + int nprocs = comm.getSize(); { // Limit scope so variables that contain communicators will free before MPI_Finialize if ( rank==0 ) { @@ -226,7 +225,7 @@ int main(int argc, char **argv) IO::writeData( timestep, visData, comm ); } // Limit scope so variables that contain communicators will free before MPI_Finialize - MPI_Barrier(comm); + comm.barrier(); MPI_Finalize(); return 0; } diff --git a/tests/TestTorus.cpp b/tests/TestTorus.cpp index 2d486774..5125ce92 100644 --- a/tests/TestTorus.cpp +++ b/tests/TestTorus.cpp @@ -26,11 +26,10 @@ std::shared_ptr loadInputs( int nprocs ) int main(int argc, char **argv) { // Initialize MPI - int rank, nprocs; MPI_Init(&argc,&argv); - MPI_Comm comm = MPI_COMM_WORLD; - MPI_Comm_rank(comm,&rank); - MPI_Comm_size(comm,&nprocs); + Utilities::MPI comm( MPI_COMM_WORLD ); + int rank = comm.getRank(); + int nprocs = comm.getSize(); { // 
Limit scope so variables that contain communicators will free before MPI_Finialize if ( rank==0 ) { @@ -165,7 +164,7 @@ int main(int argc, char **argv) // Averages->Reduce(); } // Limit scope so variables that contain communicators will free before MPI_Finialize - MPI_Barrier(comm); + comm.barrier(); MPI_Finalize(); return 0; } diff --git a/tests/TestTorusEvolve.cpp b/tests/TestTorusEvolve.cpp index 1a65d268..32cf7fd8 100644 --- a/tests/TestTorusEvolve.cpp +++ b/tests/TestTorusEvolve.cpp @@ -26,11 +26,10 @@ std::shared_ptr loadInputs( int nprocs ) int main(int argc, char **argv) { // Initialize MPI - int rank, nprocs; MPI_Init(&argc,&argv); - MPI_Comm comm = MPI_COMM_WORLD; - MPI_Comm_rank(comm,&rank); - MPI_Comm_size(comm,&nprocs); + Utilities::MPI comm( MPI_COMM_WORLD ); + int rank = comm.getRank(); + int nprocs = comm.getSize(); { // Limit scope so variables that contain communicators will free before MPI_Finialize if ( rank==0 ) { @@ -157,7 +156,7 @@ int main(int argc, char **argv) } } // Limit scope so variables that contain communicators will free before MPI_Finialize - MPI_Barrier(comm); + comm.barrier(); MPI_Finalize(); return 0; } diff --git a/tests/TestTwoPhase.cpp b/tests/TestTwoPhase.cpp index a979314a..fa54d98d 100644 --- a/tests/TestTwoPhase.cpp +++ b/tests/TestTwoPhase.cpp @@ -8,7 +8,7 @@ #include #include "analysis/TwoPhase.h" -#include "common/MPI_Helpers.h" +#include "common/MPI.h" #include "common/Communication.h" #include "IO/Mesh.h" #include "IO/Writer.h" @@ -17,11 +17,10 @@ int main(int argc, char **argv) { // Initialize MPI - int rank,nprocs; MPI_Init(&argc,&argv); - MPI_Comm comm = MPI_COMM_WORLD; - MPI_Comm_rank(comm,&rank); - MPI_Comm_size(comm,&nprocs); + Utilities::MPI comm( MPI_COMM_WORLD ); + int rank = comm.getRank(); + int nprocs = comm.getSize(); { // Limit scope so Domain can free it's communicator printf("Running two-phase averaging test on %i processors \n",nprocs); @@ -110,7 +109,7 @@ int main(int argc, char **argv) fclose(PHASE); } // **************************************************** - MPI_Barrier(comm); + comm.barrier(); } // Limit scope so Domain will free it's communicator MPI_Finalize(); return 0; diff --git a/tests/TestWriter.cpp b/tests/TestWriter.cpp index 8936aaff..37858202 100644 --- a/tests/TestWriter.cpp +++ b/tests/TestWriter.cpp @@ -8,7 +8,7 @@ #include "common/UnitTest.h" #include "common/Utilities.h" -#include "common/MPI_Helpers.h" +#include "common/MPI.h" #include "IO/MeshDatabase.h" #include "IO/Reader.h" #include "IO/Writer.h" @@ -34,11 +34,9 @@ inline double distance( const Point& p ) // Test writing and reading the given format void testWriter( const std::string& format, std::vector& meshData, UnitTest& ut ) { - int rank, nprocs; - MPI_Comm comm = MPI_COMM_WORLD; - MPI_Comm_rank(comm,&rank); - MPI_Comm_size(comm,&nprocs); - MPI_Barrier(comm); + Utilities::MPI comm( MPI_COMM_WORLD ); + int nprocs = comm.getSize(); + comm.barrier(); // Get the format std::string format2 = format; @@ -63,7 +61,7 @@ void testWriter( const std::string& format, std::vector& mes IO::initialize( "test_"+format, format2, false ); IO::writeData( 0, meshData, comm ); IO::writeData( 3, meshData, comm ); - MPI_Barrier(comm); + comm.barrier(); PROFILE_STOP(format+"-write"); // Get the summary name for reading @@ -228,11 +226,10 @@ void testWriter( const std::string& format, std::vector& mes // Main int main(int argc, char **argv) { - int rank,nprocs; MPI_Init(&argc,&argv); - MPI_Comm comm = MPI_COMM_WORLD; - MPI_Comm_rank(comm,&rank); - 
MPI_Comm_size(comm,&nprocs); + Utilities::MPI comm( MPI_COMM_WORLD ); + int rank = comm.getRank(); + int nprocs = comm.getSize(); Utilities::setAbortBehavior(true,2); Utilities::setErrorHandlers(); UnitTest ut; @@ -389,7 +386,7 @@ int main(int argc, char **argv) ut.report(); PROFILE_SAVE("TestWriter",true); int N_errors = ut.NumFailGlobal(); - MPI_Barrier(comm); + comm.barrier(); MPI_Finalize(); return N_errors; } diff --git a/tests/convertIO.cpp b/tests/convertIO.cpp index 0937729f..27605237 100644 --- a/tests/convertIO.cpp +++ b/tests/convertIO.cpp @@ -5,7 +5,7 @@ #include #include -#include "common/MPI_Helpers.h" +#include "common/MPI.h" #include "common/Communication.h" #include "common/Utilities.h" #include "IO/Mesh.h" @@ -17,11 +17,10 @@ int main(int argc, char **argv) { // Initialize MPI - int rank,nprocs; MPI_Init(&argc,&argv); - MPI_Comm comm = MPI_COMM_WORLD; - MPI_Comm_rank(comm,&rank); - MPI_Comm_size(comm,&nprocs); + Utilities::MPI comm( MPI_COMM_WORLD ); + int rank = comm.getRank(); + int nprocs = comm.getSize(); Utilities::setErrorHandlers(); PROFILE_ENABLE(2); PROFILE_ENABLE_TRACE(); @@ -70,20 +69,20 @@ int main(int argc, char **argv) i++; } - MPI_Barrier(comm); + comm.barrier(); PROFILE_STOP("Read"); // Save the mesh data to a new file PROFILE_START("Write"); IO::writeData( timestep, meshData, MPI_COMM_WORLD ); - MPI_Barrier(comm); + comm.barrier(); PROFILE_STOP("Write"); } } // Limit scope PROFILE_STOP("Main"); PROFILE_SAVE("convertData",true); - MPI_Barrier(comm); + comm.barrier(); MPI_Finalize(); return 0; } diff --git a/tests/hello_world.cpp b/tests/hello_world.cpp index d236bf0e..810d3a9c 100644 --- a/tests/hello_world.cpp +++ b/tests/hello_world.cpp @@ -1,18 +1,19 @@ #include -#include "common/MPI_Helpers.h" +#include "common/MPI.h" #include "common/Utilities.h" int main (int argc, char **argv) { MPI_Init(&argc,&argv); - int rank = MPI_WORLD_RANK(); - int nprocs = MPI_WORLD_SIZE(); + Utilities::MPI comm( MPI_COMM_WORLD ); + int rank = comm.getRank(); + int nprocs = comm.getSize(); for (int i=0; i loadInputs( ) @@ -24,15 +24,11 @@ std::shared_ptr loadInputs( ) //*************************************************************************************** int main(int argc, char **argv) { - //***************************************** - // ***** MPI STUFF **************** - //***************************************** // Initialize MPI - int rank,nprocs; MPI_Init(&argc,&argv); - MPI_Comm comm = MPI_COMM_WORLD; - MPI_Comm_rank(comm,&rank); - MPI_Comm_size(comm,&nprocs); + Utilities::MPI comm( MPI_COMM_WORLD ); + int rank = comm.getRank(); + int nprocs = comm.getSize(); { //***************************************** // MPI ranks for all 18 neighbors @@ -96,7 +92,7 @@ int main(int argc, char **argv) rank_xy, rank_XY, rank_xY, rank_Xy, rank_xz, rank_XZ, rank_xZ, rank_Xz, rank_yz, rank_YZ, rank_yZ, rank_Yz ); - MPI_Barrier(comm); + comm.barrier(); Nz += 2; Nx = Ny = Nz; // Cubic domain @@ -185,7 +181,7 @@ int main(int argc, char **argv) } // **************************************************** - MPI_Barrier(comm); + comm.barrier(); MPI_Finalize(); // **************************************************** } diff --git a/tests/lbpm_color_macro_simulator.cpp b/tests/lbpm_color_macro_simulator.cpp index 1c619c5a..97df6812 100644 --- a/tests/lbpm_color_macro_simulator.cpp +++ b/tests/lbpm_color_macro_simulator.cpp @@ -9,7 +9,7 @@ #include "common/Communication.h" #include "analysis/TwoPhase.h" #include "analysis/runAnalysis.h" -#include "common/MPI_Helpers.h" +#include "common/MPI.h" 
#include "ProfilerApp.h" #include "threadpool/thread_pool.h" @@ -30,10 +30,9 @@ int main(int argc, char **argv) // Initialize MPI int provided_thread_support = -1; MPI_Init_thread(&argc,&argv,MPI_THREAD_MULTIPLE,&provided_thread_support); - MPI_Comm comm; - MPI_Comm_dup(MPI_COMM_WORLD,&comm); - int rank = comm_rank(comm); - int nprocs = comm_size(comm); + Utilities::MPI comm( MPI_COMM_WORLD ); + int rank = comm.getRank(); + int nprocs = comm.getSize(); { // Limit scope so variables that contain communicators will free before MPI_Finialize // parallel domain size (# of sub-domains) @@ -52,7 +51,7 @@ int main(int argc, char **argv) // int device=ScaLBL_SetDevice(rank); //printf("Using GPU ID %i for rank %i \n",device,rank); ScaLBL_DeviceBarrier(); - MPI_Barrier(comm); + comm.barrier(); PROFILE_ENABLE(1); //PROFILE_ENABLE_TRACE(); @@ -171,7 +170,7 @@ int main(int argc, char **argv) } // ************************************************************** // Broadcast simulation parameters from rank 0 to all other procs - MPI_Barrier(comm); + comm.barrier(); //................................................. MPI_Bcast(&tauA,1,MPI_DOUBLE,0,comm); MPI_Bcast(&tauB,1,MPI_DOUBLE,0,comm); @@ -207,7 +206,7 @@ int main(int argc, char **argv) // Get the rank info const RankInfoStruct rank_info(rank,nprocx,nprocy,nprocz); - MPI_Barrier(comm); + comm.barrier(); if (nprocs != nprocx*nprocy*nprocz){ printf("nprocx = %i \n",nprocx); @@ -262,7 +261,7 @@ int main(int argc, char **argv) // Mask that excludes the solid phase Domain Mask(Nx,Ny,Nz,rank,nprocx,nprocy,nprocz,Lx,Ly,Lz,BoundaryCondition); - MPI_Barrier(comm); + comm.barrier(); Nx+=2; Ny+=2; Nz += 2; int N = Nx*Ny*Nz; @@ -297,7 +296,7 @@ int main(int argc, char **argv) sprintf(LocalRankString,"%05d",rank); sprintf(LocalRankFilename,"%s%s","SignDist.",LocalRankString); ReadBinaryFile(LocalRankFilename, Averages->SDs.data(), N); - MPI_Barrier(comm); + comm.barrier(); if (rank == 0) cout << "Domain set." << endl; if (rank==0) printf("Initialize from segmented data: solid=0, NWP=1, WP=2 \n"); @@ -341,7 +340,7 @@ int main(int argc, char **argv) delete [] cDen; delete [] cfq; */ - MPI_Barrier(comm); + comm.barrier(); } fflush(stdout); @@ -416,7 +415,7 @@ int main(int argc, char **argv) neighborList= new int[18*Npad]; Np = ScaLBL_Comm.MemoryOptimizedLayoutAA(Map,neighborList,Mask.id,Np); if (rank==0) printf ("Set up memory efficient layout Npad=%i, Np=%i \n",Npad,Np); - MPI_Barrier(comm); + comm.barrier(); //........................................................................... // MAIN VARIABLES ALLOCATED HERE //........................................................................... @@ -537,7 +536,7 @@ int main(int argc, char **argv) //.......create and start timer............ double starttime,stoptime,cputime; ScaLBL_DeviceBarrier(); - MPI_Barrier(comm); + comm.barrier(); starttime = MPI_Wtime(); //......................................... 
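Across the test-driver hunks above, the recurring edit is the same three-line setup: construct a Utilities::MPI object from MPI_COMM_WORLD, query rank and size through member functions, and call comm.barrier() where MPI_Barrier(comm) used to be. The following is a minimal standalone sketch of that shape, assuming only the wrapper methods that actually appear in these hunks (getRank, getSize, barrier); everything else in it is illustrative scaffolding, not the project's code.

    // Sketch of the setup pattern applied throughout this patch; only getRank,
    // getSize and barrier are taken from the hunks above, the rest is filler.
    #include <cstdio>
    #include <mpi.h>
    #include "common/MPI.h"

    int main( int argc, char **argv )
    {
        MPI_Init( &argc, &argv );
        { // limit scope so the wrapped communicator is destroyed before MPI_Finalize
            Utilities::MPI comm( MPI_COMM_WORLD );
            int rank   = comm.getRank();   // replaces MPI_Comm_rank(comm,&rank)
            int nprocs = comm.getSize();   // replaces MPI_Comm_size(comm,&nprocs)
            if ( rank == 0 )
                printf( "Running on %i ranks\n", nprocs );
            comm.barrier();                // replaces MPI_Barrier(comm)
        }
        MPI_Finalize();
        return 0;
    }

The scope trick the tests already use (freeing communicator-holding objects before MPI_Finalize) carries over unchanged; the wrapper object simply takes the place of the duplicated MPI_Comm handle.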
@@ -589,7 +588,7 @@ int main(int argc, char **argv) } ScaLBL_D3Q19_AAodd_Color(NeighborList, dvcMap, fq, Aq, Bq, Den, Phi, Velocity, rhoA, rhoB, tauA, tauB, alpha, beta, Fx, Fy, Fz, Nx, Nx*Ny, 0, ScaLBL_Comm.next, Np); - ScaLBL_DeviceBarrier(); MPI_Barrier(comm); + ScaLBL_DeviceBarrier(); comm.barrier(); // *************EVEN TIMESTEP************* timestep++; @@ -622,10 +621,10 @@ int main(int argc, char **argv) } ScaLBL_D3Q19_AAeven_Color(dvcMap, fq, Aq, Bq, Den, Phi, Velocity, rhoA, rhoB, tauA, tauB, alpha, beta, Fx, Fy, Fz, Nx, Nx*Ny, 0, ScaLBL_Comm.next, Np); - ScaLBL_DeviceBarrier(); MPI_Barrier(comm); + ScaLBL_DeviceBarrier(); comm.barrier(); //************************************************************************ - MPI_Barrier(comm); + comm.barrier(); PROFILE_STOP("Update"); // Run the analysis @@ -637,7 +636,7 @@ int main(int argc, char **argv) PROFILE_SAVE("lbpm_color_simulator",1); //************************************************************************ ScaLBL_DeviceBarrier(); - MPI_Barrier(comm); + comm.barrier(); stoptime = MPI_Wtime(); if (rank==0) printf("-------------------------------------------------------------------\n"); // Compute the walltime per timestep @@ -657,9 +656,8 @@ int main(int argc, char **argv) PROFILE_STOP("Main"); PROFILE_SAVE("lbpm_color_simulator",1); // **************************************************** - MPI_Barrier(comm); + comm.barrier(); } // Limit scope so variables that contain communicators will free before MPI_Finialize - MPI_Comm_free(&comm); MPI_Finalize(); } diff --git a/tests/lbpm_color_simulator.cpp b/tests/lbpm_color_simulator.cpp index 1f63c653..cef13189 100644 --- a/tests/lbpm_color_simulator.cpp +++ b/tests/lbpm_color_simulator.cpp @@ -28,10 +28,9 @@ int main(int argc, char **argv) { // Limit scope so variables that contain communicators will free before MPI_Finialize - MPI_Comm comm; - MPI_Comm_dup(MPI_COMM_WORLD,&comm); - int rank = comm_rank(comm); - int nprocs = comm_size(comm); + Utilities::MPI comm( MPI_COMM_WORLD ); + int rank = comm.getRank(); + int nprocs = comm.getSize(); if (rank == 0){ printf("********************************************************\n"); @@ -41,7 +40,7 @@ int main(int argc, char **argv) // Initialize compute device ScaLBL_SetDevice(rank); ScaLBL_DeviceBarrier(); - MPI_Barrier(comm); + comm.barrier(); PROFILE_ENABLE(1); //PROFILE_ENABLE_TRACE(); @@ -51,7 +50,7 @@ int main(int argc, char **argv) Utilities::setErrorHandlers(); auto filename = argv[1]; - ScaLBL_ColorModel ColorModel(rank,nprocs,comm); + ScaLBL_ColorModel ColorModel(rank,nprocs,comm.dup()); ColorModel.ReadParams(filename); ColorModel.SetDomain(); ColorModel.ReadInput(); @@ -64,8 +63,7 @@ int main(int argc, char **argv) PROFILE_SAVE("lbpm_color_simulator",1); // **************************************************** - MPI_Barrier(comm); - MPI_Comm_free(&comm); + comm.barrier(); } // Limit scope so variables that contain communicators will free before MPI_Finialize diff --git a/tests/lbpm_dfh_simulator.cpp b/tests/lbpm_dfh_simulator.cpp index 1e8dc0f9..0d5902df 100644 --- a/tests/lbpm_dfh_simulator.cpp +++ b/tests/lbpm_dfh_simulator.cpp @@ -26,10 +26,9 @@ int main(int argc, char **argv) // Initialize MPI int provided_thread_support = -1; MPI_Init_thread(&argc,&argv,MPI_THREAD_MULTIPLE,&provided_thread_support); - MPI_Comm comm; - MPI_Comm_dup(MPI_COMM_WORLD,&comm); - int rank = comm_rank(comm); - int nprocs = comm_size(comm); + Utilities::MPI comm( MPI_COMM_WORLD ); + int rank = comm.getRank(); + int nprocs = comm.getSize(); if ( rank==0 && 
provided_thread_support 1) depth=atoi(argv[1]); @@ -222,7 +218,7 @@ int main(int argc, char **argv) rank_xy, rank_XY, rank_xY, rank_Xy, rank_xz, rank_XZ, rank_xZ, rank_Xz, rank_yz, rank_YZ, rank_yZ, rank_Yz ); - MPI_Barrier(comm); + comm.barrier(); Nx += 2; Ny += 2; @@ -277,13 +273,13 @@ int main(int argc, char **argv) //....................................................................... if (rank == 0) printf("Reading the disc packing \n"); if (rank == 0) ReadDiscPacking(ndiscs,cx,cy,rad); - MPI_Barrier(comm); + comm.barrier(); // Broadcast the sphere packing to all processes MPI_Bcast(cx,ndiscs,MPI_DOUBLE,0,comm); MPI_Bcast(cy,ndiscs,MPI_DOUBLE,0,comm); MPI_Bcast(rad,ndiscs,MPI_DOUBLE,0,comm); //........................................................................... - MPI_Barrier(comm); + comm.barrier(); if (rank == 0){ cout << "Domain set." << endl; printf("************ \n"); @@ -388,7 +384,7 @@ int main(int argc, char **argv) //...................................................................... // **************************************************** - MPI_Barrier(comm); + comm.barrier(); MPI_Finalize(); // **************************************************** } diff --git a/tests/lbpm_inkbottle_pp.cpp b/tests/lbpm_inkbottle_pp.cpp index 3c39219d..669ab8c0 100644 --- a/tests/lbpm_inkbottle_pp.cpp +++ b/tests/lbpm_inkbottle_pp.cpp @@ -9,19 +9,15 @@ #include "common/ScaLBL.h" #include "common/Communication.h" #include "analysis/TwoPhase.h" -#include "common/MPI_Helpers.h" +#include "common/MPI.h" int main(int argc, char **argv) { - //***************************************** - // ***** MPI STUFF **************** - //***************************************** // Initialize MPI - int rank,nprocs; MPI_Init(&argc,&argv); - MPI_Comm comm = MPI_COMM_WORLD; - MPI_Comm_rank(comm,&rank); - MPI_Comm_size(comm,&nprocs); + Utilities::MPI comm( MPI_COMM_WORLD ); + int rank = comm.getRank(); + int nprocs = comm.getSize(); { // parallel domain size (# of sub-domains) int nprocx,nprocy,nprocz; @@ -83,7 +79,7 @@ int main(int argc, char **argv) } // ************************************************************** // Broadcast simulation parameters from rank 0 to all other procs - MPI_Barrier(comm); + comm.barrier(); // Computational domain MPI_Bcast(&Nx,1,MPI_INT,0,comm); MPI_Bcast(&Ny,1,MPI_INT,0,comm); @@ -96,7 +92,7 @@ int main(int argc, char **argv) MPI_Bcast(&Ly,1,MPI_DOUBLE,0,comm); MPI_Bcast(&Lz,1,MPI_DOUBLE,0,comm); //................................................. 
- MPI_Barrier(comm); + comm.barrier(); // ************************************************************** if (nprocs != nprocx*nprocy*nprocz){ @@ -123,7 +119,7 @@ int main(int argc, char **argv) rank_xy, rank_XY, rank_xY, rank_Xy, rank_xz, rank_XZ, rank_xZ, rank_Xz, rank_yz, rank_YZ, rank_yZ, rank_Yz ); - MPI_Barrier(comm); + comm.barrier(); Nz += 2; Nx = Ny = Nz; // Cubic domain @@ -221,7 +217,7 @@ int main(int argc, char **argv) } // **************************************************** - MPI_Barrier(comm); + comm.barrier(); MPI_Finalize(); // **************************************************** } diff --git a/tests/lbpm_juanes_bench_disc_pp.cpp b/tests/lbpm_juanes_bench_disc_pp.cpp index 6f04cffa..47d8cb84 100644 --- a/tests/lbpm_juanes_bench_disc_pp.cpp +++ b/tests/lbpm_juanes_bench_disc_pp.cpp @@ -9,7 +9,7 @@ #include "analysis/pmmc.h" #include "common/Domain.h" #include "common/Communication.h" -#include "common/MPI_Helpers.h" // This includes mpi.h +#include "common/MPI.h" // This includes mpi.h #include "common/SpherePack.h" /* @@ -130,15 +130,11 @@ inline void SignedDistanceDiscPack(double *Distance, int ndiscs, double *List_cx int main(int argc, char **argv) { - //***************************************** - // ***** MPI STUFF **************** - //***************************************** // Initialize MPI - int rank,nprocs; MPI_Init(&argc,&argv); - MPI_Comm comm = MPI_COMM_WORLD; - MPI_Comm_rank(comm,&rank); - MPI_Comm_size(comm,&nprocs); + Utilities::MPI comm( MPI_COMM_WORLD ); + int rank = comm.getRank(); + int nprocs = comm.getSize(); // parallel domain size (# of sub-domains) int nprocx,nprocy,nprocz; int iproc,jproc,kproc; @@ -194,7 +190,7 @@ int main(int argc, char **argv) } // ************************************************************** // Broadcast simulation parameters from rank 0 to all other procs - MPI_Barrier(comm); + comm.barrier(); //................................................. // Computational domain MPI_Bcast(&Nx,1,MPI_INT,0,comm); @@ -208,7 +204,7 @@ int main(int argc, char **argv) MPI_Bcast(&Ly,1,MPI_DOUBLE,0,comm); MPI_Bcast(&Lz,1,MPI_DOUBLE,0,comm); //................................................. - MPI_Barrier(comm); + comm.barrier(); // ************************************************************** double Rin,Rout; @@ -240,7 +236,7 @@ int main(int argc, char **argv) rank_xy, rank_XY, rank_xY, rank_Xy, rank_xz, rank_XZ, rank_xZ, rank_Xz, rank_yz, rank_YZ, rank_yZ, rank_Yz ); - MPI_Barrier(comm); + comm.barrier(); Nx += 2; Ny += 2; Nz += 2; int N = Nx*Ny*Nz; @@ -294,13 +290,13 @@ int main(int argc, char **argv) //....................................................................... if (rank == 0) printf("Reading the disc packing \n"); if (rank == 0) ReadDiscPacking(ndiscs,cx,cy,rad); - MPI_Barrier(comm); + comm.barrier(); // Broadcast the sphere packing to all processes MPI_Bcast(cx,ndiscs,MPI_DOUBLE,0,comm); MPI_Bcast(cy,ndiscs,MPI_DOUBLE,0,comm); MPI_Bcast(rad,ndiscs,MPI_DOUBLE,0,comm); //........................................................................... - MPI_Barrier(comm); + comm.barrier(); /* if (rank == 0){ cout << "Domain set." << endl; printf("************ \n"); @@ -312,7 +308,7 @@ int main(int argc, char **argv) } */ - MPI_Barrier(comm); + comm.barrier(); if (nprocz > 1 && rank==0) printf("Disc packs are 2D -- are you sure you want nprocz > 1? \n"); if (rank ==0) printf("Compute the signed distance part I \n"); //....................................................................... 
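The pre-processor tools in the surrounding hunks keep their "read on rank 0, then broadcast" structure: the MPI_Bcast calls stay as raw MPI while only the barriers move onto the wrapper. A hedged sketch of that mixed style is below; getCommunicator() is taken from the morphology hunks later in this patch, and the fact that the Bcast call sites pass comm unchanged suggests the wrapper also converts implicitly to MPI_Comm, but that conversion is an assumption here, not something this diff states.

    // Sketch of the rank-0 parameter broadcast pattern these tools keep using.
    // The dimensions and their values are placeholders; only barrier(),
    // getCommunicator() and the raw MPI_Bcast usage mirror the patch.
    #include <cstdio>
    #include <mpi.h>
    #include "common/MPI.h"

    int main( int argc, char **argv )
    {
        MPI_Init( &argc, &argv );
        {
            Utilities::MPI comm( MPI_COMM_WORLD );
            int rank = comm.getRank();
            int Nx = 0, Ny = 0, Nz = 0;
            if ( rank == 0 ) {
                Nx = Ny = Nz = 64;   // normally parsed from the input file on rank 0
            }
            comm.barrier();
            MPI_Bcast( &Nx, 1, MPI_INT, 0, comm.getCommunicator() );
            MPI_Bcast( &Ny, 1, MPI_INT, 0, comm.getCommunicator() );
            MPI_Bcast( &Nz, 1, MPI_INT, 0, comm.getCommunicator() );
            comm.barrier();
            if ( rank == 0 )
                printf( "Domain: %i x %i x %i\n", Nx, Ny, Nz );
        }
        MPI_Finalize();
        return 0;
    }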
@@ -490,7 +486,7 @@ int main(int argc, char **argv) //...................................................................... // **************************************************** - MPI_Barrier(comm); + comm.barrier(); MPI_Finalize(); // **************************************************** } diff --git a/tests/lbpm_minkowski_scalar.cpp b/tests/lbpm_minkowski_scalar.cpp index 3e3ede6d..721207a1 100644 --- a/tests/lbpm_minkowski_scalar.cpp +++ b/tests/lbpm_minkowski_scalar.cpp @@ -14,7 +14,7 @@ #include "common/Array.h" #include "common/Domain.h" #include "common/Communication.h" -#include "common/MPI_Helpers.h" +#include "common/MPI.h" #include "IO/MeshDatabase.h" #include "IO/Mesh.h" #include "IO/Writer.h" @@ -28,13 +28,11 @@ int main(int argc, char **argv) { - // Initialize MPI - int rank, nprocs; MPI_Init(&argc,&argv); - MPI_Comm comm = MPI_COMM_WORLD; - MPI_Comm_rank(comm,&rank); - MPI_Comm_size(comm,&nprocs); + Utilities::MPI comm( MPI_COMM_WORLD ); + int rank = comm.getRank(); + int nprocs = comm.getSize(); { Utilities::setErrorHandlers(); PROFILE_START("Main"); @@ -87,7 +85,7 @@ int main(int argc, char **argv) fclose(SEGDAT); printf("Read segmented data from %s \n",Filename.c_str()); } - MPI_Barrier(comm); + comm.barrier(); // Get the rank info int N = (nx+2)*(ny+2)*(nz+2); @@ -152,7 +150,7 @@ int main(int argc, char **argv) } else{ printf("Sending data to process %i \n", rnk); - MPI_Send(tmp,N,MPI_CHAR,rnk,15,comm); + comm.send(tmp,N,rnk,15); } } } @@ -161,13 +159,12 @@ int main(int argc, char **argv) else{ // Recieve the subdomain from rank = 0 printf("Ready to recieve data %i at process %i \n", N,rank); - MPI_Recv(Dm->id,N,MPI_CHAR,0,15,comm,MPI_STATUS_IGNORE); + comm.recv(Dm->id,N,0,15); } - MPI_Barrier(comm); + comm.barrier(); // Compute the Minkowski functionals - MPI_Barrier(comm); - std::shared_ptr Averages(new Minkowski(Dm)); + auto Averages = std::make_shared(Dm); // Calculate the distance // Initialize the domain and communication @@ -212,7 +209,7 @@ int main(int argc, char **argv) } PROFILE_STOP("Main"); PROFILE_SAVE("Minkowski",true); - MPI_Barrier(comm); + comm.barrier(); MPI_Finalize(); return 0; } diff --git a/tests/lbpm_morph_pp.cpp b/tests/lbpm_morph_pp.cpp index 8fe8b228..939fdc32 100644 --- a/tests/lbpm_morph_pp.cpp +++ b/tests/lbpm_morph_pp.cpp @@ -23,11 +23,9 @@ int main(int argc, char **argv) { // Initialize MPI - int rank, nprocs; MPI_Init(&argc,&argv); - MPI_Comm comm = MPI_COMM_WORLD; - MPI_Comm_rank(comm,&rank); - MPI_Comm_size(comm,&nprocs); + Utilities::MPI comm( MPI_COMM_WORLD ); + int rank = comm.getRank(); { //....................................................................... 
// Reading the domain information file @@ -127,13 +125,13 @@ int main(int argc, char **argv) if (rank==0) printf("Initialized solid phase -- Converting to Signed Distance function \n"); CalcDist(SignDist,id_solid,*Dm); - MPI_Barrier(comm); + comm.barrier(); // Extract only the connected part of NWP BlobIDstruct new_index; double vF=0.0; double vS=0.0; ComputeGlobalBlobIDs(nx-2,ny-2,nz-2,Dm->rank_info,phase,SignDist,vF,vS,phase_label,Dm->Comm); - MPI_Barrier(Dm->Comm); + Dm->Comm.barrier(); int count_connected=0; int count_porespace=0; @@ -155,9 +153,9 @@ int main(int argc, char **argv) } } } - count_connected=sumReduce( Dm->Comm, count_connected); - count_porespace=sumReduce( Dm->Comm, count_porespace); - count_water=sumReduce( Dm->Comm, count_water); + count_connected = Dm->Comm.sumReduce( count_connected ); + count_porespace = Dm->Comm.sumReduce( count_porespace ); + count_water = Dm->Comm.sumReduce( count_water ); for (int k=0; kComm, count_water); + count_water = Dm->Comm.sumReduce( count_water ); SW = double(count_water) / count_porespace; if(rank==0) printf("Final saturation: %f \n", SW); @@ -236,13 +234,13 @@ int main(int argc, char **argv) } } } - MPI_Barrier(comm); + comm.barrier(); auto filename2 = READFILE + ".morph.raw"; if (rank==0) printf("Writing file to: %s \n", filename2.c_str()); Mask->AggregateLabels(filename2); } - MPI_Barrier(comm); + comm.barrier(); MPI_Finalize(); } diff --git a/tests/lbpm_morphdrain_pp.cpp b/tests/lbpm_morphdrain_pp.cpp index 8d73b1e4..d3c5a428 100644 --- a/tests/lbpm_morphdrain_pp.cpp +++ b/tests/lbpm_morphdrain_pp.cpp @@ -23,11 +23,9 @@ int main(int argc, char **argv) { // Initialize MPI - int rank, nprocs; MPI_Init(&argc,&argv); - MPI_Comm comm = MPI_COMM_WORLD; - MPI_Comm_rank(comm,&rank); - MPI_Comm_size(comm,&nprocs); + Utilities::MPI comm( MPI_COMM_WORLD ); + int rank = comm.getRank(); { //....................................................................... // Reading the domain information file @@ -121,7 +119,7 @@ int main(int argc, char **argv) if (rank==0) printf("Initialized solid phase -- Converting to Signed Distance function \n"); CalcDist(SignDist,id_solid,*Dm); - MPI_Barrier(comm); + comm.barrier(); // Run the morphological opening MorphDrain(SignDist, id, Dm, SW); @@ -196,13 +194,13 @@ int main(int argc, char **argv) } } } - MPI_Barrier(comm); + comm.barrier(); auto filename2 = READFILE + ".morphdrain.raw"; if (rank==0) printf("Writing file to: %s \n", filename2.data() ); Mask->AggregateLabels( filename2 ); } - MPI_Barrier(comm); + comm.barrier(); MPI_Finalize(); } diff --git a/tests/lbpm_morphopen_pp.cpp b/tests/lbpm_morphopen_pp.cpp index f8819348..a6209240 100644 --- a/tests/lbpm_morphopen_pp.cpp +++ b/tests/lbpm_morphopen_pp.cpp @@ -23,11 +23,9 @@ int main(int argc, char **argv) { // Initialize MPI - int rank, nprocs; MPI_Init(&argc,&argv); - MPI_Comm comm = MPI_COMM_WORLD; - MPI_Comm_rank(comm,&rank); - MPI_Comm_size(comm,&nprocs); + Utilities::MPI comm( MPI_COMM_WORLD ); + int rank = comm.getRank(); { //....................................................................... 
// Reading the domain information file @@ -123,7 +121,7 @@ int main(int argc, char **argv) if (rank==0) printf("Initialized solid phase -- Converting to Signed Distance function \n"); CalcDist(SignDist,id_solid,*Dm); - MPI_Barrier(comm); + comm.barrier(); // Run the morphological opening MorphOpen(SignDist, id, Dm, SW, ErodeLabel, OpenLabel); @@ -198,13 +196,13 @@ int main(int argc, char **argv) } } } - MPI_Barrier(comm); + comm.barrier(); auto filename2 = READFILE + ".morphopen.raw"; if (rank==0) printf("Writing file to: %s \n", filename2.data()); Mask->AggregateLabels(filename2); } - MPI_Barrier(comm); + comm.barrier(); MPI_Finalize(); } diff --git a/tests/lbpm_nondarcy_simulator.cpp b/tests/lbpm_nondarcy_simulator.cpp index 40672375..096dc790 100644 --- a/tests/lbpm_nondarcy_simulator.cpp +++ b/tests/lbpm_nondarcy_simulator.cpp @@ -9,7 +9,7 @@ #include "common/ScaLBL.h" #include "common/Communication.h" #include "analysis/TwoPhase.h" -#include "common/MPI_Helpers.h" +#include "common/MPI.h" //#define WRITE_SURFACES @@ -77,15 +77,11 @@ int main(int argc, char **argv) } else { - //***************************************** - // ***** MPI STUFF **************** - //***************************************** // Initialize MPI - int rank,nprocs; MPI_Init(&argc,&argv); - MPI_Comm comm = MPI_COMM_WORLD; - MPI_Comm_rank(comm,&rank); - MPI_Comm_size(comm,&nprocs); + Utilities::MPI comm( MPI_COMM_WORLD ); + int rank = comm.getRank(); + int nprocs = comm.getSize(); { // parallel domain size (# of sub-domains) int nprocx,nprocy,nprocz; @@ -160,7 +156,7 @@ int main(int argc, char **argv) } // ************************************************************** // Broadcast simulation parameters from rank 0 to all other procs - MPI_Barrier(comm); + comm.barrier(); //................................................. MPI_Bcast(&tau,1,MPI_DOUBLE,0,comm); //MPI_Bcast(&pBC,1,MPI_LOGICAL,0,comm); @@ -185,7 +181,7 @@ int main(int argc, char **argv) MPI_Bcast(&Ly,1,MPI_DOUBLE,0,comm); MPI_Bcast(&Lz,1,MPI_DOUBLE,0,comm); //................................................. - MPI_Barrier(comm); + comm.barrier(); RESTART_INTERVAL=interval; // ************************************************************** @@ -222,7 +218,7 @@ int main(int argc, char **argv) rank_xy, rank_XY, rank_xY, rank_Xy, rank_xz, rank_XZ, rank_xZ, rank_Xz, rank_yz, rank_YZ, rank_yZ, rank_Yz ); - MPI_Barrier(comm); + comm.barrier(); Nx += 2; Ny += 2; Nz += 2; @@ -262,7 +258,7 @@ int main(int argc, char **argv) // WriteLocalSolidID(LocalRankFilename, id, N); sprintf(LocalRankFilename,"%s%s","SignDist.",LocalRankString); ReadBinaryFile(LocalRankFilename, Averages.SDs.data(), N); - MPI_Barrier(comm); + comm.barrier(); if (rank == 0) cout << "Domain set." << endl; //....................................................................... @@ -436,7 +432,7 @@ int main(int argc, char **argv) //.......create and start timer............ double starttime,stoptime,cputime; - MPI_Barrier(comm); + comm.barrier(); starttime = MPI_Wtime(); //......................................... @@ -485,7 +481,7 @@ int main(int argc, char **argv) } //................................................................................... ScaLBL_DeviceBarrier(); - MPI_Barrier(comm); + comm.barrier(); // Timestep completed! 
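The simulator hunks on either side of this point preserve the existing timing idiom while swapping the synchronization call: barrier on the wrapped communicator, MPI_Wtime() before and after the timestep loop, then walltime per timestep. A minimal sketch of that idiom follows; the loop body is a stand-in, and only the barrier()/MPI_Wtime() usage is taken from the patch.

    // Sketch of the timing pattern around the timestep loop; the "work" inside
    // the loop is a placeholder for the collision/streaming/halo-exchange calls.
    #include <cstdio>
    #include <mpi.h>
    #include "common/MPI.h"

    int main( int argc, char **argv )
    {
        MPI_Init( &argc, &argv );
        {
            Utilities::MPI comm( MPI_COMM_WORLD );
            int rank = comm.getRank();
            const int timestepMax = 1000;
            comm.barrier();                      // replaces MPI_Barrier(comm) before timing
            double starttime = MPI_Wtime();
            int timestep = 0;
            while ( timestep < timestepMax ) {
                // ... timestep work would go here ...
                timestep++;
            }
            comm.barrier();
            double stoptime = MPI_Wtime();
            double cputime  = ( stoptime - starttime ) / timestep;
            if ( rank == 0 )
                printf( "CPU time = %f per timestep\n", cputime );
        }
        MPI_Finalize();
        return 0;
    }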
@@ -557,7 +553,7 @@ int main(int argc, char **argv) //************************************************************************/ fclose(NONDARCY); ScaLBL_DeviceBarrier(); - MPI_Barrier(comm); + comm.barrier(); stoptime = MPI_Wtime(); if (rank==0) printf("-------------------------------------------------------------------\n"); // Compute the walltime per timestep @@ -575,7 +571,7 @@ int main(int argc, char **argv) NULL_USE(RESTART_INTERVAL); } // **************************************************** - MPI_Barrier(comm); + comm.barrier(); MPI_Finalize(); // **************************************************** } diff --git a/tests/lbpm_nonnewtonian_simulator.cpp b/tests/lbpm_nonnewtonian_simulator.cpp index 5c33841f..ff8792e7 100644 --- a/tests/lbpm_nonnewtonian_simulator.cpp +++ b/tests/lbpm_nonnewtonian_simulator.cpp @@ -9,7 +9,7 @@ #include "common/ScaLBL.h" #include "common/Communication.h" #include "common/TwoPhase.h" -#include "common/MPI_Helpers.h" +#include "common/MPI.h" #include "ProfilerApp.h" #include "threadpool/thread_pool.h" @@ -99,21 +99,12 @@ inline void ZeroHalo(double *Data, int Nx, int Ny, int Nz) int main(int argc, char **argv) { - //***************************************** - // ***** MPI STUFF **************** - //***************************************** // Initialize MPI - //MPI_Init(&argc,&argv); - - /* - * Definitely seems to be an issue - let's hope James gets back to me... - */ int provided_thread_support = -1; MPI_Init_thread(&argc,&argv,MPI_THREAD_MULTIPLE,&provided_thread_support); - MPI_Comm comm; - MPI_Comm_dup(MPI_COMM_WORLD,&comm); - int rank = comm_rank(comm); - int nprocs = comm_size(comm); + Utilities::MPI comm( MPI_COMM_WORLD ); + int rank = comm.getRank(); + int nprocs = comm.getSize(); if ( rank==0 && provided_thread_supportSDs.data(), N); - MPI_Barrier(comm); + comm.barrier(); if (rank == 0) cout << "Domain set." << endl; /* 3 */ //....................................................................... @@ -598,14 +589,14 @@ int main(int argc, char **argv) delete [] cDen; delete [] cDistEven; delete [] cDistOdd; - MPI_Barrier(comm); + comm.barrier(); } /* 14 */ // //...................................................................... // ScaLBL_D3Q7_Init(ID, A_even, A_odd, &Den[0], Nx, Ny, Nz); // ScaLBL_D3Q7_Init(ID, B_even, B_odd, &Den[N], Nx, Ny, Nz); // ScaLBL_DeviceBarrier(); -// MPI_Barrier(comm); /* 15 */ +// comm.barrier(); /* 15 */ //....................................................................... // Once phase has been initialized, map solid to account for 'smeared' interface @@ -631,7 +622,7 @@ int main(int argc, char **argv) // ScaLBL_Comm.SendHalo(Phi); // ScaLBL_Comm.RecvHalo(Phi); // ScaLBL_DeviceBarrier(); -// MPI_Barrier(comm); +// comm.barrier(); // //************************************************************************* /* 18 */ @@ -670,7 +661,7 @@ int main(int argc, char **argv) //.......create and start timer............ double starttime,stoptime,cputime; - MPI_Barrier(comm); + comm.barrier(); starttime = MPI_Wtime(); /* @@ -804,7 +795,7 @@ int main(int argc, char **argv) } //................................................................................... ScaLBL_DeviceBarrier(); - MPI_Barrier(comm); + comm.barrier(); // Timestep completed! 
timestep++; @@ -818,7 +809,7 @@ int main(int argc, char **argv) } //************************************************************************/ ScaLBL_DeviceBarrier(); - MPI_Barrier(comm); + comm.barrier(); stoptime = MPI_Wtime(); if (rank==0) printf("-------------------------------------------------------------------\n"); // Compute the walltime per timestep @@ -835,7 +826,7 @@ int main(int argc, char **argv) NULL_USE(RESTART_INTERVAL); } - MPI_Barrier(comm); + comm.barrier(); MPI_Finalize(); //**************************************************** } diff --git a/tests/lbpm_nonnewtonian_simulator.h b/tests/lbpm_nonnewtonian_simulator.h index 20da1ac3..4df5e628 100644 --- a/tests/lbpm_nonnewtonian_simulator.h +++ b/tests/lbpm_nonnewtonian_simulator.h @@ -1,7 +1,7 @@ // Run the analysis, blob identification, and write restart files #include "common/Array.h" #include "common/Communication.h" -#include "common/MPI_Helpers.h" +#include "common/MPI.h" #include "IO/MeshDatabase.h" //#define ANALYSIS_INTERVAL 6 @@ -9,20 +9,9 @@ #define BLOBID_INTERVAL 1000 - - - - enum AnalysisType{ AnalyzeNone=0, IdentifyBlobs=0x01, CopyPhaseIndicator=0x02, CopySimState=0x04, ComputeAverages=0x08, CreateRestart=0x10, WriteVis=0x20 }; - - - - - - - template void DeleteArray( const TYPE *p ) { @@ -30,12 +19,6 @@ void DeleteArray( const TYPE *p ) } - - - - - - // Structure used to store ids struct AnalysisWaitIdStruct { ThreadPool::thread_id_t blobID; @@ -45,7 +28,6 @@ struct AnalysisWaitIdStruct { }; - // Helper class to write the restart file from a seperate thread class WriteRestartWorkItem: public ThreadPool::WorkItem { @@ -84,9 +66,9 @@ typedef std::shared_ptr > BlobIDList; // timestep(timestep_), Nx(Nx_), Ny(Ny_), Nz(Nz_), rank_info(rank_info_), // phase(phase_), dist(dist_), last_id(last_id_), new_index(new_index_), new_id(new_id_), new_list(new_list_) // { -// MPI_Comm_dup(MPI_COMM_WORLD,&newcomm); +// newcomm = Utilities::MPI(MPI_COMM_WORLD).dup(); // } -// ~BlobIdentificationWorkItem1() { MPI_Comm_free(&newcomm); } +// ~BlobIdentificationWorkItem1() {} // virtual void run() { // // Compute the global blob id and compare to the previous version // PROFILE_START("Identify blobs",1); @@ -106,7 +88,7 @@ typedef std::shared_ptr > BlobIDList; // const DoubleArray& dist; // BlobIDstruct last_id, new_index, new_id; // BlobIDList new_list; -// MPI_Comm newcomm; +// Utilities::MPI newcomm; //}; // @@ -122,9 +104,9 @@ typedef std::shared_ptr > BlobIDList; // timestep(timestep_), Nx(Nx_), Ny(Ny_), Nz(Nz_), rank_info(rank_info_), // phase(phase_), dist(dist_), last_id(last_id_), new_index(new_index_), new_id(new_id_), new_list(new_list_) // { -// MPI_Comm_dup(MPI_COMM_WORLD,&newcomm); +// newcomm = Utilities::MPI(MPI_COMM_WORLD).dup(); // } -// ~BlobIdentificationWorkItem2() { MPI_Comm_free(&newcomm); } +// ~BlobIdentificationWorkItem2() { } // virtual void run() { // // Compute the global blob id and compare to the previous version // PROFILE_START("Identify blobs maps",1); @@ -158,7 +140,7 @@ typedef std::shared_ptr > BlobIDList; // const DoubleArray& dist; // BlobIDstruct last_id, new_index, new_id; // BlobIDList new_list; -// MPI_Comm newcomm; +// Utilities::MPI newcomm; //}; // @@ -171,9 +153,9 @@ public: TwoPhase& Avgerages_, fillHalo& fillData_ ): timestep(timestep_), visData(visData_), Averages(Avgerages_), fillData(fillData_) { - MPI_Comm_dup(MPI_COMM_WORLD,&newcomm); + newcomm = Utilities::MPI(MPI_COMM_WORLD).dup(); } - ~WriteVisWorkItem() { MPI_Comm_free(&newcomm); } + ~WriteVisWorkItem() {} virtual void run() 
{ PROFILE_START("Save Vis",1); ASSERT(visData[0].vars[0]->name=="phase"); @@ -198,7 +180,7 @@ private: std::vector& visData; TwoPhase& Averages; fillHalo& fillData; - MPI_Comm newcomm; + Utilities::MPI newcomm; }; @@ -418,7 +400,7 @@ void run_analysis( int timestep, int restart_interval, // Spawn a thread to write the restart file if ( (type&CreateRestart) != 0 ) { - int rank = MPI_WORLD_RANK(); + int rank = comm.getRank(); // Wait for previous restart files to finish writing (not necessary, but helps to ensure memory usage is limited) tpool.wait(wait.restart); diff --git a/tests/lbpm_permeability_simulator.cpp b/tests/lbpm_permeability_simulator.cpp index dbcfb96b..eb5e6d4b 100644 --- a/tests/lbpm_permeability_simulator.cpp +++ b/tests/lbpm_permeability_simulator.cpp @@ -9,7 +9,7 @@ #include "common/ScaLBL.h" #include "common/Communication.h" #include "analysis/TwoPhase.h" -#include "common/MPI_Helpers.h" +#include "common/MPI.h" #include "models/MRTModel.h" //#define WRITE_SURFACES @@ -24,11 +24,10 @@ using namespace std; int main(int argc, char **argv) { // Initialize MPI - int rank,nprocs; MPI_Init(&argc,&argv); - MPI_Comm comm = MPI_COMM_WORLD; - MPI_Comm_rank(comm,&rank); - MPI_Comm_size(comm,&nprocs); + Utilities::MPI comm( MPI_COMM_WORLD ); + int rank = comm.getRank(); + int nprocs = comm.getSize(); { if (rank == 0){ printf("********************************************************\n"); @@ -39,7 +38,7 @@ int main(int argc, char **argv) int device=ScaLBL_SetDevice(rank); NULL_USE( device ); ScaLBL_DeviceBarrier(); - MPI_Barrier(comm); + comm.barrier(); ScaLBL_MRTModel MRT(rank,nprocs,comm); auto filename = argv[1]; @@ -52,7 +51,7 @@ int main(int argc, char **argv) MRT.VelocityField(); } // **************************************************** - MPI_Barrier(comm); + comm.barrier(); MPI_Finalize(); // **************************************************** } diff --git a/tests/lbpm_plates_pp.cpp b/tests/lbpm_plates_pp.cpp index 8344df47..acd64f52 100644 --- a/tests/lbpm_plates_pp.cpp +++ b/tests/lbpm_plates_pp.cpp @@ -9,19 +9,15 @@ #include "common/ScaLBL.h" #include "common/Communication.h" #include "analysis/TwoPhase.h" -#include "common/MPI_Helpers.h" +#include "common/MPI.h" int main(int argc, char **argv) { - //***************************************** - // ***** MPI STUFF **************** - //***************************************** // Initialize MPI - int rank,nprocs; MPI_Init(&argc,&argv); - MPI_Comm comm = MPI_COMM_WORLD; - MPI_Comm_rank(comm,&rank); - MPI_Comm_size(comm,&nprocs); + Utilities::MPI comm( MPI_COMM_WORLD ); + int rank = comm.getRank(); + int nprocs = comm.getSize(); { // parallel domain size (# of sub-domains) int nprocx,nprocy,nprocz; @@ -79,7 +75,7 @@ int main(int argc, char **argv) } // ************************************************************** // Broadcast simulation parameters from rank 0 to all other procs - MPI_Barrier(comm); + comm.barrier(); // Computational domain MPI_Bcast(&Nx,1,MPI_INT,0,comm); MPI_Bcast(&Ny,1,MPI_INT,0,comm); @@ -92,7 +88,7 @@ int main(int argc, char **argv) MPI_Bcast(&Ly,1,MPI_DOUBLE,0,comm); MPI_Bcast(&Lz,1,MPI_DOUBLE,0,comm); //................................................. 
- MPI_Barrier(comm); + comm.barrier(); // ************************************************************** if (nprocs != nprocx*nprocy*nprocz){ @@ -116,7 +112,7 @@ int main(int argc, char **argv) std::shared_ptr Averages( new TwoPhase(Dm) ); - MPI_Barrier(comm); + comm.barrier(); Nz += 2; Nx = Ny = Nz; // Cubic domain @@ -200,7 +196,7 @@ int main(int argc, char **argv) } // **************************************************** - MPI_Barrier(comm); + comm.barrier(); MPI_Finalize(); // **************************************************** } diff --git a/tests/lbpm_porenetwork_pp.cpp b/tests/lbpm_porenetwork_pp.cpp index 496f9d86..4a6ccda7 100644 --- a/tests/lbpm_porenetwork_pp.cpp +++ b/tests/lbpm_porenetwork_pp.cpp @@ -9,19 +9,15 @@ #include "common/ScaLBL.h" #include "common/Communication.h" #include "analysis/TwoPhase.h" -#include "common/MPI_Helpers.h" +#include "common/MPI.h" int main(int argc, char **argv) { - //***************************************** - // ***** MPI STUFF **************** - //***************************************** // Initialize MPI - int rank,nprocs; MPI_Init(&argc,&argv); - MPI_Comm comm = MPI_COMM_WORLD; - MPI_Comm_rank(comm,&rank); - MPI_Comm_size(comm,&nprocs); + Utilities::MPI comm( MPI_COMM_WORLD ); + int rank = comm.getRank(); + int nprocs = comm.getSize(); { // parallel domain size (# of sub-domains) int nprocx,nprocy,nprocz; @@ -69,7 +65,7 @@ int main(int argc, char **argv) } // ************************************************************** // Broadcast simulation parameters from rank 0 to all other procs - MPI_Barrier(comm); + comm.barrier(); // Computational domain MPI_Bcast(&Nx,1,MPI_INT,0,comm); MPI_Bcast(&Ny,1,MPI_INT,0,comm); @@ -82,7 +78,7 @@ int main(int argc, char **argv) MPI_Bcast(&Ly,1,MPI_DOUBLE,0,comm); MPI_Bcast(&Lz,1,MPI_DOUBLE,0,comm); //................................................. - MPI_Barrier(comm); + comm.barrier(); // ************************************************************** if (nprocs != nprocx*nprocy*nprocz){ @@ -108,7 +104,7 @@ int main(int argc, char **argv) Dm->CommInit(); std::shared_ptr Averages( new TwoPhase(Dm) ); - MPI_Barrier(comm); + comm.barrier(); Nx += 2; Ny += 2; Nz += 2; @@ -293,7 +289,7 @@ int main(int argc, char **argv) } // **************************************************** - MPI_Barrier(comm); + comm.barrier(); MPI_Finalize(); // **************************************************** } diff --git a/tests/lbpm_random_pp.cpp b/tests/lbpm_random_pp.cpp index 07c56e6f..ad4b83cc 100644 --- a/tests/lbpm_random_pp.cpp +++ b/tests/lbpm_random_pp.cpp @@ -52,11 +52,10 @@ inline void UnpackID(int *list, int count, char *recvbuf, char *ID){ int main(int argc, char **argv) { // Initialize MPI - int rank, nprocs; MPI_Init(&argc,&argv); - MPI_Comm comm = MPI_COMM_WORLD; - MPI_Comm_rank(comm,&rank); - MPI_Comm_size(comm,&nprocs); + Utilities::MPI comm( MPI_COMM_WORLD ); + int rank = comm.getRank(); + int nprocs = comm.getSize(); int InitialWetting; double Saturation; @@ -97,7 +96,7 @@ int main(int argc, char **argv) domain >> Lz; } - MPI_Barrier(comm); + comm.barrier(); // Computational domain MPI_Bcast(&nx,1,MPI_INT,0,comm); MPI_Bcast(&ny,1,MPI_INT,0,comm); @@ -110,7 +109,7 @@ int main(int argc, char **argv) MPI_Bcast(&Ly,1,MPI_DOUBLE,0,comm); MPI_Bcast(&Lz,1,MPI_DOUBLE,0,comm); //................................................. 
- MPI_Barrier(comm); + comm.barrier(); // Check that the number of processors >= the number of ranks if ( rank==0 ) { @@ -422,7 +421,7 @@ int main(int argc, char **argv) fwrite(id,1,N,ID); fclose(ID); - MPI_Barrier(comm); + comm.barrier(); MPI_Finalize(); return 0; } diff --git a/tests/lbpm_refine_pp.cpp b/tests/lbpm_refine_pp.cpp index d90dbb04..149ae673 100644 --- a/tests/lbpm_refine_pp.cpp +++ b/tests/lbpm_refine_pp.cpp @@ -16,11 +16,10 @@ int main(int argc, char **argv) { // Initialize MPI - int rank, nprocs; MPI_Init(&argc,&argv); - MPI_Comm comm = MPI_COMM_WORLD; - MPI_Comm_rank(comm,&rank); - MPI_Comm_size(comm,&nprocs); + Utilities::MPI comm( MPI_COMM_WORLD ); + int rank = comm.getRank(); + int nprocs = comm.getSize(); { //....................................................................... @@ -422,7 +421,7 @@ int main(int argc, char **argv) } - MPI_Barrier(comm); + comm.barrier(); MPI_Finalize(); return 0; } diff --git a/tests/lbpm_segmented_decomp.cpp b/tests/lbpm_segmented_decomp.cpp index 3384e454..1bc89adb 100644 --- a/tests/lbpm_segmented_decomp.cpp +++ b/tests/lbpm_segmented_decomp.cpp @@ -18,12 +18,10 @@ int main(int argc, char **argv) { // Initialize MPI - int rank, nprocs; MPI_Init(&argc,&argv); - - MPI_Comm comm = MPI_COMM_WORLD; - MPI_Comm_rank(comm,&rank); - MPI_Comm_size(comm,&nprocs); + Utilities::MPI comm( MPI_COMM_WORLD ); + int rank = comm.getRank(); + int nprocs = comm.getSize(); { @@ -84,7 +82,7 @@ int main(int argc, char **argv) image >> zStart; } - MPI_Barrier(comm); + comm.barrier(); // Computational domain //................................................. MPI_Bcast(&nx,1,MPI_INT,0,comm); @@ -105,7 +103,7 @@ int main(int argc, char **argv) MPI_Bcast(&yStart,1,MPI_INT,0,comm); MPI_Bcast(&zStart,1,MPI_INT,0,comm); //................................................. - MPI_Barrier(comm); + comm.barrier(); // Check that the number of processors >= the number of ranks if ( rank==0 ) { @@ -129,7 +127,7 @@ int main(int argc, char **argv) fclose(SEGDAT); printf("Read segmented data from %s \n",Filename); } - MPI_Barrier(comm); + comm.barrier(); // Get the rank info int N = (nx+2)*(ny+2)*(nz+2); @@ -204,7 +202,7 @@ int main(int argc, char **argv) printf("Ready to recieve data %i at process %i \n", N,rank); MPI_Recv(Dm.id,N,MPI_CHAR,0,15,comm,MPI_STATUS_IGNORE); } - MPI_Barrier(comm); + comm.barrier(); nx+=2; ny+=2; nz+=2; N=nx*ny*nz; @@ -340,7 +338,7 @@ int main(int argc, char **argv) if (!MULTINPUT){ if (rank==0) printf("Writing symmetric domain reflection\n"); - MPI_Barrier(comm); + comm.barrier(); int symrank,sympz; sympz = 2*nprocz - Dm.kproc() -1; symrank = sympz*nprocx*nprocy + Dm.jproc()*nprocx + Dm.iproc(); @@ -366,6 +364,6 @@ int main(int argc, char **argv) fclose(SYMID); } } - MPI_Barrier(comm); + comm.barrier(); MPI_Finalize(); } diff --git a/tests/lbpm_segmented_pp.cpp b/tests/lbpm_segmented_pp.cpp index 007ff9d1..39cf0bd1 100644 --- a/tests/lbpm_segmented_pp.cpp +++ b/tests/lbpm_segmented_pp.cpp @@ -115,11 +115,10 @@ double ReadFromBlock( char *ID, int iproc, int jproc, int kproc, int Nx, int Ny, int main(int argc, char **argv) { // Initialize MPI - int rank, nprocs; MPI_Init(&argc,&argv); - MPI_Comm comm = MPI_COMM_WORLD; - MPI_Comm_rank(comm,&rank); - MPI_Comm_size(comm,&nprocs); + Utilities::MPI comm( MPI_COMM_WORLD ); + int rank = comm.getRank(); + int nprocs = comm.getSize(); { //....................................................................... 
// Reading the domain information file @@ -231,7 +230,7 @@ int main(int argc, char **argv) fclose(DIST); } - MPI_Barrier(comm); + comm.barrier(); MPI_Finalize(); return 0; diff --git a/tests/lbpm_sphere_pp.cpp b/tests/lbpm_sphere_pp.cpp index 98778b8d..2e053eed 100644 --- a/tests/lbpm_sphere_pp.cpp +++ b/tests/lbpm_sphere_pp.cpp @@ -9,7 +9,7 @@ #include "analysis/pmmc.h" #include "common/Domain.h" #include "common/SpherePack.h" -#include "common/MPI_Helpers.h" +#include "common/MPI.h" #include "common/Communication.h" /* @@ -22,15 +22,11 @@ using namespace std; int main(int argc, char **argv) { - //***************************************** - // ***** MPI STUFF **************** - //***************************************** // Initialize MPI - int rank,nprocs; MPI_Init(&argc,&argv); - MPI_Comm comm = MPI_COMM_WORLD; - MPI_Comm_rank(comm,&rank); - MPI_Comm_size(comm,&nprocs); + Utilities::MPI comm( MPI_COMM_WORLD ); + int rank = comm.getRank(); + int nprocs = comm.getSize(); // parallel domain size (# of sub-domains) int iproc,jproc,kproc; int sendtag,recvtag; @@ -127,14 +123,14 @@ int main(int argc, char **argv) //....................................................................... if (rank == 0) printf("Reading the sphere packing \n"); if (rank == 0) ReadSpherePacking(nspheres,cx,cy,cz,rad); - MPI_Barrier(comm); + comm.barrier(); // Broadcast the sphere packing to all processes MPI_Bcast(cx,nspheres,MPI_DOUBLE,0,comm); MPI_Bcast(cy,nspheres,MPI_DOUBLE,0,comm); MPI_Bcast(cz,nspheres,MPI_DOUBLE,0,comm); MPI_Bcast(rad,nspheres,MPI_DOUBLE,0,comm); //........................................................................... - MPI_Barrier(comm); + comm.barrier(); if (rank == 0) cout << "Domain set." << endl; if (rank == 0){ // Compute the Sauter mean diameter @@ -217,7 +213,7 @@ int main(int argc, char **argv) fclose(ID); // **************************************************** - MPI_Barrier(comm); + comm.barrier(); MPI_Finalize(); // **************************************************** } diff --git a/tests/lbpm_squaretube_pp.cpp b/tests/lbpm_squaretube_pp.cpp index 42715773..c1f05aee 100644 --- a/tests/lbpm_squaretube_pp.cpp +++ b/tests/lbpm_squaretube_pp.cpp @@ -9,19 +9,15 @@ #include "common/ScaLBL.h" #include "common/Communication.h" #include "analysis/TwoPhase.h" -#include "common/MPI_Helpers.h" +#include "common/MPI.h" int main(int argc, char **argv) { - //***************************************** - // ***** MPI STUFF **************** - //***************************************** // Initialize MPI - int rank,nprocs; MPI_Init(&argc,&argv); - MPI_Comm comm = MPI_COMM_WORLD; - MPI_Comm_rank(comm,&rank); - MPI_Comm_size(comm,&nprocs); + Utilities::MPI comm( MPI_COMM_WORLD ); + int rank = comm.getRank(); + int nprocs = comm.getSize(); { // parallel domain size (# of sub-domains) int nprocx,nprocy,nprocz; @@ -85,7 +81,7 @@ int main(int argc, char **argv) } // ************************************************************** // Broadcast simulation parameters from rank 0 to all other procs - MPI_Barrier(comm); + comm.barrier(); // Computational domain MPI_Bcast(&Nx,1,MPI_INT,0,comm); MPI_Bcast(&Ny,1,MPI_INT,0,comm); @@ -98,7 +94,7 @@ int main(int argc, char **argv) MPI_Bcast(&Ly,1,MPI_DOUBLE,0,comm); MPI_Bcast(&Lz,1,MPI_DOUBLE,0,comm); //................................................. 
- MPI_Barrier(comm); + comm.barrier(); // ************************************************************** if (nprocs != nprocx*nprocy*nprocz){ @@ -125,7 +121,7 @@ int main(int argc, char **argv) rank_xy, rank_XY, rank_xY, rank_Xy, rank_xz, rank_XZ, rank_xZ, rank_Xz, rank_yz, rank_YZ, rank_yZ, rank_Yz ); - MPI_Barrier(comm); + comm.barrier(); Nz += 2; Nx = Ny = Nz; // Cubic domain @@ -259,7 +255,7 @@ int main(int argc, char **argv) } // **************************************************** - MPI_Barrier(comm); + comm.barrier(); MPI_Finalize(); // **************************************************** } diff --git a/tests/lbpm_uCT_maskfilter.cpp b/tests/lbpm_uCT_maskfilter.cpp index cff41ad7..857bc4e0 100644 --- a/tests/lbpm_uCT_maskfilter.cpp +++ b/tests/lbpm_uCT_maskfilter.cpp @@ -14,7 +14,7 @@ #include "common/Array.h" #include "common/Domain.h" #include "common/Communication.h" -#include "common/MPI_Helpers.h" +#include "common/MPI.h" #include "IO/MeshDatabase.h" #include "IO/Mesh.h" #include "IO/Writer.h" @@ -30,13 +30,11 @@ int main(int argc, char **argv) { - // Initialize MPI - int rank, nprocs; MPI_Init(&argc,&argv); - MPI_Comm comm = MPI_COMM_WORLD; - MPI_Comm_rank(comm,&rank); - MPI_Comm_size(comm,&nprocs); + Utilities::MPI comm( MPI_COMM_WORLD ); + int rank = comm.getRank(); + int nprocs = comm.getSize(); Utilities::setErrorHandlers(); PROFILE_START("Main"); @@ -151,7 +149,7 @@ int main(int argc, char **argv) } netcdf::close( distid ); - MPI_Barrier(comm); + comm.barrier(); PROFILE_STOP("ReadDistance"); if (rank==0) printf("Finished reading distance =\n"); @@ -184,7 +182,7 @@ int main(int argc, char **argv) fillFloat[0]->fill( LOCVOL[0] ); } netcdf::close( fid ); - MPI_Barrier(comm); + comm.barrier(); PROFILE_STOP("ReadVolume"); if (rank==0) printf("Read complete\n"); @@ -447,7 +445,7 @@ int main(int argc, char **argv) PROFILE_STOP("Main"); PROFILE_SAVE("lbpm_uCT_maskfilter",true); - MPI_Barrier(comm); + comm.barrier(); MPI_Finalize(); return 0; } diff --git a/tests/lbpm_uCT_pp.cpp b/tests/lbpm_uCT_pp.cpp index 0285b864..6e8d1bde 100644 --- a/tests/lbpm_uCT_pp.cpp +++ b/tests/lbpm_uCT_pp.cpp @@ -14,7 +14,7 @@ #include "common/Array.h" #include "common/Domain.h" #include "common/Communication.h" -#include "common/MPI_Helpers.h" +#include "common/MPI.h" #include "IO/MeshDatabase.h" #include "IO/Mesh.h" #include "IO/Writer.h" @@ -31,11 +31,10 @@ int main(int argc, char **argv) { // Initialize MPI - int rank, nprocs; MPI_Init(&argc,&argv); - MPI_Comm comm = MPI_COMM_WORLD; - MPI_Comm_rank(comm,&rank); - MPI_Comm_size(comm,&nprocs); + Utilities::MPI comm( MPI_COMM_WORLD ); + int rank = comm.getRank(); + int nprocs = comm.getSize(); { Utilities::setErrorHandlers(); PROFILE_START("Main"); @@ -188,7 +187,7 @@ int main(int argc, char **argv) fillFloat[0]->fill( LOCVOL[0] ); } netcdf::close( fid ); - MPI_Barrier(comm); + comm.barrier(); PROFILE_STOP("ReadVolume"); if (rank==0) printf("Read complete\n"); @@ -251,15 +250,15 @@ int main(int argc, char **argv) } } } - count_plus=sumReduce( Dm[0]->Comm, count_plus); - count_minus=sumReduce( Dm[0]->Comm, count_minus); + count_plus = Dm[0]->Comm.sumReduce( count_plus); + count_minus = Dm[0]->Comm.sumReduce( count_minus); if (rank==0) printf("minimum value=%f, max value=%f \n",min_value,max_value); if (rank==0) printf("plus=%i, minus=%i \n",count_plus,count_minus); ASSERT( count_plus > 0 && count_minus > 0 ); - MPI_Barrier(comm); - mean_plus = sumReduce( Dm[0]->Comm, mean_plus ) / count_plus; - mean_minus = sumReduce( Dm[0]->Comm, mean_minus ) / 
count_minus; - MPI_Barrier(comm); + comm.barrier(); + mean_plus = Dm[0]->Comm.sumReduce( mean_plus ) / count_plus; + mean_minus = Dm[0]->Comm.sumReduce( mean_minus ) / count_minus; + comm.barrier(); if (rank==0) printf(" Region 1 mean (+): %f, Region 2 mean (-): %f \n",mean_plus, mean_minus); //if (rank==0) printf("Scale the input data (size = %i) \n",LOCVOL[0].length()); @@ -280,7 +279,7 @@ int main(int argc, char **argv) // Fill the source data for the coarse meshes if (rank==0) printf("Coarsen the mesh for N_levels=%i \n",N_levels); - MPI_Barrier(comm); + comm.barrier(); PROFILE_START("CoarsenMesh"); for (int i=1; i filter(ratio[0],ratio[1],ratio[2]); @@ -296,7 +295,7 @@ int main(int argc, char **argv) printf(" filter_x=%i, filter_y=%i, filter_z=%i \n",int(filter.size(0)),int(filter.size(1)),int(filter.size(2)) ); printf(" ratio= %i,%i,%i \n",int(ratio[0]),int(ratio[1]),int(ratio[2]) ); } - MPI_Barrier(comm); + comm.barrier(); } PROFILE_STOP("CoarsenMesh"); @@ -308,7 +307,7 @@ int main(int argc, char **argv) NonLocalMean.back(), *fillFloat.back(), *Dm.back(), nprocx, rough_cutoff, lamda, nlm_sigsq, nlm_depth); PROFILE_STOP("Solve coarse mesh"); - MPI_Barrier(comm); + comm.barrier(); // Refine the solution PROFILE_START("Refine distance"); @@ -322,7 +321,7 @@ int main(int argc, char **argv) rough_cutoff, lamda, nlm_sigsq, nlm_depth); } PROFILE_STOP("Refine distance"); - MPI_Barrier(comm); + comm.barrier(); // Perform a final filter PROFILE_START("Filtering final domains"); @@ -418,14 +417,14 @@ int main(int argc, char **argv) meshData[0].vars.push_back(filter_Dist2_var); fillDouble[0]->copy( filter_Dist2, filter_Dist2_var->data ); #endif - MPI_Barrier(comm); + comm.barrier(); if (rank==0) printf("Writing output \n"); // Write visulization data IO::writeData( 0, meshData, comm ); if (rank==0) printf("Finished. 
\n"); // Compute the Minkowski functionals - MPI_Barrier(comm); + comm.barrier(); auto Averages = std::make_shared(Dm[0]); Array phase_label(Nx[0]+2,Ny[0]+2,Nz[0]+2); @@ -457,7 +456,7 @@ int main(int argc, char **argv) } PROFILE_STOP("Main"); PROFILE_SAVE("lbpm_uCT_pp",true); - MPI_Barrier(comm); + comm.barrier(); MPI_Finalize(); return 0; } diff --git a/tests/testCommunication.cpp b/tests/testCommunication.cpp index 57ce0959..911ef1c5 100644 --- a/tests/testCommunication.cpp +++ b/tests/testCommunication.cpp @@ -6,7 +6,7 @@ #include #include "common/Communication.h" -#include "common/MPI_Helpers.h" +#include "common/MPI.h" #include "common/Array.h" using namespace std; @@ -15,11 +15,9 @@ using namespace std; //*************************************************************************************** -int test_communication( MPI_Comm comm, int nprocx, int nprocy, int nprocz ) +int test_communication( const Utilities::MPI& comm, int nprocx, int nprocy, int nprocz ) { - int rank,nprocs; - MPI_Comm_rank(comm,&rank); - MPI_Comm_size(comm,&nprocs); + int rank = comm.getRank(); int iproc,jproc,kproc; int sendtag,recvtag; if (rank==0) printf("\nRunning test %i %i %i\n",nprocx,nprocy,nprocz); @@ -38,7 +36,7 @@ int test_communication( MPI_Comm comm, int nprocx, int nprocy, int nprocz ) rank_xy, rank_XY, rank_xY, rank_Xy, rank_xz, rank_XZ, rank_xZ, rank_Xz, rank_yz, rank_YZ, rank_yZ, rank_Yz ); - MPI_Barrier(comm); + comm.barrier(); //********************************** @@ -85,7 +83,7 @@ int test_communication( MPI_Comm comm, int nprocx, int nprocy, int nprocz ) sendCount_xy = sendCount_yz = sendCount_xz = sendCount_Xy = sendCount_Yz = sendCount_xZ = 0; sendCount_xY = sendCount_yZ = sendCount_Xz = sendCount_XY = sendCount_YZ = sendCount_XZ = 0; - MPI_Barrier(comm); + comm.barrier(); if (rank==0) printf ("SendLists are ready on host\n"); //...................................................................................... 
// Use MPI to fill in the recvCounts form the associated processes @@ -158,7 +156,7 @@ int test_communication( MPI_Comm comm, int nprocx, int nprocy, int nprocz ) recvCount_yz, recvCount_YZ, recvCount_yZ, recvCount_Yz, rank_x, rank_y, rank_z, rank_X, rank_Y, rank_Z, rank_xy, rank_XY, rank_xY, rank_Xy, rank_xz, rank_XZ, rank_xZ, rank_Xz, rank_yz, rank_YZ, rank_yZ, rank_Yz ); - MPI_Barrier(comm); + comm.barrier(); if (rank==0) printf ("RecvLists finished\n"); // Free memory @@ -181,11 +179,9 @@ int test_communication( MPI_Comm comm, int nprocx, int nprocy, int nprocz ) template -int testHalo( MPI_Comm comm, int nprocx, int nprocy, int nprocz, int depth ) +int testHalo( const Utilities::MPI& comm, int nprocx, int nprocy, int nprocz, int depth ) { - int rank,nprocs; - MPI_Comm_rank(comm,&rank); - MPI_Comm_size(comm,&nprocs); + int rank = comm.getRank(); if ( rank==0 ) printf("\nRunning Halo test %i %i %i %i\n",nprocx,nprocy,nprocz,depth); @@ -255,11 +251,10 @@ int testHalo( MPI_Comm comm, int nprocx, int nprocy, int nprocz, int depth ) int main(int argc, char **argv) { // Initialize MPI - int rank,nprocs; MPI_Init(&argc,&argv); - MPI_Comm comm = MPI_COMM_WORLD; - MPI_Comm_rank(comm,&rank); - MPI_Comm_size(comm,&nprocs); + Utilities::MPI comm( MPI_COMM_WORLD ); + int rank = comm.getRank(); + int nprocs = comm.getSize(); // Run the test with different domains int N_errors = 0; @@ -289,10 +284,9 @@ int main(int argc, char **argv) } // Finished - MPI_Barrier(comm); - int N_errors_global=0; - MPI_Allreduce( &N_errors, &N_errors_global, 1, MPI_INT, MPI_SUM, comm ); - MPI_Barrier(comm); + comm.barrier(); + int N_errors_global = comm.sumReduce( N_errors ); + comm.barrier(); MPI_Finalize(); if ( rank==0 ) { if ( N_errors_global==0 ) diff --git a/tests/test_dcel_minkowski.cpp b/tests/test_dcel_minkowski.cpp index 0d6cbca9..2669b522 100644 --- a/tests/test_dcel_minkowski.cpp +++ b/tests/test_dcel_minkowski.cpp @@ -26,9 +26,9 @@ std::shared_ptr loadInputs( ) int main(int argc, char **argv) { MPI_Init(&argc,&argv); - MPI_Comm comm = MPI_COMM_WORLD; - //int rank = MPI_WORLD_RANK(); - //int nprocs = MPI_WORLD_SIZE(); + Utilities::MPI comm( MPI_COMM_WORLD ); + //int rank = comm.getRank(); + //int nprocs = comm.getSize(); int toReturn = 0; { int i,j,k; @@ -99,7 +99,7 @@ int main(int argc, char **argv) } PROFILE_SAVE("test_dcel_minkowski"); - MPI_Barrier(comm); + comm.barrier(); MPI_Finalize(); return toReturn; } diff --git a/tests/test_dcel_tri_normal.cpp b/tests/test_dcel_tri_normal.cpp index 1e85b1f3..b6497140 100644 --- a/tests/test_dcel_tri_normal.cpp +++ b/tests/test_dcel_tri_normal.cpp @@ -26,7 +26,7 @@ std::shared_ptr loadInputs( ) int main(int argc, char **argv) { MPI_Init(&argc,&argv); - MPI_Comm comm = MPI_COMM_WORLD; + Utilities::MPI comm( MPI_COMM_WORLD ); int toReturn = 0; { int i,j,k; @@ -136,7 +136,7 @@ int main(int argc, char **argv) if (count_check > 0) toReturn=2; else printf("Succeeded. 
\n"); } - MPI_Barrier(comm); + comm.barrier(); MPI_Finalize(); return toReturn; } From 0f91767b6c870101084fbae0978280c04c85a004 Mon Sep 17 00:00:00 2001 From: Mark Berrill Date: Tue, 28 Jan 2020 12:33:36 -0500 Subject: [PATCH 030/121] Moving more MPI calls to the wrapper --- IO/netcdf.cpp | 2 +- StackTrace/ErrorHandlers.h | 2 +- StackTrace/Utilities.cpp | 2 +- analysis/TwoPhase.cpp | 7 +- analysis/morphology.cpp | 132 +++-- common/Communication.h | 216 ++++---- common/Domain.cpp | 206 ++++---- common/Domain.h | 3 - common/MPI.I | 33 ++ common/MPI.cpp | 48 ++ common/MPI.h | 7 + common/ScaLBL.h | 1 - common/Utilities.cpp | 2 +- cpu/exe/lb2_Color_mpi.cpp | 538 ++++++++++--------- cpu/exe/lb2_Color_wia_mpi_bubble.cpp | 711 ++++++++++++-------------- gpu/exe/lb1_MRT_mpi.cpp | 348 ++++++------- gpu/exe/lb1_MRT_mpi.cu | 352 ++++++------- gpu/exe/lb2_Color.cu | 65 +-- gpu/exe/lb2_Color_mpi.cpp | 541 ++++++++++---------- gpu/exe/lb2_Color_pBC_wia_mpi.cpp | 621 ++++++++++------------ models/ColorModel.cpp | 8 +- models/DFHModel.cpp | 4 +- models/MRTModel.cpp | 4 +- tests/BlobAnalyzeParallel.cpp | 22 +- tests/GenerateSphereTest.cpp | 54 +- tests/TestBlobAnalyze.cpp | 28 +- tests/TestBubble.cpp | 41 +- tests/TestBubbleDFH.cpp | 4 +- tests/TestColorGrad.cpp | 20 +- tests/TestCommD3Q19.cpp | 4 +- tests/TestForceD3Q19.cpp | 4 +- tests/TestForceMoments.cpp | 4 +- tests/TestMRT.cpp | 28 +- tests/TestMicroCTReader.cpp | 1 - tests/TestMomentsD3Q19.cpp | 2 +- tests/TestNetcdf.cpp | 2 +- tests/TestSegDist.cpp | 4 +- tests/lb2_CMT_wia.cpp | 30 +- tests/lb2_Color_blob_wia_mpi.cpp | 427 ++++++++-------- tests/lbpm_BGK_simulator.cpp | 48 +- tests/lbpm_color_macro_simulator.cpp | 61 ++- tests/lbpm_disc_pp.cpp | 34 +- tests/lbpm_inkbottle_pp.cpp | 22 +- tests/lbpm_juanes_bench_disc_pp.cpp | 35 +- tests/lbpm_nondarcy_simulator.cpp | 52 +- tests/lbpm_nonnewtonian_simulator.cpp | 26 +- tests/lbpm_plates_pp.cpp | 24 +- tests/lbpm_porenetwork_pp.cpp | 25 +- tests/lbpm_random_pp.cpp | 92 ++-- tests/lbpm_segmented_decomp.cpp | 48 +- tests/lbpm_segmented_pp.cpp | 2 +- tests/lbpm_sphere_pp.cpp | 16 +- tests/lbpm_squaretube_pp.cpp | 25 +- 53 files changed, 2360 insertions(+), 2678 deletions(-) diff --git a/IO/netcdf.cpp b/IO/netcdf.cpp index e061579a..6c3773e3 100644 --- a/IO/netcdf.cpp +++ b/IO/netcdf.cpp @@ -119,7 +119,7 @@ std::string VariableTypeName( VariableType type ) int open( const std::string& filename, FileMode mode, const Utilities::MPI& comm ) { int fid = 0; - if ( comm == MPI_COMM_NULL ) { + if ( comm.isNull() ) { if ( mode == READ ) { int err = nc_open( filename.c_str(), NC_NOWRITE, &fid ); CHECK_NC_ERR( err ); diff --git a/StackTrace/ErrorHandlers.h b/StackTrace/ErrorHandlers.h index 12b8d7de..e43a4688 100644 --- a/StackTrace/ErrorHandlers.h +++ b/StackTrace/ErrorHandlers.h @@ -6,7 +6,7 @@ #include -#include "mpi.h" +#include "common/MPI.h" namespace StackTrace diff --git a/StackTrace/Utilities.cpp b/StackTrace/Utilities.cpp index 11f05777..5fb8e9b8 100644 --- a/StackTrace/Utilities.cpp +++ b/StackTrace/Utilities.cpp @@ -14,7 +14,7 @@ #include #ifdef USE_MPI -#include "mpi.h" +#include "common/MPI.h" #endif #ifdef USE_TIMER diff --git a/analysis/TwoPhase.cpp b/analysis/TwoPhase.cpp index ea136758..1dbdfbfa 100644 --- a/analysis/TwoPhase.cpp +++ b/analysis/TwoPhase.cpp @@ -890,14 +890,14 @@ void TwoPhase::ComponentAverages() RecvBuffer.resize(BLOB_AVG_COUNT,NumberComponents_NWP); /* for (int b=0; bComm); - MPI_Allreduce(&ComponentAverages_NWP(0,b),&RecvBuffer(0),BLOB_AVG_COUNT,MPI_DOUBLE,MPI_SUM,Dm->Comm); + 
Dm->Comm.barrier(); + Dm->Comm.sumReduce(&ComponentAverages_NWP(0,b),&RecvBuffer(0),BLOB_AVG_COUNT); for (int idx=0; idxComm.barrier(); Dm->Comm.sumReduce(ComponentAverages_NWP.data(),RecvBuffer.data(),BLOB_AVG_COUNT*NumberComponents_NWP); - // MPI_Reduce(ComponentAverages_NWP.data(),RecvBuffer.data(),BLOB_AVG_COUNT,MPI_DOUBLE,MPI_SUM,0,Dm->Comm); + // Dm->Comm.sumReduce(ComponentAverages_NWP.data(),RecvBuffer.data(),BLOB_AVG_COUNT); if (Dm->rank()==0){ printf("rescaling... \n"); @@ -994,7 +994,6 @@ void TwoPhase::ComponentAverages() // reduce the wetting phase averages for (int b=0; bComm.barrier(); -// MPI_Allreduce(&ComponentAverages_WP(0,b),RecvBuffer.data(),BLOB_AVG_COUNT,MPI_DOUBLE,MPI_SUM,Dm->Comm); Dm->Comm.sumReduce(&ComponentAverages_WP(0,b),RecvBuffer.data(),BLOB_AVG_COUNT); for (int idx=0; idx PackID(Dm->sendList_yZ, Dm->sendCount_yZ ,sendID_yZ, id); PackID(Dm->sendList_YZ, Dm->sendCount_YZ ,sendID_YZ, id); //...................................................................................... - MPI_Sendrecv(sendID_x,Dm->sendCount_x,MPI_CHAR,Dm->rank_x(),sendtag, - recvID_X,Dm->recvCount_X,MPI_CHAR,Dm->rank_X(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_X,Dm->sendCount_X,MPI_CHAR,Dm->rank_X(),sendtag, - recvID_x,Dm->recvCount_x,MPI_CHAR,Dm->rank_x(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_y,Dm->sendCount_y,MPI_CHAR,Dm->rank_y(),sendtag, - recvID_Y,Dm->recvCount_Y,MPI_CHAR,Dm->rank_Y(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_Y,Dm->sendCount_Y,MPI_CHAR,Dm->rank_Y(),sendtag, - recvID_y,Dm->recvCount_y,MPI_CHAR,Dm->rank_y(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_z,Dm->sendCount_z,MPI_CHAR,Dm->rank_z(),sendtag, - recvID_Z,Dm->recvCount_Z,MPI_CHAR,Dm->rank_Z(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_Z,Dm->sendCount_Z,MPI_CHAR,Dm->rank_Z(),sendtag, - recvID_z,Dm->recvCount_z,MPI_CHAR,Dm->rank_z(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_xy,Dm->sendCount_xy,MPI_CHAR,Dm->rank_xy(),sendtag, - recvID_XY,Dm->recvCount_XY,MPI_CHAR,Dm->rank_XY(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_XY,Dm->sendCount_XY,MPI_CHAR,Dm->rank_XY(),sendtag, - recvID_xy,Dm->recvCount_xy,MPI_CHAR,Dm->rank_xy(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_Xy,Dm->sendCount_Xy,MPI_CHAR,Dm->rank_Xy(),sendtag, - recvID_xY,Dm->recvCount_xY,MPI_CHAR,Dm->rank_xY(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_xY,Dm->sendCount_xY,MPI_CHAR,Dm->rank_xY(),sendtag, - recvID_Xy,Dm->recvCount_Xy,MPI_CHAR,Dm->rank_Xy(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_xz,Dm->sendCount_xz,MPI_CHAR,Dm->rank_xz(),sendtag, - recvID_XZ,Dm->recvCount_XZ,MPI_CHAR,Dm->rank_XZ(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_XZ,Dm->sendCount_XZ,MPI_CHAR,Dm->rank_XZ(),sendtag, - recvID_xz,Dm->recvCount_xz,MPI_CHAR,Dm->rank_xz(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_Xz,Dm->sendCount_Xz,MPI_CHAR,Dm->rank_Xz(),sendtag, - recvID_xZ,Dm->recvCount_xZ,MPI_CHAR,Dm->rank_xZ(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_xZ,Dm->sendCount_xZ,MPI_CHAR,Dm->rank_xZ(),sendtag, - recvID_Xz,Dm->recvCount_Xz,MPI_CHAR,Dm->rank_Xz(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); - 
MPI_Sendrecv(sendID_yz,Dm->sendCount_yz,MPI_CHAR,Dm->rank_yz(),sendtag, - recvID_YZ,Dm->recvCount_YZ,MPI_CHAR,Dm->rank_YZ(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_YZ,Dm->sendCount_YZ,MPI_CHAR,Dm->rank_YZ(),sendtag, - recvID_yz,Dm->recvCount_yz,MPI_CHAR,Dm->rank_yz(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_Yz,Dm->sendCount_Yz,MPI_CHAR,Dm->rank_Yz(),sendtag, - recvID_yZ,Dm->recvCount_yZ,MPI_CHAR,Dm->rank_yZ(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_yZ,Dm->sendCount_yZ,MPI_CHAR,Dm->rank_yZ(),sendtag, - recvID_Yz,Dm->recvCount_Yz,MPI_CHAR,Dm->rank_Yz(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); + Dm->Comm.sendrecv(sendID_x,Dm->sendCount_x,Dm->rank_x(),sendtag,recvID_X,Dm->recvCount_X,Dm->rank_X(),recvtag); + Dm->Comm.sendrecv(sendID_X,Dm->sendCount_X,Dm->rank_X(),sendtag,recvID_x,Dm->recvCount_x,Dm->rank_x(),recvtag); + Dm->Comm.sendrecv(sendID_y,Dm->sendCount_y,Dm->rank_y(),sendtag,recvID_Y,Dm->recvCount_Y,Dm->rank_Y(),recvtag); + Dm->Comm.sendrecv(sendID_Y,Dm->sendCount_Y,Dm->rank_Y(),sendtag,recvID_y,Dm->recvCount_y,Dm->rank_y(),recvtag); + Dm->Comm.sendrecv(sendID_z,Dm->sendCount_z,Dm->rank_z(),sendtag,recvID_Z,Dm->recvCount_Z,Dm->rank_Z(),recvtag); + Dm->Comm.sendrecv(sendID_Z,Dm->sendCount_Z,Dm->rank_Z(),sendtag,recvID_z,Dm->recvCount_z,Dm->rank_z(),recvtag); + Dm->Comm.sendrecv(sendID_xy,Dm->sendCount_xy,Dm->rank_xy(),sendtag,recvID_XY,Dm->recvCount_XY,Dm->rank_XY(),recvtag); + Dm->Comm.sendrecv(sendID_XY,Dm->sendCount_XY,Dm->rank_XY(),sendtag,recvID_xy,Dm->recvCount_xy,Dm->rank_xy(),recvtag); + Dm->Comm.sendrecv(sendID_Xy,Dm->sendCount_Xy,Dm->rank_Xy(),sendtag,recvID_xY,Dm->recvCount_xY,Dm->rank_xY(),recvtag); + Dm->Comm.sendrecv(sendID_xY,Dm->sendCount_xY,Dm->rank_xY(),sendtag,recvID_Xy,Dm->recvCount_Xy,Dm->rank_Xy(),recvtag); + Dm->Comm.sendrecv(sendID_xz,Dm->sendCount_xz,Dm->rank_xz(),sendtag,recvID_XZ,Dm->recvCount_XZ,Dm->rank_XZ(),recvtag); + Dm->Comm.sendrecv(sendID_XZ,Dm->sendCount_XZ,Dm->rank_XZ(),sendtag,recvID_xz,Dm->recvCount_xz,Dm->rank_xz(),recvtag); + Dm->Comm.sendrecv(sendID_Xz,Dm->sendCount_Xz,Dm->rank_Xz(),sendtag,recvID_xZ,Dm->recvCount_xZ,Dm->rank_xZ(),recvtag); + Dm->Comm.sendrecv(sendID_xZ,Dm->sendCount_xZ,Dm->rank_xZ(),sendtag,recvID_Xz,Dm->recvCount_Xz,Dm->rank_Xz(),recvtag); + Dm->Comm.sendrecv(sendID_yz,Dm->sendCount_yz,Dm->rank_yz(),sendtag,recvID_YZ,Dm->recvCount_YZ,Dm->rank_YZ(),recvtag); + Dm->Comm.sendrecv(sendID_YZ,Dm->sendCount_YZ,Dm->rank_YZ(),sendtag,recvID_yz,Dm->recvCount_yz,Dm->rank_yz(),recvtag); + Dm->Comm.sendrecv(sendID_Yz,Dm->sendCount_Yz,Dm->rank_Yz(),sendtag,recvID_yZ,Dm->recvCount_yZ,Dm->rank_yZ(),recvtag); + Dm->Comm.sendrecv(sendID_yZ,Dm->sendCount_yZ,Dm->rank_yZ(),sendtag,recvID_Yz,Dm->recvCount_Yz,Dm->rank_Yz(),recvtag); //...................................................................................... 
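The eighteen MPI_Sendrecv pairs above collapse into single wrapper calls: the explicit MPI_CHAR datatype, the raw communicator, and MPI_STATUS_IGNORE all drop out. A self-contained sketch of one direction is below; the parameter names are local stand-ins rather than the Domain members used in the real code.

#include "common/MPI.h"

// Illustrative only: one direction of the ID halo exchange, in isolation.
static void exchange_ids( const Utilities::MPI& comm,
                          const char *sendID, int sendCount, int dest,
                          char *recvID, int recvCount, int source, int tag )
{
    // Replaces MPI_Sendrecv(..., MPI_CHAR, ..., comm.getCommunicator(), MPI_STATUS_IGNORE):
    // the datatype is deduced from the char* buffers and the status is discarded.
    comm.sendrecv( sendID, sendCount, dest, tag,
                   recvID, recvCount, source, tag );
}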
UnpackID(Dm->recvList_x, Dm->recvCount_x ,recvID_x, id); UnpackID(Dm->recvList_X, Dm->recvCount_X ,recvID_X, id); @@ -303,7 +285,7 @@ double morph_open() fillHalo fillChar(Dm->Comm,Dm->rank_info,{Nx-2,Ny-2,Nz-2},{1,1,1},0,1); - MPI_Allreduce(&LocalNumber,&GlobalNumber,1,MPI_DOUBLE,MPI_SUM,Dm->Comm); + GlobalNumber = Dm->Comm.sumReduce( LocalNumber ); count = 0.f; for (int k=1; kComm); + countGlobal = Dm->Comm.sumReduce( count ); return countGlobal; } */ @@ -506,42 +488,42 @@ double MorphDrain(DoubleArray &SignDist, signed char *id, std::shared_ptrsendList_yZ, Dm->sendCount_yZ ,sendID_yZ, id); PackID(Dm->sendList_YZ, Dm->sendCount_YZ ,sendID_YZ, id); //...................................................................................... - MPI_Sendrecv(sendID_x,Dm->sendCount_x,MPI_CHAR,Dm->rank_x(),sendtag, - recvID_X,Dm->recvCount_X,MPI_CHAR,Dm->rank_X(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_X,Dm->sendCount_X,MPI_CHAR,Dm->rank_X(),sendtag, - recvID_x,Dm->recvCount_x,MPI_CHAR,Dm->rank_x(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_y,Dm->sendCount_y,MPI_CHAR,Dm->rank_y(),sendtag, - recvID_Y,Dm->recvCount_Y,MPI_CHAR,Dm->rank_Y(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_Y,Dm->sendCount_Y,MPI_CHAR,Dm->rank_Y(),sendtag, - recvID_y,Dm->recvCount_y,MPI_CHAR,Dm->rank_y(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_z,Dm->sendCount_z,MPI_CHAR,Dm->rank_z(),sendtag, - recvID_Z,Dm->recvCount_Z,MPI_CHAR,Dm->rank_Z(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_Z,Dm->sendCount_Z,MPI_CHAR,Dm->rank_Z(),sendtag, - recvID_z,Dm->recvCount_z,MPI_CHAR,Dm->rank_z(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_xy,Dm->sendCount_xy,MPI_CHAR,Dm->rank_xy(),sendtag, - recvID_XY,Dm->recvCount_XY,MPI_CHAR,Dm->rank_XY(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_XY,Dm->sendCount_XY,MPI_CHAR,Dm->rank_XY(),sendtag, - recvID_xy,Dm->recvCount_xy,MPI_CHAR,Dm->rank_xy(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_Xy,Dm->sendCount_Xy,MPI_CHAR,Dm->rank_Xy(),sendtag, - recvID_xY,Dm->recvCount_xY,MPI_CHAR,Dm->rank_xY(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_xY,Dm->sendCount_xY,MPI_CHAR,Dm->rank_xY(),sendtag, - recvID_Xy,Dm->recvCount_Xy,MPI_CHAR,Dm->rank_Xy(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_xz,Dm->sendCount_xz,MPI_CHAR,Dm->rank_xz(),sendtag, - recvID_XZ,Dm->recvCount_XZ,MPI_CHAR,Dm->rank_XZ(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_XZ,Dm->sendCount_XZ,MPI_CHAR,Dm->rank_XZ(),sendtag, - recvID_xz,Dm->recvCount_xz,MPI_CHAR,Dm->rank_xz(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_Xz,Dm->sendCount_Xz,MPI_CHAR,Dm->rank_Xz(),sendtag, - recvID_xZ,Dm->recvCount_xZ,MPI_CHAR,Dm->rank_xZ(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_xZ,Dm->sendCount_xZ,MPI_CHAR,Dm->rank_xZ(),sendtag, - recvID_Xz,Dm->recvCount_Xz,MPI_CHAR,Dm->rank_Xz(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_yz,Dm->sendCount_yz,MPI_CHAR,Dm->rank_yz(),sendtag, - recvID_YZ,Dm->recvCount_YZ,MPI_CHAR,Dm->rank_YZ(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_YZ,Dm->sendCount_YZ,MPI_CHAR,Dm->rank_YZ(),sendtag, - 
recvID_yz,Dm->recvCount_yz,MPI_CHAR,Dm->rank_yz(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_Yz,Dm->sendCount_Yz,MPI_CHAR,Dm->rank_Yz(),sendtag, - recvID_yZ,Dm->recvCount_yZ,MPI_CHAR,Dm->rank_yZ(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_yZ,Dm->sendCount_yZ,MPI_CHAR,Dm->rank_yZ(),sendtag, - recvID_Yz,Dm->recvCount_Yz,MPI_CHAR,Dm->rank_Yz(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); + Dm->Comm.sendrecv(sendID_x,Dm->sendCount_x,Dm->rank_x(),sendtag, + recvID_X,Dm->recvCount_X,Dm->rank_X(),recvtag); + Dm->Comm.sendrecv(sendID_X,Dm->sendCount_X,Dm->rank_X(),sendtag, + recvID_x,Dm->recvCount_x,Dm->rank_x(),recvtag); + Dm->Comm.sendrecv(sendID_y,Dm->sendCount_y,Dm->rank_y(),sendtag, + recvID_Y,Dm->recvCount_Y,Dm->rank_Y(),recvtag); + Dm->Comm.sendrecv(sendID_Y,Dm->sendCount_Y,Dm->rank_Y(),sendtag, + recvID_y,Dm->recvCount_y,Dm->rank_y(),recvtag); + Dm->Comm.sendrecv(sendID_z,Dm->sendCount_z,Dm->rank_z(),sendtag, + recvID_Z,Dm->recvCount_Z,Dm->rank_Z(),recvtag); + Dm->Comm.sendrecv(sendID_Z,Dm->sendCount_Z,Dm->rank_Z(),sendtag, + recvID_z,Dm->recvCount_z,Dm->rank_z(),recvtag); + Dm->Comm.sendrecv(sendID_xy,Dm->sendCount_xy,Dm->rank_xy(),sendtag, + recvID_XY,Dm->recvCount_XY,Dm->rank_XY(),recvtag); + Dm->Comm.sendrecv(sendID_XY,Dm->sendCount_XY,Dm->rank_XY(),sendtag, + recvID_xy,Dm->recvCount_xy,Dm->rank_xy(),recvtag); + Dm->Comm.sendrecv(sendID_Xy,Dm->sendCount_Xy,Dm->rank_Xy(),sendtag, + recvID_xY,Dm->recvCount_xY,Dm->rank_xY(),recvtag); + Dm->Comm.sendrecv(sendID_xY,Dm->sendCount_xY,Dm->rank_xY(),sendtag, + recvID_Xy,Dm->recvCount_Xy,Dm->rank_Xy(),recvtag); + Dm->Comm.sendrecv(sendID_xz,Dm->sendCount_xz,Dm->rank_xz(),sendtag, + recvID_XZ,Dm->recvCount_XZ,Dm->rank_XZ(),recvtag); + Dm->Comm.sendrecv(sendID_XZ,Dm->sendCount_XZ,Dm->rank_XZ(),sendtag, + recvID_xz,Dm->recvCount_xz,Dm->rank_xz(),recvtag); + Dm->Comm.sendrecv(sendID_Xz,Dm->sendCount_Xz,Dm->rank_Xz(),sendtag, + recvID_xZ,Dm->recvCount_xZ,Dm->rank_xZ(),recvtag); + Dm->Comm.sendrecv(sendID_xZ,Dm->sendCount_xZ,Dm->rank_xZ(),sendtag, + recvID_Xz,Dm->recvCount_Xz,Dm->rank_Xz(),recvtag); + Dm->Comm.sendrecv(sendID_yz,Dm->sendCount_yz,Dm->rank_yz(),sendtag, + recvID_YZ,Dm->recvCount_YZ,Dm->rank_YZ(),recvtag); + Dm->Comm.sendrecv(sendID_YZ,Dm->sendCount_YZ,Dm->rank_YZ(),sendtag, + recvID_yz,Dm->recvCount_yz,Dm->rank_yz(),recvtag); + Dm->Comm.sendrecv(sendID_Yz,Dm->sendCount_Yz,Dm->rank_Yz(),sendtag, + recvID_yZ,Dm->recvCount_yZ,Dm->rank_yZ(),recvtag); + Dm->Comm.sendrecv(sendID_yZ,Dm->sendCount_yZ,Dm->rank_yZ(),sendtag, + recvID_Yz,Dm->recvCount_Yz,Dm->rank_Yz(),recvtag); //...................................................................................... 
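Besides the halo exchanges, the reductions in this file and in TwoPhase.cpp follow the same idea: sumReduce stands in for MPI_Allreduce with MPI_SUM. A short sketch of the two forms used above; the values and the count are placeholders, and const-qualification of the wrapper methods is assumed.

#include "common/MPI.h"

static void reduce_examples( const Utilities::MPI& comm )
{
    // Scalar form: returns the reduced value directly.
    double LocalNumber  = 1.0;                        // hypothetical per-rank contribution
    double GlobalNumber = comm.sumReduce( LocalNumber );

    // Buffer form: reduces count elements into a separate receive buffer,
    // replacing MPI_Allreduce(local, global, count, MPI_DOUBLE, MPI_SUM, comm).
    const int count = 8;                              // hypothetical
    double local[count]  = { 0 };
    double global[count] = { 0 };
    comm.sumReduce( local, global, count );
    (void) GlobalNumber;
    (void) global;
}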
UnpackID(Dm->recvList_x, Dm->recvCount_x ,recvID_x, id); UnpackID(Dm->recvList_X, Dm->recvCount_X ,recvID_X, id); @@ -617,7 +599,7 @@ double MorphDrain(DoubleArray &SignDist, signed char *id, std::shared_ptrrank_info,phase,SignDist,vF,vS,phase_label,Dm->Comm); - MPI_Barrier(Dm->Comm); + Dm->Comm.barrier(); for (int k=1; k +void MPI_CLASS::sendrecv( const char*, int, int, int, char*, int, int, int ) const; +template<> +void MPI_CLASS::sendrecv( const int*, int, int, int, int*, int, int, int ) const; +template<> +void MPI_CLASS::sendrecv( const float*, int, int, int, float*, int, int, int ) const; +template<> +void MPI_CLASS::sendrecv( const double*, int, int, int, double*, int, int, int ) const; +template +void MPI_CLASS::sendrecv( const TYPE *sendbuf, int sendcount, int dest, int sendtag, + TYPE *recvbuf, int recvcount, int source, int recvtag ) const +{ + ERROR( "Not implimented" ); +} +#else +template +void MPI_CLASS::sendrecv( const TYPE *sendbuf, int sendcount, int dest, int sendtag, + TYPE *recvbuf, int recvcount, int source, int recvtag ) const +{ + ASSERT( dest == 0 ); + ASSERT( source == 0 ); + ASSERT( sendcount == recvcount ); + ASSERT( sendtag == recvtag ); + memcpy( recvbuf, sendbuf, sendcount * sizeof( TYPE ) ); +} +#endif + + + /************************************************************************ * allGather * ************************************************************************/ diff --git a/common/MPI.cpp b/common/MPI.cpp index d20c1af2..9495372d 100644 --- a/common/MPI.cpp +++ b/common/MPI.cpp @@ -2805,6 +2805,54 @@ MPI_Request MPI_CLASS::IrecvBytes( } + +/************************************************************************ + * sendrecv * + ************************************************************************/ +#if defined( USE_MPI ) || defined( USE_EXT_MPI ) +template<> +void MPI_CLASS::sendrecv( const char* sendbuf, int sendcount, int dest, int sendtag, + char* recvbuf, int recvcount, int source, int recvtag ) const +{ + PROFILE_START( "sendrecv", profile_level ); + MPI_Sendrecv( sendbuf, sendcount, MPI_CHAR, dest, sendtag, + recvbuf, recvcount, MPI_CHAR, source, recvtag, + communicator, MPI_STATUS_IGNORE ); + PROFILE_STOP( "sendrecv", profile_level ); +} +template<> +void MPI_CLASS::sendrecv( const int* sendbuf, int sendcount, int dest, int sendtag, + int* recvbuf, int recvcount, int source, int recvtag ) const +{ + PROFILE_START( "sendrecv", profile_level ); + MPI_Sendrecv( sendbuf, sendcount, MPI_INT, dest, sendtag, + recvbuf, recvcount, MPI_INT, source, recvtag, + communicator, MPI_STATUS_IGNORE ); + PROFILE_STOP( "sendrecv", profile_level ); +} +template<> +void MPI_CLASS::sendrecv( const float* sendbuf, int sendcount, int dest, int sendtag, + float* recvbuf, int recvcount, int source, int recvtag ) const +{ + PROFILE_START( "sendrecv", profile_level ); + MPI_Sendrecv( sendbuf, sendcount, MPI_FLOAT, dest, sendtag, + recvbuf, recvcount, MPI_FLOAT, source, recvtag, + communicator, MPI_STATUS_IGNORE ); + PROFILE_STOP( "sendrecv", profile_level ); +} +template<> +void MPI_CLASS::sendrecv( const double* sendbuf, int sendcount, int dest, int sendtag, + double* recvbuf, int recvcount, int source, int recvtag ) const +{ + PROFILE_START( "sendrecv", profile_level ); + MPI_Sendrecv( sendbuf, sendcount, MPI_DOUBLE, dest, sendtag, + recvbuf, recvcount, MPI_DOUBLE, source, recvtag, + communicator, MPI_STATUS_IGNORE ); + PROFILE_STOP( "sendrecv", profile_level ); +} +#endif + + /************************************************************************ * allGather * * 
Note: these specializations are only called when using MPI. * diff --git a/common/MPI.h b/common/MPI.h index e3fd3e13..4161d6a7 100644 --- a/common/MPI.h +++ b/common/MPI.h @@ -792,6 +792,13 @@ public: // Member functions void *buf, const int N_bytes, const int send_proc, const int tag ) const; + /*! + * @brief This function sends and recieves data using a blocking call + */ + template + void sendrecv( const type *sendbuf, int sendcount, int dest, int sendtag, type *recvbuf, int recvcount, int source, int recvtag ) const; + + /*! * Each processor sends every other processor a single value. * @param[in] x Input value for allGather diff --git a/common/ScaLBL.h b/common/ScaLBL.h index 78896d3f..d7f012d1 100644 --- a/common/ScaLBL.h +++ b/common/ScaLBL.h @@ -206,7 +206,6 @@ private: int sendtag,recvtag; // Give the object it's own MPI communicator RankInfoStruct rank_info; - MPI_Group Group; // Group of processors associated with this domain Utilities::MPI MPI_COMM_SCALBL; // MPI Communicator for this domain MPI_Request req1[18],req2[18]; //...................................................................................... diff --git a/common/Utilities.cpp b/common/Utilities.cpp index 1cf764be..11d2b261 100644 --- a/common/Utilities.cpp +++ b/common/Utilities.cpp @@ -8,7 +8,7 @@ #endif #ifdef USE_MPI -#include "mpi.h" +#include "common/MPI.h" #endif #include diff --git a/cpu/exe/lb2_Color_mpi.cpp b/cpu/exe/lb2_Color_mpi.cpp index 0cade21e..cdf56af9 100644 --- a/cpu/exe/lb2_Color_mpi.cpp +++ b/cpu/exe/lb2_Color_mpi.cpp @@ -36,15 +36,11 @@ inline void UnpackID(int *list, int count, char *recvbuf, char *ID){ //*************************************************************************************** int main(int argc, char **argv) { - //***************************************** - // ***** MPI STUFF **************** - //***************************************** // Initialize MPI - int rank,nprocs; MPI_Init(&argc,&argv); - MPI_Comm comm = MPI_COMM_WORLD; - MPI_Comm_rank(comm,&rank); - MPI_Comm_size(comm,&nprocs); + Utilities::MPI comm( MPI_COMM_WORLD ); + int rank = comm.getRank(); + int nprocs = comm.getSize(); // parallel domain size (# of sub-domains) int nprocx,nprocy,nprocz; int iproc,jproc,kproc; @@ -58,7 +54,6 @@ int main(int argc, char **argv) int rank_yz,rank_YZ,rank_yZ,rank_Yz; //********************************** MPI_Request req1[18],req2[18]; - MPI_Status stat1[18],stat2[18]; if (rank == 0){ printf("********************************************************\n"); @@ -115,31 +110,30 @@ int main(int argc, char **argv) } // ************************************************************** // Broadcast simulation parameters from rank 0 to all other procs - MPI_Barrier(comm); + comm.barrier(); //................................................. 
- MPI_Bcast(&Nz,1,MPI_INT,0,comm); - MPI_Bcast(&nBlocks,1,MPI_INT,0,comm); - MPI_Bcast(&nthreads,1,MPI_INT,0,comm); - MPI_Bcast(&Fx,1,MPI_DOUBLE,0,comm); - MPI_Bcast(&Fy,1,MPI_DOUBLE,0,comm); - MPI_Bcast(&Fz,1,MPI_DOUBLE,0,comm); - MPI_Bcast(&tau,1,MPI_DOUBLE,0,comm); - MPI_Bcast(&alpha,1,MPI_DOUBLE,0,comm); - MPI_Bcast(&beta,1,MPI_DOUBLE,0,comm); - MPI_Bcast(&das,1,MPI_DOUBLE,0,comm); - MPI_Bcast(&dbs,1,MPI_DOUBLE,0,comm); - MPI_Bcast(&pBC,1,MPI_LOGICAL,0,comm); - MPI_Bcast(&din,1,MPI_DOUBLE,0,comm); - MPI_Bcast(&dout,1,MPI_DOUBLE,0,comm); - MPI_Bcast(×tepMax,1,MPI_INT,0,comm); - MPI_Bcast(&interval,1,MPI_INT,0,comm); - MPI_Bcast(&tol,1,MPI_DOUBLE,0,comm); - - MPI_Bcast(&nprocx,1,MPI_INT,0,comm); - MPI_Bcast(&nprocy,1,MPI_INT,0,comm); - MPI_Bcast(&nprocz,1,MPI_INT,0,comm); + comm.bcast(&Nz,1,0); + comm.bcast(&nBlocks,1,0); + comm.bcast(&nthreads,1,0); + comm.bcast(&Fx,1,0); + comm.bcast(&Fy,1,0); + comm.bcast(&Fz,1,0); + comm.bcast(&tau,1,0); + comm.bcast(&alpha,1,0); + comm.bcast(&beta,1,0); + comm.bcast(&das,1,0); + comm.bcast(&dbs,1,0); + comm.bcast(&pBC,1,0); + comm.bcast(&din,1,0); + comm.bcast(&dout,1,0); + comm.bcast(×tepMax,1,0); + comm.bcast(&interval,1,0); + comm.bcast(&tol,1,0); + comm.bcast(&nprocx,1,0); + comm.bcast(&nprocy,1,0); + comm.bcast(&nprocz,1,0); //................................................. - MPI_Barrier(comm); + comm.barrier(); // ************************************************************** // ************************************************************** @@ -169,7 +163,7 @@ int main(int argc, char **argv) } - MPI_Barrier(comm); + comm.barrier(); kproc = rank/(nprocx*nprocy); jproc = (rank-nprocx*nprocy*kproc)/nprocx; iproc = rank-nprocx*nprocy*kproc-nprocz*jproc; @@ -451,7 +445,7 @@ int main(int argc, char **argv) PM.close(); // printf("File porosity = %f\n", double(sum)/N); //........................................................................... - MPI_Barrier(comm); + comm.barrier(); if (rank == 0) cout << "Domain set." << endl; //........................................................................... // Write the communcation structure into a file for debugging @@ -588,7 +582,7 @@ int main(int argc, char **argv) } } } - MPI_Barrier(comm); + comm.barrier(); if (rank==0) printf ("SendLists are ready on host\n"); //...................................................................................... 
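The broadcast block above shows the other recurring conversion: comm.bcast(ptr, count, root) deduces the MPI datatype from the pointer type, so the explicit MPI_INT / MPI_DOUBLE argument and the trailing communicator disappear. A small sketch of the same idiom; the variable values are placeholders, not the driver's real inputs.

#include "common/MPI.h"

static void broadcast_parameters( const Utilities::MPI& comm )
{
    int    Nz  = 0;     // meaningful only on rank 0 before the broadcast
    double tau = 0.0;
    if ( comm.getRank() == 0 ) {
        Nz  = 64;       // hypothetical values read on rank 0
        tau = 1.0;
    }
    comm.bcast( &Nz,  1, 0 );   // was: MPI_Bcast(&Nz, 1,MPI_INT,   0,comm);
    comm.bcast( &tau, 1, 0 );   // was: MPI_Bcast(&tau,1,MPI_DOUBLE,0,comm);
    comm.barrier();
}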
// Use MPI to fill in the recvCounts form the associated processes @@ -599,46 +593,46 @@ int main(int argc, char **argv) //********************************************************************************** // Fill in the recieve counts using MPI sendtag = recvtag = 3; - MPI_Send(&sendCount_x,1,MPI_INT,rank_X,sendtag,comm); - MPI_Recv(&recvCount_X,1,MPI_INT,rank_x,recvtag,comm,MPI_STATUS_IGNORE); - MPI_Send(&sendCount_X,1,MPI_INT,rank_x,sendtag,comm); - MPI_Recv(&recvCount_x,1,MPI_INT,rank_X,recvtag,comm,MPI_STATUS_IGNORE); - MPI_Send(&sendCount_y,1,MPI_INT,rank_Y,sendtag,comm); - MPI_Recv(&recvCount_Y,1,MPI_INT,rank_y,recvtag,comm,MPI_STATUS_IGNORE); - MPI_Send(&sendCount_Y,1,MPI_INT,rank_y,sendtag,comm); - MPI_Recv(&recvCount_y,1,MPI_INT,rank_Y,recvtag,comm,MPI_STATUS_IGNORE); - MPI_Send(&sendCount_z,1,MPI_INT,rank_Z,sendtag,comm); - MPI_Recv(&recvCount_Z,1,MPI_INT,rank_z,recvtag,comm,MPI_STATUS_IGNORE); - MPI_Send(&sendCount_Z,1,MPI_INT,rank_z,sendtag,comm); - MPI_Recv(&recvCount_z,1,MPI_INT,rank_Z,recvtag,comm,MPI_STATUS_IGNORE); + comm.Send(&sendCount_x,1,rank_X,sendtag); + comm.Recv(&recvCount_X,1,rank_x,recvtag); + comm.Send(&sendCount_X,1,rank_x,sendtag); + comm.Recv(&recvCount_x,1,rank_X,recvtag); + comm.Send(&sendCount_y,1,rank_Y,sendtag); + comm.Recv(&recvCount_Y,1,rank_y,recvtag); + comm.Send(&sendCount_Y,1,rank_y,sendtag); + comm.Recv(&recvCount_y,1,rank_Y,recvtag); + comm.Send(&sendCount_z,1,rank_Z,sendtag); + comm.Recv(&recvCount_Z,1,rank_z,recvtag); + comm.Send(&sendCount_Z,1,rank_z,sendtag); + comm.Recv(&recvCount_z,1,rank_Z,recvtag); - MPI_Send(&sendCount_xy,1,MPI_INT,rank_XY,sendtag,comm); - MPI_Recv(&recvCount_XY,1,MPI_INT,rank_xy,recvtag,comm,MPI_STATUS_IGNORE); - MPI_Send(&sendCount_XY,1,MPI_INT,rank_xy,sendtag,comm); - MPI_Recv(&recvCount_xy,1,MPI_INT,rank_XY,recvtag,comm,MPI_STATUS_IGNORE); - MPI_Send(&sendCount_Xy,1,MPI_INT,rank_xY,sendtag,comm); - MPI_Recv(&recvCount_xY,1,MPI_INT,rank_Xy,recvtag,comm,MPI_STATUS_IGNORE); - MPI_Send(&sendCount_xY,1,MPI_INT,rank_Xy,sendtag,comm); - MPI_Recv(&recvCount_Xy,1,MPI_INT,rank_xY,recvtag,comm,MPI_STATUS_IGNORE); + comm.Send(&sendCount_xy,1,rank_XY,sendtag); + comm.Recv(&recvCount_XY,1,rank_xy,recvtag); + comm.Send(&sendCount_XY,1,rank_xy,sendtag); + comm.Recv(&recvCount_xy,1,rank_XY,recvtag); + comm.Send(&sendCount_Xy,1,rank_xY,sendtag); + comm.Recv(&recvCount_xY,1,rank_Xy,recvtag); + comm.Send(&sendCount_xY,1,rank_Xy,sendtag); + comm.Recv(&recvCount_Xy,1,rank_xY,recvtag); - MPI_Send(&sendCount_xz,1,MPI_INT,rank_XZ,sendtag,comm); - MPI_Recv(&recvCount_XZ,1,MPI_INT,rank_xz,recvtag,comm,MPI_STATUS_IGNORE); - MPI_Send(&sendCount_XZ,1,MPI_INT,rank_xz,sendtag,comm); - MPI_Recv(&recvCount_xz,1,MPI_INT,rank_XZ,recvtag,comm,MPI_STATUS_IGNORE); - MPI_Send(&sendCount_Xz,1,MPI_INT,rank_xZ,sendtag,comm); - MPI_Recv(&recvCount_xZ,1,MPI_INT,rank_Xz,recvtag,comm,MPI_STATUS_IGNORE); - MPI_Send(&sendCount_xZ,1,MPI_INT,rank_Xz,sendtag,comm); - MPI_Recv(&recvCount_Xz,1,MPI_INT,rank_xZ,recvtag,comm,MPI_STATUS_IGNORE); + comm.Send(&sendCount_xz,1,rank_XZ,sendtag); + comm.Recv(&recvCount_XZ,1,rank_xz,recvtag); + comm.Send(&sendCount_XZ,1,rank_xz,sendtag); + comm.Recv(&recvCount_xz,1,rank_XZ,recvtag); + comm.Send(&sendCount_Xz,1,rank_xZ,sendtag); + comm.Recv(&recvCount_xZ,1,rank_Xz,recvtag); + comm.Send(&sendCount_xZ,1,rank_Xz,sendtag); + comm.Recv(&recvCount_Xz,1,rank_xZ,recvtag); - MPI_Send(&sendCount_yz,1,MPI_INT,rank_YZ,sendtag,comm); - MPI_Recv(&recvCount_YZ,1,MPI_INT,rank_yz,recvtag,comm,MPI_STATUS_IGNORE); - 
MPI_Send(&sendCount_YZ,1,MPI_INT,rank_yz,sendtag,comm); - MPI_Recv(&recvCount_yz,1,MPI_INT,rank_YZ,recvtag,comm,MPI_STATUS_IGNORE); - MPI_Send(&sendCount_Yz,1,MPI_INT,rank_yZ,sendtag,comm); - MPI_Recv(&recvCount_yZ,1,MPI_INT,rank_Yz,recvtag,comm,MPI_STATUS_IGNORE); - MPI_Send(&sendCount_yZ,1,MPI_INT,rank_Yz,sendtag,comm); - MPI_Recv(&recvCount_Yz,1,MPI_INT,rank_yZ,recvtag,comm,MPI_STATUS_IGNORE); - MPI_Barrier(comm); + comm.Send(&sendCount_yz,1,rank_YZ,sendtag); + comm.Recv(&recvCount_YZ,1,rank_yz,recvtag); + comm.Send(&sendCount_YZ,1,rank_yz,sendtag); + comm.Recv(&recvCount_yz,1,rank_YZ,recvtag); + comm.Send(&sendCount_Yz,1,rank_yZ,sendtag); + comm.Recv(&recvCount_yZ,1,rank_Yz,recvtag); + comm.Send(&sendCount_yZ,1,rank_Yz,sendtag); + comm.Recv(&recvCount_Yz,1,rank_yZ,recvtag); + comm.barrier(); //********************************************************************************** //...................................................................................... int *recvList_x, *recvList_y, *recvList_z, *recvList_X, *recvList_Y, *recvList_Z; @@ -669,48 +663,48 @@ int main(int argc, char **argv) // Use MPI to fill in the appropriate values for recvList // Fill in the recieve lists using MPI sendtag = recvtag = 4; - MPI_Isend(sendList_x, sendCount_x,MPI_INT,rank_X,sendtag,comm,&req1[0]); - MPI_Irecv(recvList_X, recvCount_X,MPI_INT,rank_x,recvtag,comm,&req2[0]); - MPI_Isend(sendList_X, sendCount_X,MPI_INT,rank_x,sendtag,comm,&req1[1]); - MPI_Irecv(recvList_x, recvCount_x,MPI_INT,rank_X,recvtag,comm,&req2[1]); - MPI_Isend(sendList_y, sendCount_y,MPI_INT,rank_Y,sendtag,comm,&req1[2]); - MPI_Irecv(recvList_Y, recvCount_Y,MPI_INT,rank_y,recvtag,comm,&req2[2]); - MPI_Isend(sendList_Y, sendCount_Y,MPI_INT,rank_y,sendtag,comm,&req1[3]); - MPI_Irecv(recvList_y, recvCount_y,MPI_INT,rank_Y,recvtag,comm,&req2[3]); - MPI_Isend(sendList_z, sendCount_z,MPI_INT,rank_Z,sendtag,comm,&req1[4]); - MPI_Irecv(recvList_Z, recvCount_Z,MPI_INT,rank_z,recvtag,comm,&req2[4]); - MPI_Isend(sendList_Z, sendCount_Z,MPI_INT,rank_z,sendtag,comm,&req1[5]); - MPI_Irecv(recvList_z, recvCount_z,MPI_INT,rank_Z,recvtag,comm,&req2[5]); + req1[0] = comm.Isend(sendList_x,sendCount_x,rank_X,sendtag); + req2[0] = comm.Irecv(recvList_X,recvCount_X,rank_x,recvtag); + req1[1] = comm.Isend(sendList_X,sendCount_X,rank_x,sendtag); + req2[1] = comm.Irecv(recvList_x,recvCount_x,rank_X,recvtag); + req1[2] = comm.Isend(sendList_y,sendCount_y,rank_Y,sendtag); + req2[2] = comm.Irecv(recvList_Y,recvCount_Y,rank_y,recvtag); + req1[3] = comm.Isend(sendList_Y,sendCount_Y,rank_y,sendtag); + req2[3] = comm.Irecv(recvList_y,recvCount_y,rank_Y,recvtag); + req1[4] = comm.Isend(sendList_z,sendCount_z,rank_Z,sendtag); + req2[4] = comm.Irecv(recvList_Z,recvCount_Z,rank_z,recvtag); + req1[5] = comm.Isend(sendList_Z,sendCount_Z,rank_z,sendtag); + req2[5] = comm.Irecv(recvList_z,recvCount_z,rank_Z,recvtag); - MPI_Isend(sendList_xy, sendCount_xy,MPI_INT,rank_XY,sendtag,comm,&req1[6]); - MPI_Irecv(recvList_XY, recvCount_XY,MPI_INT,rank_xy,recvtag,comm,&req2[6]); - MPI_Isend(sendList_XY, sendCount_XY,MPI_INT,rank_xy,sendtag,comm,&req1[7]); - MPI_Irecv(recvList_xy, recvCount_xy,MPI_INT,rank_XY,recvtag,comm,&req2[7]); - MPI_Isend(sendList_Xy, sendCount_Xy,MPI_INT,rank_xY,sendtag,comm,&req1[8]); - MPI_Irecv(recvList_xY, recvCount_xY,MPI_INT,rank_Xy,recvtag,comm,&req2[8]); - MPI_Isend(sendList_xY, sendCount_xY,MPI_INT,rank_Xy,sendtag,comm,&req1[9]); - MPI_Irecv(recvList_Xy, recvCount_Xy,MPI_INT,rank_xY,recvtag,comm,&req2[9]); + req1[6] = 
comm.Isend(sendList_xy,sendCount_xy,rank_XY,sendtag); + req2[6] = comm.Irecv(recvList_XY,recvCount_XY,rank_xy,recvtag); + req1[7] = comm.Isend(sendList_XY,sendCount_XY,rank_xy,sendtag); + req2[7] = comm.Irecv(recvList_xy,recvCount_xy,rank_XY,recvtag); + req1[8] = comm.Isend(sendList_Xy,sendCount_Xy,rank_xY,sendtag); + req2[8] = comm.Irecv(recvList_xY,recvCount_xY,rank_Xy,recvtag); + req1[9] = comm.Isend(sendList_xY,sendCount_xY,rank_Xy,sendtag); + req2[9] = comm.Irecv(recvList_Xy,recvCount_Xy,rank_xY,recvtag); - MPI_Isend(sendList_xz, sendCount_xz,MPI_INT,rank_XZ,sendtag,comm,&req1[10]); - MPI_Irecv(recvList_XZ, recvCount_XZ,MPI_INT,rank_xz,recvtag,comm,&req2[10]); - MPI_Isend(sendList_XZ, sendCount_XZ,MPI_INT,rank_xz,sendtag,comm,&req1[11]); - MPI_Irecv(recvList_xz, recvCount_xz,MPI_INT,rank_XZ,recvtag,comm,&req2[11]); - MPI_Isend(sendList_Xz, sendCount_Xz,MPI_INT,rank_xZ,sendtag,comm,&req1[12]); - MPI_Irecv(recvList_xZ, recvCount_xZ,MPI_INT,rank_Xz,recvtag,comm,&req2[12]); - MPI_Isend(sendList_xZ, sendCount_xZ,MPI_INT,rank_Xz,sendtag,comm,&req1[13]); - MPI_Irecv(recvList_Xz, recvCount_Xz,MPI_INT,rank_xZ,recvtag,comm,&req2[13]); + req1[10] = comm.Isend(sendList_xz,sendCount_xz,rank_XZ,sendtag); + req2[10] = comm.Irecv(recvList_XZ,recvCount_XZ,rank_xz,recvtag); + req1[11] = comm.Isend(sendList_XZ,sendCount_XZ,rank_xz,sendtag); + req2[11] = comm.Irecv(recvList_xz,recvCount_xz,rank_XZ,recvtag); + req1[12] = comm.Isend(sendList_Xz,sendCount_Xz,rank_xZ,sendtag); + req2[12] = comm.Irecv(recvList_xZ,recvCount_xZ,rank_Xz,recvtag); + req1[13] = comm.Isend(sendList_xZ,sendCount_xZ,rank_Xz,sendtag); + req2[13] = comm.Irecv(recvList_Xz,recvCount_Xz,rank_xZ,recvtag); - MPI_Isend(sendList_yz, sendCount_yz,MPI_INT,rank_YZ,sendtag,comm,&req1[14]); - MPI_Irecv(recvList_YZ, recvCount_YZ,MPI_INT,rank_yz,recvtag,comm,&req2[14]); - MPI_Isend(sendList_YZ, sendCount_YZ,MPI_INT,rank_yz,sendtag,comm,&req1[15]); - MPI_Irecv(recvList_yz, recvCount_yz,MPI_INT,rank_YZ,recvtag,comm,&req2[15]); - MPI_Isend(sendList_Yz, sendCount_Yz,MPI_INT,rank_yZ,sendtag,comm,&req1[16]); - MPI_Irecv(recvList_yZ, recvCount_yZ,MPI_INT,rank_Yz,recvtag,comm,&req2[16]); - MPI_Isend(sendList_yZ, sendCount_yZ,MPI_INT,rank_Yz,sendtag,comm,&req1[17]); - MPI_Irecv(recvList_Yz, recvCount_Yz,MPI_INT,rank_yZ,recvtag,comm,&req2[17]); - MPI_Waitall(18,req1,stat1); - MPI_Waitall(18,req2,stat2); - MPI_Barrier(comm); + req1[14] = comm.Isend(sendList_yz,sendCount_yz,rank_YZ,sendtag); + req2[14] = comm.Irecv(recvList_YZ,recvCount_YZ,rank_yz,recvtag); + req1[15] = comm.Isend(sendList_YZ,sendCount_YZ,rank_yz,sendtag); + req2[15] = comm.Irecv(recvList_yz,recvCount_yz,rank_YZ,recvtag); + req1[16] = comm.Isend(sendList_Yz,sendCount_Yz,rank_yZ,sendtag); + req2[16] = comm.Irecv(recvList_yZ,recvCount_yZ,rank_Yz,recvtag); + req1[17] = comm.Isend(sendList_yZ,sendCount_yZ,rank_Yz,sendtag); + req2[17] = comm.Irecv(recvList_Yz,recvCount_Yz,rank_yZ,recvtag); + comm.waitAll(18,req1); + comm.waitAll(18,req2); + comm.barrier(); //...................................................................................... 
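The request handling above changes shape as well: Isend/Irecv now return the MPI_Request instead of filling an &req output argument, and waitAll replaces MPI_Waitall while discarding the statuses (which is why the stat1/stat2 arrays were deleted earlier in this file). A reduced two-direction sketch, with hypothetical neighbor ranks and placeholder buffers:

#include "common/MPI.h"

static void exchange_counts( const Utilities::MPI& comm, int rank_x, int rank_X )
{
    int sendCount_x = 0, sendCount_X = 0;   // placeholders for the real send counts
    int recvCount_x = 0, recvCount_X = 0;
    int sendtag = 4, recvtag = 4;
    MPI_Request req1[2], req2[2];
    req1[0] = comm.Isend( &sendCount_x, 1, rank_X, sendtag );   // was MPI_Isend(...,&req1[0])
    req2[0] = comm.Irecv( &recvCount_X, 1, rank_x, recvtag );   // was MPI_Irecv(...,&req2[0])
    req1[1] = comm.Isend( &sendCount_X, 1, rank_x, sendtag );
    req2[1] = comm.Irecv( &recvCount_x, 1, rank_X, recvtag );
    comm.waitAll( 2, req1 );   // was MPI_Waitall(2, req1, stat1)
    comm.waitAll( 2, req2 );
}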
for (int idx=0; idx #include #include -#include +#include "common/MPI.h" #include using namespace std; @@ -64,15 +64,11 @@ inline void UnpackID(int *list, int count, char *recvbuf, char *ID){ int main(int argc, char **argv) { - //***************************************** - // ***** MPI STUFF **************** - //***************************************** // Initialize MPI - int rank,nprocs; MPI_Init(&argc,&argv); - MPI_Comm comm = MPI_COMM_WORLD; - MPI_Comm_rank(comm,&rank); - MPI_Comm_size(comm,&nprocs); + Utilities::MPI comm( MPI_COMM_WORLD ); + int rank = comm.getRank(); + int nprocs = comm.getSize(); // parallel domain size (# of sub-domains) int nprocx,nprocy,nprocz; int iproc,jproc,kproc; @@ -86,7 +82,6 @@ int main(int argc, char **argv) int rank_yz,rank_YZ,rank_yZ,rank_Yz; //********************************** MPI_Request req1[18],req2[18]; - MPI_Status stat1[18],stat2[18]; //********************************** //!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! //!!!!!!!!!!! Random debugging communications!!!!!!!!!!!!!!!!!!!!!!!!!!!!! @@ -136,24 +131,23 @@ int main(int argc, char **argv) // ************************************************************** // Broadcast simulation parameters from rank 0 to all other procs - MPI_Barrier(comm); + comm.barrier(); //................................................. - MPI_Bcast(&Nz,1,MPI_INT,0,comm); - MPI_Bcast(&nBlocks,1,MPI_INT,0,comm); - MPI_Bcast(&nthreads,1,MPI_INT,0,comm); - MPI_Bcast(&tau,1,MPI_DOUBLE,0,comm); - MPI_Bcast(&Fx,1,MPI_DOUBLE,0,comm); - MPI_Bcast(&Fy,1,MPI_DOUBLE,0,comm); - MPI_Bcast(&Fz,1,MPI_DOUBLE,0,comm); - MPI_Bcast(×tepMax,1,MPI_INT,0,comm); - MPI_Bcast(&interval,1,MPI_INT,0,comm); - MPI_Bcast(&tol,1,MPI_DOUBLE,0,comm); - - MPI_Bcast(&nprocx,1,MPI_INT,0,comm); - MPI_Bcast(&nprocy,1,MPI_INT,0,comm); - MPI_Bcast(&nprocz,1,MPI_INT,0,comm); + comm.bcast(&Nz,1,0); + comm.bcast(&nBlocks,1,0); + comm.bcast(&nthreads,1,0); + comm.bcast(&tau,1,0); + comm.bcast(&Fx,1,0); + comm.bcast(&Fy,1,0); + comm.bcast(&Fz,1,0); + comm.bcast(×tepMax,1,0); + comm.bcast(&interval,1,0); + comm.bcast(&tol,1,0); + comm.bcast(&nprocx,1,0); + comm.bcast(&nprocy,1,0); + comm.bcast(&nprocz,1,0); //................................................. - MPI_Barrier(comm); + comm.barrier(); // ************************************************************** double rlx_setA = 1.f/tau; @@ -176,7 +170,7 @@ int main(int argc, char **argv) printf("Sub-domain size = %i x %i x %i\n",Nz,Nz,Nz); } - MPI_Barrier(comm); + comm.barrier(); kproc = rank/(nprocx*nprocy); jproc = (rank-nprocx*nprocy*kproc)/nprocx; iproc = rank-nprocx*nprocy*kproc-nprocz*jproc; @@ -457,7 +451,7 @@ int main(int argc, char **argv) PM.close(); // printf("File porosity = %f\n", double(sum)/N); //........................................................................... - MPI_Barrier(comm); + comm.barrier(); if (rank == 0) cout << "Domain set." << endl; //........................................................................... // Write the communcation structure into a file for debugging @@ -594,7 +588,7 @@ int main(int argc, char **argv) } } } - MPI_Barrier(comm); + comm.barrier(); if (rank==0) printf ("SendLists are ready on host\n"); //...................................................................................... 
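Each of these drivers now starts the same way: MPI_Init and MPI_Finalize remain raw calls, and everything in between goes through a Utilities::MPI object constructed from MPI_COMM_WORLD. Reduced to its skeleton (illustrative only; the printf is a stand-in for the real setup):

#include "common/MPI.h"
#include <cstdio>

int main( int argc, char **argv )
{
    MPI_Init( &argc, &argv );
    Utilities::MPI comm( MPI_COMM_WORLD );
    int rank   = comm.getRank();
    int nprocs = comm.getSize();
    if ( rank == 0 )
        printf( "Running with %i processes\n", nprocs );
    comm.barrier();
    MPI_Finalize();
    return 0;
}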
// Use MPI to fill in the recvCounts form the associated processes @@ -605,46 +599,46 @@ int main(int argc, char **argv) //********************************************************************************** // Fill in the recieve counts using MPI sendtag = recvtag = 3; - MPI_Send(&sendCount_x,1,MPI_INT,rank_X,sendtag,comm); - MPI_Recv(&recvCount_X,1,MPI_INT,rank_x,recvtag,comm,MPI_STATUS_IGNORE); - MPI_Send(&sendCount_X,1,MPI_INT,rank_x,sendtag,comm); - MPI_Recv(&recvCount_x,1,MPI_INT,rank_X,recvtag,comm,MPI_STATUS_IGNORE); - MPI_Send(&sendCount_y,1,MPI_INT,rank_Y,sendtag,comm); - MPI_Recv(&recvCount_Y,1,MPI_INT,rank_y,recvtag,comm,MPI_STATUS_IGNORE); - MPI_Send(&sendCount_Y,1,MPI_INT,rank_y,sendtag,comm); - MPI_Recv(&recvCount_y,1,MPI_INT,rank_Y,recvtag,comm,MPI_STATUS_IGNORE); - MPI_Send(&sendCount_z,1,MPI_INT,rank_Z,sendtag,comm); - MPI_Recv(&recvCount_Z,1,MPI_INT,rank_z,recvtag,comm,MPI_STATUS_IGNORE); - MPI_Send(&sendCount_Z,1,MPI_INT,rank_z,sendtag,comm); - MPI_Recv(&recvCount_z,1,MPI_INT,rank_Z,recvtag,comm,MPI_STATUS_IGNORE); + comm.send(&sendCount_x,1,rank_X,sendtag); + comm.recv(&recvCount_X,1,rank_x,recvtag); + comm.send(&sendCount_X,1,rank_x,sendtag); + comm.recv(&recvCount_x,1,rank_X,recvtag); + comm.send(&sendCount_y,1,rank_Y,sendtag); + comm.recv(&recvCount_Y,1,rank_y,recvtag); + comm.send(&sendCount_Y,1,rank_y,sendtag); + comm.recv(&recvCount_y,1,rank_Y,recvtag); + comm.send(&sendCount_z,1,rank_Z,sendtag); + comm.recv(&recvCount_Z,1,rank_z,recvtag); + comm.send(&sendCount_Z,1,rank_z,sendtag); + comm.recv(&recvCount_z,1,rank_Z,recvtag); - MPI_Send(&sendCount_xy,1,MPI_INT,rank_XY,sendtag,comm); - MPI_Recv(&recvCount_XY,1,MPI_INT,rank_xy,recvtag,comm,MPI_STATUS_IGNORE); - MPI_Send(&sendCount_XY,1,MPI_INT,rank_xy,sendtag,comm); - MPI_Recv(&recvCount_xy,1,MPI_INT,rank_XY,recvtag,comm,MPI_STATUS_IGNORE); - MPI_Send(&sendCount_Xy,1,MPI_INT,rank_xY,sendtag,comm); - MPI_Recv(&recvCount_xY,1,MPI_INT,rank_Xy,recvtag,comm,MPI_STATUS_IGNORE); - MPI_Send(&sendCount_xY,1,MPI_INT,rank_Xy,sendtag,comm); - MPI_Recv(&recvCount_Xy,1,MPI_INT,rank_xY,recvtag,comm,MPI_STATUS_IGNORE); + comm.send(&sendCount_xy,1,rank_XY,sendtag); + comm.recv(&recvCount_XY,1,rank_xy,recvtag); + comm.send(&sendCount_XY,1,rank_xy,sendtag); + comm.recv(&recvCount_xy,1,rank_XY,recvtag); + comm.send(&sendCount_Xy,1,rank_xY,sendtag); + comm.recv(&recvCount_xY,1,rank_Xy,recvtag); + comm.send(&sendCount_xY,1,rank_Xy,sendtag); + comm.recv(&recvCount_Xy,1,rank_xY,recvtag); - MPI_Send(&sendCount_xz,1,MPI_INT,rank_XZ,sendtag,comm); - MPI_Recv(&recvCount_XZ,1,MPI_INT,rank_xz,recvtag,comm,MPI_STATUS_IGNORE); - MPI_Send(&sendCount_XZ,1,MPI_INT,rank_xz,sendtag,comm); - MPI_Recv(&recvCount_xz,1,MPI_INT,rank_XZ,recvtag,comm,MPI_STATUS_IGNORE); - MPI_Send(&sendCount_Xz,1,MPI_INT,rank_xZ,sendtag,comm); - MPI_Recv(&recvCount_xZ,1,MPI_INT,rank_Xz,recvtag,comm,MPI_STATUS_IGNORE); - MPI_Send(&sendCount_xZ,1,MPI_INT,rank_Xz,sendtag,comm); - MPI_Recv(&recvCount_Xz,1,MPI_INT,rank_xZ,recvtag,comm,MPI_STATUS_IGNORE); + comm.send(&sendCount_xz,1,rank_XZ,sendtag); + comm.recv(&recvCount_XZ,1,rank_xz,recvtag); + comm.send(&sendCount_XZ,1,rank_xz,sendtag); + comm.recv(&recvCount_xz,1,rank_XZ,recvtag); + comm.send(&sendCount_Xz,1,rank_xZ,sendtag); + comm.recv(&recvCount_xZ,1,rank_Xz,recvtag); + comm.send(&sendCount_xZ,1,rank_Xz,sendtag); + comm.recv(&recvCount_Xz,1,rank_xZ,recvtag); - MPI_Send(&sendCount_yz,1,MPI_INT,rank_YZ,sendtag,comm); - MPI_Recv(&recvCount_YZ,1,MPI_INT,rank_yz,recvtag,comm,MPI_STATUS_IGNORE); - 
MPI_Send(&sendCount_YZ,1,MPI_INT,rank_yz,sendtag,comm); - MPI_Recv(&recvCount_yz,1,MPI_INT,rank_YZ,recvtag,comm,MPI_STATUS_IGNORE); - MPI_Send(&sendCount_Yz,1,MPI_INT,rank_yZ,sendtag,comm); - MPI_Recv(&recvCount_yZ,1,MPI_INT,rank_Yz,recvtag,comm,MPI_STATUS_IGNORE); - MPI_Send(&sendCount_yZ,1,MPI_INT,rank_Yz,sendtag,comm); - MPI_Recv(&recvCount_Yz,1,MPI_INT,rank_yZ,recvtag,comm,MPI_STATUS_IGNORE); - MPI_Barrier(comm); + comm.send(&sendCount_yz,1,rank_YZ,sendtag); + comm.recv(&recvCount_YZ,1,rank_yz,recvtag); + comm.send(&sendCount_YZ,1,rank_yz,sendtag); + comm.recv(&recvCount_yz,1,rank_YZ,recvtag); + comm.send(&sendCount_Yz,1,rank_yZ,sendtag); + comm.recv(&recvCount_yZ,1,rank_Yz,recvtag); + comm.send(&sendCount_yZ,1,rank_Yz,sendtag); + comm.recv(&recvCount_Yz,1,rank_yZ,recvtag); + comm.barrier(); //********************************************************************************** //...................................................................................... int *recvList_x, *recvList_y, *recvList_z, *recvList_X, *recvList_Y, *recvList_Z; @@ -675,48 +669,48 @@ int main(int argc, char **argv) // Use MPI to fill in the appropriate values for recvList // Fill in the recieve lists using MPI sendtag = recvtag = 4; - MPI_Isend(sendList_x, sendCount_x,MPI_INT,rank_X,sendtag,comm,&req1[0]); - MPI_Irecv(recvList_X, recvCount_X,MPI_INT,rank_x,recvtag,comm,&req2[0]); - MPI_Isend(sendList_X, sendCount_X,MPI_INT,rank_x,sendtag,comm,&req1[1]); - MPI_Irecv(recvList_x, recvCount_x,MPI_INT,rank_X,recvtag,comm,&req2[1]); - MPI_Isend(sendList_y, sendCount_y,MPI_INT,rank_Y,sendtag,comm,&req1[2]); - MPI_Irecv(recvList_Y, recvCount_Y,MPI_INT,rank_y,recvtag,comm,&req2[2]); - MPI_Isend(sendList_Y, sendCount_Y,MPI_INT,rank_y,sendtag,comm,&req1[3]); - MPI_Irecv(recvList_y, recvCount_y,MPI_INT,rank_Y,recvtag,comm,&req2[3]); - MPI_Isend(sendList_z, sendCount_z,MPI_INT,rank_Z,sendtag,comm,&req1[4]); - MPI_Irecv(recvList_Z, recvCount_Z,MPI_INT,rank_z,recvtag,comm,&req2[4]); - MPI_Isend(sendList_Z, sendCount_Z,MPI_INT,rank_z,sendtag,comm,&req1[5]); - MPI_Irecv(recvList_z, recvCount_z,MPI_INT,rank_Z,recvtag,comm,&req2[5]); + req1[0] = comm.Isend(sendList_x,sendCount_x,rank_X,sendtag); + req2[0] = comm.Irecv(recvList_X,recvCount_X,rank_x,recvtag); + req1[1] = comm.Isend(sendList_X,sendCount_X,rank_x,sendtag); + req2[1] = comm.Irecv(recvList_x,recvCount_x,rank_X,recvtag); + req1[2] = comm.Isend(sendList_y,sendCount_y,rank_Y,sendtag); + req2[2] = comm.Irecv(recvList_Y,recvCount_Y,rank_y,recvtag); + req1[3] = comm.Isend(sendList_Y,sendCount_Y,rank_y,sendtag); + req2[3] = comm.Irecv(recvList_y,recvCount_y,rank_Y,recvtag); + req1[4] = comm.Isend(sendList_z,sendCount_z,rank_Z,sendtag); + req2[4] = comm.Irecv(recvList_Z,recvCount_Z,rank_z,recvtag); + req1[5] = comm.Isend(sendList_Z,sendCount_Z,rank_z,sendtag); + req2[5] = comm.Irecv(recvList_z,recvCount_z,rank_Z,recvtag); - MPI_Isend(sendList_xy, sendCount_xy,MPI_INT,rank_XY,sendtag,comm,&req1[6]); - MPI_Irecv(recvList_XY, recvCount_XY,MPI_INT,rank_xy,recvtag,comm,&req2[6]); - MPI_Isend(sendList_XY, sendCount_XY,MPI_INT,rank_xy,sendtag,comm,&req1[7]); - MPI_Irecv(recvList_xy, recvCount_xy,MPI_INT,rank_XY,recvtag,comm,&req2[7]); - MPI_Isend(sendList_Xy, sendCount_Xy,MPI_INT,rank_xY,sendtag,comm,&req1[8]); - MPI_Irecv(recvList_xY, recvCount_xY,MPI_INT,rank_Xy,recvtag,comm,&req2[8]); - MPI_Isend(sendList_xY, sendCount_xY,MPI_INT,rank_Xy,sendtag,comm,&req1[9]); - MPI_Irecv(recvList_Xy, recvCount_Xy,MPI_INT,rank_xY,recvtag,comm,&req2[9]); + req1[6] = 
comm.Isend(sendList_xy,sendCount_xy,rank_XY,sendtag); + req2[6] = comm.Irecv(recvList_XY,recvCount_XY,rank_xy,recvtag); + req1[7] = comm.Isend(sendList_XY,sendCount_XY,rank_xy,sendtag); + req2[7] = comm.Irecv(recvList_xy,recvCount_xy,rank_XY,recvtag); + req1[8] = comm.Isend(sendList_Xy,sendCount_Xy,rank_xY,sendtag); + req2[8] = comm.Irecv(recvList_xY,recvCount_xY,rank_Xy,recvtag); + req1[9] = comm.Isend(sendList_xY,sendCount_xY,rank_Xy,sendtag); + req2[9] = comm.Irecv(recvList_Xy,recvCount_Xy,rank_xY,recvtag); - MPI_Isend(sendList_xz, sendCount_xz,MPI_INT,rank_XZ,sendtag,comm,&req1[10]); - MPI_Irecv(recvList_XZ, recvCount_XZ,MPI_INT,rank_xz,recvtag,comm,&req2[10]); - MPI_Isend(sendList_XZ, sendCount_XZ,MPI_INT,rank_xz,sendtag,comm,&req1[11]); - MPI_Irecv(recvList_xz, recvCount_xz,MPI_INT,rank_XZ,recvtag,comm,&req2[11]); - MPI_Isend(sendList_Xz, sendCount_Xz,MPI_INT,rank_xZ,sendtag,comm,&req1[12]); - MPI_Irecv(recvList_xZ, recvCount_xZ,MPI_INT,rank_Xz,recvtag,comm,&req2[12]); - MPI_Isend(sendList_xZ, sendCount_xZ,MPI_INT,rank_Xz,sendtag,comm,&req1[13]); - MPI_Irecv(recvList_Xz, recvCount_Xz,MPI_INT,rank_xZ,recvtag,comm,&req2[13]); + req1[10] = comm.Isend(sendList_xz,sendCount_xz,rank_XZ,sendtag); + req2[10] = comm.Irecv(recvList_XZ,recvCount_XZ,rank_xz,recvtag); + req1[11] = comm.Isend(sendList_XZ,sendCount_XZ,rank_xz,sendtag); + req2[11] = comm.Irecv(recvList_xz,recvCount_xz,rank_XZ,recvtag); + req1[12] = comm.Isend(sendList_Xz,sendCount_Xz,rank_xZ,sendtag); + req2[12] = comm.Irecv(recvList_xZ,recvCount_xZ,rank_Xz,recvtag); + req1[13] = comm.Isend(sendList_xZ,sendCount_xZ,rank_Xz,sendtag); + req2[13] = comm.Irecv(recvList_Xz,recvCount_Xz,rank_xZ,recvtag); - MPI_Isend(sendList_yz, sendCount_yz,MPI_INT,rank_YZ,sendtag,comm,&req1[14]); - MPI_Irecv(recvList_YZ, recvCount_YZ,MPI_INT,rank_yz,recvtag,comm,&req2[14]); - MPI_Isend(sendList_YZ, sendCount_YZ,MPI_INT,rank_yz,sendtag,comm,&req1[15]); - MPI_Irecv(recvList_yz, recvCount_yz,MPI_INT,rank_YZ,recvtag,comm,&req2[15]); - MPI_Isend(sendList_Yz, sendCount_Yz,MPI_INT,rank_yZ,sendtag,comm,&req1[16]); - MPI_Irecv(recvList_yZ, recvCount_yZ,MPI_INT,rank_Yz,recvtag,comm,&req2[16]); - MPI_Isend(sendList_yZ, sendCount_yZ,MPI_INT,rank_Yz,sendtag,comm,&req1[17]); - MPI_Irecv(recvList_Yz, recvCount_Yz,MPI_INT,rank_yZ,recvtag,comm,&req2[17]); - MPI_Waitall(18,req1,stat1); - MPI_Waitall(18,req2,stat2); - MPI_Barrier(comm); + req1[14] = comm.Isend(sendList_yz,sendCount_yz,rank_YZ,sendtag); + req2[14] = comm.Irecv(recvList_YZ,recvCount_YZ,rank_yz,recvtag); + req1[15] = comm.Isend(sendList_YZ,sendCount_YZ,rank_yz,sendtag); + req2[15] = comm.Irecv(recvList_yz,recvCount_yz,rank_YZ,recvtag); + req1[16] = comm.Isend(sendList_Yz,sendCount_Yz,rank_yZ,sendtag); + req2[16] = comm.Irecv(recvList_yZ,recvCount_yZ,rank_Yz,recvtag); + req1[17] = comm.Isend(sendList_yZ,sendCount_yZ,rank_Yz,sendtag); + req2[17] = comm.Irecv(recvList_Yz,recvCount_Yz,rank_yZ,recvtag); + comm.waitAll(18,req1); + comm.waitAll(18,req2); + comm.barrier(); //...................................................................................... double *sendbuf_x, *sendbuf_y, *sendbuf_z, *sendbuf_X, *sendbuf_Y, *sendbuf_Z; double *sendbuf_xy, *sendbuf_yz, *sendbuf_xz, *sendbuf_Xy, *sendbuf_Yz, *sendbuf_xZ; @@ -915,42 +909,24 @@ int main(int argc, char **argv) PackID(sendList_yZ, sendCount_yZ ,sendID_yZ, id); PackID(sendList_YZ, sendCount_YZ ,sendID_YZ, id); //...................................................................................... 
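The PackID calls above and the sendrecv block that follows form the usual three-step halo update for the ID array: gather boundary sites into contiguous send buffers, exchange them with the neighboring sub-domains, then scatter the received values into the halo. Collapsed to a single direction as a sketch; the parameter names are stand-ins, and PackID/UnpackID are assumed to be the helpers defined at the top of this file.

#include "common/MPI.h"

// Sketch only: one direction of the pack / exchange / unpack halo update.
static void halo_update_one_direction( const Utilities::MPI& comm,
                                       int *sendList, int sendCount, char *sendbuf,
                                       int *recvList, int recvCount, char *recvbuf,
                                       char *ID, int dest, int source, int tag )
{
    PackID( sendList, sendCount, sendbuf, ID );            // gather boundary values
    comm.sendrecv( sendbuf, sendCount, dest, tag,
                   recvbuf, recvCount, source, tag );      // blocking exchange
    UnpackID( recvList, recvCount, recvbuf, ID );          // scatter into the halo
}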
- MPI_Sendrecv(sendID_x,sendCount_x,MPI_CHAR,rank_X,sendtag, - recvID_X,recvCount_X,MPI_CHAR,rank_x,recvtag,comm,MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_X,sendCount_X,MPI_CHAR,rank_x,sendtag, - recvID_x,recvCount_x,MPI_CHAR,rank_X,recvtag,comm,MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_y,sendCount_y,MPI_CHAR,rank_Y,sendtag, - recvID_Y,recvCount_Y,MPI_CHAR,rank_y,recvtag,comm,MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_Y,sendCount_Y,MPI_CHAR,rank_y,sendtag, - recvID_y,recvCount_y,MPI_CHAR,rank_Y,recvtag,comm,MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_z,sendCount_z,MPI_CHAR,rank_Z,sendtag, - recvID_Z,recvCount_Z,MPI_CHAR,rank_z,recvtag,comm,MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_Z,sendCount_Z,MPI_CHAR,rank_z,sendtag, - recvID_z,recvCount_z,MPI_CHAR,rank_Z,recvtag,comm,MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_xy,sendCount_xy,MPI_CHAR,rank_XY,sendtag, - recvID_XY,recvCount_XY,MPI_CHAR,rank_xy,recvtag,comm,MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_XY,sendCount_XY,MPI_CHAR,rank_xy,sendtag, - recvID_xy,recvCount_xy,MPI_CHAR,rank_XY,recvtag,comm,MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_Xy,sendCount_Xy,MPI_CHAR,rank_xY,sendtag, - recvID_xY,recvCount_xY,MPI_CHAR,rank_Xy,recvtag,comm,MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_xY,sendCount_xY,MPI_CHAR,rank_Xy,sendtag, - recvID_Xy,recvCount_Xy,MPI_CHAR,rank_xY,recvtag,comm,MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_xz,sendCount_xz,MPI_CHAR,rank_XZ,sendtag, - recvID_XZ,recvCount_XZ,MPI_CHAR,rank_xz,recvtag,comm,MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_XZ,sendCount_XZ,MPI_CHAR,rank_xz,sendtag, - recvID_xz,recvCount_xz,MPI_CHAR,rank_XZ,recvtag,comm,MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_Xz,sendCount_Xz,MPI_CHAR,rank_xZ,sendtag, - recvID_xZ,recvCount_xZ,MPI_CHAR,rank_Xz,recvtag,comm,MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_xZ,sendCount_xZ,MPI_CHAR,rank_Xz,sendtag, - recvID_Xz,recvCount_Xz,MPI_CHAR,rank_xZ,recvtag,comm,MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_yz,sendCount_yz,MPI_CHAR,rank_YZ,sendtag, - recvID_YZ,recvCount_YZ,MPI_CHAR,rank_yz,recvtag,comm,MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_YZ,sendCount_YZ,MPI_CHAR,rank_yz,sendtag, - recvID_yz,recvCount_yz,MPI_CHAR,rank_YZ,recvtag,comm,MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_Yz,sendCount_Yz,MPI_CHAR,rank_yZ,sendtag, - recvID_yZ,recvCount_yZ,MPI_CHAR,rank_Yz,recvtag,comm,MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_yZ,sendCount_yZ,MPI_CHAR,rank_Yz,sendtag, - recvID_Yz,recvCount_Yz,MPI_CHAR,rank_yZ,recvtag,comm,MPI_STATUS_IGNORE); + comm.sendrecv(sendID_x,sendCount_x,rank_X,sendtag,recvID_X,recvCount_X,rank_x,recvtag); + comm.sendrecv(sendID_X,sendCount_X,rank_x,sendtag,recvID_x,recvCount_x,rank_X,recvtag); + comm.sendrecv(sendID_y,sendCount_y,rank_Y,sendtag,recvID_Y,recvCount_Y,rank_y,recvtag); + comm.sendrecv(sendID_Y,sendCount_Y,rank_y,sendtag,recvID_y,recvCount_y,rank_Y,recvtag); + comm.sendrecv(sendID_z,sendCount_z,rank_Z,sendtag,recvID_Z,recvCount_Z,rank_z,recvtag); + comm.sendrecv(sendID_Z,sendCount_Z,rank_z,sendtag,recvID_z,recvCount_z,rank_Z,recvtag); + comm.sendrecv(sendID_xy,sendCount_xy,rank_XY,sendtag,recvID_XY,recvCount_XY,rank_xy,recvtag); + comm.sendrecv(sendID_XY,sendCount_XY,rank_xy,sendtag,recvID_xy,recvCount_xy,rank_XY,recvtag); + comm.sendrecv(sendID_Xy,sendCount_Xy,rank_xY,sendtag,recvID_xY,recvCount_xY,rank_Xy,recvtag); + comm.sendrecv(sendID_xY,sendCount_xY,rank_Xy,sendtag,recvID_Xy,recvCount_Xy,rank_xY,recvtag); + comm.sendrecv(sendID_xz,sendCount_xz,rank_XZ,sendtag,recvID_XZ,recvCount_XZ,rank_xz,recvtag); + 
comm.sendrecv(sendID_XZ,sendCount_XZ,rank_xz,sendtag,recvID_xz,recvCount_xz,rank_XZ,recvtag); + comm.sendrecv(sendID_Xz,sendCount_Xz,rank_xZ,sendtag,recvID_xZ,recvCount_xZ,rank_Xz,recvtag); + comm.sendrecv(sendID_xZ,sendCount_xZ,rank_Xz,sendtag,recvID_Xz,recvCount_Xz,rank_xZ,recvtag); + comm.sendrecv(sendID_yz,sendCount_yz,rank_YZ,sendtag,recvID_YZ,recvCount_YZ,rank_yz,recvtag); + comm.sendrecv(sendID_YZ,sendCount_YZ,rank_yz,sendtag,recvID_yz,recvCount_yz,rank_YZ,recvtag); + comm.sendrecv(sendID_Yz,sendCount_Yz,rank_yZ,sendtag,recvID_yZ,recvCount_yZ,rank_Yz,recvtag); + comm.sendrecv(sendID_yZ,sendCount_yZ,rank_Yz,sendtag,recvID_Yz,recvCount_Yz,rank_yZ,recvtag); //...................................................................................... UnpackID(recvList_x, recvCount_x ,recvID_x, id); UnpackID(recvList_X, recvCount_X ,recvID_X, id); @@ -983,7 +959,7 @@ int main(int argc, char **argv) free(recvID_yz); free(recvID_YZ); free(recvID_yZ); free(recvID_Yz); //...................................................................................... if (rank==0) printf ("Devices are ready to communicate. \n"); - MPI_Barrier(comm); + comm.barrier(); //...........device phase ID................................................. if (rank==0) printf ("Copying phase ID to device \n"); @@ -1023,8 +999,8 @@ int main(int argc, char **argv) //.......create and start timer............ double starttime,stoptime,cputime; - MPI_Barrier(comm); - starttime = MPI_Wtime(); + comm.barrier(); + starttime = Utilities::MPI::time(); // Old cuda timer is below // cudaEvent_t start, stop; // float time; @@ -1136,48 +1112,48 @@ int main(int argc, char **argv) //................................................................................... // Send all the distributions - MPI_Isend(sendbuf_x, 5*sendCount_x,MPI_DOUBLE,rank_X,sendtag,comm,&req1[0]); - MPI_Irecv(recvbuf_X, 5*recvCount_X,MPI_DOUBLE,rank_x,recvtag,comm,&req2[0]); - MPI_Isend(sendbuf_X, 5*sendCount_X,MPI_DOUBLE,rank_x,sendtag,comm,&req1[1]); - MPI_Irecv(recvbuf_x, 5*recvCount_x,MPI_DOUBLE,rank_X,recvtag,comm,&req2[1]); - MPI_Isend(sendbuf_y, 5*sendCount_y,MPI_DOUBLE,rank_Y,sendtag,comm,&req1[2]); - MPI_Irecv(recvbuf_Y, 5*recvCount_Y,MPI_DOUBLE,rank_y,recvtag,comm,&req2[2]); - MPI_Isend(sendbuf_Y, 5*sendCount_Y,MPI_DOUBLE,rank_y,sendtag,comm,&req1[3]); - MPI_Irecv(recvbuf_y, 5*recvCount_y,MPI_DOUBLE,rank_Y,recvtag,comm,&req2[3]); - MPI_Isend(sendbuf_z, 5*sendCount_z,MPI_DOUBLE,rank_Z,sendtag,comm,&req1[4]); - MPI_Irecv(recvbuf_Z, 5*recvCount_Z,MPI_DOUBLE,rank_z,recvtag,comm,&req2[4]); - MPI_Isend(sendbuf_Z, 5*sendCount_Z,MPI_DOUBLE,rank_z,sendtag,comm,&req1[5]); - MPI_Irecv(recvbuf_z, 5*recvCount_z,MPI_DOUBLE,rank_Z,recvtag,comm,&req2[5]); - MPI_Isend(sendbuf_xy, sendCount_xy,MPI_DOUBLE,rank_XY,sendtag,comm,&req1[6]); - MPI_Irecv(recvbuf_XY, recvCount_XY,MPI_DOUBLE,rank_xy,recvtag,comm,&req2[6]); - MPI_Isend(sendbuf_XY, sendCount_XY,MPI_DOUBLE,rank_xy,sendtag,comm,&req1[7]); - MPI_Irecv(recvbuf_xy, recvCount_xy,MPI_DOUBLE,rank_XY,recvtag,comm,&req2[7]); - MPI_Isend(sendbuf_Xy, sendCount_Xy,MPI_DOUBLE,rank_xY,sendtag,comm,&req1[8]); - MPI_Irecv(recvbuf_xY, recvCount_xY,MPI_DOUBLE,rank_Xy,recvtag,comm,&req2[8]); - MPI_Isend(sendbuf_xY, sendCount_xY,MPI_DOUBLE,rank_Xy,sendtag,comm,&req1[9]); - MPI_Irecv(recvbuf_Xy, recvCount_Xy,MPI_DOUBLE,rank_xY,recvtag,comm,&req2[9]); - MPI_Isend(sendbuf_xz, sendCount_xz,MPI_DOUBLE,rank_XZ,sendtag,comm,&req1[10]); - MPI_Irecv(recvbuf_XZ, recvCount_XZ,MPI_DOUBLE,rank_xz,recvtag,comm,&req2[10]); - MPI_Isend(sendbuf_XZ, 
sendCount_XZ,MPI_DOUBLE,rank_xz,sendtag,comm,&req1[11]); - MPI_Irecv(recvbuf_xz, recvCount_xz,MPI_DOUBLE,rank_XZ,recvtag,comm,&req2[11]); - MPI_Isend(sendbuf_Xz, sendCount_Xz,MPI_DOUBLE,rank_xZ,sendtag,comm,&req1[12]); - MPI_Irecv(recvbuf_xZ, recvCount_xZ,MPI_DOUBLE,rank_Xz,recvtag,comm,&req2[12]); - MPI_Isend(sendbuf_xZ, sendCount_xZ,MPI_DOUBLE,rank_Xz,sendtag,comm,&req1[13]); - MPI_Irecv(recvbuf_Xz, recvCount_Xz,MPI_DOUBLE,rank_xZ,recvtag,comm,&req2[13]); - MPI_Isend(sendbuf_yz, sendCount_yz,MPI_DOUBLE,rank_YZ,sendtag,comm,&req1[14]); - MPI_Irecv(recvbuf_YZ, recvCount_YZ,MPI_DOUBLE,rank_yz,recvtag,comm,&req2[14]); - MPI_Isend(sendbuf_YZ, sendCount_YZ,MPI_DOUBLE,rank_yz,sendtag,comm,&req1[15]); - MPI_Irecv(recvbuf_yz, recvCount_yz,MPI_DOUBLE,rank_YZ,recvtag,comm,&req2[15]); - MPI_Isend(sendbuf_Yz, sendCount_Yz,MPI_DOUBLE,rank_yZ,sendtag,comm,&req1[16]); - MPI_Irecv(recvbuf_yZ, recvCount_yZ,MPI_DOUBLE,rank_Yz,recvtag,comm,&req2[16]); - MPI_Isend(sendbuf_yZ, sendCount_yZ,MPI_DOUBLE,rank_Yz,sendtag,comm,&req1[17]); - MPI_Irecv(recvbuf_Yz, recvCount_Yz,MPI_DOUBLE,rank_yZ,recvtag,comm,&req2[17]); + req1[0] = comm.Isend(sendbuf_x,5*sendCount_x,rank_X,sendtag); + req2[0] = comm.Irecv(recvbuf_X,5*recvCount_X,rank_x,recvtag); + req1[1] = comm.Isend(sendbuf_X,5*sendCount_X,rank_x,sendtag); + req2[1] = comm.Irecv(recvbuf_x,5*recvCount_x,rank_X,recvtag); + req1[2] = comm.Isend(sendbuf_y,5*sendCount_y,rank_Y,sendtag); + req2[2] = comm.Irecv(recvbuf_Y,5*recvCount_Y,rank_y,recvtag); + req1[3] = comm.Isend(sendbuf_Y,5*sendCount_Y,rank_y,sendtag); + req2[3] = comm.Irecv(recvbuf_y,5*recvCount_y,rank_Y,recvtag); + req1[4] = comm.Isend(sendbuf_z,5*sendCount_z,rank_Z,sendtag); + req2[4] = comm.Irecv(recvbuf_Z,5*recvCount_Z,rank_z,recvtag); + req1[5] = comm.Isend(sendbuf_Z,5*sendCount_Z,rank_z,sendtag); + req2[5] = comm.Irecv(recvbuf_z,5*recvCount_z,rank_Z,recvtag); + req1[6] = comm.Isend(sendbuf_xy,sendCount_xy,rank_XY,sendtag); + req2[6] = comm.Irecv(recvbuf_XY,recvCount_XY,rank_xy,recvtag); + req1[7] = comm.Isend(sendbuf_XY,sendCount_XY,rank_xy,sendtag); + req2[7] = comm.Irecv(recvbuf_xy,recvCount_xy,rank_XY,recvtag); + req1[8] = comm.Isend(sendbuf_Xy,sendCount_Xy,rank_xY,sendtag); + req2[8] = comm.Irecv(recvbuf_xY,recvCount_xY,rank_Xy,recvtag); + req1[9] = comm.Isend(sendbuf_xY,sendCount_xY,rank_Xy,sendtag); + req2[9] = comm.Irecv(recvbuf_Xy,recvCount_Xy,rank_xY,recvtag); + req1[10] = comm.Isend(sendbuf_xz,sendCount_xz,rank_XZ,sendtag); + req2[10] = comm.Irecv(recvbuf_XZ,recvCount_XZ,rank_xz,recvtag); + req1[11] = comm.Isend(sendbuf_XZ,sendCount_XZ,rank_xz,sendtag); + req2[11] = comm.Irecv(recvbuf_xz,recvCount_xz,rank_XZ,recvtag); + req1[12] = comm.Isend(sendbuf_Xz,sendCount_Xz,rank_xZ,sendtag); + req2[12] = comm.Irecv(recvbuf_xZ,recvCount_xZ,rank_Xz,recvtag); + req1[13] = comm.Isend(sendbuf_xZ,sendCount_xZ,rank_Xz,sendtag); + req2[13] = comm.Irecv(recvbuf_Xz,recvCount_Xz,rank_xZ,recvtag); + req1[14] = comm.Isend(sendbuf_yz,sendCount_yz,rank_YZ,sendtag); + req2[14] = comm.Irecv(recvbuf_YZ,recvCount_YZ,rank_yz,recvtag); + req1[15] = comm.Isend(sendbuf_YZ,sendCount_YZ,rank_yz,sendtag); + req2[15] = comm.Irecv(recvbuf_yz,recvCount_yz,rank_YZ,recvtag); + req1[16] = comm.Isend(sendbuf_Yz,sendCount_Yz,rank_yZ,sendtag); + req2[16] = comm.Irecv(recvbuf_yZ,recvCount_yZ,rank_Yz,recvtag); + req1[17] = comm.Isend(sendbuf_yZ,sendCount_yZ,rank_Yz,sendtag); + req2[17] = comm.Irecv(recvbuf_Yz,recvCount_Yz,rank_yZ,recvtag); //................................................................................... 
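One more small conversion in this driver is the wall-clock timing around the main loop, which now uses the static Utilities::MPI::time() helper in place of MPI_Wtime(). A sketch of the timing pattern; do_timestep() is a hypothetical stand-in for the per-step work.

#include "common/MPI.h"
#include <cstdio>

static void do_timestep() { /* hypothetical per-step work */ }

static void timed_loop( const Utilities::MPI& comm, int timestepMax )
{
    comm.barrier();
    double starttime = Utilities::MPI::time();   // was: MPI_Wtime()
    for ( int timestep = 0; timestep < timestepMax; timestep++ )
        do_timestep();
    comm.barrier();
    double stoptime = Utilities::MPI::time();
    double cputime  = stoptime - starttime;
    if ( comm.getRank() == 0 )
        printf( "Elapsed time: %f seconds\n", cputime );
}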
//................................................................................... // Wait for completion of D3Q19 communication - MPI_Waitall(18,req1,stat1); - MPI_Waitall(18,req2,stat2); + comm.waitAll(18,req1); + comm.waitAll(18,req2); //................................................................................... // Unpack the distributions on the device //................................................................................... @@ -1260,7 +1236,7 @@ int main(int argc, char **argv) //***************************************************************************** //***************************************************************************** - MPI_Barrier(comm); + comm.barrier(); // Iteration completed! timestep++; //................................................................... @@ -1269,8 +1245,8 @@ int main(int argc, char **argv) // cudaThreadSynchronize(); dvc_Barrier(); - MPI_Barrier(comm); - stoptime = MPI_Wtime(); + comm.barrier(); + stoptime = Utilities::MPI::time(); // cout << "CPU time: " << (stoptime - starttime) << " seconds" << endl; cputime = stoptime - starttime; // cout << "Lattice update rate: "<< double(Nx*Ny*Nz*timestep)/cputime/1000000 << " MLUPS" << endl; @@ -1304,7 +1280,7 @@ int main(int argc, char **argv) // dvc_CopyToDevice(velocity, vel, 3*dist_mem_size, dvc_CopyToDeviceDeviceToHost); //.............................................................................. // cudaThreadSynchronize(); -// MPI_Barrier(comm); +// comm.barrier(); //............................................................ //....Write the z-velocity to test poiseuille flow............ // double vz,vz_avg; @@ -1333,7 +1309,7 @@ int main(int argc, char **argv) // free (velocity); free(id); // **************************************************** - MPI_Barrier(comm); + comm.barrier(); MPI_Finalize(); // **************************************************** } diff --git a/gpu/exe/lb1_MRT_mpi.cu b/gpu/exe/lb1_MRT_mpi.cu index 0c0863c7..776ea29f 100644 --- a/gpu/exe/lb1_MRT_mpi.cu +++ b/gpu/exe/lb1_MRT_mpi.cu @@ -1,8 +1,10 @@ +#include "common/MPI.h" + #include #include #include #include -#include + inline void PackID(int *list, int count, char *sendbuf, char *ID){ // Fill in the phase ID values from neighboring processors @@ -553,15 +555,11 @@ void Write_Out(double *array, int Nx, int Ny, int Nz){ int main(int argc, char **argv) { - //***************************************** - // ***** MPI STUFF **************** - //***************************************** // Initialize MPI - int rank,nprocs; MPI_Init(&argc,&argv); - MPI_Comm comm = MPI_COMM_WORLD; - MPI_Comm_rank(comm,&rank); - MPI_Comm_size(comm,&nprocs); + Utilities::MPI comm( MPI_COMM_WORLD ); + int rank = comm.getRank(); + int nprocs = comm.getSize(); // parallel domain size (# of sub-domains) int nprocx,nprocy,nprocz; int iproc,jproc,kproc; @@ -575,7 +573,6 @@ int main(int argc, char **argv) int rank_yz,rank_YZ,rank_yZ,rank_Yz; //********************************** MPI_Request req1[18],req2[18]; - MPI_Status stat1[18],stat2[18]; //********************************** //!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! //!!!!!!!!!!! Random debugging communications!!!!!!!!!!!!!!!!!!!!!!!!!!!!! @@ -625,24 +622,21 @@ int main(int argc, char **argv) // ************************************************************** // Broadcast simulation parameters from rank 0 to all other procs - MPI_Barrier(comm); + comm.barrier(); //................................................. 
- MPI_Bcast(&Nz,1,MPI_INT,0,comm); - MPI_Bcast(&nBlocks,1,MPI_INT,0,comm); - MPI_Bcast(&nthreads,1,MPI_INT,0,comm); - MPI_Bcast(&tau,1,MPI_DOUBLE,0,comm); - MPI_Bcast(&Fx,1,MPI_DOUBLE,0,comm); - MPI_Bcast(&Fy,1,MPI_DOUBLE,0,comm); - MPI_Bcast(&Fz,1,MPI_DOUBLE,0,comm); - MPI_Bcast(&iterMax,1,MPI_INT,0,comm); - MPI_Bcast(&interval,1,MPI_INT,0,comm); - MPI_Bcast(&tol,1,MPI_DOUBLE,0,comm); - - MPI_Bcast(&nprocx,1,MPI_INT,0,comm); - MPI_Bcast(&nprocy,1,MPI_INT,0,comm); - MPI_Bcast(&nprocz,1,MPI_INT,0,comm); - //................................................. - MPI_Barrier(comm); + comm.bcast(&Nz,1,0); + comm.bcast(&nBlocks,1,0); + comm.bcast(&nthreads,1,0); + comm.bcast(&tau,1,0); + comm.bcast(&Fx,1,0); + comm.bcast(&Fy,1,0); + comm.bcast(&Fz,1,0); + comm.bcast(&iterMax,1,0); + comm.bcast(&interval,1,0); + comm.bcast(&tol,1,0); + comm.bcast(&nprocx,1,0); + comm.bcast(&nprocy,1,0); + comm.bcast(&nprocz,1,0); // ************************************************************** double rlx_setA = 1.f/tau; @@ -665,7 +659,7 @@ int main(int argc, char **argv) printf("Sub-domain size = %i x %i x %i\n",Nz,Nz,Nz); } - MPI_Barrier(comm); + comm.barrier(); kproc = rank/(nprocx*nprocy); jproc = (rank-nprocx*nprocy*kproc)/nprocx; iproc = rank-nprocx*nprocy*kproc-nprocz*jproc; @@ -946,7 +940,7 @@ int main(int argc, char **argv) PM.close(); // printf("File porosity = %f\n", double(sum)/N); //........................................................................... - MPI_Barrier(comm); + comm.barrier(); if (rank == 0) cout << "Domain set." << endl; //........................................................................... // Write the communcation structure into a file for debugging @@ -1083,7 +1077,7 @@ int main(int argc, char **argv) } } } - MPI_Barrier(comm); + comm.barrier(); if (rank==0) printf ("SendLists are ready on host\n"); //...................................................................................... 
// Use MPI to fill in the recvCounts form the associated processes @@ -1094,46 +1088,46 @@ int main(int argc, char **argv) //********************************************************************************** // Fill in the recieve counts using MPI sendtag = recvtag = 3; - MPI_Send(&sendCount_x,1,MPI_INT,rank_X,sendtag,comm); - MPI_Recv(&recvCount_X,1,MPI_INT,rank_x,recvtag,comm,MPI_STATUS_IGNORE); - MPI_Send(&sendCount_X,1,MPI_INT,rank_x,sendtag,comm); - MPI_Recv(&recvCount_x,1,MPI_INT,rank_X,recvtag,comm,MPI_STATUS_IGNORE); - MPI_Send(&sendCount_y,1,MPI_INT,rank_Y,sendtag,comm); - MPI_Recv(&recvCount_Y,1,MPI_INT,rank_y,recvtag,comm,MPI_STATUS_IGNORE); - MPI_Send(&sendCount_Y,1,MPI_INT,rank_y,sendtag,comm); - MPI_Recv(&recvCount_y,1,MPI_INT,rank_Y,recvtag,comm,MPI_STATUS_IGNORE); - MPI_Send(&sendCount_z,1,MPI_INT,rank_Z,sendtag,comm); - MPI_Recv(&recvCount_Z,1,MPI_INT,rank_z,recvtag,comm,MPI_STATUS_IGNORE); - MPI_Send(&sendCount_Z,1,MPI_INT,rank_z,sendtag,comm); - MPI_Recv(&recvCount_z,1,MPI_INT,rank_Z,recvtag,comm,MPI_STATUS_IGNORE); + comm.send(&sendCount_x,1,rank_X,sendtag); + comm.recv(&recvCount_X,1,rank_x,recvtag); + comm.send(&sendCount_X,1,rank_x,sendtag); + comm.recv(&recvCount_x,1,rank_X,recvtag); + comm.send(&sendCount_y,1,rank_Y,sendtag); + comm.recv(&recvCount_Y,1,rank_y,recvtag); + comm.send(&sendCount_Y,1,rank_y,sendtag); + comm.recv(&recvCount_y,1,rank_Y,recvtag); + comm.send(&sendCount_z,1,rank_Z,sendtag); + comm.recv(&recvCount_Z,1,rank_z,recvtag); + comm.send(&sendCount_Z,1,rank_z,sendtag); + comm.recv(&recvCount_z,1,rank_Z,recvtag); - MPI_Send(&sendCount_xy,1,MPI_INT,rank_XY,sendtag,comm); - MPI_Recv(&recvCount_XY,1,MPI_INT,rank_xy,recvtag,comm,MPI_STATUS_IGNORE); - MPI_Send(&sendCount_XY,1,MPI_INT,rank_xy,sendtag,comm); - MPI_Recv(&recvCount_xy,1,MPI_INT,rank_XY,recvtag,comm,MPI_STATUS_IGNORE); - MPI_Send(&sendCount_Xy,1,MPI_INT,rank_xY,sendtag,comm); - MPI_Recv(&recvCount_xY,1,MPI_INT,rank_Xy,recvtag,comm,MPI_STATUS_IGNORE); - MPI_Send(&sendCount_xY,1,MPI_INT,rank_Xy,sendtag,comm); - MPI_Recv(&recvCount_Xy,1,MPI_INT,rank_xY,recvtag,comm,MPI_STATUS_IGNORE); + comm.send(&sendCount_xy,1,rank_XY,sendtag); + comm.recv(&recvCount_XY,1,rank_xy,recvtag); + comm.send(&sendCount_XY,1,rank_xy,sendtag); + comm.recv(&recvCount_xy,1,rank_XY,recvtag); + comm.send(&sendCount_Xy,1,rank_xY,sendtag); + comm.recv(&recvCount_xY,1,rank_Xy,recvtag); + comm.send(&sendCount_xY,1,rank_Xy,sendtag); + comm.recv(&recvCount_Xy,1,rank_xY,recvtag); - MPI_Send(&sendCount_xz,1,MPI_INT,rank_XZ,sendtag,comm); - MPI_Recv(&recvCount_XZ,1,MPI_INT,rank_xz,recvtag,comm,MPI_STATUS_IGNORE); - MPI_Send(&sendCount_XZ,1,MPI_INT,rank_xz,sendtag,comm); - MPI_Recv(&recvCount_xz,1,MPI_INT,rank_XZ,recvtag,comm,MPI_STATUS_IGNORE); - MPI_Send(&sendCount_Xz,1,MPI_INT,rank_xZ,sendtag,comm); - MPI_Recv(&recvCount_xZ,1,MPI_INT,rank_Xz,recvtag,comm,MPI_STATUS_IGNORE); - MPI_Send(&sendCount_xZ,1,MPI_INT,rank_Xz,sendtag,comm); - MPI_Recv(&recvCount_Xz,1,MPI_INT,rank_xZ,recvtag,comm,MPI_STATUS_IGNORE); + comm.send(&sendCount_xz,1,rank_XZ,sendtag); + comm.recv(&recvCount_XZ,1,rank_xz,recvtag); + comm.send(&sendCount_XZ,1,rank_xz,sendtag); + comm.recv(&recvCount_xz,1,rank_XZ,recvtag); + comm.send(&sendCount_Xz,1,rank_xZ,sendtag); + comm.recv(&recvCount_xZ,1,rank_Xz,recvtag); + comm.send(&sendCount_xZ,1,rank_Xz,sendtag); + comm.recv(&recvCount_Xz,1,rank_xZ,recvtag); - MPI_Send(&sendCount_yz,1,MPI_INT,rank_YZ,sendtag,comm); - MPI_Recv(&recvCount_YZ,1,MPI_INT,rank_yz,recvtag,comm,MPI_STATUS_IGNORE); - 
MPI_Send(&sendCount_YZ,1,MPI_INT,rank_yz,sendtag,comm); - MPI_Recv(&recvCount_yz,1,MPI_INT,rank_YZ,recvtag,comm,MPI_STATUS_IGNORE); - MPI_Send(&sendCount_Yz,1,MPI_INT,rank_yZ,sendtag,comm); - MPI_Recv(&recvCount_yZ,1,MPI_INT,rank_Yz,recvtag,comm,MPI_STATUS_IGNORE); - MPI_Send(&sendCount_yZ,1,MPI_INT,rank_Yz,sendtag,comm); - MPI_Recv(&recvCount_Yz,1,MPI_INT,rank_yZ,recvtag,comm,MPI_STATUS_IGNORE); - MPI_Barrier(comm); + comm.send(&sendCount_yz,1,rank_YZ,sendtag); + comm.recv(&recvCount_YZ,1,rank_yz,recvtag); + comm.send(&sendCount_YZ,1,rank_yz,sendtag); + comm.recv(&recvCount_yz,1,rank_YZ,recvtag); + comm.send(&sendCount_Yz,1,rank_yZ,sendtag); + comm.recv(&recvCount_yZ,1,rank_Yz,recvtag); + comm.send(&sendCount_yZ,1,rank_Yz,sendtag); + comm.recv(&recvCount_Yz,1,rank_yZ,recvtag); + comm.barrier(); //********************************************************************************** //recvCount_x = sendCount_x; //recvCount_X = sendCount_X; @@ -1157,7 +1151,7 @@ int main(int argc, char **argv) //...................................................................................... // Use MPI to fill in the appropriate values // int tag = 5; - // MPI_Sendrecv(sendCount_x,1,MPI_INT,rank_x,tag,sendCount_X,1,MPI_INT,comm,req); + // Mcomm.sendrecv(sendCount_x,1,rank_x,tag,sendCount_X,1); //...................................................................................... int *recvList_x, *recvList_y, *recvList_z, *recvList_X, *recvList_Y, *recvList_Z; int *recvList_xy, *recvList_yz, *recvList_xz, *recvList_Xy, *recvList_Yz, *recvList_xZ; @@ -1187,48 +1181,48 @@ int main(int argc, char **argv) // Use MPI to fill in the appropriate values for recvList // Fill in the recieve lists using MPI sendtag = recvtag = 4; - MPI_Isend(sendList_x, sendCount_x,MPI_INT,rank_X,sendtag,comm,&req1[0]); - MPI_Irecv(recvList_X, recvCount_X,MPI_INT,rank_x,recvtag,comm,&req2[0]); - MPI_Isend(sendList_X, sendCount_X,MPI_INT,rank_x,sendtag,comm,&req1[1]); - MPI_Irecv(recvList_x, recvCount_x,MPI_INT,rank_X,recvtag,comm,&req2[1]); - MPI_Isend(sendList_y, sendCount_y,MPI_INT,rank_Y,sendtag,comm,&req1[2]); - MPI_Irecv(recvList_Y, recvCount_Y,MPI_INT,rank_y,recvtag,comm,&req2[2]); - MPI_Isend(sendList_Y, sendCount_Y,MPI_INT,rank_y,sendtag,comm,&req1[3]); - MPI_Irecv(recvList_y, recvCount_y,MPI_INT,rank_Y,recvtag,comm,&req2[3]); - MPI_Isend(sendList_z, sendCount_z,MPI_INT,rank_Z,sendtag,comm,&req1[4]); - MPI_Irecv(recvList_Z, recvCount_Z,MPI_INT,rank_z,recvtag,comm,&req2[4]); - MPI_Isend(sendList_Z, sendCount_Z,MPI_INT,rank_z,sendtag,comm,&req1[5]); - MPI_Irecv(recvList_z, recvCount_z,MPI_INT,rank_Z,recvtag,comm,&req2[5]); + req1[0] = comm.Isend(sendList_x,sendCount_x,rank_X,sendtag); + req2[0] = comm.Irecv(recvList_X,recvCount_X,rank_x,recvtag); + req1[1] = comm.Isend(sendList_X,sendCount_X,rank_x,sendtag); + req2[1] = comm.Irecv(recvList_x,recvCount_x,rank_X,recvtag); + req1[2] = comm.Isend(sendList_y,sendCount_y,rank_Y,sendtag); + req2[2] = comm.Irecv(recvList_Y,recvCount_Y,rank_y,recvtag); + req1[3] = comm.Isend(sendList_Y,sendCount_Y,rank_y,sendtag); + req2[3] = comm.Irecv(recvList_y,recvCount_y,rank_Y,recvtag); + req1[4] = comm.Isend(sendList_z,sendCount_z,rank_Z,sendtag); + req2[4] = comm.Irecv(recvList_Z,recvCount_Z,rank_z,recvtag); + req1[5] = comm.Isend(sendList_Z,sendCount_Z,rank_z,sendtag); + req2[5] = comm.Irecv(recvList_z,recvCount_z,rank_Z,recvtag); - MPI_Isend(sendList_xy, sendCount_xy,MPI_INT,rank_XY,sendtag,comm,&req1[6]); - MPI_Irecv(recvList_XY, recvCount_XY,MPI_INT,rank_xy,recvtag,comm,&req2[6]); - 
MPI_Isend(sendList_XY, sendCount_XY,MPI_INT,rank_xy,sendtag,comm,&req1[7]); - MPI_Irecv(recvList_xy, recvCount_xy,MPI_INT,rank_XY,recvtag,comm,&req2[7]); - MPI_Isend(sendList_Xy, sendCount_Xy,MPI_INT,rank_xY,sendtag,comm,&req1[8]); - MPI_Irecv(recvList_xY, recvCount_xY,MPI_INT,rank_Xy,recvtag,comm,&req2[8]); - MPI_Isend(sendList_xY, sendCount_xY,MPI_INT,rank_Xy,sendtag,comm,&req1[9]); - MPI_Irecv(recvList_Xy, recvCount_Xy,MPI_INT,rank_xY,recvtag,comm,&req2[9]); + req1[6] = comm.Isend(sendList_xy,sendCount_xy,rank_XY,sendtag); + req2[6] = comm.Irecv(recvList_XY,recvCount_XY,rank_xy,recvtag); + req1[7] = comm.Isend(sendList_XY,sendCount_XY,rank_xy,sendtag); + req2[7] = comm.Irecv(recvList_xy,recvCount_xy,rank_XY,recvtag); + req1[8] = comm.Isend(sendList_Xy,sendCount_Xy,rank_xY,sendtag); + req2[8] = comm.Irecv(recvList_xY,recvCount_xY,rank_Xy,recvtag); + req1[9] = comm.Isend(sendList_xY,sendCount_xY,rank_Xy,sendtag); + req2[9] = comm.Irecv(recvList_Xy,recvCount_Xy,rank_xY,recvtag); - MPI_Isend(sendList_xz, sendCount_xz,MPI_INT,rank_XZ,sendtag,comm,&req1[10]); - MPI_Irecv(recvList_XZ, recvCount_XZ,MPI_INT,rank_xz,recvtag,comm,&req2[10]); - MPI_Isend(sendList_XZ, sendCount_XZ,MPI_INT,rank_xz,sendtag,comm,&req1[11]); - MPI_Irecv(recvList_xz, recvCount_xz,MPI_INT,rank_XZ,recvtag,comm,&req2[11]); - MPI_Isend(sendList_Xz, sendCount_Xz,MPI_INT,rank_xZ,sendtag,comm,&req1[12]); - MPI_Irecv(recvList_xZ, recvCount_xZ,MPI_INT,rank_Xz,recvtag,comm,&req2[12]); - MPI_Isend(sendList_xZ, sendCount_xZ,MPI_INT,rank_Xz,sendtag,comm,&req1[13]); - MPI_Irecv(recvList_Xz, recvCount_Xz,MPI_INT,rank_xZ,recvtag,comm,&req2[13]); + req1[10] = comm.Isend(sendList_xz,sendCount_xz,rank_XZ,sendtag); + req2[10] = comm.Irecv(recvList_XZ,recvCount_XZ,rank_xz,recvtag); + req1[11] = comm.Isend(sendList_XZ,sendCount_XZ,rank_xz,sendtag); + req2[11] = comm.Irecv(recvList_xz,recvCount_xz,rank_XZ,recvtag); + req1[12] = comm.Isend(sendList_Xz,sendCount_Xz,rank_xZ,sendtag); + req2[12] = comm.Irecv(recvList_xZ,recvCount_xZ,rank_Xz,recvtag); + req1[13] = comm.Isend(sendList_xZ,sendCount_xZ,rank_Xz,sendtag); + req2[13] = comm.Irecv(recvList_Xz,recvCount_Xz,rank_xZ,recvtag); - MPI_Isend(sendList_yz, sendCount_yz,MPI_INT,rank_YZ,sendtag,comm,&req1[14]); - MPI_Irecv(recvList_YZ, recvCount_YZ,MPI_INT,rank_yz,recvtag,comm,&req2[14]); - MPI_Isend(sendList_YZ, sendCount_YZ,MPI_INT,rank_yz,sendtag,comm,&req1[15]); - MPI_Irecv(recvList_yz, recvCount_yz,MPI_INT,rank_YZ,recvtag,comm,&req2[15]); - MPI_Isend(sendList_Yz, sendCount_Yz,MPI_INT,rank_yZ,sendtag,comm,&req1[16]); - MPI_Irecv(recvList_yZ, recvCount_yZ,MPI_INT,rank_Yz,recvtag,comm,&req2[16]); - MPI_Isend(sendList_yZ, sendCount_yZ,MPI_INT,rank_Yz,sendtag,comm,&req1[17]); - MPI_Irecv(recvList_Yz, recvCount_Yz,MPI_INT,rank_yZ,recvtag,comm,&req2[17]); - MPI_Waitall(18,req1,stat1); - MPI_Waitall(18,req2,stat2); - MPI_Barrier(comm); + req1[14] = comm.Isend(sendList_yz,sendCount_yz,rank_YZ,sendtag); + req2[14] = comm.Irecv(recvList_YZ,recvCount_YZ,rank_yz,recvtag); + req1[15] = comm.Isend(sendList_YZ,sendCount_YZ,rank_yz,sendtag); + req2[15] = comm.Irecv(recvList_yz,recvCount_yz,rank_YZ,recvtag); + req1[16] = comm.Isend(sendList_Yz,sendCount_Yz,rank_yZ,sendtag); + req2[16] = comm.Irecv(recvList_yZ,recvCount_yZ,rank_Yz,recvtag); + req1[17] = comm.Isend(sendList_yZ,sendCount_yZ,rank_Yz,sendtag); + req2[17] = comm.Irecv(recvList_Yz,recvCount_Yz,rank_yZ,recvtag); + comm.waitAll(18,req1); + comm.waitAll(18,req2); + comm.barrier(); 
//...................................................................................... double *sendbuf_x, *sendbuf_y, *sendbuf_z, *sendbuf_X, *sendbuf_Y, *sendbuf_Z; double *sendbuf_xy, *sendbuf_yz, *sendbuf_xz, *sendbuf_Xy, *sendbuf_Yz, *sendbuf_xZ; @@ -1427,42 +1421,24 @@ int main(int argc, char **argv) PackID(sendList_yZ, sendCount_yZ ,sendID_yZ, id); PackID(sendList_YZ, sendCount_YZ ,sendID_YZ, id); //...................................................................................... - MPI_Sendrecv(sendID_x,sendCount_x,MPI_CHAR,rank_X,sendtag, - recvID_X,recvCount_X,MPI_CHAR,rank_x,recvtag,comm,MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_X,sendCount_X,MPI_CHAR,rank_x,sendtag, - recvID_x,recvCount_x,MPI_CHAR,rank_X,recvtag,comm,MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_y,sendCount_y,MPI_CHAR,rank_Y,sendtag, - recvID_Y,recvCount_Y,MPI_CHAR,rank_y,recvtag,comm,MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_Y,sendCount_Y,MPI_CHAR,rank_y,sendtag, - recvID_y,recvCount_y,MPI_CHAR,rank_Y,recvtag,comm,MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_z,sendCount_z,MPI_CHAR,rank_Z,sendtag, - recvID_Z,recvCount_Z,MPI_CHAR,rank_z,recvtag,comm,MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_Z,sendCount_Z,MPI_CHAR,rank_z,sendtag, - recvID_z,recvCount_z,MPI_CHAR,rank_Z,recvtag,comm,MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_xy,sendCount_xy,MPI_CHAR,rank_XY,sendtag, - recvID_XY,recvCount_XY,MPI_CHAR,rank_xy,recvtag,comm,MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_XY,sendCount_XY,MPI_CHAR,rank_xy,sendtag, - recvID_xy,recvCount_xy,MPI_CHAR,rank_XY,recvtag,comm,MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_Xy,sendCount_Xy,MPI_CHAR,rank_xY,sendtag, - recvID_xY,recvCount_xY,MPI_CHAR,rank_Xy,recvtag,comm,MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_xY,sendCount_xY,MPI_CHAR,rank_Xy,sendtag, - recvID_Xy,recvCount_Xy,MPI_CHAR,rank_xY,recvtag,comm,MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_xz,sendCount_xz,MPI_CHAR,rank_XZ,sendtag, - recvID_XZ,recvCount_XZ,MPI_CHAR,rank_xz,recvtag,comm,MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_XZ,sendCount_XZ,MPI_CHAR,rank_xz,sendtag, - recvID_xz,recvCount_xz,MPI_CHAR,rank_XZ,recvtag,comm,MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_Xz,sendCount_Xz,MPI_CHAR,rank_xZ,sendtag, - recvID_xZ,recvCount_xZ,MPI_CHAR,rank_Xz,recvtag,comm,MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_xZ,sendCount_xZ,MPI_CHAR,rank_Xz,sendtag, - recvID_Xz,recvCount_Xz,MPI_CHAR,rank_xZ,recvtag,comm,MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_yz,sendCount_yz,MPI_CHAR,rank_YZ,sendtag, - recvID_YZ,recvCount_YZ,MPI_CHAR,rank_yz,recvtag,comm,MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_YZ,sendCount_YZ,MPI_CHAR,rank_yz,sendtag, - recvID_yz,recvCount_yz,MPI_CHAR,rank_YZ,recvtag,comm,MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_Yz,sendCount_Yz,MPI_CHAR,rank_yZ,sendtag, - recvID_yZ,recvCount_yZ,MPI_CHAR,rank_Yz,recvtag,comm,MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_yZ,sendCount_yZ,MPI_CHAR,rank_Yz,sendtag, - recvID_Yz,recvCount_Yz,MPI_CHAR,rank_yZ,recvtag,comm,MPI_STATUS_IGNORE); + comm.sendrecv(sendID_x,sendCount_x,rank_X,sendtag,recvID_X,recvCount_X,rank_x,recvtag); + comm.sendrecv(sendID_X,sendCount_X,rank_x,sendtag,recvID_x,recvCount_x,rank_X,recvtag); + comm.sendrecv(sendID_y,sendCount_y,rank_Y,sendtag,recvID_Y,recvCount_Y,rank_y,recvtag); + comm.sendrecv(sendID_Y,sendCount_Y,rank_y,sendtag,recvID_y,recvCount_y,rank_Y,recvtag); + comm.sendrecv(sendID_z,sendCount_z,rank_Z,sendtag,recvID_Z,recvCount_Z,rank_z,recvtag); + comm.sendrecv(sendID_Z,sendCount_Z,rank_z,sendtag,recvID_z,recvCount_z,rank_Z,recvtag); + 
comm.sendrecv(sendID_xy,sendCount_xy,rank_XY,sendtag,recvID_XY,recvCount_XY,rank_xy,recvtag); + comm.sendrecv(sendID_XY,sendCount_XY,rank_xy,sendtag,recvID_xy,recvCount_xy,rank_XY,recvtag); + comm.sendrecv(sendID_Xy,sendCount_Xy,rank_xY,sendtag,recvID_xY,recvCount_xY,rank_Xy,recvtag); + comm.sendrecv(sendID_xY,sendCount_xY,rank_Xy,sendtag,recvID_Xy,recvCount_Xy,rank_xY,recvtag); + comm.sendrecv(sendID_xz,sendCount_xz,rank_XZ,sendtag,recvID_XZ,recvCount_XZ,rank_xz,recvtag); + comm.sendrecv(sendID_XZ,sendCount_XZ,rank_xz,sendtag,recvID_xz,recvCount_xz,rank_XZ,recvtag); + comm.sendrecv(sendID_Xz,sendCount_Xz,rank_xZ,sendtag,recvID_xZ,recvCount_xZ,rank_Xz,recvtag); + comm.sendrecv(sendID_xZ,sendCount_xZ,rank_Xz,sendtag,recvID_Xz,recvCount_Xz,rank_xZ,recvtag); + comm.sendrecv(sendID_yz,sendCount_yz,rank_YZ,sendtag,recvID_YZ,recvCount_YZ,rank_yz,recvtag); + comm.sendrecv(sendID_YZ,sendCount_YZ,rank_yz,sendtag,recvID_yz,recvCount_yz,rank_YZ,recvtag); + comm.sendrecv(sendID_Yz,sendCount_Yz,rank_yZ,sendtag,recvID_yZ,recvCount_yZ,rank_Yz,recvtag); + comm.sendrecv(sendID_yZ,sendCount_yZ,rank_Yz,sendtag,recvID_Yz,recvCount_Yz,rank_yZ,recvtag); //...................................................................................... UnpackID(recvList_x, recvCount_x ,recvID_x, id); UnpackID(recvList_X, recvCount_X ,recvID_X, id); @@ -1495,7 +1471,7 @@ int main(int argc, char **argv) free(recvID_yz); free(recvID_YZ); free(recvID_yZ); free(recvID_Yz); //...................................................................................... if (rank==0) printf ("Devices are ready to communicate. \n"); - MPI_Barrier(comm); + comm.barrier(); //...........device phase ID................................................. if (rank==0) printf ("Copying phase ID to device \n"); @@ -1535,8 +1511,8 @@ int main(int argc, char **argv) //.......create and start timer............ double starttime,stoptime,cputime; - MPI_Barrier(comm); - starttime = MPI_Wtime(); + comm.barrier(); + starttime = Utilities::MPI::time(); // Old cuda timer is below // cudaEvent_t start, stop; // float time; @@ -1633,48 +1609,48 @@ int main(int argc, char **argv) //................................................................................... 
// Send all the distributions - MPI_Isend(sendbuf_x, 5*sendCount_x,MPI_DOUBLE,rank_X,sendtag,comm,&req1[0]); - MPI_Irecv(recvbuf_X, 5*recvCount_X,MPI_DOUBLE,rank_x,recvtag,comm,&req2[0]); - MPI_Isend(sendbuf_X, 5*sendCount_X,MPI_DOUBLE,rank_x,sendtag,comm,&req1[1]); - MPI_Irecv(recvbuf_x, 5*recvCount_x,MPI_DOUBLE,rank_X,recvtag,comm,&req2[1]); - MPI_Isend(sendbuf_y, 5*sendCount_y,MPI_DOUBLE,rank_Y,sendtag,comm,&req1[2]); - MPI_Irecv(recvbuf_Y, 5*recvCount_Y,MPI_DOUBLE,rank_y,recvtag,comm,&req2[2]); - MPI_Isend(sendbuf_Y, 5*sendCount_Y,MPI_DOUBLE,rank_y,sendtag,comm,&req1[3]); - MPI_Irecv(recvbuf_y, 5*recvCount_y,MPI_DOUBLE,rank_Y,recvtag,comm,&req2[3]); - MPI_Isend(sendbuf_z, 5*sendCount_z,MPI_DOUBLE,rank_Z,sendtag,comm,&req1[4]); - MPI_Irecv(recvbuf_Z, 5*recvCount_Z,MPI_DOUBLE,rank_z,recvtag,comm,&req2[4]); - MPI_Isend(sendbuf_Z, 5*sendCount_Z,MPI_DOUBLE,rank_z,sendtag,comm,&req1[5]); - MPI_Irecv(recvbuf_z, 5*recvCount_z,MPI_DOUBLE,rank_Z,recvtag,comm,&req2[5]); - MPI_Isend(sendbuf_xy, sendCount_xy,MPI_DOUBLE,rank_XY,sendtag,comm,&req1[6]); - MPI_Irecv(recvbuf_XY, recvCount_XY,MPI_DOUBLE,rank_xy,recvtag,comm,&req2[6]); - MPI_Isend(sendbuf_XY, sendCount_XY,MPI_DOUBLE,rank_xy,sendtag,comm,&req1[7]); - MPI_Irecv(recvbuf_xy, recvCount_xy,MPI_DOUBLE,rank_XY,recvtag,comm,&req2[7]); - MPI_Isend(sendbuf_Xy, sendCount_Xy,MPI_DOUBLE,rank_xY,sendtag,comm,&req1[8]); - MPI_Irecv(recvbuf_xY, recvCount_xY,MPI_DOUBLE,rank_Xy,recvtag,comm,&req2[8]); - MPI_Isend(sendbuf_xY, sendCount_xY,MPI_DOUBLE,rank_Xy,sendtag,comm,&req1[9]); - MPI_Irecv(recvbuf_Xy, recvCount_Xy,MPI_DOUBLE,rank_xY,recvtag,comm,&req2[9]); - MPI_Isend(sendbuf_xz, sendCount_xz,MPI_DOUBLE,rank_XZ,sendtag,comm,&req1[10]); - MPI_Irecv(recvbuf_XZ, recvCount_XZ,MPI_DOUBLE,rank_xz,recvtag,comm,&req2[10]); - MPI_Isend(sendbuf_XZ, sendCount_XZ,MPI_DOUBLE,rank_xz,sendtag,comm,&req1[11]); - MPI_Irecv(recvbuf_xz, recvCount_xz,MPI_DOUBLE,rank_XZ,recvtag,comm,&req2[11]); - MPI_Isend(sendbuf_Xz, sendCount_Xz,MPI_DOUBLE,rank_xZ,sendtag,comm,&req1[12]); - MPI_Irecv(recvbuf_xZ, recvCount_xZ,MPI_DOUBLE,rank_Xz,recvtag,comm,&req2[12]); - MPI_Isend(sendbuf_xZ, sendCount_xZ,MPI_DOUBLE,rank_Xz,sendtag,comm,&req1[13]); - MPI_Irecv(recvbuf_Xz, recvCount_Xz,MPI_DOUBLE,rank_xZ,recvtag,comm,&req2[13]); - MPI_Isend(sendbuf_yz, sendCount_yz,MPI_DOUBLE,rank_YZ,sendtag,comm,&req1[14]); - MPI_Irecv(recvbuf_YZ, recvCount_YZ,MPI_DOUBLE,rank_yz,recvtag,comm,&req2[14]); - MPI_Isend(sendbuf_YZ, sendCount_YZ,MPI_DOUBLE,rank_yz,sendtag,comm,&req1[15]); - MPI_Irecv(recvbuf_yz, recvCount_yz,MPI_DOUBLE,rank_YZ,recvtag,comm,&req2[15]); - MPI_Isend(sendbuf_Yz, sendCount_Yz,MPI_DOUBLE,rank_yZ,sendtag,comm,&req1[16]); - MPI_Irecv(recvbuf_yZ, recvCount_yZ,MPI_DOUBLE,rank_Yz,recvtag,comm,&req2[16]); - MPI_Isend(sendbuf_yZ, sendCount_yZ,MPI_DOUBLE,rank_Yz,sendtag,comm,&req1[17]); - MPI_Irecv(recvbuf_Yz, recvCount_Yz,MPI_DOUBLE,rank_yZ,recvtag,comm,&req2[17]); + req1[0] = comm.Isend(sendbuf_x,5*sendCount_x,rank_X,sendtag); + req2[0] = comm.Irecv(recvbuf_X,5*recvCount_X,rank_x,recvtag); + req1[1] = comm.Isend(sendbuf_X,5*sendCount_X,rank_x,sendtag); + req2[1] = comm.Irecv(recvbuf_x,5*recvCount_x,rank_X,recvtag); + req1[2] = comm.Isend(sendbuf_y,5*sendCount_y,rank_Y,sendtag); + req2[2] = comm.Irecv(recvbuf_Y,5*recvCount_Y,rank_y,recvtag); + req1[3] = comm.Isend(sendbuf_Y,5*sendCount_Y,rank_y,sendtag); + req2[3] = comm.Irecv(recvbuf_y,5*recvCount_y,rank_Y,recvtag); + req1[4] = comm.Isend(sendbuf_z,5*sendCount_z,rank_Z,sendtag); + req2[4] = comm.Irecv(recvbuf_Z,5*recvCount_Z,rank_z,recvtag); 
+ req1[5] = comm.Isend(sendbuf_Z,5*sendCount_Z,rank_z,sendtag); + req2[5] = comm.Irecv(recvbuf_z,5*recvCount_z,rank_Z,recvtag); + req1[6] = comm.Isend(sendbuf_xy,sendCount_xy,rank_XY,sendtag); + req2[6] = comm.Irecv(recvbuf_XY,recvCount_XY,rank_xy,recvtag); + req1[7] = comm.Isend(sendbuf_XY,sendCount_XY,rank_xy,sendtag); + req2[7] = comm.Irecv(recvbuf_xy,recvCount_xy,rank_XY,recvtag); + req1[8] = comm.Isend(sendbuf_Xy,sendCount_Xy,rank_xY,sendtag); + req2[8] = comm.Irecv(recvbuf_xY,recvCount_xY,rank_Xy,recvtag); + req1[9] = comm.Isend(sendbuf_xY,sendCount_xY,rank_Xy,sendtag); + req2[9] = comm.Irecv(recvbuf_Xy,recvCount_Xy,rank_xY,recvtag); + req1[10] = comm.Isend(sendbuf_xz,sendCount_xz,rank_XZ,sendtag); + req2[10] = comm.Irecv(recvbuf_XZ,recvCount_XZ,rank_xz,recvtag); + req1[11] = comm.Isend(sendbuf_XZ,sendCount_XZ,rank_xz,sendtag); + req2[11] = comm.Irecv(recvbuf_xz,recvCount_xz,rank_XZ,recvtag); + req1[12] = comm.Isend(sendbuf_Xz,sendCount_Xz,rank_xZ,sendtag); + req2[12] = comm.Irecv(recvbuf_xZ,recvCount_xZ,rank_Xz,recvtag); + req1[13] = comm.Isend(sendbuf_xZ,sendCount_xZ,rank_Xz,sendtag); + req2[13] = comm.Irecv(recvbuf_Xz,recvCount_Xz,rank_xZ,recvtag); + req1[14] = comm.Isend(sendbuf_yz,sendCount_yz,rank_YZ,sendtag); + req2[14] = comm.Irecv(recvbuf_YZ,recvCount_YZ,rank_yz,recvtag); + req1[15] = comm.Isend(sendbuf_YZ,sendCount_YZ,rank_yz,sendtag); + req2[15] = comm.Irecv(recvbuf_yz,recvCount_yz,rank_YZ,recvtag); + req1[16] = comm.Isend(sendbuf_Yz,sendCount_Yz,rank_yZ,sendtag); + req2[16] = comm.Irecv(recvbuf_yZ,recvCount_yZ,rank_Yz,recvtag); + req1[17] = comm.Isend(sendbuf_yZ,sendCount_yZ,rank_Yz,sendtag); + req2[17] = comm.Irecv(recvbuf_Yz,recvCount_Yz,rank_yZ,recvtag); //................................................................................... //................................................................................... // Wait for completion of D3Q19 communication - MPI_Waitall(18,req1,stat1); - MPI_Waitall(18,req2,stat2); + comm.waitAll(18,req1); + comm.waitAll(18,req2); //................................................................................... // Unpack the distributions on the device //................................................................................... @@ -1758,7 +1734,7 @@ int main(int argc, char **argv) //***************************************************************************** //***************************************************************************** - MPI_Barrier(comm); + comm.barrier(); // Iteration completed! iter++; //................................................................... @@ -1766,8 +1742,8 @@ int main(int argc, char **argv) //************************************************************************/ cudaThreadSynchronize(); - MPI_Barrier(comm); - stoptime = MPI_Wtime(); + comm.barrier(); + stoptime = Utilities::MPI::time(); // cout << "CPU time: " << (stoptime - starttime) << " seconds" << endl; cputime = stoptime - starttime; // cout << "Lattice update rate: "<< double(Nx*Ny*Nz*iter)/cputime/1000000 << " MLUPS" << endl; @@ -1802,7 +1778,7 @@ int main(int argc, char **argv) cudaMemcpy(velocity, vel, 3*dist_mem_size, cudaMemcpyDeviceToHost); //.............................................................................. cudaThreadSynchronize(); - MPI_Barrier(comm); + comm.barrier(); //............................................................ //....Write the z-velocity to test poiseuille flow............ 
double vz,vz_avg; @@ -1831,7 +1807,7 @@ int main(int argc, char **argv) free (velocity); free(id); // **************************************************** - MPI_Barrier(comm); + comm.barrier(); MPI_Finalize(); // **************************************************** } diff --git a/gpu/exe/lb2_Color.cu b/gpu/exe/lb2_Color.cu index 1871b23c..1f227d08 100644 --- a/gpu/exe/lb2_Color.cu +++ b/gpu/exe/lb2_Color.cu @@ -1,6 +1,4 @@ -#ifdef useMPI -#include -#endif +#include "common/MPI.h" #include #include @@ -62,18 +60,10 @@ int main(int argc, char *argv[]) { //********** Initialize MPI **************** - int numprocs,rank; -#ifdef useMPI - MPI_Status stat; MPI_Init(&argc,&argv); - MPI_Comm comm = MPI_COMM_WORLD; - MPI_Comm_size(comm,&numprocs); - MPI_Comm_rank(comm,&rank); -#else - MPI_Comm comm = MPI_COMM_WORLD; - numprocs = 1; - rank = 0; -#endif + Utilities::MPI comm( MPI_COMM_WORLD ); + int rank = comm.getRank(); + int numprocs = comm.getSize(); //****************************************** if (rank == 0){ @@ -123,32 +113,31 @@ int main(int argc, char *argv[]) input >> tol; // error tolerance //............................................................. } -#ifdef useMPI // ************************************************************** // Broadcast simulation parameters from rank 0 to all other procs - MPI_Barrier(comm); + comm.barrier(); //................................................. - MPI_Bcast(&Nz,1,MPI_INT,0,comm); - MPI_Bcast(&nBlocks,1,MPI_INT,0,comm); - MPI_Bcast(&nthreads,1,MPI_INT,0,comm); - MPI_Bcast(&Fx,1,MPI_DOUBLE,0,comm); - MPI_Bcast(&Fy,1,MPI_DOUBLE,0,comm); - MPI_Bcast(&Fz,1,MPI_DOUBLE,0,comm); - MPI_Bcast(&tau,1,MPI_DOUBLE,0,comm); - MPI_Bcast(&alpha,1,MPI_DOUBLE,0,comm); - MPI_Bcast(&beta,1,MPI_DOUBLE,0,comm); - MPI_Bcast(&das,1,MPI_DOUBLE,0,comm); - MPI_Bcast(&dbs,1,MPI_DOUBLE,0,comm); - MPI_Bcast(&pBC,1,MPI_LOGICAL,0,comm); - MPI_Bcast(&din,1,MPI_DOUBLE,0,comm); - MPI_Bcast(&dout,1,MPI_DOUBLE,0,comm); - MPI_Bcast(×tepMax,1,MPI_INT,0,comm); - MPI_Bcast(&interval,1,MPI_INT,0,comm); - MPI_Bcast(&tol,1,MPI_DOUBLE,0,comm); + comm.bcast(&Nz,1,0); + comm.bcast(&nBlocks,1,0); + comm.bcast(&nthreads,1,0); + comm.bcast(&Fx,1,0); + comm.bcast(&Fy,1,0); + comm.bcast(&Fz,1,0); + comm.bcast(&tau,1,0); + comm.bcast(&alpha,1,0); + comm.bcast(&beta,1,0); + comm.bcast(&das,1,0); + comm.bcast(&dbs,1,0); + comm.bcast(&pBC,1,0); + comm.bcast(&din,1,0); + comm.bcast(&dout,1,0); + + comm.bcast(×tepMax,1,0); + comm.bcast(&interval,1,0); + comm.bcast(&tol,1,0); //................................................. - MPI_Barrier(comm); + comm.barrier(); // ************************************************************** -#endif double rlxA = 1.f/tau; double rlxB = 8.f*(2.f-rlxA)/(8.f-rlxA); @@ -243,11 +232,7 @@ int main(int argc, char *argv[]) if (k==4) k=Nz-5; } } -#ifdef useMPI //............................................................ - MPI_Barrier(comm); - MPI_Bcast(&id[0],N,MPI_CHAR,0,comm); - MPI_Barrier(comm); -#endif + comm.bcast(&id[0],N,0); if (rank == 0) printf("Domain set.\n"); //........................................................................... 
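The hunks above and below all apply one refactoring pattern: each raw MPI call is replaced by the equivalent method on the Utilities::MPI wrapper from common/MPI.h, dropping the explicit MPI datatype, communicator, and MPI_Status arguments. The following condensed sketch shows that pattern for a single face exchange. The helper name and argument list are illustrative only, and the wrapper signatures (Isend/Irecv returning an MPI_Request, waitAll taking a count and a request array) are inferred from how they are used in this patch rather than quoted from the header.

#include "common/MPI.h"   // LBPM's MPI wrapper; signatures below are inferred from this patch

// Hypothetical helper showing one face exchange written against the wrapper,
// as the converted code does for all 18 D3Q19 communication directions.
static void exchangeFace( Utilities::MPI &comm,
                          double *sendbuf, int sendCount, int rank_send,
                          double *recvbuf, int recvCount, int rank_recv,
                          int sendtag, int recvtag )
{
    MPI_Request req1[1], req2[1];
    // Formerly: MPI_Isend(sendbuf,5*sendCount,MPI_DOUBLE,rank_send,sendtag,comm,&req1[0]);
    // The wrapper carries the communicator and (presumably) deduces the MPI datatype
    // from the pointer type, so only buffer, count, rank, and tag remain.
    req1[0] = comm.Isend( sendbuf, 5*sendCount, rank_send, sendtag );
    req2[0] = comm.Irecv( recvbuf, 5*recvCount, rank_recv, recvtag );
    comm.waitAll( 1, req1 );   // replaces MPI_Waitall(1,req1,stat1); no MPI_Status array needed
    comm.waitAll( 1, req2 );
    comm.barrier();            // replaces MPI_Barrier(comm)
}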
diff --git a/gpu/exe/lb2_Color_mpi.cpp b/gpu/exe/lb2_Color_mpi.cpp index fe11d32f..a2f3d8a9 100644 --- a/gpu/exe/lb2_Color_mpi.cpp +++ b/gpu/exe/lb2_Color_mpi.cpp @@ -2,7 +2,7 @@ #include #include #include -#include +#include "common/MPI.h" using namespace std; @@ -98,15 +98,11 @@ inline void UnpackID(int *list, int count, char *recvbuf, char *ID){ int main(int argc, char **argv) { - //***************************************** - // ***** MPI STUFF **************** - //***************************************** // Initialize MPI - int rank,nprocs; MPI_Init(&argc,&argv); - MPI_Comm comm = MPI_COMM_WORLD; - MPI_Comm_rank(comm,&rank); - MPI_Comm_size(comm,&nprocs); + Utilities::MPI comm( MPI_COMM_WORLD ); + int rank = comm.getRank(); + int nprocs = comm.getSize(); // parallel domain size (# of sub-domains) int nprocx,nprocy,nprocz; int iproc,jproc,kproc; @@ -120,7 +116,6 @@ int main(int argc, char **argv) int rank_yz,rank_YZ,rank_yZ,rank_Yz; //********************************** MPI_Request req1[18],req2[18]; - MPI_Status stat1[18],stat2[18]; if (rank == 0){ printf("********************************************************\n"); @@ -177,31 +172,30 @@ int main(int argc, char **argv) } // ************************************************************** // Broadcast simulation parameters from rank 0 to all other procs - MPI_Barrier(comm); + comm.barrier(); //................................................. - MPI_Bcast(&Nz,1,MPI_INT,0,comm); - MPI_Bcast(&nBlocks,1,MPI_INT,0,comm); - MPI_Bcast(&nthreads,1,MPI_INT,0,comm); - MPI_Bcast(&Fx,1,MPI_DOUBLE,0,comm); - MPI_Bcast(&Fy,1,MPI_DOUBLE,0,comm); - MPI_Bcast(&Fz,1,MPI_DOUBLE,0,comm); - MPI_Bcast(&tau,1,MPI_DOUBLE,0,comm); - MPI_Bcast(&alpha,1,MPI_DOUBLE,0,comm); - MPI_Bcast(&beta,1,MPI_DOUBLE,0,comm); - MPI_Bcast(&das,1,MPI_DOUBLE,0,comm); - MPI_Bcast(&dbs,1,MPI_DOUBLE,0,comm); - MPI_Bcast(&pBC,1,MPI_LOGICAL,0,comm); - MPI_Bcast(&din,1,MPI_DOUBLE,0,comm); - MPI_Bcast(&dout,1,MPI_DOUBLE,0,comm); - MPI_Bcast(×tepMax,1,MPI_INT,0,comm); - MPI_Bcast(&interval,1,MPI_INT,0,comm); - MPI_Bcast(&tol,1,MPI_DOUBLE,0,comm); - - MPI_Bcast(&nprocx,1,MPI_INT,0,comm); - MPI_Bcast(&nprocy,1,MPI_INT,0,comm); - MPI_Bcast(&nprocz,1,MPI_INT,0,comm); + comm.bcast(&Nz,1,0); + comm.bcast(&nBlocks,1,0); + comm.bcast(&nthreads,1,0); + comm.bcast(&Fx,1,0); + comm.bcast(&Fy,1,0); + comm.bcast(&Fz,1,0); + comm.bcast(&tau,1,0); + comm.bcast(&alpha,1,0); + comm.bcast(&beta,1,0); + comm.bcast(&das,1,0); + comm.bcast(&dbs,1,0); + comm.bcast(&pBC,1,0); + comm.bcast(&din,1,0); + comm.bcast(&dout,1,0); + comm.bcast(×tepMax,1,0); + comm.bcast(&interval,1,0); + comm.bcast(&tol,1,0); + comm.bcast(&nprocx,1,0); + comm.bcast(&nprocy,1,0); + comm.bcast(&nprocz,1,0); //................................................. - MPI_Barrier(comm); + comm.barrier(); // ************************************************************** // ************************************************************** @@ -231,7 +225,7 @@ int main(int argc, char **argv) } - MPI_Barrier(comm); + comm.barrier(); kproc = rank/(nprocx*nprocy); jproc = (rank-nprocx*nprocy*kproc)/nprocx; iproc = rank-nprocx*nprocy*kproc-nprocz*jproc; @@ -513,7 +507,7 @@ int main(int argc, char **argv) PM.close(); // printf("File porosity = %f\n", double(sum)/N); //........................................................................... - MPI_Barrier(comm); + comm.barrier(); if (rank == 0) cout << "Domain set." << endl; //........................................................................... 
// Write the communcation structure into a file for debugging @@ -650,7 +644,7 @@ int main(int argc, char **argv) } } } - MPI_Barrier(comm); + comm.barrier(); if (rank==0) printf ("SendLists are ready on host\n"); //...................................................................................... // Use MPI to fill in the recvCounts form the associated processes @@ -661,46 +655,46 @@ int main(int argc, char **argv) //********************************************************************************** // Fill in the recieve counts using MPI sendtag = recvtag = 3; - MPI_Send(&sendCount_x,1,MPI_INT,rank_X,sendtag,comm); - MPI_Recv(&recvCount_X,1,MPI_INT,rank_x,recvtag,comm,MPI_STATUS_IGNORE); - MPI_Send(&sendCount_X,1,MPI_INT,rank_x,sendtag,comm); - MPI_Recv(&recvCount_x,1,MPI_INT,rank_X,recvtag,comm,MPI_STATUS_IGNORE); - MPI_Send(&sendCount_y,1,MPI_INT,rank_Y,sendtag,comm); - MPI_Recv(&recvCount_Y,1,MPI_INT,rank_y,recvtag,comm,MPI_STATUS_IGNORE); - MPI_Send(&sendCount_Y,1,MPI_INT,rank_y,sendtag,comm); - MPI_Recv(&recvCount_y,1,MPI_INT,rank_Y,recvtag,comm,MPI_STATUS_IGNORE); - MPI_Send(&sendCount_z,1,MPI_INT,rank_Z,sendtag,comm); - MPI_Recv(&recvCount_Z,1,MPI_INT,rank_z,recvtag,comm,MPI_STATUS_IGNORE); - MPI_Send(&sendCount_Z,1,MPI_INT,rank_z,sendtag,comm); - MPI_Recv(&recvCount_z,1,MPI_INT,rank_Z,recvtag,comm,MPI_STATUS_IGNORE); + comm.Send(&sendCount_x,1,rank_X,sendtag); + comm.Recv(&recvCount_X,1,rank_x,recvtag); + comm.Send(&sendCount_X,1,rank_x,sendtag); + comm.Recv(&recvCount_x,1,rank_X,recvtag); + comm.Send(&sendCount_y,1,rank_Y,sendtag); + comm.Recv(&recvCount_Y,1,rank_y,recvtag); + comm.Send(&sendCount_Y,1,rank_y,sendtag); + comm.Recv(&recvCount_y,1,rank_Y,recvtag); + comm.Send(&sendCount_z,1,rank_Z,sendtag); + comm.Recv(&recvCount_Z,1,rank_z,recvtag); + comm.Send(&sendCount_Z,1,rank_z,sendtag); + comm.Recv(&recvCount_z,1,rank_Z,recvtag); - MPI_Send(&sendCount_xy,1,MPI_INT,rank_XY,sendtag,comm); - MPI_Recv(&recvCount_XY,1,MPI_INT,rank_xy,recvtag,comm,MPI_STATUS_IGNORE); - MPI_Send(&sendCount_XY,1,MPI_INT,rank_xy,sendtag,comm); - MPI_Recv(&recvCount_xy,1,MPI_INT,rank_XY,recvtag,comm,MPI_STATUS_IGNORE); - MPI_Send(&sendCount_Xy,1,MPI_INT,rank_xY,sendtag,comm); - MPI_Recv(&recvCount_xY,1,MPI_INT,rank_Xy,recvtag,comm,MPI_STATUS_IGNORE); - MPI_Send(&sendCount_xY,1,MPI_INT,rank_Xy,sendtag,comm); - MPI_Recv(&recvCount_Xy,1,MPI_INT,rank_xY,recvtag,comm,MPI_STATUS_IGNORE); + comm.Send(&sendCount_xy,1,rank_XY,sendtag); + comm.Recv(&recvCount_XY,1,rank_xy,recvtag); + comm.Send(&sendCount_XY,1,rank_xy,sendtag); + comm.Recv(&recvCount_xy,1,rank_XY,recvtag); + comm.Send(&sendCount_Xy,1,rank_xY,sendtag); + comm.Recv(&recvCount_xY,1,rank_Xy,recvtag); + comm.Send(&sendCount_xY,1,rank_Xy,sendtag); + comm.Recv(&recvCount_Xy,1,rank_xY,recvtag); - MPI_Send(&sendCount_xz,1,MPI_INT,rank_XZ,sendtag,comm); - MPI_Recv(&recvCount_XZ,1,MPI_INT,rank_xz,recvtag,comm,MPI_STATUS_IGNORE); - MPI_Send(&sendCount_XZ,1,MPI_INT,rank_xz,sendtag,comm); - MPI_Recv(&recvCount_xz,1,MPI_INT,rank_XZ,recvtag,comm,MPI_STATUS_IGNORE); - MPI_Send(&sendCount_Xz,1,MPI_INT,rank_xZ,sendtag,comm); - MPI_Recv(&recvCount_xZ,1,MPI_INT,rank_Xz,recvtag,comm,MPI_STATUS_IGNORE); - MPI_Send(&sendCount_xZ,1,MPI_INT,rank_Xz,sendtag,comm); - MPI_Recv(&recvCount_Xz,1,MPI_INT,rank_xZ,recvtag,comm,MPI_STATUS_IGNORE); + comm.Send(&sendCount_xz,1,rank_XZ,sendtag); + comm.Recv(&recvCount_XZ,1,rank_xz,recvtag); + comm.Send(&sendCount_XZ,1,rank_xz,sendtag); + comm.Recv(&recvCount_xz,1,rank_XZ,recvtag); + comm.Send(&sendCount_Xz,1,rank_xZ,sendtag); + 
comm.Recv(&recvCount_xZ,1,rank_Xz,recvtag); + comm.Send(&sendCount_xZ,1,rank_Xz,sendtag); + comm.Recv(&recvCount_Xz,1,rank_xZ,recvtag); - MPI_Send(&sendCount_yz,1,MPI_INT,rank_YZ,sendtag,comm); - MPI_Recv(&recvCount_YZ,1,MPI_INT,rank_yz,recvtag,comm,MPI_STATUS_IGNORE); - MPI_Send(&sendCount_YZ,1,MPI_INT,rank_yz,sendtag,comm); - MPI_Recv(&recvCount_yz,1,MPI_INT,rank_YZ,recvtag,comm,MPI_STATUS_IGNORE); - MPI_Send(&sendCount_Yz,1,MPI_INT,rank_yZ,sendtag,comm); - MPI_Recv(&recvCount_yZ,1,MPI_INT,rank_Yz,recvtag,comm,MPI_STATUS_IGNORE); - MPI_Send(&sendCount_yZ,1,MPI_INT,rank_Yz,sendtag,comm); - MPI_Recv(&recvCount_Yz,1,MPI_INT,rank_yZ,recvtag,comm,MPI_STATUS_IGNORE); - MPI_Barrier(comm); + comm.Send(&sendCount_yz,1,rank_YZ,sendtag); + comm.Recv(&recvCount_YZ,1,rank_yz,recvtag); + comm.Send(&sendCount_YZ,1,rank_yz,sendtag); + comm.Recv(&recvCount_yz,1,rank_YZ,recvtag); + comm.Send(&sendCount_Yz,1,rank_yZ,sendtag); + comm.Recv(&recvCount_yZ,1,rank_Yz,recvtag); + comm.Send(&sendCount_yZ,1,rank_Yz,sendtag); + comm.Recv(&recvCount_Yz,1,rank_yZ,recvtag); + comm.barrier(); //********************************************************************************** //...................................................................................... int *recvList_x, *recvList_y, *recvList_z, *recvList_X, *recvList_Y, *recvList_Z; @@ -731,48 +725,48 @@ int main(int argc, char **argv) // Use MPI to fill in the appropriate values for recvList // Fill in the recieve lists using MPI sendtag = recvtag = 4; - MPI_Isend(sendList_x, sendCount_x,MPI_INT,rank_X,sendtag,comm,&req1[0]); - MPI_Irecv(recvList_X, recvCount_X,MPI_INT,rank_x,recvtag,comm,&req2[0]); - MPI_Isend(sendList_X, sendCount_X,MPI_INT,rank_x,sendtag,comm,&req1[1]); - MPI_Irecv(recvList_x, recvCount_x,MPI_INT,rank_X,recvtag,comm,&req2[1]); - MPI_Isend(sendList_y, sendCount_y,MPI_INT,rank_Y,sendtag,comm,&req1[2]); - MPI_Irecv(recvList_Y, recvCount_Y,MPI_INT,rank_y,recvtag,comm,&req2[2]); - MPI_Isend(sendList_Y, sendCount_Y,MPI_INT,rank_y,sendtag,comm,&req1[3]); - MPI_Irecv(recvList_y, recvCount_y,MPI_INT,rank_Y,recvtag,comm,&req2[3]); - MPI_Isend(sendList_z, sendCount_z,MPI_INT,rank_Z,sendtag,comm,&req1[4]); - MPI_Irecv(recvList_Z, recvCount_Z,MPI_INT,rank_z,recvtag,comm,&req2[4]); - MPI_Isend(sendList_Z, sendCount_Z,MPI_INT,rank_z,sendtag,comm,&req1[5]); - MPI_Irecv(recvList_z, recvCount_z,MPI_INT,rank_Z,recvtag,comm,&req2[5]); + req1[0] = comm.Isend(sendList_x,sendCount_x,rank_X,sendtag); + req2[0] = comm.Irecv(recvList_X,recvCount_X,rank_x,recvtag); + req1[1] = comm.Isend(sendList_X,sendCount_X,rank_x,sendtag); + req2[1] = comm.Irecv(recvList_x,recvCount_x,rank_X,recvtag); + req1[2] = comm.Isend(sendList_y,sendCount_y,rank_Y,sendtag); + req2[2] = comm.Irecv(recvList_Y,recvCount_Y,rank_y,recvtag); + req1[3] = comm.Isend(sendList_Y,sendCount_Y,rank_y,sendtag); + req2[3] = comm.Irecv(recvList_y,recvCount_y,rank_Y,recvtag); + req1[4] = comm.Isend(sendList_z,sendCount_z,rank_Z,sendtag); + req2[4] = comm.Irecv(recvList_Z,recvCount_Z,rank_z,recvtag); + req1[5] = comm.Isend(sendList_Z,sendCount_Z,rank_z,sendtag); + req2[5] = comm.Irecv(recvList_z,recvCount_z,rank_Z,recvtag); - MPI_Isend(sendList_xy, sendCount_xy,MPI_INT,rank_XY,sendtag,comm,&req1[6]); - MPI_Irecv(recvList_XY, recvCount_XY,MPI_INT,rank_xy,recvtag,comm,&req2[6]); - MPI_Isend(sendList_XY, sendCount_XY,MPI_INT,rank_xy,sendtag,comm,&req1[7]); - MPI_Irecv(recvList_xy, recvCount_xy,MPI_INT,rank_XY,recvtag,comm,&req2[7]); - MPI_Isend(sendList_Xy, sendCount_Xy,MPI_INT,rank_xY,sendtag,comm,&req1[8]); 
- MPI_Irecv(recvList_xY, recvCount_xY,MPI_INT,rank_Xy,recvtag,comm,&req2[8]); - MPI_Isend(sendList_xY, sendCount_xY,MPI_INT,rank_Xy,sendtag,comm,&req1[9]); - MPI_Irecv(recvList_Xy, recvCount_Xy,MPI_INT,rank_xY,recvtag,comm,&req2[9]); + req1[6] = comm.Isend(sendList_xy,sendCount_xy,rank_XY,sendtag); + req2[6] = comm.Irecv(recvList_XY,recvCount_XY,rank_xy,recvtag); + req1[7] = comm.Isend(sendList_XY,sendCount_XY,rank_xy,sendtag); + req2[7] = comm.Irecv(recvList_xy,recvCount_xy,rank_XY,recvtag); + req1[8] = comm.Isend(sendList_Xy,sendCount_Xy,rank_xY,sendtag); + req2[8] = comm.Irecv(recvList_xY,recvCount_xY,rank_Xy,recvtag); + req1[9] = comm.Isend(sendList_xY,sendCount_xY,rank_Xy,sendtag); + req2[9] = comm.Irecv(recvList_Xy,recvCount_Xy,rank_xY,recvtag); - MPI_Isend(sendList_xz, sendCount_xz,MPI_INT,rank_XZ,sendtag,comm,&req1[10]); - MPI_Irecv(recvList_XZ, recvCount_XZ,MPI_INT,rank_xz,recvtag,comm,&req2[10]); - MPI_Isend(sendList_XZ, sendCount_XZ,MPI_INT,rank_xz,sendtag,comm,&req1[11]); - MPI_Irecv(recvList_xz, recvCount_xz,MPI_INT,rank_XZ,recvtag,comm,&req2[11]); - MPI_Isend(sendList_Xz, sendCount_Xz,MPI_INT,rank_xZ,sendtag,comm,&req1[12]); - MPI_Irecv(recvList_xZ, recvCount_xZ,MPI_INT,rank_Xz,recvtag,comm,&req2[12]); - MPI_Isend(sendList_xZ, sendCount_xZ,MPI_INT,rank_Xz,sendtag,comm,&req1[13]); - MPI_Irecv(recvList_Xz, recvCount_Xz,MPI_INT,rank_xZ,recvtag,comm,&req2[13]); + req1[10] = comm.Isend(sendList_xz,sendCount_xz,rank_XZ,sendtag); + req2[10] = comm.Irecv(recvList_XZ,recvCount_XZ,rank_xz,recvtag); + req1[11] = comm.Isend(sendList_XZ,sendCount_XZ,rank_xz,sendtag); + req2[11] = comm.Irecv(recvList_xz,recvCount_xz,rank_XZ,recvtag); + req1[12] = comm.Isend(sendList_Xz,sendCount_Xz,rank_xZ,sendtag); + req2[12] = comm.Irecv(recvList_xZ,recvCount_xZ,rank_Xz,recvtag); + req1[13] = comm.Isend(sendList_xZ,sendCount_xZ,rank_Xz,sendtag); + req2[13] = comm.Irecv(recvList_Xz,recvCount_Xz,rank_xZ,recvtag); - MPI_Isend(sendList_yz, sendCount_yz,MPI_INT,rank_YZ,sendtag,comm,&req1[14]); - MPI_Irecv(recvList_YZ, recvCount_YZ,MPI_INT,rank_yz,recvtag,comm,&req2[14]); - MPI_Isend(sendList_YZ, sendCount_YZ,MPI_INT,rank_yz,sendtag,comm,&req1[15]); - MPI_Irecv(recvList_yz, recvCount_yz,MPI_INT,rank_YZ,recvtag,comm,&req2[15]); - MPI_Isend(sendList_Yz, sendCount_Yz,MPI_INT,rank_yZ,sendtag,comm,&req1[16]); - MPI_Irecv(recvList_yZ, recvCount_yZ,MPI_INT,rank_Yz,recvtag,comm,&req2[16]); - MPI_Isend(sendList_yZ, sendCount_yZ,MPI_INT,rank_Yz,sendtag,comm,&req1[17]); - MPI_Irecv(recvList_Yz, recvCount_Yz,MPI_INT,rank_yZ,recvtag,comm,&req2[17]); - MPI_Waitall(18,req1,stat1); - MPI_Waitall(18,req2,stat2); - MPI_Barrier(comm); + req1[14] = comm.Isend(sendList_yz,sendCount_yz,rank_YZ,sendtag); + req2[14] = comm.Irecv(recvList_YZ,recvCount_YZ,rank_yz,recvtag); + req1[15] = comm.Isend(sendList_YZ,sendCount_YZ,rank_yz,sendtag); + req2[15] = comm.Irecv(recvList_yz,recvCount_yz,rank_YZ,recvtag); + req1[16] = comm.Isend(sendList_Yz,sendCount_Yz,rank_yZ,sendtag); + req2[16] = comm.Irecv(recvList_yZ,recvCount_yZ,rank_Yz,recvtag); + req1[17] = comm.Isend(sendList_yZ,sendCount_yZ,rank_Yz,sendtag); + req2[17] = comm.Irecv(recvList_Yz,recvCount_Yz,rank_yZ,recvtag); + comm.waitAll(18,req1); + comm.waitAll(18,req2); + comm.barrier(); //...................................................................................... 
for (int idx=0; idx #include #include -#include +#include "common/MPI.h" #include "pmmc.h" #include "Domain.h" @@ -101,15 +101,11 @@ inline void UnpackID(int *list, int count, char *recvbuf, char *ID){ int main(int argc, char **argv) { - //***************************************** - // ***** MPI STUFF **************** - //***************************************** // Initialize MPI - int rank,nprocs; MPI_Init(&argc,&argv); - MPI_Comm comm = MPI_COMM_WORLD; - MPI_Comm_rank(comm,&rank); - MPI_Comm_size(comm,&nprocs); + Utilities::MPI comm( MPI_COMM_WORLD ); + int rank = comm.getRank(); + int nprocs = comm.getSize(); // parallel domain size (# of sub-domains) int nprocx,nprocy,nprocz; int iproc,jproc,kproc; @@ -123,7 +119,6 @@ int main(int argc, char **argv) int rank_yz,rank_YZ,rank_yZ,rank_Yz; //********************************** MPI_Request req1[18],req2[18]; - MPI_Status stat1[18],stat2[18]; if (rank == 0){ printf("********************************************************\n"); @@ -203,35 +198,35 @@ int main(int argc, char **argv) } // ************************************************************** // Broadcast simulation parameters from rank 0 to all other procs - MPI_Barrier(comm); + comm.barrier(); //................................................. - MPI_Bcast(&tau,1,MPI_DOUBLE,0,comm); - MPI_Bcast(&alpha,1,MPI_DOUBLE,0,comm); - MPI_Bcast(&beta,1,MPI_DOUBLE,0,comm); - MPI_Bcast(&das,1,MPI_DOUBLE,0,comm); - MPI_Bcast(&dbs,1,MPI_DOUBLE,0,comm); - MPI_Bcast(&pBC,1,MPI_LOGICAL,0,comm); - MPI_Bcast(&din,1,MPI_DOUBLE,0,comm); - MPI_Bcast(&dout,1,MPI_DOUBLE,0,comm); - MPI_Bcast(&Fx,1,MPI_DOUBLE,0,comm); - MPI_Bcast(&Fy,1,MPI_DOUBLE,0,comm); - MPI_Bcast(&Fz,1,MPI_DOUBLE,0,comm); - MPI_Bcast(×tepMax,1,MPI_INT,0,comm); - MPI_Bcast(&interval,1,MPI_INT,0,comm); - MPI_Bcast(&tol,1,MPI_DOUBLE,0,comm); + comm.bcast(&tau,1,0); + comm.bcast(&alpha,1,0); + comm.bcast(&beta,1,0); + comm.bcast(&das,1,0); + comm.bcast(&dbs,1,0); + comm.bcast(&pBC,1,0); + comm.bcast(&din,1,0); + comm.bcast(&dout,1,0); + comm.bcast(&Fx,1,0); + comm.bcast(&Fy,1,0); + comm.bcast(&Fz,1,0); + comm.bcast(×tepMax,1,0); + comm.bcast(&interval,1,0); + comm.bcast(&tol,1,0); // Computational domain - MPI_Bcast(&Nz,1,MPI_INT,0,comm); - MPI_Bcast(&nBlocks,1,MPI_INT,0,comm); - MPI_Bcast(&nthreads,1,MPI_INT,0,comm); - MPI_Bcast(&nprocx,1,MPI_INT,0,comm); - MPI_Bcast(&nprocy,1,MPI_INT,0,comm); - MPI_Bcast(&nprocz,1,MPI_INT,0,comm); - MPI_Bcast(&nspheres,1,MPI_INT,0,comm); - MPI_Bcast(&Lx,1,MPI_DOUBLE,0,comm); - MPI_Bcast(&Ly,1,MPI_DOUBLE,0,comm); - MPI_Bcast(&Lz,1,MPI_DOUBLE,0,comm); + comm.bcast(&Nz,1,0); + comm.bcast(&nBlocks,1,0); + comm.bcast(&nthreads,1,0); + comm.bcast(&nprocx,1,0); + comm.bcast(&nprocy,1,0); + comm.bcast(&nprocz,1,0); + comm.bcast(&nspheres,1,0); + comm.bcast(&Lx,1,0); + comm.bcast(&Ly,1,0); + comm.bcast(&Lz,1,0); //................................................. - MPI_Barrier(comm); + comm.barrier(); // ************************************************************** // ************************************************************** double Ps = -(das-dbs)/(das+dbs); @@ -263,7 +258,7 @@ int main(int argc, char **argv) printf("********************************************************\n"); } - MPI_Barrier(comm); + comm.barrier(); kproc = rank/(nprocx*nprocy); jproc = (rank-nprocx*nprocy*kproc)/nprocx; iproc = rank-nprocx*nprocy*kproc-nprocz*jproc; @@ -561,14 +556,14 @@ int main(int argc, char **argv) //....................................................................... 
if (rank == 0) printf("Reading the sphere packing \n"); if (rank == 0) ReadSpherePacking(nspheres,cx,cy,cz,rad); - MPI_Barrier(comm); + comm.barrier(); // Broadcast the sphere packing to all processes - MPI_Bcast(cx,nspheres,MPI_DOUBLE,0,comm); - MPI_Bcast(cy,nspheres,MPI_DOUBLE,0,comm); - MPI_Bcast(cz,nspheres,MPI_DOUBLE,0,comm); - MPI_Bcast(rad,nspheres,MPI_DOUBLE,0,comm); + comm.bcast(cx,nspheres,0); + comm.bcast(cy,nspheres,0); + comm.bcast(cz,nspheres,0); + comm.bcast(rad,nspheres,0); //........................................................................... - MPI_Barrier(comm); + comm.barrier(); if (rank == 0) cout << "Domain set." << endl; //....................................................................... // sprintf(LocalRankString,"%05d",rank); @@ -718,7 +713,7 @@ int main(int argc, char **argv) } } } - MPI_Barrier(comm); + comm.barrier(); if (rank==0) printf ("SendLists are ready on host\n"); //...................................................................................... // Use MPI to fill in the recvCounts form the associated processes @@ -729,89 +724,49 @@ int main(int argc, char **argv) //********************************************************************************** // Fill in the recieve counts using MPI sendtag = recvtag = 3; - MPI_Isend(&sendCount_x, 1,MPI_INT,rank_X,sendtag,comm,&req1[0]); - MPI_Irecv(&recvCount_X, 1,MPI_INT,rank_x,recvtag,comm,&req2[0]); - MPI_Isend(&sendCount_X, 1,MPI_INT,rank_x,sendtag,comm,&req1[1]); - MPI_Irecv(&recvCount_x, 1,MPI_INT,rank_X,recvtag,comm,&req2[1]); - MPI_Isend(&sendCount_y, 1,MPI_INT,rank_Y,sendtag,comm,&req1[2]); - MPI_Irecv(&recvCount_Y, 1,MPI_INT,rank_y,recvtag,comm,&req2[2]); - MPI_Isend(&sendCount_Y, 1,MPI_INT,rank_y,sendtag,comm,&req1[3]); - MPI_Irecv(&recvCount_y, 1,MPI_INT,rank_Y,recvtag,comm,&req2[3]); - MPI_Isend(&sendCount_z, 1,MPI_INT,rank_Z,sendtag,comm,&req1[4]); - MPI_Irecv(&recvCount_Z, 1,MPI_INT,rank_z,recvtag,comm,&req2[4]); - MPI_Isend(&sendCount_Z, 1,MPI_INT,rank_z,sendtag,comm,&req1[5]); - MPI_Irecv(&recvCount_z, 1,MPI_INT,rank_Z,recvtag,comm,&req2[5]); + req1[0] = comm.Isend(&sendCount_x,1,rank_X,sendtag); + req2[0] = comm.Irecv(&recvCount_X,1,rank_x,recvtag); + req1[1] = comm.Isend(&sendCount_X,1,rank_x,sendtag); + req2[1] = comm.Irecv(&recvCount_x,1,rank_X,recvtag); + req1[2] = comm.Isend(&sendCount_y,1,rank_Y,sendtag); + req2[2] = comm.Irecv(&recvCount_Y,1,rank_y,recvtag); + req1[3] = comm.Isend(&sendCount_Y,1,rank_y,sendtag); + req2[3] = comm.Irecv(&recvCount_y,1,rank_Y,recvtag); + req1[4] = comm.Isend(&sendCount_z,1,rank_Z,sendtag); + req2[4] = comm.Irecv(&recvCount_Z,1,rank_z,recvtag); + req1[5] = comm.Isend(&sendCount_Z,1,rank_z,sendtag); + req2[5] = comm.Irecv(&recvCount_z,1,rank_Z,recvtag); - MPI_Isend(&sendCount_xy, 1,MPI_INT,rank_XY,sendtag,comm,&req1[6]); - MPI_Irecv(&recvCount_XY, 1,MPI_INT,rank_xy,recvtag,comm,&req2[6]); - MPI_Isend(&sendCount_XY, 1,MPI_INT,rank_xy,sendtag,comm,&req1[7]); - MPI_Irecv(&recvCount_xy, 1,MPI_INT,rank_XY,recvtag,comm,&req2[7]); - MPI_Isend(&sendCount_Xy, 1,MPI_INT,rank_xY,sendtag,comm,&req1[8]); - MPI_Irecv(&recvCount_xY, 1,MPI_INT,rank_Xy,recvtag,comm,&req2[8]); - MPI_Isend(&sendCount_xY, 1,MPI_INT,rank_Xy,sendtag,comm,&req1[9]); - MPI_Irecv(&recvCount_Xy, 1,MPI_INT,rank_xY,recvtag,comm,&req2[9]); + req1[6] = comm.Isend(&sendCount_xy,1,rank_XY,sendtag); + req2[6] = comm.Irecv(&recvCount_XY,1,rank_xy,recvtag); + req1[7] = comm.Isend(&sendCount_XY,1,rank_xy,sendtag); + req2[7] = comm.Irecv(&recvCount_xy,1,rank_XY,recvtag); + req1[8] = 
comm.Isend(&sendCount_Xy,1,rank_xY,sendtag); + req2[8] = comm.Irecv(&recvCount_xY,1,rank_Xy,recvtag); + req1[9] = comm.Isend(&sendCount_xY,1,rank_Xy,sendtag); + req2[9] = comm.Irecv(&recvCount_Xy,1,rank_xY,recvtag); - MPI_Isend(&sendCount_xz, 1,MPI_INT,rank_XZ,sendtag,comm,&req1[10]); - MPI_Irecv(&recvCount_XZ, 1,MPI_INT,rank_xz,recvtag,comm,&req2[10]); - MPI_Isend(&sendCount_XZ, 1,MPI_INT,rank_xz,sendtag,comm,&req1[11]); - MPI_Irecv(&recvCount_xz, 1,MPI_INT,rank_XZ,recvtag,comm,&req2[11]); - MPI_Isend(&sendCount_Xz, 1,MPI_INT,rank_xZ,sendtag,comm,&req1[12]); - MPI_Irecv(&recvCount_xZ, 1,MPI_INT,rank_Xz,recvtag,comm,&req2[12]); - MPI_Isend(&sendCount_xZ, 1,MPI_INT,rank_Xz,sendtag,comm,&req1[13]); - MPI_Irecv(&recvCount_Xz, 1,MPI_INT,rank_xZ,recvtag,comm,&req2[13]); + req1[10] = comm.Isend(&sendCount_xz,1,rank_XZ,sendtag); + req2[10] = comm.Irecv(&recvCount_XZ,1,rank_xz,recvtag); + req1[11] = comm.Isend(&sendCount_XZ,1,rank_xz,sendtag); + req2[11] = comm.Irecv(&recvCount_xz,1,rank_XZ,recvtag); + req1[12] = comm.Isend(&sendCount_Xz,1,rank_xZ,sendtag); + req2[12] = comm.Irecv(&recvCount_xZ,1,rank_Xz,recvtag); + req1[13] = comm.Isend(&sendCount_xZ,1,rank_Xz,sendtag); + req2[13] = comm.Irecv(&recvCount_Xz,1,rank_xZ,recvtag); - MPI_Isend(&sendCount_yz, 1,MPI_INT,rank_YZ,sendtag,comm,&req1[14]); - MPI_Irecv(&recvCount_YZ, 1,MPI_INT,rank_yz,recvtag,comm,&req2[14]); - MPI_Isend(&sendCount_YZ, 1,MPI_INT,rank_yz,sendtag,comm,&req1[15]); - MPI_Irecv(&recvCount_yz, 1,MPI_INT,rank_YZ,recvtag,comm,&req2[15]); - MPI_Isend(&sendCount_Yz, 1,MPI_INT,rank_yZ,sendtag,comm,&req1[16]); - MPI_Irecv(&recvCount_yZ, 1,MPI_INT,rank_Yz,recvtag,comm,&req2[16]); - MPI_Isend(&sendCount_yZ, 1,MPI_INT,rank_Yz,sendtag,comm,&req1[17]); - MPI_Irecv(&recvCount_Yz, 1,MPI_INT,rank_yZ,recvtag,comm,&req2[17]); - MPI_Waitall(18,req1,stat1); - MPI_Waitall(18,req2,stat2); - MPI_Barrier(comm); -/* MPI_Send(&sendCount_x,1,MPI_INT,rank_X,sendtag,comm); - MPI_Recv(&recvCount_X,1,MPI_INT,rank_x,recvtag,comm,MPI_STATUS_IGNORE); - MPI_Send(&sendCount_X,1,MPI_INT,rank_x,sendtag,comm); - MPI_Recv(&recvCount_x,1,MPI_INT,rank_X,recvtag,comm,MPI_STATUS_IGNORE); - MPI_Send(&sendCount_y,1,MPI_INT,rank_Y,sendtag,comm); - MPI_Recv(&recvCount_Y,1,MPI_INT,rank_y,recvtag,comm,MPI_STATUS_IGNORE); - MPI_Send(&sendCount_Y,1,MPI_INT,rank_y,sendtag,comm); - MPI_Recv(&recvCount_y,1,MPI_INT,rank_Y,recvtag,comm,MPI_STATUS_IGNORE); - MPI_Send(&sendCount_z,1,MPI_INT,rank_Z,sendtag,comm); - MPI_Recv(&recvCount_Z,1,MPI_INT,rank_z,recvtag,comm,MPI_STATUS_IGNORE); - MPI_Send(&sendCount_Z,1,MPI_INT,rank_z,sendtag,comm); - MPI_Recv(&recvCount_z,1,MPI_INT,rank_Z,recvtag,comm,MPI_STATUS_IGNORE); - - MPI_Send(&sendCount_xy,1,MPI_INT,rank_XY,sendtag,comm); - MPI_Recv(&recvCount_XY,1,MPI_INT,rank_xy,recvtag,comm,MPI_STATUS_IGNORE); - MPI_Send(&sendCount_XY,1,MPI_INT,rank_xy,sendtag,comm); - MPI_Recv(&recvCount_xy,1,MPI_INT,rank_XY,recvtag,comm,MPI_STATUS_IGNORE); - MPI_Send(&sendCount_Xy,1,MPI_INT,rank_xY,sendtag,comm); - MPI_Recv(&recvCount_xY,1,MPI_INT,rank_Xy,recvtag,comm,MPI_STATUS_IGNORE); - MPI_Send(&sendCount_xY,1,MPI_INT,rank_Xy,sendtag,comm); - MPI_Recv(&recvCount_Xy,1,MPI_INT,rank_xY,recvtag,comm,MPI_STATUS_IGNORE); - - MPI_Send(&sendCount_xz,1,MPI_INT,rank_XZ,sendtag,comm); - MPI_Recv(&recvCount_XZ,1,MPI_INT,rank_xz,recvtag,comm,MPI_STATUS_IGNORE); - MPI_Send(&sendCount_XZ,1,MPI_INT,rank_xz,sendtag,comm); - MPI_Recv(&recvCount_xz,1,MPI_INT,rank_XZ,recvtag,comm,MPI_STATUS_IGNORE); - MPI_Send(&sendCount_Xz,1,MPI_INT,rank_xZ,sendtag,comm); - 
MPI_Recv(&recvCount_xZ,1,MPI_INT,rank_Xz,recvtag,comm,MPI_STATUS_IGNORE); - MPI_Send(&sendCount_xZ,1,MPI_INT,rank_Xz,sendtag,comm); - MPI_Recv(&recvCount_Xz,1,MPI_INT,rank_xZ,recvtag,comm,MPI_STATUS_IGNORE); - - MPI_Send(&sendCount_yz,1,MPI_INT,rank_YZ,sendtag,comm); - MPI_Recv(&recvCount_YZ,1,MPI_INT,rank_yz,recvtag,comm,MPI_STATUS_IGNORE); - MPI_Send(&sendCount_YZ,1,MPI_INT,rank_yz,sendtag,comm); - MPI_Recv(&recvCount_yz,1,MPI_INT,rank_YZ,recvtag,comm,MPI_STATUS_IGNORE); - MPI_Send(&sendCount_Yz,1,MPI_INT,rank_yZ,sendtag,comm); - MPI_Recv(&recvCount_yZ,1,MPI_INT,rank_Yz,recvtag,comm,MPI_STATUS_IGNORE); - MPI_Send(&sendCount_yZ,1,MPI_INT,rank_Yz,sendtag,comm); - MPI_Recv(&recvCount_Yz,1,MPI_INT,rank_yZ,recvtag,comm,MPI_STATUS_IGNORE); - MPI_Barrier(comm); -*/ //********************************************************************************** + req1[14] = comm.Isend(&sendCount_yz,1,rank_YZ,sendtag); + req2[14] = comm.Irecv(&recvCount_YZ,1,rank_yz,recvtag); + req1[15] = comm.Isend(&sendCount_YZ,1,rank_yz,sendtag); + req2[15] = comm.Irecv(&recvCount_yz,1,rank_YZ,recvtag); + req1[16] = comm.Isend(&sendCount_Yz,1,rank_yZ,sendtag); + req2[16] = comm.Irecv(&recvCount_yZ,1,rank_Yz,recvtag); + req1[17] = comm.Isend(&sendCount_yZ,1,rank_Yz,sendtag); + req2[17] = comm.Irecv(&recvCount_Yz,1,rank_yZ,recvtag); + comm.waitAll(18,req1); + comm.waitAll(18,req2); + comm.barrier(); + //********************************************************************************** //...................................................................................... int *recvList_x, *recvList_y, *recvList_z, *recvList_X, *recvList_Y, *recvList_Z; int *recvList_xy, *recvList_yz, *recvList_xz, *recvList_Xy, *recvList_Yz, *recvList_xZ; @@ -841,48 +796,48 @@ int main(int argc, char **argv) // Use MPI to fill in the appropriate values for recvList // Fill in the recieve lists using MPI sendtag = recvtag = 4; - MPI_Isend(sendList_x, sendCount_x,MPI_INT,rank_X,sendtag,comm,&req1[0]); - MPI_Irecv(recvList_X, recvCount_X,MPI_INT,rank_x,recvtag,comm,&req2[0]); - MPI_Isend(sendList_X, sendCount_X,MPI_INT,rank_x,sendtag,comm,&req1[1]); - MPI_Irecv(recvList_x, recvCount_x,MPI_INT,rank_X,recvtag,comm,&req2[1]); - MPI_Isend(sendList_y, sendCount_y,MPI_INT,rank_Y,sendtag,comm,&req1[2]); - MPI_Irecv(recvList_Y, recvCount_Y,MPI_INT,rank_y,recvtag,comm,&req2[2]); - MPI_Isend(sendList_Y, sendCount_Y,MPI_INT,rank_y,sendtag,comm,&req1[3]); - MPI_Irecv(recvList_y, recvCount_y,MPI_INT,rank_Y,recvtag,comm,&req2[3]); - MPI_Isend(sendList_z, sendCount_z,MPI_INT,rank_Z,sendtag,comm,&req1[4]); - MPI_Irecv(recvList_Z, recvCount_Z,MPI_INT,rank_z,recvtag,comm,&req2[4]); - MPI_Isend(sendList_Z, sendCount_Z,MPI_INT,rank_z,sendtag,comm,&req1[5]); - MPI_Irecv(recvList_z, recvCount_z,MPI_INT,rank_Z,recvtag,comm,&req2[5]); + req1[0] = comm.Isend(sendList_x,sendCount_x,rank_X,sendtag); + req2[0] = comm.Irecv(recvList_X,recvCount_X,rank_x,recvtag); + req1[1] = comm.Isend(sendList_X,sendCount_X,rank_x,sendtag); + req2[1] = comm.Irecv(recvList_x,recvCount_x,rank_X,recvtag); + req1[2] = comm.Isend(sendList_y,sendCount_y,rank_Y,sendtag); + req2[2] = comm.Irecv(recvList_Y,recvCount_Y,rank_y,recvtag); + req1[3] = comm.Isend(sendList_Y,sendCount_Y,rank_y,sendtag); + req2[3] = comm.Irecv(recvList_y,recvCount_y,rank_Y,recvtag); + req1[4] = comm.Isend(sendList_z,sendCount_z,rank_Z,sendtag); + req2[4] = comm.Irecv(recvList_Z,recvCount_Z,rank_z,recvtag); + req1[5] = comm.Isend(sendList_Z,sendCount_Z,rank_z,sendtag); + req2[5] = 
comm.Irecv(recvList_z,recvCount_z,rank_Z,recvtag); - MPI_Isend(sendList_xy, sendCount_xy,MPI_INT,rank_XY,sendtag,comm,&req1[6]); - MPI_Irecv(recvList_XY, recvCount_XY,MPI_INT,rank_xy,recvtag,comm,&req2[6]); - MPI_Isend(sendList_XY, sendCount_XY,MPI_INT,rank_xy,sendtag,comm,&req1[7]); - MPI_Irecv(recvList_xy, recvCount_xy,MPI_INT,rank_XY,recvtag,comm,&req2[7]); - MPI_Isend(sendList_Xy, sendCount_Xy,MPI_INT,rank_xY,sendtag,comm,&req1[8]); - MPI_Irecv(recvList_xY, recvCount_xY,MPI_INT,rank_Xy,recvtag,comm,&req2[8]); - MPI_Isend(sendList_xY, sendCount_xY,MPI_INT,rank_Xy,sendtag,comm,&req1[9]); - MPI_Irecv(recvList_Xy, recvCount_Xy,MPI_INT,rank_xY,recvtag,comm,&req2[9]); + req1[6] = comm.Isend(sendList_xy,sendCount_xy,rank_XY,sendtag); + req2[6] = comm.Irecv(recvList_XY,recvCount_XY,rank_xy,recvtag); + req1[7] = comm.Isend(sendList_XY,sendCount_XY,rank_xy,sendtag); + req2[7] = comm.Irecv(recvList_xy,recvCount_xy,rank_XY,recvtag); + req1[8] = comm.Isend(sendList_Xy,sendCount_Xy,rank_xY,sendtag); + req2[8] = comm.Irecv(recvList_xY,recvCount_xY,rank_Xy,recvtag); + req1[9] = comm.Isend(sendList_xY,sendCount_xY,rank_Xy,sendtag); + req2[9] = comm.Irecv(recvList_Xy,recvCount_Xy,rank_xY,recvtag); - MPI_Isend(sendList_xz, sendCount_xz,MPI_INT,rank_XZ,sendtag,comm,&req1[10]); - MPI_Irecv(recvList_XZ, recvCount_XZ,MPI_INT,rank_xz,recvtag,comm,&req2[10]); - MPI_Isend(sendList_XZ, sendCount_XZ,MPI_INT,rank_xz,sendtag,comm,&req1[11]); - MPI_Irecv(recvList_xz, recvCount_xz,MPI_INT,rank_XZ,recvtag,comm,&req2[11]); - MPI_Isend(sendList_Xz, sendCount_Xz,MPI_INT,rank_xZ,sendtag,comm,&req1[12]); - MPI_Irecv(recvList_xZ, recvCount_xZ,MPI_INT,rank_Xz,recvtag,comm,&req2[12]); - MPI_Isend(sendList_xZ, sendCount_xZ,MPI_INT,rank_Xz,sendtag,comm,&req1[13]); - MPI_Irecv(recvList_Xz, recvCount_Xz,MPI_INT,rank_xZ,recvtag,comm,&req2[13]); + req1[10] = comm.Isend(sendList_xz,sendCount_xz,rank_XZ,sendtag); + req2[10] = comm.Irecv(recvList_XZ,recvCount_XZ,rank_xz,recvtag); + req1[11] = comm.Isend(sendList_XZ,sendCount_XZ,rank_xz,sendtag); + req2[11] = comm.Irecv(recvList_xz,recvCount_xz,rank_XZ,recvtag); + req1[12] = comm.Isend(sendList_Xz,sendCount_Xz,rank_xZ,sendtag); + req2[12] = comm.Irecv(recvList_xZ,recvCount_xZ,rank_Xz,recvtag); + req1[13] = comm.Isend(sendList_xZ,sendCount_xZ,rank_Xz,sendtag); + req2[13] = comm.Irecv(recvList_Xz,recvCount_Xz,rank_xZ,recvtag); - MPI_Isend(sendList_yz, sendCount_yz,MPI_INT,rank_YZ,sendtag,comm,&req1[14]); - MPI_Irecv(recvList_YZ, recvCount_YZ,MPI_INT,rank_yz,recvtag,comm,&req2[14]); - MPI_Isend(sendList_YZ, sendCount_YZ,MPI_INT,rank_yz,sendtag,comm,&req1[15]); - MPI_Irecv(recvList_yz, recvCount_yz,MPI_INT,rank_YZ,recvtag,comm,&req2[15]); - MPI_Isend(sendList_Yz, sendCount_Yz,MPI_INT,rank_yZ,sendtag,comm,&req1[16]); - MPI_Irecv(recvList_yZ, recvCount_yZ,MPI_INT,rank_Yz,recvtag,comm,&req2[16]); - MPI_Isend(sendList_yZ, sendCount_yZ,MPI_INT,rank_Yz,sendtag,comm,&req1[17]); - MPI_Irecv(recvList_Yz, recvCount_Yz,MPI_INT,rank_yZ,recvtag,comm,&req2[17]); - MPI_Waitall(18,req1,stat1); - MPI_Waitall(18,req2,stat2); - MPI_Barrier(comm); + req1[14] = comm.Isend(sendList_yz,sendCount_yz,rank_YZ,sendtag); + req2[14] = comm.Irecv(recvList_YZ,recvCount_YZ,rank_yz,recvtag); + req1[15] = comm.Isend(sendList_YZ,sendCount_YZ,rank_yz,sendtag); + req2[15] = comm.Irecv(recvList_yz,recvCount_yz,rank_YZ,recvtag); + req1[16] = comm.Isend(sendList_Yz,sendCount_Yz,rank_yZ,sendtag); + req2[16] = comm.Irecv(recvList_yZ,recvCount_yZ,rank_Yz,recvtag); + req1[17] = comm.Isend(sendList_yZ,sendCount_yZ,rank_Yz,sendtag); + 
req2[17] = comm.Irecv(recvList_Yz,recvCount_Yz,rank_yZ,recvtag); + comm.waitAll(18,req1); + comm.waitAll(18,req2); + comm.barrier(); //...................................................................................... for (int idx=0; idxkeyExists( "GridFile" )){ // Read the local domain data - auto input_id = readMicroCT( *domain_db, MPI_COMM_WORLD ); + auto input_id = readMicroCT( *domain_db, comm ); // Fill the halo (assuming GCW of 1) array size0 = { (int) input_id.size(0), (int) input_id.size(1), (int) input_id.size(2) }; ArraySize size1 = { (size_t) Mask->Nx, (size_t) Mask->Ny, (size_t) Mask->Nz }; ASSERT( (int) size1[0] == size0[0]+2 && (int) size1[1] == size0[1]+2 && (int) size1[2] == size0[2]+2 ); - fillHalo fill( MPI_COMM_WORLD, Mask->rank_info, size0, { 1, 1, 1 }, 0, 1 ); + fillHalo fill( comm, Mask->rank_info, size0, { 1, 1, 1 }, 0, 1 ); Array id_view; id_view.viewRaw( size1, Mask->id ); fill.copy( input_id, id_view ); @@ -652,7 +652,7 @@ void ScaLBL_ColorModel::Run(){ double starttime,stoptime,cputime; ScaLBL_DeviceBarrier(); comm.barrier(); - starttime = MPI_Wtime(); + starttime = Utilities::MPI::time(); //......................................... //************ MAIN ITERATION LOOP ***************************************/ @@ -991,7 +991,7 @@ void ScaLBL_ColorModel::Run(){ //************************************************************************ ScaLBL_DeviceBarrier(); comm.barrier(); - stoptime = MPI_Wtime(); + stoptime = Utilities::MPI::time(); if (rank==0) printf("-------------------------------------------------------------------\n"); // Compute the walltime per timestep cputime = (stoptime - starttime)/timestep; diff --git a/models/DFHModel.cpp b/models/DFHModel.cpp index ced5853f..9709b107 100644 --- a/models/DFHModel.cpp +++ b/models/DFHModel.cpp @@ -487,7 +487,7 @@ void ScaLBL_DFHModel::Run(){ double starttime,stoptime,cputime; ScaLBL_DeviceBarrier(); comm.barrier(); - starttime = MPI_Wtime(); + starttime = Utilities::MPI::time(); //......................................... 
//************ MAIN ITERATION LOOP ***************************************/ @@ -583,7 +583,7 @@ void ScaLBL_DFHModel::Run(){ //************************************************************************ ScaLBL_DeviceBarrier(); comm.barrier(); - stoptime = MPI_Wtime(); + stoptime = Utilities::MPI::time(); if (rank==0) printf("-------------------------------------------------------------------\n"); // Compute the walltime per timestep cputime = (stoptime - starttime)/timestep; diff --git a/models/MRTModel.cpp b/models/MRTModel.cpp index d9b8069d..76d54571 100644 --- a/models/MRTModel.cpp +++ b/models/MRTModel.cpp @@ -208,7 +208,7 @@ void ScaLBL_MRTModel::Run(){ double starttime,stoptime,cputime; ScaLBL_DeviceBarrier(); comm.barrier(); - starttime = MPI_Wtime(); + starttime = Utilities::MPI::time(); if (rank==0) printf("Beginning AA timesteps, timestepMax = %i \n", timestepMax); if (rank==0) printf("********************************************************\n"); timestep=0; @@ -306,7 +306,7 @@ void ScaLBL_MRTModel::Run(){ } } //************************************************************************/ - stoptime = MPI_Wtime(); + stoptime = Utilities::MPI::time(); if (rank==0) printf("-------------------------------------------------------------------\n"); // Compute the walltime per timestep cputime = (stoptime - starttime)/timestep; diff --git a/tests/BlobAnalyzeParallel.cpp b/tests/BlobAnalyzeParallel.cpp index 48e9e230..773309f9 100644 --- a/tests/BlobAnalyzeParallel.cpp +++ b/tests/BlobAnalyzeParallel.cpp @@ -138,16 +138,16 @@ int main(int argc, char **argv) } comm.barrier(); // Computational domain - MPI_Bcast(&nx,1,MPI_INT,0,comm); - MPI_Bcast(&ny,1,MPI_INT,0,comm); - MPI_Bcast(&nz,1,MPI_INT,0,comm); - MPI_Bcast(&nprocx,1,MPI_INT,0,comm); - MPI_Bcast(&nprocy,1,MPI_INT,0,comm); - MPI_Bcast(&nprocz,1,MPI_INT,0,comm); - MPI_Bcast(&nspheres,1,MPI_INT,0,comm); - MPI_Bcast(&Lx,1,MPI_DOUBLE,0,comm); - MPI_Bcast(&Ly,1,MPI_DOUBLE,0,comm); - MPI_Bcast(&Lz,1,MPI_DOUBLE,0,comm); + comm.bcast(&nx,1,0); + comm.bcast(&ny,1,0); + comm.bcast(&nz,1,0); + comm.bcast(&nprocx,1,0); + comm.bcast(&nprocy,1,0); + comm.bcast(&nprocz,1,0); + comm.bcast(&nspheres,1,0); + comm.bcast(&Lx,1,0); + comm.bcast(&Ly,1,0); + comm.bcast(&Lz,1,0); //................................................. comm.barrier(); @@ -291,7 +291,7 @@ int main(int argc, char **argv) } Dm.CommInit(); // Initialize communications for domains - MPI_Allreduce(&sum,&sum_global,1,MPI_DOUBLE,MPI_SUM,comm); + sum_global = comm.sumReduce( sum ); porosity = sum_global/Dm.Volume; if (rank==0) printf("Porosity = %f \n",porosity); diff --git a/tests/GenerateSphereTest.cpp b/tests/GenerateSphereTest.cpp index 5886be21..0c84287e 100644 --- a/tests/GenerateSphereTest.cpp +++ b/tests/GenerateSphereTest.cpp @@ -213,42 +213,24 @@ inline void MorphOpen(DoubleArray SignDist, char *id, Domain &Dm, int nx, int ny PackID(Dm.sendList_yZ, Dm.sendCount_yZ ,sendID_yZ, id); PackID(Dm.sendList_YZ, Dm.sendCount_YZ ,sendID_YZ, id); //...................................................................................... 
- MPI_Sendrecv(sendID_x,Dm.sendCount_x,MPI_CHAR,Dm.rank_x(),sendtag, - recvID_X,Dm.recvCount_X,MPI_CHAR,Dm.rank_X(),recvtag,Dm.Comm.getCommunicator(),MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_X,Dm.sendCount_X,MPI_CHAR,Dm.rank_X(),sendtag, - recvID_x,Dm.recvCount_x,MPI_CHAR,Dm.rank_x(),recvtag,Dm.Comm.getCommunicator(),MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_y,Dm.sendCount_y,MPI_CHAR,Dm.rank_y(),sendtag, - recvID_Y,Dm.recvCount_Y,MPI_CHAR,Dm.rank_Y(),recvtag,Dm.Comm.getCommunicator(),MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_Y,Dm.sendCount_Y,MPI_CHAR,Dm.rank_Y(),sendtag, - recvID_y,Dm.recvCount_y,MPI_CHAR,Dm.rank_y(),recvtag,Dm.Comm.getCommunicator(),MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_z,Dm.sendCount_z,MPI_CHAR,Dm.rank_z(),sendtag, - recvID_Z,Dm.recvCount_Z,MPI_CHAR,Dm.rank_Z(),recvtag,Dm.Comm.getCommunicator(),MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_Z,Dm.sendCount_Z,MPI_CHAR,Dm.rank_Z(),sendtag, - recvID_z,Dm.recvCount_z,MPI_CHAR,Dm.rank_z(),recvtag,Dm.Comm.getCommunicator(),MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_xy,Dm.sendCount_xy,MPI_CHAR,Dm.rank_xy(),sendtag, - recvID_XY,Dm.recvCount_XY,MPI_CHAR,Dm.rank_XY(),recvtag,Dm.Comm.getCommunicator(),MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_XY,Dm.sendCount_XY,MPI_CHAR,Dm.rank_XY(),sendtag, - recvID_xy,Dm.recvCount_xy,MPI_CHAR,Dm.rank_xy(),recvtag,Dm.Comm.getCommunicator(),MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_Xy,Dm.sendCount_Xy,MPI_CHAR,Dm.rank_Xy(),sendtag, - recvID_xY,Dm.recvCount_xY,MPI_CHAR,Dm.rank_xY(),recvtag,Dm.Comm.getCommunicator(),MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_xY,Dm.sendCount_xY,MPI_CHAR,Dm.rank_xY(),sendtag, - recvID_Xy,Dm.recvCount_Xy,MPI_CHAR,Dm.rank_Xy(),recvtag,Dm.Comm.getCommunicator(),MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_xz,Dm.sendCount_xz,MPI_CHAR,Dm.rank_xz(),sendtag, - recvID_XZ,Dm.recvCount_XZ,MPI_CHAR,Dm.rank_XZ(),recvtag,Dm.Comm.getCommunicator(),MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_XZ,Dm.sendCount_XZ,MPI_CHAR,Dm.rank_XZ(),sendtag, - recvID_xz,Dm.recvCount_xz,MPI_CHAR,Dm.rank_xz(),recvtag,Dm.Comm.getCommunicator(),MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_Xz,Dm.sendCount_Xz,MPI_CHAR,Dm.rank_Xz(),sendtag, - recvID_xZ,Dm.recvCount_xZ,MPI_CHAR,Dm.rank_xZ(),recvtag,Dm.Comm.getCommunicator(),MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_xZ,Dm.sendCount_xZ,MPI_CHAR,Dm.rank_xZ(),sendtag, - recvID_Xz,Dm.recvCount_Xz,MPI_CHAR,Dm.rank_Xz(),recvtag,Dm.Comm.getCommunicator(),MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_yz,Dm.sendCount_yz,MPI_CHAR,Dm.rank_yz(),sendtag, - recvID_YZ,Dm.recvCount_YZ,MPI_CHAR,Dm.rank_YZ(),recvtag,Dm.Comm.getCommunicator(),MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_YZ,Dm.sendCount_YZ,MPI_CHAR,Dm.rank_YZ(),sendtag, - recvID_yz,Dm.recvCount_yz,MPI_CHAR,Dm.rank_yz(),recvtag,Dm.Comm.getCommunicator(),MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_Yz,Dm.sendCount_Yz,MPI_CHAR,Dm.rank_Yz(),sendtag, - recvID_yZ,Dm.recvCount_yZ,MPI_CHAR,Dm.rank_yZ(),recvtag,Dm.Comm.getCommunicator(),MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_yZ,Dm.sendCount_yZ,MPI_CHAR,Dm.rank_yZ(),sendtag, - recvID_Yz,Dm.recvCount_Yz,MPI_CHAR,Dm.rank_Yz(),recvtag,Dm.Comm.getCommunicator(),MPI_STATUS_IGNORE); + Dm.Comm.sendrecv(sendID_x,Dm.sendCount_x,Dm.rank_x(),sendtag,recvID_X,Dm.recvCount_X,Dm.rank_X(),recvtag); + Dm.Comm.sendrecv(sendID_X,Dm.sendCount_X,Dm.rank_X(),sendtag,recvID_x,Dm.recvCount_x,Dm.rank_x(),recvtag); + Dm.Comm.sendrecv(sendID_y,Dm.sendCount_y,Dm.rank_y(),sendtag,recvID_Y,Dm.recvCount_Y,Dm.rank_Y(),recvtag); + 
Dm.Comm.sendrecv(sendID_Y,Dm.sendCount_Y,Dm.rank_Y(),sendtag,recvID_y,Dm.recvCount_y,Dm.rank_y(),recvtag); + Dm.Comm.sendrecv(sendID_z,Dm.sendCount_z,Dm.rank_z(),sendtag,recvID_Z,Dm.recvCount_Z,Dm.rank_Z(),recvtag); + Dm.Comm.sendrecv(sendID_Z,Dm.sendCount_Z,Dm.rank_Z(),sendtag,recvID_z,Dm.recvCount_z,Dm.rank_z(),recvtag); + Dm.Comm.sendrecv(sendID_xy,Dm.sendCount_xy,Dm.rank_xy(),sendtag,recvID_XY,Dm.recvCount_XY,Dm.rank_XY(),recvtag); + Dm.Comm.sendrecv(sendID_XY,Dm.sendCount_XY,Dm.rank_XY(),sendtag,recvID_xy,Dm.recvCount_xy,Dm.rank_xy(),recvtag); + Dm.Comm.sendrecv(sendID_Xy,Dm.sendCount_Xy,Dm.rank_Xy(),sendtag,recvID_xY,Dm.recvCount_xY,Dm.rank_xY(),recvtag); + Dm.Comm.sendrecv(sendID_xY,Dm.sendCount_xY,Dm.rank_xY(),sendtag,recvID_Xy,Dm.recvCount_Xy,Dm.rank_Xy(),recvtag); + Dm.Comm.sendrecv(sendID_xz,Dm.sendCount_xz,Dm.rank_xz(),sendtag,recvID_XZ,Dm.recvCount_XZ,Dm.rank_XZ(),recvtag); + Dm.Comm.sendrecv(sendID_XZ,Dm.sendCount_XZ,Dm.rank_XZ(),sendtag,recvID_xz,Dm.recvCount_xz,Dm.rank_xz(),recvtag); + Dm.Comm.sendrecv(sendID_Xz,Dm.sendCount_Xz,Dm.rank_Xz(),sendtag,recvID_xZ,Dm.recvCount_xZ,Dm.rank_xZ(),recvtag); + Dm.Comm.sendrecv(sendID_xZ,Dm.sendCount_xZ,Dm.rank_xZ(),sendtag,recvID_Xz,Dm.recvCount_Xz,Dm.rank_Xz(),recvtag); + Dm.Comm.sendrecv(sendID_yz,Dm.sendCount_yz,Dm.rank_yz(),sendtag,recvID_YZ,Dm.recvCount_YZ,Dm.rank_YZ(),recvtag); + Dm.Comm.sendrecv(sendID_YZ,Dm.sendCount_YZ,Dm.rank_YZ(),sendtag,recvID_yz,Dm.recvCount_yz,Dm.rank_yz(),recvtag); + Dm.Comm.sendrecv(sendID_Yz,Dm.sendCount_Yz,Dm.rank_Yz(),sendtag,recvID_yZ,Dm.recvCount_yZ,Dm.rank_yZ(),recvtag); + Dm.Comm.sendrecv(sendID_yZ,Dm.sendCount_yZ,Dm.rank_yZ(),sendtag,recvID_Yz,Dm.recvCount_Yz,Dm.rank_Yz(),recvtag); //...................................................................................... UnpackID(Dm.recvList_x, Dm.recvCount_x ,recvID_x, id); UnpackID(Dm.recvList_X, Dm.recvCount_X ,recvID_X, id); diff --git a/tests/TestBlobAnalyze.cpp b/tests/TestBlobAnalyze.cpp index 63d928c1..19360fe3 100644 --- a/tests/TestBlobAnalyze.cpp +++ b/tests/TestBlobAnalyze.cpp @@ -190,16 +190,16 @@ int main(int argc, char **argv) } comm.barrier(); // Computational domain - MPI_Bcast(&nx,1,MPI_INT,0,comm); - MPI_Bcast(&ny,1,MPI_INT,0,comm); - MPI_Bcast(&nz,1,MPI_INT,0,comm); - MPI_Bcast(&nprocx,1,MPI_INT,0,comm); - MPI_Bcast(&nprocy,1,MPI_INT,0,comm); - MPI_Bcast(&nprocz,1,MPI_INT,0,comm); - MPI_Bcast(&nspheres,1,MPI_INT,0,comm); - MPI_Bcast(&Lx,1,MPI_DOUBLE,0,comm); - MPI_Bcast(&Ly,1,MPI_DOUBLE,0,comm); - MPI_Bcast(&Lz,1,MPI_DOUBLE,0,comm); + comm.bcast(&nx,1,0); + comm.bcast(&ny,1,0); + comm.bcast(&nz,1,0); + comm.bcast(&nprocx,1,0); + comm.bcast(&nprocy,1,0); + comm.bcast(&nprocz,1,0); + comm.bcast(&nspheres,1,0); + comm.bcast(&Lx,1,0); + comm.bcast(&Ly,1,0); + comm.bcast(&Lz,1,0); //................................................. comm.barrier(); @@ -255,10 +255,10 @@ int main(int argc, char **argv) comm.barrier(); // Broadcast the sphere packing to all processes - MPI_Bcast(cx,nspheres,MPI_DOUBLE,0,comm); - MPI_Bcast(cy,nspheres,MPI_DOUBLE,0,comm); - MPI_Bcast(cz,nspheres,MPI_DOUBLE,0,comm); - MPI_Bcast(rad,nspheres,MPI_DOUBLE,0,comm); + comm.bcast(cx,nspheres,0); + comm.bcast(cy,nspheres,0); + comm.bcast(cz,nspheres,0); + comm.bcast(rad,nspheres,0); //........................................................................... comm.barrier(); //....................................................................... 
diff --git a/tests/TestBubble.cpp b/tests/TestBubble.cpp index e7e0ced8..6eb74b37 100644 --- a/tests/TestBubble.cpp +++ b/tests/TestBubble.cpp @@ -45,7 +45,6 @@ int main(int argc, char **argv) int nprocx,nprocy,nprocz; MPI_Request req1[18],req2[18]; - MPI_Status stat1[18],stat2[18]; if (rank == 0){ printf("********************************************************\n"); @@ -434,7 +433,7 @@ int main(int argc, char **argv) //.......create and start timer............ double starttime,stoptime,cputime; comm.barrier(); - starttime = MPI_Wtime(); + starttime = Utilities::MPI::time(); //......................................... //........................................................................... // MAIN VARIABLES INITIALIZED HERE @@ -809,25 +808,25 @@ int main(int argc, char **argv) } //........................................................................... comm.barrier(); - MPI_Allreduce(&nwp_volume,&nwp_volume_global,1,MPI_DOUBLE,MPI_SUM,comm); - MPI_Allreduce(&awn,&awn_global,1,MPI_DOUBLE,MPI_SUM,comm); - MPI_Allreduce(&ans,&ans_global,1,MPI_DOUBLE,MPI_SUM,comm); - MPI_Allreduce(&aws,&aws_global,1,MPI_DOUBLE,MPI_SUM,comm); - MPI_Allreduce(&lwns,&lwns_global,1,MPI_DOUBLE,MPI_SUM,comm); - MPI_Allreduce(&As,&As_global,1,MPI_DOUBLE,MPI_SUM,comm); - MPI_Allreduce(&Jwn,&Jwn_global,1,MPI_DOUBLE,MPI_SUM,comm); - MPI_Allreduce(&efawns,&efawns_global,1,MPI_DOUBLE,MPI_SUM,comm); + nwp_volume_global = comm.sumReduce( nwp_volume ); + awn_global = comm.sumReduce( awn ); + ans_global = comm.sumReduce( ans ); + aws_global = comm.sumReduce( aws ); + lwns_global = comm.sumReduce( lwns ); + As_global = comm.sumReduce( As ); + Jwn_global = comm.sumReduce( Jwn ); + efawns_global = comm.sumReduce( efawns ); // Phase averages - MPI_Allreduce(&vol_w,&vol_w_global,1,MPI_DOUBLE,MPI_SUM,comm); - MPI_Allreduce(&vol_n,&vol_n_global,1,MPI_DOUBLE,MPI_SUM,comm); - MPI_Allreduce(&paw,&paw_global,1,MPI_DOUBLE,MPI_SUM,comm); - MPI_Allreduce(&pan,&pan_global,1,MPI_DOUBLE,MPI_SUM,comm); - MPI_Allreduce(&vaw(0),&vaw_global(0),3,MPI_DOUBLE,MPI_SUM,comm); - MPI_Allreduce(&van(0),&van_global(0),3,MPI_DOUBLE,MPI_SUM,comm); - MPI_Allreduce(&vawn(0),&vawn_global(0),3,MPI_DOUBLE,MPI_SUM,comm); - MPI_Allreduce(&Gwn(0),&Gwn_global(0),6,MPI_DOUBLE,MPI_SUM,comm); - MPI_Allreduce(&Gns(0),&Gns_global(0),6,MPI_DOUBLE,MPI_SUM,comm); - MPI_Allreduce(&Gws(0),&Gws_global(0),6,MPI_DOUBLE,MPI_SUM,comm); + vol_w_global = comm.sumReduce( vol_w ); + vol_n_global = comm.sumReduce( vol_n ); + paw_global = comm.sumReduce( paw ); + pan_global = comm.sumReduce( pan ); + vaw_global(0) = comm.sumReduce( vaw(0) ); + van_global(0) = comm.sumReduce( van(0) ); + vawn_global(0) = comm.sumReduce( vawn(0) ); + Gwn_global(0) = comm.sumReduce( Gwn(0) ); + Gns_global(0) = comm.sumReduce( Gns(0) ); + Gws_global(0) = comm.sumReduce( Gws(0) ); comm.barrier(); //......................................................................... 
// Compute the change in the total surface energy based on the defined interval @@ -952,7 +951,7 @@ int main(int argc, char **argv) //************************************************************************/ ScaLBL_DeviceBarrier(); comm.barrier(); - stoptime = MPI_Wtime(); + stoptime = Utilities::MPI::time(); if (rank==0) printf("-------------------------------------------------------------------\n"); // Compute the walltime per timestep cputime = (stoptime - starttime)/timestep; diff --git a/tests/TestBubbleDFH.cpp b/tests/TestBubbleDFH.cpp index 7f5d0047..8b4f1a9b 100644 --- a/tests/TestBubbleDFH.cpp +++ b/tests/TestBubbleDFH.cpp @@ -387,7 +387,7 @@ int main(int argc, char **argv) double starttime,stoptime,cputime; ScaLBL_DeviceBarrier(); comm.barrier(); - starttime = MPI_Wtime(); + starttime = Utilities::MPI::time(); //......................................... err = 1.0; @@ -487,7 +487,7 @@ int main(int argc, char **argv) //************************************************************************ ScaLBL_DeviceBarrier(); comm.barrier(); - stoptime = MPI_Wtime(); + stoptime = Utilities::MPI::time(); if (rank==0) printf("-------------------------------------------------------------------\n"); // Compute the walltime per timestep cputime = (stoptime - starttime)/timestep; diff --git a/tests/TestColorGrad.cpp b/tests/TestColorGrad.cpp index df1c1daf..2566f8c0 100644 --- a/tests/TestColorGrad.cpp +++ b/tests/TestColorGrad.cpp @@ -114,16 +114,16 @@ int main(int argc, char **argv) // Broadcast simulation parameters from rank 0 to all other procs comm.barrier(); //................................................. - MPI_Bcast(&Nx,1,MPI_INT,0,comm); - MPI_Bcast(&Ny,1,MPI_INT,0,comm); - MPI_Bcast(&Nz,1,MPI_INT,0,comm); - MPI_Bcast(&nprocx,1,MPI_INT,0,comm); - MPI_Bcast(&nprocy,1,MPI_INT,0,comm); - MPI_Bcast(&nprocz,1,MPI_INT,0,comm); - MPI_Bcast(&nspheres,1,MPI_INT,0,comm); - MPI_Bcast(&Lx,1,MPI_DOUBLE,0,comm); - MPI_Bcast(&Ly,1,MPI_DOUBLE,0,comm); - MPI_Bcast(&Lz,1,MPI_DOUBLE,0,comm); + comm.bcast(&Nx,1,0); + comm.bcast(&Ny,1,0); + comm.bcast(&Nz,1,0); + comm.bcast(&nprocx,1,0); + comm.bcast(&nprocy,1,0); + comm.bcast(&nprocz,1,0); + comm.bcast(&nspheres,1,0); + comm.bcast(&Lx,1,0); + comm.bcast(&Ly,1,0); + comm.bcast(&Lz,1,0); //................................................. comm.barrier(); // ************************************************************** diff --git a/tests/TestCommD3Q19.cpp b/tests/TestCommD3Q19.cpp index d2799355..c4a045ae 100644 --- a/tests/TestCommD3Q19.cpp +++ b/tests/TestCommD3Q19.cpp @@ -378,7 +378,7 @@ int main(int argc, char **argv) //.......create and start timer............ double starttime,stoptime,cputime; comm.barrier(); - starttime = MPI_Wtime(); + starttime = Utilities::MPI::time(); //......................................... @@ -403,7 +403,7 @@ int main(int argc, char **argv) //................................................................... 
} //************************************************************************/ - stoptime = MPI_Wtime(); + stoptime = Utilities::MPI::time(); // cout << "CPU time: " << (stoptime - starttime) << " seconds" << endl; cputime = stoptime - starttime; // cout << "Lattice update rate: "<< double(Nx*Ny*Nz*timestep)/cputime/1000000 << " MLUPS" << endl; diff --git a/tests/TestForceD3Q19.cpp b/tests/TestForceD3Q19.cpp index f8569624..31151584 100644 --- a/tests/TestForceD3Q19.cpp +++ b/tests/TestForceD3Q19.cpp @@ -450,7 +450,7 @@ int main (int argc, char **argv) for (int i=0; iSendD3Q19(dist, &dist[10*Np]); @@ -244,7 +244,7 @@ int main(int argc, char **argv) //************************************************************************/ - stoptime = MPI_Wtime(); + stoptime = Utilities::MPI::time(); // cout << "CPU time: " << (stoptime - starttime) << " seconds" << endl; cputime = stoptime - starttime; // cout << "Lattice update rate: "<< double(Nx*Ny*Nz*timestep)/cputime/1000000 << " MLUPS" << endl; diff --git a/tests/TestMRT.cpp b/tests/TestMRT.cpp index 5f2c4449..e4acba99 100644 --- a/tests/TestMRT.cpp +++ b/tests/TestMRT.cpp @@ -580,16 +580,16 @@ int main(int argc, char **argv) // Broadcast simulation parameters from rank 0 to all other procs comm.barrier(); //................................................. - MPI_Bcast(&Nx,1,MPI_INT,0,comm); - MPI_Bcast(&Ny,1,MPI_INT,0,comm); - MPI_Bcast(&Nz,1,MPI_INT,0,comm); - MPI_Bcast(&nprocx,1,MPI_INT,0,comm); - MPI_Bcast(&nprocy,1,MPI_INT,0,comm); - MPI_Bcast(&nprocz,1,MPI_INT,0,comm); - MPI_Bcast(&nspheres,1,MPI_INT,0,comm); - MPI_Bcast(&Lx,1,MPI_DOUBLE,0,comm); - MPI_Bcast(&Ly,1,MPI_DOUBLE,0,comm); - MPI_Bcast(&Lz,1,MPI_DOUBLE,0,comm); + comm.bcast(&Nx,1,0); + comm.bcast(&Ny,1,0); + comm.bcast(&Nz,1,0); + comm.bcast(&nprocx,1,0); + comm.bcast(&nprocy,1,0); + comm.bcast(&nprocz,1,0); + comm.bcast(&nspheres,1,0); + comm.bcast(&Lx,1,0); + comm.bcast(&Ly,1,0); + comm.bcast(&Lz,1,0); //................................................. 
comm.barrier(); // ************************************************************** @@ -668,7 +668,7 @@ int main(int argc, char **argv) } } comm.barrier(); - MPI_Allreduce(&sum_local,&sum,1,MPI_DOUBLE,MPI_SUM,comm); + sum = comm.sumReduce( sum_local ); porosity = sum*iVol_global; if (rank==0) printf("Media porosity = %f \n",porosity); @@ -731,7 +731,7 @@ int main(int argc, char **argv) double starttime,stoptime,cputime; ScaLBL_DeviceBarrier(); comm.barrier(); - starttime = MPI_Wtime(); + starttime = Utilities::MPI::time(); while (timestep < timesteps) { @@ -752,7 +752,7 @@ int main(int argc, char **argv) } //************************************************************************/ - stoptime = MPI_Wtime(); + stoptime = Utilities::MPI::time(); // cout << "CPU time: " << (stoptime - starttime) << " seconds" << endl; cputime = stoptime - starttime; // cout << "Lattice update rate: "<< double(Nx*Ny*Nz*timestep)/cputime/1000000 << " MLUPS" << endl; @@ -795,7 +795,7 @@ int main(int argc, char **argv) } } } - MPI_Allreduce(&sum_local,&sum,1,MPI_DOUBLE,MPI_SUM,comm); + sum = comm.sumReduce( sum_local ); double PoreVel = sum*iVol_global; if (rank==0) printf("Velocity = %f \n",PoreVel); diff --git a/tests/TestMicroCTReader.cpp b/tests/TestMicroCTReader.cpp index 9a54610c..52a5b9d3 100644 --- a/tests/TestMicroCTReader.cpp +++ b/tests/TestMicroCTReader.cpp @@ -62,7 +62,6 @@ int main(int argc, char **argv) int N_errors = ut.NumFailGlobal(); // Close MPI - MPI_Barrier(MPI_COMM_WORLD); MPI_Finalize(); return N_errors; } diff --git a/tests/TestMomentsD3Q19.cpp b/tests/TestMomentsD3Q19.cpp index 6bd3e8ff..2660ed26 100644 --- a/tests/TestMomentsD3Q19.cpp +++ b/tests/TestMomentsD3Q19.cpp @@ -539,7 +539,7 @@ int main (int argc, char **argv) error=count; // Finished - MPI_Barrier(MPI_COMM_WORLD); + comm.barrier(); MPI_Finalize(); return error; } diff --git a/tests/TestNetcdf.cpp b/tests/TestNetcdf.cpp index 8768c9ea..3d0498d2 100644 --- a/tests/TestNetcdf.cpp +++ b/tests/TestNetcdf.cpp @@ -116,7 +116,7 @@ int main(int argc, char **argv) PROFILE_SAVE("TestNetcdf"); // Close MPI - MPI_Barrier(MPI_COMM_WORLD); + comm.barrier(); MPI_Finalize(); return N_errors; } diff --git a/tests/TestSegDist.cpp b/tests/TestSegDist.cpp index b5e23ec8..ecb6d6b9 100644 --- a/tests/TestSegDist.cpp +++ b/tests/TestSegDist.cpp @@ -100,10 +100,10 @@ int main(int argc, char **argv) comm.barrier(); if (rank==0) printf("Initialized! Converting to Signed Distance function \n"); - double t1 = MPI_Wtime(); + double t1 = Utilities::MPI::time(); DoubleArray Distance(nx,ny,nz); CalcDist(Distance,id,Dm,{false,false,false}); - double t2 = MPI_Wtime(); + double t2 = Utilities::MPI::time(); if (rank==0) printf("Total time: %f seconds \n",t2-t1); diff --git a/tests/lb2_CMT_wia.cpp b/tests/lb2_CMT_wia.cpp index 820428a3..389bc8a8 100644 --- a/tests/lb2_CMT_wia.cpp +++ b/tests/lb2_CMT_wia.cpp @@ -292,18 +292,18 @@ int main(int argc, char **argv) //................................................................................... 
// Send all the distributions - MPI_Isend(sendbuf_x, 2*sendCount_x,MPI_DOUBLE,rank_x,sendtag,comm,&req1[0]); - MPI_Irecv(recvbuf_X, 2*recvCount_X,MPI_DOUBLE,rank_X,recvtag,comm,&req2[0]); - MPI_Isend(sendbuf_X, 2*sendCount_X,MPI_DOUBLE,rank_X,sendtag,comm,&req1[1]); - MPI_Irecv(recvbuf_x, 2*recvCount_x,MPI_DOUBLE,rank_x,recvtag,comm,&req2[1]); - MPI_Isend(sendbuf_y, 2*sendCount_y,MPI_DOUBLE,rank_y,sendtag,comm,&req1[2]); - MPI_Irecv(recvbuf_Y, 2*recvCount_Y,MPI_DOUBLE,rank_Y,recvtag,comm,&req2[2]); - MPI_Isend(sendbuf_Y, 2*sendCount_Y,MPI_DOUBLE,rank_Y,sendtag,comm,&req1[3]); - MPI_Irecv(recvbuf_y, 2*recvCount_y,MPI_DOUBLE,rank_y,recvtag,comm,&req2[3]); - MPI_Isend(sendbuf_z, 2*sendCount_z,MPI_DOUBLE,rank_z,sendtag,comm,&req1[4]); - MPI_Irecv(recvbuf_Z, 2*recvCount_Z,MPI_DOUBLE,rank_Z,recvtag,comm,&req2[4]); - MPI_Isend(sendbuf_Z, 2*sendCount_Z,MPI_DOUBLE,rank_Z,sendtag,comm,&req1[5]); - MPI_Irecv(recvbuf_z, 2*recvCount_z,MPI_DOUBLE,rank_z,recvtag,comm,&req2[5]); + req1[0] = comm.Isend(sendbuf_x,2*sendCount_x,rank_x,sendtag); + req2[0] = comm.Irecv(recvbuf_X,2*recvCount_X,rank_X,recvtag); + req1[1] = comm.Isend(sendbuf_X,2*sendCount_X,rank_X,sendtag); + req2[1] = comm.Irecv(recvbuf_x,2*recvCount_x,rank_x,recvtag); + req1[2] = comm.Isend(sendbuf_y,2*sendCount_y,rank_y,sendtag); + req2[2] = comm.Irecv(recvbuf_Y,2*recvCount_Y,rank_Y,recvtag); + req1[3] = comm.Isend(sendbuf_Y,2*sendCount_Y,rank_Y,sendtag); + req2[3] = comm.Irecv(recvbuf_y,2*recvCount_y,rank_y,recvtag); + req1[4] = comm.Isend(sendbuf_z,2*sendCount_z,rank_z,sendtag); + req2[4] = comm.Irecv(recvbuf_Z,2*recvCount_Z,rank_Z,recvtag); + req1[5] = comm.Isend(sendbuf_Z,2*sendCount_Z,rank_Z,sendtag); + req2[5] = comm.Irecv(recvbuf_z,2*recvCount_z,rank_z,recvtag); */ //................................................................................... ScaLBL_D3Q7_Swap(ID, &packed_even[0], &packed_odd[0], Nx, Ny, Nz); @@ -311,8 +311,8 @@ int main(int argc, char **argv) /* //................................................................................... // Wait for completion of D3Q19 communication - MPI_Waitall(6,req1,stat1); - MPI_Waitall(6,req2,stat2); + comm.waitAll(6,req1); + comm.waitAll(6,req2); //................................................................................... // Unpack the distributions on the device //................................................................................... @@ -358,7 +358,7 @@ int main(int argc, char **argv) fclose(PHASE); // Close MPI - MPI_Barrier(MPI_COMM_WORLD); + comm.barrier(); MPI_Finalize(); return 0; } diff --git a/tests/lb2_Color_blob_wia_mpi.cpp b/tests/lb2_Color_blob_wia_mpi.cpp index 70342176..e3323612 100644 --- a/tests/lb2_Color_blob_wia_mpi.cpp +++ b/tests/lb2_Color_blob_wia_mpi.cpp @@ -114,7 +114,6 @@ int main(int argc, char **argv) int rank_yz,rank_YZ,rank_yZ,rank_Yz; //********************************** MPI_Request req1[18],req2[18]; - MPI_Status stat1[18],stat2[18]; if (rank == 0){ printf("********************************************************\n"); @@ -207,36 +206,36 @@ int main(int argc, char **argv) // Broadcast simulation parameters from rank 0 to all other procs comm.barrier(); //................................................. 
- MPI_Bcast(&tau,1,MPI_DOUBLE,0,comm);
- MPI_Bcast(&alpha,1,MPI_DOUBLE,0,comm);
- MPI_Bcast(&beta,1,MPI_DOUBLE,0,comm);
- MPI_Bcast(&das,1,MPI_DOUBLE,0,comm);
- MPI_Bcast(&dbs,1,MPI_DOUBLE,0,comm);
- MPI_Bcast(&phi_s,1,MPI_DOUBLE,0,comm);
- MPI_Bcast(&wp_saturation,1,MPI_DOUBLE,0,comm);
- MPI_Bcast(&pBC,1,MPI_LOGICAL,0,comm);
- MPI_Bcast(&Restart,1,MPI_LOGICAL,0,comm);
- MPI_Bcast(&din,1,MPI_DOUBLE,0,comm);
- MPI_Bcast(&dout,1,MPI_DOUBLE,0,comm);
- MPI_Bcast(&Fx,1,MPI_DOUBLE,0,comm);
- MPI_Bcast(&Fy,1,MPI_DOUBLE,0,comm);
- MPI_Bcast(&Fz,1,MPI_DOUBLE,0,comm);
- MPI_Bcast(&timestepMax,1,MPI_INT,0,comm);
- MPI_Bcast(&interval,1,MPI_INT,0,comm);
- MPI_Bcast(&tol,1,MPI_DOUBLE,0,comm);
+ comm.bcast(&tau,1,0);
+ comm.bcast(&alpha,1,0);
+ comm.bcast(&beta,1,0);
+ comm.bcast(&das,1,0);
+ comm.bcast(&dbs,1,0);
+ comm.bcast(&phi_s,1,0);
+ comm.bcast(&wp_saturation,1,0);
+ comm.bcast(&pBC,1,0);
+ comm.bcast(&Restart,1,0);
+ comm.bcast(&din,1,0);
+ comm.bcast(&dout,1,0);
+ comm.bcast(&Fx,1,0);
+ comm.bcast(&Fy,1,0);
+ comm.bcast(&Fz,1,0);
+ comm.bcast(&timestepMax,1,0);
+ comm.bcast(&interval,1,0);
+ comm.bcast(&tol,1,0);
// Computational domain
- MPI_Bcast(&Nx,1,MPI_INT,0,comm);
- MPI_Bcast(&Ny,1,MPI_INT,0,comm);
- MPI_Bcast(&Nz,1,MPI_INT,0,comm);
-// MPI_Bcast(&nBlocks,1,MPI_INT,0,comm);
-// MPI_Bcast(&nthreads,1,MPI_INT,0,comm);
- MPI_Bcast(&nprocx,1,MPI_INT,0,comm);
- MPI_Bcast(&nprocy,1,MPI_INT,0,comm);
- MPI_Bcast(&nprocz,1,MPI_INT,0,comm);
- MPI_Bcast(&nspheres,1,MPI_INT,0,comm);
- MPI_Bcast(&Lx,1,MPI_DOUBLE,0,comm);
- MPI_Bcast(&Ly,1,MPI_DOUBLE,0,comm);
- MPI_Bcast(&Lz,1,MPI_DOUBLE,0,comm);
+ comm.bcast(&Nx,1,0);
+ comm.bcast(&Ny,1,0);
+ comm.bcast(&Nz,1,0);
+// comm.bcast(&nBlocks,1,0);
+// comm.bcast(&nthreads,1,0);
+ comm.bcast(&nprocx,1,0);
+ comm.bcast(&nprocy,1,0);
+ comm.bcast(&nprocz,1,0);
+ comm.bcast(&nspheres,1,0);
+ comm.bcast(&Lx,1,0);
+ comm.bcast(&Ly,1,0);
+ comm.bcast(&Lz,1,0);
//.................................................
comm.barrier();
@@ -399,10 +398,10 @@ int main(int argc, char **argv)
if (rank == 0) ReadSpherePacking(nspheres,cx,cy,cz,rad);
comm.barrier();
// Broadcast the sphere packing to all processes
- MPI_Bcast(cx,nspheres,MPI_DOUBLE,0,comm);
- MPI_Bcast(cy,nspheres,MPI_DOUBLE,0,comm);
- MPI_Bcast(cz,nspheres,MPI_DOUBLE,0,comm);
- MPI_Bcast(rad,nspheres,MPI_DOUBLE,0,comm);
+ comm.bcast(cx,nspheres,0);
+ comm.bcast(cy,nspheres,0);
+ comm.bcast(cz,nspheres,0);
+ comm.bcast(rad,nspheres,0);
//...........................................................................
comm.barrier();
if (rank == 0) cout << "Domain set." << endl;
@@ -418,7 +417,7 @@ int main(int argc, char **argv)
D = 6.0*(Nx-2)*nprocx*totVol / totArea / Lx;
printf("Sauter Mean Diameter (computed from sphere packing) = %f \n ",D);
}
- MPI_Bcast(&D,1,MPI_DOUBLE,0,comm);
+ comm.bcast(&D,1,0);
//.......................................................................
// sprintf(LocalRankString,"%05d",rank);
@@ -478,7 +477,7 @@ int main(int argc, char **argv)
id[(Nz-1)*Nx*Ny] = id[(Nz-1)*Nx*Ny+Nx-1] = id[(Nz-1)*Nx*Ny+(Ny-1)*Nx] = id[(Nz-1)*Nx*Ny+(Ny-1)*Nx + Nx-1] = 0;
//.........................................................
sum_local = 1.0*sum; - MPI_Allreduce(&sum_local,&porosity,1,MPI_DOUBLE,MPI_SUM,comm); + porosity = comm.sumReduce( sum_local ); porosity = porosity*iVol_global; if (rank==0) printf("Media porosity = %f \n",porosity); @@ -886,42 +885,24 @@ int main(int argc, char **argv) PackID(sendList_yZ, sendCount_yZ ,sendID_yZ, id); PackID(sendList_YZ, sendCount_YZ ,sendID_YZ, id); //...................................................................................... - MPI_Sendrecv(sendID_x,sendCount_x,MPI_CHAR,rank_x,sendtag, - recvID_X,recvCount_X,MPI_CHAR,rank_X,recvtag,comm,MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_X,sendCount_X,MPI_CHAR,rank_X,sendtag, - recvID_x,recvCount_x,MPI_CHAR,rank_x,recvtag,comm,MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_y,sendCount_y,MPI_CHAR,rank_y,sendtag, - recvID_Y,recvCount_Y,MPI_CHAR,rank_Y,recvtag,comm,MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_Y,sendCount_Y,MPI_CHAR,rank_Y,sendtag, - recvID_y,recvCount_y,MPI_CHAR,rank_y,recvtag,comm,MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_z,sendCount_z,MPI_CHAR,rank_z,sendtag, - recvID_Z,recvCount_Z,MPI_CHAR,rank_Z,recvtag,comm,MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_Z,sendCount_Z,MPI_CHAR,rank_Z,sendtag, - recvID_z,recvCount_z,MPI_CHAR,rank_z,recvtag,comm,MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_xy,sendCount_xy,MPI_CHAR,rank_xy,sendtag, - recvID_XY,recvCount_XY,MPI_CHAR,rank_XY,recvtag,comm,MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_XY,sendCount_XY,MPI_CHAR,rank_XY,sendtag, - recvID_xy,recvCount_xy,MPI_CHAR,rank_xy,recvtag,comm,MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_Xy,sendCount_Xy,MPI_CHAR,rank_Xy,sendtag, - recvID_xY,recvCount_xY,MPI_CHAR,rank_xY,recvtag,comm,MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_xY,sendCount_xY,MPI_CHAR,rank_xY,sendtag, - recvID_Xy,recvCount_Xy,MPI_CHAR,rank_Xy,recvtag,comm,MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_xz,sendCount_xz,MPI_CHAR,rank_xz,sendtag, - recvID_XZ,recvCount_XZ,MPI_CHAR,rank_XZ,recvtag,comm,MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_XZ,sendCount_XZ,MPI_CHAR,rank_XZ,sendtag, - recvID_xz,recvCount_xz,MPI_CHAR,rank_xz,recvtag,comm,MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_Xz,sendCount_Xz,MPI_CHAR,rank_Xz,sendtag, - recvID_xZ,recvCount_xZ,MPI_CHAR,rank_xZ,recvtag,comm,MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_xZ,sendCount_xZ,MPI_CHAR,rank_xZ,sendtag, - recvID_Xz,recvCount_Xz,MPI_CHAR,rank_Xz,recvtag,comm,MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_yz,sendCount_yz,MPI_CHAR,rank_yz,sendtag, - recvID_YZ,recvCount_YZ,MPI_CHAR,rank_YZ,recvtag,comm,MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_YZ,sendCount_YZ,MPI_CHAR,rank_YZ,sendtag, - recvID_yz,recvCount_yz,MPI_CHAR,rank_yz,recvtag,comm,MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_Yz,sendCount_Yz,MPI_CHAR,rank_Yz,sendtag, - recvID_yZ,recvCount_yZ,MPI_CHAR,rank_yZ,recvtag,comm,MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_yZ,sendCount_yZ,MPI_CHAR,rank_yZ,sendtag, - recvID_Yz,recvCount_Yz,MPI_CHAR,rank_Yz,recvtag,comm,MPI_STATUS_IGNORE); + comm.sendrecv(sendID_x,sendCount_x,rank_x,sendtag,recvID_X,recvCount_X,rank_X,recvtag); + comm.sendrecv(sendID_X,sendCount_X,rank_X,sendtag,recvID_x,recvCount_x,rank_x,recvtag); + comm.sendrecv(sendID_y,sendCount_y,rank_y,sendtag,recvID_Y,recvCount_Y,rank_Y,recvtag); + comm.sendrecv(sendID_Y,sendCount_Y,rank_Y,sendtag,recvID_y,recvCount_y,rank_y,recvtag); + comm.sendrecv(sendID_z,sendCount_z,rank_z,sendtag,recvID_Z,recvCount_Z,rank_Z,recvtag); + comm.sendrecv(sendID_Z,sendCount_Z,rank_Z,sendtag,recvID_z,recvCount_z,rank_z,recvtag); + 
comm.sendrecv(sendID_xy,sendCount_xy,rank_xy,sendtag,recvID_XY,recvCount_XY,rank_XY,recvtag); + comm.sendrecv(sendID_XY,sendCount_XY,rank_XY,sendtag,recvID_xy,recvCount_xy,rank_xy,recvtag); + comm.sendrecv(sendID_Xy,sendCount_Xy,rank_Xy,sendtag,recvID_xY,recvCount_xY,rank_xY,recvtag); + comm.sendrecv(sendID_xY,sendCount_xY,rank_xY,sendtag,recvID_Xy,recvCount_Xy,rank_Xy,recvtag); + comm.sendrecv(sendID_xz,sendCount_xz,rank_xz,sendtag,recvID_XZ,recvCount_XZ,rank_XZ,recvtag); + comm.sendrecv(sendID_XZ,sendCount_XZ,rank_XZ,sendtag,recvID_xz,recvCount_xz,rank_xz,recvtag); + comm.sendrecv(sendID_Xz,sendCount_Xz,rank_Xz,sendtag,recvID_xZ,recvCount_xZ,rank_xZ,recvtag); + comm.sendrecv(sendID_xZ,sendCount_xZ,rank_xZ,sendtag,recvID_Xz,recvCount_Xz,rank_Xz,recvtag); + comm.sendrecv(sendID_yz,sendCount_yz,rank_yz,sendtag,recvID_YZ,recvCount_YZ,rank_YZ,recvtag); + comm.sendrecv(sendID_YZ,sendCount_YZ,rank_YZ,sendtag,recvID_yz,recvCount_yz,rank_yz,recvtag); + comm.sendrecv(sendID_Yz,sendCount_Yz,rank_Yz,sendtag,recvID_yZ,recvCount_yZ,rank_yZ,recvtag); + comm.sendrecv(sendID_yZ,sendCount_yZ,rank_yZ,sendtag,recvID_Yz,recvCount_Yz,rank_Yz,recvtag); //...................................................................................... UnpackID(recvList_x, recvCount_x ,recvID_x, id); UnpackID(recvList_X, recvCount_X ,recvID_X, id); @@ -1380,48 +1361,48 @@ int main(int argc, char **argv) //................................................................................... // Send / Recv all the phase indcator field values //................................................................................... - MPI_Isend(sendbuf_x, sendCount_x,MPI_DOUBLE,rank_x,sendtag,comm,&req1[0]); - MPI_Irecv(recvbuf_X, recvCount_X,MPI_DOUBLE,rank_X,recvtag,comm,&req2[0]); - MPI_Isend(sendbuf_X, sendCount_X,MPI_DOUBLE,rank_X,sendtag,comm,&req1[1]); - MPI_Irecv(recvbuf_x, recvCount_x,MPI_DOUBLE,rank_x,recvtag,comm,&req2[1]); - MPI_Isend(sendbuf_y, sendCount_y,MPI_DOUBLE,rank_y,sendtag,comm,&req1[2]); - MPI_Irecv(recvbuf_Y, recvCount_Y,MPI_DOUBLE,rank_Y,recvtag,comm,&req2[2]); - MPI_Isend(sendbuf_Y, sendCount_Y,MPI_DOUBLE,rank_Y,sendtag,comm,&req1[3]); - MPI_Irecv(recvbuf_y, recvCount_y,MPI_DOUBLE,rank_y,recvtag,comm,&req2[3]); - MPI_Isend(sendbuf_z, sendCount_z,MPI_DOUBLE,rank_z,sendtag,comm,&req1[4]); - MPI_Irecv(recvbuf_Z, recvCount_Z,MPI_DOUBLE,rank_Z,recvtag,comm,&req2[4]); - MPI_Isend(sendbuf_Z, sendCount_Z,MPI_DOUBLE,rank_Z,sendtag,comm,&req1[5]); - MPI_Irecv(recvbuf_z, recvCount_z,MPI_DOUBLE,rank_z,recvtag,comm,&req2[5]); - MPI_Isend(sendbuf_xy, sendCount_xy,MPI_DOUBLE,rank_xy,sendtag,comm,&req1[6]); - MPI_Irecv(recvbuf_XY, recvCount_XY,MPI_DOUBLE,rank_XY,recvtag,comm,&req2[6]); - MPI_Isend(sendbuf_XY, sendCount_XY,MPI_DOUBLE,rank_XY,sendtag,comm,&req1[7]); - MPI_Irecv(recvbuf_xy, recvCount_xy,MPI_DOUBLE,rank_xy,recvtag,comm,&req2[7]); - MPI_Isend(sendbuf_Xy, sendCount_Xy,MPI_DOUBLE,rank_Xy,sendtag,comm,&req1[8]); - MPI_Irecv(recvbuf_xY, recvCount_xY,MPI_DOUBLE,rank_xY,recvtag,comm,&req2[8]); - MPI_Isend(sendbuf_xY, sendCount_xY,MPI_DOUBLE,rank_xY,sendtag,comm,&req1[9]); - MPI_Irecv(recvbuf_Xy, recvCount_Xy,MPI_DOUBLE,rank_Xy,recvtag,comm,&req2[9]); - MPI_Isend(sendbuf_xz, sendCount_xz,MPI_DOUBLE,rank_xz,sendtag,comm,&req1[10]); - MPI_Irecv(recvbuf_XZ, recvCount_XZ,MPI_DOUBLE,rank_XZ,recvtag,comm,&req2[10]); - MPI_Isend(sendbuf_XZ, sendCount_XZ,MPI_DOUBLE,rank_XZ,sendtag,comm,&req1[11]); - MPI_Irecv(recvbuf_xz, recvCount_xz,MPI_DOUBLE,rank_xz,recvtag,comm,&req2[11]); - MPI_Isend(sendbuf_Xz, 
sendCount_Xz,MPI_DOUBLE,rank_Xz,sendtag,comm,&req1[12]); - MPI_Irecv(recvbuf_xZ, recvCount_xZ,MPI_DOUBLE,rank_xZ,recvtag,comm,&req2[12]); - MPI_Isend(sendbuf_xZ, sendCount_xZ,MPI_DOUBLE,rank_xZ,sendtag,comm,&req1[13]); - MPI_Irecv(recvbuf_Xz, recvCount_Xz,MPI_DOUBLE,rank_Xz,recvtag,comm,&req2[13]); - MPI_Isend(sendbuf_yz, sendCount_yz,MPI_DOUBLE,rank_yz,sendtag,comm,&req1[14]); - MPI_Irecv(recvbuf_YZ, recvCount_YZ,MPI_DOUBLE,rank_YZ,recvtag,comm,&req2[14]); - MPI_Isend(sendbuf_YZ, sendCount_YZ,MPI_DOUBLE,rank_YZ,sendtag,comm,&req1[15]); - MPI_Irecv(recvbuf_yz, recvCount_yz,MPI_DOUBLE,rank_yz,recvtag,comm,&req2[15]); - MPI_Isend(sendbuf_Yz, sendCount_Yz,MPI_DOUBLE,rank_Yz,sendtag,comm,&req1[16]); - MPI_Irecv(recvbuf_yZ, recvCount_yZ,MPI_DOUBLE,rank_yZ,recvtag,comm,&req2[16]); - MPI_Isend(sendbuf_yZ, sendCount_yZ,MPI_DOUBLE,rank_yZ,sendtag,comm,&req1[17]); - MPI_Irecv(recvbuf_Yz, recvCount_Yz,MPI_DOUBLE,rank_Yz,recvtag,comm,&req2[17]); + req1[0] = comm.Isend(sendbuf_x, sendCount_x,rank_x,sendtag); + req2[0] = comm.Irecv(recvbuf_X, recvCount_X,rank_X,recvtag); + req1[1] = comm.Isend(sendbuf_X, sendCount_X,rank_X,sendtag); + req2[1] = comm.Irecv(recvbuf_x, recvCount_x,rank_x,recvtag); + req1[2] = comm.Isend(sendbuf_y, sendCount_y,rank_y,sendtag); + req2[2] = comm.Irecv(recvbuf_Y, recvCount_Y,rank_Y,recvtag); + req1[3] = comm.Isend(sendbuf_Y, sendCount_Y,rank_Y,sendtag); + req2[3] = comm.Irecv(recvbuf_y, recvCount_y,rank_y,recvtag); + req1[4] = comm.Isend(sendbuf_z, sendCount_z,rank_z,sendtag); + req2[4] = comm.Irecv(recvbuf_Z, recvCount_Z,rank_Z,recvtag); + req1[5] = comm.Isend(sendbuf_Z, sendCount_Z,rank_Z,sendtag); + req2[5] = comm.Irecv(recvbuf_z, recvCount_z,rank_z,recvtag); + req1[6] = comm.Isend(sendbuf_xy, sendCount_xy,rank_xy,sendtag); + req2[6] = comm.Irecv(recvbuf_XY, recvCount_XY,rank_XY,recvtag); + req1[7] = comm.Isend(sendbuf_XY, sendCount_XY,rank_XY,sendtag); + req2[7] = comm.Irecv(recvbuf_xy, recvCount_xy,rank_xy,recvtag); + req1[8] = comm.Isend(sendbuf_Xy, sendCount_Xy,rank_Xy,sendtag); + req2[8] = comm.Irecv(recvbuf_xY, recvCount_xY,rank_xY,recvtag); + req1[9] = comm.Isend(sendbuf_xY, sendCount_xY,rank_xY,sendtag); + req2[9] = comm.Irecv(recvbuf_Xy, recvCount_Xy,rank_Xy,recvtag); + req1[10] = comm.Isend(sendbuf_xz, sendCount_xz,rank_xz,sendtag); + req2[10] = comm.Irecv(recvbuf_XZ, recvCount_XZ,rank_XZ,recvtag); + req1[11] = comm.Isend(sendbuf_XZ, sendCount_XZ,rank_XZ,sendtag); + req2[11] = comm.Irecv(recvbuf_xz, recvCount_xz,rank_xz,recvtag); + req1[12] = comm.Isend(sendbuf_Xz, sendCount_Xz,rank_Xz,sendtag); + req2[12] = comm.Irecv(recvbuf_xZ, recvCount_xZ,rank_xZ,recvtag); + req1[13] = comm.Isend(sendbuf_xZ, sendCount_xZ,rank_xZ,sendtag); + req2[13] = comm.Irecv(recvbuf_Xz, recvCount_Xz,rank_Xz,recvtag); + req1[14] = comm.Isend(sendbuf_yz, sendCount_yz,rank_yz,sendtag); + req2[14] = comm.Irecv(recvbuf_YZ, recvCount_YZ,rank_YZ,recvtag); + req1[15] = comm.Isend(sendbuf_YZ, sendCount_YZ,rank_YZ,sendtag); + req2[15] = comm.Irecv(recvbuf_yz, recvCount_yz,rank_yz,recvtag); + req1[16] = comm.Isend(sendbuf_Yz, sendCount_Yz,rank_Yz,sendtag); + req2[16] = comm.Irecv(recvbuf_yZ, recvCount_yZ,rank_yZ,recvtag); + req1[17] = comm.Isend(sendbuf_yZ, sendCount_yZ,rank_yZ,sendtag); + req2[17] = comm.Irecv(recvbuf_Yz, recvCount_Yz,rank_Yz,recvtag); //................................................................................... //................................................................................... 
// Wait for completion of Indicator Field communication //................................................................................... - MPI_Waitall(18,req1,stat1); - MPI_Waitall(18,req2,stat2); + comm.waitAll(18,req1); + comm.waitAll(18,req2); ScaLBL_DeviceBarrier(); //................................................................................... //................................................................................... @@ -1497,7 +1478,7 @@ int main(int argc, char **argv) //.......create and start timer............ double starttime,stoptime,cputime; comm.barrier(); - starttime = MPI_Wtime(); + starttime = Utilities::MPI::time(); //......................................... sendtag = recvtag = 5; @@ -1593,42 +1574,42 @@ int main(int argc, char **argv) //................................................................................... // Send all the distributions - MPI_Isend(sendbuf_x, 5*sendCount_x,MPI_DOUBLE,rank_x,sendtag,comm,&req1[0]); - MPI_Irecv(recvbuf_X, 5*recvCount_X,MPI_DOUBLE,rank_X,recvtag,comm,&req2[0]); - MPI_Isend(sendbuf_X, 5*sendCount_X,MPI_DOUBLE,rank_X,sendtag,comm,&req1[1]); - MPI_Irecv(recvbuf_x, 5*recvCount_x,MPI_DOUBLE,rank_x,recvtag,comm,&req2[1]); - MPI_Isend(sendbuf_y, 5*sendCount_y,MPI_DOUBLE,rank_y,sendtag,comm,&req1[2]); - MPI_Irecv(recvbuf_Y, 5*recvCount_Y,MPI_DOUBLE,rank_Y,recvtag,comm,&req2[2]); - MPI_Isend(sendbuf_Y, 5*sendCount_Y,MPI_DOUBLE,rank_Y,sendtag,comm,&req1[3]); - MPI_Irecv(recvbuf_y, 5*recvCount_y,MPI_DOUBLE,rank_y,recvtag,comm,&req2[3]); - MPI_Isend(sendbuf_z, 5*sendCount_z,MPI_DOUBLE,rank_z,sendtag,comm,&req1[4]); - MPI_Irecv(recvbuf_Z, 5*recvCount_Z,MPI_DOUBLE,rank_Z,recvtag,comm,&req2[4]); - MPI_Isend(sendbuf_Z, 5*sendCount_Z,MPI_DOUBLE,rank_Z,sendtag,comm,&req1[5]); - MPI_Irecv(recvbuf_z, 5*recvCount_z,MPI_DOUBLE,rank_z,recvtag,comm,&req2[5]); - MPI_Isend(sendbuf_xy, sendCount_xy,MPI_DOUBLE,rank_xy,sendtag,comm,&req1[6]); - MPI_Irecv(recvbuf_XY, recvCount_XY,MPI_DOUBLE,rank_XY,recvtag,comm,&req2[6]); - MPI_Isend(sendbuf_XY, sendCount_XY,MPI_DOUBLE,rank_XY,sendtag,comm,&req1[7]); - MPI_Irecv(recvbuf_xy, recvCount_xy,MPI_DOUBLE,rank_xy,recvtag,comm,&req2[7]); - MPI_Isend(sendbuf_Xy, sendCount_Xy,MPI_DOUBLE,rank_Xy,sendtag,comm,&req1[8]); - MPI_Irecv(recvbuf_xY, recvCount_xY,MPI_DOUBLE,rank_xY,recvtag,comm,&req2[8]); - MPI_Isend(sendbuf_xY, sendCount_xY,MPI_DOUBLE,rank_xY,sendtag,comm,&req1[9]); - MPI_Irecv(recvbuf_Xy, recvCount_Xy,MPI_DOUBLE,rank_Xy,recvtag,comm,&req2[9]); - MPI_Isend(sendbuf_xz, sendCount_xz,MPI_DOUBLE,rank_xz,sendtag,comm,&req1[10]); - MPI_Irecv(recvbuf_XZ, recvCount_XZ,MPI_DOUBLE,rank_XZ,recvtag,comm,&req2[10]); - MPI_Isend(sendbuf_XZ, sendCount_XZ,MPI_DOUBLE,rank_XZ,sendtag,comm,&req1[11]); - MPI_Irecv(recvbuf_xz, recvCount_xz,MPI_DOUBLE,rank_xz,recvtag,comm,&req2[11]); - MPI_Isend(sendbuf_Xz, sendCount_Xz,MPI_DOUBLE,rank_Xz,sendtag,comm,&req1[12]); - MPI_Irecv(recvbuf_xZ, recvCount_xZ,MPI_DOUBLE,rank_xZ,recvtag,comm,&req2[12]); - MPI_Isend(sendbuf_xZ, sendCount_xZ,MPI_DOUBLE,rank_xZ,sendtag,comm,&req1[13]); - MPI_Irecv(recvbuf_Xz, recvCount_Xz,MPI_DOUBLE,rank_Xz,recvtag,comm,&req2[13]); - MPI_Isend(sendbuf_yz, sendCount_yz,MPI_DOUBLE,rank_yz,sendtag,comm,&req1[14]); - MPI_Irecv(recvbuf_YZ, recvCount_YZ,MPI_DOUBLE,rank_YZ,recvtag,comm,&req2[14]); - MPI_Isend(sendbuf_YZ, sendCount_YZ,MPI_DOUBLE,rank_YZ,sendtag,comm,&req1[15]); - MPI_Irecv(recvbuf_yz, recvCount_yz,MPI_DOUBLE,rank_yz,recvtag,comm,&req2[15]); - MPI_Isend(sendbuf_Yz, sendCount_Yz,MPI_DOUBLE,rank_Yz,sendtag,comm,&req1[16]); - 
MPI_Irecv(recvbuf_yZ, recvCount_yZ,MPI_DOUBLE,rank_yZ,recvtag,comm,&req2[16]); - MPI_Isend(sendbuf_yZ, sendCount_yZ,MPI_DOUBLE,rank_yZ,sendtag,comm,&req1[17]); - MPI_Irecv(recvbuf_Yz, recvCount_Yz,MPI_DOUBLE,rank_Yz,recvtag,comm,&req2[17]); + req1[0] = comm.Isend(sendbuf_x, 5*sendCount_x,rank_x,sendtag); + req2[0] = comm.Irecv(recvbuf_X, 5*recvCount_X,rank_X,recvtag); + req1[1] = comm.Isend(sendbuf_X, 5*sendCount_X,rank_X,sendtag); + req2[1] = comm.Irecv(recvbuf_x, 5*recvCount_x,rank_x,recvtag); + req1[2] = comm.Isend(sendbuf_y, 5*sendCount_y,rank_y,sendtag); + req2[2] = comm.Irecv(recvbuf_Y, 5*recvCount_Y,rank_Y,recvtag); + req1[3] = comm.Isend(sendbuf_Y, 5*sendCount_Y,rank_Y,sendtag); + req2[3] = comm.Irecv(recvbuf_y, 5*recvCount_y,rank_y,recvtag); + req1[4] = comm.Isend(sendbuf_z, 5*sendCount_z,rank_z,sendtag); + req2[4] = comm.Irecv(recvbuf_Z, 5*recvCount_Z,rank_Z,recvtag); + req1[5] = comm.Isend(sendbuf_Z, 5*sendCount_Z,rank_Z,sendtag); + req2[5] = comm.Irecv(recvbuf_z, 5*recvCount_z,rank_z,recvtag); + req1[6] = comm.Isend(sendbuf_xy, sendCount_xy,rank_xy,sendtag); + req2[6] = comm.Irecv(recvbuf_XY, recvCount_XY,rank_XY,recvtag); + req1[7] = comm.Isend(sendbuf_XY, sendCount_XY,rank_XY,sendtag); + req2[7] = comm.Irecv(recvbuf_xy, recvCount_xy,rank_xy,recvtag); + req1[8] = comm.Isend(sendbuf_Xy, sendCount_Xy,rank_Xy,sendtag); + req2[8] = comm.Irecv(recvbuf_xY, recvCount_xY,rank_xY,recvtag); + req1[9] = comm.Isend(sendbuf_xY, sendCount_xY,rank_xY,sendtag); + req2[9] = comm.Irecv(recvbuf_Xy, recvCount_Xy,rank_Xy,recvtag); + req1[10] = comm.Isend(sendbuf_xz, sendCount_xz,rank_xz,sendtag); + req2[10] = comm.Irecv(recvbuf_XZ, recvCount_XZ,rank_XZ,recvtag); + req1[11] = comm.Isend(sendbuf_XZ, sendCount_XZ,rank_XZ,sendtag); + req2[11] = comm.Irecv(recvbuf_xz, recvCount_xz,rank_xz,recvtag); + req1[12] = comm.Isend(sendbuf_Xz, sendCount_Xz,rank_Xz,sendtag); + req2[12] = comm.Irecv(recvbuf_xZ, recvCount_xZ,rank_xZ,recvtag); + req1[13] = comm.Isend(sendbuf_xZ, sendCount_xZ,rank_xZ,sendtag); + req2[13] = comm.Irecv(recvbuf_Xz, recvCount_Xz,rank_Xz,recvtag); + req1[14] = comm.Isend(sendbuf_yz, sendCount_yz,rank_yz,sendtag); + req2[14] = comm.Irecv(recvbuf_YZ, recvCount_YZ,rank_YZ,recvtag); + req1[15] = comm.Isend(sendbuf_YZ, sendCount_YZ,rank_YZ,sendtag); + req2[15] = comm.Irecv(recvbuf_yz, recvCount_yz,rank_yz,recvtag); + req1[16] = comm.Isend(sendbuf_Yz, sendCount_Yz,rank_Yz,sendtag); + req2[16] = comm.Irecv(recvbuf_yZ, recvCount_yZ,rank_yZ,recvtag); + req1[17] = comm.Isend(sendbuf_yZ, sendCount_yZ,rank_yZ,sendtag); + req2[17] = comm.Irecv(recvbuf_Yz, recvCount_Yz,rank_Yz,recvtag); //................................................................................... //************************************************************************* @@ -1648,8 +1629,8 @@ int main(int argc, char **argv) //................................................................................... // Wait for completion of D3Q19 communication - MPI_Waitall(18,req1,stat1); - MPI_Waitall(18,req2,stat2); + comm.waitAll(18,req1); + comm.waitAll(18,req2); //................................................................................... // Unpack the distributions on the device @@ -1743,18 +1724,18 @@ int main(int argc, char **argv) //................................................................................... 
// Send all the distributions - MPI_Isend(sendbuf_x, 2*sendCount_x,MPI_DOUBLE,rank_x,sendtag,comm,&req1[0]); - MPI_Irecv(recvbuf_X, 2*recvCount_X,MPI_DOUBLE,rank_X,recvtag,comm,&req2[0]); - MPI_Isend(sendbuf_X, 2*sendCount_X,MPI_DOUBLE,rank_X,sendtag,comm,&req1[1]); - MPI_Irecv(recvbuf_x, 2*recvCount_x,MPI_DOUBLE,rank_x,recvtag,comm,&req2[1]); - MPI_Isend(sendbuf_y, 2*sendCount_y,MPI_DOUBLE,rank_y,sendtag,comm,&req1[2]); - MPI_Irecv(recvbuf_Y, 2*recvCount_Y,MPI_DOUBLE,rank_Y,recvtag,comm,&req2[2]); - MPI_Isend(sendbuf_Y, 2*sendCount_Y,MPI_DOUBLE,rank_Y,sendtag,comm,&req1[3]); - MPI_Irecv(recvbuf_y, 2*recvCount_y,MPI_DOUBLE,rank_y,recvtag,comm,&req2[3]); - MPI_Isend(sendbuf_z, 2*sendCount_z,MPI_DOUBLE,rank_z,sendtag,comm,&req1[4]); - MPI_Irecv(recvbuf_Z, 2*recvCount_Z,MPI_DOUBLE,rank_Z,recvtag,comm,&req2[4]); - MPI_Isend(sendbuf_Z, 2*sendCount_Z,MPI_DOUBLE,rank_Z,sendtag,comm,&req1[5]); - MPI_Irecv(recvbuf_z, 2*recvCount_z,MPI_DOUBLE,rank_z,recvtag,comm,&req2[5]); + req1[0] = comm.Isend(sendbuf_x, 2*sendCount_x,rank_x,sendtag); + req2[0] = comm.Irecv(recvbuf_X, 2*recvCount_X,rank_X,recvtag); + req1[1] = comm.Isend(sendbuf_X, 2*sendCount_X,rank_X,sendtag); + req2[1] = comm.Irecv(recvbuf_x, 2*recvCount_x,rank_x,recvtag); + req1[2] = comm.Isend(sendbuf_y, 2*sendCount_y,rank_y,sendtag); + req2[2] = comm.Irecv(recvbuf_Y, 2*recvCount_Y,rank_Y,recvtag); + req1[3] = comm.Isend(sendbuf_Y, 2*sendCount_Y,rank_Y,sendtag); + req2[3] = comm.Irecv(recvbuf_y, 2*recvCount_y,rank_y,recvtag); + req1[4] = comm.Isend(sendbuf_z, 2*sendCount_z,rank_z,sendtag); + req2[4] = comm.Irecv(recvbuf_Z, 2*recvCount_Z,rank_Z,recvtag); + req1[5] = comm.Isend(sendbuf_Z, 2*sendCount_Z,rank_Z,sendtag); + req2[5] = comm.Irecv(recvbuf_z, 2*recvCount_z,rank_z,recvtag); //................................................................................... ScaLBL_D3Q7_Swap(ID, A_even, A_odd, Nx, Ny, Nz); @@ -1762,8 +1743,8 @@ int main(int argc, char **argv) //................................................................................... // Wait for completion of D3Q19 communication - MPI_Waitall(6,req1,stat1); - MPI_Waitall(6,req2,stat2); + comm.waitAll(6,req1); + comm.waitAll(6,req2); //................................................................................... // Unpack the distributions on the device //................................................................................... @@ -1824,48 +1805,48 @@ int main(int argc, char **argv) //................................................................................... // Send / Recv all the phase indcator field values //................................................................................... 
- MPI_Isend(sendbuf_x, sendCount_x,MPI_DOUBLE,rank_x,sendtag,comm,&req1[0]);
- MPI_Irecv(recvbuf_X, recvCount_X,MPI_DOUBLE,rank_X,recvtag,comm,&req2[0]);
- MPI_Isend(sendbuf_X, sendCount_X,MPI_DOUBLE,rank_X,sendtag,comm,&req1[1]);
- MPI_Irecv(recvbuf_x, recvCount_x,MPI_DOUBLE,rank_x,recvtag,comm,&req2[1]);
- MPI_Isend(sendbuf_y, sendCount_y,MPI_DOUBLE,rank_y,sendtag,comm,&req1[2]);
- MPI_Irecv(recvbuf_Y, recvCount_Y,MPI_DOUBLE,rank_Y,recvtag,comm,&req2[2]);
- MPI_Isend(sendbuf_Y, sendCount_Y,MPI_DOUBLE,rank_Y,sendtag,comm,&req1[3]);
- MPI_Irecv(recvbuf_y, recvCount_y,MPI_DOUBLE,rank_y,recvtag,comm,&req2[3]);
- MPI_Isend(sendbuf_z, sendCount_z,MPI_DOUBLE,rank_z,sendtag,comm,&req1[4]);
- MPI_Irecv(recvbuf_Z, recvCount_Z,MPI_DOUBLE,rank_Z,recvtag,comm,&req2[4]);
- MPI_Isend(sendbuf_Z, sendCount_Z,MPI_DOUBLE,rank_Z,sendtag,comm,&req1[5]);
- MPI_Irecv(recvbuf_z, recvCount_z,MPI_DOUBLE,rank_z,recvtag,comm,&req2[5]);
- MPI_Isend(sendbuf_xy, sendCount_xy,MPI_DOUBLE,rank_xy,sendtag,comm,&req1[6]);
- MPI_Irecv(recvbuf_XY, recvCount_XY,MPI_DOUBLE,rank_XY,recvtag,comm,&req2[6]);
- MPI_Isend(sendbuf_XY, sendCount_XY,MPI_DOUBLE,rank_XY,sendtag,comm,&req1[7]);
- MPI_Irecv(recvbuf_xy, recvCount_xy,MPI_DOUBLE,rank_xy,recvtag,comm,&req2[7]);
- MPI_Isend(sendbuf_Xy, sendCount_Xy,MPI_DOUBLE,rank_Xy,sendtag,comm,&req1[8]);
- MPI_Irecv(recvbuf_xY, recvCount_xY,MPI_DOUBLE,rank_xY,recvtag,comm,&req2[8]);
- MPI_Isend(sendbuf_xY, sendCount_xY,MPI_DOUBLE,rank_xY,sendtag,comm,&req1[9]);
- MPI_Irecv(recvbuf_Xy, recvCount_Xy,MPI_DOUBLE,rank_Xy,recvtag,comm,&req2[9]);
- MPI_Isend(sendbuf_xz, sendCount_xz,MPI_DOUBLE,rank_xz,sendtag,comm,&req1[10]);
- MPI_Irecv(recvbuf_XZ, recvCount_XZ,MPI_DOUBLE,rank_XZ,recvtag,comm,&req2[10]);
- MPI_Isend(sendbuf_XZ, sendCount_XZ,MPI_DOUBLE,rank_XZ,sendtag,comm,&req1[11]);
- MPI_Irecv(recvbuf_xz, recvCount_xz,MPI_DOUBLE,rank_xz,recvtag,comm,&req2[11]);
- MPI_Isend(sendbuf_Xz, sendCount_Xz,MPI_DOUBLE,rank_Xz,sendtag,comm,&req1[12]);
- MPI_Irecv(recvbuf_xZ, recvCount_xZ,MPI_DOUBLE,rank_xZ,recvtag,comm,&req2[12]);
- MPI_Isend(sendbuf_xZ, sendCount_xZ,MPI_DOUBLE,rank_xZ,sendtag,comm,&req1[13]);
- MPI_Irecv(recvbuf_Xz, recvCount_Xz,MPI_DOUBLE,rank_Xz,recvtag,comm,&req2[13]);
- MPI_Isend(sendbuf_yz, sendCount_yz,MPI_DOUBLE,rank_yz,sendtag,comm,&req1[14]);
- MPI_Irecv(recvbuf_YZ, recvCount_YZ,MPI_DOUBLE,rank_YZ,recvtag,comm,&req2[14]);
- MPI_Isend(sendbuf_YZ, sendCount_YZ,MPI_DOUBLE,rank_YZ,sendtag,comm,&req1[15]);
- MPI_Irecv(recvbuf_yz, recvCount_yz,MPI_DOUBLE,rank_yz,recvtag,comm,&req2[15]);
- MPI_Isend(sendbuf_Yz, sendCount_Yz,MPI_DOUBLE,rank_Yz,sendtag,comm,&req1[16]);
- MPI_Irecv(recvbuf_yZ, recvCount_yZ,MPI_DOUBLE,rank_yZ,recvtag,comm,&req2[16]);
- MPI_Isend(sendbuf_yZ, sendCount_yZ,MPI_DOUBLE,rank_yZ,sendtag,comm,&req1[17]);
- MPI_Irecv(recvbuf_Yz, recvCount_Yz,MPI_DOUBLE,rank_Yz,recvtag,comm,&req2[17]);
+ req1[0] = comm.Isend(sendbuf_x, sendCount_x,rank_x,sendtag);
+ req2[0] = comm.Irecv(recvbuf_X, recvCount_X,rank_X,recvtag);
+ req1[1] = comm.Isend(sendbuf_X, sendCount_X,rank_X,sendtag);
+ req2[1] = comm.Irecv(recvbuf_x, recvCount_x,rank_x,recvtag);
+ req1[2] = comm.Isend(sendbuf_y, sendCount_y,rank_y,sendtag);
+ req2[2] = comm.Irecv(recvbuf_Y, recvCount_Y,rank_Y,recvtag);
+ req1[3] = comm.Isend(sendbuf_Y, sendCount_Y,rank_Y,sendtag);
+ req2[3] = comm.Irecv(recvbuf_y, recvCount_y,rank_y,recvtag);
+ req1[4] = comm.Isend(sendbuf_z, sendCount_z,rank_z,sendtag);
+ req2[4] = comm.Irecv(recvbuf_Z, recvCount_Z,rank_Z,recvtag);
+ req1[5] = comm.Isend(sendbuf_Z, sendCount_Z,rank_Z,sendtag);
+ req2[5] = comm.Irecv(recvbuf_z, recvCount_z,rank_z,recvtag);
+ req1[6] = comm.Isend(sendbuf_xy, sendCount_xy,rank_xy,sendtag);
+ req2[6] = comm.Irecv(recvbuf_XY, recvCount_XY,rank_XY,recvtag);
+ req1[7] = comm.Isend(sendbuf_XY, sendCount_XY,rank_XY,sendtag);
+ req2[7] = comm.Irecv(recvbuf_xy, recvCount_xy,rank_xy,recvtag);
+ req1[8] = comm.Isend(sendbuf_Xy, sendCount_Xy,rank_Xy,sendtag);
+ req2[8] = comm.Irecv(recvbuf_xY, recvCount_xY,rank_xY,recvtag);
+ req1[9] = comm.Isend(sendbuf_xY, sendCount_xY,rank_xY,sendtag);
+ req2[9] = comm.Irecv(recvbuf_Xy, recvCount_Xy,rank_Xy,recvtag);
+ req1[10] = comm.Isend(sendbuf_xz, sendCount_xz,rank_xz,sendtag);
+ req2[10] = comm.Irecv(recvbuf_XZ, recvCount_XZ,rank_XZ,recvtag);
+ req1[11] = comm.Isend(sendbuf_XZ, sendCount_XZ,rank_XZ,sendtag);
+ req2[11] = comm.Irecv(recvbuf_xz, recvCount_xz,rank_xz,recvtag);
+ req1[12] = comm.Isend(sendbuf_Xz, sendCount_Xz,rank_Xz,sendtag);
+ req2[12] = comm.Irecv(recvbuf_xZ, recvCount_xZ,rank_xZ,recvtag);
+ req1[13] = comm.Isend(sendbuf_xZ, sendCount_xZ,rank_xZ,sendtag);
+ req2[13] = comm.Irecv(recvbuf_Xz, recvCount_Xz,rank_Xz,recvtag);
+ req1[14] = comm.Isend(sendbuf_yz, sendCount_yz,rank_yz,sendtag);
+ req2[14] = comm.Irecv(recvbuf_YZ, recvCount_YZ,rank_YZ,recvtag);
+ req1[15] = comm.Isend(sendbuf_YZ, sendCount_YZ,rank_YZ,sendtag);
+ req2[15] = comm.Irecv(recvbuf_yz, recvCount_yz,rank_yz,recvtag);
+ req1[16] = comm.Isend(sendbuf_Yz, sendCount_Yz,rank_Yz,sendtag);
+ req2[16] = comm.Irecv(recvbuf_yZ, recvCount_yZ,rank_yZ,recvtag);
+ req1[17] = comm.Isend(sendbuf_yZ, sendCount_yZ,rank_yZ,sendtag);
+ req2[17] = comm.Irecv(recvbuf_Yz, recvCount_Yz,rank_Yz,recvtag);
//...................................................................................
//...................................................................................
// Wait for completion of Indicator Field communication
//...................................................................................
- MPI_Waitall(18,req1,stat1);
- MPI_Waitall(18,req2,stat2);
+ comm.waitAll(18,req1);
+ comm.waitAll(18,req2);
ScaLBL_DeviceBarrier();
//...................................................................................
//...................................................................................
@@ -2442,28 +2423,28 @@ int main(int argc, char **argv)
//...........................................................................
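For reference, the converted calls in this hunk assume the wrapper interface used throughout the patch: comm.Isend/comm.Irecv take (buffer, count, rank, tag) and return the MPI_Request, comm.waitAll(count, requests) completes them, and comm.barrier() synchronizes. A minimal sketch of that pattern for a single face exchange, assuming the Utilities::MPI communicator class from this patch; the header path and helper function below are illustrative only, not part of the patch:

    #include "common/MPI.h"   // assumed location of the Utilities::MPI wrapper

    // Post and complete one non-blocking send/recv pair through the wrapper;
    // the remaining D3Q19 directions follow the same pattern.
    void exchangeFace( Utilities::MPI &comm, double *sendbuf, double *recvbuf,
                       int sendCount, int recvCount, int rank_send, int rank_recv,
                       int sendtag, int recvtag )
    {
        MPI_Request req1[1], req2[1];
        req1[0] = comm.Isend( sendbuf, sendCount, rank_send, sendtag );
        req2[0] = comm.Irecv( recvbuf, recvCount, rank_recv, recvtag );
        comm.waitAll( 1, req1 );   // replaces MPI_Waitall(..., stat1)
        comm.waitAll( 1, req2 );   // replaces MPI_Waitall(..., stat2)
        comm.barrier();
    }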
comm.barrier(); - MPI_Allreduce(&nwp_volume,&nwp_volume_global,1,MPI_DOUBLE,MPI_SUM,comm); - MPI_Allreduce(&awn,&awn_global,1,MPI_DOUBLE,MPI_SUM,comm); - MPI_Allreduce(&ans,&ans_global,1,MPI_DOUBLE,MPI_SUM,comm); - MPI_Allreduce(&aws,&aws_global,1,MPI_DOUBLE,MPI_SUM,comm); - MPI_Allreduce(&lwns,&lwns_global,1,MPI_DOUBLE,MPI_SUM,comm); - MPI_Allreduce(&As,&As_global,1,MPI_DOUBLE,MPI_SUM,comm); - MPI_Allreduce(&Jwn,&Jwn_global,1,MPI_DOUBLE,MPI_SUM,comm); - MPI_Allreduce(&Kwn,&Kwn_global,1,MPI_DOUBLE,MPI_SUM,comm); - MPI_Allreduce(&efawns,&efawns_global,1,MPI_DOUBLE,MPI_SUM,comm); + nwp_volume_global = comm.sumReduce( nwp_volume ); + awn_global = comm.sumReduce( awn ); + ans_global = comm.sumReduce( ans ); + aws_global = comm.sumReduce( aws ); + lwns_global = comm.sumReduce( lwns ); + As_global = comm.sumReduce( As ); + Jwn_global = comm.sumReduce( Jwn ); + Kwn_global = comm.sumReduce( Kwn ); + efawns_global = comm.sumReduce( efawns ); // Phase averages - MPI_Allreduce(&vol_w,&vol_w_global,1,MPI_DOUBLE,MPI_SUM,comm); - MPI_Allreduce(&vol_n,&vol_n_global,1,MPI_DOUBLE,MPI_SUM,comm); - MPI_Allreduce(&paw,&paw_global,1,MPI_DOUBLE,MPI_SUM,comm); - MPI_Allreduce(&pan,&pan_global,1,MPI_DOUBLE,MPI_SUM,comm); - MPI_Allreduce(&vaw(0),&vaw_global(0),3,MPI_DOUBLE,MPI_SUM,comm); - MPI_Allreduce(&van(0),&van_global(0),3,MPI_DOUBLE,MPI_SUM,comm); - MPI_Allreduce(&vawn(0),&vawn_global(0),3,MPI_DOUBLE,MPI_SUM,comm); - MPI_Allreduce(&Gwn(0),&Gwn_global(0),6,MPI_DOUBLE,MPI_SUM,comm); - MPI_Allreduce(&Gns(0),&Gns_global(0),6,MPI_DOUBLE,MPI_SUM,comm); - MPI_Allreduce(&Gws(0),&Gws_global(0),6,MPI_DOUBLE,MPI_SUM,comm); - MPI_Allreduce(&trawn,&trawn_global,1,MPI_DOUBLE,MPI_SUM,comm); - MPI_Allreduce(&trJwn,&trJwn_global,1,MPI_DOUBLE,MPI_SUM,comm); + vol_w_global = comm.sumReduce( vol_w ); + vol_n_global = comm.sumReduce( vol_n ); + paw_global = comm.sumReduce( paw ); + pan_global = comm.sumReduce( pan ); + vaw_global(0) = comm.sumReduce( vaw(0) ); + van_global(0) = comm.sumReduce( van(0) ); + vawn_global(0) = comm.sumReduce( vawn(0) ); + Gwn_global(0) = comm.sumReduce( Gwn(0) ); + Gns_global(0) = comm.sumReduce( Gns(0) ); + Gws_global(0) = comm.sumReduce( Gws(0) ); + trawn_global = comm.sumReduce( trawn ); + trJwn_global = comm.sumReduce( trJwn ); comm.barrier(); //......................................................................... // Compute the change in the total surface energy based on the defined interval @@ -2689,7 +2670,7 @@ int main(int argc, char **argv) //************************************************************************/ ScaLBL_DeviceBarrier(); comm.barrier(); - stoptime = MPI_Wtime(); + stoptime = Utilities::MPI::time(); if (rank==0) printf("-------------------------------------------------------------------\n"); // Compute the walltime per timestep cputime = (stoptime - starttime)/timestep; diff --git a/tests/lbpm_BGK_simulator.cpp b/tests/lbpm_BGK_simulator.cpp index 8b079900..1ac61853 100644 --- a/tests/lbpm_BGK_simulator.cpp +++ b/tests/lbpm_BGK_simulator.cpp @@ -97,28 +97,28 @@ int main(int argc, char **argv) // Broadcast simulation parameters from rank 0 to all other procs comm.barrier(); //................................................. 
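// The scalar reductions above assume a sumReduce convenience method that wraps
// MPI_Allreduce.  A minimal sketch, assuming the comm object is a Utilities::MPI
// instance; the member names and the getMPIType helper are placeholders, not the
// actual implementation:
//
//     template<class T>
//     T Utilities::MPI::sumReduce( const T &local_value ) const {
//         T global_value = 0;
//         MPI_Allreduce( &local_value, &global_value, 1, getMPIType<T>(),
//                        MPI_SUM, communicator );
//         return global_value;
//     }
//
// Also note that the original reductions of vaw/van/vawn (count 3) and Gwn/Gns/Gws
// (count 6) are converted to single-element sumReduce calls on component 0 only; if
// the full vectors are needed, an array overload along the lines of
// comm.sumReduce( &vaw(0), &vaw_global(0), 3 ) would presumably be required.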
- MPI_Bcast(&tau,1,MPI_DOUBLE,0,comm); - //MPI_Bcast(&pBC,1,MPI_LOGICAL,0,comm); - // MPI_Bcast(&Restart,1,MPI_LOGICAL,0,comm); - MPI_Bcast(&din,1,MPI_DOUBLE,0,comm); - MPI_Bcast(&dout,1,MPI_DOUBLE,0,comm); - MPI_Bcast(&Fx,1,MPI_DOUBLE,0,comm); - MPI_Bcast(&Fy,1,MPI_DOUBLE,0,comm); - MPI_Bcast(&Fz,1,MPI_DOUBLE,0,comm); - MPI_Bcast(×tepMax,1,MPI_INT,0,comm); - MPI_Bcast(&interval,1,MPI_INT,0,comm); - MPI_Bcast(&tol,1,MPI_DOUBLE,0,comm); + comm.bcast(&tau,1,0); + //comm.bcast(&pBC,1,0); + //comm.bcast(&Restart,1,0); + comm.bcast(&din,1,0); + comm.bcast(&dout,1,0); + comm.bcast(&Fx,1,0); + comm.bcast(&Fy,1,0); + comm.bcast(&Fz,1,0); + comm.bcast(×tepMax,1,0); + comm.bcast(&interval,1,0); + comm.bcast(&tol,1,0); // Computational domain - MPI_Bcast(&Nx,1,MPI_INT,0,comm); - MPI_Bcast(&Ny,1,MPI_INT,0,comm); - MPI_Bcast(&Nz,1,MPI_INT,0,comm); - MPI_Bcast(&nprocx,1,MPI_INT,0,comm); - MPI_Bcast(&nprocy,1,MPI_INT,0,comm); - MPI_Bcast(&nprocz,1,MPI_INT,0,comm); - //MPI_Bcast(&nspheres,1,MPI_INT,0,comm); - MPI_Bcast(&Lx,1,MPI_DOUBLE,0,comm); - MPI_Bcast(&Ly,1,MPI_DOUBLE,0,comm); - MPI_Bcast(&Lz,1,MPI_DOUBLE,0,comm); + comm.bcast(&Nx,1,0); + comm.bcast(&Ny,1,0); + comm.bcast(&Nz,1,0); + comm.bcast(&nprocx,1,0); + comm.bcast(&nprocy,1,0); + comm.bcast(&nprocz,1,0); + //comm.bcast(&nspheres,1,0); + comm.bcast(&Lx,1,0); + comm.bcast(&Ly,1,0); + comm.bcast(&Lz,1,0); //................................................. comm.barrier(); @@ -249,7 +249,7 @@ int main(int argc, char **argv) } } } - MPI_Allreduce(&sum_local,&sum,1,MPI_DOUBLE,MPI_SUM,comm); + sum = comm.sumReduce( sum_local ); porosity = sum*iVol_global; if (rank==0) printf("Media porosity = %f \n",porosity); @@ -331,7 +331,7 @@ int main(int argc, char **argv) //.......create and start timer............ double starttime,stoptime,cputime; comm.barrier(); - starttime = MPI_Wtime(); + starttime = Utilities::MPI::time(); //......................................... double D32,Fo,Re,velocity,err1D,mag_force,vel_prev; @@ -410,7 +410,7 @@ int main(int argc, char **argv) //************************************************************************/ ScaLBL_DeviceBarrier(); comm.barrier(); - stoptime = MPI_Wtime(); + stoptime = Utilities::MPI::time(); if (rank==0) printf("-------------------------------------------------------------------\n"); // Compute the walltime per timestep cputime = (stoptime - starttime)/timestep; diff --git a/tests/lbpm_color_macro_simulator.cpp b/tests/lbpm_color_macro_simulator.cpp index 97df6812..c92b0c45 100644 --- a/tests/lbpm_color_macro_simulator.cpp +++ b/tests/lbpm_color_macro_simulator.cpp @@ -39,9 +39,6 @@ int main(int argc, char **argv) int nprocx,nprocy,nprocz; int iproc,jproc,kproc; - MPI_Request req1[18],req2[18]; - MPI_Status stat1[18],stat2[18]; - if (rank == 0){ printf("********************************************************\n"); printf("Running Color LBM \n"); @@ -172,32 +169,32 @@ int main(int argc, char **argv) // Broadcast simulation parameters from rank 0 to all other procs comm.barrier(); //................................................. 
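// The parameter broadcasts follow the same pattern: comm.bcast(ptr,count,root) is a
// typed wrapper that drops the explicit MPI datatype and communicator arguments.  A
// sketch under the same assumptions as the sumReduce example above:
//
//     template<class T>
//     void Utilities::MPI::bcast( T *buf, int count, int root ) const {
//         MPI_Bcast( buf, count, getMPIType<T>(), root, communicator );
//     }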
- MPI_Bcast(&tauA,1,MPI_DOUBLE,0,comm); - MPI_Bcast(&tauB,1,MPI_DOUBLE,0,comm); - MPI_Bcast(&rhoA,1,MPI_DOUBLE,0,comm); - MPI_Bcast(&rhoB,1,MPI_DOUBLE,0,comm); - MPI_Bcast(&alpha,1,MPI_DOUBLE,0,comm); - MPI_Bcast(&beta,1,MPI_DOUBLE,0,comm); - MPI_Bcast(&BoundaryCondition,1,MPI_INT,0,comm); - MPI_Bcast(&InitialCondition,1,MPI_INT,0,comm); - MPI_Bcast(&din,1,MPI_DOUBLE,0,comm); - MPI_Bcast(&dout,1,MPI_DOUBLE,0,comm); - MPI_Bcast(&Fx,1,MPI_DOUBLE,0,comm); - MPI_Bcast(&Fy,1,MPI_DOUBLE,0,comm); - MPI_Bcast(&Fz,1,MPI_DOUBLE,0,comm); - MPI_Bcast(×tepMax,1,MPI_INT,0,comm); - MPI_Bcast(&RESTART_INTERVAL,1,MPI_INT,0,comm); - MPI_Bcast(&tol,1,MPI_DOUBLE,0,comm); + comm.bcast(&tauA,1,0); + comm.bcast(&tauB,1,0); + comm.bcast(&rhoA,1,0); + comm.bcast(&rhoB,1,0); + comm.bcast(&alpha,1,0); + comm.bcast(&beta,1,0); + comm.bcast(&BoundaryCondition,1,0); + comm.bcast(&InitialCondition,1,0); + comm.bcast(&din,1,0); + comm.bcast(&dout,1,0); + comm.bcast(&Fx,1,0); + comm.bcast(&Fy,1,0); + comm.bcast(&Fz,1,0); + comm.bcast(×tepMax,1,0); + comm.bcast(&RESTART_INTERVAL,1,0); + comm.bcast(&tol,1,0); // Computational domain - MPI_Bcast(&Nx,1,MPI_INT,0,comm); - MPI_Bcast(&Ny,1,MPI_INT,0,comm); - MPI_Bcast(&Nz,1,MPI_INT,0,comm); - MPI_Bcast(&nprocx,1,MPI_INT,0,comm); - MPI_Bcast(&nprocy,1,MPI_INT,0,comm); - MPI_Bcast(&nprocz,1,MPI_INT,0,comm); - MPI_Bcast(&Lx,1,MPI_DOUBLE,0,comm); - MPI_Bcast(&Ly,1,MPI_DOUBLE,0,comm); - MPI_Bcast(&Lz,1,MPI_DOUBLE,0,comm); + comm.bcast(&Nx,1,0); + comm.bcast(&Ny,1,0); + comm.bcast(&Nz,1,0); + comm.bcast(&nprocx,1,0); + comm.bcast(&nprocy,1,0); + comm.bcast(&nprocz,1,0); + comm.bcast(&Lx,1,0); + comm.bcast(&Ly,1,0); + comm.bcast(&Lz,1,0); //................................................. flux = 0.f; @@ -322,7 +319,7 @@ int main(int argc, char **argv) timestep=0; } } - MPI_Bcast(×tep,1,MPI_INT,0,comm); + comm.bcast(×tep,1,0); FILE *RESTART = fopen(LocalRestartFile,"rb"); if (IDFILE==NULL) ERROR("lbpm_color_simulator: Error opening file: Restart.xxxxx"); readID=fread(id,1,N,RESTART); @@ -361,7 +358,7 @@ int main(int argc, char **argv) } } } - MPI_Allreduce(&sum_local,&sum,1,MPI_DOUBLE,MPI_SUM,comm); + sum - comm.sumReduce( sum_local ); porosity = sum*iVol_global; if (rank==0) printf("Media porosity = %f \n",porosity); //......................................................... @@ -537,7 +534,7 @@ int main(int argc, char **argv) double starttime,stoptime,cputime; ScaLBL_DeviceBarrier(); comm.barrier(); - starttime = MPI_Wtime(); + starttime = Utilities::MPI::time(); //......................................... 
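// Minor issue in the lbpm_color_macro_simulator hunk above: the converted porosity
// reduction reads "sum - comm.sumReduce( sum_local );", which computes a difference
// and discards it.  The assignment "sum = comm.sumReduce( sum_local );" is presumably
// what was intended, matching the other conversions in this patch.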
err = 1.0; @@ -637,7 +634,7 @@ int main(int argc, char **argv) //************************************************************************ ScaLBL_DeviceBarrier(); comm.barrier(); - stoptime = MPI_Wtime(); + stoptime = Utilities::MPI::time(); if (rank==0) printf("-------------------------------------------------------------------\n"); // Compute the walltime per timestep cputime = (stoptime - starttime)/timestep; diff --git a/tests/lbpm_disc_pp.cpp b/tests/lbpm_disc_pp.cpp index 20d41884..41825c7d 100644 --- a/tests/lbpm_disc_pp.cpp +++ b/tests/lbpm_disc_pp.cpp @@ -9,7 +9,7 @@ #include "analysis/pmmc.h" #include "common/Domain.h" #include "common/Communication.h" -#include "common/MPI.h" // This includes mpi.h +#include "common/MPI.h" #include "common/SpherePack.h" /* @@ -147,8 +147,6 @@ int main(int argc, char **argv) int rank_xz,rank_XZ,rank_xZ,rank_Xz; int rank_yz,rank_YZ,rank_yZ,rank_Yz; //********************************** - MPI_Request req1[18],req2[18]; - MPI_Status stat1[18],stat2[18]; int depth; @@ -189,16 +187,16 @@ int main(int argc, char **argv) comm.barrier(); //................................................. // Computational domain - MPI_Bcast(&Nx,1,MPI_INT,0,comm); - MPI_Bcast(&Ny,1,MPI_INT,0,comm); - MPI_Bcast(&Nz,1,MPI_INT,0,comm); - MPI_Bcast(&nprocx,1,MPI_INT,0,comm); - MPI_Bcast(&nprocy,1,MPI_INT,0,comm); - MPI_Bcast(&nprocz,1,MPI_INT,0,comm); - MPI_Bcast(&ndiscs,1,MPI_INT,0,comm); - MPI_Bcast(&Lx,1,MPI_DOUBLE,0,comm); - MPI_Bcast(&Ly,1,MPI_DOUBLE,0,comm); - MPI_Bcast(&Lz,1,MPI_DOUBLE,0,comm); + comm.bcast(&Nx,1,0); + comm.bcast(&Ny,1,0); + comm.bcast(&Nz,1,0); + comm.bcast(&nprocx,1,0); + comm.bcast(&nprocy,1,0); + comm.bcast(&nprocz,1,0); + comm.bcast(&ndiscs,1,0); + comm.bcast(&Lx,1,0); + comm.bcast(&Ly,1,0); + comm.bcast(&Lz,1,0); //................................................. comm.barrier(); @@ -275,9 +273,9 @@ int main(int argc, char **argv) if (rank == 0) ReadDiscPacking(ndiscs,cx,cy,rad); comm.barrier(); // Broadcast the sphere packing to all processes - MPI_Bcast(cx,ndiscs,MPI_DOUBLE,0,comm); - MPI_Bcast(cy,ndiscs,MPI_DOUBLE,0,comm); - MPI_Bcast(rad,ndiscs,MPI_DOUBLE,0,comm); + comm.bcast(cx,ndiscs,0); + comm.bcast(cy,ndiscs,0); + comm.bcast(rad,ndiscs,0); //........................................................................... comm.barrier(); if (rank == 0){ @@ -346,7 +344,7 @@ int main(int argc, char **argv) } } sum_local = 1.0*sum; - MPI_Allreduce(&sum_local,&porosity,1,MPI_DOUBLE,MPI_SUM,comm); + porosity = comm.sumReduce( sum_local ); porosity = porosity*iVol_global; if (rank==0) printf("Media porosity = %f \n",porosity); @@ -362,7 +360,7 @@ int main(int argc, char **argv) } } } - MPI_Allreduce(&sum_local,&pore_vol,1,MPI_DOUBLE,MPI_SUM,comm); + pore_vol = comm.sumReduce( sum_local ); //......................................................... 
// don't perform computations at the eight corners diff --git a/tests/lbpm_inkbottle_pp.cpp b/tests/lbpm_inkbottle_pp.cpp index 669ab8c0..ca188633 100644 --- a/tests/lbpm_inkbottle_pp.cpp +++ b/tests/lbpm_inkbottle_pp.cpp @@ -81,16 +81,16 @@ int main(int argc, char **argv) // Broadcast simulation parameters from rank 0 to all other procs comm.barrier(); // Computational domain - MPI_Bcast(&Nx,1,MPI_INT,0,comm); - MPI_Bcast(&Ny,1,MPI_INT,0,comm); - MPI_Bcast(&Nz,1,MPI_INT,0,comm); - MPI_Bcast(&nprocx,1,MPI_INT,0,comm); - MPI_Bcast(&nprocy,1,MPI_INT,0,comm); - MPI_Bcast(&nprocz,1,MPI_INT,0,comm); - MPI_Bcast(&nspheres,1,MPI_INT,0,comm); - MPI_Bcast(&Lx,1,MPI_DOUBLE,0,comm); - MPI_Bcast(&Ly,1,MPI_DOUBLE,0,comm); - MPI_Bcast(&Lz,1,MPI_DOUBLE,0,comm); + comm.bcast(&Nx,1,0); + comm.bcast(&Ny,1,0); + comm.bcast(&Nz,1,0); + comm.bcast(&nprocx,1,0); + comm.bcast(&nprocy,1,0); + comm.bcast(&nprocz,1,0); + comm.bcast(&nspheres,1,0); + comm.bcast(&Lx,1,0); + comm.bcast(&Ly,1,0); + comm.bcast(&Lz,1,0); //................................................. comm.barrier(); @@ -197,7 +197,7 @@ int main(int argc, char **argv) } } } - MPI_Allreduce(&sum_local,&pore_vol,1,MPI_DOUBLE,MPI_SUM,comm); + pore_vol = comm.sumReduce( sum_local ); //......................................................... // don't perform computations at the eight corners diff --git a/tests/lbpm_juanes_bench_disc_pp.cpp b/tests/lbpm_juanes_bench_disc_pp.cpp index 47d8cb84..a90d43f8 100644 --- a/tests/lbpm_juanes_bench_disc_pp.cpp +++ b/tests/lbpm_juanes_bench_disc_pp.cpp @@ -9,7 +9,7 @@ #include "analysis/pmmc.h" #include "common/Domain.h" #include "common/Communication.h" -#include "common/MPI.h" // This includes mpi.h +#include "common/MPI.h" #include "common/SpherePack.h" /* @@ -147,9 +147,6 @@ int main(int argc, char **argv) int rank_xz,rank_XZ,rank_xZ,rank_Xz; int rank_yz,rank_YZ,rank_yZ,rank_Yz; //********************************** - MPI_Request req1[18],req2[18]; - MPI_Status stat1[18],stat2[18]; - if (rank == 0){ printf("********************************************************\n"); @@ -193,16 +190,16 @@ int main(int argc, char **argv) comm.barrier(); //................................................. // Computational domain - MPI_Bcast(&Nx,1,MPI_INT,0,comm); - MPI_Bcast(&Ny,1,MPI_INT,0,comm); - MPI_Bcast(&Nz,1,MPI_INT,0,comm); - MPI_Bcast(&nprocx,1,MPI_INT,0,comm); - MPI_Bcast(&nprocy,1,MPI_INT,0,comm); - MPI_Bcast(&nprocz,1,MPI_INT,0,comm); - MPI_Bcast(&ndiscs,1,MPI_INT,0,comm); - MPI_Bcast(&Lx,1,MPI_DOUBLE,0,comm); - MPI_Bcast(&Ly,1,MPI_DOUBLE,0,comm); - MPI_Bcast(&Lz,1,MPI_DOUBLE,0,comm); + comm.bcast(&Nx,1,0); + comm.bcast(&Ny,1,0); + comm.bcast(&Nz,1,0); + comm.bcast(&nprocx,1,0); + comm.bcast(&nprocy,1,0); + comm.bcast(&nprocz,1,0); + comm.bcast(&ndiscs,1,0); + comm.bcast(&Lx,1,0); + comm.bcast(&Ly,1,0); + comm.bcast(&Lz,1,0); //................................................. comm.barrier(); @@ -292,9 +289,9 @@ int main(int argc, char **argv) if (rank == 0) ReadDiscPacking(ndiscs,cx,cy,rad); comm.barrier(); // Broadcast the sphere packing to all processes - MPI_Bcast(cx,ndiscs,MPI_DOUBLE,0,comm); - MPI_Bcast(cy,ndiscs,MPI_DOUBLE,0,comm); - MPI_Bcast(rad,ndiscs,MPI_DOUBLE,0,comm); + comm.bcast(cx,ndiscs,0); + comm.bcast(cy,ndiscs,0); + comm.bcast(rad,ndiscs,0); //........................................................................... 
comm.barrier(); /* if (rank == 0){ @@ -436,7 +433,7 @@ int main(int argc, char **argv) } } sum_local = 1.0*sum; - MPI_Allreduce(&sum_local,&porosity,1,MPI_DOUBLE,MPI_SUM,comm); + porosity = comm.sumReduce( sum_local ); porosity = porosity*iVol_global; if (rank==0) printf("Media porosity = %f \n",porosity); @@ -452,7 +449,7 @@ int main(int argc, char **argv) } } } - MPI_Allreduce(&sum_local,&pore_vol,1,MPI_DOUBLE,MPI_SUM,comm); + pore_vol = comm.sumReduce( sum_local ); //......................................................... // don't perform computations at the eight corners diff --git a/tests/lbpm_nondarcy_simulator.cpp b/tests/lbpm_nondarcy_simulator.cpp index 096dc790..a25fef69 100644 --- a/tests/lbpm_nondarcy_simulator.cpp +++ b/tests/lbpm_nondarcy_simulator.cpp @@ -94,8 +94,6 @@ int main(int argc, char **argv) int rank_xz,rank_XZ,rank_xZ,rank_Xz; int rank_yz,rank_YZ,rank_yZ,rank_Yz; //********************************** - MPI_Request req1[18],req2[18]; - MPI_Status stat1[18],stat2[18]; double REYNOLDS_NUMBER = 100.f; if (argc > 1){ @@ -158,28 +156,28 @@ int main(int argc, char **argv) // Broadcast simulation parameters from rank 0 to all other procs comm.barrier(); //................................................. - MPI_Bcast(&tau,1,MPI_DOUBLE,0,comm); - //MPI_Bcast(&pBC,1,MPI_LOGICAL,0,comm); - // MPI_Bcast(&Restart,1,MPI_LOGICAL,0,comm); - MPI_Bcast(&din,1,MPI_DOUBLE,0,comm); - MPI_Bcast(&dout,1,MPI_DOUBLE,0,comm); - MPI_Bcast(&Fx,1,MPI_DOUBLE,0,comm); - MPI_Bcast(&Fy,1,MPI_DOUBLE,0,comm); - MPI_Bcast(&Fz,1,MPI_DOUBLE,0,comm); - MPI_Bcast(×tepMax,1,MPI_INT,0,comm); - MPI_Bcast(&interval,1,MPI_INT,0,comm); - MPI_Bcast(&tol,1,MPI_DOUBLE,0,comm); + comm.bcast(&tau,1,0); + //comm.bcast(&pBC,1,0); + //comm.bcast(&Restart,1,0); + comm.bcast(&din,1,0); + comm.bcast(&dout,1,0); + comm.bcast(&Fx,1,0); + comm.bcast(&Fy,1,0); + comm.bcast(&Fz,1,0); + comm.bcast(×tepMax,1,0); + comm.bcast(&interval,1,0); + comm.bcast(&tol,1,0); // Computational domain - MPI_Bcast(&Nx,1,MPI_INT,0,comm); - MPI_Bcast(&Ny,1,MPI_INT,0,comm); - MPI_Bcast(&Nz,1,MPI_INT,0,comm); - MPI_Bcast(&nprocx,1,MPI_INT,0,comm); - MPI_Bcast(&nprocy,1,MPI_INT,0,comm); - MPI_Bcast(&nprocz,1,MPI_INT,0,comm); - MPI_Bcast(&nspheres,1,MPI_INT,0,comm); - MPI_Bcast(&Lx,1,MPI_DOUBLE,0,comm); - MPI_Bcast(&Ly,1,MPI_DOUBLE,0,comm); - MPI_Bcast(&Lz,1,MPI_DOUBLE,0,comm); + comm.bcast(&Nx,1,0); + comm.bcast(&Ny,1,0); + comm.bcast(&Nz,1,0); + comm.bcast(&nprocx,1,0); + comm.bcast(&nprocy,1,0); + comm.bcast(&nprocz,1,0); + comm.bcast(&nspheres,1,0); + comm.bcast(&Lx,1,0); + comm.bcast(&Ly,1,0); + comm.bcast(&Lz,1,0); //................................................. comm.barrier(); @@ -308,8 +306,8 @@ int main(int argc, char **argv) } } } - MPI_Allreduce(&sum_local,&pore_vol,1,MPI_DOUBLE,MPI_SUM,comm); - // MPI_Allreduce(&sum_local,&porosity,1,MPI_DOUBLE,MPI_SUM,comm); + por_vol = comm.sumReduce( sum_local ); + //porosity = comm.sumReduce( sum_local ); porosity = pore_vol*iVol_global; if (rank==0) printf("Media porosity = %f \n",porosity); //......................................................... @@ -433,7 +431,7 @@ int main(int argc, char **argv) //.......create and start timer............ double starttime,stoptime,cputime; comm.barrier(); - starttime = MPI_Wtime(); + starttime = Utilities::MPI::time(); //......................................... 
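// Similarly, in the lbpm_nondarcy_simulator hunk above, the converted line
// "por_vol = comm.sumReduce( sum_local );" assigns to a name that does not appear
// elsewhere; the surrounding code uses pore_vol, so
// "pore_vol = comm.sumReduce( sum_local );" appears to be the intended conversion.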
double D32,vawx,vawy,vawz,Fo,Re,velocity,err1D,mag_force,vel_prev; @@ -554,7 +552,7 @@ int main(int argc, char **argv) fclose(NONDARCY); ScaLBL_DeviceBarrier(); comm.barrier(); - stoptime = MPI_Wtime(); + stoptime = Utilities::MPI::time(); if (rank==0) printf("-------------------------------------------------------------------\n"); // Compute the walltime per timestep cputime = (stoptime - starttime)/timestep; diff --git a/tests/lbpm_nonnewtonian_simulator.cpp b/tests/lbpm_nonnewtonian_simulator.cpp index ff8792e7..bea3a814 100644 --- a/tests/lbpm_nonnewtonian_simulator.cpp +++ b/tests/lbpm_nonnewtonian_simulator.cpp @@ -124,8 +124,6 @@ int main(int argc, char **argv) // int rank_xz,rank_XZ,rank_xZ,rank_Xz; // int rank_yz,rank_YZ,rank_yZ,rank_Yz; //********************************** - MPI_Request req1[18],req2[18]; - MPI_Status stat1[18],stat2[18]; if (rank == 0){ printf("********************************************************\n"); @@ -428,8 +426,8 @@ int main(int argc, char **argv) } } - MPI_Allreduce(&sum_local,&pore_vol,1,MPI_DOUBLE,MPI_SUM,comm); /* 6 */ - //MPI_Allreduce(&sum_local,&porosity,1,MPI_DOUBLE,MPI_SUM,comm); + pore_vol = comm.sumReduce( sum_local ); /* 6 */ + //porosity = comm.sumReduce( sum_local ); porosity = pore_vol*iVol_global; if (rank==0) printf("Media porosity = %f \n",porosity); @@ -574,7 +572,7 @@ int main(int argc, char **argv) timestep=5; } } - MPI_Bcast(×tep,1,MPI_INT,0,comm); + comm.bcast(×tep,1,0); // Read in the restart file to CPU buffers double *cDen = new double[2*N]; @@ -662,7 +660,7 @@ int main(int argc, char **argv) //.......create and start timer............ double starttime,stoptime,cputime; comm.barrier(); - starttime = MPI_Wtime(); + starttime = Utilities::MPI::time(); /* * Create the thread pool @@ -810,7 +808,7 @@ int main(int argc, char **argv) //************************************************************************/ ScaLBL_DeviceBarrier(); comm.barrier(); - stoptime = MPI_Wtime(); + stoptime = Utilities::MPI::time(); if (rank==0) printf("-------------------------------------------------------------------\n"); // Compute the walltime per timestep cputime = (stoptime - starttime)/timestep; @@ -835,20 +833,6 @@ int main(int argc, char **argv) - - - - - - - - - - - - - - // Scrap // if (rank==0){ diff --git a/tests/lbpm_plates_pp.cpp b/tests/lbpm_plates_pp.cpp index acd64f52..37191979 100644 --- a/tests/lbpm_plates_pp.cpp +++ b/tests/lbpm_plates_pp.cpp @@ -31,8 +31,6 @@ int main(int argc, char **argv) int rank_xz,rank_XZ,rank_xZ,rank_Xz; int rank_yz,rank_YZ,rank_yZ,rank_Yz; //********************************** - MPI_Request req1[18],req2[18]; - MPI_Status stat1[18],stat2[18]; double TubeRadius =15.0; double WIDTH; @@ -77,16 +75,16 @@ int main(int argc, char **argv) // Broadcast simulation parameters from rank 0 to all other procs comm.barrier(); // Computational domain - MPI_Bcast(&Nx,1,MPI_INT,0,comm); - MPI_Bcast(&Ny,1,MPI_INT,0,comm); - MPI_Bcast(&Nz,1,MPI_INT,0,comm); - MPI_Bcast(&nprocx,1,MPI_INT,0,comm); - MPI_Bcast(&nprocy,1,MPI_INT,0,comm); - MPI_Bcast(&nprocz,1,MPI_INT,0,comm); - MPI_Bcast(&nspheres,1,MPI_INT,0,comm); - MPI_Bcast(&Lx,1,MPI_DOUBLE,0,comm); - MPI_Bcast(&Ly,1,MPI_DOUBLE,0,comm); - MPI_Bcast(&Lz,1,MPI_DOUBLE,0,comm); + comm.bcast(&Nx,1,0); + comm.bcast(&Ny,1,0); + comm.bcast(&Nz,1,0); + comm.bcast(&nprocx,1,0); + comm.bcast(&nprocy,1,0); + comm.bcast(&nprocz,1,0); + comm.bcast(&nspheres,1,0); + comm.bcast(&Lx,1,0); + comm.bcast(&Ly,1,0); + comm.bcast(&Lz,1,0); //................................................. 
comm.barrier(); @@ -176,7 +174,7 @@ int main(int argc, char **argv) } } } - MPI_Allreduce(&sum_local,&pore_vol,1,MPI_DOUBLE,MPI_SUM,comm); + pore_vol = comm.sumReduce( sum_local ); //......................................................... // don't perform computations at the eight corners diff --git a/tests/lbpm_porenetwork_pp.cpp b/tests/lbpm_porenetwork_pp.cpp index 4a6ccda7..1715811f 100644 --- a/tests/lbpm_porenetwork_pp.cpp +++ b/tests/lbpm_porenetwork_pp.cpp @@ -24,9 +24,6 @@ int main(int argc, char **argv) int iproc,jproc,kproc; int sendtag,recvtag; //***************************************** - MPI_Request req1[18],req2[18]; - MPI_Status stat1[18],stat2[18]; - //********************************** int nsph,ncyl, BC; nsph = atoi(argv[1]); @@ -67,16 +64,16 @@ int main(int argc, char **argv) // Broadcast simulation parameters from rank 0 to all other procs comm.barrier(); // Computational domain - MPI_Bcast(&Nx,1,MPI_INT,0,comm); - MPI_Bcast(&Ny,1,MPI_INT,0,comm); - MPI_Bcast(&Nz,1,MPI_INT,0,comm); - MPI_Bcast(&nprocx,1,MPI_INT,0,comm); - MPI_Bcast(&nprocy,1,MPI_INT,0,comm); - MPI_Bcast(&nprocz,1,MPI_INT,0,comm); - MPI_Bcast(&nspheres,1,MPI_INT,0,comm); - MPI_Bcast(&Lx,1,MPI_DOUBLE,0,comm); - MPI_Bcast(&Ly,1,MPI_DOUBLE,0,comm); - MPI_Bcast(&Lz,1,MPI_DOUBLE,0,comm); + comm.bcast(&Nx,1,0); + comm.bcast(&Ny,1,0); + comm.bcast(&Nz,1,0); + comm.bcast(&nprocx,1,0); + comm.bcast(&nprocy,1,0); + comm.bcast(&nprocz,1,0); + comm.bcast(&nspheres,1,0); + comm.bcast(&Lx,1,0); + comm.bcast(&Ly,1,0); + comm.bcast(&Lz,1,0); //................................................. comm.barrier(); @@ -269,7 +266,7 @@ int main(int argc, char **argv) } } } - MPI_Allreduce(&sum_local,&pore_vol,1,MPI_DOUBLE,MPI_SUM,comm); + pore_vol = comm.sumReduce( sum_local ); if (rank==0) printf("Pore volume = %f \n",pore_vol/double(Nx*Ny*Nz)); //......................................................... // don't perform computations at the eight corners diff --git a/tests/lbpm_random_pp.cpp b/tests/lbpm_random_pp.cpp index ad4b83cc..8318f50f 100644 --- a/tests/lbpm_random_pp.cpp +++ b/tests/lbpm_random_pp.cpp @@ -98,16 +98,16 @@ int main(int argc, char **argv) } comm.barrier(); // Computational domain - MPI_Bcast(&nx,1,MPI_INT,0,comm); - MPI_Bcast(&ny,1,MPI_INT,0,comm); - MPI_Bcast(&nz,1,MPI_INT,0,comm); - MPI_Bcast(&nprocx,1,MPI_INT,0,comm); - MPI_Bcast(&nprocy,1,MPI_INT,0,comm); - MPI_Bcast(&nprocz,1,MPI_INT,0,comm); - MPI_Bcast(&nspheres,1,MPI_INT,0,comm); - MPI_Bcast(&Lx,1,MPI_DOUBLE,0,comm); - MPI_Bcast(&Ly,1,MPI_DOUBLE,0,comm); - MPI_Bcast(&Lz,1,MPI_DOUBLE,0,comm); + comm.bcast(&nx,1,0); + comm.bcast(&ny,1,0); + comm.bcast(&nz,1,0); + comm.bcast(&nprocx,1,0); + comm.bcast(&nprocy,1,0); + comm.bcast(&nprocz,1,0); + comm.bcast(&nspheres,1,0); + comm.bcast(&Lx,1,0); + comm.bcast(&Ly,1,0); + comm.bcast(&Lz,1,0); //................................................. 
comm.barrier(); @@ -166,7 +166,7 @@ int main(int argc, char **argv) } } // total Global is the number of nodes in the pore-space - MPI_Allreduce(&count,&totalGlobal,1,MPI_INT,MPI_SUM,comm); + totalGlobal = sumReduce( count ); float porosity=float(totalGlobal)/(nprocx*nprocy*nprocz*(nx-2)*(ny-2)*(nz-2)); if (rank==0) printf("Media Porosity: %f \n",porosity); @@ -216,12 +216,12 @@ int main(int argc, char **argv) sizeY = SizeY[bin]; sizeZ = SizeZ[bin]; } - MPI_Bcast(&x,1,MPI_INT,0,comm); - MPI_Bcast(&y,1,MPI_INT,0,comm); - MPI_Bcast(&z,1,MPI_INT,0,comm); - MPI_Bcast(&sizeX,1,MPI_INT,0,comm); - MPI_Bcast(&sizeY,1,MPI_INT,0,comm); - MPI_Bcast(&sizeZ,1,MPI_INT,0,comm); + comm.bcast(&x,1,0); + comm.bcast(&y,1,0); + comm.bcast(&z,1,0); + comm.bcast(&sizeX,1,0); + comm.bcast(&sizeY,1,0); + comm.bcast(&sizeZ,1,0); //if (rank==0) printf("Broadcast block at %i,%i,%i \n",x,y,z); @@ -269,7 +269,7 @@ int main(int argc, char **argv) } } } - MPI_Allreduce(&count,&countGlobal,1,MPI_INT,MPI_SUM,comm); + countGlobal = sumReduce( count ); sat = float(countGlobal)/totalGlobal; //if (rank==0) printf("New count=%i\n",countGlobal); //if (rank==0) printf("New saturation=%f\n",sat); @@ -345,42 +345,24 @@ int main(int argc, char **argv) PackID(Dm.sendList_yZ, Dm.sendCount_yZ ,sendID_yZ, id); PackID(Dm.sendList_YZ, Dm.sendCount_YZ ,sendID_YZ, id); //...................................................................................... - MPI_Sendrecv(sendID_x,Dm.sendCount_x,MPI_CHAR,Dm.rank_x(),sendtag, - recvID_X,Dm.recvCount_X,MPI_CHAR,Dm.rank_X(),recvtag,comm,MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_X,Dm.sendCount_X,MPI_CHAR,Dm.rank_X(),sendtag, - recvID_x,Dm.recvCount_x,MPI_CHAR,Dm.rank_x(),recvtag,comm,MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_y,Dm.sendCount_y,MPI_CHAR,Dm.rank_y(),sendtag, - recvID_Y,Dm.recvCount_Y,MPI_CHAR,Dm.rank_Y(),recvtag,comm,MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_Y,Dm.sendCount_Y,MPI_CHAR,Dm.rank_Y(),sendtag, - recvID_y,Dm.recvCount_y,MPI_CHAR,Dm.rank_y(),recvtag,comm,MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_z,Dm.sendCount_z,MPI_CHAR,Dm.rank_z(),sendtag, - recvID_Z,Dm.recvCount_Z,MPI_CHAR,Dm.rank_Z(),recvtag,comm,MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_Z,Dm.sendCount_Z,MPI_CHAR,Dm.rank_Z(),sendtag, - recvID_z,Dm.recvCount_z,MPI_CHAR,Dm.rank_z(),recvtag,comm,MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_xy,Dm.sendCount_xy,MPI_CHAR,Dm.rank_xy(),sendtag, - recvID_XY,Dm.recvCount_XY,MPI_CHAR,Dm.rank_XY(),recvtag,comm,MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_XY,Dm.sendCount_XY,MPI_CHAR,Dm.rank_XY(),sendtag, - recvID_xy,Dm.recvCount_xy,MPI_CHAR,Dm.rank_xy(),recvtag,comm,MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_Xy,Dm.sendCount_Xy,MPI_CHAR,Dm.rank_Xy(),sendtag, - recvID_xY,Dm.recvCount_xY,MPI_CHAR,Dm.rank_xY(),recvtag,comm,MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_xY,Dm.sendCount_xY,MPI_CHAR,Dm.rank_xY(),sendtag, - recvID_Xy,Dm.recvCount_Xy,MPI_CHAR,Dm.rank_Xy(),recvtag,comm,MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_xz,Dm.sendCount_xz,MPI_CHAR,Dm.rank_xz(),sendtag, - recvID_XZ,Dm.recvCount_XZ,MPI_CHAR,Dm.rank_XZ(),recvtag,comm,MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_XZ,Dm.sendCount_XZ,MPI_CHAR,Dm.rank_XZ(),sendtag, - recvID_xz,Dm.recvCount_xz,MPI_CHAR,Dm.rank_xz(),recvtag,comm,MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_Xz,Dm.sendCount_Xz,MPI_CHAR,Dm.rank_Xz(),sendtag, - recvID_xZ,Dm.recvCount_xZ,MPI_CHAR,Dm.rank_xZ(),recvtag,comm,MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_xZ,Dm.sendCount_xZ,MPI_CHAR,Dm.rank_xZ(),sendtag, - 
recvID_Xz,Dm.recvCount_Xz,MPI_CHAR,Dm.rank_Xz(),recvtag,comm,MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_yz,Dm.sendCount_yz,MPI_CHAR,Dm.rank_yz(),sendtag, - recvID_YZ,Dm.recvCount_YZ,MPI_CHAR,Dm.rank_YZ(),recvtag,comm,MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_YZ,Dm.sendCount_YZ,MPI_CHAR,Dm.rank_YZ(),sendtag, - recvID_yz,Dm.recvCount_yz,MPI_CHAR,Dm.rank_yz(),recvtag,comm,MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_Yz,Dm.sendCount_Yz,MPI_CHAR,Dm.rank_Yz(),sendtag, - recvID_yZ,Dm.recvCount_yZ,MPI_CHAR,Dm.rank_yZ(),recvtag,comm,MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_yZ,Dm.sendCount_yZ,MPI_CHAR,Dm.rank_yZ(),sendtag, - recvID_Yz,Dm.recvCount_Yz,MPI_CHAR,Dm.rank_Yz(),recvtag,comm,MPI_STATUS_IGNORE); + comm.sendrecv(sendID_x,Dm.sendCount_x,Dm.rank_x(),sendtag,recvID_X,Dm.recvCount_X,Dm.rank_X(),recvtag); + comm.sendrecv(sendID_X,Dm.sendCount_X,Dm.rank_X(),sendtag,recvID_x,Dm.recvCount_x,Dm.rank_x(),recvtag); + comm.sendrecv(sendID_y,Dm.sendCount_y,Dm.rank_y(),sendtag,recvID_Y,Dm.recvCount_Y,Dm.rank_Y(),recvtag); + comm.sendrecv(sendID_Y,Dm.sendCount_Y,Dm.rank_Y(),sendtag,recvID_y,Dm.recvCount_y,Dm.rank_y(),recvtag); + comm.sendrecv(sendID_z,Dm.sendCount_z,Dm.rank_z(),sendtag,recvID_Z,Dm.recvCount_Z,Dm.rank_Z(),recvtag); + comm.sendrecv(sendID_Z,Dm.sendCount_Z,Dm.rank_Z(),sendtag,recvID_z,Dm.recvCount_z,Dm.rank_z(),recvtag); + comm.sendrecv(sendID_xy,Dm.sendCount_xy,Dm.rank_xy(),sendtag,recvID_XY,Dm.recvCount_XY,Dm.rank_XY(),recvtag); + comm.sendrecv(sendID_XY,Dm.sendCount_XY,Dm.rank_XY(),sendtag,recvID_xy,Dm.recvCount_xy,Dm.rank_xy(),recvtag); + comm.sendrecv(sendID_Xy,Dm.sendCount_Xy,Dm.rank_Xy(),sendtag,recvID_xY,Dm.recvCount_xY,Dm.rank_xY(),recvtag); + comm.sendrecv(sendID_xY,Dm.sendCount_xY,Dm.rank_xY(),sendtag,recvID_Xy,Dm.recvCount_Xy,Dm.rank_Xy(),recvtag); + comm.sendrecv(sendID_xz,Dm.sendCount_xz,Dm.rank_xz(),sendtag,recvID_XZ,Dm.recvCount_XZ,Dm.rank_XZ(),recvtag); + comm.sendrecv(sendID_XZ,Dm.sendCount_XZ,Dm.rank_XZ(),sendtag,recvID_xz,Dm.recvCount_xz,Dm.rank_xz(),recvtag); + comm.sendrecv(sendID_Xz,Dm.sendCount_Xz,Dm.rank_Xz(),sendtag,recvID_xZ,Dm.recvCount_xZ,Dm.rank_xZ(),recvtag); + comm.sendrecv(sendID_xZ,Dm.sendCount_xZ,Dm.rank_xZ(),sendtag,recvID_Xz,Dm.recvCount_Xz,Dm.rank_Xz(),recvtag); + comm.sendrecv(sendID_yz,Dm.sendCount_yz,Dm.rank_yz(),sendtag,recvID_YZ,Dm.recvCount_YZ,Dm.rank_YZ(),recvtag); + comm.sendrecv(sendID_YZ,Dm.sendCount_YZ,Dm.rank_YZ(),sendtag,recvID_yz,Dm.recvCount_yz,Dm.rank_yz(),recvtag); + comm.sendrecv(sendID_Yz,Dm.sendCount_Yz,Dm.rank_Yz(),sendtag,recvID_yZ,Dm.recvCount_yZ,Dm.rank_yZ(),recvtag); + comm.sendrecv(sendID_yZ,Dm.sendCount_yZ,Dm.rank_yZ(),sendtag,recvID_Yz,Dm.recvCount_Yz,Dm.rank_Yz(),recvtag); //...................................................................................... UnpackID(Dm.recvList_x, Dm.recvCount_x ,recvID_x, id); UnpackID(Dm.recvList_X, Dm.recvCount_X ,recvID_X, id); @@ -412,7 +394,7 @@ int main(int argc, char **argv) } } } - MPI_Allreduce(&count,&countGlobal,1,MPI_INT,MPI_SUM,comm); + countGlobal = comm.sumReduce( count ); sat = float(countGlobal)/totalGlobal; if (rank==0) printf("Final saturation=%f\n",sat); diff --git a/tests/lbpm_segmented_decomp.cpp b/tests/lbpm_segmented_decomp.cpp index 1bc89adb..65b8576f 100644 --- a/tests/lbpm_segmented_decomp.cpp +++ b/tests/lbpm_segmented_decomp.cpp @@ -85,23 +85,23 @@ int main(int argc, char **argv) comm.barrier(); // Computational domain //................................................. 
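// In the lbpm_random_pp hunks above, the first two reductions are written as
// "totalGlobal = sumReduce( count );" and "countGlobal = sumReduce( count );" without
// the comm. qualifier, while the final hunk uses "countGlobal = comm.sumReduce( count );".
// Unless a free-function overload of sumReduce is provided elsewhere, the unqualified
// calls would presumably need the same comm. prefix to compile.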
- MPI_Bcast(&nx,1,MPI_INT,0,comm); - MPI_Bcast(&ny,1,MPI_INT,0,comm); - MPI_Bcast(&nz,1,MPI_INT,0,comm); - MPI_Bcast(&nprocx,1,MPI_INT,0,comm); - MPI_Bcast(&nprocy,1,MPI_INT,0,comm); - MPI_Bcast(&nprocz,1,MPI_INT,0,comm); - MPI_Bcast(&nspheres,1,MPI_INT,0,comm); - MPI_Bcast(&Lx,1,MPI_DOUBLE,0,comm); - MPI_Bcast(&Ly,1,MPI_DOUBLE,0,comm); - MPI_Bcast(&Lz,1,MPI_DOUBLE,0,comm); + comm.bcast(&nx,1,0); + comm.bcast(&ny,1,0); + comm.bcast(&nz,1,0); + comm.bcast(&nprocx,1,0); + comm.bcast(&nprocy,1,0); + comm.bcast(&nprocz,1,0); + comm.bcast(&nspheres,1,0); + comm.bcast(&Lx,1,0); + comm.bcast(&Ly,1,0); + comm.bcast(&Lz,1,0); //................................................. - MPI_Bcast(&Nx,1,MPI_INT,0,comm); - MPI_Bcast(&Ny,1,MPI_INT,0,comm); - MPI_Bcast(&Nz,1,MPI_INT,0,comm); - MPI_Bcast(&xStart,1,MPI_INT,0,comm); - MPI_Bcast(&yStart,1,MPI_INT,0,comm); - MPI_Bcast(&zStart,1,MPI_INT,0,comm); + comm.bcast(&Nx,1,0); + comm.bcast(&Ny,1,0); + comm.bcast(&Nz,1,0); + comm.bcast(&xStart,1,0); + comm.bcast(&yStart,1,0); + comm.bcast(&zStart,1,0); //................................................. comm.barrier(); @@ -191,7 +191,7 @@ int main(int argc, char **argv) } else{ printf("Sending data to process %i \n", rnk); - MPI_Send(tmp,N,MPI_CHAR,rnk,15,comm); + comm.send(tmp,N,rnk,15); } } } @@ -200,7 +200,7 @@ int main(int argc, char **argv) else{ // Recieve the subdomain from rank = 0 printf("Ready to recieve data %i at process %i \n", N,rank); - MPI_Recv(Dm.id,N,MPI_CHAR,0,15,comm,MPI_STATUS_IGNORE); + comm.recv(Dm.id,N,0,15); } comm.barrier(); @@ -243,8 +243,8 @@ int main(int argc, char **argv) printf("Original label=%i, New label=%i \n",oldlabel,newlabel); } } - MPI_Barrier(MPI_COMM_WORLD); - MPI_Bcast(LabelList,2*NLABELS,MPI_INT,0,MPI_COMM_WORLD); + comm.barrier(); + comm.bcast(LabelList,2*NLABELS,0); char *newIDs; newIDs= new char [nx*ny*nz]; @@ -278,8 +278,8 @@ int main(int argc, char **argv) } } } - MPI_Allreduce(&count,&countGlobal,1,MPI_INT,MPI_SUM,comm); - MPI_Allreduce(&total,&totalGlobal,1,MPI_INT,MPI_SUM,comm); + countGlobal = comm.sumReduce( count ); + totalGlobal = comm.sumReduce( total ); float porosity = float(totalGlobal-countGlobal)/totalGlobal; @@ -321,8 +321,8 @@ int main(int argc, char **argv) } } } - MPI_Allreduce(&count,&countGlobal,1,MPI_INT,MPI_SUM,comm); - MPI_Allreduce(&total,&totalGlobal,1,MPI_INT,MPI_SUM,comm); + countGlobal = comm.sumReduce( count ); + totalGlobal = comm.sumReduce( total ); float saturation = float(countGlobal)/totalGlobal; if (rank==0) printf("wetting phase saturation=%f\n",saturation); diff --git a/tests/lbpm_segmented_pp.cpp b/tests/lbpm_segmented_pp.cpp index 39cf0bd1..484a11e2 100644 --- a/tests/lbpm_segmented_pp.cpp +++ b/tests/lbpm_segmented_pp.cpp @@ -180,7 +180,7 @@ int main(int argc, char **argv) fflush(stdout); porosity = ReadFromBlock(Dm->id,Dm->iproc(),Dm->jproc(),Dm->kproc(),nx,ny,nz); - MPI_Barrier(MPI_COMM_WORLD); + comm.barrier(); if (rank==0) printf("Writing local ID files (poros=%f) \n",porosity); fflush(stdout); FILE *ID = fopen(LocalRankFilename,"wb"); diff --git a/tests/lbpm_sphere_pp.cpp b/tests/lbpm_sphere_pp.cpp index 2e053eed..0df11b96 100644 --- a/tests/lbpm_sphere_pp.cpp +++ b/tests/lbpm_sphere_pp.cpp @@ -38,8 +38,6 @@ int main(int argc, char **argv) int rank_xz,rank_XZ,rank_xZ,rank_Xz; int rank_yz,rank_YZ,rank_yZ,rank_Yz; //********************************** - MPI_Request req1[18],req2[18]; - MPI_Status stat1[18],stat2[18]; if (rank == 0){ printf("********************************************************\n"); @@ -125,10 +123,10 
@@ int main(int argc, char **argv) if (rank == 0) ReadSpherePacking(nspheres,cx,cy,cz,rad); comm.barrier(); // Broadcast the sphere packing to all processes - MPI_Bcast(cx,nspheres,MPI_DOUBLE,0,comm); - MPI_Bcast(cy,nspheres,MPI_DOUBLE,0,comm); - MPI_Bcast(cz,nspheres,MPI_DOUBLE,0,comm); - MPI_Bcast(rad,nspheres,MPI_DOUBLE,0,comm); + comm.bcast(cx,nspheres,0); + comm.bcast(cy,nspheres,0); + comm.bcast(cz,nspheres,0); + comm.bcast(rad,nspheres,0); //........................................................................... comm.barrier(); if (rank == 0) cout << "Domain set." << endl; @@ -144,7 +142,7 @@ int main(int argc, char **argv) D = 6.0*(Nx-2)*nprocx*totVol / totArea / Lx; printf("Sauter Mean Diameter (computed from sphere packing) = %f \n",D); } - MPI_Bcast(&D,1,MPI_DOUBLE,0,comm); + comm.bcast(&D,1,0); //....................................................................... SignedDistance(SignDist.data(),nspheres,cx,cy,cz,rad,Lx,Ly,Lz,Nx,Ny,Nz, @@ -177,7 +175,7 @@ int main(int argc, char **argv) } } sum_local = 1.0*sum; - MPI_Allreduce(&sum_local,&porosity,1,MPI_DOUBLE,MPI_SUM,comm); + porosity = comm.sumReduce( sum_local ); porosity = porosity*iVol_global; if (rank==0) printf("Media porosity = %f \n",porosity); @@ -193,7 +191,7 @@ int main(int argc, char **argv) } } } - MPI_Allreduce(&sum_local,&pore_vol,1,MPI_DOUBLE,MPI_SUM,comm); + pore_vol = comm.sumReduce( sum_local ); //......................................................... // don't perform computations at the eight corners diff --git a/tests/lbpm_squaretube_pp.cpp b/tests/lbpm_squaretube_pp.cpp index c1f05aee..a4ee5f60 100644 --- a/tests/lbpm_squaretube_pp.cpp +++ b/tests/lbpm_squaretube_pp.cpp @@ -30,9 +30,6 @@ int main(int argc, char **argv) int rank_xy,rank_XY,rank_xY,rank_Xy; int rank_xz,rank_XZ,rank_xZ,rank_Xz; int rank_yz,rank_YZ,rank_yZ,rank_Yz; - //********************************** - MPI_Request req1[18],req2[18]; - MPI_Status stat1[18],stat2[18]; int ORIENTATION=2; //default: the tube is aligned with Z axis //ORIENTATION = 0: tube is aligned with X axis @@ -83,16 +80,16 @@ int main(int argc, char **argv) // Broadcast simulation parameters from rank 0 to all other procs comm.barrier(); // Computational domain - MPI_Bcast(&Nx,1,MPI_INT,0,comm); - MPI_Bcast(&Ny,1,MPI_INT,0,comm); - MPI_Bcast(&Nz,1,MPI_INT,0,comm); - MPI_Bcast(&nprocx,1,MPI_INT,0,comm); - MPI_Bcast(&nprocy,1,MPI_INT,0,comm); - MPI_Bcast(&nprocz,1,MPI_INT,0,comm); - MPI_Bcast(&nspheres,1,MPI_INT,0,comm); - MPI_Bcast(&Lx,1,MPI_DOUBLE,0,comm); - MPI_Bcast(&Ly,1,MPI_DOUBLE,0,comm); - MPI_Bcast(&Lz,1,MPI_DOUBLE,0,comm); + comm.bcast(&Nx,1,0); + comm.bcast(&Ny,1,0); + comm.bcast(&Nz,1,0); + comm.bcast(&nprocx,1,0); + comm.bcast(&nprocy,1,0); + comm.bcast(&nprocz,1,0); + comm.bcast(&nspheres,1,0); + comm.bcast(&Lx,1,0); + comm.bcast(&Ly,1,0); + comm.bcast(&Lz,1,0); //................................................. comm.barrier(); @@ -235,7 +232,7 @@ int main(int argc, char **argv) } } } - MPI_Allreduce(&sum_local,&pore_vol,1,MPI_DOUBLE,MPI_SUM,comm); + pore_vol = comm.sumReduce( sum_local ); //......................................................... 
// don't perform computations at the eight corners From 2a66e63672084b687677c891dc4b44001f92188b Mon Sep 17 00:00:00 2001 From: Rex Zhe Li Date: Wed, 29 Jan 2020 17:14:48 -0500 Subject: [PATCH 031/121] add pressure BC for abs-perm simulator; need validation test for this --- models/GreyscaleModel.cpp | 62 +++++++++++++++++++++++++++++++-------- 1 file changed, 50 insertions(+), 12 deletions(-) diff --git a/models/GreyscaleModel.cpp b/models/GreyscaleModel.cpp index 36f853b1..0499951f 100644 --- a/models/GreyscaleModel.cpp +++ b/models/GreyscaleModel.cpp @@ -44,7 +44,7 @@ void ScaLBL_GreyscaleModel::ReadParams(string filename){ flux=0.0; dp = 10.0; //unit of 'dp': voxel - // Greyscale Model parameters + // ---------------------- Greyscale Model parameters -----------------------// if (greyscale_db->keyExists( "timestepMax" )){ timestepMax = greyscale_db->getScalar( "timestepMax" ); } @@ -77,10 +77,14 @@ void ScaLBL_GreyscaleModel::ReadParams(string filename){ if (greyscale_db->keyExists( "tolerance" )){ tolerance = greyscale_db->getScalar( "tolerance" ); } + // ------------------------------------------------------------------------// + + //------------------------ Other Domain parameters ------------------------// BoundaryCondition = 0; if (domain_db->keyExists( "BC" )){ BoundaryCondition = domain_db->getScalar( "BC" ); } + // ------------------------------------------------------------------------// } void ScaLBL_GreyscaleModel::SetDomain(){ @@ -366,6 +370,9 @@ void ScaLBL_GreyscaleModel::Create(){ void ScaLBL_GreyscaleModel::Initialize(){ if (rank==0) printf ("Initializing distributions \n"); + //TODO: for BGK, you need to consider voxel porosity + // for IMRT, the whole set of feq is different + // if in the future you have different collison mode, need to write two set of initialization functions ScaLBL_D3Q19_Init(fq, Np); if (Restart == true){ @@ -431,21 +438,36 @@ void ScaLBL_GreyscaleModel::Run(){ double flow_rate_previous = 0.0; while (timestep < timestepMax && error > tolerance) { //************************************************************************/ + // *************ODD TIMESTEP*************// timestep++; ScaLBL_Comm->SendD3Q19AA(fq); //READ FROM NORMAL - //ScaLBL_D3Q19_AAodd_Greyscale(NeighborList, fq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np, rlx, Fx, Fy, Fz,Porosity,Permeability,Velocity); - ScaLBL_D3Q19_AAodd_Greyscale_IMRT(NeighborList, fq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np, rlx, Fx, Fy, Fz,Porosity,Permeability,Velocity,Den); + ScaLBL_D3Q19_AAodd_Greyscale(NeighborList, fq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np, rlx, Fx, Fy, Fz,Porosity,Permeability,Velocity); + //ScaLBL_D3Q19_AAodd_Greyscale_IMRT(NeighborList, fq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np, rlx, Fx, Fy, Fz,Porosity,Permeability,Velocity,Den); ScaLBL_Comm->RecvD3Q19AA(fq); //WRITE INTO OPPOSITE - //ScaLBL_D3Q19_AAodd_Greyscale(NeighborList, fq, 0, ScaLBL_Comm->LastExterior(), Np, rlx, Fx, Fy, Fz,Porosity,Permeability,Velocity); - ScaLBL_D3Q19_AAodd_Greyscale_IMRT(NeighborList, fq, 0, ScaLBL_Comm->LastExterior(), Np, rlx, Fx, Fy, Fz,Porosity,Permeability,Velocity,Den); + ScaLBL_DeviceBarrier(); + // Set BCs + if (BoundaryCondition == 3){ + ScaLBL_Comm->D3Q19_Pressure_BC_z(NeighborList, fq, din, timestep); + ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, fq, dout, timestep); + } + ScaLBL_D3Q19_AAodd_Greyscale(NeighborList, fq, 0, ScaLBL_Comm->LastExterior(), Np, rlx, Fx, Fy, Fz,Porosity,Permeability,Velocity); + 
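// With BoundaryCondition == 3 the updated Run() loop applies constant-pressure
// boundaries on the z faces using din and dout.  A minimal input-database sketch to
// exercise this path; only "BC", "timestepMax" and "tolerance" are confirmed by
// ReadParams above, the remaining key names (including "din" and "dout") are assumed
// for illustration:
//
//     Domain {
//         BC = 3               // pressure boundary condition on the z inlet/outlet
//     }
//     Greyscale {
//         timestepMax = 100000
//         tolerance   = 1.0e-6
//         din  = 1.001         // inlet value passed to D3Q19_Pressure_BC_z (assumed key)
//         dout = 0.999         // outlet value passed to D3Q19_Pressure_BC_Z (assumed key)
//     }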
//ScaLBL_D3Q19_AAodd_Greyscale_IMRT(NeighborList, fq, 0, ScaLBL_Comm->LastExterior(), Np, rlx, Fx, Fy, Fz,Porosity,Permeability,Velocity,Den); ScaLBL_DeviceBarrier(); MPI_Barrier(comm); + + // *************EVEN TIMESTEP*************// timestep++; ScaLBL_Comm->SendD3Q19AA(fq); //READ FORM NORMAL - //ScaLBL_D3Q19_AAeven_Greyscale(fq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np, rlx, Fx, Fy, Fz,Porosity,Permeability,Velocity); - ScaLBL_D3Q19_AAeven_Greyscale_IMRT(fq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np, rlx, Fx, Fy, Fz,Porosity,Permeability,Velocity,Den); + ScaLBL_D3Q19_AAeven_Greyscale(fq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np, rlx, Fx, Fy, Fz,Porosity,Permeability,Velocity); + //ScaLBL_D3Q19_AAeven_Greyscale_IMRT(fq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np, rlx, Fx, Fy, Fz,Porosity,Permeability,Velocity,Den); ScaLBL_Comm->RecvD3Q19AA(fq); //WRITE INTO OPPOSITE - //ScaLBL_D3Q19_AAeven_Greyscale(fq, 0, ScaLBL_Comm->LastExterior(), Np, rlx, Fx, Fy, Fz,Porosity,Permeability,Velocity); - ScaLBL_D3Q19_AAeven_Greyscale_IMRT(fq, 0, ScaLBL_Comm->LastExterior(), Np, rlx, Fx, Fy, Fz,Porosity,Permeability,Velocity,Den); + ScaLBL_DeviceBarrier(); + // Set BCs + if (BoundaryCondition == 3){ + ScaLBL_Comm->D3Q19_Pressure_BC_z(NeighborList, fq, din, timestep); + ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, fq, dout, timestep); + } + ScaLBL_D3Q19_AAeven_Greyscale(fq, 0, ScaLBL_Comm->LastExterior(), Np, rlx, Fx, Fy, Fz,Porosity,Permeability,Velocity); + //ScaLBL_D3Q19_AAeven_Greyscale_IMRT(fq, 0, ScaLBL_Comm->LastExterior(), Np, rlx, Fx, Fy, Fz,Porosity,Permeability,Velocity,Den); ScaLBL_DeviceBarrier(); MPI_Barrier(comm); //************************************************************************/ @@ -461,12 +483,28 @@ void ScaLBL_GreyscaleModel::Run(){ double px_loc,py_loc,pz_loc; double px,py,pz; double mass_loc,mass_glb; + + //parameters for domain average + int64_t i,j,k,n,imin,jmin,kmin,kmax; + // If external boundary conditions are set, do not average over the inlet and outlet + kmin=1; kmax=Nz-1; + //In case user forgets to specify the inlet/outlet buffer layers for BC>0 + if (BoundaryCondition > 0 && Dm->kproc() == 0) kmin=4; + if (BoundaryCondition > 0 && Dm->kproc() == Dm->nprocz()-1) kmax=Nz-4; + + imin=jmin=1; + // If inlet/outlet layers exist use these as default + //if (Dm->inlet_layers_x > 0) imin = Dm->inlet_layers_x; + //if (Dm->inlet_layers_y > 0) jmin = Dm->inlet_layers_y; + if (BoundaryCondition > 0 && Dm->inlet_layers_z > 0 && Dm->kproc() == 0) kmin = 1 + Dm->inlet_layers_z;//"1" indicates the halo layer + if (BoundaryCondition > 0 && Dm->outlet_layers_z > 0 && Dm->kproc() == Dm->nprocz()-1) kmax = Nz-1 - Dm->outlet_layers_z; + px_loc = py_loc = pz_loc = 0.f; mass_loc = 0.f; - for (int k=1; k 0){ px_loc += Velocity_x(i,j,k)*Den*PorosityMap(i,j,k); py_loc += Velocity_y(i,j,k)*Den*PorosityMap(i,j,k); From 6e7cb832546f5064551019cc713e8f7c63ee9203 Mon Sep 17 00:00:00 2001 From: Rex Zhe Li Date: Wed, 29 Jan 2020 23:49:36 -0500 Subject: [PATCH 032/121] add pressure to output data --- common/ScaLBL.h | 8 ++++---- cpu/Greyscale.cpp | 20 ++++++++++++++++---- gpu/Greyscale.cu | 36 ++++++++++++++++++++++++------------ models/GreyscaleModel.cpp | 35 ++++++++++++++++++++++++++--------- models/GreyscaleModel.h | 3 ++- 5 files changed, 72 insertions(+), 30 deletions(-) diff --git a/common/ScaLBL.h b/common/ScaLBL.h index d2495e3f..04cfbd97 100644 --- a/common/ScaLBL.h +++ b/common/ScaLBL.h @@ -57,16 +57,16 @@ 
extern "C" void ScaLBL_D3Q19_AAodd_BGK(int *neighborList, double *dist, int star // GREYSCALE MODEL extern "C" void ScaLBL_D3Q19_AAeven_Greyscale(double *dist, int start, int finish, int Np, double rlx, double Fx, double Fy, double Fz, - double *Poros,double *Perm, double *Velocity); + double *Poros,double *Perm, double *Velocity,double *Pressure); extern "C" void ScaLBL_D3Q19_AAodd_Greyscale(int *neighborList, double *dist, int start, int finish, int Np, double rlx, double Fx, double Fy, double Fz, - double *Poros,double *Perm, double *Velocity); + double *Poros,double *Perm, double *Velocity,double *Pressure); extern "C" void ScaLBL_D3Q19_AAeven_Greyscale_IMRT(double *dist, int start, int finish, int Np, double rlx, double Fx, double Fy, double Fz, - double *Poros,double *Perm, double *Velocity,double Den); + double *Poros,double *Perm, double *Velocity,double Den,double *Pressure); extern "C" void ScaLBL_D3Q19_AAodd_Greyscale_IMRT(int *neighborList, double *dist, int start, int finish, int Np, double rlx, double Fx, double Fy, double Fz, - double *Poros,double *Perm, double *Velocity,double Den); + double *Poros,double *Perm, double *Velocity,double Den,double *Pressure); // MRT MODEL diff --git a/cpu/Greyscale.cpp b/cpu/Greyscale.cpp index f2be769e..d1bde7f2 100644 --- a/cpu/Greyscale.cpp +++ b/cpu/Greyscale.cpp @@ -1,11 +1,12 @@ #include extern "C" void ScaLBL_D3Q19_AAeven_Greyscale(double *dist, int start, int finish, int Np, double rlx, double Gx, double Gy, double Gz, - double *Poros,double *Perm, double *Velocity){ + double *Poros,double *Perm, double *Velocity, double *Pressure){ int n; // conserved momemnts double rho,vx,vy,vz,v_mag; double ux,uy,uz,u_mag; + double pressure; //double uu; // non-conserved moments double f0,f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,f11,f12,f13,f14,f15,f16,f17,f18; @@ -48,6 +49,7 @@ extern "C" void ScaLBL_D3Q19_AAeven_Greyscale(double *dist, int start, int finis if (porosity==1.0) c1 = 0.0;//i.e. apparent pore nodes rho = f0+f2+f1+f4+f3+f6+f5+f8+f7+f10+f9+f12+f11+f14+f13+f16+f15+f18+f17; + pressure = rho/porosity/3.0; vx = (f1-f2+f7-f8+f9-f10+f11-f12+f13-f14)/rho+0.5*porosity*Gx; vy = (f3-f4+f7-f8-f9+f10+f15-f16+f17-f18)/rho+0.5*porosity*Gy; vz = (f5-f6+f11-f12-f13+f14+f15-f16-f17+f18)/rho+0.5*porosity*Gz; @@ -159,15 +161,18 @@ extern "C" void ScaLBL_D3Q19_AAeven_Greyscale(double *dist, int start, int finis Velocity[0*Np+n] = ux; Velocity[1*Np+n] = uy; Velocity[2*Np+n] = uz; + //Update pressure on device + Pressure[n] = pressure; } } extern "C" void ScaLBL_D3Q19_AAodd_Greyscale(int *neighborList, double *dist, int start, int finish, int Np, double rlx, double Gx, double Gy, double Gz, - double *Poros,double *Perm, double *Velocity){ + double *Poros,double *Perm, double *Velocity,double *Pressure){ int n; // conserved momemnts double rho,vx,vy,vz,v_mag; double ux,uy,uz,u_mag; + double pressure; //double uu; // non-conserved moments double f0,f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,f11,f12,f13,f14,f15,f16,f17,f18; @@ -266,6 +271,7 @@ extern "C" void ScaLBL_D3Q19_AAodd_Greyscale(int *neighborList, double *dist, in if (porosity==1.0) c1 = 0.0;//i.e. 
apparent pore nodes rho = f0+f2+f1+f4+f3+f6+f5+f8+f7+f10+f9+f12+f11+f14+f13+f16+f15+f18+f17; + pressure = rho/porosity/3.0; vx = (f1-f2+f7-f8+f9-f10+f11-f12+f13-f14)/rho+0.5*porosity*Gx; vy = (f3-f4+f7-f8-f9+f10+f15-f16+f17-f18)/rho+0.5*porosity*Gy; vz = (f5-f6+f11-f12-f13+f14+f15-f16-f17+f18)/rho+0.5*porosity*Gz; @@ -377,12 +383,14 @@ extern "C" void ScaLBL_D3Q19_AAodd_Greyscale(int *neighborList, double *dist, in Velocity[0*Np+n] = ux; Velocity[1*Np+n] = uy; Velocity[2*Np+n] = uz; + //Update pressure on device + Pressure[n] = pressure; } } extern "C" void ScaLBL_D3Q19_AAeven_Greyscale_IMRT(double *dist, int start, int finish, int Np, double rlx, double Gx, double Gy, double Gz, - double *Poros,double *Perm, double *Velocity, double Den){ + double *Poros,double *Perm, double *Velocity, double Den,double *Pressure){ int n; double vx,vy,vz,v_mag; double ux,uy,uz,u_mag; @@ -837,11 +845,13 @@ extern "C" void ScaLBL_D3Q19_AAeven_Greyscale_IMRT(double *dist, int start, int Velocity[0*Np+n] = ux; Velocity[1*Np+n] = uy; Velocity[2*Np+n] = uz; + //Update pressure on device + Pressure[n] = pressure; } } extern "C" void ScaLBL_D3Q19_AAodd_Greyscale_IMRT(int *neighborList, double *dist, int start, int finish, int Np, double rlx, double Gx, double Gy, double Gz, - double *Poros,double *Perm, double *Velocity, double Den){ + double *Poros,double *Perm, double *Velocity, double Den,double *Pressure){ int n, nread; double vx,vy,vz,v_mag; double ux,uy,uz,u_mag; @@ -1332,6 +1342,8 @@ extern "C" void ScaLBL_D3Q19_AAodd_Greyscale_IMRT(int *neighborList, double *dis Velocity[0*Np+n] = ux; Velocity[1*Np+n] = uy; Velocity[2*Np+n] = uz; + //Update pressure on device + Pressure[n] = pressure; } } diff --git a/gpu/Greyscale.cu b/gpu/Greyscale.cu index 5b8273fe..12ef6f17 100644 --- a/gpu/Greyscale.cu +++ b/gpu/Greyscale.cu @@ -4,11 +4,12 @@ #define NTHREADS 256 __global__ void dvc_ScaLBL_D3Q19_AAeven_Greyscale(double *dist, int start, int finish, int Np, double rlx, double Gx, double Gy, double Gz, - double *Poros,double *Perm, double *Velocity){ + double *Poros,double *Perm, double *Velocity, double *Pressure){ int n; // conserved momemnts double rho,vx,vy,vz,v_mag; double ux,uy,uz,u_mag; + double pressure; //double uu; // non-conserved moments double f0,f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,f11,f12,f13,f14,f15,f16,f17,f18; @@ -56,6 +57,7 @@ __global__ void dvc_ScaLBL_D3Q19_AAeven_Greyscale(double *dist, int start, int f if (porosity==1.0) c1 = 0.0;//i.e. 
apparent pore nodes rho = f0+f2+f1+f4+f3+f6+f5+f8+f7+f10+f9+f12+f11+f14+f13+f16+f15+f18+f17; + pressure = rho/porosity/3.0; vx = (f1-f2+f7-f8+f9-f10+f11-f12+f13-f14)/rho+0.5*porosity*Gx; vy = (f3-f4+f7-f8-f9+f10+f15-f16+f17-f18)/rho+0.5*porosity*Gy; vz = (f5-f6+f11-f12-f13+f14+f15-f16-f17+f18)/rho+0.5*porosity*Gz; @@ -167,17 +169,20 @@ __global__ void dvc_ScaLBL_D3Q19_AAeven_Greyscale(double *dist, int start, int f Velocity[0*Np+n] = ux; Velocity[1*Np+n] = uy; Velocity[2*Np+n] = uz; + //Update pressure on device + Pressure[n] = pressure; } } } __global__ void dvc_ScaLBL_D3Q19_AAodd_Greyscale(int *neighborList, double *dist, int start, int finish, int Np, double rlx, double Gx, double Gy, double Gz, - double *Poros,double *Perm, double *Velocity){ + double *Poros,double *Perm, double *Velocity, double *Pressure){ int n; // conserved momemnts double rho,vx,vy,vz,v_mag; double ux,uy,uz,u_mag; + double pressure; //double uu; // non-conserved moments double f0,f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,f11,f12,f13,f14,f15,f16,f17,f18; @@ -279,6 +284,7 @@ __global__ void dvc_ScaLBL_D3Q19_AAodd_Greyscale(int *neighborList, double *dist if (porosity==1.0) c1 = 0.0;//i.e. apparent pore nodes rho = f0+f2+f1+f4+f3+f6+f5+f8+f7+f10+f9+f12+f11+f14+f13+f16+f15+f18+f17; + pressure = rho/porosity/3.0; vx = (f1-f2+f7-f8+f9-f10+f11-f12+f13-f14)/rho+0.5*porosity*Gx; vy = (f3-f4+f7-f8-f9+f10+f15-f16+f17-f18)/rho+0.5*porosity*Gy; vz = (f5-f6+f11-f12-f13+f14+f15-f16-f17+f18)/rho+0.5*porosity*Gz; @@ -390,12 +396,14 @@ __global__ void dvc_ScaLBL_D3Q19_AAodd_Greyscale(int *neighborList, double *dist Velocity[0*Np+n] = ux; Velocity[1*Np+n] = uy; Velocity[2*Np+n] = uz; + //Update pressure on device + Pressure[n] = pressure; } } } __global__ void dvc_ScaLBL_D3Q19_AAeven_Greyscale_IMRT(double *dist, int start, int finish, int Np, double rlx, double Gx, double Gy, double Gz, - double *Poros,double *Perm, double *Velocity, double Den){ + double *Poros,double *Perm, double *Velocity, double Den, double *Pressure){ int n; double vx,vy,vz,v_mag; @@ -857,6 +865,8 @@ __global__ void dvc_ScaLBL_D3Q19_AAeven_Greyscale_IMRT(double *dist, int start, Velocity[0*Np+n] = ux; Velocity[1*Np+n] = uy; Velocity[2*Np+n] = uz; + //Update pressure on device + Pressure[n] = pressure; } } @@ -864,7 +874,7 @@ __global__ void dvc_ScaLBL_D3Q19_AAeven_Greyscale_IMRT(double *dist, int start, __global__ void dvc_ScaLBL_D3Q19_AAodd_Greyscale_IMRT(int *neighborList, double *dist, int start, int finish, int Np, double rlx, double Gx, double Gy, double Gz, - double *Poros,double *Perm, double *Velocity,double Den){ + double *Poros,double *Perm, double *Velocity,double Den, double *Pressure){ int n, nread; double vx,vy,vz,v_mag; @@ -1361,15 +1371,17 @@ __global__ void dvc_ScaLBL_D3Q19_AAodd_Greyscale_IMRT(int *neighborList, double Velocity[0*Np+n] = ux; Velocity[1*Np+n] = uy; Velocity[2*Np+n] = uz; + //Update pressure on device + Pressure[n] = pressure; } } } -extern "C" void ScaLBL_D3Q19_AAeven_Greyscale(double *dist, int start, int finish, int Np, double rlx, double Fx, double Fy, double Fz,double *Poros,double *Perm, double *Velocity){ +extern "C" void ScaLBL_D3Q19_AAeven_Greyscale(double *dist, int start, int finish, int Np, double rlx, double Fx, double Fy, double Fz,double *Poros,double *Perm, double *Velocity,double *Pressure){ - dvc_ScaLBL_D3Q19_AAeven_Greyscale<<>>(dist,start,finish,Np,rlx,Fx,Fy,Fz,Poros,Perm,Velocity); + dvc_ScaLBL_D3Q19_AAeven_Greyscale<<>>(dist,start,finish,Np,rlx,Fx,Fy,Fz,Poros,Perm,Velocity,Pressure); cudaError_t err = 
cudaGetLastError(); if (cudaSuccess != err){ @@ -1377,9 +1389,9 @@ extern "C" void ScaLBL_D3Q19_AAeven_Greyscale(double *dist, int start, int finis } } -extern "C" void ScaLBL_D3Q19_AAodd_Greyscale(int *neighborList, double *dist, int start, int finish, int Np, double rlx, double Fx, double Fy, double Fz,double *Poros,double *Perm, double *Velocity){ +extern "C" void ScaLBL_D3Q19_AAodd_Greyscale(int *neighborList, double *dist, int start, int finish, int Np, double rlx, double Fx, double Fy, double Fz,double *Poros,double *Perm, double *Velocity,double *Pressure){ - dvc_ScaLBL_D3Q19_AAodd_Greyscale<<>>(neighborList,dist,start,finish,Np,rlx,Fx,Fy,Fz,Poros,Perm,Velocity); + dvc_ScaLBL_D3Q19_AAodd_Greyscale<<>>(neighborList,dist,start,finish,Np,rlx,Fx,Fy,Fz,Poros,Perm,Velocity,Pressure); cudaError_t err = cudaGetLastError(); if (cudaSuccess != err){ @@ -1387,9 +1399,9 @@ extern "C" void ScaLBL_D3Q19_AAodd_Greyscale(int *neighborList, double *dist, in } } -extern "C" void ScaLBL_D3Q19_AAeven_Greyscale_IMRT(double *dist, int start, int finish, int Np, double rlx, double Fx, double Fy, double Fz,double *Poros,double *Perm, double *Velocity,double Den){ +extern "C" void ScaLBL_D3Q19_AAeven_Greyscale_IMRT(double *dist, int start, int finish, int Np, double rlx, double Fx, double Fy, double Fz,double *Poros,double *Perm, double *Velocity,double Den,double *Pressure){ - dvc_ScaLBL_D3Q19_AAeven_Greyscale_IMRT<<>>(dist,start,finish,Np,rlx,Fx,Fy,Fz,Poros,Perm,Velocity,Den); + dvc_ScaLBL_D3Q19_AAeven_Greyscale_IMRT<<>>(dist,start,finish,Np,rlx,Fx,Fy,Fz,Poros,Perm,Velocity,Den,Pressure); cudaError_t err = cudaGetLastError(); if (cudaSuccess != err){ @@ -1397,9 +1409,9 @@ extern "C" void ScaLBL_D3Q19_AAeven_Greyscale_IMRT(double *dist, int start, int } } -extern "C" void ScaLBL_D3Q19_AAodd_Greyscale_IMRT(int *neighborList, double *dist, int start, int finish, int Np, double rlx, double Fx, double Fy, double Fz,double *Poros,double *Perm, double *Velocity,double Den){ +extern "C" void ScaLBL_D3Q19_AAodd_Greyscale_IMRT(int *neighborList, double *dist, int start, int finish, int Np, double rlx, double Fx, double Fy, double Fz,double *Poros,double *Perm, double *Velocity,double Den,double *Pressure){ - dvc_ScaLBL_D3Q19_AAodd_Greyscale_IMRT<<>>(neighborList,dist,start,finish,Np,rlx,Fx,Fy,Fz,Poros,Perm,Velocity,Den); + dvc_ScaLBL_D3Q19_AAodd_Greyscale_IMRT<<>>(neighborList,dist,start,finish,Np,rlx,Fx,Fy,Fz,Poros,Perm,Velocity,Den,Pressure); cudaError_t err = cudaGetLastError(); if (cudaSuccess != err){ diff --git a/models/GreyscaleModel.cpp b/models/GreyscaleModel.cpp index 0499951f..018af5ec 100644 --- a/models/GreyscaleModel.cpp +++ b/models/GreyscaleModel.cpp @@ -104,6 +104,7 @@ void ScaLBL_GreyscaleModel::SetDomain(){ Velocity_y.resize(Nx,Ny,Nz); Velocity_z.resize(Nx,Ny,Nz); PorosityMap.resize(Nx,Ny,Nz); + Pressure.resize(Nx,Ny,Nz); id = new signed char [N]; for (int i=0; iid[i] = 1; // initialize this way @@ -320,7 +321,7 @@ void ScaLBL_GreyscaleModel::Create(){ ScaLBL_AllocateDeviceMemory((void **) &fq, 19*dist_mem_size); ScaLBL_AllocateDeviceMemory((void **) &Permeability, sizeof(double)*Np); ScaLBL_AllocateDeviceMemory((void **) &Porosity, sizeof(double)*Np); - ScaLBL_AllocateDeviceMemory((void **) &Pressure, sizeof(double)*Np); + ScaLBL_AllocateDeviceMemory((void **) &Pressure_dvc, sizeof(double)*Np); ScaLBL_AllocateDeviceMemory((void **) &Velocity, 3*sizeof(double)*Np); //........................................................................... 
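// In the updated kernels the new Pressure field is computed as
// pressure = rho/porosity/3.0, i.e. p = cs^2 * rho / phi with the D3Q19 sound speed
// cs^2 = 1/3 and phi the local grey-voxel porosity.  For example, a node with
// rho = 1.0 and porosity = 0.5 reports p = 1.0/(3*0.5) ~= 0.67 in lattice units,
// while an open pore (porosity = 1.0) reduces to the usual p = rho/3.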
// Update GPU data structures @@ -441,8 +442,8 @@ void ScaLBL_GreyscaleModel::Run(){ // *************ODD TIMESTEP*************// timestep++; ScaLBL_Comm->SendD3Q19AA(fq); //READ FROM NORMAL - ScaLBL_D3Q19_AAodd_Greyscale(NeighborList, fq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np, rlx, Fx, Fy, Fz,Porosity,Permeability,Velocity); - //ScaLBL_D3Q19_AAodd_Greyscale_IMRT(NeighborList, fq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np, rlx, Fx, Fy, Fz,Porosity,Permeability,Velocity,Den); + //ScaLBL_D3Q19_AAodd_Greyscale(NeighborList, fq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np, rlx, Fx, Fy, Fz,Porosity,Permeability,Velocity,Pressure_dvc); + ScaLBL_D3Q19_AAodd_Greyscale_IMRT(NeighborList, fq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np, rlx, Fx, Fy, Fz,Porosity,Permeability,Velocity,Den,Pressure_dvc); ScaLBL_Comm->RecvD3Q19AA(fq); //WRITE INTO OPPOSITE ScaLBL_DeviceBarrier(); // Set BCs @@ -450,15 +451,15 @@ void ScaLBL_GreyscaleModel::Run(){ ScaLBL_Comm->D3Q19_Pressure_BC_z(NeighborList, fq, din, timestep); ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, fq, dout, timestep); } - ScaLBL_D3Q19_AAodd_Greyscale(NeighborList, fq, 0, ScaLBL_Comm->LastExterior(), Np, rlx, Fx, Fy, Fz,Porosity,Permeability,Velocity); - //ScaLBL_D3Q19_AAodd_Greyscale_IMRT(NeighborList, fq, 0, ScaLBL_Comm->LastExterior(), Np, rlx, Fx, Fy, Fz,Porosity,Permeability,Velocity,Den); + //ScaLBL_D3Q19_AAodd_Greyscale(NeighborList, fq, 0, ScaLBL_Comm->LastExterior(), Np, rlx, Fx, Fy, Fz,Porosity,Permeability,Velocity,Pressure_dvc); + ScaLBL_D3Q19_AAodd_Greyscale_IMRT(NeighborList, fq, 0, ScaLBL_Comm->LastExterior(), Np, rlx, Fx, Fy, Fz,Porosity,Permeability,Velocity,Den,Pressure_dvc); ScaLBL_DeviceBarrier(); MPI_Barrier(comm); // *************EVEN TIMESTEP*************// timestep++; ScaLBL_Comm->SendD3Q19AA(fq); //READ FORM NORMAL - ScaLBL_D3Q19_AAeven_Greyscale(fq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np, rlx, Fx, Fy, Fz,Porosity,Permeability,Velocity); - //ScaLBL_D3Q19_AAeven_Greyscale_IMRT(fq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np, rlx, Fx, Fy, Fz,Porosity,Permeability,Velocity,Den); + //ScaLBL_D3Q19_AAeven_Greyscale(fq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np, rlx, Fx, Fy, Fz,Porosity,Permeability,Velocity,Pressure_dvc); + ScaLBL_D3Q19_AAeven_Greyscale_IMRT(fq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np, rlx, Fx, Fy, Fz,Porosity,Permeability,Velocity,Den,Pressure_dvc); ScaLBL_Comm->RecvD3Q19AA(fq); //WRITE INTO OPPOSITE ScaLBL_DeviceBarrier(); // Set BCs @@ -466,8 +467,8 @@ void ScaLBL_GreyscaleModel::Run(){ ScaLBL_Comm->D3Q19_Pressure_BC_z(NeighborList, fq, din, timestep); ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, fq, dout, timestep); } - ScaLBL_D3Q19_AAeven_Greyscale(fq, 0, ScaLBL_Comm->LastExterior(), Np, rlx, Fx, Fy, Fz,Porosity,Permeability,Velocity); - //ScaLBL_D3Q19_AAeven_Greyscale_IMRT(fq, 0, ScaLBL_Comm->LastExterior(), Np, rlx, Fx, Fy, Fz,Porosity,Permeability,Velocity,Den); + //ScaLBL_D3Q19_AAeven_Greyscale(fq, 0, ScaLBL_Comm->LastExterior(), Np, rlx, Fx, Fy, Fz,Porosity,Permeability,Velocity,Pressure_dvc); + ScaLBL_D3Q19_AAeven_Greyscale_IMRT(fq, 0, ScaLBL_Comm->LastExterior(), Np, rlx, Fx, Fy, Fz,Porosity,Permeability,Velocity,Den,Pressure_dvc); ScaLBL_DeviceBarrier(); MPI_Barrier(comm); //************************************************************************/ @@ -476,6 +477,7 @@ void ScaLBL_GreyscaleModel::Run(){ 
ScaLBL_Comm->RegularLayout(Map,&Velocity[Np],Velocity_y); ScaLBL_Comm->RegularLayout(Map,&Velocity[2*Np],Velocity_z); ScaLBL_Comm->RegularLayout(Map,Porosity,PorosityMap); + //ScaLBL_Comm->RegularLayout(Map,Pressure_dvc,Pressure); double count_loc=0; double count; @@ -678,6 +680,7 @@ void ScaLBL_GreyscaleModel::VelocityField(){ auto VyVar = std::make_shared(); auto VzVar = std::make_shared(); auto SignDistVar = std::make_shared(); + auto PressureVar = std::make_shared(); IO::initialize("","silo","false"); // Create the MeshDataStruct @@ -706,20 +709,34 @@ void ScaLBL_GreyscaleModel::VelocityField(){ VzVar->data.resize(Dm->Nx-2,Dm->Ny-2,Dm->Nz-2); visData[0].vars.push_back(VzVar); + PressureVar->name = "Pressure"; + PressureVar->type = IO::VariableType::VolumeVariable; + PressureVar->dim = 1; + PressureVar->data.resize(Dm->Nx-2,Dm->Ny-2,Dm->Nz-2); + visData[0].vars.push_back(PressureVar); + Array& SignData = visData[0].vars[0]->data; Array& VelxData = visData[0].vars[1]->data; Array& VelyData = visData[0].vars[2]->data; Array& VelzData = visData[0].vars[3]->data; + Array& PressureData = visData[0].vars[4]->data; ASSERT(visData[0].vars[0]->name=="SignDist"); ASSERT(visData[0].vars[1]->name=="Velocity_x"); ASSERT(visData[0].vars[2]->name=="Velocity_y"); ASSERT(visData[0].vars[3]->name=="Velocity_z"); + ASSERT(visData[0].vars[4]->name=="Pressure"); + ScaLBL_Comm->RegularLayout(Map,&Velocity[0],Velocity_x); + ScaLBL_Comm->RegularLayout(Map,&Velocity[Np],Velocity_y); + ScaLBL_Comm->RegularLayout(Map,&Velocity[2*Np],Velocity_z); + ScaLBL_Comm->RegularLayout(Map,Pressure_dvc,Pressure); + fillData.copy(SignDist,SignData); fillData.copy(Velocity_x,VelxData); fillData.copy(Velocity_y,VelyData); fillData.copy(Velocity_z,VelzData); + fillData.copy(Pressure,PressureData); IO::writeData( timestep, visData, Dm->Comm ); diff --git a/models/GreyscaleModel.h b/models/GreyscaleModel.h index d1399053..a7a5f528 100644 --- a/models/GreyscaleModel.h +++ b/models/GreyscaleModel.h @@ -65,13 +65,14 @@ public: double *Permeability;//grey voxel permeability double *Porosity; double *Velocity; - double *Pressure; + double *Pressure_dvc; IntArray Map; DoubleArray SignDist; DoubleArray Velocity_x; DoubleArray Velocity_y; DoubleArray Velocity_z; DoubleArray PorosityMap; + DoubleArray Pressure; private: MPI_Comm comm; From 25df1e0f3522ffcd1ad51e3de90892e2894ac51c Mon Sep 17 00:00:00 2001 From: Rex Zhe Li Date: Thu, 30 Jan 2020 13:23:27 -0500 Subject: [PATCH 033/121] add a few print-out to make the program output more verbose --- common/Domain.cpp | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/common/Domain.cpp b/common/Domain.cpp index 48bfed15..1be64859 100644 --- a/common/Domain.cpp +++ b/common/Domain.cpp @@ -434,7 +434,7 @@ void Domain::Decomp(std::string Filename) } if (inlet_layers_z > 0){ - printf("Checkerboard pattern at z inlet for %i layers \n",inlet_layers_z); + printf("Checkerboard pattern at z inlet for %i layers, saturated with phase label=%i \n",inlet_layers_z,inlet_layers_phase); // use checkerboard pattern for (int k = zStart; k < zStart+inlet_layers_z; k++){ for (int j = 0; j 0){ - printf("Checkerboard pattern at z outlet for %i layers \n",outlet_layers_z); + printf("Checkerboard pattern at z outlet for %i layers, saturated with phase label=%i \n",outlet_layers_z,outlet_layers_phase); // use checkerboard pattern for (int k = zStart + nz*nprocz - outlet_layers_z; k < zStart + nz*nprocz; k++){ for (int j = 0; j 0 && kproc() == 0){ - if (inlet_layers_z < 4) 
inlet_layers_z=4; + if (inlet_layers_z < 4){ + inlet_layers_z=4; + if(RANK==0){ + printf("NOTE:Non-periodic BC is applied, but the number of Z-inlet layers is not specified (or is smaller than 3 voxels) \n"); + printf(" the number of Z-inlet layer is reset to %i voxels, saturated with phase label=%i",inlet_layers_z-1,inlet_layers_phase); + } + } for (int k=0; k 0 && kproc() == nprocz-1){ - if (outlet_layers_z < 4) outlet_layers_z=4; + if (outlet_layers_z < 4){ + outlet_layers_z=4; + if(RANK==0){ + printf("NOTE:Non-periodic BC is applied, but the number of Z-outlet layers is not specified (or is smaller than 3 voxels) \n"); + printf(" the number of Z-outlet layer is reset to %i voxels, saturated with phase label=%i",outlet_layers_z-1,outlet_layers_phase); + } + } for (int k=Nz-outlet_layers_z; k Date: Thu, 30 Jan 2020 17:57:56 -0500 Subject: [PATCH 034/121] fix printf bug --- common/Domain.cpp | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/common/Domain.cpp b/common/Domain.cpp index 1be64859..1028a0ef 100644 --- a/common/Domain.cpp +++ b/common/Domain.cpp @@ -597,8 +597,7 @@ void Domain::Decomp(std::string Filename) if (inlet_layers_z < 4){ inlet_layers_z=4; if(RANK==0){ - printf("NOTE:Non-periodic BC is applied, but the number of Z-inlet layers is not specified (or is smaller than 3 voxels) \n"); - printf(" the number of Z-inlet layer is reset to %i voxels, saturated with phase label=%i",inlet_layers_z-1,inlet_layers_phase); + printf("NOTE:Non-periodic BC is applied, but the number of Z-inlet layers is not specified (or is smaller than 3 voxels) \n the number of Z-inlet layer is reset to %i voxels, saturated with phase label=%i \n",inlet_layers_z-1,inlet_layers_phase); } } for (int k=0; k 0 && kproc() == nprocz-1){ if (outlet_layers_z < 4){ outlet_layers_z=4; - if(RANK==0){ - printf("NOTE:Non-periodic BC is applied, but the number of Z-outlet layers is not specified (or is smaller than 3 voxels) \n"); - printf(" the number of Z-outlet layer is reset to %i voxels, saturated with phase label=%i",outlet_layers_z-1,outlet_layers_phase); + if(RANK==nprocs-1){ + printf("NOTE:Non-periodic BC is applied, but the number of Z-outlet layers is not specified (or is smaller than 3 voxels) \n the number of Z-outlet layer is reset to %i voxels, saturated with phase label=%i \n",outlet_layers_z-1,outlet_layers_phase); } } for (int k=Nz-outlet_layers_z; k Date: Fri, 31 Jan 2020 15:15:26 -0500 Subject: [PATCH 035/121] Some updates:(1)add different fq initialization for BGK and IMRT;(2)user can choose collision model --- common/ScaLBL.h | 4 +++ cpu/D3Q19.cpp | 27 ++++++++++++++ gpu/D3Q19.cu | 40 ++++++++++++++++++++- models/GreyscaleModel.cpp | 76 +++++++++++++++++++++++++++++++++------ models/GreyscaleModel.h | 1 + 5 files changed, 136 insertions(+), 12 deletions(-) diff --git a/common/ScaLBL.h b/common/ScaLBL.h index 04cfbd97..007fda34 100644 --- a/common/ScaLBL.h +++ b/common/ScaLBL.h @@ -46,6 +46,7 @@ extern "C" void ScaLBL_UnpackDenD3Q7(int *list, int count, double *recvbuf, int extern "C" void ScaLBL_D3Q19_Init(double *Dist, int Np); + extern "C" void ScaLBL_D3Q19_Momentum(double *dist, double *vel, int Np); extern "C" void ScaLBL_D3Q19_Pressure(double *dist, double *press, int Np); @@ -56,6 +57,9 @@ extern "C" void ScaLBL_D3Q19_AAeven_BGK(double *dist, int start, int finish, int extern "C" void ScaLBL_D3Q19_AAodd_BGK(int *neighborList, double *dist, int start, int finish, int Np, double rlx, double Fx, double Fy, double Fz); // GREYSCALE MODEL + +extern "C" void 
ScaLBL_D3Q19_GreyIMRT_Init(double *Dist, int Np, double Den); + extern "C" void ScaLBL_D3Q19_AAeven_Greyscale(double *dist, int start, int finish, int Np, double rlx, double Fx, double Fy, double Fz, double *Poros,double *Perm, double *Velocity,double *Pressure); diff --git a/cpu/D3Q19.cpp b/cpu/D3Q19.cpp index 2af59883..244bb3d2 100644 --- a/cpu/D3Q19.cpp +++ b/cpu/D3Q19.cpp @@ -84,6 +84,33 @@ extern "C" void ScaLBL_D3Q19_Init(double *dist, int Np) } } + +extern "C" void ScaLBL_D3Q19_GreyIMRT_Init(double *dist, int Np, double Den) +{ + int n; + for (n=0; n>>(dist, Np); cudaError_t err = cudaGetLastError(); if (cudaSuccess != err){ - printf("CUDA error in ScaLBL_D3Q19_AA_Init: %s \n",cudaGetErrorString(err)); + printf("CUDA error in ScaLBL_D3Q19_Init: %s \n",cudaGetErrorString(err)); + } +} + +extern "C" void ScaLBL_D3Q19_GreyIMRT_Init(double *dist, int Np, double Den){ + dvc_ScaLBL_D3Q19_GreyIMRT_Init<<>>(dist, Np, Den); + cudaError_t err = cudaGetLastError(); + if (cudaSuccess != err){ + printf("CUDA error in ScaLBL_D3Q19_GreyIMRT_Init: %s \n",cudaGetErrorString(err)); } } diff --git a/models/GreyscaleModel.cpp b/models/GreyscaleModel.cpp index 018af5ec..5f8e4e36 100644 --- a/models/GreyscaleModel.cpp +++ b/models/GreyscaleModel.cpp @@ -43,6 +43,7 @@ void ScaLBL_GreyscaleModel::ReadParams(string filename){ din=dout=1.0; flux=0.0; dp = 10.0; //unit of 'dp': voxel + CollisionType = 1; //1: IMRT; 2: BGK // ---------------------- Greyscale Model parameters -----------------------// if (greyscale_db->keyExists( "timestepMax" )){ @@ -77,6 +78,10 @@ void ScaLBL_GreyscaleModel::ReadParams(string filename){ if (greyscale_db->keyExists( "tolerance" )){ tolerance = greyscale_db->getScalar( "tolerance" ); } + auto collision = greyscale_db->getWithDefault( "collision", "IMRT" ); + if (collision == "BGK"){ + CollisionType=2; + } // ------------------------------------------------------------------------// //------------------------ Other Domain parameters ------------------------// @@ -374,7 +379,20 @@ void ScaLBL_GreyscaleModel::Initialize(){ //TODO: for BGK, you need to consider voxel porosity // for IMRT, the whole set of feq is different // if in the future you have different collison mode, need to write two set of initialization functions - ScaLBL_D3Q19_Init(fq, Np); + if (CollisionType==1){ + ScaLBL_D3Q19_GreyIMRT_Init(fq, Np, Den); + if (rank==0) printf("Collision model: Incompressible MRT.\n"); + } + else if (CollisionType==2){ + ScaLBL_D3Q19_Init(fq, Np); + if (rank==0) printf("Collision model: BGK.\n"); + } + else{ + if (rank==0) printf("Unknown collison type! 
IMRT collision is used.\n"); + ScaLBL_D3Q19_GreyIMRT_Init(fq, Np, Den); + CollisionType=1; + greyscale_db->putScalar( "collision", "IMRT" ); + } if (Restart == true){ if (rank==0){ @@ -442,8 +460,17 @@ void ScaLBL_GreyscaleModel::Run(){ // *************ODD TIMESTEP*************// timestep++; ScaLBL_Comm->SendD3Q19AA(fq); //READ FROM NORMAL - //ScaLBL_D3Q19_AAodd_Greyscale(NeighborList, fq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np, rlx, Fx, Fy, Fz,Porosity,Permeability,Velocity,Pressure_dvc); - ScaLBL_D3Q19_AAodd_Greyscale_IMRT(NeighborList, fq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np, rlx, Fx, Fy, Fz,Porosity,Permeability,Velocity,Den,Pressure_dvc); + switch (CollisionType){ + case 1: + ScaLBL_D3Q19_AAodd_Greyscale_IMRT(NeighborList, fq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np, rlx, Fx, Fy, Fz,Porosity,Permeability,Velocity,Den,Pressure_dvc); + break; + case 2: + ScaLBL_D3Q19_AAodd_Greyscale(NeighborList, fq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np, rlx, Fx, Fy, Fz,Porosity,Permeability,Velocity,Pressure_dvc); + break; + default: + ScaLBL_D3Q19_AAodd_Greyscale_IMRT(NeighborList, fq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np, rlx, Fx, Fy, Fz,Porosity,Permeability,Velocity,Den,Pressure_dvc); + break; + } ScaLBL_Comm->RecvD3Q19AA(fq); //WRITE INTO OPPOSITE ScaLBL_DeviceBarrier(); // Set BCs @@ -451,25 +478,52 @@ void ScaLBL_GreyscaleModel::Run(){ ScaLBL_Comm->D3Q19_Pressure_BC_z(NeighborList, fq, din, timestep); ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, fq, dout, timestep); } - //ScaLBL_D3Q19_AAodd_Greyscale(NeighborList, fq, 0, ScaLBL_Comm->LastExterior(), Np, rlx, Fx, Fy, Fz,Porosity,Permeability,Velocity,Pressure_dvc); - ScaLBL_D3Q19_AAodd_Greyscale_IMRT(NeighborList, fq, 0, ScaLBL_Comm->LastExterior(), Np, rlx, Fx, Fy, Fz,Porosity,Permeability,Velocity,Den,Pressure_dvc); + switch (CollisionType){ + case 1: + ScaLBL_D3Q19_AAodd_Greyscale_IMRT(NeighborList, fq, 0, ScaLBL_Comm->LastExterior(), Np, rlx, Fx, Fy, Fz,Porosity,Permeability,Velocity,Den,Pressure_dvc); + break; + case 2: + ScaLBL_D3Q19_AAodd_Greyscale(NeighborList, fq, 0, ScaLBL_Comm->LastExterior(), Np, rlx, Fx, Fy, Fz,Porosity,Permeability,Velocity,Pressure_dvc); + break; + default: + ScaLBL_D3Q19_AAodd_Greyscale_IMRT(NeighborList, fq, 0, ScaLBL_Comm->LastExterior(), Np, rlx, Fx, Fy, Fz,Porosity,Permeability,Velocity,Den,Pressure_dvc); + break; + } ScaLBL_DeviceBarrier(); MPI_Barrier(comm); // *************EVEN TIMESTEP*************// timestep++; ScaLBL_Comm->SendD3Q19AA(fq); //READ FORM NORMAL - //ScaLBL_D3Q19_AAeven_Greyscale(fq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np, rlx, Fx, Fy, Fz,Porosity,Permeability,Velocity,Pressure_dvc); - ScaLBL_D3Q19_AAeven_Greyscale_IMRT(fq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np, rlx, Fx, Fy, Fz,Porosity,Permeability,Velocity,Den,Pressure_dvc); - ScaLBL_Comm->RecvD3Q19AA(fq); //WRITE INTO OPPOSITE + switch (CollisionType){ + case 1: + ScaLBL_D3Q19_AAeven_Greyscale_IMRT(fq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np, rlx, Fx, Fy, Fz,Porosity,Permeability,Velocity,Den,Pressure_dvc); + break; + case 2: + ScaLBL_D3Q19_AAeven_Greyscale(fq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np, rlx, Fx, Fy, Fz,Porosity,Permeability,Velocity,Pressure_dvc); + break; + default: + ScaLBL_D3Q19_AAeven_Greyscale_IMRT(fq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np, rlx, Fx, Fy, 
Fz,Porosity,Permeability,Velocity,Den,Pressure_dvc); + break; + } + ScaLBL_Comm->RecvD3Q19AA(fq); //WRITE INTO OPPOSITE ScaLBL_DeviceBarrier(); // Set BCs if (BoundaryCondition == 3){ ScaLBL_Comm->D3Q19_Pressure_BC_z(NeighborList, fq, din, timestep); ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, fq, dout, timestep); } - //ScaLBL_D3Q19_AAeven_Greyscale(fq, 0, ScaLBL_Comm->LastExterior(), Np, rlx, Fx, Fy, Fz,Porosity,Permeability,Velocity,Pressure_dvc); - ScaLBL_D3Q19_AAeven_Greyscale_IMRT(fq, 0, ScaLBL_Comm->LastExterior(), Np, rlx, Fx, Fy, Fz,Porosity,Permeability,Velocity,Den,Pressure_dvc); - ScaLBL_DeviceBarrier(); MPI_Barrier(comm); + switch (CollisionType){ + case 1: + ScaLBL_D3Q19_AAeven_Greyscale_IMRT(fq, 0, ScaLBL_Comm->LastExterior(), Np, rlx, Fx, Fy, Fz,Porosity,Permeability,Velocity,Den,Pressure_dvc); + break; + case 2: + ScaLBL_D3Q19_AAeven_Greyscale(fq, 0, ScaLBL_Comm->LastExterior(), Np, rlx, Fx, Fy, Fz,Porosity,Permeability,Velocity,Pressure_dvc); + break; + default: + ScaLBL_D3Q19_AAeven_Greyscale_IMRT(fq, 0, ScaLBL_Comm->LastExterior(), Np, rlx, Fx, Fy, Fz,Porosity,Permeability,Velocity,Den,Pressure_dvc); + break; + } + ScaLBL_DeviceBarrier(); MPI_Barrier(comm); //************************************************************************/ if (timestep%analysis_interval==0){ diff --git a/models/GreyscaleModel.h b/models/GreyscaleModel.h index a7a5f528..792e87ea 100644 --- a/models/GreyscaleModel.h +++ b/models/GreyscaleModel.h @@ -35,6 +35,7 @@ public: bool Restart,pBC; int timestep,timestepMax; int BoundaryCondition; + int CollisionType; double tau; double Den;//constant density double tolerance; From ea8fceda8c40f7e904c6999aba617d2e20a78451 Mon Sep 17 00:00:00 2001 From: Rex Zhe Li Date: Fri, 31 Jan 2020 15:42:27 -0500 Subject: [PATCH 036/121] revert to the old velocity averaging method as it is more accurate --- models/GreyscaleModel.cpp | 63 ++++++++++++++++++++++++++------------- 1 file changed, 43 insertions(+), 20 deletions(-) diff --git a/models/GreyscaleModel.cpp b/models/GreyscaleModel.cpp index 5f8e4e36..4b803272 100644 --- a/models/GreyscaleModel.cpp +++ b/models/GreyscaleModel.cpp @@ -530,15 +530,16 @@ void ScaLBL_GreyscaleModel::Run(){ ScaLBL_Comm->RegularLayout(Map,&Velocity[0],Velocity_x); ScaLBL_Comm->RegularLayout(Map,&Velocity[Np],Velocity_y); ScaLBL_Comm->RegularLayout(Map,&Velocity[2*Np],Velocity_z); - ScaLBL_Comm->RegularLayout(Map,Porosity,PorosityMap); + //ScaLBL_Comm->RegularLayout(Map,Porosity,PorosityMap); //ScaLBL_Comm->RegularLayout(Map,Pressure_dvc,Pressure); double count_loc=0; double count; double vax,vay,vaz; - double px_loc,py_loc,pz_loc; - double px,py,pz; - double mass_loc,mass_glb; + double vax_loc,vay_loc,vaz_loc; + //double px_loc,py_loc,pz_loc; + //double px,py,pz; + //double mass_loc,mass_glb; //parameters for domain average int64_t i,j,k,n,imin,jmin,kmin,kmax; @@ -555,30 +556,51 @@ void ScaLBL_GreyscaleModel::Run(){ if (BoundaryCondition > 0 && Dm->inlet_layers_z > 0 && Dm->kproc() == 0) kmin = 1 + Dm->inlet_layers_z;//"1" indicates the halo layer if (BoundaryCondition > 0 && Dm->outlet_layers_z > 0 && Dm->kproc() == Dm->nprocz()-1) kmax = Nz-1 - Dm->outlet_layers_z; - - px_loc = py_loc = pz_loc = 0.f; - mass_loc = 0.f; +// px_loc = py_loc = pz_loc = 0.f; +// mass_loc = 0.f; +// for (int k=kmin; k 0){ +// px_loc += Velocity_x(i,j,k)*Den*PorosityMap(i,j,k); +// py_loc += Velocity_y(i,j,k)*Den*PorosityMap(i,j,k); +// pz_loc += Velocity_z(i,j,k)*Den*PorosityMap(i,j,k); +// mass_loc += Den*PorosityMap(i,j,k); +// } +// } +// } +// } +// 
MPI_Allreduce(&px_loc, &px, 1,MPI_DOUBLE,MPI_SUM,Mask->Comm); +// MPI_Allreduce(&py_loc, &py, 1,MPI_DOUBLE,MPI_SUM,Mask->Comm); +// MPI_Allreduce(&pz_loc, &pz, 1,MPI_DOUBLE,MPI_SUM,Mask->Comm); +// MPI_Allreduce(&mass_loc,&mass_glb,1,MPI_DOUBLE,MPI_SUM,Mask->Comm); +// +// vax = px/mass_glb; +// vay = py/mass_glb; +// vaz = pz/mass_glb; + + vax_loc = vay_loc = vaz_loc = 0.f; for (int k=kmin; k 0){ - px_loc += Velocity_x(i,j,k)*Den*PorosityMap(i,j,k); - py_loc += Velocity_y(i,j,k)*Den*PorosityMap(i,j,k); - pz_loc += Velocity_z(i,j,k)*Den*PorosityMap(i,j,k); - mass_loc += Den*PorosityMap(i,j,k); + vax_loc += Velocity_x(i,j,k); + vay_loc += Velocity_y(i,j,k); + vaz_loc += Velocity_z(i,j,k); + count_loc+=1.0; } } } } - MPI_Allreduce(&px_loc, &px, 1,MPI_DOUBLE,MPI_SUM,Mask->Comm); - MPI_Allreduce(&py_loc, &py, 1,MPI_DOUBLE,MPI_SUM,Mask->Comm); - MPI_Allreduce(&pz_loc, &pz, 1,MPI_DOUBLE,MPI_SUM,Mask->Comm); - MPI_Allreduce(&mass_loc,&mass_glb,1,MPI_DOUBLE,MPI_SUM,Mask->Comm); - - vax = px/mass_glb; - vay = py/mass_glb; - vaz = pz/mass_glb; + MPI_Allreduce(&vax_loc,&vax,1,MPI_DOUBLE,MPI_SUM,Mask->Comm); + MPI_Allreduce(&vay_loc,&vay,1,MPI_DOUBLE,MPI_SUM,Mask->Comm); + MPI_Allreduce(&vaz_loc,&vaz,1,MPI_DOUBLE,MPI_SUM,Mask->Comm); + MPI_Allreduce(&count_loc,&count,1,MPI_DOUBLE,MPI_SUM,Mask->Comm); + vax /= count; + vay /= count; + vaz /= count; + double force_mag = sqrt(Fx*Fx+Fy*Fy+Fz*Fz); double dir_x = Fx/force_mag; double dir_y = Fy/force_mag; @@ -590,7 +612,8 @@ void ScaLBL_GreyscaleModel::Run(){ dir_z = 1.0; force_mag = 1.0; } - double flow_rate = (px*dir_x + py*dir_y + pz*dir_z)/mass_glb; + //double flow_rate = (px*dir_x + py*dir_y + pz*dir_z)/mass_glb; + double flow_rate = (vax*dir_x + vay*dir_y + vaz*dir_z); error = fabs(flow_rate - flow_rate_previous) / fabs(flow_rate); flow_rate_previous = flow_rate; From 50e4b5a9baf3a2767c1b69e8ef70a5a4377dbd48 Mon Sep 17 00:00:00 2001 From: Rex Zhe Li Date: Sat, 1 Feb 2020 14:04:39 -0500 Subject: [PATCH 037/121] add the greyscale effective viscosity back, but by default it is set equal to the normal viscosity --- common/ScaLBL.h | 8 ++--- cpu/Greyscale.cpp | 48 ++++++++++++++--------------- gpu/Greyscale.cu | 64 +++++++++++++++++++-------------------- models/GreyscaleModel.cpp | 29 ++++++++++-------- models/GreyscaleModel.h | 1 + 5 files changed, 77 insertions(+), 73 deletions(-) diff --git a/common/ScaLBL.h b/common/ScaLBL.h index 007fda34..447a9b14 100644 --- a/common/ScaLBL.h +++ b/common/ScaLBL.h @@ -60,16 +60,16 @@ extern "C" void ScaLBL_D3Q19_AAodd_BGK(int *neighborList, double *dist, int star extern "C" void ScaLBL_D3Q19_GreyIMRT_Init(double *Dist, int Np, double Den); -extern "C" void ScaLBL_D3Q19_AAeven_Greyscale(double *dist, int start, int finish, int Np, double rlx, double Fx, double Fy, double Fz, +extern "C" void ScaLBL_D3Q19_AAeven_Greyscale(double *dist, int start, int finish, int Np, double rlx, double rlx_eff, double Fx, double Fy, double Fz, double *Poros,double *Perm, double *Velocity,double *Pressure); -extern "C" void ScaLBL_D3Q19_AAodd_Greyscale(int *neighborList, double *dist, int start, int finish, int Np, double rlx, double Fx, double Fy, double Fz, +extern "C" void ScaLBL_D3Q19_AAodd_Greyscale(int *neighborList, double *dist, int start, int finish, int Np, double rlx, double rlx_eff, double Fx, double Fy, double Fz, double *Poros,double *Perm, double *Velocity,double *Pressure); -extern "C" void ScaLBL_D3Q19_AAeven_Greyscale_IMRT(double *dist, int start, int finish, int Np, double rlx, double Fx, double Fy, double Fz, +extern 
"C" void ScaLBL_D3Q19_AAeven_Greyscale_IMRT(double *dist, int start, int finish, int Np, double rlx, double rlx_eff, double Fx, double Fy, double Fz, double *Poros,double *Perm, double *Velocity,double Den,double *Pressure); -extern "C" void ScaLBL_D3Q19_AAodd_Greyscale_IMRT(int *neighborList, double *dist, int start, int finish, int Np, double rlx, double Fx, double Fy, double Fz, +extern "C" void ScaLBL_D3Q19_AAodd_Greyscale_IMRT(int *neighborList, double *dist, int start, int finish, int Np, double rlx, double rlx_eff, double Fx, double Fy, double Fz, double *Poros,double *Perm, double *Velocity,double Den,double *Pressure); diff --git a/cpu/Greyscale.cpp b/cpu/Greyscale.cpp index d1bde7f2..16fad1e0 100644 --- a/cpu/Greyscale.cpp +++ b/cpu/Greyscale.cpp @@ -1,6 +1,6 @@ #include -extern "C" void ScaLBL_D3Q19_AAeven_Greyscale(double *dist, int start, int finish, int Np, double rlx, double Gx, double Gy, double Gz, +extern "C" void ScaLBL_D3Q19_AAeven_Greyscale(double *dist, int start, int finish, int Np, double rlx, double rlx_eff, double Gx, double Gy, double Gz, double *Poros,double *Perm, double *Velocity, double *Pressure){ int n; // conserved momemnts @@ -14,7 +14,7 @@ extern "C" void ScaLBL_D3Q19_AAeven_Greyscale(double *dist, int start, int finis double porosity; double perm;//voxel permeability double c0, c1; //Guo's model parameters - double mu = (1.0/rlx-0.5)/3.0;//kinematic viscosity + double mu_eff = (1.0/rlx_eff-0.5)/3.0;//kinematic viscosity double Fx, Fy, Fz;//The total body force including Brinkman force and user-specified (Gx,Gy,Gz) for (int n=start; n>>(dist,start,finish,Np,rlx,Fx,Fy,Fz,Poros,Perm,Velocity,Pressure); + dvc_ScaLBL_D3Q19_AAeven_Greyscale<<>>(dist,start,finish,Np,rlx,rlx_eff,Fx,Fy,Fz,Poros,Perm,Velocity,Pressure); cudaError_t err = cudaGetLastError(); if (cudaSuccess != err){ @@ -1389,9 +1389,9 @@ extern "C" void ScaLBL_D3Q19_AAeven_Greyscale(double *dist, int start, int finis } } -extern "C" void ScaLBL_D3Q19_AAodd_Greyscale(int *neighborList, double *dist, int start, int finish, int Np, double rlx, double Fx, double Fy, double Fz,double *Poros,double *Perm, double *Velocity,double *Pressure){ +extern "C" void ScaLBL_D3Q19_AAodd_Greyscale(int *neighborList, double *dist, int start, int finish, int Np, double rlx, double rlx_eff, double Fx, double Fy, double Fz,double *Poros,double *Perm, double *Velocity,double *Pressure){ - dvc_ScaLBL_D3Q19_AAodd_Greyscale<<>>(neighborList,dist,start,finish,Np,rlx,Fx,Fy,Fz,Poros,Perm,Velocity,Pressure); + dvc_ScaLBL_D3Q19_AAodd_Greyscale<<>>(neighborList,dist,start,finish,Np,rlx,rlx_eff,Fx,Fy,Fz,Poros,Perm,Velocity,Pressure); cudaError_t err = cudaGetLastError(); if (cudaSuccess != err){ @@ -1399,9 +1399,9 @@ extern "C" void ScaLBL_D3Q19_AAodd_Greyscale(int *neighborList, double *dist, in } } -extern "C" void ScaLBL_D3Q19_AAeven_Greyscale_IMRT(double *dist, int start, int finish, int Np, double rlx, double Fx, double Fy, double Fz,double *Poros,double *Perm, double *Velocity,double Den,double *Pressure){ +extern "C" void ScaLBL_D3Q19_AAeven_Greyscale_IMRT(double *dist, int start, int finish, int Np, double rlx, double rlx_eff, double Fx, double Fy, double Fz,double *Poros,double *Perm, double *Velocity,double Den,double *Pressure){ - dvc_ScaLBL_D3Q19_AAeven_Greyscale_IMRT<<>>(dist,start,finish,Np,rlx,Fx,Fy,Fz,Poros,Perm,Velocity,Den,Pressure); + dvc_ScaLBL_D3Q19_AAeven_Greyscale_IMRT<<>>(dist,start,finish,Np,rlx,rlx_eff,Fx,Fy,Fz,Poros,Perm,Velocity,Den,Pressure); cudaError_t err = cudaGetLastError(); if (cudaSuccess != 
err){ @@ -1409,9 +1409,9 @@ extern "C" void ScaLBL_D3Q19_AAeven_Greyscale_IMRT(double *dist, int start, int } } -extern "C" void ScaLBL_D3Q19_AAodd_Greyscale_IMRT(int *neighborList, double *dist, int start, int finish, int Np, double rlx, double Fx, double Fy, double Fz,double *Poros,double *Perm, double *Velocity,double Den,double *Pressure){ +extern "C" void ScaLBL_D3Q19_AAodd_Greyscale_IMRT(int *neighborList, double *dist, int start, int finish, int Np, double rlx, double rlx_eff, double Fx, double Fy, double Fz,double *Poros,double *Perm, double *Velocity,double Den,double *Pressure){ - dvc_ScaLBL_D3Q19_AAodd_Greyscale_IMRT<<>>(neighborList,dist,start,finish,Np,rlx,Fx,Fy,Fz,Poros,Perm,Velocity,Den,Pressure); + dvc_ScaLBL_D3Q19_AAodd_Greyscale_IMRT<<>>(neighborList,dist,start,finish,Np,rlx,rlx_eff,Fx,Fy,Fz,Poros,Perm,Velocity,Den,Pressure); cudaError_t err = cudaGetLastError(); if (cudaSuccess != err){ diff --git a/models/GreyscaleModel.cpp b/models/GreyscaleModel.cpp index 4b803272..79b7a9c7 100644 --- a/models/GreyscaleModel.cpp +++ b/models/GreyscaleModel.cpp @@ -14,7 +14,7 @@ void DeleteArray( const TYPE *p ) } ScaLBL_GreyscaleModel::ScaLBL_GreyscaleModel(int RANK, int NP, MPI_Comm COMM): -rank(RANK), nprocs(NP), Restart(0),timestep(0),timestepMax(0),tau(0),Den(0),Fx(0),Fy(0),Fz(0),flux(0),din(0),dout(0),GreyPorosity(0), +rank(RANK), nprocs(NP), Restart(0),timestep(0),timestepMax(0),tau(0),tau_eff(0),Den(0),Fx(0),Fy(0),Fz(0),flux(0),din(0),dout(0),GreyPorosity(0), Nx(0),Ny(0),Nz(0),N(0),Np(0),nprocx(0),nprocy(0),nprocz(0),BoundaryCondition(0),Lx(0),Ly(0),Lz(0),comm(COMM) { SignDist.resize(Nx,Ny,Nz); @@ -36,6 +36,7 @@ void ScaLBL_GreyscaleModel::ReadParams(string filename){ // set defaults timestepMax = 100000; tau = 1.0; + tau_eff = tau; Den = 1.0;//constant density tolerance = 0.01; Fx = Fy = Fz = 0.0; @@ -52,6 +53,7 @@ void ScaLBL_GreyscaleModel::ReadParams(string filename){ if (greyscale_db->keyExists( "tau" )){ tau = greyscale_db->getScalar( "tau" ); } + tau_eff = greyscale_db->getWithDefault( "tau_eff", tau ); if (greyscale_db->keyExists( "Den" )){ Den = greyscale_db->getScalar( "Den" ); } @@ -453,6 +455,7 @@ void ScaLBL_GreyscaleModel::Run(){ PROFILE_START("Loop"); auto current_db = db->cloneDatabase(); double rlx = 1.0/tau; + double rlx_eff = 1.0/tau_eff; double error = 1.0; double flow_rate_previous = 0.0; while (timestep < timestepMax && error > tolerance) { @@ -462,13 +465,13 @@ void ScaLBL_GreyscaleModel::Run(){ ScaLBL_Comm->SendD3Q19AA(fq); //READ FROM NORMAL switch (CollisionType){ case 1: - ScaLBL_D3Q19_AAodd_Greyscale_IMRT(NeighborList, fq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np, rlx, Fx, Fy, Fz,Porosity,Permeability,Velocity,Den,Pressure_dvc); + ScaLBL_D3Q19_AAodd_Greyscale_IMRT(NeighborList, fq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np, rlx, rlx_eff, Fx, Fy, Fz,Porosity,Permeability,Velocity,Den,Pressure_dvc); break; case 2: - ScaLBL_D3Q19_AAodd_Greyscale(NeighborList, fq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np, rlx, Fx, Fy, Fz,Porosity,Permeability,Velocity,Pressure_dvc); + ScaLBL_D3Q19_AAodd_Greyscale(NeighborList, fq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np, rlx, rlx_eff, Fx, Fy, Fz,Porosity,Permeability,Velocity,Pressure_dvc); break; default: - ScaLBL_D3Q19_AAodd_Greyscale_IMRT(NeighborList, fq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np, rlx, Fx, Fy, Fz,Porosity,Permeability,Velocity,Den,Pressure_dvc); + ScaLBL_D3Q19_AAodd_Greyscale_IMRT(NeighborList, fq, 
ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np, rlx, rlx_eff, Fx, Fy, Fz,Porosity,Permeability,Velocity,Den,Pressure_dvc); break; } ScaLBL_Comm->RecvD3Q19AA(fq); //WRITE INTO OPPOSITE @@ -480,13 +483,13 @@ void ScaLBL_GreyscaleModel::Run(){ } switch (CollisionType){ case 1: - ScaLBL_D3Q19_AAodd_Greyscale_IMRT(NeighborList, fq, 0, ScaLBL_Comm->LastExterior(), Np, rlx, Fx, Fy, Fz,Porosity,Permeability,Velocity,Den,Pressure_dvc); + ScaLBL_D3Q19_AAodd_Greyscale_IMRT(NeighborList, fq, 0, ScaLBL_Comm->LastExterior(), Np, rlx, rlx_eff, Fx, Fy, Fz,Porosity,Permeability,Velocity,Den,Pressure_dvc); break; case 2: - ScaLBL_D3Q19_AAodd_Greyscale(NeighborList, fq, 0, ScaLBL_Comm->LastExterior(), Np, rlx, Fx, Fy, Fz,Porosity,Permeability,Velocity,Pressure_dvc); + ScaLBL_D3Q19_AAodd_Greyscale(NeighborList, fq, 0, ScaLBL_Comm->LastExterior(), Np, rlx, rlx_eff, Fx, Fy, Fz,Porosity,Permeability,Velocity,Pressure_dvc); break; default: - ScaLBL_D3Q19_AAodd_Greyscale_IMRT(NeighborList, fq, 0, ScaLBL_Comm->LastExterior(), Np, rlx, Fx, Fy, Fz,Porosity,Permeability,Velocity,Den,Pressure_dvc); + ScaLBL_D3Q19_AAodd_Greyscale_IMRT(NeighborList, fq, 0, ScaLBL_Comm->LastExterior(), Np, rlx, rlx_eff, Fx, Fy, Fz,Porosity,Permeability,Velocity,Den,Pressure_dvc); break; } ScaLBL_DeviceBarrier(); MPI_Barrier(comm); @@ -496,13 +499,13 @@ void ScaLBL_GreyscaleModel::Run(){ ScaLBL_Comm->SendD3Q19AA(fq); //READ FORM NORMAL switch (CollisionType){ case 1: - ScaLBL_D3Q19_AAeven_Greyscale_IMRT(fq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np, rlx, Fx, Fy, Fz,Porosity,Permeability,Velocity,Den,Pressure_dvc); + ScaLBL_D3Q19_AAeven_Greyscale_IMRT(fq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np, rlx, rlx_eff, Fx, Fy, Fz,Porosity,Permeability,Velocity,Den,Pressure_dvc); break; case 2: - ScaLBL_D3Q19_AAeven_Greyscale(fq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np, rlx, Fx, Fy, Fz,Porosity,Permeability,Velocity,Pressure_dvc); + ScaLBL_D3Q19_AAeven_Greyscale(fq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np, rlx, rlx_eff, Fx, Fy, Fz,Porosity,Permeability,Velocity,Pressure_dvc); break; default: - ScaLBL_D3Q19_AAeven_Greyscale_IMRT(fq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np, rlx, Fx, Fy, Fz,Porosity,Permeability,Velocity,Den,Pressure_dvc); + ScaLBL_D3Q19_AAeven_Greyscale_IMRT(fq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np, rlx, rlx_eff, Fx, Fy, Fz,Porosity,Permeability,Velocity,Den,Pressure_dvc); break; } ScaLBL_Comm->RecvD3Q19AA(fq); //WRITE INTO OPPOSITE @@ -514,13 +517,13 @@ void ScaLBL_GreyscaleModel::Run(){ } switch (CollisionType){ case 1: - ScaLBL_D3Q19_AAeven_Greyscale_IMRT(fq, 0, ScaLBL_Comm->LastExterior(), Np, rlx, Fx, Fy, Fz,Porosity,Permeability,Velocity,Den,Pressure_dvc); + ScaLBL_D3Q19_AAeven_Greyscale_IMRT(fq, 0, ScaLBL_Comm->LastExterior(), Np, rlx, rlx_eff, Fx, Fy, Fz,Porosity,Permeability,Velocity,Den,Pressure_dvc); break; case 2: - ScaLBL_D3Q19_AAeven_Greyscale(fq, 0, ScaLBL_Comm->LastExterior(), Np, rlx, Fx, Fy, Fz,Porosity,Permeability,Velocity,Pressure_dvc); + ScaLBL_D3Q19_AAeven_Greyscale(fq, 0, ScaLBL_Comm->LastExterior(), Np, rlx, rlx_eff, Fx, Fy, Fz,Porosity,Permeability,Velocity,Pressure_dvc); break; default: - ScaLBL_D3Q19_AAeven_Greyscale_IMRT(fq, 0, ScaLBL_Comm->LastExterior(), Np, rlx, Fx, Fy, Fz,Porosity,Permeability,Velocity,Den,Pressure_dvc); + ScaLBL_D3Q19_AAeven_Greyscale_IMRT(fq, 0, ScaLBL_Comm->LastExterior(), Np, rlx, rlx_eff, Fx, Fy, 
Fz,Porosity,Permeability,Velocity,Den,Pressure_dvc); break; } ScaLBL_DeviceBarrier(); MPI_Barrier(comm); diff --git a/models/GreyscaleModel.h b/models/GreyscaleModel.h index 792e87ea..c670239f 100644 --- a/models/GreyscaleModel.h +++ b/models/GreyscaleModel.h @@ -37,6 +37,7 @@ public: int BoundaryCondition; int CollisionType; double tau; + double tau_eff; double Den;//constant density double tolerance; double Fx,Fy,Fz,flux; From 793d294aa33550aa7cefc063f321306bea2c512b Mon Sep 17 00:00:00 2001 From: Rex Zhe Li Date: Sat, 1 Feb 2020 17:03:42 -0500 Subject: [PATCH 038/121] CPU version update: remove the higher-order terms in body force --- cpu/Greyscale.cpp | 422 +++++++++++++++++++++++++++++++++++----------- 1 file changed, 320 insertions(+), 102 deletions(-) diff --git a/cpu/Greyscale.cpp b/cpu/Greyscale.cpp index 16fad1e0..b4b017c8 100644 --- a/cpu/Greyscale.cpp +++ b/cpu/Greyscale.cpp @@ -69,94 +69,173 @@ extern "C" void ScaLBL_D3Q19_AAeven_Greyscale(double *dist, int start, int finis Fz=Gz; } + //------------------------ BGK collison where body force has higher-order terms ----------------------------------------------------------// +// // q=0 +// dist[n] = f0*(1.0-rlx)+ rlx*0.3333333333333333*rho*(1. - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) +// + 0.3333333333333333*rho*(1. - 0.5*rlx)*(Fx*(0. - (3.*ux)/porosity) + Fy*(0. - (3.*uy)/porosity) + Fz*(0. - (3.*uz)/porosity)); +// +// // q = 1 +// dist[1*Np+n] = f1*(1.0-rlx) + rlx*0.05555555555555555*rho*(1 + 3.*ux + (4.5*ux*ux)/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) +// +0.05555555555555555*rho*(1. - 0.5*rlx)*(Fx*(3. + (6.*ux)/porosity) + Fy*(0. - (3.*uy)/porosity) + Fz*(0. - (3.*uz)/porosity)); +// +// // q=2 +// dist[2*Np+n] = f2*(1.0-rlx) + rlx*0.05555555555555555*rho*(1 - 3.*ux + (4.5*ux*ux)/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) +// +0.05555555555555555*rho*(1. - 0.5*rlx)*(Fx*(-3. + (6.*ux)/porosity) + Fy*(0. - (3.*uy)/porosity) + Fz*(0. - (3.*uz)/porosity)); +// +// // q = 3 +// dist[3*Np+n] = f3*(1.0-rlx) + rlx*0.05555555555555555*rho*(1 + 3.*uy + (4.5*uy*uy)/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) +// +0.05555555555555555*rho*(1. - 0.5*rlx)*(Fx*(0. - (3.*ux)/porosity) + Fy*(3. + (6.*uy)/porosity) + Fz*(0. - (3.*uz)/porosity)); +// +// // q = 4 +// dist[4*Np+n] = f4*(1.0-rlx) + rlx*0.05555555555555555*rho*(1 - 3.*uy + (4.5*uy*uy)/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) +// +0.05555555555555555*rho*(1. - 0.5*rlx)*(Fx*(0. - (3.*ux)/porosity) + Fy*(-3. + (6.*uy)/porosity) + Fz*(0. - (3.*uz)/porosity)); +// +// // q = 5 +// dist[5*Np+n] = f5*(1.0-rlx) + rlx*0.05555555555555555*rho*(1 + 3.*uz + (4.5*uz*uz)/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) +// +0.05555555555555555*rho*(1. - 0.5*rlx)*(Fx*(0. - (3.*ux)/porosity) + Fy*(0. - (3.*uy)/porosity) + Fz*(3. + (6.*uz)/porosity)); +// +// // q = 6 +// dist[6*Np+n] = f6*(1.0-rlx) + rlx*0.05555555555555555*rho*(1 - 3.*uz + (4.5*uz*uz)/porosity - (1.5*(ux*ux+ uy*uy + uz*uz))/porosity) +// +0.05555555555555555*rho*(1. - 0.5*rlx)*(Fx*(0. - (3.*ux)/porosity) + Fy*(0. - (3.*uy)/porosity) + Fz*(-3. + (6.*uz)/porosity)); +// +// // q = 7 +// dist[7*Np+n] = f7*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(ux + uy) + (4.5*(ux + uy)*(ux + uy))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) +// +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(3. - (3.*ux)/porosity + (9.*(ux + uy))/porosity) + Fy*(3. - (3.*uy)/porosity + (9.*(ux + uy))/porosity) + +// Fz*(0. 
- (3.*uz)/porosity)); +// +// // q = 8 +// dist[8*Np+n] = f8*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(-ux - uy) + (4.5*(-ux - uy)*(-ux - uy))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) +// +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(-3. - (3.*ux)/porosity - (9.*(-ux - uy))/porosity) + Fy*(-3. - (9.*(-ux - uy))/porosity - (3.*uy)/porosity) + +// Fz*(0. - (3.*uz)/porosity)); +// +// // q = 9 +// dist[9*Np+n] = f9*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(ux - uy) + (4.5*(ux - uy)*(ux - uy))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) +// +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(3. - (3.*ux)/porosity + (9.*(ux - uy))/porosity) + Fy*(-3. - (9.*(ux - uy))/porosity - (3.*uy)/porosity) + +// Fz*(0. - (3.*uz)/porosity)); +// +// // q = 10 +// dist[10*Np+n] = f10*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(-ux + uy) + (4.5*(-ux + uy)*(-ux + uy))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) +// +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(-3. - (3.*ux)/porosity - (9.*(-ux + uy))/porosity) + Fy*(3. - (3.*uy)/porosity + (9.*(-ux + uy))/porosity) + +// Fz*(0. - (3.*uz)/porosity)); +// +// // q = 11 +// dist[11*Np+n] = f11*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(ux + uz) + (4.5*(ux + uz)*(ux + uz))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) +// +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fy*(0. - (3.*uy)/porosity) + Fx*(3. - (3.*ux)/porosity + (9.*(ux + uz))/porosity) + +// Fz*(3. - (3.*uz)/porosity + (9.*(ux + uz))/porosity)); +// +// // q = 12 +// dist[12*Np+n] = f12*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(-ux - uz) + (4.5*(-ux - uz)*(-ux - uz))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) +// +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fy*(0. - (3.*uy)/porosity) + Fx*(-3. - (3.*ux)/porosity - (9.*(-ux - uz))/porosity) + +// Fz*(-3. - (9.*(-ux - uz))/porosity - (3.*uz)/porosity)); +// +// // q = 13 +// dist[13*Np+n] = f13*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(ux - uz) + (4.5*(ux - uz)*(ux - uz))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) +// +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fy*(0. - (3.*uy)/porosity) + Fx*(3. - (3.*ux)/porosity + (9.*(ux - uz))/porosity) + +// Fz*(-3. - (9.*(ux - uz))/porosity - (3.*uz)/porosity)); +// +// // q= 14 +// dist[14*Np+n] = f14*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(-ux + uz) + (4.5*(-ux + uz)*(-ux + uz))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) +// +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fy*(0. - (3.*uy)/porosity) + Fx*(-3. - (3.*ux)/porosity - (9.*(-ux + uz))/porosity) + +// Fz*(3. - (3.*uz)/porosity + (9.*(-ux + uz))/porosity)); +// +// // q = 15 +// dist[15*Np+n] = f15*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(uy + uz) + (4.5*(uy + uz)*(uy + uz))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) +// +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(0. - (3.*ux)/porosity) + Fy*(3. - (3.*uy)/porosity + (9.*(uy + uz))/porosity) + +// Fz*(3. - (3.*uz)/porosity + (9.*(uy + uz))/porosity)); +// +// // q = 16 +// dist[16*Np+n] = f16*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(-uy - uz) + (4.5*(-uy - uz)*(-uy - uz))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) +// +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(0. - (3.*ux)/porosity) + Fy*(-3. - (3.*uy)/porosity - (9.*(-uy - uz))/porosity) + +// Fz*(-3. 
- (9.*(-uy - uz))/porosity - (3.*uz)/porosity)); +// +// // q = 17 +// dist[17*Np+n] = f17*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(uy - uz) + (4.5*(uy - uz)*(uy - uz))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) +// +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(0. - (3.*ux)/porosity) + Fy*(3. - (3.*uy)/porosity + (9.*(uy - uz))/porosity) + +// Fz*(-3. - (9.*(uy - uz))/porosity - (3.*uz)/porosity)); +// +// // q = 18 +// dist[18*Np+n] = f18*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(-uy + uz) + (4.5*(-uy + uz)*(-uy + uz))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) +// +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(0. - (3.*ux)/porosity) + Fy*(-3. - (3.*uy)/porosity - (9.*(-uy + uz))/porosity) + +// Fz*(3. - (3.*uz)/porosity + (9.*(-uy + uz))/porosity)); + //----------------------------------------------------------------------------------------------------------------------------------------// + + //------------------------ BGK collison where body force has NO higher-order terms ----------------------------------------------------------// // q=0 - dist[n] = f0*(1.0-rlx)+ rlx*0.3333333333333333*rho*(1. - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) - + 0.3333333333333333*rho*(1. - 0.5*rlx)*(Fx*(0. - (3.*ux)/porosity) + Fy*(0. - (3.*uy)/porosity) + Fz*(0. - (3.*uz)/porosity)); + dist[n] = f0*(1.0-rlx)+ rlx*0.3333333333333333*rho*(1. - (1.5*(ux*ux + uy*uy + uz*uz))/porosity); // q = 1 dist[1*Np+n] = f1*(1.0-rlx) + rlx*0.05555555555555555*rho*(1 + 3.*ux + (4.5*ux*ux)/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) - +0.05555555555555555*rho*(1. - 0.5*rlx)*(Fx*(3. + (6.*ux)/porosity) + Fy*(0. - (3.*uy)/porosity) + Fz*(0. - (3.*uz)/porosity)); + +0.05555555555555555*rho*(1. - 0.5*rlx)*(Fx*(3.)); // q=2 dist[2*Np+n] = f2*(1.0-rlx) + rlx*0.05555555555555555*rho*(1 - 3.*ux + (4.5*ux*ux)/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) - +0.05555555555555555*rho*(1. - 0.5*rlx)*(Fx*(-3. + (6.*ux)/porosity) + Fy*(0. - (3.*uy)/porosity) + Fz*(0. - (3.*uz)/porosity)); + +0.05555555555555555*rho*(1. - 0.5*rlx)*(Fx*(-3.)); // q = 3 dist[3*Np+n] = f3*(1.0-rlx) + rlx*0.05555555555555555*rho*(1 + 3.*uy + (4.5*uy*uy)/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) - +0.05555555555555555*rho*(1. - 0.5*rlx)*(Fx*(0. - (3.*ux)/porosity) + Fy*(3. + (6.*uy)/porosity) + Fz*(0. - (3.*uz)/porosity)); + +0.05555555555555555*rho*(1. - 0.5*rlx)*(Fy*(3.)); // q = 4 dist[4*Np+n] = f4*(1.0-rlx) + rlx*0.05555555555555555*rho*(1 - 3.*uy + (4.5*uy*uy)/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) - +0.05555555555555555*rho*(1. - 0.5*rlx)*(Fx*(0. - (3.*ux)/porosity) + Fy*(-3. + (6.*uy)/porosity) + Fz*(0. - (3.*uz)/porosity)); + +0.05555555555555555*rho*(1. - 0.5*rlx)*(Fy*(-3.)); // q = 5 dist[5*Np+n] = f5*(1.0-rlx) + rlx*0.05555555555555555*rho*(1 + 3.*uz + (4.5*uz*uz)/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) - +0.05555555555555555*rho*(1. - 0.5*rlx)*(Fx*(0. - (3.*ux)/porosity) + Fy*(0. - (3.*uy)/porosity) + Fz*(3. + (6.*uz)/porosity)); + +0.05555555555555555*rho*(1. - 0.5*rlx)*(Fz*(3.)); // q = 6 dist[6*Np+n] = f6*(1.0-rlx) + rlx*0.05555555555555555*rho*(1 - 3.*uz + (4.5*uz*uz)/porosity - (1.5*(ux*ux+ uy*uy + uz*uz))/porosity) - +0.05555555555555555*rho*(1. - 0.5*rlx)*(Fx*(0. - (3.*ux)/porosity) + Fy*(0. - (3.*uy)/porosity) + Fz*(-3. + (6.*uz)/porosity)); + +0.05555555555555555*rho*(1. 
- 0.5*rlx)*(Fz*(-3.)); // q = 7 dist[7*Np+n] = f7*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(ux + uy) + (4.5*(ux + uy)*(ux + uy))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) - +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(3. - (3.*ux)/porosity + (9.*(ux + uy))/porosity) + Fy*(3. - (3.*uy)/porosity + (9.*(ux + uy))/porosity) + - Fz*(0. - (3.*uz)/porosity)); + +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(3.) + Fy*(3.)); // q = 8 dist[8*Np+n] = f8*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(-ux - uy) + (4.5*(-ux - uy)*(-ux - uy))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) - +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(-3. - (3.*ux)/porosity - (9.*(-ux - uy))/porosity) + Fy*(-3. - (9.*(-ux - uy))/porosity - (3.*uy)/porosity) + - Fz*(0. - (3.*uz)/porosity)); + +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(-3.) + Fy*(-3.)); // q = 9 dist[9*Np+n] = f9*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(ux - uy) + (4.5*(ux - uy)*(ux - uy))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) - +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(3. - (3.*ux)/porosity + (9.*(ux - uy))/porosity) + Fy*(-3. - (9.*(ux - uy))/porosity - (3.*uy)/porosity) + - Fz*(0. - (3.*uz)/porosity)); + +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(3.) + Fy*(-3.)); // q = 10 dist[10*Np+n] = f10*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(-ux + uy) + (4.5*(-ux + uy)*(-ux + uy))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) - +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(-3. - (3.*ux)/porosity - (9.*(-ux + uy))/porosity) + Fy*(3. - (3.*uy)/porosity + (9.*(-ux + uy))/porosity) + - Fz*(0. - (3.*uz)/porosity)); + +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(-3.) + Fy*(3.)); // q = 11 dist[11*Np+n] = f11*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(ux + uz) + (4.5*(ux + uz)*(ux + uz))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) - +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fy*(0. - (3.*uy)/porosity) + Fx*(3. - (3.*ux)/porosity + (9.*(ux + uz))/porosity) + - Fz*(3. - (3.*uz)/porosity + (9.*(ux + uz))/porosity)); + +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(3.) + Fz*(3.)); // q = 12 dist[12*Np+n] = f12*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(-ux - uz) + (4.5*(-ux - uz)*(-ux - uz))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) - +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fy*(0. - (3.*uy)/porosity) + Fx*(-3. - (3.*ux)/porosity - (9.*(-ux - uz))/porosity) + - Fz*(-3. - (9.*(-ux - uz))/porosity - (3.*uz)/porosity)); + +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(-3.) + Fz*(-3.)); // q = 13 dist[13*Np+n] = f13*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(ux - uz) + (4.5*(ux - uz)*(ux - uz))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) - +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fy*(0. - (3.*uy)/porosity) + Fx*(3. - (3.*ux)/porosity + (9.*(ux - uz))/porosity) + - Fz*(-3. - (9.*(ux - uz))/porosity - (3.*uz)/porosity)); + +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(3.) + Fz*(-3.)); // q= 14 dist[14*Np+n] = f14*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(-ux + uz) + (4.5*(-ux + uz)*(-ux + uz))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) - +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fy*(0. - (3.*uy)/porosity) + Fx*(-3. - (3.*ux)/porosity - (9.*(-ux + uz))/porosity) + - Fz*(3. - (3.*uz)/porosity + (9.*(-ux + uz))/porosity)); + +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(-3.) 
+ Fz*(3.)); // q = 15 dist[15*Np+n] = f15*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(uy + uz) + (4.5*(uy + uz)*(uy + uz))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) - +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(0. - (3.*ux)/porosity) + Fy*(3. - (3.*uy)/porosity + (9.*(uy + uz))/porosity) + - Fz*(3. - (3.*uz)/porosity + (9.*(uy + uz))/porosity)); + +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fy*(3.) + Fz*(3.)); // q = 16 dist[16*Np+n] = f16*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(-uy - uz) + (4.5*(-uy - uz)*(-uy - uz))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) - +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(0. - (3.*ux)/porosity) + Fy*(-3. - (3.*uy)/porosity - (9.*(-uy - uz))/porosity) + - Fz*(-3. - (9.*(-uy - uz))/porosity - (3.*uz)/porosity)); + +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fy*(-3.) + Fz*(-3.)); // q = 17 dist[17*Np+n] = f17*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(uy - uz) + (4.5*(uy - uz)*(uy - uz))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) - +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(0. - (3.*ux)/porosity) + Fy*(3. - (3.*uy)/porosity + (9.*(uy - uz))/porosity) + - Fz*(-3. - (9.*(uy - uz))/porosity - (3.*uz)/porosity)); + +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fy*(3.) + Fz*(-3.)); // q = 18 dist[18*Np+n] = f18*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(-uy + uz) + (4.5*(-uy + uz)*(-uy + uz))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) - +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(0. - (3.*ux)/porosity) + Fy*(-3. - (3.*uy)/porosity - (9.*(-uy + uz))/porosity) + - Fz*(3. - (3.*uz)/porosity + (9.*(-uy + uz))/porosity)); - + +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fy*(-3.) + Fz*(3.)); + //-------------------------------------------------------------------------------------------------------------------------------------------// + //Update velocity on device Velocity[0*Np+n] = ux; Velocity[1*Np+n] = uy; @@ -291,93 +370,176 @@ extern "C" void ScaLBL_D3Q19_AAodd_Greyscale(int *neighborList, double *dist, in Fz=Gz; } + //------------------------ BGK collison where body force has higher-order terms ----------------------------------------------------------// +// // q=0 +// dist[n] = f0*(1.0-rlx) + rlx*0.3333333333333333*rho*(1. - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) +// + 0.3333333333333333*rho*(1. - 0.5*rlx)*(Fx*(0. - (3.*ux)/porosity) + Fy*(0. - (3.*uy)/porosity) + Fz*(0. - (3.*uz)/porosity)); +// +// // q = 1 +// dist[nr2] = f1*(1.0-rlx) + rlx*0.05555555555555555*rho*(1 + 3.*ux + (4.5*ux*ux)/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) +// +0.05555555555555555*rho*(1. - 0.5*rlx)*(Fx*(3. + (6.*ux)/porosity) + Fy*(0. - (3.*uy)/porosity) + Fz*(0. - (3.*uz)/porosity)); +// +// // q=2 +// dist[nr1] = f2*(1.0-rlx) + rlx*0.05555555555555555*rho*(1 - 3.*ux + (4.5*ux*ux)/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) +// +0.05555555555555555*rho*(1. - 0.5*rlx)*(Fx*(-3. + (6.*ux)/porosity) + Fy*(0. - (3.*uy)/porosity) + Fz*(0. - (3.*uz)/porosity)); +// +// // q = 3 +// dist[nr4] = f3*(1.0-rlx) + rlx*0.05555555555555555*rho*(1 + 3.*uy + (4.5*uy*uy)/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) +// +0.05555555555555555*rho*(1. - 0.5*rlx)*(Fx*(0. - (3.*ux)/porosity) + Fy*(3. + (6.*uy)/porosity) + Fz*(0. - (3.*uz)/porosity)); +// +// // q = 4 +// dist[nr3] = f4*(1.0-rlx) + rlx*0.05555555555555555*rho*(1 - 3.*uy + (4.5*uy*uy)/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) +// +0.05555555555555555*rho*(1. - 0.5*rlx)*(Fx*(0. - (3.*ux)/porosity) + Fy*(-3. + (6.*uy)/porosity) + Fz*(0. 
- (3.*uz)/porosity)); +// +// // q = 5 +// dist[nr6] = f5*(1.0-rlx) + rlx*0.05555555555555555*rho*(1 + 3.*uz + (4.5*uz*uz)/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) +// +0.05555555555555555*rho*(1. - 0.5*rlx)*(Fx*(0. - (3.*ux)/porosity) + Fy*(0. - (3.*uy)/porosity) + Fz*(3. + (6.*uz)/porosity)); +// +// // q = 6 +// dist[nr5] = f6*(1.0-rlx) + rlx*0.05555555555555555*rho*(1 - 3.*uz + (4.5*uz*uz)/porosity - (1.5*(ux*ux+ uy*uy + uz*uz))/porosity) +// +0.05555555555555555*rho*(1. - 0.5*rlx)*(Fx*(0. - (3.*ux)/porosity) + Fy*(0. - (3.*uy)/porosity) + Fz*(-3. + (6.*uz)/porosity)); +// +// // q = 7 +// dist[nr8] = f7*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(ux + uy) + (4.5*(ux + uy)*(ux + uy))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) +// +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(3. - (3.*ux)/porosity + (9.*(ux + uy))/porosity) + Fy*(3. - (3.*uy)/porosity + (9.*(ux + uy))/porosity) + +// Fz*(0. - (3.*uz)/porosity)); +// +// // q = 8 +// dist[nr7] = f8*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(-ux - uy) + (4.5*(-ux - uy)*(-ux - uy))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) +// +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(-3. - (3.*ux)/porosity - (9.*(-ux - uy))/porosity) + Fy*(-3. - (9.*(-ux - uy))/porosity - (3.*uy)/porosity) + +// Fz*(0. - (3.*uz)/porosity)); +// +// // q = 9 +// dist[nr10] = f9*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(ux - uy) + (4.5*(ux - uy)*(ux - uy))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) +// +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(3. - (3.*ux)/porosity + (9.*(ux - uy))/porosity) + Fy*(-3. - (9.*(ux - uy))/porosity - (3.*uy)/porosity) + +// Fz*(0. - (3.*uz)/porosity)); +// +// // q = 10 +// dist[nr9] = f10*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(-ux + uy) + (4.5*(-ux + uy)*(-ux + uy))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) +// +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(-3. - (3.*ux)/porosity - (9.*(-ux + uy))/porosity) + Fy*(3. - (3.*uy)/porosity + (9.*(-ux + uy))/porosity) + +// Fz*(0. - (3.*uz)/porosity)); +// +// // q = 11 +// dist[nr12] = f11*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(ux + uz) + (4.5*(ux + uz)*(ux + uz))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) +// +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fy*(0. - (3.*uy)/porosity) + Fx*(3. - (3.*ux)/porosity + (9.*(ux + uz))/porosity) + +// Fz*(3. - (3.*uz)/porosity + (9.*(ux + uz))/porosity)); +// +// // q = 12 +// dist[nr11] = f12*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(-ux - uz) + (4.5*(-ux - uz)*(-ux - uz))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) +// +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fy*(0. - (3.*uy)/porosity) + Fx*(-3. - (3.*ux)/porosity - (9.*(-ux - uz))/porosity) + +// Fz*(-3. - (9.*(-ux - uz))/porosity - (3.*uz)/porosity)); +// +// // q = 13 +// dist[nr14] = f13*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(ux - uz) + (4.5*(ux - uz)*(ux - uz))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) +// +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fy*(0. - (3.*uy)/porosity) + Fx*(3. - (3.*ux)/porosity + (9.*(ux - uz))/porosity) + +// Fz*(-3. - (9.*(ux - uz))/porosity - (3.*uz)/porosity)); +// +// // q= 14 +// dist[nr13] = f14*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(-ux + uz) + (4.5*(-ux + uz)*(-ux + uz))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) +// +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fy*(0. - (3.*uy)/porosity) + Fx*(-3. - (3.*ux)/porosity - (9.*(-ux + uz))/porosity) + +// Fz*(3. 
- (3.*uz)/porosity + (9.*(-ux + uz))/porosity)); +// +// // q = 15 +// dist[nr16] = f15*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(uy + uz) + (4.5*(uy + uz)*(uy + uz))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) +// +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(0. - (3.*ux)/porosity) + Fy*(3. - (3.*uy)/porosity + (9.*(uy + uz))/porosity) + +// Fz*(3. - (3.*uz)/porosity + (9.*(uy + uz))/porosity)); +// +// // q = 16 +// dist[nr15] = f16*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(-uy - uz) + (4.5*(-uy - uz)*(-uy - uz))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) +// +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(0. - (3.*ux)/porosity) + Fy*(-3. - (3.*uy)/porosity - (9.*(-uy - uz))/porosity) + +// Fz*(-3. - (9.*(-uy - uz))/porosity - (3.*uz)/porosity)); +// +// // q = 17 +// dist[nr18] = f17*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(uy - uz) + (4.5*(uy - uz)*(uy - uz))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) +// +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(0. - (3.*ux)/porosity) + Fy*(3. - (3.*uy)/porosity + (9.*(uy - uz))/porosity) + +// Fz*(-3. - (9.*(uy - uz))/porosity - (3.*uz)/porosity)); +// +// // q = 18 +// dist[nr17] = f18*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(-uy + uz) + (4.5*(-uy + uz)*(-uy + uz))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) +// +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(0. - (3.*ux)/porosity) + Fy*(-3. - (3.*uy)/porosity - (9.*(-uy + uz))/porosity) + +// Fz*(3. - (3.*uz)/porosity + (9.*(-uy + uz))/porosity)); + //----------------------------------------------------------------------------------------------------------------------------------------// + + + + //------------------------ BGK collison where body force has NO higher-order terms ----------------------------------------------------------// // q=0 - dist[n] = f0*(1.0-rlx) + rlx*0.3333333333333333*rho*(1. - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) - + 0.3333333333333333*rho*(1. - 0.5*rlx)*(Fx*(0. - (3.*ux)/porosity) + Fy*(0. - (3.*uy)/porosity) + Fz*(0. - (3.*uz)/porosity)); + dist[n] = f0*(1.0-rlx) + rlx*0.3333333333333333*rho*(1. - (1.5*(ux*ux + uy*uy + uz*uz))/porosity); // q = 1 dist[nr2] = f1*(1.0-rlx) + rlx*0.05555555555555555*rho*(1 + 3.*ux + (4.5*ux*ux)/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) - +0.05555555555555555*rho*(1. - 0.5*rlx)*(Fx*(3. + (6.*ux)/porosity) + Fy*(0. - (3.*uy)/porosity) + Fz*(0. - (3.*uz)/porosity)); + +0.05555555555555555*rho*(1. - 0.5*rlx)*(Fx*(3.)); // q=2 dist[nr1] = f2*(1.0-rlx) + rlx*0.05555555555555555*rho*(1 - 3.*ux + (4.5*ux*ux)/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) - +0.05555555555555555*rho*(1. - 0.5*rlx)*(Fx*(-3. + (6.*ux)/porosity) + Fy*(0. - (3.*uy)/porosity) + Fz*(0. - (3.*uz)/porosity)); + +0.05555555555555555*rho*(1. - 0.5*rlx)*(Fx*(-3.)); // q = 3 dist[nr4] = f3*(1.0-rlx) + rlx*0.05555555555555555*rho*(1 + 3.*uy + (4.5*uy*uy)/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) - +0.05555555555555555*rho*(1. - 0.5*rlx)*(Fx*(0. - (3.*ux)/porosity) + Fy*(3. + (6.*uy)/porosity) + Fz*(0. - (3.*uz)/porosity)); + +0.05555555555555555*rho*(1. - 0.5*rlx)*(Fy*(3.)); // q = 4 dist[nr3] = f4*(1.0-rlx) + rlx*0.05555555555555555*rho*(1 - 3.*uy + (4.5*uy*uy)/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) - +0.05555555555555555*rho*(1. - 0.5*rlx)*(Fx*(0. - (3.*ux)/porosity) + Fy*(-3. + (6.*uy)/porosity) + Fz*(0. - (3.*uz)/porosity)); + +0.05555555555555555*rho*(1. 
- 0.5*rlx)*(Fy*(-3.)); // q = 5 dist[nr6] = f5*(1.0-rlx) + rlx*0.05555555555555555*rho*(1 + 3.*uz + (4.5*uz*uz)/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) - +0.05555555555555555*rho*(1. - 0.5*rlx)*(Fx*(0. - (3.*ux)/porosity) + Fy*(0. - (3.*uy)/porosity) + Fz*(3. + (6.*uz)/porosity)); + +0.05555555555555555*rho*(1. - 0.5*rlx)*(Fz*(3.)); // q = 6 dist[nr5] = f6*(1.0-rlx) + rlx*0.05555555555555555*rho*(1 - 3.*uz + (4.5*uz*uz)/porosity - (1.5*(ux*ux+ uy*uy + uz*uz))/porosity) - +0.05555555555555555*rho*(1. - 0.5*rlx)*(Fx*(0. - (3.*ux)/porosity) + Fy*(0. - (3.*uy)/porosity) + Fz*(-3. + (6.*uz)/porosity)); + +0.05555555555555555*rho*(1. - 0.5*rlx)*(Fz*(-3.)); // q = 7 dist[nr8] = f7*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(ux + uy) + (4.5*(ux + uy)*(ux + uy))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) - +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(3. - (3.*ux)/porosity + (9.*(ux + uy))/porosity) + Fy*(3. - (3.*uy)/porosity + (9.*(ux + uy))/porosity) + - Fz*(0. - (3.*uz)/porosity)); + +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(3.) + Fy*(3.)); // q = 8 dist[nr7] = f8*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(-ux - uy) + (4.5*(-ux - uy)*(-ux - uy))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) - +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(-3. - (3.*ux)/porosity - (9.*(-ux - uy))/porosity) + Fy*(-3. - (9.*(-ux - uy))/porosity - (3.*uy)/porosity) + - Fz*(0. - (3.*uz)/porosity)); + +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(-3.) + Fy*(-3.)); // q = 9 dist[nr10] = f9*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(ux - uy) + (4.5*(ux - uy)*(ux - uy))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) - +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(3. - (3.*ux)/porosity + (9.*(ux - uy))/porosity) + Fy*(-3. - (9.*(ux - uy))/porosity - (3.*uy)/porosity) + - Fz*(0. - (3.*uz)/porosity)); + +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(3.) + Fy*(-3.)); // q = 10 dist[nr9] = f10*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(-ux + uy) + (4.5*(-ux + uy)*(-ux + uy))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) - +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(-3. - (3.*ux)/porosity - (9.*(-ux + uy))/porosity) + Fy*(3. - (3.*uy)/porosity + (9.*(-ux + uy))/porosity) + - Fz*(0. - (3.*uz)/porosity)); + +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(-3.) + Fy*(3.)); // q = 11 dist[nr12] = f11*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(ux + uz) + (4.5*(ux + uz)*(ux + uz))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) - +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fy*(0. - (3.*uy)/porosity) + Fx*(3. - (3.*ux)/porosity + (9.*(ux + uz))/porosity) + - Fz*(3. - (3.*uz)/porosity + (9.*(ux + uz))/porosity)); + +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(3.) + Fz*(3.)); // q = 12 dist[nr11] = f12*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(-ux - uz) + (4.5*(-ux - uz)*(-ux - uz))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) - +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fy*(0. - (3.*uy)/porosity) + Fx*(-3. - (3.*ux)/porosity - (9.*(-ux - uz))/porosity) + - Fz*(-3. - (9.*(-ux - uz))/porosity - (3.*uz)/porosity)); + +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(-3.) + Fz*(-3.)); // q = 13 dist[nr14] = f13*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(ux - uz) + (4.5*(ux - uz)*(ux - uz))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) - +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fy*(0. - (3.*uy)/porosity) + Fx*(3. - (3.*ux)/porosity + (9.*(ux - uz))/porosity) + - Fz*(-3. - (9.*(ux - uz))/porosity - (3.*uz)/porosity)); + +0.027777777777777776*rho*(1. 
- 0.5*rlx)*(Fx*(3.) + Fz*(-3.)); // q= 14 dist[nr13] = f14*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(-ux + uz) + (4.5*(-ux + uz)*(-ux + uz))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) - +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fy*(0. - (3.*uy)/porosity) + Fx*(-3. - (3.*ux)/porosity - (9.*(-ux + uz))/porosity) + - Fz*(3. - (3.*uz)/porosity + (9.*(-ux + uz))/porosity)); + +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(-3.) + Fz*(3.)); // q = 15 dist[nr16] = f15*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(uy + uz) + (4.5*(uy + uz)*(uy + uz))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) - +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(0. - (3.*ux)/porosity) + Fy*(3. - (3.*uy)/porosity + (9.*(uy + uz))/porosity) + - Fz*(3. - (3.*uz)/porosity + (9.*(uy + uz))/porosity)); + +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fy*(3.) + Fz*(3.)); // q = 16 dist[nr15] = f16*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(-uy - uz) + (4.5*(-uy - uz)*(-uy - uz))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) - +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(0. - (3.*ux)/porosity) + Fy*(-3. - (3.*uy)/porosity - (9.*(-uy - uz))/porosity) + - Fz*(-3. - (9.*(-uy - uz))/porosity - (3.*uz)/porosity)); + +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fy*(-3.) + Fz*(-3.)); // q = 17 dist[nr18] = f17*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(uy - uz) + (4.5*(uy - uz)*(uy - uz))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) - +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(0. - (3.*ux)/porosity) + Fy*(3. - (3.*uy)/porosity + (9.*(uy - uz))/porosity) + - Fz*(-3. - (9.*(uy - uz))/porosity - (3.*uz)/porosity)); + +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fy*(3.) + Fz*(-3.)); // q = 18 dist[nr17] = f18*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(-uy + uz) + (4.5*(-uy + uz)*(-uy + uz))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) - +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(0. - (3.*ux)/porosity) + Fy*(-3. - (3.*uy)/porosity - (9.*(-uy + uz))/porosity) + - Fz*(3. - (3.*uz)/porosity + (9.*(-uy + uz))/porosity)); + +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fy*(-3.) + Fz*(3.)); + //-------------------------------------------------------------------------------------------------------------------------------------------// + + //Update velocity on device Velocity[0*Np+n] = ux; @@ -730,11 +892,45 @@ extern "C" void ScaLBL_D3Q19_AAeven_Greyscale_IMRT(double *dist, int start, int //Calculate pressure for Incompressible-MRT model pressure=0.5/porosity*(pressure-0.5*Den*u_mag*u_mag/porosity); + //-------------------- IMRT collison where body force has higher-order terms -------------// +// //..............carry out relaxation process............................................... 
+// m1 = m1 + rlx_setA*((-30*Den+19*(ux*ux+uy*uy+uz*uz)/porosity + 57*pressure*porosity) - m1) +// + (1-0.5*rlx_setA)*38*(Fx*ux+Fy*uy+Fz*uz)/porosity; +// m2 = m2 + rlx_setA*((12*Den - 5.5*(ux*ux+uy*uy+uz*uz)/porosity-27*pressure*porosity) - m2) +// + (1-0.5*rlx_setA)*11*(-Fx*ux-Fy*uy-Fz*uz)/porosity; +// jx = jx + Fx; +// m4 = m4 + rlx_setB*((-0.6666666666666666*ux*Den) - m4) +// + (1-0.5*rlx_setB)*(-0.6666666666666666*Fx); +// jy = jy + Fy; +// m6 = m6 + rlx_setB*((-0.6666666666666666*uy*Den) - m6) +// + (1-0.5*rlx_setB)*(-0.6666666666666666*Fy); +// jz = jz + Fz; +// m8 = m8 + rlx_setB*((-0.6666666666666666*uz*Den) - m8) +// + (1-0.5*rlx_setB)*(-0.6666666666666666*Fz); +// m9 = m9 + rlx_setA*((Den*(2*ux*ux-uy*uy-uz*uz)/porosity) - m9) +// + (1-0.5*rlx_setA)*(4*Fx*ux-2*Fy*uy-2*Fz*uz)/porosity; +// m10 = m10 + rlx_setA*(-0.5*Den*((2*ux*ux-uy*uy-uz*uz)/porosity)- m10) +// + (1-0.5*rlx_setA)*(-2*Fx*ux+Fy*uy+Fz*uz)/porosity; +// m11 = m11 + rlx_setA*((Den*(uy*uy-uz*uz)/porosity) - m11) +// + (1-0.5*rlx_setA)*(2*Fy*uy-2*Fz*uz)/porosity; +// m12 = m12 + rlx_setA*(-0.5*(Den*(uy*uy-uz*uz)/porosity)- m12) +// + (1-0.5*rlx_setA)*(-Fy*uy+Fz*uz)/porosity; +// m13 = m13 + rlx_setA*((Den*ux*uy/porosity) - m13) +// + (1-0.5*rlx_setA)*(Fy*ux+Fx*uy)/porosity; +// m14 = m14 + rlx_setA*((Den*uy*uz/porosity) - m14) +// + (1-0.5*rlx_setA)*(Fz*uy+Fy*uz)/porosity; +// m15 = m15 + rlx_setA*((Den*ux*uz/porosity) - m15) +// + (1-0.5*rlx_setA)*(Fz*ux+Fx*uz)/porosity; +// m16 = m16 + rlx_setB*( - m16); +// m17 = m17 + rlx_setB*( - m17); +// m18 = m18 + rlx_setB*( - m18); +// //....................................................................................................... + + + //-------------------- IMRT collison where body force has NO higher-order terms -------------// //..............carry out relaxation process............................................... 
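    // A brief sketch of the reduced scheme that follows (assuming the usual half-force
    // correction is retained only at leading order): the body force now enters solely
    // through the momentum moments,
    //   j_a       <- j_a + F_a                                           (a = x, y, z)
    //   m4,m6,m8  <- m + rlx_setB*((-2/3)*Den*u_a - m) + (1 - 0.5*rlx_setB)*(-2/3)*F_a
    // while m1, m2 and the stress moments m9..m15 relax toward their equilibria with no
    // velocity-dependent force term (those are exactly the pieces commented out above).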
- m1 = m1 + rlx_setA*((-30*Den+19*(ux*ux+uy*uy+uz*uz)/porosity + 57*pressure*porosity) - m1) - + (1-0.5*rlx_setA)*38*(Fx*ux+Fy*uy+Fz*uz)/porosity; - m2 = m2 + rlx_setA*((12*Den - 5.5*(ux*ux+uy*uy+uz*uz)/porosity-27*pressure*porosity) - m2) - + (1-0.5*rlx_setA)*11*(-Fx*ux-Fy*uy-Fz*uz)/porosity; + m1 = m1 + rlx_setA*((-30*Den+19*(ux*ux+uy*uy+uz*uz)/porosity + 57*pressure*porosity) - m1); + m2 = m2 + rlx_setA*((12*Den - 5.5*(ux*ux+uy*uy+uz*uz)/porosity-27*pressure*porosity) - m2); jx = jx + Fx; m4 = m4 + rlx_setB*((-0.6666666666666666*ux*Den) - m4) + (1-0.5*rlx_setB)*(-0.6666666666666666*Fx); @@ -744,25 +940,19 @@ extern "C" void ScaLBL_D3Q19_AAeven_Greyscale_IMRT(double *dist, int start, int jz = jz + Fz; m8 = m8 + rlx_setB*((-0.6666666666666666*uz*Den) - m8) + (1-0.5*rlx_setB)*(-0.6666666666666666*Fz); - m9 = m9 + rlx_setA*((Den*(2*ux*ux-uy*uy-uz*uz)/porosity) - m9) - + (1-0.5*rlx_setA)*(4*Fx*ux-2*Fy*uy-2*Fz*uz)/porosity; - m10 = m10 + rlx_setA*(-0.5*Den*((2*ux*ux-uy*uy-uz*uz)/porosity)- m10) - + (1-0.5*rlx_setA)*(-2*Fx*ux+Fy*uy+Fz*uz)/porosity; - m11 = m11 + rlx_setA*((Den*(uy*uy-uz*uz)/porosity) - m11) - + (1-0.5*rlx_setA)*(2*Fy*uy-2*Fz*uz)/porosity; - m12 = m12 + rlx_setA*(-0.5*(Den*(uy*uy-uz*uz)/porosity)- m12) - + (1-0.5*rlx_setA)*(-Fy*uy+Fz*uz)/porosity; - m13 = m13 + rlx_setA*((Den*ux*uy/porosity) - m13) - + (1-0.5*rlx_setA)*(Fy*ux+Fx*uy)/porosity; - m14 = m14 + rlx_setA*((Den*uy*uz/porosity) - m14) - + (1-0.5*rlx_setA)*(Fz*uy+Fy*uz)/porosity; - m15 = m15 + rlx_setA*((Den*ux*uz/porosity) - m15) - + (1-0.5*rlx_setA)*(Fz*ux+Fx*uz)/porosity; + m9 = m9 + rlx_setA*((Den*(2*ux*ux-uy*uy-uz*uz)/porosity) - m9); + m10 = m10 + rlx_setA*(-0.5*Den*((2*ux*ux-uy*uy-uz*uz)/porosity)- m10); + m11 = m11 + rlx_setA*((Den*(uy*uy-uz*uz)/porosity) - m11); + m12 = m12 + rlx_setA*(-0.5*(Den*(uy*uy-uz*uz)/porosity)- m12); + m13 = m13 + rlx_setA*((Den*ux*uy/porosity) - m13); + m14 = m14 + rlx_setA*((Den*uy*uz/porosity) - m14); + m15 = m15 + rlx_setA*((Den*ux*uz/porosity) - m15); m16 = m16 + rlx_setB*( - m16); m17 = m17 + rlx_setB*( - m17); m18 = m18 + rlx_setB*( - m18); //....................................................................................................... + //.................inverse transformation...................................................... // q=0 fq = mrt_V1*Den-mrt_V2*m1+mrt_V3*m2; @@ -1209,11 +1399,45 @@ extern "C" void ScaLBL_D3Q19_AAodd_Greyscale_IMRT(int *neighborList, double *dis //Calculate pressure for Incompressible-MRT model pressure=0.5/porosity*(pressure-0.5*Den*u_mag*u_mag/porosity); + //-------------------- IMRT collison where body force has higher-order terms -------------// +// //..............carry out relaxation process............................................... 
+// m1 = m1 + rlx_setA*((-30*Den+19*(ux*ux+uy*uy+uz*uz)/porosity + 57*pressure*porosity) - m1) +// + (1-0.5*rlx_setA)*38*(Fx*ux+Fy*uy+Fz*uz)/porosity; +// m2 = m2 + rlx_setA*((12*Den - 5.5*(ux*ux+uy*uy+uz*uz)/porosity-27*pressure*porosity) - m2) +// + (1-0.5*rlx_setA)*11*(-Fx*ux-Fy*uy-Fz*uz)/porosity; +// jx = jx + Fx; +// m4 = m4 + rlx_setB*((-0.6666666666666666*ux*Den) - m4) +// + (1-0.5*rlx_setB)*(-0.6666666666666666*Fx); +// jy = jy + Fy; +// m6 = m6 + rlx_setB*((-0.6666666666666666*uy*Den) - m6) +// + (1-0.5*rlx_setB)*(-0.6666666666666666*Fy); +// jz = jz + Fz; +// m8 = m8 + rlx_setB*((-0.6666666666666666*uz*Den) - m8) +// + (1-0.5*rlx_setB)*(-0.6666666666666666*Fz); +// m9 = m9 + rlx_setA*((Den*(2*ux*ux-uy*uy-uz*uz)/porosity) - m9) +// + (1-0.5*rlx_setA)*(4*Fx*ux-2*Fy*uy-2*Fz*uz)/porosity; +// m10 = m10 + rlx_setA*(-0.5*Den*((2*ux*ux-uy*uy-uz*uz)/porosity)- m10) +// + (1-0.5*rlx_setA)*(-2*Fx*ux+Fy*uy+Fz*uz)/porosity; +// m11 = m11 + rlx_setA*((Den*(uy*uy-uz*uz)/porosity) - m11) +// + (1-0.5*rlx_setA)*(2*Fy*uy-2*Fz*uz)/porosity; +// m12 = m12 + rlx_setA*(-0.5*(Den*(uy*uy-uz*uz)/porosity)- m12) +// + (1-0.5*rlx_setA)*(-Fy*uy+Fz*uz)/porosity; +// m13 = m13 + rlx_setA*((Den*ux*uy/porosity) - m13) +// + (1-0.5*rlx_setA)*(Fy*ux+Fx*uy)/porosity; +// m14 = m14 + rlx_setA*((Den*uy*uz/porosity) - m14) +// + (1-0.5*rlx_setA)*(Fz*uy+Fy*uz)/porosity; +// m15 = m15 + rlx_setA*((Den*ux*uz/porosity) - m15) +// + (1-0.5*rlx_setA)*(Fz*ux+Fx*uz)/porosity; +// m16 = m16 + rlx_setB*( - m16); +// m17 = m17 + rlx_setB*( - m17); +// m18 = m18 + rlx_setB*( - m18); +// //....................................................................................................... + + + //-------------------- IMRT collison where body force has NO higher-order terms -------------// //..............carry out relaxation process............................................... 
- m1 = m1 + rlx_setA*((-30*Den+19*(ux*ux+uy*uy+uz*uz)/porosity + 57*pressure*porosity) - m1) - + (1-0.5*rlx_setA)*38*(Fx*ux+Fy*uy+Fz*uz)/porosity; - m2 = m2 + rlx_setA*((12*Den - 5.5*(ux*ux+uy*uy+uz*uz)/porosity-27*pressure*porosity) - m2) - + (1-0.5*rlx_setA)*11*(-Fx*ux-Fy*uy-Fz*uz)/porosity; + m1 = m1 + rlx_setA*((-30*Den+19*(ux*ux+uy*uy+uz*uz)/porosity + 57*pressure*porosity) - m1); + m2 = m2 + rlx_setA*((12*Den - 5.5*(ux*ux+uy*uy+uz*uz)/porosity-27*pressure*porosity) - m2); jx = jx + Fx; m4 = m4 + rlx_setB*((-0.6666666666666666*ux*Den) - m4) + (1-0.5*rlx_setB)*(-0.6666666666666666*Fx); @@ -1223,25 +1447,19 @@ extern "C" void ScaLBL_D3Q19_AAodd_Greyscale_IMRT(int *neighborList, double *dis jz = jz + Fz; m8 = m8 + rlx_setB*((-0.6666666666666666*uz*Den) - m8) + (1-0.5*rlx_setB)*(-0.6666666666666666*Fz); - m9 = m9 + rlx_setA*((Den*(2*ux*ux-uy*uy-uz*uz)/porosity) - m9) - + (1-0.5*rlx_setA)*(4*Fx*ux-2*Fy*uy-2*Fz*uz)/porosity; - m10 = m10 + rlx_setA*(-0.5*Den*((2*ux*ux-uy*uy-uz*uz)/porosity)- m10) - + (1-0.5*rlx_setA)*(-2*Fx*ux+Fy*uy+Fz*uz)/porosity; - m11 = m11 + rlx_setA*((Den*(uy*uy-uz*uz)/porosity) - m11) - + (1-0.5*rlx_setA)*(2*Fy*uy-2*Fz*uz)/porosity; - m12 = m12 + rlx_setA*(-0.5*(Den*(uy*uy-uz*uz)/porosity)- m12) - + (1-0.5*rlx_setA)*(-Fy*uy+Fz*uz)/porosity; - m13 = m13 + rlx_setA*((Den*ux*uy/porosity) - m13) - + (1-0.5*rlx_setA)*(Fy*ux+Fx*uy)/porosity; - m14 = m14 + rlx_setA*((Den*uy*uz/porosity) - m14) - + (1-0.5*rlx_setA)*(Fz*uy+Fy*uz)/porosity; - m15 = m15 + rlx_setA*((Den*ux*uz/porosity) - m15) - + (1-0.5*rlx_setA)*(Fz*ux+Fx*uz)/porosity; + m9 = m9 + rlx_setA*((Den*(2*ux*ux-uy*uy-uz*uz)/porosity) - m9); + m10 = m10 + rlx_setA*(-0.5*Den*((2*ux*ux-uy*uy-uz*uz)/porosity)- m10); + m11 = m11 + rlx_setA*((Den*(uy*uy-uz*uz)/porosity) - m11); + m12 = m12 + rlx_setA*(-0.5*(Den*(uy*uy-uz*uz)/porosity)- m12); + m13 = m13 + rlx_setA*((Den*ux*uy/porosity) - m13); + m14 = m14 + rlx_setA*((Den*uy*uz/porosity) - m14); + m15 = m15 + rlx_setA*((Den*ux*uz/porosity) - m15); m16 = m16 + rlx_setB*( - m16); m17 = m17 + rlx_setB*( - m17); m18 = m18 + rlx_setB*( - m18); //....................................................................................................... - + + //.................inverse transformation...................................................... // q=0 fq = mrt_V1*Den-mrt_V2*m1+mrt_V3*m2; From 46b8c1de7fb0253c3a14414a5b1036f0a3f4b972 Mon Sep 17 00:00:00 2001 From: Rex Zhe Li Date: Sat, 1 Feb 2020 17:22:13 -0500 Subject: [PATCH 039/121] GPU version update: remove higher-order terms in body force --- gpu/Greyscale.cu | 414 +++++++++++++++++++++++++++++++++++------------ 1 file changed, 313 insertions(+), 101 deletions(-) diff --git a/gpu/Greyscale.cu b/gpu/Greyscale.cu index d3fd52ab..0a9a63e0 100644 --- a/gpu/Greyscale.cu +++ b/gpu/Greyscale.cu @@ -77,93 +77,173 @@ __global__ void dvc_ScaLBL_D3Q19_AAeven_Greyscale(double *dist, int start, int f Fz=Gz; } + //------------------------ BGK collison where body force has higher-order terms ----------------------------------------------------------// +// // q=0 +// dist[n] = f0*(1.0-rlx)+ rlx*0.3333333333333333*rho*(1. - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) +// + 0.3333333333333333*rho*(1. - 0.5*rlx)*(Fx*(0. - (3.*ux)/porosity) + Fy*(0. - (3.*uy)/porosity) + Fz*(0. - (3.*uz)/porosity)); +// +// // q = 1 +// dist[1*Np+n] = f1*(1.0-rlx) + rlx*0.05555555555555555*rho*(1 + 3.*ux + (4.5*ux*ux)/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) +// +0.05555555555555555*rho*(1. - 0.5*rlx)*(Fx*(3. + (6.*ux)/porosity) + Fy*(0. 
- (3.*uy)/porosity) + Fz*(0. - (3.*uz)/porosity)); +// +// // q=2 +// dist[2*Np+n] = f2*(1.0-rlx) + rlx*0.05555555555555555*rho*(1 - 3.*ux + (4.5*ux*ux)/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) +// +0.05555555555555555*rho*(1. - 0.5*rlx)*(Fx*(-3. + (6.*ux)/porosity) + Fy*(0. - (3.*uy)/porosity) + Fz*(0. - (3.*uz)/porosity)); +// +// // q = 3 +// dist[3*Np+n] = f3*(1.0-rlx) + rlx*0.05555555555555555*rho*(1 + 3.*uy + (4.5*uy*uy)/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) +// +0.05555555555555555*rho*(1. - 0.5*rlx)*(Fx*(0. - (3.*ux)/porosity) + Fy*(3. + (6.*uy)/porosity) + Fz*(0. - (3.*uz)/porosity)); +// +// // q = 4 +// dist[4*Np+n] = f4*(1.0-rlx) + rlx*0.05555555555555555*rho*(1 - 3.*uy + (4.5*uy*uy)/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) +// +0.05555555555555555*rho*(1. - 0.5*rlx)*(Fx*(0. - (3.*ux)/porosity) + Fy*(-3. + (6.*uy)/porosity) + Fz*(0. - (3.*uz)/porosity)); +// +// // q = 5 +// dist[5*Np+n] = f5*(1.0-rlx) + rlx*0.05555555555555555*rho*(1 + 3.*uz + (4.5*uz*uz)/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) +// +0.05555555555555555*rho*(1. - 0.5*rlx)*(Fx*(0. - (3.*ux)/porosity) + Fy*(0. - (3.*uy)/porosity) + Fz*(3. + (6.*uz)/porosity)); +// +// // q = 6 +// dist[6*Np+n] = f6*(1.0-rlx) + rlx*0.05555555555555555*rho*(1 - 3.*uz + (4.5*uz*uz)/porosity - (1.5*(ux*ux+ uy*uy + uz*uz))/porosity) +// +0.05555555555555555*rho*(1. - 0.5*rlx)*(Fx*(0. - (3.*ux)/porosity) + Fy*(0. - (3.*uy)/porosity) + Fz*(-3. + (6.*uz)/porosity)); +// +// // q = 7 +// dist[7*Np+n] = f7*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(ux + uy) + (4.5*(ux + uy)*(ux + uy))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) +// +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(3. - (3.*ux)/porosity + (9.*(ux + uy))/porosity) + Fy*(3. - (3.*uy)/porosity + (9.*(ux + uy))/porosity) + +// Fz*(0. - (3.*uz)/porosity)); +// +// // q = 8 +// dist[8*Np+n] = f8*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(-ux - uy) + (4.5*(-ux - uy)*(-ux - uy))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) +// +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(-3. - (3.*ux)/porosity - (9.*(-ux - uy))/porosity) + Fy*(-3. - (9.*(-ux - uy))/porosity - (3.*uy)/porosity) + +// Fz*(0. - (3.*uz)/porosity)); +// +// // q = 9 +// dist[9*Np+n] = f9*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(ux - uy) + (4.5*(ux - uy)*(ux - uy))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) +// +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(3. - (3.*ux)/porosity + (9.*(ux - uy))/porosity) + Fy*(-3. - (9.*(ux - uy))/porosity - (3.*uy)/porosity) + +// Fz*(0. - (3.*uz)/porosity)); +// +// // q = 10 +// dist[10*Np+n] = f10*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(-ux + uy) + (4.5*(-ux + uy)*(-ux + uy))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) +// +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(-3. - (3.*ux)/porosity - (9.*(-ux + uy))/porosity) + Fy*(3. - (3.*uy)/porosity + (9.*(-ux + uy))/porosity) + +// Fz*(0. - (3.*uz)/porosity)); +// +// // q = 11 +// dist[11*Np+n] = f11*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(ux + uz) + (4.5*(ux + uz)*(ux + uz))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) +// +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fy*(0. - (3.*uy)/porosity) + Fx*(3. - (3.*ux)/porosity + (9.*(ux + uz))/porosity) + +// Fz*(3. - (3.*uz)/porosity + (9.*(ux + uz))/porosity)); +// +// // q = 12 +// dist[12*Np+n] = f12*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(-ux - uz) + (4.5*(-ux - uz)*(-ux - uz))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) +// +0.027777777777777776*rho*(1. 
- 0.5*rlx)*(Fy*(0. - (3.*uy)/porosity) + Fx*(-3. - (3.*ux)/porosity - (9.*(-ux - uz))/porosity) + +// Fz*(-3. - (9.*(-ux - uz))/porosity - (3.*uz)/porosity)); +// +// // q = 13 +// dist[13*Np+n] = f13*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(ux - uz) + (4.5*(ux - uz)*(ux - uz))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) +// +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fy*(0. - (3.*uy)/porosity) + Fx*(3. - (3.*ux)/porosity + (9.*(ux - uz))/porosity) + +// Fz*(-3. - (9.*(ux - uz))/porosity - (3.*uz)/porosity)); +// +// // q= 14 +// dist[14*Np+n] = f14*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(-ux + uz) + (4.5*(-ux + uz)*(-ux + uz))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) +// +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fy*(0. - (3.*uy)/porosity) + Fx*(-3. - (3.*ux)/porosity - (9.*(-ux + uz))/porosity) + +// Fz*(3. - (3.*uz)/porosity + (9.*(-ux + uz))/porosity)); +// +// // q = 15 +// dist[15*Np+n] = f15*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(uy + uz) + (4.5*(uy + uz)*(uy + uz))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) +// +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(0. - (3.*ux)/porosity) + Fy*(3. - (3.*uy)/porosity + (9.*(uy + uz))/porosity) + +// Fz*(3. - (3.*uz)/porosity + (9.*(uy + uz))/porosity)); +// +// // q = 16 +// dist[16*Np+n] = f16*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(-uy - uz) + (4.5*(-uy - uz)*(-uy - uz))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) +// +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(0. - (3.*ux)/porosity) + Fy*(-3. - (3.*uy)/porosity - (9.*(-uy - uz))/porosity) + +// Fz*(-3. - (9.*(-uy - uz))/porosity - (3.*uz)/porosity)); +// +// // q = 17 +// dist[17*Np+n] = f17*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(uy - uz) + (4.5*(uy - uz)*(uy - uz))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) +// +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(0. - (3.*ux)/porosity) + Fy*(3. - (3.*uy)/porosity + (9.*(uy - uz))/porosity) + +// Fz*(-3. - (9.*(uy - uz))/porosity - (3.*uz)/porosity)); +// +// // q = 18 +// dist[18*Np+n] = f18*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(-uy + uz) + (4.5*(-uy + uz)*(-uy + uz))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) +// +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(0. - (3.*ux)/porosity) + Fy*(-3. - (3.*uy)/porosity - (9.*(-uy + uz))/porosity) + +// Fz*(3. - (3.*uz)/porosity + (9.*(-uy + uz))/porosity)); + //----------------------------------------------------------------------------------------------------------------------------------------// + + + //------------------------ BGK collison where body force has NO higher-order terms ----------------------------------------------------------// // q=0 - dist[n] = f0*(1.0-rlx)+ rlx*0.3333333333333333*rho*(1. - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) - + 0.3333333333333333*rho*(1. - 0.5*rlx)*(Fx*(0. - (3.*ux)/porosity) + Fy*(0. - (3.*uy)/porosity) + Fz*(0. - (3.*uz)/porosity)); + dist[n] = f0*(1.0-rlx)+ rlx*0.3333333333333333*rho*(1. - (1.5*(ux*ux + uy*uy + uz*uz))/porosity); // q = 1 dist[1*Np+n] = f1*(1.0-rlx) + rlx*0.05555555555555555*rho*(1 + 3.*ux + (4.5*ux*ux)/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) - +0.05555555555555555*rho*(1. - 0.5*rlx)*(Fx*(3. + (6.*ux)/porosity) + Fy*(0. - (3.*uy)/porosity) + Fz*(0. - (3.*uz)/porosity)); + +0.05555555555555555*rho*(1. - 0.5*rlx)*(Fx*(3.)); // q=2 dist[2*Np+n] = f2*(1.0-rlx) + rlx*0.05555555555555555*rho*(1 - 3.*ux + (4.5*ux*ux)/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) - +0.05555555555555555*rho*(1. - 0.5*rlx)*(Fx*(-3. 
+ (6.*ux)/porosity) + Fy*(0. - (3.*uy)/porosity) + Fz*(0. - (3.*uz)/porosity)); + +0.05555555555555555*rho*(1. - 0.5*rlx)*(Fx*(-3.)); // q = 3 dist[3*Np+n] = f3*(1.0-rlx) + rlx*0.05555555555555555*rho*(1 + 3.*uy + (4.5*uy*uy)/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) - +0.05555555555555555*rho*(1. - 0.5*rlx)*(Fx*(0. - (3.*ux)/porosity) + Fy*(3. + (6.*uy)/porosity) + Fz*(0. - (3.*uz)/porosity)); + +0.05555555555555555*rho*(1. - 0.5*rlx)*(Fy*(3.)); // q = 4 dist[4*Np+n] = f4*(1.0-rlx) + rlx*0.05555555555555555*rho*(1 - 3.*uy + (4.5*uy*uy)/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) - +0.05555555555555555*rho*(1. - 0.5*rlx)*(Fx*(0. - (3.*ux)/porosity) + Fy*(-3. + (6.*uy)/porosity) + Fz*(0. - (3.*uz)/porosity)); + +0.05555555555555555*rho*(1. - 0.5*rlx)*(Fy*(-3.)); // q = 5 dist[5*Np+n] = f5*(1.0-rlx) + rlx*0.05555555555555555*rho*(1 + 3.*uz + (4.5*uz*uz)/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) - +0.05555555555555555*rho*(1. - 0.5*rlx)*(Fx*(0. - (3.*ux)/porosity) + Fy*(0. - (3.*uy)/porosity) + Fz*(3. + (6.*uz)/porosity)); + +0.05555555555555555*rho*(1. - 0.5*rlx)*(Fz*(3.)); // q = 6 dist[6*Np+n] = f6*(1.0-rlx) + rlx*0.05555555555555555*rho*(1 - 3.*uz + (4.5*uz*uz)/porosity - (1.5*(ux*ux+ uy*uy + uz*uz))/porosity) - +0.05555555555555555*rho*(1. - 0.5*rlx)*(Fx*(0. - (3.*ux)/porosity) + Fy*(0. - (3.*uy)/porosity) + Fz*(-3. + (6.*uz)/porosity)); + +0.05555555555555555*rho*(1. - 0.5*rlx)*(Fz*(-3.)); // q = 7 dist[7*Np+n] = f7*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(ux + uy) + (4.5*(ux + uy)*(ux + uy))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) - +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(3. - (3.*ux)/porosity + (9.*(ux + uy))/porosity) + Fy*(3. - (3.*uy)/porosity + (9.*(ux + uy))/porosity) + - Fz*(0. - (3.*uz)/porosity)); + +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(3.) + Fy*(3.)); // q = 8 dist[8*Np+n] = f8*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(-ux - uy) + (4.5*(-ux - uy)*(-ux - uy))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) - +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(-3. - (3.*ux)/porosity - (9.*(-ux - uy))/porosity) + Fy*(-3. - (9.*(-ux - uy))/porosity - (3.*uy)/porosity) + - Fz*(0. - (3.*uz)/porosity)); + +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(-3.) + Fy*(-3.)); // q = 9 dist[9*Np+n] = f9*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(ux - uy) + (4.5*(ux - uy)*(ux - uy))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) - +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(3. - (3.*ux)/porosity + (9.*(ux - uy))/porosity) + Fy*(-3. - (9.*(ux - uy))/porosity - (3.*uy)/porosity) + - Fz*(0. - (3.*uz)/porosity)); + +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(3.) + Fy*(-3.)); // q = 10 dist[10*Np+n] = f10*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(-ux + uy) + (4.5*(-ux + uy)*(-ux + uy))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) - +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(-3. - (3.*ux)/porosity - (9.*(-ux + uy))/porosity) + Fy*(3. - (3.*uy)/porosity + (9.*(-ux + uy))/porosity) + - Fz*(0. - (3.*uz)/porosity)); + +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(-3.) + Fy*(3.)); // q = 11 dist[11*Np+n] = f11*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(ux + uz) + (4.5*(ux + uz)*(ux + uz))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) - +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fy*(0. - (3.*uy)/porosity) + Fx*(3. - (3.*ux)/porosity + (9.*(ux + uz))/porosity) + - Fz*(3. - (3.*uz)/porosity + (9.*(ux + uz))/porosity)); + +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(3.) 
+ Fz*(3.)); // q = 12 dist[12*Np+n] = f12*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(-ux - uz) + (4.5*(-ux - uz)*(-ux - uz))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) - +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fy*(0. - (3.*uy)/porosity) + Fx*(-3. - (3.*ux)/porosity - (9.*(-ux - uz))/porosity) + - Fz*(-3. - (9.*(-ux - uz))/porosity - (3.*uz)/porosity)); + +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(-3.) + Fz*(-3.)); // q = 13 dist[13*Np+n] = f13*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(ux - uz) + (4.5*(ux - uz)*(ux - uz))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) - +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fy*(0. - (3.*uy)/porosity) + Fx*(3. - (3.*ux)/porosity + (9.*(ux - uz))/porosity) + - Fz*(-3. - (9.*(ux - uz))/porosity - (3.*uz)/porosity)); + +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(3.) + Fz*(-3.)); // q= 14 dist[14*Np+n] = f14*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(-ux + uz) + (4.5*(-ux + uz)*(-ux + uz))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) - +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fy*(0. - (3.*uy)/porosity) + Fx*(-3. - (3.*ux)/porosity - (9.*(-ux + uz))/porosity) + - Fz*(3. - (3.*uz)/porosity + (9.*(-ux + uz))/porosity)); + +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(-3.) + Fz*(3.)); // q = 15 dist[15*Np+n] = f15*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(uy + uz) + (4.5*(uy + uz)*(uy + uz))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) - +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(0. - (3.*ux)/porosity) + Fy*(3. - (3.*uy)/porosity + (9.*(uy + uz))/porosity) + - Fz*(3. - (3.*uz)/porosity + (9.*(uy + uz))/porosity)); + +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fy*(3.) + Fz*(3.)); // q = 16 dist[16*Np+n] = f16*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(-uy - uz) + (4.5*(-uy - uz)*(-uy - uz))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) - +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(0. - (3.*ux)/porosity) + Fy*(-3. - (3.*uy)/porosity - (9.*(-uy - uz))/porosity) + - Fz*(-3. - (9.*(-uy - uz))/porosity - (3.*uz)/porosity)); + +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fy*(-3.) + Fz*(-3.)); // q = 17 dist[17*Np+n] = f17*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(uy - uz) + (4.5*(uy - uz)*(uy - uz))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) - +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(0. - (3.*ux)/porosity) + Fy*(3. - (3.*uy)/porosity + (9.*(uy - uz))/porosity) + - Fz*(-3. - (9.*(uy - uz))/porosity - (3.*uz)/porosity)); + +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fy*(3.) + Fz*(-3.)); // q = 18 dist[18*Np+n] = f18*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(-uy + uz) + (4.5*(-uy + uz)*(-uy + uz))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) - +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(0. - (3.*ux)/porosity) + Fy*(-3. - (3.*uy)/porosity - (9.*(-uy + uz))/porosity) + - Fz*(3. - (3.*uz)/porosity + (9.*(-uy + uz))/porosity)); + +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fy*(-3.) + Fz*(3.)); + //-------------------------------------------------------------------------------------------------------------------------------------------// //Update velocity on device Velocity[0*Np+n] = ux; @@ -304,93 +384,174 @@ __global__ void dvc_ScaLBL_D3Q19_AAodd_Greyscale(int *neighborList, double *dist Fz=Gz; } + //------------------------ BGK collison where body force has higher-order terms ----------------------------------------------------------// +// // q=0 +// dist[n] = f0*(1.0-rlx) + rlx*0.3333333333333333*rho*(1. 
- (1.5*(ux*ux + uy*uy + uz*uz))/porosity) +// + 0.3333333333333333*rho*(1. - 0.5*rlx)*(Fx*(0. - (3.*ux)/porosity) + Fy*(0. - (3.*uy)/porosity) + Fz*(0. - (3.*uz)/porosity)); +// +// // q = 1 +// dist[nr2] = f1*(1.0-rlx) + rlx*0.05555555555555555*rho*(1 + 3.*ux + (4.5*ux*ux)/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) +// +0.05555555555555555*rho*(1. - 0.5*rlx)*(Fx*(3. + (6.*ux)/porosity) + Fy*(0. - (3.*uy)/porosity) + Fz*(0. - (3.*uz)/porosity)); +// +// // q=2 +// dist[nr1] = f2*(1.0-rlx) + rlx*0.05555555555555555*rho*(1 - 3.*ux + (4.5*ux*ux)/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) +// +0.05555555555555555*rho*(1. - 0.5*rlx)*(Fx*(-3. + (6.*ux)/porosity) + Fy*(0. - (3.*uy)/porosity) + Fz*(0. - (3.*uz)/porosity)); +// +// // q = 3 +// dist[nr4] = f3*(1.0-rlx) + rlx*0.05555555555555555*rho*(1 + 3.*uy + (4.5*uy*uy)/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) +// +0.05555555555555555*rho*(1. - 0.5*rlx)*(Fx*(0. - (3.*ux)/porosity) + Fy*(3. + (6.*uy)/porosity) + Fz*(0. - (3.*uz)/porosity)); +// +// // q = 4 +// dist[nr3] = f4*(1.0-rlx) + rlx*0.05555555555555555*rho*(1 - 3.*uy + (4.5*uy*uy)/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) +// +0.05555555555555555*rho*(1. - 0.5*rlx)*(Fx*(0. - (3.*ux)/porosity) + Fy*(-3. + (6.*uy)/porosity) + Fz*(0. - (3.*uz)/porosity)); +// +// // q = 5 +// dist[nr6] = f5*(1.0-rlx) + rlx*0.05555555555555555*rho*(1 + 3.*uz + (4.5*uz*uz)/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) +// +0.05555555555555555*rho*(1. - 0.5*rlx)*(Fx*(0. - (3.*ux)/porosity) + Fy*(0. - (3.*uy)/porosity) + Fz*(3. + (6.*uz)/porosity)); +// +// // q = 6 +// dist[nr5] = f6*(1.0-rlx) + rlx*0.05555555555555555*rho*(1 - 3.*uz + (4.5*uz*uz)/porosity - (1.5*(ux*ux+ uy*uy + uz*uz))/porosity) +// +0.05555555555555555*rho*(1. - 0.5*rlx)*(Fx*(0. - (3.*ux)/porosity) + Fy*(0. - (3.*uy)/porosity) + Fz*(-3. + (6.*uz)/porosity)); +// +// // q = 7 +// dist[nr8] = f7*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(ux + uy) + (4.5*(ux + uy)*(ux + uy))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) +// +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(3. - (3.*ux)/porosity + (9.*(ux + uy))/porosity) + Fy*(3. - (3.*uy)/porosity + (9.*(ux + uy))/porosity) + +// Fz*(0. - (3.*uz)/porosity)); +// +// // q = 8 +// dist[nr7] = f8*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(-ux - uy) + (4.5*(-ux - uy)*(-ux - uy))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) +// +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(-3. - (3.*ux)/porosity - (9.*(-ux - uy))/porosity) + Fy*(-3. - (9.*(-ux - uy))/porosity - (3.*uy)/porosity) + +// Fz*(0. - (3.*uz)/porosity)); +// +// // q = 9 +// dist[nr10] = f9*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(ux - uy) + (4.5*(ux - uy)*(ux - uy))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) +// +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(3. - (3.*ux)/porosity + (9.*(ux - uy))/porosity) + Fy*(-3. - (9.*(ux - uy))/porosity - (3.*uy)/porosity) + +// Fz*(0. - (3.*uz)/porosity)); +// +// // q = 10 +// dist[nr9] = f10*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(-ux + uy) + (4.5*(-ux + uy)*(-ux + uy))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) +// +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(-3. - (3.*ux)/porosity - (9.*(-ux + uy))/porosity) + Fy*(3. - (3.*uy)/porosity + (9.*(-ux + uy))/porosity) + +// Fz*(0. 
- (3.*uz)/porosity)); +// +// // q = 11 +// dist[nr12] = f11*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(ux + uz) + (4.5*(ux + uz)*(ux + uz))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) +// +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fy*(0. - (3.*uy)/porosity) + Fx*(3. - (3.*ux)/porosity + (9.*(ux + uz))/porosity) + +// Fz*(3. - (3.*uz)/porosity + (9.*(ux + uz))/porosity)); +// +// // q = 12 +// dist[nr11] = f12*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(-ux - uz) + (4.5*(-ux - uz)*(-ux - uz))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) +// +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fy*(0. - (3.*uy)/porosity) + Fx*(-3. - (3.*ux)/porosity - (9.*(-ux - uz))/porosity) + +// Fz*(-3. - (9.*(-ux - uz))/porosity - (3.*uz)/porosity)); +// +// // q = 13 +// dist[nr14] = f13*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(ux - uz) + (4.5*(ux - uz)*(ux - uz))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) +// +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fy*(0. - (3.*uy)/porosity) + Fx*(3. - (3.*ux)/porosity + (9.*(ux - uz))/porosity) + +// Fz*(-3. - (9.*(ux - uz))/porosity - (3.*uz)/porosity)); +// +// // q= 14 +// dist[nr13] = f14*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(-ux + uz) + (4.5*(-ux + uz)*(-ux + uz))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) +// +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fy*(0. - (3.*uy)/porosity) + Fx*(-3. - (3.*ux)/porosity - (9.*(-ux + uz))/porosity) + +// Fz*(3. - (3.*uz)/porosity + (9.*(-ux + uz))/porosity)); +// +// // q = 15 +// dist[nr16] = f15*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(uy + uz) + (4.5*(uy + uz)*(uy + uz))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) +// +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(0. - (3.*ux)/porosity) + Fy*(3. - (3.*uy)/porosity + (9.*(uy + uz))/porosity) + +// Fz*(3. - (3.*uz)/porosity + (9.*(uy + uz))/porosity)); +// +// // q = 16 +// dist[nr15] = f16*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(-uy - uz) + (4.5*(-uy - uz)*(-uy - uz))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) +// +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(0. - (3.*ux)/porosity) + Fy*(-3. - (3.*uy)/porosity - (9.*(-uy - uz))/porosity) + +// Fz*(-3. - (9.*(-uy - uz))/porosity - (3.*uz)/porosity)); +// +// // q = 17 +// dist[nr18] = f17*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(uy - uz) + (4.5*(uy - uz)*(uy - uz))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) +// +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(0. - (3.*ux)/porosity) + Fy*(3. - (3.*uy)/porosity + (9.*(uy - uz))/porosity) + +// Fz*(-3. - (9.*(uy - uz))/porosity - (3.*uz)/porosity)); +// +// // q = 18 +// dist[nr17] = f18*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(-uy + uz) + (4.5*(-uy + uz)*(-uy + uz))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) +// +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(0. - (3.*ux)/porosity) + Fy*(-3. - (3.*uy)/porosity - (9.*(-uy + uz))/porosity) + +// Fz*(3. - (3.*uz)/porosity + (9.*(-uy + uz))/porosity)); + //----------------------------------------------------------------------------------------------------------------------------------------// + + + //------------------------ BGK collison where body force has NO higher-order terms ----------------------------------------------------------// // q=0 - dist[n] = f0*(1.0-rlx) + rlx*0.3333333333333333*rho*(1. - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) - + 0.3333333333333333*rho*(1. - 0.5*rlx)*(Fx*(0. - (3.*ux)/porosity) + Fy*(0. - (3.*uy)/porosity) + Fz*(0. 
- (3.*uz)/porosity)); + dist[n] = f0*(1.0-rlx) + rlx*0.3333333333333333*rho*(1. - (1.5*(ux*ux + uy*uy + uz*uz))/porosity); // q = 1 dist[nr2] = f1*(1.0-rlx) + rlx*0.05555555555555555*rho*(1 + 3.*ux + (4.5*ux*ux)/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) - +0.05555555555555555*rho*(1. - 0.5*rlx)*(Fx*(3. + (6.*ux)/porosity) + Fy*(0. - (3.*uy)/porosity) + Fz*(0. - (3.*uz)/porosity)); + +0.05555555555555555*rho*(1. - 0.5*rlx)*(Fx*(3.)); // q=2 dist[nr1] = f2*(1.0-rlx) + rlx*0.05555555555555555*rho*(1 - 3.*ux + (4.5*ux*ux)/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) - +0.05555555555555555*rho*(1. - 0.5*rlx)*(Fx*(-3. + (6.*ux)/porosity) + Fy*(0. - (3.*uy)/porosity) + Fz*(0. - (3.*uz)/porosity)); + +0.05555555555555555*rho*(1. - 0.5*rlx)*(Fx*(-3.)); // q = 3 dist[nr4] = f3*(1.0-rlx) + rlx*0.05555555555555555*rho*(1 + 3.*uy + (4.5*uy*uy)/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) - +0.05555555555555555*rho*(1. - 0.5*rlx)*(Fx*(0. - (3.*ux)/porosity) + Fy*(3. + (6.*uy)/porosity) + Fz*(0. - (3.*uz)/porosity)); + +0.05555555555555555*rho*(1. - 0.5*rlx)*(Fy*(3.)); // q = 4 dist[nr3] = f4*(1.0-rlx) + rlx*0.05555555555555555*rho*(1 - 3.*uy + (4.5*uy*uy)/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) - +0.05555555555555555*rho*(1. - 0.5*rlx)*(Fx*(0. - (3.*ux)/porosity) + Fy*(-3. + (6.*uy)/porosity) + Fz*(0. - (3.*uz)/porosity)); + +0.05555555555555555*rho*(1. - 0.5*rlx)*(Fy*(-3.)); // q = 5 dist[nr6] = f5*(1.0-rlx) + rlx*0.05555555555555555*rho*(1 + 3.*uz + (4.5*uz*uz)/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) - +0.05555555555555555*rho*(1. - 0.5*rlx)*(Fx*(0. - (3.*ux)/porosity) + Fy*(0. - (3.*uy)/porosity) + Fz*(3. + (6.*uz)/porosity)); + +0.05555555555555555*rho*(1. - 0.5*rlx)*(Fz*(3.)); // q = 6 dist[nr5] = f6*(1.0-rlx) + rlx*0.05555555555555555*rho*(1 - 3.*uz + (4.5*uz*uz)/porosity - (1.5*(ux*ux+ uy*uy + uz*uz))/porosity) - +0.05555555555555555*rho*(1. - 0.5*rlx)*(Fx*(0. - (3.*ux)/porosity) + Fy*(0. - (3.*uy)/porosity) + Fz*(-3. + (6.*uz)/porosity)); + +0.05555555555555555*rho*(1. - 0.5*rlx)*(Fz*(-3.)); // q = 7 dist[nr8] = f7*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(ux + uy) + (4.5*(ux + uy)*(ux + uy))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) - +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(3. - (3.*ux)/porosity + (9.*(ux + uy))/porosity) + Fy*(3. - (3.*uy)/porosity + (9.*(ux + uy))/porosity) + - Fz*(0. - (3.*uz)/porosity)); + +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(3.) + Fy*(3.)); // q = 8 dist[nr7] = f8*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(-ux - uy) + (4.5*(-ux - uy)*(-ux - uy))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) - +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(-3. - (3.*ux)/porosity - (9.*(-ux - uy))/porosity) + Fy*(-3. - (9.*(-ux - uy))/porosity - (3.*uy)/porosity) + - Fz*(0. - (3.*uz)/porosity)); + +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(-3.) + Fy*(-3.)); // q = 9 dist[nr10] = f9*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(ux - uy) + (4.5*(ux - uy)*(ux - uy))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) - +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(3. - (3.*ux)/porosity + (9.*(ux - uy))/porosity) + Fy*(-3. - (9.*(ux - uy))/porosity - (3.*uy)/porosity) + - Fz*(0. - (3.*uz)/porosity)); + +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(3.) + Fy*(-3.)); // q = 10 dist[nr9] = f10*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(-ux + uy) + (4.5*(-ux + uy)*(-ux + uy))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) - +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(-3. 
- (3.*ux)/porosity - (9.*(-ux + uy))/porosity) + Fy*(3. - (3.*uy)/porosity + (9.*(-ux + uy))/porosity) + - Fz*(0. - (3.*uz)/porosity)); + +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(-3.) + Fy*(3.)); // q = 11 dist[nr12] = f11*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(ux + uz) + (4.5*(ux + uz)*(ux + uz))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) - +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fy*(0. - (3.*uy)/porosity) + Fx*(3. - (3.*ux)/porosity + (9.*(ux + uz))/porosity) + - Fz*(3. - (3.*uz)/porosity + (9.*(ux + uz))/porosity)); + +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(3.) + Fz*(3.)); // q = 12 dist[nr11] = f12*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(-ux - uz) + (4.5*(-ux - uz)*(-ux - uz))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) - +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fy*(0. - (3.*uy)/porosity) + Fx*(-3. - (3.*ux)/porosity - (9.*(-ux - uz))/porosity) + - Fz*(-3. - (9.*(-ux - uz))/porosity - (3.*uz)/porosity)); + +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(-3.) + Fz*(-3.)); // q = 13 dist[nr14] = f13*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(ux - uz) + (4.5*(ux - uz)*(ux - uz))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) - +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fy*(0. - (3.*uy)/porosity) + Fx*(3. - (3.*ux)/porosity + (9.*(ux - uz))/porosity) + - Fz*(-3. - (9.*(ux - uz))/porosity - (3.*uz)/porosity)); + +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(3.) + Fz*(-3.)); // q= 14 dist[nr13] = f14*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(-ux + uz) + (4.5*(-ux + uz)*(-ux + uz))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) - +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fy*(0. - (3.*uy)/porosity) + Fx*(-3. - (3.*ux)/porosity - (9.*(-ux + uz))/porosity) + - Fz*(3. - (3.*uz)/porosity + (9.*(-ux + uz))/porosity)); + +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(-3.) + Fz*(3.)); // q = 15 dist[nr16] = f15*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(uy + uz) + (4.5*(uy + uz)*(uy + uz))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) - +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(0. - (3.*ux)/porosity) + Fy*(3. - (3.*uy)/porosity + (9.*(uy + uz))/porosity) + - Fz*(3. - (3.*uz)/porosity + (9.*(uy + uz))/porosity)); + +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fy*(3.) + Fz*(3.)); // q = 16 dist[nr15] = f16*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(-uy - uz) + (4.5*(-uy - uz)*(-uy - uz))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) - +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(0. - (3.*ux)/porosity) + Fy*(-3. - (3.*uy)/porosity - (9.*(-uy - uz))/porosity) + - Fz*(-3. - (9.*(-uy - uz))/porosity - (3.*uz)/porosity)); + +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fy*(-3.) + Fz*(-3.)); // q = 17 dist[nr18] = f17*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(uy - uz) + (4.5*(uy - uz)*(uy - uz))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) - +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(0. - (3.*ux)/porosity) + Fy*(3. - (3.*uy)/porosity + (9.*(uy - uz))/porosity) + - Fz*(-3. - (9.*(uy - uz))/porosity - (3.*uz)/porosity)); + +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fy*(3.) + Fz*(-3.)); // q = 18 dist[nr17] = f18*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(-uy + uz) + (4.5*(-uy + uz)*(-uy + uz))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) - +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(0. - (3.*ux)/porosity) + Fy*(-3. - (3.*uy)/porosity - (9.*(-uy + uz))/porosity) + - Fz*(3. - (3.*uz)/porosity + (9.*(-uy + uz))/porosity)); + +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fy*(-3.) 
+ Fz*(3.)); + //-------------------------------------------------------------------------------------------------------------------------------------------// + //Update velocity on device Velocity[0*Np+n] = ux; @@ -750,11 +911,43 @@ __global__ void dvc_ScaLBL_D3Q19_AAeven_Greyscale_IMRT(double *dist, int start, //Calculate pressure for Incompressible-MRT model pressure=0.5/porosity*(pressure-0.5*Den*u_mag*u_mag/porosity); +// //..............carry out relaxation process............................................... +// m1 = m1 + rlx_setA*((-30*Den+19*(ux*ux+uy*uy+uz*uz)/porosity + 57*pressure*porosity) - m1) +// + (1-0.5*rlx_setA)*38*(Fx*ux+Fy*uy+Fz*uz)/porosity; +// m2 = m2 + rlx_setA*((12*Den - 5.5*(ux*ux+uy*uy+uz*uz)/porosity-27*pressure*porosity) - m2) +// + (1-0.5*rlx_setA)*11*(-Fx*ux-Fy*uy-Fz*uz)/porosity; +// jx = jx + Fx; +// m4 = m4 + rlx_setB*((-0.6666666666666666*ux*Den) - m4) +// + (1-0.5*rlx_setB)*(-0.6666666666666666*Fx); +// jy = jy + Fy; +// m6 = m6 + rlx_setB*((-0.6666666666666666*uy*Den) - m6) +// + (1-0.5*rlx_setB)*(-0.6666666666666666*Fy); +// jz = jz + Fz; +// m8 = m8 + rlx_setB*((-0.6666666666666666*uz*Den) - m8) +// + (1-0.5*rlx_setB)*(-0.6666666666666666*Fz); +// m9 = m9 + rlx_setA*((Den*(2*ux*ux-uy*uy-uz*uz)/porosity) - m9) +// + (1-0.5*rlx_setA)*(4*Fx*ux-2*Fy*uy-2*Fz*uz)/porosity; +// m10 = m10 + rlx_setA*(-0.5*Den*((2*ux*ux-uy*uy-uz*uz)/porosity)- m10) +// + (1-0.5*rlx_setA)*(-2*Fx*ux+Fy*uy+Fz*uz)/porosity; +// m11 = m11 + rlx_setA*((Den*(uy*uy-uz*uz)/porosity) - m11) +// + (1-0.5*rlx_setA)*(2*Fy*uy-2*Fz*uz)/porosity; +// m12 = m12 + rlx_setA*(-0.5*(Den*(uy*uy-uz*uz)/porosity)- m12) +// + (1-0.5*rlx_setA)*(-Fy*uy+Fz*uz)/porosity; +// m13 = m13 + rlx_setA*((Den*ux*uy/porosity) - m13) +// + (1-0.5*rlx_setA)*(Fy*ux+Fx*uy)/porosity; +// m14 = m14 + rlx_setA*((Den*uy*uz/porosity) - m14) +// + (1-0.5*rlx_setA)*(Fz*uy+Fy*uz)/porosity; +// m15 = m15 + rlx_setA*((Den*ux*uz/porosity) - m15) +// + (1-0.5*rlx_setA)*(Fz*ux+Fx*uz)/porosity; +// m16 = m16 + rlx_setB*( - m16); +// m17 = m17 + rlx_setB*( - m17); +// m18 = m18 + rlx_setB*( - m18); +// //....................................................................................................... + + //-------------------- IMRT collison where body force has NO higher-order terms -------------// //..............carry out relaxation process............................................... 
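    // The GPU kernel applies the same reduction as the CPU version: the force acts only
    // on the momentum moments (j_a <- j_a + F_a, with the (1 - 0.5*rlx_setB) correction
    // on m4, m6, m8), and every other moment follows the plain relaxation
    // m <- m + rlx*(m_eq - m).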
- m1 = m1 + rlx_setA*((-30*Den+19*(ux*ux+uy*uy+uz*uz)/porosity + 57*pressure*porosity) - m1) - + (1-0.5*rlx_setA)*38*(Fx*ux+Fy*uy+Fz*uz)/porosity; - m2 = m2 + rlx_setA*((12*Den - 5.5*(ux*ux+uy*uy+uz*uz)/porosity-27*pressure*porosity) - m2) - + (1-0.5*rlx_setA)*11*(-Fx*ux-Fy*uy-Fz*uz)/porosity; + m1 = m1 + rlx_setA*((-30*Den+19*(ux*ux+uy*uy+uz*uz)/porosity + 57*pressure*porosity) - m1); + m2 = m2 + rlx_setA*((12*Den - 5.5*(ux*ux+uy*uy+uz*uz)/porosity-27*pressure*porosity) - m2); jx = jx + Fx; m4 = m4 + rlx_setB*((-0.6666666666666666*ux*Den) - m4) + (1-0.5*rlx_setB)*(-0.6666666666666666*Fx); @@ -764,20 +957,13 @@ __global__ void dvc_ScaLBL_D3Q19_AAeven_Greyscale_IMRT(double *dist, int start, jz = jz + Fz; m8 = m8 + rlx_setB*((-0.6666666666666666*uz*Den) - m8) + (1-0.5*rlx_setB)*(-0.6666666666666666*Fz); - m9 = m9 + rlx_setA*((Den*(2*ux*ux-uy*uy-uz*uz)/porosity) - m9) - + (1-0.5*rlx_setA)*(4*Fx*ux-2*Fy*uy-2*Fz*uz)/porosity; - m10 = m10 + rlx_setA*(-0.5*Den*((2*ux*ux-uy*uy-uz*uz)/porosity)- m10) - + (1-0.5*rlx_setA)*(-2*Fx*ux+Fy*uy+Fz*uz)/porosity; - m11 = m11 + rlx_setA*((Den*(uy*uy-uz*uz)/porosity) - m11) - + (1-0.5*rlx_setA)*(2*Fy*uy-2*Fz*uz)/porosity; - m12 = m12 + rlx_setA*(-0.5*(Den*(uy*uy-uz*uz)/porosity)- m12) - + (1-0.5*rlx_setA)*(-Fy*uy+Fz*uz)/porosity; - m13 = m13 + rlx_setA*((Den*ux*uy/porosity) - m13) - + (1-0.5*rlx_setA)*(Fy*ux+Fx*uy)/porosity; - m14 = m14 + rlx_setA*((Den*uy*uz/porosity) - m14) - + (1-0.5*rlx_setA)*(Fz*uy+Fy*uz)/porosity; - m15 = m15 + rlx_setA*((Den*ux*uz/porosity) - m15) - + (1-0.5*rlx_setA)*(Fz*ux+Fx*uz)/porosity; + m9 = m9 + rlx_setA*((Den*(2*ux*ux-uy*uy-uz*uz)/porosity) - m9); + m10 = m10 + rlx_setA*(-0.5*Den*((2*ux*ux-uy*uy-uz*uz)/porosity)- m10); + m11 = m11 + rlx_setA*((Den*(uy*uy-uz*uz)/porosity) - m11); + m12 = m12 + rlx_setA*(-0.5*(Den*(uy*uy-uz*uz)/porosity)- m12); + m13 = m13 + rlx_setA*((Den*ux*uy/porosity) - m13); + m14 = m14 + rlx_setA*((Den*uy*uz/porosity) - m14); + m15 = m15 + rlx_setA*((Den*ux*uz/porosity) - m15); m16 = m16 + rlx_setB*( - m16); m17 = m17 + rlx_setB*( - m17); m18 = m18 + rlx_setB*( - m18); @@ -1238,11 +1424,43 @@ __global__ void dvc_ScaLBL_D3Q19_AAodd_Greyscale_IMRT(int *neighborList, double //Calculate pressure for Incompressible-MRT model pressure=0.5/porosity*(pressure-0.5*Den*u_mag*u_mag/porosity); +// //..............carry out relaxation process............................................... 
+// m1 = m1 + rlx_setA*((-30*Den+19*(ux*ux+uy*uy+uz*uz)/porosity + 57*pressure*porosity) - m1) +// + (1-0.5*rlx_setA)*38*(Fx*ux+Fy*uy+Fz*uz)/porosity; +// m2 = m2 + rlx_setA*((12*Den - 5.5*(ux*ux+uy*uy+uz*uz)/porosity-27*pressure*porosity) - m2) +// + (1-0.5*rlx_setA)*11*(-Fx*ux-Fy*uy-Fz*uz)/porosity; +// jx = jx + Fx; +// m4 = m4 + rlx_setB*((-0.6666666666666666*ux*Den) - m4) +// + (1-0.5*rlx_setB)*(-0.6666666666666666*Fx); +// jy = jy + Fy; +// m6 = m6 + rlx_setB*((-0.6666666666666666*uy*Den) - m6) +// + (1-0.5*rlx_setB)*(-0.6666666666666666*Fy); +// jz = jz + Fz; +// m8 = m8 + rlx_setB*((-0.6666666666666666*uz*Den) - m8) +// + (1-0.5*rlx_setB)*(-0.6666666666666666*Fz); +// m9 = m9 + rlx_setA*((Den*(2*ux*ux-uy*uy-uz*uz)/porosity) - m9) +// + (1-0.5*rlx_setA)*(4*Fx*ux-2*Fy*uy-2*Fz*uz)/porosity; +// m10 = m10 + rlx_setA*(-0.5*Den*((2*ux*ux-uy*uy-uz*uz)/porosity)- m10) +// + (1-0.5*rlx_setA)*(-2*Fx*ux+Fy*uy+Fz*uz)/porosity; +// m11 = m11 + rlx_setA*((Den*(uy*uy-uz*uz)/porosity) - m11) +// + (1-0.5*rlx_setA)*(2*Fy*uy-2*Fz*uz)/porosity; +// m12 = m12 + rlx_setA*(-0.5*(Den*(uy*uy-uz*uz)/porosity)- m12) +// + (1-0.5*rlx_setA)*(-Fy*uy+Fz*uz)/porosity; +// m13 = m13 + rlx_setA*((Den*ux*uy/porosity) - m13) +// + (1-0.5*rlx_setA)*(Fy*ux+Fx*uy)/porosity; +// m14 = m14 + rlx_setA*((Den*uy*uz/porosity) - m14) +// + (1-0.5*rlx_setA)*(Fz*uy+Fy*uz)/porosity; +// m15 = m15 + rlx_setA*((Den*ux*uz/porosity) - m15) +// + (1-0.5*rlx_setA)*(Fz*ux+Fx*uz)/porosity; +// m16 = m16 + rlx_setB*( - m16); +// m17 = m17 + rlx_setB*( - m17); +// m18 = m18 + rlx_setB*( - m18); +// //....................................................................................................... + + //-------------------- IMRT collison where body force has NO higher-order terms -------------// //..............carry out relaxation process............................................... 
- m1 = m1 + rlx_setA*((-30*Den+19*(ux*ux+uy*uy+uz*uz)/porosity + 57*pressure*porosity) - m1) - + (1-0.5*rlx_setA)*38*(Fx*ux+Fy*uy+Fz*uz)/porosity; - m2 = m2 + rlx_setA*((12*Den - 5.5*(ux*ux+uy*uy+uz*uz)/porosity-27*pressure*porosity) - m2) - + (1-0.5*rlx_setA)*11*(-Fx*ux-Fy*uy-Fz*uz)/porosity; + m1 = m1 + rlx_setA*((-30*Den+19*(ux*ux+uy*uy+uz*uz)/porosity + 57*pressure*porosity) - m1); + m2 = m2 + rlx_setA*((12*Den - 5.5*(ux*ux+uy*uy+uz*uz)/porosity-27*pressure*porosity) - m2); jx = jx + Fx; m4 = m4 + rlx_setB*((-0.6666666666666666*ux*Den) - m4) + (1-0.5*rlx_setB)*(-0.6666666666666666*Fx); @@ -1252,25 +1470,19 @@ __global__ void dvc_ScaLBL_D3Q19_AAodd_Greyscale_IMRT(int *neighborList, double jz = jz + Fz; m8 = m8 + rlx_setB*((-0.6666666666666666*uz*Den) - m8) + (1-0.5*rlx_setB)*(-0.6666666666666666*Fz); - m9 = m9 + rlx_setA*((Den*(2*ux*ux-uy*uy-uz*uz)/porosity) - m9) - + (1-0.5*rlx_setA)*(4*Fx*ux-2*Fy*uy-2*Fz*uz)/porosity; - m10 = m10 + rlx_setA*(-0.5*Den*((2*ux*ux-uy*uy-uz*uz)/porosity)- m10) - + (1-0.5*rlx_setA)*(-2*Fx*ux+Fy*uy+Fz*uz)/porosity; - m11 = m11 + rlx_setA*((Den*(uy*uy-uz*uz)/porosity) - m11) - + (1-0.5*rlx_setA)*(2*Fy*uy-2*Fz*uz)/porosity; - m12 = m12 + rlx_setA*(-0.5*(Den*(uy*uy-uz*uz)/porosity)- m12) - + (1-0.5*rlx_setA)*(-Fy*uy+Fz*uz)/porosity; - m13 = m13 + rlx_setA*((Den*ux*uy/porosity) - m13) - + (1-0.5*rlx_setA)*(Fy*ux+Fx*uy)/porosity; - m14 = m14 + rlx_setA*((Den*uy*uz/porosity) - m14) - + (1-0.5*rlx_setA)*(Fz*uy+Fy*uz)/porosity; - m15 = m15 + rlx_setA*((Den*ux*uz/porosity) - m15) - + (1-0.5*rlx_setA)*(Fz*ux+Fx*uz)/porosity; + m9 = m9 + rlx_setA*((Den*(2*ux*ux-uy*uy-uz*uz)/porosity) - m9); + m10 = m10 + rlx_setA*(-0.5*Den*((2*ux*ux-uy*uy-uz*uz)/porosity)- m10); + m11 = m11 + rlx_setA*((Den*(uy*uy-uz*uz)/porosity) - m11); + m12 = m12 + rlx_setA*(-0.5*(Den*(uy*uy-uz*uz)/porosity)- m12); + m13 = m13 + rlx_setA*((Den*ux*uy/porosity) - m13); + m14 = m14 + rlx_setA*((Den*uy*uz/porosity) - m14); + m15 = m15 + rlx_setA*((Den*ux*uz/porosity) - m15); m16 = m16 + rlx_setB*( - m16); m17 = m17 + rlx_setB*( - m17); m18 = m18 + rlx_setB*( - m18); //....................................................................................................... - + + //.................inverse transformation...................................................... 
// q=0 fq = mrt_V1*Den-mrt_V2*m1+mrt_V3*m2; From 8751fa245bbe3bad5c0d2a0799d57a943faf14ba Mon Sep 17 00:00:00 2001 From: Mark Berrill Date: Mon, 3 Feb 2020 12:41:09 -0500 Subject: [PATCH 040/121] Fixing minor issues with some operating systems --- common/Utilities.cpp | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/common/Utilities.cpp b/common/Utilities.cpp index 11d2b261..723b34f8 100644 --- a/common/Utilities.cpp +++ b/common/Utilities.cpp @@ -16,6 +16,20 @@ #include +// OS specific includes / definitions +// clang-format off +#if defined( WIN32 ) || defined( _WIN32 ) || defined( WIN64 ) || defined( _WIN64 ) + #define USE_WINDOWS +#elif defined( __APPLE__ ) + #define USE_MAC +#elif defined( __linux ) || defined( __linux__ ) || defined( __unix ) || defined( __posix ) + #define USE_LINUX +#else + #error Unknown OS +#endif +// clang-format on + + // Mutex for Utility functions static std::mutex Utilities_mutex; From 6ed57841b87dbc0893e21521609181c348e4ae41 Mon Sep 17 00:00:00 2001 From: JamesEMcclure Date: Mon, 3 Feb 2020 12:59:52 -0500 Subject: [PATCH 041/121] update TwoPhase analysis for vector / tensor objects --- analysis/TwoPhase.cpp | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/analysis/TwoPhase.cpp b/analysis/TwoPhase.cpp index 1dbdfbfa..d878a663 100644 --- a/analysis/TwoPhase.cpp +++ b/analysis/TwoPhase.cpp @@ -1098,13 +1098,19 @@ void TwoPhase::Reduce() vol_n_global = Dm->Comm.sumReduce( vol_n ); paw_global = Dm->Comm.sumReduce( paw ); pan_global = Dm->Comm.sumReduce( pan ); - vaw_global(0) = Dm->Comm.sumReduce( vaw(0) ); - van_global(0) = Dm->Comm.sumReduce( van(0) ); - vawn_global(0) = Dm->Comm.sumReduce( vawn(0) ); - vawns_global(0) = Dm->Comm.sumReduce( vawns(0) ); - Gwn_global(0) = Dm->Comm.sumReduce( Gwn(0) ); - Gns_global(0) = Dm->Comm.sumReduce( Gns(0) ); - Gws_global(0) = Dm->Comm.sumReduce( Gws(0) ); + for (int idx=0; idx<3; idx++) + vaw_global(idx) = Dm->Comm.sumReduce( vaw(idx) ); + for (int idx=0; idx<3; idx++) + van_global(idx) = Dm->Comm.sumReduce( van(idx)); + for (int idx=0; idx<3; idx++) + vawn_global(idx) = Dm->Comm.sumReduce( vawn(idx) ); + for (int idx=0; idx<3; idx++) + vawns_global(idx) = Dm->Comm.sumReduce( vawns(idx) ); + for (int idx=0; idx<6; idx++){ + Gwn_global(idx) = Dm->Comm.sumReduce( Gwn(idx) ); + Gns_global(idx) = Dm->Comm.sumReduce( Gns(idx) ); + Gws_global(idx) = Dm->Comm.sumReduce( Gws(idx) ); + } trawn_global = Dm->Comm.sumReduce( trawn ); trJwn_global = Dm->Comm.sumReduce( trJwn ); trRwn_global = Dm->Comm.sumReduce( trRwn ); From b73208b4718604c4e95a0593bd83531d9b9b9971 Mon Sep 17 00:00:00 2001 From: JamesEMcclure Date: Mon, 3 Feb 2020 14:05:23 -0500 Subject: [PATCH 042/121] fix water seed --- models/ColorModel.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/models/ColorModel.cpp b/models/ColorModel.cpp index 5a9c56d4..3b58fff1 100644 --- a/models/ColorModel.cpp +++ b/models/ColorModel.cpp @@ -937,7 +937,7 @@ void ScaLBL_ColorModel::Run(){ else if (USE_SEED){ delta_volume = volA*Dm->Volume - initial_volume; CURRENT_MORPH_TIMESTEPS += analysis_interval; - //double massChange = SeedPhaseField(seed_water); + double massChange = SeedPhaseField(seed_water); if (rank==0) printf("***Seed water in oil %f, volume change %f / %f ***\n", seed_water, delta_volume, delta_volume_target); } else if (USE_MORPHOPEN_OIL){ From c426aa7d1db9fd637f72308d88ed5b89158aeb83 Mon Sep 17 00:00:00 2001 From: JamesEMcclure Date: Mon, 3 Feb 2020 15:13:45 -0500 Subject: [PATCH 043/121] remove 
deprecated pressure BC routines --- common/ScaLBL.h | 5 -- cpu/D3Q19.cpp | 139 ------------------------------------------------ 2 files changed, 144 deletions(-) diff --git a/common/ScaLBL.h b/common/ScaLBL.h index d7f012d1..610fce5d 100644 --- a/common/ScaLBL.h +++ b/common/ScaLBL.h @@ -101,11 +101,6 @@ extern "C" void ScaLBL_D3Q19_Gradient_DFH(int *NeighborList, double *Phi, double // BOUNDARY CONDITION ROUTINES -//extern "C" void ScaLBL_D3Q19_Pressure_BC_z(double *disteven, double *distodd, double din, -// int Nx, int Ny, int Nz); -//extern "C" void ScaLBL_D3Q19_Pressure_BC_Z(double *disteven, double *distodd, double dout, -// int Nx, int Ny, int Nz, int outlet); - extern "C" void ScaLBL_D3Q19_AAodd_Pressure_BC_z(int *neighborList, int *list, double *dist, double din, int count, int Np); extern "C" void ScaLBL_D3Q19_AAodd_Pressure_BC_Z(int *neighborList, int *list, double *dist, double dout, int count, int Np); diff --git a/cpu/D3Q19.cpp b/cpu/D3Q19.cpp index be081528..564eb96d 100644 --- a/cpu/D3Q19.cpp +++ b/cpu/D3Q19.cpp @@ -680,145 +680,6 @@ extern "C" void ScaLBL_D3Q19_AAodd_Pressure_BC_Z(int *d_neighborList, int *list, } } -extern "C" void ScaLBL_D3Q19_Pressure_BC_z(int *list, double *dist, double din, int count, int Np) -{ - int n; - // distributions - double f0,f1,f2,f3,f4,f5,f6,f7,f8,f9; - double f10,f11,f12,f13,f14,f15,f16,f17,f18; - double ux,uy,uz; - double Cxz,Cyz; - - for (int idx=0; idx Date: Mon, 3 Feb 2020 17:22:13 -0500 Subject: [PATCH 044/121] add a weighting factor to the water seeding method --- models/ColorModel.cpp | 133 ++++++++++++++++++++++++++++++++++-------- 1 file changed, 110 insertions(+), 23 deletions(-) diff --git a/models/ColorModel.cpp b/models/ColorModel.cpp index df4afab9..bcffa9df 100644 --- a/models/ColorModel.cpp +++ b/models/ColorModel.cpp @@ -1202,35 +1202,26 @@ double ScaLBL_ColorModel::SeedPhaseField(const double seed_water_in_oil){ double mass_loss =0.f; double count =0.f; double *Aq_tmp, *Bq_tmp; + double *Vel_tmp; - Aq_tmp = new double [7*Np]; - Bq_tmp = new double [7*Np]; + Aq_tmp = new double [7*Np]; + Bq_tmp = new double [7*Np]; + Vel_tmp = new double [3*Np]; ScaLBL_CopyToHost(Aq_tmp, Aq, 7*Np*sizeof(double)); ScaLBL_CopyToHost(Bq_tmp, Bq, 7*Np*sizeof(double)); + ScaLBL_CopyToHost(Vel_tmp, Velocity, 7*Np*sizeof(double)); -/* for (int k=1; kgnb.Px+Averages->gwb.Px)/(Averages->gnb.M+Averages->gwb.M); + double vy_glb = (Averages->gnb.Py+Averages->gwb.Py)/(Averages->gnb.M+Averages->gwb.M); + double vz_glb = (Averages->gnb.Pz+Averages->gwb.Pz)/(Averages->gnb.M+Averages->gwb.M); + double v_mag_glb = sqrt(vx_glb*vx_glb+vy_glb*vy_glb+vz_glb*vz_glb); - if (Averages->SDs(i,j,k) < 0.f){ - // skip - } - else if (phase(i,j,k) > 0.f ){ - phase(i,j,k) -= random_value*seed_water_in_oil; - mass_loss += random_value*seed_water_in_oil; - count++; - } - else { - - } - } - } - } - */ for (int n=0; n < ScaLBL_Comm->LastExterior(); n++){ - double random_value = seed_water_in_oil*double(rand())/ RAND_MAX; + double v_mag_local = sqrt(Vel_tmp[n]*Vel_tmp[n]+Vel_tmp[n+1*Np]*Vel_tmp[n+1*Np]+Vel_tmp[n+2*Np]*Vel_tmp[n+2*Np]); + double weight = (v_mag_localFirstInterior(); n < ScaLBL_Comm->LastInterior(); n++){ - double random_value = seed_water_in_oil*double(rand())/ RAND_MAX; + double v_mag_local = sqrt(Vel_tmp[n]*Vel_tmp[n]+Vel_tmp[n+1*Np]*Vel_tmp[n+1*Np]+Vel_tmp[n+2*Np]*Vel_tmp[n+2*Np]); + double weight = (v_mag_localSDs(i,j,k) < 0.f){ +// // skip +// } +// else if (phase(i,j,k) > 0.f ){ +// phase(i,j,k) -= random_value*seed_water_in_oil; +// mass_loss += 
random_value*seed_water_in_oil; +// count++; +// } +// else { +// +// } +// } +// } +// } +// */ +// for (int n=0; n < ScaLBL_Comm->LastExterior(); n++){ +// double random_value = seed_water_in_oil*double(rand())/ RAND_MAX; +// double dA = Aq_tmp[n] + Aq_tmp[n+Np] + Aq_tmp[n+2*Np] + Aq_tmp[n+3*Np] + Aq_tmp[n+4*Np] + Aq_tmp[n+5*Np] + Aq_tmp[n+6*Np]; +// double dB = Bq_tmp[n] + Bq_tmp[n+Np] + Bq_tmp[n+2*Np] + Bq_tmp[n+3*Np] + Bq_tmp[n+4*Np] + Bq_tmp[n+5*Np] + Bq_tmp[n+6*Np]; +// double phase_id = (dA - dB) / (dA + dB); +// if (phase_id > 0.0){ +// Aq_tmp[n] -= 0.3333333333333333*random_value; +// Aq_tmp[n+Np] -= 0.1111111111111111*random_value; +// Aq_tmp[n+2*Np] -= 0.1111111111111111*random_value; +// Aq_tmp[n+3*Np] -= 0.1111111111111111*random_value; +// Aq_tmp[n+4*Np] -= 0.1111111111111111*random_value; +// Aq_tmp[n+5*Np] -= 0.1111111111111111*random_value; +// Aq_tmp[n+6*Np] -= 0.1111111111111111*random_value; +// +// Bq_tmp[n] += 0.3333333333333333*random_value; +// Bq_tmp[n+Np] += 0.1111111111111111*random_value; +// Bq_tmp[n+2*Np] += 0.1111111111111111*random_value; +// Bq_tmp[n+3*Np] += 0.1111111111111111*random_value; +// Bq_tmp[n+4*Np] += 0.1111111111111111*random_value; +// Bq_tmp[n+5*Np] += 0.1111111111111111*random_value; +// Bq_tmp[n+6*Np] += 0.1111111111111111*random_value; +// } +// mass_loss += random_value*seed_water_in_oil; +// } +// +// for (int n=ScaLBL_Comm->FirstInterior(); n < ScaLBL_Comm->LastInterior(); n++){ +// double random_value = seed_water_in_oil*double(rand())/ RAND_MAX; +// double dA = Aq_tmp[n] + Aq_tmp[n+Np] + Aq_tmp[n+2*Np] + Aq_tmp[n+3*Np] + Aq_tmp[n+4*Np] + Aq_tmp[n+5*Np] + Aq_tmp[n+6*Np]; +// double dB = Bq_tmp[n] + Bq_tmp[n+Np] + Bq_tmp[n+2*Np] + Bq_tmp[n+3*Np] + Bq_tmp[n+4*Np] + Bq_tmp[n+5*Np] + Bq_tmp[n+6*Np]; +// double phase_id = (dA - dB) / (dA + dB); +// if (phase_id > 0.0){ +// Aq_tmp[n] -= 0.3333333333333333*random_value; +// Aq_tmp[n+Np] -= 0.1111111111111111*random_value; +// Aq_tmp[n+2*Np] -= 0.1111111111111111*random_value; +// Aq_tmp[n+3*Np] -= 0.1111111111111111*random_value; +// Aq_tmp[n+4*Np] -= 0.1111111111111111*random_value; +// Aq_tmp[n+5*Np] -= 0.1111111111111111*random_value; +// Aq_tmp[n+6*Np] -= 0.1111111111111111*random_value; +// +// Bq_tmp[n] += 0.3333333333333333*random_value; +// Bq_tmp[n+Np] += 0.1111111111111111*random_value; +// Bq_tmp[n+2*Np] += 0.1111111111111111*random_value; +// Bq_tmp[n+3*Np] += 0.1111111111111111*random_value; +// Bq_tmp[n+4*Np] += 0.1111111111111111*random_value; +// Bq_tmp[n+5*Np] += 0.1111111111111111*random_value; +// Bq_tmp[n+6*Np] += 0.1111111111111111*random_value; +// } +// mass_loss += random_value*seed_water_in_oil; +// } +// +// count = Dm->Comm.sumReduce( count ); +// mass_loss = Dm->Comm.sumReduce( mass_loss ); +// if (rank == 0) printf("Remove mass %f from %f voxels \n",mass_loss,count); +// +// // Need to initialize Aq, Bq, Den, Phi directly +// //ScaLBL_CopyToDevice(Phi,phase.data(),7*Np*sizeof(double)); +// ScaLBL_CopyToDevice(Aq, Aq_tmp, 7*Np*sizeof(double)); +// ScaLBL_CopyToDevice(Bq, Bq_tmp, 7*Np*sizeof(double)); +// +// return(mass_loss); +//} + double ScaLBL_ColorModel::MorphInit(const double beta, const double target_delta_volume){ const RankInfoStruct rank_info(rank,nprocx,nprocy,nprocz); From a372d604503739f3e1564c7bec4bc683cfbb189a Mon Sep 17 00:00:00 2001 From: Rex Zhe Li Date: Tue, 4 Feb 2020 13:58:06 -0500 Subject: [PATCH 045/121] resolve some minor issues after the MPI backend updates --- models/GreyscaleModel.cpp | 24 +++++++++++++++--------- models/GreyscaleModel.h 
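Note on PATCH 044 above: it replaces the uniform random water seeding with a weighting based on the local velocity magnitude relative to the domain-averaged velocity, but the comparison expression is truncated in the hunk as recorded here. The following is only a minimal sketch of one plausible weighting; the function name and the linear ramp are illustrative assumptions, not taken from the source.

    // Hypothetical sketch of a velocity-weighted seed amount, assuming slow-moving
    // (trapped) fluid receives the full seed and fast-moving fluid receives none.
    #include <cstdlib>

    static double seed_amount(double seed_water_in_oil, double v_mag_local, double v_mag_glb)
    {
        // linear ramp: weight -> 1 as the local speed drops below the global mean speed
        double weight = (v_mag_local < v_mag_glb) ? (1.0 - v_mag_local / v_mag_glb) : 0.0;
        // the returned value is subtracted from the Aq moments and added to Bq, as in the patch
        return weight * seed_water_in_oil * double(rand()) / double(RAND_MAX);
    }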
| 2 +- tests/lbpm_greyscale_simulator.cpp | 2 +- 3 files changed, 17 insertions(+), 11 deletions(-) diff --git a/models/GreyscaleModel.cpp b/models/GreyscaleModel.cpp index 79b7a9c7..11d92c80 100644 --- a/models/GreyscaleModel.cpp +++ b/models/GreyscaleModel.cpp @@ -261,7 +261,7 @@ void ScaLBL_GreyscaleModel::AssignComponentLabels(double *Porosity, double *Perm // Set Dm to match Mask for (int i=0; iid[i] = Mask->id[i]; - for (int idx=0; idxComm, label_count[idx]); + for (int idx=0; idxComm.sumReduce(label_count[idx]); //Initialize a weighted porosity after considering grey voxels GreyPorosity=0.0; @@ -595,11 +595,16 @@ void ScaLBL_GreyscaleModel::Run(){ } } } - MPI_Allreduce(&vax_loc,&vax,1,MPI_DOUBLE,MPI_SUM,Mask->Comm); - MPI_Allreduce(&vay_loc,&vay,1,MPI_DOUBLE,MPI_SUM,Mask->Comm); - MPI_Allreduce(&vaz_loc,&vaz,1,MPI_DOUBLE,MPI_SUM,Mask->Comm); - MPI_Allreduce(&count_loc,&count,1,MPI_DOUBLE,MPI_SUM,Mask->Comm); + //MPI_Allreduce(&vax_loc,&vax,1,MPI_DOUBLE,MPI_SUM,Mask->Comm); + //MPI_Allreduce(&vay_loc,&vay,1,MPI_DOUBLE,MPI_SUM,Mask->Comm); + //MPI_Allreduce(&vaz_loc,&vaz,1,MPI_DOUBLE,MPI_SUM,Mask->Comm); + //MPI_Allreduce(&count_loc,&count,1,MPI_DOUBLE,MPI_SUM,Mask->Comm); + vax = Mask->Comm.sumReduce( vax_loc ); + vay = Mask->Comm.sumReduce( vay_loc ); + vaz = Mask->Comm.sumReduce( vaz_loc ); + count = Mask->Comm.sumReduce( count_loc ); + vax /= count; vay /= count; vaz /= count; @@ -629,10 +634,11 @@ void ScaLBL_GreyscaleModel::Run(){ double As = Morphology.A(); double Hs = Morphology.H(); double Xs = Morphology.X(); - Vs=sumReduce( Dm->Comm, Vs); - As=sumReduce( Dm->Comm, As); - Hs=sumReduce( Dm->Comm, Hs); - Xs=sumReduce( Dm->Comm, Xs); + Vs = Dm->Comm.sumReduce( Vs); + As = Dm->Comm.sumReduce( As); + Hs = Dm->Comm.sumReduce( Hs); + Xs = Dm->Comm.sumReduce( Xs); + double h = Dm->voxel_length; //double absperm = h*h*mu*Mask->Porosity()*flow_rate / force_mag; double absperm = h*h*mu*GreyPorosity*flow_rate / force_mag; diff --git a/models/GreyscaleModel.h b/models/GreyscaleModel.h index c670239f..a99925b1 100644 --- a/models/GreyscaleModel.h +++ b/models/GreyscaleModel.h @@ -10,7 +10,7 @@ Implementation of color lattice boltzmann model #include #include "common/Communication.h" -#include "common/MPI_Helpers.h" +#include "common/MPI.h" #include "common/Database.h" #include "common/ScaLBL.h" #include "ProfilerApp.h" diff --git a/tests/lbpm_greyscale_simulator.cpp b/tests/lbpm_greyscale_simulator.cpp index b7ed442e..a54b6fc4 100644 --- a/tests/lbpm_greyscale_simulator.cpp +++ b/tests/lbpm_greyscale_simulator.cpp @@ -8,7 +8,7 @@ #include "common/ScaLBL.h" #include "common/Communication.h" -#include "common/MPI_Helpers.h" +#include "common/MPI.h" #include "models/GreyscaleModel.h" //#define WRITE_SURFACES From 6d4e68d8b8b870519862a07584e3cfcb3808b554 Mon Sep 17 00:00:00 2001 From: JamesEMcclure Date: Tue, 4 Feb 2020 14:02:49 -0500 Subject: [PATCH 046/121] set morphological target from kr --- models/ColorModel.cpp | 32 ++++++++++++++++++++++++++++++-- 1 file changed, 30 insertions(+), 2 deletions(-) diff --git a/models/ColorModel.cpp b/models/ColorModel.cpp index bcffa9df..b8578f4e 100644 --- a/models/ColorModel.cpp +++ b/models/ColorModel.cpp @@ -524,6 +524,24 @@ void ScaLBL_ColorModel::Run(){ int RESCALE_FORCE_COUNT = 0; int RESCALE_FORCE_MAX = 0; + /* history for morphological algoirthm */ + double KRA_MORPH_FACTOR=0.8; + double volA_prev = 0.0; + double log_krA_prev = 1.0; + double log_krA_target = 1.0; + double log_krA = 0.0; + double slope_krA_volume = 0.0; + if (color_db->keyExists( 
"vol_A_previous" )){ + volA_prev = color_db->getScalar( "vol_A_previous" ); + } + if (color_db->keyExists( "log_krA_previous" )){ + log_krA_prev = color_db->getScalar( "log_krA_previous" ); + } + if (color_db->keyExists( "krA_morph_factor" )){ + KRA_MORPH_FACTOR = color_db->getScalar( "krA_morph_factor" ); + } + + /* defaults for simulation protocols */ auto protocol = color_db->getWithDefault( "protocol", "none" ); if (protocol == "image sequence"){ // Get the list of images @@ -811,7 +829,17 @@ void ScaLBL_ColorModel::Run(){ if ( isSteady ){ MORPH_ADAPT = true; CURRENT_MORPH_TIMESTEPS=0; - delta_volume_target = Dm->Volume*volA *morph_delta; // set target volume change + //delta_volume_target = Dm->Volume*volA *morph_delta; // set target volume change + /** morphological target based on relative permeability for A **/ + double krA_TMP= fabs(muA*flow_rate_A / force_mag); + log_krA = log(krA_TMP); + log_krA_target = log(KRA_MORPH_FACTOR*(krA_TMP)); + slope_krA_volume = (log_krA - log_krA_prev)/(Dm->Volume*(volA - volA_prev)); + delta_volume_target=Dm->Volume*(volA+(log_krA_target - log_krA)/slope_krA_volume); + log_krA_prev = log_krA; + volA_prev = volA; + printf(" ",log_krA, log_krA_target, vol_A, ); + /** compute averages & write data **/ Averages->Full(); Averages->Write(timestep); analysis.WriteVisData(timestep, current_db, *Averages, Phi, Pressure, Velocity, fq, Den ); @@ -1279,7 +1307,7 @@ double ScaLBL_ColorModel::SeedPhaseField(const double seed_water_in_oil){ // Need to initialize Aq, Bq, Den, Phi directly //ScaLBL_CopyToDevice(Phi,phase.data(),7*Np*sizeof(double)); ScaLBL_CopyToDevice(Aq, Aq_tmp, 7*Np*sizeof(double)); - ScaLBL_CopyToDevice(Bq, Bq_tmp, 7*Np*sizeof(double)); + ScaLBL_CopyToDevice(Bq, Bq_tmp, 7*Np*sizeof(double)); return(mass_loss); } From 57156d16fca13963fa85ed4167911bdca6465b86 Mon Sep 17 00:00:00 2001 From: Mark Berrill Date: Wed, 5 Feb 2020 07:35:13 -0500 Subject: [PATCH 047/121] Fixing build issue --- cmake/libraries.cmake | 1 + common/MPI.cpp | 2 +- gpu/Color.cu | 6 +----- gpu/D3Q19.cu | 1 - 4 files changed, 3 insertions(+), 7 deletions(-) diff --git a/cmake/libraries.cmake b/cmake/libraries.cmake index dca31ea9..43d2726e 100644 --- a/cmake/libraries.cmake +++ b/cmake/libraries.cmake @@ -308,5 +308,6 @@ MACRO ( CONFIGURE_LBPM ) # Suppress some common warnings IF ( USING_GCC ) SET( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-reorder -Wno-unused-parameter") + SET( CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --compiler-options -Wno-reorder,-Wno-unused-parameter") ENDIF() ENDMACRO () diff --git a/common/MPI.cpp b/common/MPI.cpp index 9495372d..8b09bc49 100644 --- a/common/MPI.cpp +++ b/common/MPI.cpp @@ -35,7 +35,7 @@ // Using MAC #define USE_MAC #include -#elif defined( __linux ) || defined( __unix ) || defined( __posix ) +#elif defined( __linux ) || defined( __linux__ ) || defined( __unix ) || defined( __posix ) // We are using linux #define USE_LINUX #include diff --git a/gpu/Color.cu b/gpu/Color.cu index 347858b9..30c16b51 100644 --- a/gpu/Color.cu +++ b/gpu/Color.cu @@ -128,7 +128,7 @@ __global__ void dvc_ScaLBL_Color_InitDistance(char *ID, double *Den, double *Ph __global__ void dvc_ScaLBL_Color_BC(int *list, int *Map, double *Phi, double *Den, double vA, double vB, int count, int Np) { - int idx,n,nm; + int idx,n; // Fill the outlet with component b idx = blockIdx.x*blockDim.x + threadIdx.x; if (idx < count){ @@ -3471,13 +3471,11 @@ __global__ void dvc_ScaLBL_D3Q19_AAeven_ColorMass(double *Aq, double *Bq, double double *Velocity, double *ColorGrad, double beta, int 
start, int finish, int Np){ int n; - double fq; // non-conserved moments double nA,nB; // number density double a1,b1,a2,b2,nAB,delta; double C,nx,ny,nz; //color gradient magnitude and direction double ux,uy,uz; - double phi,tau,rho0,rlx_setA,rlx_setB; int S = Np/NBLOCKS/NTHREADS + 1; for (int s=0; s Date: Wed, 12 Feb 2020 14:19:16 -0500 Subject: [PATCH 048/121] support for grid file in MRT model --- models/MRTModel.cpp | 31 +++++++++++++++++++++++-------- 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/models/MRTModel.cpp b/models/MRTModel.cpp index 2ddba403..60847e54 100644 --- a/models/MRTModel.cpp +++ b/models/MRTModel.cpp @@ -3,6 +3,7 @@ */ #include "models/MRTModel.h" #include "analysis/distance.h" +#include "common/ReadMicroCT.h" ScaLBL_MRTModel::ScaLBL_MRTModel(int RANK, int NP, const Utilities::MPI& COMM): rank(RANK), nprocs(NP), Restart(0),timestep(0),timestepMax(0),tau(0), @@ -98,15 +99,29 @@ void ScaLBL_MRTModel::ReadInput(){ sprintf(LocalRankFilename,"%s%s","ID.",LocalRankString); sprintf(LocalRestartFile,"%s%s","Restart.",LocalRankString); - if (domain_db->keyExists( "Filename" )){ - auto Filename = domain_db->getScalar( "Filename" ); - Mask->Decomp(Filename); - } - else{ - Mask->ReadIDs(); - } + + if (domain_db->keyExists( "Filename" )){ + auto Filename = domain_db->getScalar( "Filename" ); + Mask->Decomp(Filename); + } + else if (domain_db->keyExists( "GridFile" )){ + // Read the local domain data + auto input_id = readMicroCT( *domain_db, comm ); + // Fill the halo (assuming GCW of 1) + array size0 = { (int) input_id.size(0), (int) input_id.size(1), (int) input_id.size(2) }; + ArraySize size1 = { (size_t) Mask->Nx, (size_t) Mask->Ny, (size_t) Mask->Nz }; + ASSERT( (int) size1[0] == size0[0]+2 && (int) size1[1] == size0[1]+2 && (int) size1[2] == size0[2]+2 ); + fillHalo fill( comm, Mask->rank_info, size0, { 1, 1, 1 }, 0, 1 ); + Array id_view; + id_view.viewRaw( size1, Mask->id ); + fill.copy( input_id, id_view ); + fill.fill( id_view ); + } + else{ + Mask->ReadIDs(); + } - // Generate the signed distance map + // Generate the signed distance map // Initialize the domain and communication Array id_solid(Nx,Ny,Nz); // Solve for the position of the solid phase From 46c407695620df3b45f3eabba073576b643c453f Mon Sep 17 00:00:00 2001 From: Rex Zhe Li Date: Mon, 17 Feb 2020 12:06:58 -0500 Subject: [PATCH 049/121] fix some typo --- models/ColorModel.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/models/ColorModel.cpp b/models/ColorModel.cpp index b8578f4e..36c40224 100644 --- a/models/ColorModel.cpp +++ b/models/ColorModel.cpp @@ -838,7 +838,7 @@ void ScaLBL_ColorModel::Run(){ delta_volume_target=Dm->Volume*(volA+(log_krA_target - log_krA)/slope_krA_volume); log_krA_prev = log_krA; volA_prev = volA; - printf(" ",log_krA, log_krA_target, vol_A, ); + printf(" ",log_krA, log_krA_target, volA); /** compute averages & write data **/ Averages->Full(); Averages->Write(timestep); From 586bc09f842efac51a399a5842e5469399f8a4eb Mon Sep 17 00:00:00 2001 From: JamesEMcclure Date: Fri, 21 Feb 2020 11:11:59 -0500 Subject: [PATCH 050/121] fix print bug --- models/ColorModel.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/models/ColorModel.cpp b/models/ColorModel.cpp index b8578f4e..097d53e4 100644 --- a/models/ColorModel.cpp +++ b/models/ColorModel.cpp @@ -838,7 +838,7 @@ void ScaLBL_ColorModel::Run(){ delta_volume_target=Dm->Volume*(volA+(log_krA_target - log_krA)/slope_krA_volume); log_krA_prev = log_krA; volA_prev = volA; - printf(" 
",log_krA, log_krA_target, vol_A, ); + printf(" log(kr)=%f, TARGET log(kr)=%f, volume=%f \n",log_krA, log_krA_target, vol_A, ); /** compute averages & write data **/ Averages->Full(); Averages->Write(timestep); From a42a0c84408d652d382cf8b5c66910a89718d44c Mon Sep 17 00:00:00 2001 From: JamesEMcclure Date: Fri, 21 Feb 2020 11:16:26 -0500 Subject: [PATCH 051/121] fix print bug --- models/ColorModel.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/models/ColorModel.cpp b/models/ColorModel.cpp index 097d53e4..1f695bed 100644 --- a/models/ColorModel.cpp +++ b/models/ColorModel.cpp @@ -838,7 +838,7 @@ void ScaLBL_ColorModel::Run(){ delta_volume_target=Dm->Volume*(volA+(log_krA_target - log_krA)/slope_krA_volume); log_krA_prev = log_krA; volA_prev = volA; - printf(" log(kr)=%f, TARGET log(kr)=%f, volume=%f \n",log_krA, log_krA_target, vol_A, ); + printf(" log(kr)=%f, TARGET log(kr)=%f, volume=%f \n",log_krA, log_krA_target, vol_A); /** compute averages & write data **/ Averages->Full(); Averages->Write(timestep); From b99d32ef0c120e9fa7d9399d7a68b4c4a5b85779 Mon Sep 17 00:00:00 2001 From: JamesEMcclure Date: Fri, 21 Feb 2020 11:28:18 -0500 Subject: [PATCH 052/121] fix print --- models/ColorModel.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/models/ColorModel.cpp b/models/ColorModel.cpp index 82680a1c..8293e09f 100644 --- a/models/ColorModel.cpp +++ b/models/ColorModel.cpp @@ -838,7 +838,7 @@ void ScaLBL_ColorModel::Run(){ delta_volume_target=Dm->Volume*(volA+(log_krA_target - log_krA)/slope_krA_volume); log_krA_prev = log_krA; volA_prev = volA; - printf(" log(kr)=%f, TARGET log(kr)=%f, volume=%f \n",log_krA, log_krA_target, volA); + printf(" log(kr)=%f, volume=%f, TARGET log(kr)=%f, volume change=%f \n",log_krA, volA, log_krA_target, delta_volume_target/(volA*Dm->Volume)); /** compute averages & write data **/ Averages->Full(); Averages->Write(timestep); From 81a25b99977f03f729fffd20d9d8938ca455eeb6 Mon Sep 17 00:00:00 2001 From: JamesEMcclure Date: Fri, 21 Feb 2020 11:43:58 -0500 Subject: [PATCH 053/121] try for better Ca target --- models/ColorModel.cpp | 35 +++++++++++++++-------------------- 1 file changed, 15 insertions(+), 20 deletions(-) diff --git a/models/ColorModel.cpp b/models/ColorModel.cpp index 8293e09f..2c773c8e 100644 --- a/models/ColorModel.cpp +++ b/models/ColorModel.cpp @@ -521,8 +521,6 @@ void ScaLBL_ColorModel::Run(){ double NOISE_THRESHOLD = 0.0; double BUMP_RATE = 2.0; bool USE_BUMP_RATE = false; - int RESCALE_FORCE_COUNT = 0; - int RESCALE_FORCE_MAX = 0; /* history for morphological algoirthm */ double KRA_MORPH_FACTOR=0.8; @@ -801,6 +799,20 @@ void ScaLBL_ColorModel::Run(){ double flow_rate_B = volB*(vB_x*dir_x + vB_y*dir_y + vB_z*dir_z); double Ca = fabs(muA*flow_rate_A + muB*flow_rate_B)/(5.796*alpha); + if (SET_CAPILLARY_NUMBER && CURRENT_STEADY_TIMESTEPS%MIN_STEADY_TIMESTEPS < analysis_interval ){ + Fx *= capillary_number / Ca; + Fy *= capillary_number / Ca; + Fz *= capillary_number / Ca; + if (force_mag > 1e-3){ + Fx *= 1e-3/force_mag; // impose ceiling for stability + Fy *= 1e-3/force_mag; + Fz *= 1e-3/force_mag; + } + if (rank == 0) printf(" -- adjust force by factor %f \n ",capillary_number / Ca); + Averages->SetParams(rhoA,rhoB,tauA,tauB,Fx,Fy,Fz,alpha,beta); + color_db->putVector("F",{Fx,Fy,Fz}); + } + if ( morph_timesteps > morph_interval ){ bool isSteady = false; @@ -808,23 +820,6 @@ void ScaLBL_ColorModel::Run(){ isSteady = true; if (CURRENT_STEADY_TIMESTEPS > MAX_STEADY_TIMESTEPS) isSteady = true; - - if 
(SET_CAPILLARY_NUMBER && RESCALE_FORCE_COUNT < RESCALE_FORCE_MAX){ - RESCALE_FORCE_COUNT++; - Fx *= capillary_number / Ca; - Fy *= capillary_number / Ca; - Fz *= capillary_number / Ca; - - if (force_mag > 1e-3){ - Fx *= 1e-3/force_mag; // impose ceiling for stability - Fy *= 1e-3/force_mag; - Fz *= 1e-3/force_mag; - } - - if (rank == 0) printf(" -- adjust force by factor %f \n ",capillary_number / Ca); - Averages->SetParams(rhoA,rhoB,tauA,tauB,Fx,Fy,Fz,alpha,beta); - color_db->putVector("F",{Fx,Fy,Fz}); - } if ( isSteady ){ MORPH_ADAPT = true; @@ -913,7 +908,6 @@ void ScaLBL_ColorModel::Run(){ Fx *= capillary_number / Ca; Fy *= capillary_number / Ca; Fz *= capillary_number / Ca; - RESCALE_FORCE_COUNT = 1; if (force_mag > 1e-3){ Fx *= 1e-3/force_mag; // impose ceiling for stability Fy *= 1e-3/force_mag; @@ -933,6 +927,7 @@ void ScaLBL_ColorModel::Run(){ Averages->SetParams(rhoA,rhoB,tauA,tauB,Fx,Fy,Fz,alpha,beta); color_db->putVector("F",{Fx,Fy,Fz}); } + CURRENT_STEADY_TIMESTEPS = 0; } else{ From 1694f4530ccaeb93aa484ad8883a320c0c714b4c Mon Sep 17 00:00:00 2001 From: Rex Zhe Li Date: Fri, 21 Feb 2020 21:22:54 -0500 Subject: [PATCH 054/121] comment out the variable rescale_force_count that was deprecated --- models/ColorModel.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/models/ColorModel.cpp b/models/ColorModel.cpp index 2c773c8e..3fef03d1 100644 --- a/models/ColorModel.cpp +++ b/models/ColorModel.cpp @@ -583,9 +583,9 @@ void ScaLBL_ColorModel::Run(){ SET_CAPILLARY_NUMBER=true; //RESCALE_FORCE_MAX = 1; } - if (analysis_db->keyExists( "rescale_force_count" )){ - RESCALE_FORCE_MAX = analysis_db->getScalar( "rescale_force_count" ); - } +// if (analysis_db->keyExists( "rescale_force_count" )){ +// RESCALE_FORCE_MAX = analysis_db->getScalar( "rescale_force_count" ); +// } if (color_db->keyExists( "timestep" )){ timestep = color_db->getScalar( "timestep" ); } From fa61d19095187f8fac5c1671ee756eedda565c4d Mon Sep 17 00:00:00 2001 From: James McClure Date: Wed, 4 Mar 2020 14:50:53 -0500 Subject: [PATCH 055/121] Update helper functions to read input database --- example/Workflow/HelperFunctions.R | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/example/Workflow/HelperFunctions.R b/example/Workflow/HelperFunctions.R index 497cb262..6c8bd903 100644 --- a/example/Workflow/HelperFunctions.R +++ b/example/Workflow/HelperFunctions.R @@ -2,6 +2,31 @@ require("ggplot2") GG_THEME=theme_bw()+theme(panel.grid.major = element_blank(),panel.grid.minor = element_blank()) +ReadDatabase<-function(FILE){ + + INPUT<-gsub(';','',readLines(FILE)) + + S<-gsub('tauA = ','',gsub("\\s+"," ",(grep("tauA",INPUT,value=TRUE)))) + TAU_A = as.numeric(S) + S<-gsub('tauB = ','',gsub("\\s+"," ",(grep("tauB",INPUT,value=TRUE)))) + TAU_B = as.numeric(S) + S<-gsub('rhoA = ','',gsub("\\s+"," ",(grep("rhoA",INPUT,value=TRUE)))) + RHO_A = as.numeric(S) + S<-gsub('rhoB = ','',gsub("\\s+"," ",(grep("rhoB",INPUT,value=TRUE)))) + RHO_B = as.numeric(S) + + S<-gsub('alpha = ','',gsub("\\s+"," ",(grep("alpha",INPUT,value=TRUE)))) + ALPHA = as.numeric(S) + + # Read the affinity + S<-gsub('ComponentAffinity = ','',gsub("\\s+"," ",(grep("ComponentAffinity",INPUT,value=TRUE)))) + AFFINITY<-as.numeric(unlist(strsplit(S,", "))) + + PARAMETERS<-c(TAU_A,TAU_B,RHO_A,RHO_B,ALPHA,AFFINITY) + + return(PARAMETERS) +} + ReadSubphase<-function(PATH){ FILE=paste0(PATH,"/subphase.csv") S<-read.csv(FILE,head=TRUE,sep=" ") From 7bb01557d838a1231bc5780a5737f0af4da5d43c Mon Sep 17 00:00:00 2001 From: 
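PATCH 053 above moves the capillary-number rescaling out of the steady-state branch so the body force is adjusted every analysis interval during the early transient. A minimal self-contained sketch of that adjustment follows; the function name is illustrative, and the 1e-3 ceiling mirrors the stability limit that appears in the patch.

    #include <cmath>

    // Rescale the body force so the measured capillary number tracks the target,
    // then cap the force magnitude for numerical stability (cf. PATCH 053).
    static void rescale_force(double &Fx, double &Fy, double &Fz,
                              double Ca_measured, double Ca_target)
    {
        double scale = Ca_target / Ca_measured;   // proportional adjustment
        Fx *= scale; Fy *= scale; Fz *= scale;
        double force_mag = sqrt(Fx*Fx + Fy*Fy + Fz*Fz);
        if (force_mag > 1.0e-3) {                 // impose ceiling for stability
            Fx *= 1.0e-3 / force_mag;
            Fy *= 1.0e-3 / force_mag;
            Fz *= 1.0e-3 / force_mag;
        }
    }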
JamesEMcclure Date: Tue, 17 Mar 2020 13:45:51 -0400 Subject: [PATCH 056/121] updated bugfix with old ScaLBL --- common/ScaLBL.cpp | 251 +++++++++++++++++++++++----------------------- common/ScaLBL.h | 9 +- 2 files changed, 134 insertions(+), 126 deletions(-) diff --git a/common/ScaLBL.cpp b/common/ScaLBL.cpp index 6f2966e7..21656757 100644 --- a/common/ScaLBL.cpp +++ b/common/ScaLBL.cpp @@ -5,7 +5,9 @@ ScaLBL_Communicator::ScaLBL_Communicator(std::shared_ptr Dm){ Lock=false; // unlock the communicator //...................................................................................... // Create a separate copy of the communicator for the device - MPI_COMM_SCALBL = Dm->Comm.dup(); + //MPI_Comm_group(Dm->Comm,&Group); + //MPI_Comm_create(Dm->Comm,Group,&MPI_COMM_SCALBL); + MPI_Comm_dup(Dm->Comm,&MPI_COMM_SCALBL); //...................................................................................... // Copy the domain size and communication information directly from Dm Nx = Dm->Nx; @@ -213,7 +215,7 @@ ScaLBL_Communicator::ScaLBL_Communicator(std::shared_ptr Dm){ ScaLBL_CopyToZeroCopy(dvcRecvList_Yz,Dm->recvList_Yz,recvCount_Yz*sizeof(int)); //...................................................................................... - MPI_COMM_SCALBL.barrier(); + MPI_Barrier(MPI_COMM_SCALBL); //................................................................................... // Set up the recieve distribution lists @@ -286,7 +288,7 @@ ScaLBL_Communicator::ScaLBL_Communicator(std::shared_ptr Dm){ //................................................................................... //...................................................................................... - MPI_COMM_SCALBL.barrier(); + MPI_Barrier(MPI_COMM_SCALBL); ScaLBL_DeviceBarrier(); //...................................................................................... SendCount = sendCount_x+sendCount_X+sendCount_y+sendCount_Y+sendCount_z+sendCount_Z+ @@ -363,7 +365,7 @@ int ScaLBL_Communicator::MemoryOptimizedLayoutAA(IntArray &Map, int *neighborLis int idx,i,j,k,n; // Check that Map has size matching sub-domain - if ( (int) Map.size(0) != Nx) + if (Map.size(0) != Nx) ERROR("ScaLBL_Communicator::MemoryOptimizedLayout: Map array dimensions do not match! \n"); // Initialize Map @@ -867,8 +869,8 @@ void ScaLBL_Communicator::SendD3Q19AA(double *dist){ ScaLBL_D3Q19_Pack(12,dvcSendList_x,3*sendCount_x,sendCount_x,sendbuf_x,dist,N); ScaLBL_D3Q19_Pack(14,dvcSendList_x,4*sendCount_x,sendCount_x,sendbuf_x,dist,N); - req1[0] = MPI_COMM_SCALBL.Isend(sendbuf_x, 5*sendCount_x,rank_x,sendtag); - req2[0] = MPI_COMM_SCALBL.Irecv(recvbuf_X, 5*recvCount_X,rank_X,recvtag); + MPI_Isend(sendbuf_x, 5*sendCount_x,MPI_DOUBLE,rank_x,sendtag,MPI_COMM_SCALBL,&req1[0]); + MPI_Irecv(recvbuf_X, 5*recvCount_X,MPI_DOUBLE,rank_X,recvtag,MPI_COMM_SCALBL,&req2[0]); //...Packing for X face(1,7,9,11,13)................................ 
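The first hunk of PATCH 056 above restores a raw MPI_Comm_dup for the communicator held by ScaLBL_Communicator. The point of the duplication is isolation: messages posted on the duplicated communicator can never match sends or receives posted on the parent by other parts of the application. A minimal sketch, with an illustrative helper name:

    #include <mpi.h>

    // Give ScaLBL its own communication context so its halo traffic cannot be
    // intercepted by unrelated point-to-point traffic on the parent communicator.
    static MPI_Comm make_private_comm(MPI_Comm parent)
    {
        MPI_Comm comm_private;
        MPI_Comm_dup(parent, &comm_private);   // collective over 'parent'
        return comm_private;
    }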
ScaLBL_D3Q19_Pack(1,dvcSendList_X,0,sendCount_X,sendbuf_X,dist,N); ScaLBL_D3Q19_Pack(7,dvcSendList_X,sendCount_X,sendCount_X,sendbuf_X,dist,N); @@ -876,8 +878,8 @@ void ScaLBL_Communicator::SendD3Q19AA(double *dist){ ScaLBL_D3Q19_Pack(11,dvcSendList_X,3*sendCount_X,sendCount_X,sendbuf_X,dist,N); ScaLBL_D3Q19_Pack(13,dvcSendList_X,4*sendCount_X,sendCount_X,sendbuf_X,dist,N); - req1[1] = MPI_COMM_SCALBL.Isend(sendbuf_X, 5*sendCount_X,rank_X,sendtag); - req2[1] = MPI_COMM_SCALBL.Irecv(recvbuf_x, 5*recvCount_x,rank_x,recvtag); + MPI_Isend(sendbuf_X, 5*sendCount_X,MPI_DOUBLE,rank_X,sendtag,MPI_COMM_SCALBL,&req1[1]); + MPI_Irecv(recvbuf_x, 5*recvCount_x,MPI_DOUBLE,rank_x,recvtag,MPI_COMM_SCALBL,&req2[1]); //...Packing for y face(4,8,9,16,18)................................. ScaLBL_D3Q19_Pack(4,dvcSendList_y,0,sendCount_y,sendbuf_y,dist,N); ScaLBL_D3Q19_Pack(8,dvcSendList_y,sendCount_y,sendCount_y,sendbuf_y,dist,N); @@ -885,8 +887,8 @@ void ScaLBL_Communicator::SendD3Q19AA(double *dist){ ScaLBL_D3Q19_Pack(16,dvcSendList_y,3*sendCount_y,sendCount_y,sendbuf_y,dist,N); ScaLBL_D3Q19_Pack(18,dvcSendList_y,4*sendCount_y,sendCount_y,sendbuf_y,dist,N); - req1[2] = MPI_COMM_SCALBL.Isend(sendbuf_y, 5*sendCount_y,rank_y,sendtag); - req2[2] = MPI_COMM_SCALBL.Irecv(recvbuf_Y, 5*recvCount_Y,rank_Y,recvtag); + MPI_Isend(sendbuf_y, 5*sendCount_y,MPI_DOUBLE,rank_y,sendtag,MPI_COMM_SCALBL,&req1[2]); + MPI_Irecv(recvbuf_Y, 5*recvCount_Y,MPI_DOUBLE,rank_Y,recvtag,MPI_COMM_SCALBL,&req2[2]); //...Packing for Y face(3,7,10,15,17)................................. ScaLBL_D3Q19_Pack(3,dvcSendList_Y,0,sendCount_Y,sendbuf_Y,dist,N); ScaLBL_D3Q19_Pack(7,dvcSendList_Y,sendCount_Y,sendCount_Y,sendbuf_Y,dist,N); @@ -894,8 +896,8 @@ void ScaLBL_Communicator::SendD3Q19AA(double *dist){ ScaLBL_D3Q19_Pack(15,dvcSendList_Y,3*sendCount_Y,sendCount_Y,sendbuf_Y,dist,N); ScaLBL_D3Q19_Pack(17,dvcSendList_Y,4*sendCount_Y,sendCount_Y,sendbuf_Y,dist,N); - req1[3] = MPI_COMM_SCALBL.Isend(sendbuf_Y, 5*sendCount_Y,rank_Y,sendtag); - req2[3] = MPI_COMM_SCALBL.Irecv(recvbuf_y, 5*recvCount_y,rank_y,recvtag); + MPI_Isend(sendbuf_Y, 5*sendCount_Y,MPI_DOUBLE,rank_Y,sendtag,MPI_COMM_SCALBL,&req1[3]); + MPI_Irecv(recvbuf_y, 5*recvCount_y,MPI_DOUBLE,rank_y,recvtag,MPI_COMM_SCALBL,&req2[3]); //...Packing for z face(6,12,13,16,17)................................ ScaLBL_D3Q19_Pack(6,dvcSendList_z,0,sendCount_z,sendbuf_z,dist,N); ScaLBL_D3Q19_Pack(12,dvcSendList_z,sendCount_z,sendCount_z,sendbuf_z,dist,N); @@ -903,8 +905,8 @@ void ScaLBL_Communicator::SendD3Q19AA(double *dist){ ScaLBL_D3Q19_Pack(16,dvcSendList_z,3*sendCount_z,sendCount_z,sendbuf_z,dist,N); ScaLBL_D3Q19_Pack(17,dvcSendList_z,4*sendCount_z,sendCount_z,sendbuf_z,dist,N); - req1[4] = MPI_COMM_SCALBL.Isend(sendbuf_z, 5*sendCount_z,rank_z,sendtag); - req2[4] = MPI_COMM_SCALBL.Irecv(recvbuf_Z, 5*recvCount_Z,rank_Z,recvtag); + MPI_Isend(sendbuf_z, 5*sendCount_z,MPI_DOUBLE,rank_z,sendtag,MPI_COMM_SCALBL,&req1[4]); + MPI_Irecv(recvbuf_Z, 5*recvCount_Z,MPI_DOUBLE,rank_Z,recvtag,MPI_COMM_SCALBL,&req2[4]); //...Packing for Z face(5,11,14,15,18)................................ 
ScaLBL_D3Q19_Pack(5,dvcSendList_Z,0,sendCount_Z,sendbuf_Z,dist,N); @@ -913,57 +915,57 @@ void ScaLBL_Communicator::SendD3Q19AA(double *dist){ ScaLBL_D3Q19_Pack(15,dvcSendList_Z,3*sendCount_Z,sendCount_Z,sendbuf_Z,dist,N); ScaLBL_D3Q19_Pack(18,dvcSendList_Z,4*sendCount_Z,sendCount_Z,sendbuf_Z,dist,N); - req1[5] = MPI_COMM_SCALBL.Isend(sendbuf_Z, 5*sendCount_Z,rank_Z,sendtag); - req2[5] = MPI_COMM_SCALBL.Irecv(recvbuf_z, 5*recvCount_z,rank_z,recvtag); + MPI_Isend(sendbuf_Z, 5*sendCount_Z,MPI_DOUBLE,rank_Z,sendtag,MPI_COMM_SCALBL,&req1[5]); + MPI_Irecv(recvbuf_z, 5*recvCount_z,MPI_DOUBLE,rank_z,recvtag,MPI_COMM_SCALBL,&req2[5]); //...Pack the xy edge (8)................................ ScaLBL_D3Q19_Pack(8,dvcSendList_xy,0,sendCount_xy,sendbuf_xy,dist,N); - req1[6] = MPI_COMM_SCALBL.Isend(sendbuf_xy, sendCount_xy,rank_xy,sendtag); - req2[6] = MPI_COMM_SCALBL.Irecv(recvbuf_XY, recvCount_XY,rank_XY,recvtag); + MPI_Isend(sendbuf_xy, sendCount_xy,MPI_DOUBLE,rank_xy,sendtag,MPI_COMM_SCALBL,&req1[6]); + MPI_Irecv(recvbuf_XY, recvCount_XY,MPI_DOUBLE,rank_XY,recvtag,MPI_COMM_SCALBL,&req2[6]); //...Pack the Xy edge (9)................................ ScaLBL_D3Q19_Pack(9,dvcSendList_Xy,0,sendCount_Xy,sendbuf_Xy,dist,N); - req1[8] = MPI_COMM_SCALBL.Isend(sendbuf_Xy, sendCount_Xy,rank_Xy,sendtag); - req2[8] = MPI_COMM_SCALBL.Irecv(recvbuf_xY, recvCount_xY,rank_xY,recvtag); + MPI_Isend(sendbuf_Xy, sendCount_Xy,MPI_DOUBLE,rank_Xy,sendtag,MPI_COMM_SCALBL,&req1[8]); + MPI_Irecv(recvbuf_xY, recvCount_xY,MPI_DOUBLE,rank_xY,recvtag,MPI_COMM_SCALBL,&req2[8]); //...Pack the xY edge (10)................................ ScaLBL_D3Q19_Pack(10,dvcSendList_xY,0,sendCount_xY,sendbuf_xY,dist,N); - req1[9] = MPI_COMM_SCALBL.Isend(sendbuf_xY, sendCount_xY,rank_xY,sendtag); - req2[9] = MPI_COMM_SCALBL.Irecv(recvbuf_Xy, recvCount_Xy,rank_Xy,recvtag); + MPI_Isend(sendbuf_xY, sendCount_xY,MPI_DOUBLE,rank_xY,sendtag,MPI_COMM_SCALBL,&req1[9]); + MPI_Irecv(recvbuf_Xy, recvCount_Xy,MPI_DOUBLE,rank_Xy,recvtag,MPI_COMM_SCALBL,&req2[9]); //...Pack the XY edge (7)................................ ScaLBL_D3Q19_Pack(7,dvcSendList_XY,0,sendCount_XY,sendbuf_XY,dist,N); - req1[7] = MPI_COMM_SCALBL.Isend(sendbuf_XY, sendCount_XY,rank_XY,sendtag); - req2[7] = MPI_COMM_SCALBL.Irecv(recvbuf_xy, recvCount_xy,rank_xy,recvtag); + MPI_Isend(sendbuf_XY, sendCount_XY,MPI_DOUBLE,rank_XY,sendtag,MPI_COMM_SCALBL,&req1[7]); + MPI_Irecv(recvbuf_xy, recvCount_xy,MPI_DOUBLE,rank_xy,recvtag,MPI_COMM_SCALBL,&req2[7]); //...Pack the xz edge (12)................................ ScaLBL_D3Q19_Pack(12,dvcSendList_xz,0,sendCount_xz,sendbuf_xz,dist,N); - req1[10] = MPI_COMM_SCALBL.Isend(sendbuf_xz, sendCount_xz,rank_xz,sendtag); - req2[10] = MPI_COMM_SCALBL.Irecv(recvbuf_XZ, recvCount_XZ,rank_XZ,recvtag); + MPI_Isend(sendbuf_xz, sendCount_xz,MPI_DOUBLE,rank_xz,sendtag,MPI_COMM_SCALBL,&req1[10]); + MPI_Irecv(recvbuf_XZ, recvCount_XZ,MPI_DOUBLE,rank_XZ,recvtag,MPI_COMM_SCALBL,&req2[10]); //...Pack the xZ edge (14)................................ ScaLBL_D3Q19_Pack(14,dvcSendList_xZ,0,sendCount_xZ,sendbuf_xZ,dist,N); - req1[13] = MPI_COMM_SCALBL.Isend(sendbuf_xZ, sendCount_xZ,rank_xZ,sendtag); - req2[13] = MPI_COMM_SCALBL.Irecv(recvbuf_Xz, recvCount_Xz,rank_Xz,recvtag); + MPI_Isend(sendbuf_xZ, sendCount_xZ,MPI_DOUBLE,rank_xZ,sendtag,MPI_COMM_SCALBL,&req1[13]); + MPI_Irecv(recvbuf_Xz, recvCount_Xz,MPI_DOUBLE,rank_Xz,recvtag,MPI_COMM_SCALBL,&req2[13]); //...Pack the Xz edge (13)................................ 
ScaLBL_D3Q19_Pack(13,dvcSendList_Xz,0,sendCount_Xz,sendbuf_Xz,dist,N); - req1[12] = MPI_COMM_SCALBL.Isend(sendbuf_Xz, sendCount_Xz,rank_Xz,sendtag); - req2[12] = MPI_COMM_SCALBL.Irecv(recvbuf_xZ, recvCount_xZ,rank_xZ,recvtag); + MPI_Isend(sendbuf_Xz, sendCount_Xz,MPI_DOUBLE,rank_Xz,sendtag,MPI_COMM_SCALBL,&req1[12]); + MPI_Irecv(recvbuf_xZ, recvCount_xZ,MPI_DOUBLE,rank_xZ,recvtag,MPI_COMM_SCALBL,&req2[12]); //...Pack the XZ edge (11)................................ ScaLBL_D3Q19_Pack(11,dvcSendList_XZ,0,sendCount_XZ,sendbuf_XZ,dist,N); - req1[11] = MPI_COMM_SCALBL.Isend(sendbuf_XZ, sendCount_XZ,rank_XZ,sendtag); - req2[11] = MPI_COMM_SCALBL.Irecv(recvbuf_xz, recvCount_xz,rank_xz,recvtag); + MPI_Isend(sendbuf_XZ, sendCount_XZ,MPI_DOUBLE,rank_XZ,sendtag,MPI_COMM_SCALBL,&req1[11]); + MPI_Irecv(recvbuf_xz, recvCount_xz,MPI_DOUBLE,rank_xz,recvtag,MPI_COMM_SCALBL,&req2[11]); //...Pack the yz edge (16)................................ ScaLBL_D3Q19_Pack(16,dvcSendList_yz,0,sendCount_yz,sendbuf_yz,dist,N); - req1[14] = MPI_COMM_SCALBL.Isend(sendbuf_yz, sendCount_yz,rank_yz,sendtag); - req2[14] = MPI_COMM_SCALBL.Irecv(recvbuf_YZ, recvCount_YZ,rank_YZ,recvtag); + MPI_Isend(sendbuf_yz, sendCount_yz,MPI_DOUBLE,rank_yz,sendtag,MPI_COMM_SCALBL,&req1[14]); + MPI_Irecv(recvbuf_YZ, recvCount_YZ,MPI_DOUBLE,rank_YZ,recvtag,MPI_COMM_SCALBL,&req2[14]); //...Pack the yZ edge (18)................................ ScaLBL_D3Q19_Pack(18,dvcSendList_yZ,0,sendCount_yZ,sendbuf_yZ,dist,N); - req1[17] = MPI_COMM_SCALBL.Isend(sendbuf_yZ, sendCount_yZ,rank_yZ,sendtag); - req2[17] = MPI_COMM_SCALBL.Irecv(recvbuf_Yz, recvCount_Yz,rank_Yz,recvtag); + MPI_Isend(sendbuf_yZ, sendCount_yZ,MPI_DOUBLE,rank_yZ,sendtag,MPI_COMM_SCALBL,&req1[17]); + MPI_Irecv(recvbuf_Yz, recvCount_Yz,MPI_DOUBLE,rank_Yz,recvtag,MPI_COMM_SCALBL,&req2[17]); //...Pack the Yz edge (17)................................ ScaLBL_D3Q19_Pack(17,dvcSendList_Yz,0,sendCount_Yz,sendbuf_Yz,dist,N); - req1[16] = MPI_COMM_SCALBL.Isend(sendbuf_Yz, sendCount_Yz,rank_Yz,sendtag); - req2[16] = MPI_COMM_SCALBL.Irecv(recvbuf_yZ, recvCount_yZ,rank_yZ,recvtag); + MPI_Isend(sendbuf_Yz, sendCount_Yz,MPI_DOUBLE,rank_Yz,sendtag,MPI_COMM_SCALBL,&req1[16]); + MPI_Irecv(recvbuf_yZ, recvCount_yZ,MPI_DOUBLE,rank_yZ,recvtag,MPI_COMM_SCALBL,&req2[16]); //...Pack the YZ edge (15)................................ ScaLBL_D3Q19_Pack(15,dvcSendList_YZ,0,sendCount_YZ,sendbuf_YZ,dist,N); - req1[15] = MPI_COMM_SCALBL.Isend(sendbuf_YZ, sendCount_YZ,rank_YZ,sendtag); - req2[15] = MPI_COMM_SCALBL.Irecv(recvbuf_yz, recvCount_yz,rank_yz,recvtag); + MPI_Isend(sendbuf_YZ, sendCount_YZ,MPI_DOUBLE,rank_YZ,sendtag,MPI_COMM_SCALBL,&req1[15]); + MPI_Irecv(recvbuf_yz, recvCount_yz,MPI_DOUBLE,rank_yz,recvtag,MPI_COMM_SCALBL,&req2[15]); //................................................................................... } @@ -973,8 +975,8 @@ void ScaLBL_Communicator::RecvD3Q19AA(double *dist){ // NOTE: the center distribution f0 must NOT be at the start of feven, provide offset to start of f2 //................................................................................... // Wait for completion of D3Q19 communication - MPI_COMM_SCALBL.waitAll(18,req1); - MPI_COMM_SCALBL.waitAll(18,req2); + MPI_Waitall(18,req1,stat1); + MPI_Waitall(18,req2,stat2); ScaLBL_DeviceBarrier(); //................................................................................... 
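The hunks above and below convert every halo exchange in ScaLBL.cpp from the Utilities::MPI wrapper back to raw MPI calls. The pattern being preserved is the standard pack / MPI_Isend / MPI_Irecv / MPI_Waitall sequence; stripped of the ScaLBL buffer bookkeeping it reduces to the sketch below, where the buffer and rank names are placeholders.

    #include <mpi.h>
    #include <vector>

    // Post the send and receive for one face, then wait. In ScaLBL the waits are
    // deferred so that all 18 faces/edges/corners are in flight at once.
    static void exchange_face(MPI_Comm comm, int rank_to, int rank_from, int tag,
                              const std::vector<double> &sendbuf, std::vector<double> &recvbuf)
    {
        MPI_Request req[2];
        MPI_Status  stat[2];
        MPI_Isend(sendbuf.data(), (int) sendbuf.size(), MPI_DOUBLE, rank_to,   tag, comm, &req[0]);
        MPI_Irecv(recvbuf.data(), (int) recvbuf.size(), MPI_DOUBLE, rank_from, tag, comm, &req[1]);
        MPI_Waitall(2, req, stat);
    }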
@@ -1057,8 +1059,8 @@ void ScaLBL_Communicator::RecvGrad(double *phi, double *grad){ // Recieves halo and incorporates into D3Q19 based stencil gradient computation //................................................................................... // Wait for completion of D3Q19 communication - MPI_COMM_SCALBL.waitAll(18,req1); - MPI_COMM_SCALBL.waitAll(18,req2); + MPI_Waitall(18,req1,stat1); + MPI_Waitall(18,req2,stat2); ScaLBL_DeviceBarrier(); //................................................................................... @@ -1151,36 +1153,36 @@ void ScaLBL_Communicator::BiSendD3Q7AA(double *Aq, double *Bq){ ScaLBL_D3Q19_Pack(2,dvcSendList_x,0,sendCount_x,sendbuf_x,Aq,N); ScaLBL_D3Q19_Pack(2,dvcSendList_x,sendCount_x,sendCount_x,sendbuf_x,Bq,N); - req1[0] = MPI_COMM_SCALBL.Isend(sendbuf_x, 2*sendCount_x,rank_x,sendtag); - req2[0] = MPI_COMM_SCALBL.Irecv(recvbuf_X, 2*recvCount_X,rank_X,recvtag); + MPI_Isend(sendbuf_x, 2*sendCount_x,MPI_DOUBLE,rank_x,sendtag,MPI_COMM_SCALBL,&req1[0]); + MPI_Irecv(recvbuf_X, 2*recvCount_X,MPI_DOUBLE,rank_X,recvtag,MPI_COMM_SCALBL,&req2[0]); //...Packing for X face(1,7,9,11,13)................................ ScaLBL_D3Q19_Pack(1,dvcSendList_X,0,sendCount_X,sendbuf_X,Aq,N); ScaLBL_D3Q19_Pack(1,dvcSendList_X,sendCount_X,sendCount_X,sendbuf_X,Bq,N); - req1[1] = MPI_COMM_SCALBL.Isend(sendbuf_X, 2*sendCount_X,rank_X,sendtag); - req2[1] = MPI_COMM_SCALBL.Irecv(recvbuf_x, 2*recvCount_x,rank_x,recvtag); + MPI_Isend(sendbuf_X, 2*sendCount_X,MPI_DOUBLE,rank_X,sendtag,MPI_COMM_SCALBL,&req1[1]); + MPI_Irecv(recvbuf_x, 2*recvCount_x,MPI_DOUBLE,rank_x,recvtag,MPI_COMM_SCALBL,&req2[1]); //...Packing for y face(4,8,9,16,18)................................. ScaLBL_D3Q19_Pack(4,dvcSendList_y,0,sendCount_y,sendbuf_y,Aq,N); ScaLBL_D3Q19_Pack(4,dvcSendList_y,sendCount_y,sendCount_y,sendbuf_y,Bq,N); - req1[2] = MPI_COMM_SCALBL.Isend(sendbuf_y, 2*sendCount_y,rank_y,sendtag); - req2[2] = MPI_COMM_SCALBL.Irecv(recvbuf_Y, 2*recvCount_Y,rank_Y,recvtag); + MPI_Isend(sendbuf_y, 2*sendCount_y,MPI_DOUBLE,rank_y,sendtag,MPI_COMM_SCALBL,&req1[2]); + MPI_Irecv(recvbuf_Y, 2*recvCount_Y,MPI_DOUBLE,rank_Y,recvtag,MPI_COMM_SCALBL,&req2[2]); //...Packing for Y face(3,7,10,15,17)................................. ScaLBL_D3Q19_Pack(3,dvcSendList_Y,0,sendCount_Y,sendbuf_Y,Aq,N); ScaLBL_D3Q19_Pack(3,dvcSendList_Y,sendCount_Y,sendCount_Y,sendbuf_Y,Bq,N); - req1[3] = MPI_COMM_SCALBL.Isend(sendbuf_Y, 2*sendCount_Y,rank_Y,sendtag); - req2[3] = MPI_COMM_SCALBL.Irecv(recvbuf_y, 2*recvCount_y,rank_y,recvtag); + MPI_Isend(sendbuf_Y, 2*sendCount_Y,MPI_DOUBLE,rank_Y,sendtag,MPI_COMM_SCALBL,&req1[3]); + MPI_Irecv(recvbuf_y, 2*recvCount_y,MPI_DOUBLE,rank_y,recvtag,MPI_COMM_SCALBL,&req2[3]); //...Packing for z face(6,12,13,16,17)................................ ScaLBL_D3Q19_Pack(6,dvcSendList_z,0,sendCount_z,sendbuf_z,Aq,N); ScaLBL_D3Q19_Pack(6,dvcSendList_z,sendCount_z,sendCount_z,sendbuf_z,Bq,N); - req1[4] = MPI_COMM_SCALBL.Isend(sendbuf_z, 2*sendCount_z,rank_z,sendtag); - req2[4] = MPI_COMM_SCALBL.Irecv(recvbuf_Z, 2*recvCount_Z,rank_Z,recvtag); + MPI_Isend(sendbuf_z, 2*sendCount_z,MPI_DOUBLE,rank_z,sendtag,MPI_COMM_SCALBL,&req1[4]); + MPI_Irecv(recvbuf_Z, 2*recvCount_Z,MPI_DOUBLE,rank_Z,recvtag,MPI_COMM_SCALBL,&req2[4]); //...Packing for Z face(5,11,14,15,18)................................ 
ScaLBL_D3Q19_Pack(5,dvcSendList_Z,0,sendCount_Z,sendbuf_Z,Aq,N); @@ -1188,8 +1190,8 @@ void ScaLBL_Communicator::BiSendD3Q7AA(double *Aq, double *Bq){ //................................................................................... // Send all the distributions - req1[5] = MPI_COMM_SCALBL.Isend(sendbuf_Z, 2*sendCount_Z,rank_Z,sendtag); - req2[5] = MPI_COMM_SCALBL.Irecv(recvbuf_z, 2*recvCount_z,rank_z,recvtag); + MPI_Isend(sendbuf_Z, 2*sendCount_Z,MPI_DOUBLE,rank_Z,sendtag,MPI_COMM_SCALBL,&req1[5]); + MPI_Irecv(recvbuf_z, 2*recvCount_z,MPI_DOUBLE,rank_z,recvtag,MPI_COMM_SCALBL,&req2[5]); } @@ -1199,8 +1201,8 @@ void ScaLBL_Communicator::BiRecvD3Q7AA(double *Aq, double *Bq){ // NOTE: the center distribution f0 must NOT be at the start of feven, provide offset to start of f2 //................................................................................... // Wait for completion of D3Q19 communication - MPI_COMM_SCALBL.waitAll(6,req1); - MPI_COMM_SCALBL.waitAll(6,req2); + MPI_Waitall(6,req1,stat1); + MPI_Waitall(6,req2,stat2); ScaLBL_DeviceBarrier(); //................................................................................... @@ -1291,18 +1293,18 @@ void ScaLBL_Communicator::TriSendD3Q7AA(double *Aq, double *Bq, double *Cq){ //................................................................................... // Send all the distributions - req1[0] = MPI_COMM_SCALBL.Isend(sendbuf_x, 3*sendCount_x,rank_x,sendtag); - req2[0] = MPI_COMM_SCALBL.Irecv(recvbuf_X, 3*recvCount_X,rank_X,recvtag); - req1[1] = MPI_COMM_SCALBL.Isend(sendbuf_X, 3*sendCount_X,rank_X,sendtag); - req2[1] = MPI_COMM_SCALBL.Irecv(recvbuf_x, 3*recvCount_x,rank_x,recvtag); - req1[2] = MPI_COMM_SCALBL.Isend(sendbuf_y, 3*sendCount_y,rank_y,sendtag); - req2[2] = MPI_COMM_SCALBL.Irecv(recvbuf_Y, 3*recvCount_Y,rank_Y,recvtag); - req1[3] = MPI_COMM_SCALBL.Isend(sendbuf_Y, 3*sendCount_Y,rank_Y,sendtag); - req2[3] = MPI_COMM_SCALBL.Irecv(recvbuf_y, 3*recvCount_y,rank_y,recvtag); - req1[4] = MPI_COMM_SCALBL.Isend(sendbuf_z, 3*sendCount_z,rank_z,sendtag); - req2[4] = MPI_COMM_SCALBL.Irecv(recvbuf_Z, 3*recvCount_Z,rank_Z,recvtag); - req1[5] = MPI_COMM_SCALBL.Isend(sendbuf_Z, 3*sendCount_Z,rank_Z,sendtag); - req2[5] = MPI_COMM_SCALBL.Irecv(recvbuf_z, 3*recvCount_z,rank_z,recvtag); + MPI_Isend(sendbuf_x, 3*sendCount_x,MPI_DOUBLE,rank_x,sendtag,MPI_COMM_SCALBL,&req1[0]); + MPI_Irecv(recvbuf_X, 3*recvCount_X,MPI_DOUBLE,rank_X,recvtag,MPI_COMM_SCALBL,&req2[0]); + MPI_Isend(sendbuf_X, 3*sendCount_X,MPI_DOUBLE,rank_X,sendtag,MPI_COMM_SCALBL,&req1[1]); + MPI_Irecv(recvbuf_x, 3*recvCount_x,MPI_DOUBLE,rank_x,recvtag,MPI_COMM_SCALBL,&req2[1]); + MPI_Isend(sendbuf_y, 3*sendCount_y,MPI_DOUBLE,rank_y,sendtag,MPI_COMM_SCALBL,&req1[2]); + MPI_Irecv(recvbuf_Y, 3*recvCount_Y,MPI_DOUBLE,rank_Y,recvtag,MPI_COMM_SCALBL,&req2[2]); + MPI_Isend(sendbuf_Y, 3*sendCount_Y,MPI_DOUBLE,rank_Y,sendtag,MPI_COMM_SCALBL,&req1[3]); + MPI_Irecv(recvbuf_y, 3*recvCount_y,MPI_DOUBLE,rank_y,recvtag,MPI_COMM_SCALBL,&req2[3]); + MPI_Isend(sendbuf_z, 3*sendCount_z,MPI_DOUBLE,rank_z,sendtag,MPI_COMM_SCALBL,&req1[4]); + MPI_Irecv(recvbuf_Z, 3*recvCount_Z,MPI_DOUBLE,rank_Z,recvtag,MPI_COMM_SCALBL,&req2[4]); + MPI_Isend(sendbuf_Z, 3*sendCount_Z,MPI_DOUBLE,rank_Z,sendtag,MPI_COMM_SCALBL,&req1[5]); + MPI_Irecv(recvbuf_z, 3*recvCount_z,MPI_DOUBLE,rank_z,recvtag,MPI_COMM_SCALBL,&req2[5]); } @@ -1312,8 +1314,8 @@ void ScaLBL_Communicator::TriRecvD3Q7AA(double *Aq, double *Bq, double *Cq){ // NOTE: the center distribution f0 must NOT be at the start of feven, provide offset 
to start of f2 //................................................................................... // Wait for completion of D3Q19 communication - MPI_COMM_SCALBL.waitAll(6,req1); - MPI_COMM_SCALBL.waitAll(6,req2); + MPI_Waitall(6,req1,stat1); + MPI_Waitall(6,req2,stat2); ScaLBL_DeviceBarrier(); //................................................................................... @@ -1407,49 +1409,49 @@ void ScaLBL_Communicator::SendHalo(double *data){ // Send / Recv all the phase indcator field values //................................................................................... - req1[0] = MPI_COMM_SCALBL.Isend(sendbuf_x, sendCount_x,rank_x,sendtag); - req2[0] = MPI_COMM_SCALBL.Irecv(recvbuf_X, recvCount_X,rank_X,recvtag); - req1[1] = MPI_COMM_SCALBL.Isend(sendbuf_X, sendCount_X,rank_X,sendtag); - req2[1] = MPI_COMM_SCALBL.Irecv(recvbuf_x, recvCount_x,rank_x,recvtag); - req1[2] = MPI_COMM_SCALBL.Isend(sendbuf_y, sendCount_y,rank_y,sendtag); - req2[2] = MPI_COMM_SCALBL.Irecv(recvbuf_Y, recvCount_Y,rank_Y,recvtag); - req1[3] = MPI_COMM_SCALBL.Isend(sendbuf_Y, sendCount_Y,rank_Y,sendtag); - req2[3] = MPI_COMM_SCALBL.Irecv(recvbuf_y, recvCount_y,rank_y,recvtag); - req1[4] = MPI_COMM_SCALBL.Isend(sendbuf_z, sendCount_z,rank_z,sendtag); - req2[4] = MPI_COMM_SCALBL.Irecv(recvbuf_Z, recvCount_Z,rank_Z,recvtag); - req1[5] = MPI_COMM_SCALBL.Isend(sendbuf_Z, sendCount_Z,rank_Z,sendtag); - req2[5] = MPI_COMM_SCALBL.Irecv(recvbuf_z, recvCount_z,rank_z,recvtag); - req1[6] = MPI_COMM_SCALBL.Isend(sendbuf_xy, sendCount_xy,rank_xy,sendtag); - req2[6] = MPI_COMM_SCALBL.Irecv(recvbuf_XY, recvCount_XY,rank_XY,recvtag); - req1[7] = MPI_COMM_SCALBL.Isend(sendbuf_XY, sendCount_XY,rank_XY,sendtag); - req2[7] = MPI_COMM_SCALBL.Irecv(recvbuf_xy, recvCount_xy,rank_xy,recvtag); - req1[8] = MPI_COMM_SCALBL.Isend(sendbuf_Xy, sendCount_Xy,rank_Xy,sendtag); - req2[8] = MPI_COMM_SCALBL.Irecv(recvbuf_xY, recvCount_xY,rank_xY,recvtag); - req1[9] = MPI_COMM_SCALBL.Isend(sendbuf_xY, sendCount_xY,rank_xY,sendtag); - req2[9] = MPI_COMM_SCALBL.Irecv(recvbuf_Xy, recvCount_Xy,rank_Xy,recvtag); - req1[10] = MPI_COMM_SCALBL.Isend(sendbuf_xz, sendCount_xz,rank_xz,sendtag); - req2[10] = MPI_COMM_SCALBL.Irecv(recvbuf_XZ, recvCount_XZ,rank_XZ,recvtag); - req1[11] = MPI_COMM_SCALBL.Isend(sendbuf_XZ, sendCount_XZ,rank_XZ,sendtag); - req2[11] = MPI_COMM_SCALBL.Irecv(recvbuf_xz, recvCount_xz,rank_xz,recvtag); - req1[12] = MPI_COMM_SCALBL.Isend(sendbuf_Xz, sendCount_Xz,rank_Xz,sendtag); - req2[12] = MPI_COMM_SCALBL.Irecv(recvbuf_xZ, recvCount_xZ,rank_xZ,recvtag); - req1[13] = MPI_COMM_SCALBL.Isend(sendbuf_xZ, sendCount_xZ,rank_xZ,sendtag); - req2[13] = MPI_COMM_SCALBL.Irecv(recvbuf_Xz, recvCount_Xz,rank_Xz,recvtag); - req1[14] = MPI_COMM_SCALBL.Isend(sendbuf_yz, sendCount_yz,rank_yz,sendtag); - req2[14] = MPI_COMM_SCALBL.Irecv(recvbuf_YZ, recvCount_YZ,rank_YZ,recvtag); - req1[15] = MPI_COMM_SCALBL.Isend(sendbuf_YZ, sendCount_YZ,rank_YZ,sendtag); - req2[15] = MPI_COMM_SCALBL.Irecv(recvbuf_yz, recvCount_yz,rank_yz,recvtag); - req1[16] = MPI_COMM_SCALBL.Isend(sendbuf_Yz, sendCount_Yz,rank_Yz,sendtag); - req2[16] = MPI_COMM_SCALBL.Irecv(recvbuf_yZ, recvCount_yZ,rank_yZ,recvtag); - req1[17] = MPI_COMM_SCALBL.Isend(sendbuf_yZ, sendCount_yZ,rank_yZ,sendtag); - req2[17] = MPI_COMM_SCALBL.Irecv(recvbuf_Yz, recvCount_Yz,rank_Yz,recvtag); + MPI_Isend(sendbuf_x, sendCount_x,MPI_DOUBLE,rank_x,sendtag,MPI_COMM_SCALBL,&req1[0]); + MPI_Irecv(recvbuf_X, recvCount_X,MPI_DOUBLE,rank_X,recvtag,MPI_COMM_SCALBL,&req2[0]); + MPI_Isend(sendbuf_X, 
sendCount_X,MPI_DOUBLE,rank_X,sendtag,MPI_COMM_SCALBL,&req1[1]); + MPI_Irecv(recvbuf_x, recvCount_x,MPI_DOUBLE,rank_x,recvtag,MPI_COMM_SCALBL,&req2[1]); + MPI_Isend(sendbuf_y, sendCount_y,MPI_DOUBLE,rank_y,sendtag,MPI_COMM_SCALBL,&req1[2]); + MPI_Irecv(recvbuf_Y, recvCount_Y,MPI_DOUBLE,rank_Y,recvtag,MPI_COMM_SCALBL,&req2[2]); + MPI_Isend(sendbuf_Y, sendCount_Y,MPI_DOUBLE,rank_Y,sendtag,MPI_COMM_SCALBL,&req1[3]); + MPI_Irecv(recvbuf_y, recvCount_y,MPI_DOUBLE,rank_y,recvtag,MPI_COMM_SCALBL,&req2[3]); + MPI_Isend(sendbuf_z, sendCount_z,MPI_DOUBLE,rank_z,sendtag,MPI_COMM_SCALBL,&req1[4]); + MPI_Irecv(recvbuf_Z, recvCount_Z,MPI_DOUBLE,rank_Z,recvtag,MPI_COMM_SCALBL,&req2[4]); + MPI_Isend(sendbuf_Z, sendCount_Z,MPI_DOUBLE,rank_Z,sendtag,MPI_COMM_SCALBL,&req1[5]); + MPI_Irecv(recvbuf_z, recvCount_z,MPI_DOUBLE,rank_z,recvtag,MPI_COMM_SCALBL,&req2[5]); + MPI_Isend(sendbuf_xy, sendCount_xy,MPI_DOUBLE,rank_xy,sendtag,MPI_COMM_SCALBL,&req1[6]); + MPI_Irecv(recvbuf_XY, recvCount_XY,MPI_DOUBLE,rank_XY,recvtag,MPI_COMM_SCALBL,&req2[6]); + MPI_Isend(sendbuf_XY, sendCount_XY,MPI_DOUBLE,rank_XY,sendtag,MPI_COMM_SCALBL,&req1[7]); + MPI_Irecv(recvbuf_xy, recvCount_xy,MPI_DOUBLE,rank_xy,recvtag,MPI_COMM_SCALBL,&req2[7]); + MPI_Isend(sendbuf_Xy, sendCount_Xy,MPI_DOUBLE,rank_Xy,sendtag,MPI_COMM_SCALBL,&req1[8]); + MPI_Irecv(recvbuf_xY, recvCount_xY,MPI_DOUBLE,rank_xY,recvtag,MPI_COMM_SCALBL,&req2[8]); + MPI_Isend(sendbuf_xY, sendCount_xY,MPI_DOUBLE,rank_xY,sendtag,MPI_COMM_SCALBL,&req1[9]); + MPI_Irecv(recvbuf_Xy, recvCount_Xy,MPI_DOUBLE,rank_Xy,recvtag,MPI_COMM_SCALBL,&req2[9]); + MPI_Isend(sendbuf_xz, sendCount_xz,MPI_DOUBLE,rank_xz,sendtag,MPI_COMM_SCALBL,&req1[10]); + MPI_Irecv(recvbuf_XZ, recvCount_XZ,MPI_DOUBLE,rank_XZ,recvtag,MPI_COMM_SCALBL,&req2[10]); + MPI_Isend(sendbuf_XZ, sendCount_XZ,MPI_DOUBLE,rank_XZ,sendtag,MPI_COMM_SCALBL,&req1[11]); + MPI_Irecv(recvbuf_xz, recvCount_xz,MPI_DOUBLE,rank_xz,recvtag,MPI_COMM_SCALBL,&req2[11]); + MPI_Isend(sendbuf_Xz, sendCount_Xz,MPI_DOUBLE,rank_Xz,sendtag,MPI_COMM_SCALBL,&req1[12]); + MPI_Irecv(recvbuf_xZ, recvCount_xZ,MPI_DOUBLE,rank_xZ,recvtag,MPI_COMM_SCALBL,&req2[12]); + MPI_Isend(sendbuf_xZ, sendCount_xZ,MPI_DOUBLE,rank_xZ,sendtag,MPI_COMM_SCALBL,&req1[13]); + MPI_Irecv(recvbuf_Xz, recvCount_Xz,MPI_DOUBLE,rank_Xz,recvtag,MPI_COMM_SCALBL,&req2[13]); + MPI_Isend(sendbuf_yz, sendCount_yz,MPI_DOUBLE,rank_yz,sendtag,MPI_COMM_SCALBL,&req1[14]); + MPI_Irecv(recvbuf_YZ, recvCount_YZ,MPI_DOUBLE,rank_YZ,recvtag,MPI_COMM_SCALBL,&req2[14]); + MPI_Isend(sendbuf_YZ, sendCount_YZ,MPI_DOUBLE,rank_YZ,sendtag,MPI_COMM_SCALBL,&req1[15]); + MPI_Irecv(recvbuf_yz, recvCount_yz,MPI_DOUBLE,rank_yz,recvtag,MPI_COMM_SCALBL,&req2[15]); + MPI_Isend(sendbuf_Yz, sendCount_Yz,MPI_DOUBLE,rank_Yz,sendtag,MPI_COMM_SCALBL,&req1[16]); + MPI_Irecv(recvbuf_yZ, recvCount_yZ,MPI_DOUBLE,rank_yZ,recvtag,MPI_COMM_SCALBL,&req2[16]); + MPI_Isend(sendbuf_yZ, sendCount_yZ,MPI_DOUBLE,rank_yZ,sendtag,MPI_COMM_SCALBL,&req1[17]); + MPI_Irecv(recvbuf_Yz, recvCount_Yz,MPI_DOUBLE,rank_Yz,recvtag,MPI_COMM_SCALBL,&req2[17]); //................................................................................... } void ScaLBL_Communicator::RecvHalo(double *data){ //................................................................................... - MPI_COMM_SCALBL.waitAll(18,req1); - MPI_COMM_SCALBL.waitAll(18,req2); + MPI_Waitall(18,req1,stat1); + MPI_Waitall(18,req2,stat2); ScaLBL_DeviceBarrier(); //................................................................................... 
//................................................................................... @@ -1478,6 +1480,7 @@ void ScaLBL_Communicator::RecvHalo(double *data){ void ScaLBL_Communicator::RegularLayout(IntArray map, const double *data, DoubleArray ®data){ // Gets data from the device and stores in regular layout + int i,j,k,n,idx; int Nx = map.size(0); int Ny = map.size(1); int Nz = map.size(2); @@ -1489,10 +1492,11 @@ void ScaLBL_Communicator::RegularLayout(IntArray map, const double *data, Double double value; TmpDat = new double [N]; ScaLBL_CopyToHost(&TmpDat[0],&data[0], N*sizeof(double)); - for (int k=0; k Date: Tue, 17 Mar 2020 21:23:18 -0400 Subject: [PATCH 057/121] Revert "Moving more MPI calls to the wrapper" This reverts commit 0f91767b6c870101084fbae0978280c04c85a004. --- IO/netcdf.cpp | 2 +- StackTrace/ErrorHandlers.h | 2 +- StackTrace/Utilities.cpp | 2 +- analysis/TwoPhase.cpp | 7 +- analysis/morphology.cpp | 132 ++--- common/Communication.h | 216 ++++---- common/Domain.cpp | 206 ++++---- common/Domain.h | 3 + common/MPI.I | 33 -- common/MPI.cpp | 48 -- common/MPI.h | 7 - common/ScaLBL.h | 1 + common/Utilities.cpp | 2 +- cpu/exe/lb2_Color_mpi.cpp | 538 +++++++++---------- cpu/exe/lb2_Color_wia_mpi_bubble.cpp | 711 ++++++++++++++------------ gpu/exe/lb1_MRT_mpi.cpp | 348 +++++++------ gpu/exe/lb1_MRT_mpi.cu | 352 +++++++------ gpu/exe/lb2_Color.cu | 65 ++- gpu/exe/lb2_Color_mpi.cpp | 541 ++++++++++---------- gpu/exe/lb2_Color_pBC_wia_mpi.cpp | 621 ++++++++++++---------- models/ColorModel.cpp | 8 +- models/DFHModel.cpp | 4 +- models/MRTModel.cpp | 4 +- tests/BlobAnalyzeParallel.cpp | 22 +- tests/GenerateSphereTest.cpp | 54 +- tests/TestBlobAnalyze.cpp | 28 +- tests/TestBubble.cpp | 41 +- tests/TestBubbleDFH.cpp | 4 +- tests/TestColorGrad.cpp | 20 +- tests/TestCommD3Q19.cpp | 4 +- tests/TestForceD3Q19.cpp | 4 +- tests/TestForceMoments.cpp | 4 +- tests/TestMRT.cpp | 28 +- tests/TestMicroCTReader.cpp | 1 + tests/TestMomentsD3Q19.cpp | 2 +- tests/TestNetcdf.cpp | 2 +- tests/TestSegDist.cpp | 4 +- tests/lb2_CMT_wia.cpp | 30 +- tests/lb2_Color_blob_wia_mpi.cpp | 427 ++++++++-------- tests/lbpm_BGK_simulator.cpp | 48 +- tests/lbpm_color_macro_simulator.cpp | 61 +-- tests/lbpm_disc_pp.cpp | 34 +- tests/lbpm_inkbottle_pp.cpp | 22 +- tests/lbpm_juanes_bench_disc_pp.cpp | 35 +- tests/lbpm_nondarcy_simulator.cpp | 52 +- tests/lbpm_nonnewtonian_simulator.cpp | 26 +- tests/lbpm_plates_pp.cpp | 24 +- tests/lbpm_porenetwork_pp.cpp | 25 +- tests/lbpm_random_pp.cpp | 92 ++-- tests/lbpm_segmented_decomp.cpp | 48 +- tests/lbpm_segmented_pp.cpp | 2 +- tests/lbpm_sphere_pp.cpp | 16 +- tests/lbpm_squaretube_pp.cpp | 25 +- 53 files changed, 2678 insertions(+), 2360 deletions(-) diff --git a/IO/netcdf.cpp b/IO/netcdf.cpp index 6c3773e3..e061579a 100644 --- a/IO/netcdf.cpp +++ b/IO/netcdf.cpp @@ -119,7 +119,7 @@ std::string VariableTypeName( VariableType type ) int open( const std::string& filename, FileMode mode, const Utilities::MPI& comm ) { int fid = 0; - if ( comm.isNull() ) { + if ( comm == MPI_COMM_NULL ) { if ( mode == READ ) { int err = nc_open( filename.c_str(), NC_NOWRITE, &fid ); CHECK_NC_ERR( err ); diff --git a/StackTrace/ErrorHandlers.h b/StackTrace/ErrorHandlers.h index e43a4688..12b8d7de 100644 --- a/StackTrace/ErrorHandlers.h +++ b/StackTrace/ErrorHandlers.h @@ -6,7 +6,7 @@ #include -#include "common/MPI.h" +#include "mpi.h" namespace StackTrace diff --git a/StackTrace/Utilities.cpp b/StackTrace/Utilities.cpp index 5fb8e9b8..11f05777 100644 --- a/StackTrace/Utilities.cpp +++ 
b/StackTrace/Utilities.cpp @@ -14,7 +14,7 @@ #include #ifdef USE_MPI -#include "common/MPI.h" +#include "mpi.h" #endif #ifdef USE_TIMER diff --git a/analysis/TwoPhase.cpp b/analysis/TwoPhase.cpp index d878a663..812490e7 100644 --- a/analysis/TwoPhase.cpp +++ b/analysis/TwoPhase.cpp @@ -890,14 +890,14 @@ void TwoPhase::ComponentAverages() RecvBuffer.resize(BLOB_AVG_COUNT,NumberComponents_NWP); /* for (int b=0; bComm.barrier(); - Dm->Comm.sumReduce(&ComponentAverages_NWP(0,b),&RecvBuffer(0),BLOB_AVG_COUNT); + MPI_Barrier(Dm->Comm); + MPI_Allreduce(&ComponentAverages_NWP(0,b),&RecvBuffer(0),BLOB_AVG_COUNT,MPI_DOUBLE,MPI_SUM,Dm->Comm); for (int idx=0; idxComm.barrier(); Dm->Comm.sumReduce(ComponentAverages_NWP.data(),RecvBuffer.data(),BLOB_AVG_COUNT*NumberComponents_NWP); - // Dm->Comm.sumReduce(ComponentAverages_NWP.data(),RecvBuffer.data(),BLOB_AVG_COUNT); + // MPI_Reduce(ComponentAverages_NWP.data(),RecvBuffer.data(),BLOB_AVG_COUNT,MPI_DOUBLE,MPI_SUM,0,Dm->Comm); if (Dm->rank()==0){ printf("rescaling... \n"); @@ -994,6 +994,7 @@ void TwoPhase::ComponentAverages() // reduce the wetting phase averages for (int b=0; bComm.barrier(); +// MPI_Allreduce(&ComponentAverages_WP(0,b),RecvBuffer.data(),BLOB_AVG_COUNT,MPI_DOUBLE,MPI_SUM,Dm->Comm); Dm->Comm.sumReduce(&ComponentAverages_WP(0,b),RecvBuffer.data(),BLOB_AVG_COUNT); for (int idx=0; idx PackID(Dm->sendList_yZ, Dm->sendCount_yZ ,sendID_yZ, id); PackID(Dm->sendList_YZ, Dm->sendCount_YZ ,sendID_YZ, id); //...................................................................................... - Dm->Comm.sendrecv(sendID_x,Dm->sendCount_x,Dm->rank_x(),sendtag,recvID_X,Dm->recvCount_X,Dm->rank_X(),recvtag); - Dm->Comm.sendrecv(sendID_X,Dm->sendCount_X,Dm->rank_X(),sendtag,recvID_x,Dm->recvCount_x,Dm->rank_x(),recvtag); - Dm->Comm.sendrecv(sendID_y,Dm->sendCount_y,Dm->rank_y(),sendtag,recvID_Y,Dm->recvCount_Y,Dm->rank_Y(),recvtag); - Dm->Comm.sendrecv(sendID_Y,Dm->sendCount_Y,Dm->rank_Y(),sendtag,recvID_y,Dm->recvCount_y,Dm->rank_y(),recvtag); - Dm->Comm.sendrecv(sendID_z,Dm->sendCount_z,Dm->rank_z(),sendtag,recvID_Z,Dm->recvCount_Z,Dm->rank_Z(),recvtag); - Dm->Comm.sendrecv(sendID_Z,Dm->sendCount_Z,Dm->rank_Z(),sendtag,recvID_z,Dm->recvCount_z,Dm->rank_z(),recvtag); - Dm->Comm.sendrecv(sendID_xy,Dm->sendCount_xy,Dm->rank_xy(),sendtag,recvID_XY,Dm->recvCount_XY,Dm->rank_XY(),recvtag); - Dm->Comm.sendrecv(sendID_XY,Dm->sendCount_XY,Dm->rank_XY(),sendtag,recvID_xy,Dm->recvCount_xy,Dm->rank_xy(),recvtag); - Dm->Comm.sendrecv(sendID_Xy,Dm->sendCount_Xy,Dm->rank_Xy(),sendtag,recvID_xY,Dm->recvCount_xY,Dm->rank_xY(),recvtag); - Dm->Comm.sendrecv(sendID_xY,Dm->sendCount_xY,Dm->rank_xY(),sendtag,recvID_Xy,Dm->recvCount_Xy,Dm->rank_Xy(),recvtag); - Dm->Comm.sendrecv(sendID_xz,Dm->sendCount_xz,Dm->rank_xz(),sendtag,recvID_XZ,Dm->recvCount_XZ,Dm->rank_XZ(),recvtag); - Dm->Comm.sendrecv(sendID_XZ,Dm->sendCount_XZ,Dm->rank_XZ(),sendtag,recvID_xz,Dm->recvCount_xz,Dm->rank_xz(),recvtag); - Dm->Comm.sendrecv(sendID_Xz,Dm->sendCount_Xz,Dm->rank_Xz(),sendtag,recvID_xZ,Dm->recvCount_xZ,Dm->rank_xZ(),recvtag); - Dm->Comm.sendrecv(sendID_xZ,Dm->sendCount_xZ,Dm->rank_xZ(),sendtag,recvID_Xz,Dm->recvCount_Xz,Dm->rank_Xz(),recvtag); - Dm->Comm.sendrecv(sendID_yz,Dm->sendCount_yz,Dm->rank_yz(),sendtag,recvID_YZ,Dm->recvCount_YZ,Dm->rank_YZ(),recvtag); - Dm->Comm.sendrecv(sendID_YZ,Dm->sendCount_YZ,Dm->rank_YZ(),sendtag,recvID_yz,Dm->recvCount_yz,Dm->rank_yz(),recvtag); - 
Dm->Comm.sendrecv(sendID_Yz,Dm->sendCount_Yz,Dm->rank_Yz(),sendtag,recvID_yZ,Dm->recvCount_yZ,Dm->rank_yZ(),recvtag); - Dm->Comm.sendrecv(sendID_yZ,Dm->sendCount_yZ,Dm->rank_yZ(),sendtag,recvID_Yz,Dm->recvCount_Yz,Dm->rank_Yz(),recvtag); + MPI_Sendrecv(sendID_x,Dm->sendCount_x,MPI_CHAR,Dm->rank_x(),sendtag, + recvID_X,Dm->recvCount_X,MPI_CHAR,Dm->rank_X(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_X,Dm->sendCount_X,MPI_CHAR,Dm->rank_X(),sendtag, + recvID_x,Dm->recvCount_x,MPI_CHAR,Dm->rank_x(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_y,Dm->sendCount_y,MPI_CHAR,Dm->rank_y(),sendtag, + recvID_Y,Dm->recvCount_Y,MPI_CHAR,Dm->rank_Y(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_Y,Dm->sendCount_Y,MPI_CHAR,Dm->rank_Y(),sendtag, + recvID_y,Dm->recvCount_y,MPI_CHAR,Dm->rank_y(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_z,Dm->sendCount_z,MPI_CHAR,Dm->rank_z(),sendtag, + recvID_Z,Dm->recvCount_Z,MPI_CHAR,Dm->rank_Z(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_Z,Dm->sendCount_Z,MPI_CHAR,Dm->rank_Z(),sendtag, + recvID_z,Dm->recvCount_z,MPI_CHAR,Dm->rank_z(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_xy,Dm->sendCount_xy,MPI_CHAR,Dm->rank_xy(),sendtag, + recvID_XY,Dm->recvCount_XY,MPI_CHAR,Dm->rank_XY(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_XY,Dm->sendCount_XY,MPI_CHAR,Dm->rank_XY(),sendtag, + recvID_xy,Dm->recvCount_xy,MPI_CHAR,Dm->rank_xy(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_Xy,Dm->sendCount_Xy,MPI_CHAR,Dm->rank_Xy(),sendtag, + recvID_xY,Dm->recvCount_xY,MPI_CHAR,Dm->rank_xY(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_xY,Dm->sendCount_xY,MPI_CHAR,Dm->rank_xY(),sendtag, + recvID_Xy,Dm->recvCount_Xy,MPI_CHAR,Dm->rank_Xy(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_xz,Dm->sendCount_xz,MPI_CHAR,Dm->rank_xz(),sendtag, + recvID_XZ,Dm->recvCount_XZ,MPI_CHAR,Dm->rank_XZ(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_XZ,Dm->sendCount_XZ,MPI_CHAR,Dm->rank_XZ(),sendtag, + recvID_xz,Dm->recvCount_xz,MPI_CHAR,Dm->rank_xz(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_Xz,Dm->sendCount_Xz,MPI_CHAR,Dm->rank_Xz(),sendtag, + recvID_xZ,Dm->recvCount_xZ,MPI_CHAR,Dm->rank_xZ(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_xZ,Dm->sendCount_xZ,MPI_CHAR,Dm->rank_xZ(),sendtag, + recvID_Xz,Dm->recvCount_Xz,MPI_CHAR,Dm->rank_Xz(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_yz,Dm->sendCount_yz,MPI_CHAR,Dm->rank_yz(),sendtag, + recvID_YZ,Dm->recvCount_YZ,MPI_CHAR,Dm->rank_YZ(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_YZ,Dm->sendCount_YZ,MPI_CHAR,Dm->rank_YZ(),sendtag, + recvID_yz,Dm->recvCount_yz,MPI_CHAR,Dm->rank_yz(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_Yz,Dm->sendCount_Yz,MPI_CHAR,Dm->rank_Yz(),sendtag, + recvID_yZ,Dm->recvCount_yZ,MPI_CHAR,Dm->rank_yZ(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_yZ,Dm->sendCount_yZ,MPI_CHAR,Dm->rank_yZ(),sendtag, + recvID_Yz,Dm->recvCount_Yz,MPI_CHAR,Dm->rank_Yz(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); //...................................................................................... 
UnpackID(Dm->recvList_x, Dm->recvCount_x ,recvID_x, id); UnpackID(Dm->recvList_X, Dm->recvCount_X ,recvID_X, id); @@ -285,7 +303,7 @@ double morph_open() fillHalo fillChar(Dm->Comm,Dm->rank_info,{Nx-2,Ny-2,Nz-2},{1,1,1},0,1); - GlobalNumber = Dm->Comm.sumReduce( LocalNumber ); + MPI_Allreduce(&LocalNumber,&GlobalNumber,1,MPI_DOUBLE,MPI_SUM,Dm->Comm); count = 0.f; for (int k=1; kComm.sumReduce( count ); + MPI_Allreduce(&count,&countGlobal,1,MPI_DOUBLE,MPI_SUM,Dm->Comm); return countGlobal; } */ @@ -488,42 +506,42 @@ double MorphDrain(DoubleArray &SignDist, signed char *id, std::shared_ptrsendList_yZ, Dm->sendCount_yZ ,sendID_yZ, id); PackID(Dm->sendList_YZ, Dm->sendCount_YZ ,sendID_YZ, id); //...................................................................................... - Dm->Comm.sendrecv(sendID_x,Dm->sendCount_x,Dm->rank_x(),sendtag, - recvID_X,Dm->recvCount_X,Dm->rank_X(),recvtag); - Dm->Comm.sendrecv(sendID_X,Dm->sendCount_X,Dm->rank_X(),sendtag, - recvID_x,Dm->recvCount_x,Dm->rank_x(),recvtag); - Dm->Comm.sendrecv(sendID_y,Dm->sendCount_y,Dm->rank_y(),sendtag, - recvID_Y,Dm->recvCount_Y,Dm->rank_Y(),recvtag); - Dm->Comm.sendrecv(sendID_Y,Dm->sendCount_Y,Dm->rank_Y(),sendtag, - recvID_y,Dm->recvCount_y,Dm->rank_y(),recvtag); - Dm->Comm.sendrecv(sendID_z,Dm->sendCount_z,Dm->rank_z(),sendtag, - recvID_Z,Dm->recvCount_Z,Dm->rank_Z(),recvtag); - Dm->Comm.sendrecv(sendID_Z,Dm->sendCount_Z,Dm->rank_Z(),sendtag, - recvID_z,Dm->recvCount_z,Dm->rank_z(),recvtag); - Dm->Comm.sendrecv(sendID_xy,Dm->sendCount_xy,Dm->rank_xy(),sendtag, - recvID_XY,Dm->recvCount_XY,Dm->rank_XY(),recvtag); - Dm->Comm.sendrecv(sendID_XY,Dm->sendCount_XY,Dm->rank_XY(),sendtag, - recvID_xy,Dm->recvCount_xy,Dm->rank_xy(),recvtag); - Dm->Comm.sendrecv(sendID_Xy,Dm->sendCount_Xy,Dm->rank_Xy(),sendtag, - recvID_xY,Dm->recvCount_xY,Dm->rank_xY(),recvtag); - Dm->Comm.sendrecv(sendID_xY,Dm->sendCount_xY,Dm->rank_xY(),sendtag, - recvID_Xy,Dm->recvCount_Xy,Dm->rank_Xy(),recvtag); - Dm->Comm.sendrecv(sendID_xz,Dm->sendCount_xz,Dm->rank_xz(),sendtag, - recvID_XZ,Dm->recvCount_XZ,Dm->rank_XZ(),recvtag); - Dm->Comm.sendrecv(sendID_XZ,Dm->sendCount_XZ,Dm->rank_XZ(),sendtag, - recvID_xz,Dm->recvCount_xz,Dm->rank_xz(),recvtag); - Dm->Comm.sendrecv(sendID_Xz,Dm->sendCount_Xz,Dm->rank_Xz(),sendtag, - recvID_xZ,Dm->recvCount_xZ,Dm->rank_xZ(),recvtag); - Dm->Comm.sendrecv(sendID_xZ,Dm->sendCount_xZ,Dm->rank_xZ(),sendtag, - recvID_Xz,Dm->recvCount_Xz,Dm->rank_Xz(),recvtag); - Dm->Comm.sendrecv(sendID_yz,Dm->sendCount_yz,Dm->rank_yz(),sendtag, - recvID_YZ,Dm->recvCount_YZ,Dm->rank_YZ(),recvtag); - Dm->Comm.sendrecv(sendID_YZ,Dm->sendCount_YZ,Dm->rank_YZ(),sendtag, - recvID_yz,Dm->recvCount_yz,Dm->rank_yz(),recvtag); - Dm->Comm.sendrecv(sendID_Yz,Dm->sendCount_Yz,Dm->rank_Yz(),sendtag, - recvID_yZ,Dm->recvCount_yZ,Dm->rank_yZ(),recvtag); - Dm->Comm.sendrecv(sendID_yZ,Dm->sendCount_yZ,Dm->rank_yZ(),sendtag, - recvID_Yz,Dm->recvCount_Yz,Dm->rank_Yz(),recvtag); + MPI_Sendrecv(sendID_x,Dm->sendCount_x,MPI_CHAR,Dm->rank_x(),sendtag, + recvID_X,Dm->recvCount_X,MPI_CHAR,Dm->rank_X(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_X,Dm->sendCount_X,MPI_CHAR,Dm->rank_X(),sendtag, + recvID_x,Dm->recvCount_x,MPI_CHAR,Dm->rank_x(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_y,Dm->sendCount_y,MPI_CHAR,Dm->rank_y(),sendtag, + recvID_Y,Dm->recvCount_Y,MPI_CHAR,Dm->rank_Y(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); + 
MPI_Sendrecv(sendID_Y,Dm->sendCount_Y,MPI_CHAR,Dm->rank_Y(),sendtag, + recvID_y,Dm->recvCount_y,MPI_CHAR,Dm->rank_y(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_z,Dm->sendCount_z,MPI_CHAR,Dm->rank_z(),sendtag, + recvID_Z,Dm->recvCount_Z,MPI_CHAR,Dm->rank_Z(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_Z,Dm->sendCount_Z,MPI_CHAR,Dm->rank_Z(),sendtag, + recvID_z,Dm->recvCount_z,MPI_CHAR,Dm->rank_z(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_xy,Dm->sendCount_xy,MPI_CHAR,Dm->rank_xy(),sendtag, + recvID_XY,Dm->recvCount_XY,MPI_CHAR,Dm->rank_XY(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_XY,Dm->sendCount_XY,MPI_CHAR,Dm->rank_XY(),sendtag, + recvID_xy,Dm->recvCount_xy,MPI_CHAR,Dm->rank_xy(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_Xy,Dm->sendCount_Xy,MPI_CHAR,Dm->rank_Xy(),sendtag, + recvID_xY,Dm->recvCount_xY,MPI_CHAR,Dm->rank_xY(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_xY,Dm->sendCount_xY,MPI_CHAR,Dm->rank_xY(),sendtag, + recvID_Xy,Dm->recvCount_Xy,MPI_CHAR,Dm->rank_Xy(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_xz,Dm->sendCount_xz,MPI_CHAR,Dm->rank_xz(),sendtag, + recvID_XZ,Dm->recvCount_XZ,MPI_CHAR,Dm->rank_XZ(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_XZ,Dm->sendCount_XZ,MPI_CHAR,Dm->rank_XZ(),sendtag, + recvID_xz,Dm->recvCount_xz,MPI_CHAR,Dm->rank_xz(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_Xz,Dm->sendCount_Xz,MPI_CHAR,Dm->rank_Xz(),sendtag, + recvID_xZ,Dm->recvCount_xZ,MPI_CHAR,Dm->rank_xZ(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_xZ,Dm->sendCount_xZ,MPI_CHAR,Dm->rank_xZ(),sendtag, + recvID_Xz,Dm->recvCount_Xz,MPI_CHAR,Dm->rank_Xz(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_yz,Dm->sendCount_yz,MPI_CHAR,Dm->rank_yz(),sendtag, + recvID_YZ,Dm->recvCount_YZ,MPI_CHAR,Dm->rank_YZ(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_YZ,Dm->sendCount_YZ,MPI_CHAR,Dm->rank_YZ(),sendtag, + recvID_yz,Dm->recvCount_yz,MPI_CHAR,Dm->rank_yz(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_Yz,Dm->sendCount_Yz,MPI_CHAR,Dm->rank_Yz(),sendtag, + recvID_yZ,Dm->recvCount_yZ,MPI_CHAR,Dm->rank_yZ(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_yZ,Dm->sendCount_yZ,MPI_CHAR,Dm->rank_yZ(),sendtag, + recvID_Yz,Dm->recvCount_Yz,MPI_CHAR,Dm->rank_Yz(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); //...................................................................................... 
UnpackID(Dm->recvList_x, Dm->recvCount_x ,recvID_x, id); UnpackID(Dm->recvList_X, Dm->recvCount_X ,recvID_X, id); @@ -599,7 +617,7 @@ double MorphDrain(DoubleArray &SignDist, signed char *id, std::shared_ptrrank_info,phase,SignDist,vF,vS,phase_label,Dm->Comm); - Dm->Comm.barrier(); + MPI_Barrier(Dm->Comm); for (int k=1; k -void MPI_CLASS::sendrecv( const char*, int, int, int, char*, int, int, int ) const; -template<> -void MPI_CLASS::sendrecv( const int*, int, int, int, int*, int, int, int ) const; -template<> -void MPI_CLASS::sendrecv( const float*, int, int, int, float*, int, int, int ) const; -template<> -void MPI_CLASS::sendrecv( const double*, int, int, int, double*, int, int, int ) const; -template -void MPI_CLASS::sendrecv( const TYPE *sendbuf, int sendcount, int dest, int sendtag, - TYPE *recvbuf, int recvcount, int source, int recvtag ) const -{ - ERROR( "Not implimented" ); -} -#else -template -void MPI_CLASS::sendrecv( const TYPE *sendbuf, int sendcount, int dest, int sendtag, - TYPE *recvbuf, int recvcount, int source, int recvtag ) const -{ - ASSERT( dest == 0 ); - ASSERT( source == 0 ); - ASSERT( sendcount == recvcount ); - ASSERT( sendtag == recvtag ); - memcpy( recvbuf, sendbuf, sendcount * sizeof( TYPE ) ); -} -#endif - - - /************************************************************************ * allGather * ************************************************************************/ diff --git a/common/MPI.cpp b/common/MPI.cpp index 8b09bc49..73932d03 100644 --- a/common/MPI.cpp +++ b/common/MPI.cpp @@ -2805,54 +2805,6 @@ MPI_Request MPI_CLASS::IrecvBytes( } - -/************************************************************************ - * sendrecv * - ************************************************************************/ -#if defined( USE_MPI ) || defined( USE_EXT_MPI ) -template<> -void MPI_CLASS::sendrecv( const char* sendbuf, int sendcount, int dest, int sendtag, - char* recvbuf, int recvcount, int source, int recvtag ) const -{ - PROFILE_START( "sendrecv", profile_level ); - MPI_Sendrecv( sendbuf, sendcount, MPI_CHAR, dest, sendtag, - recvbuf, recvcount, MPI_CHAR, source, recvtag, - communicator, MPI_STATUS_IGNORE ); - PROFILE_STOP( "sendrecv", profile_level ); -} -template<> -void MPI_CLASS::sendrecv( const int* sendbuf, int sendcount, int dest, int sendtag, - int* recvbuf, int recvcount, int source, int recvtag ) const -{ - PROFILE_START( "sendrecv", profile_level ); - MPI_Sendrecv( sendbuf, sendcount, MPI_INT, dest, sendtag, - recvbuf, recvcount, MPI_INT, source, recvtag, - communicator, MPI_STATUS_IGNORE ); - PROFILE_STOP( "sendrecv", profile_level ); -} -template<> -void MPI_CLASS::sendrecv( const float* sendbuf, int sendcount, int dest, int sendtag, - float* recvbuf, int recvcount, int source, int recvtag ) const -{ - PROFILE_START( "sendrecv", profile_level ); - MPI_Sendrecv( sendbuf, sendcount, MPI_FLOAT, dest, sendtag, - recvbuf, recvcount, MPI_FLOAT, source, recvtag, - communicator, MPI_STATUS_IGNORE ); - PROFILE_STOP( "sendrecv", profile_level ); -} -template<> -void MPI_CLASS::sendrecv( const double* sendbuf, int sendcount, int dest, int sendtag, - double* recvbuf, int recvcount, int source, int recvtag ) const -{ - PROFILE_START( "sendrecv", profile_level ); - MPI_Sendrecv( sendbuf, sendcount, MPI_DOUBLE, dest, sendtag, - recvbuf, recvcount, MPI_DOUBLE, source, recvtag, - communicator, MPI_STATUS_IGNORE ); - PROFILE_STOP( "sendrecv", profile_level ); -} -#endif - - /************************************************************************ * allGather * * 
Note: these specializations are only called when using MPI. * diff --git a/common/MPI.h b/common/MPI.h index 4161d6a7..e3fd3e13 100644 --- a/common/MPI.h +++ b/common/MPI.h @@ -792,13 +792,6 @@ public: // Member functions void *buf, const int N_bytes, const int send_proc, const int tag ) const; - /*! - * @brief This function sends and recieves data using a blocking call - */ - template - void sendrecv( const type *sendbuf, int sendcount, int dest, int sendtag, type *recvbuf, int recvcount, int source, int recvtag ) const; - - /*! * Each processor sends every other processor a single value. * @param[in] x Input value for allGather diff --git a/common/ScaLBL.h b/common/ScaLBL.h index 610fce5d..51195f5a 100644 --- a/common/ScaLBL.h +++ b/common/ScaLBL.h @@ -201,6 +201,7 @@ private: int sendtag,recvtag; // Give the object it's own MPI communicator RankInfoStruct rank_info; + MPI_Group Group; // Group of processors associated with this domain Utilities::MPI MPI_COMM_SCALBL; // MPI Communicator for this domain MPI_Request req1[18],req2[18]; //...................................................................................... diff --git a/common/Utilities.cpp b/common/Utilities.cpp index 723b34f8..9c89e024 100644 --- a/common/Utilities.cpp +++ b/common/Utilities.cpp @@ -8,7 +8,7 @@ #endif #ifdef USE_MPI -#include "common/MPI.h" +#include "mpi.h" #endif #include diff --git a/cpu/exe/lb2_Color_mpi.cpp b/cpu/exe/lb2_Color_mpi.cpp index cdf56af9..0cade21e 100644 --- a/cpu/exe/lb2_Color_mpi.cpp +++ b/cpu/exe/lb2_Color_mpi.cpp @@ -36,11 +36,15 @@ inline void UnpackID(int *list, int count, char *recvbuf, char *ID){ //*************************************************************************************** int main(int argc, char **argv) { + //***************************************** + // ***** MPI STUFF **************** + //***************************************** // Initialize MPI + int rank,nprocs; MPI_Init(&argc,&argv); - Utilities::MPI comm( MPI_COMM_WORLD ); - int rank = comm.getRank(); - int nprocs = comm.getSize(); + MPI_Comm comm = MPI_COMM_WORLD; + MPI_Comm_rank(comm,&rank); + MPI_Comm_size(comm,&nprocs); // parallel domain size (# of sub-domains) int nprocx,nprocy,nprocz; int iproc,jproc,kproc; @@ -54,6 +58,7 @@ int main(int argc, char **argv) int rank_yz,rank_YZ,rank_yZ,rank_Yz; //********************************** MPI_Request req1[18],req2[18]; + MPI_Status stat1[18],stat2[18]; if (rank == 0){ printf("********************************************************\n"); @@ -110,30 +115,31 @@ int main(int argc, char **argv) } // ************************************************************** // Broadcast simulation parameters from rank 0 to all other procs - comm.barrier(); + MPI_Barrier(comm); //................................................. 
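A minimal sketch of the raw initialization pattern these drivers move to, with rank and size queried directly from MPI_COMM_WORLD instead of through a Utilities::MPI object (error handling omitted; this is a compile-able skeleton, not the full driver):

    #include <mpi.h>
    #include <cstdio>

    int main(int argc, char **argv)
    {
        // Initialize MPI and query rank/size on MPI_COMM_WORLD directly.
        int rank, nprocs;
        MPI_Init(&argc, &argv);
        MPI_Comm comm = MPI_COMM_WORLD;
        MPI_Comm_rank(comm, &rank);
        MPI_Comm_size(comm, &nprocs);

        if (rank == 0) printf("Running with %i MPI processes\n", nprocs);

        MPI_Barrier(comm);
        MPI_Finalize();
        return 0;
    }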
- comm.bcast(&Nz,1,0); - comm.bcast(&nBlocks,1,0); - comm.bcast(&nthreads,1,0); - comm.bcast(&Fx,1,0); - comm.bcast(&Fy,1,0); - comm.bcast(&Fz,1,0); - comm.bcast(&tau,1,0); - comm.bcast(&alpha,1,0); - comm.bcast(&beta,1,0); - comm.bcast(&das,1,0); - comm.bcast(&dbs,1,0); - comm.bcast(&pBC,1,0); - comm.bcast(&din,1,0); - comm.bcast(&dout,1,0); - comm.bcast(×tepMax,1,0); - comm.bcast(&interval,1,0); - comm.bcast(&tol,1,0); - comm.bcast(&nprocx,1,0); - comm.bcast(&nprocy,1,0); - comm.bcast(&nprocz,1,0); + MPI_Bcast(&Nz,1,MPI_INT,0,comm); + MPI_Bcast(&nBlocks,1,MPI_INT,0,comm); + MPI_Bcast(&nthreads,1,MPI_INT,0,comm); + MPI_Bcast(&Fx,1,MPI_DOUBLE,0,comm); + MPI_Bcast(&Fy,1,MPI_DOUBLE,0,comm); + MPI_Bcast(&Fz,1,MPI_DOUBLE,0,comm); + MPI_Bcast(&tau,1,MPI_DOUBLE,0,comm); + MPI_Bcast(&alpha,1,MPI_DOUBLE,0,comm); + MPI_Bcast(&beta,1,MPI_DOUBLE,0,comm); + MPI_Bcast(&das,1,MPI_DOUBLE,0,comm); + MPI_Bcast(&dbs,1,MPI_DOUBLE,0,comm); + MPI_Bcast(&pBC,1,MPI_LOGICAL,0,comm); + MPI_Bcast(&din,1,MPI_DOUBLE,0,comm); + MPI_Bcast(&dout,1,MPI_DOUBLE,0,comm); + MPI_Bcast(×tepMax,1,MPI_INT,0,comm); + MPI_Bcast(&interval,1,MPI_INT,0,comm); + MPI_Bcast(&tol,1,MPI_DOUBLE,0,comm); + + MPI_Bcast(&nprocx,1,MPI_INT,0,comm); + MPI_Bcast(&nprocy,1,MPI_INT,0,comm); + MPI_Bcast(&nprocz,1,MPI_INT,0,comm); //................................................. - comm.barrier(); + MPI_Barrier(comm); // ************************************************************** // ************************************************************** @@ -163,7 +169,7 @@ int main(int argc, char **argv) } - comm.barrier(); + MPI_Barrier(comm); kproc = rank/(nprocx*nprocy); jproc = (rank-nprocx*nprocy*kproc)/nprocx; iproc = rank-nprocx*nprocy*kproc-nprocz*jproc; @@ -445,7 +451,7 @@ int main(int argc, char **argv) PM.close(); // printf("File porosity = %f\n", double(sum)/N); //........................................................................... - comm.barrier(); + MPI_Barrier(comm); if (rank == 0) cout << "Domain set." << endl; //........................................................................... // Write the communcation structure into a file for debugging @@ -582,7 +588,7 @@ int main(int argc, char **argv) } } } - comm.barrier(); + MPI_Barrier(comm); if (rank==0) printf ("SendLists are ready on host\n"); //...................................................................................... 
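The broadcast conversion above maps each comm.bcast(ptr, count, root) onto MPI_Bcast with an explicit datatype. A small sketch, assuming representative parameter variables (Nz, tau, Fx stand in for the full parameter set used in the driver):

    #include <mpi.h>

    // Rank 0 reads the input deck; every other rank receives the values.
    // The wrapper's comm.bcast(&x,1,0) becomes MPI_Bcast(&x,1,<type>,0,comm),
    // so the MPI datatype must now match the C type of each variable.
    void broadcast_parameters(int *Nz, double *tau, double *Fx, MPI_Comm comm)
    {
        MPI_Barrier(comm);
        MPI_Bcast(Nz,  1, MPI_INT,    0, comm);
        MPI_Bcast(tau, 1, MPI_DOUBLE, 0, comm);
        MPI_Bcast(Fx,  1, MPI_DOUBLE, 0, comm);
        MPI_Barrier(comm);
    }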
// Use MPI to fill in the recvCounts form the associated processes @@ -593,46 +599,46 @@ int main(int argc, char **argv) //********************************************************************************** // Fill in the recieve counts using MPI sendtag = recvtag = 3; - comm.Send(&sendCount_x,1,rank_X,sendtag); - comm.Recv(&recvCount_X,1,rank_x,recvtag); - comm.Send(&sendCount_X,1,rank_x,sendtag); - comm.Recv(&recvCount_x,1,rank_X,recvtag); - comm.Send(&sendCount_y,1,rank_Y,sendtag); - comm.Recv(&recvCount_Y,1,rank_y,recvtag); - comm.Send(&sendCount_Y,1,rank_y,sendtag); - comm.Recv(&recvCount_y,1,rank_Y,recvtag); - comm.Send(&sendCount_z,1,rank_Z,sendtag); - comm.Recv(&recvCount_Z,1,rank_z,recvtag); - comm.Send(&sendCount_Z,1,rank_z,sendtag); - comm.Recv(&recvCount_z,1,rank_Z,recvtag); + MPI_Send(&sendCount_x,1,MPI_INT,rank_X,sendtag,comm); + MPI_Recv(&recvCount_X,1,MPI_INT,rank_x,recvtag,comm,MPI_STATUS_IGNORE); + MPI_Send(&sendCount_X,1,MPI_INT,rank_x,sendtag,comm); + MPI_Recv(&recvCount_x,1,MPI_INT,rank_X,recvtag,comm,MPI_STATUS_IGNORE); + MPI_Send(&sendCount_y,1,MPI_INT,rank_Y,sendtag,comm); + MPI_Recv(&recvCount_Y,1,MPI_INT,rank_y,recvtag,comm,MPI_STATUS_IGNORE); + MPI_Send(&sendCount_Y,1,MPI_INT,rank_y,sendtag,comm); + MPI_Recv(&recvCount_y,1,MPI_INT,rank_Y,recvtag,comm,MPI_STATUS_IGNORE); + MPI_Send(&sendCount_z,1,MPI_INT,rank_Z,sendtag,comm); + MPI_Recv(&recvCount_Z,1,MPI_INT,rank_z,recvtag,comm,MPI_STATUS_IGNORE); + MPI_Send(&sendCount_Z,1,MPI_INT,rank_z,sendtag,comm); + MPI_Recv(&recvCount_z,1,MPI_INT,rank_Z,recvtag,comm,MPI_STATUS_IGNORE); - comm.Send(&sendCount_xy,1,rank_XY,sendtag); - comm.Recv(&recvCount_XY,1,rank_xy,recvtag); - comm.Send(&sendCount_XY,1,rank_xy,sendtag); - comm.Recv(&recvCount_xy,1,rank_XY,recvtag); - comm.Send(&sendCount_Xy,1,rank_xY,sendtag); - comm.Recv(&recvCount_xY,1,rank_Xy,recvtag); - comm.Send(&sendCount_xY,1,rank_Xy,sendtag); - comm.Recv(&recvCount_Xy,1,rank_xY,recvtag); + MPI_Send(&sendCount_xy,1,MPI_INT,rank_XY,sendtag,comm); + MPI_Recv(&recvCount_XY,1,MPI_INT,rank_xy,recvtag,comm,MPI_STATUS_IGNORE); + MPI_Send(&sendCount_XY,1,MPI_INT,rank_xy,sendtag,comm); + MPI_Recv(&recvCount_xy,1,MPI_INT,rank_XY,recvtag,comm,MPI_STATUS_IGNORE); + MPI_Send(&sendCount_Xy,1,MPI_INT,rank_xY,sendtag,comm); + MPI_Recv(&recvCount_xY,1,MPI_INT,rank_Xy,recvtag,comm,MPI_STATUS_IGNORE); + MPI_Send(&sendCount_xY,1,MPI_INT,rank_Xy,sendtag,comm); + MPI_Recv(&recvCount_Xy,1,MPI_INT,rank_xY,recvtag,comm,MPI_STATUS_IGNORE); - comm.Send(&sendCount_xz,1,rank_XZ,sendtag); - comm.Recv(&recvCount_XZ,1,rank_xz,recvtag); - comm.Send(&sendCount_XZ,1,rank_xz,sendtag); - comm.Recv(&recvCount_xz,1,rank_XZ,recvtag); - comm.Send(&sendCount_Xz,1,rank_xZ,sendtag); - comm.Recv(&recvCount_xZ,1,rank_Xz,recvtag); - comm.Send(&sendCount_xZ,1,rank_Xz,sendtag); - comm.Recv(&recvCount_Xz,1,rank_xZ,recvtag); + MPI_Send(&sendCount_xz,1,MPI_INT,rank_XZ,sendtag,comm); + MPI_Recv(&recvCount_XZ,1,MPI_INT,rank_xz,recvtag,comm,MPI_STATUS_IGNORE); + MPI_Send(&sendCount_XZ,1,MPI_INT,rank_xz,sendtag,comm); + MPI_Recv(&recvCount_xz,1,MPI_INT,rank_XZ,recvtag,comm,MPI_STATUS_IGNORE); + MPI_Send(&sendCount_Xz,1,MPI_INT,rank_xZ,sendtag,comm); + MPI_Recv(&recvCount_xZ,1,MPI_INT,rank_Xz,recvtag,comm,MPI_STATUS_IGNORE); + MPI_Send(&sendCount_xZ,1,MPI_INT,rank_Xz,sendtag,comm); + MPI_Recv(&recvCount_Xz,1,MPI_INT,rank_xZ,recvtag,comm,MPI_STATUS_IGNORE); - comm.Send(&sendCount_yz,1,rank_YZ,sendtag); - comm.Recv(&recvCount_YZ,1,rank_yz,recvtag); - comm.Send(&sendCount_YZ,1,rank_yz,sendtag); - 
comm.Recv(&recvCount_yz,1,rank_YZ,recvtag); - comm.Send(&sendCount_Yz,1,rank_yZ,sendtag); - comm.Recv(&recvCount_yZ,1,rank_Yz,recvtag); - comm.Send(&sendCount_yZ,1,rank_Yz,sendtag); - comm.Recv(&recvCount_Yz,1,rank_yZ,recvtag); - comm.barrier(); + MPI_Send(&sendCount_yz,1,MPI_INT,rank_YZ,sendtag,comm); + MPI_Recv(&recvCount_YZ,1,MPI_INT,rank_yz,recvtag,comm,MPI_STATUS_IGNORE); + MPI_Send(&sendCount_YZ,1,MPI_INT,rank_yz,sendtag,comm); + MPI_Recv(&recvCount_yz,1,MPI_INT,rank_YZ,recvtag,comm,MPI_STATUS_IGNORE); + MPI_Send(&sendCount_Yz,1,MPI_INT,rank_yZ,sendtag,comm); + MPI_Recv(&recvCount_yZ,1,MPI_INT,rank_Yz,recvtag,comm,MPI_STATUS_IGNORE); + MPI_Send(&sendCount_yZ,1,MPI_INT,rank_Yz,sendtag,comm); + MPI_Recv(&recvCount_Yz,1,MPI_INT,rank_yZ,recvtag,comm,MPI_STATUS_IGNORE); + MPI_Barrier(comm); //********************************************************************************** //...................................................................................... int *recvList_x, *recvList_y, *recvList_z, *recvList_X, *recvList_Y, *recvList_Z; @@ -663,48 +669,48 @@ int main(int argc, char **argv) // Use MPI to fill in the appropriate values for recvList // Fill in the recieve lists using MPI sendtag = recvtag = 4; - req1[0] = comm.Isend(sendList_x,sendCount_x,rank_X,sendtag); - req2[0] = comm.Irecv(recvList_X,recvCount_X,rank_x,recvtag); - req1[1] = comm.Isend(sendList_X,sendCount_X,rank_x,sendtag); - req2[1] = comm.Irecv(recvList_x,recvCount_x,rank_X,recvtag); - req1[2] = comm.Isend(sendList_y,sendCount_y,rank_Y,sendtag); - req2[2] = comm.Irecv(recvList_Y,recvCount_Y,rank_y,recvtag); - req1[3] = comm.Isend(sendList_Y,sendCount_Y,rank_y,sendtag); - req2[3] = comm.Irecv(recvList_y,recvCount_y,rank_Y,recvtag); - req1[4] = comm.Isend(sendList_z,sendCount_z,rank_Z,sendtag); - req2[4] = comm.Irecv(recvList_Z,recvCount_Z,rank_z,recvtag); - req1[5] = comm.Isend(sendList_Z,sendCount_Z,rank_z,sendtag); - req2[5] = comm.Irecv(recvList_z,recvCount_z,rank_Z,recvtag); + MPI_Isend(sendList_x, sendCount_x,MPI_INT,rank_X,sendtag,comm,&req1[0]); + MPI_Irecv(recvList_X, recvCount_X,MPI_INT,rank_x,recvtag,comm,&req2[0]); + MPI_Isend(sendList_X, sendCount_X,MPI_INT,rank_x,sendtag,comm,&req1[1]); + MPI_Irecv(recvList_x, recvCount_x,MPI_INT,rank_X,recvtag,comm,&req2[1]); + MPI_Isend(sendList_y, sendCount_y,MPI_INT,rank_Y,sendtag,comm,&req1[2]); + MPI_Irecv(recvList_Y, recvCount_Y,MPI_INT,rank_y,recvtag,comm,&req2[2]); + MPI_Isend(sendList_Y, sendCount_Y,MPI_INT,rank_y,sendtag,comm,&req1[3]); + MPI_Irecv(recvList_y, recvCount_y,MPI_INT,rank_Y,recvtag,comm,&req2[3]); + MPI_Isend(sendList_z, sendCount_z,MPI_INT,rank_Z,sendtag,comm,&req1[4]); + MPI_Irecv(recvList_Z, recvCount_Z,MPI_INT,rank_z,recvtag,comm,&req2[4]); + MPI_Isend(sendList_Z, sendCount_Z,MPI_INT,rank_z,sendtag,comm,&req1[5]); + MPI_Irecv(recvList_z, recvCount_z,MPI_INT,rank_Z,recvtag,comm,&req2[5]); - req1[6] = comm.Isend(sendList_xy,sendCount_xy,rank_XY,sendtag); - req2[6] = comm.Irecv(recvList_XY,recvCount_XY,rank_xy,recvtag); - req1[7] = comm.Isend(sendList_XY,sendCount_XY,rank_xy,sendtag); - req2[7] = comm.Irecv(recvList_xy,recvCount_xy,rank_XY,recvtag); - req1[8] = comm.Isend(sendList_Xy,sendCount_Xy,rank_xY,sendtag); - req2[8] = comm.Irecv(recvList_xY,recvCount_xY,rank_Xy,recvtag); - req1[9] = comm.Isend(sendList_xY,sendCount_xY,rank_Xy,sendtag); - req2[9] = comm.Irecv(recvList_Xy,recvCount_Xy,rank_xY,recvtag); + MPI_Isend(sendList_xy, sendCount_xy,MPI_INT,rank_XY,sendtag,comm,&req1[6]); + MPI_Irecv(recvList_XY, 
recvCount_XY,MPI_INT,rank_xy,recvtag,comm,&req2[6]); + MPI_Isend(sendList_XY, sendCount_XY,MPI_INT,rank_xy,sendtag,comm,&req1[7]); + MPI_Irecv(recvList_xy, recvCount_xy,MPI_INT,rank_XY,recvtag,comm,&req2[7]); + MPI_Isend(sendList_Xy, sendCount_Xy,MPI_INT,rank_xY,sendtag,comm,&req1[8]); + MPI_Irecv(recvList_xY, recvCount_xY,MPI_INT,rank_Xy,recvtag,comm,&req2[8]); + MPI_Isend(sendList_xY, sendCount_xY,MPI_INT,rank_Xy,sendtag,comm,&req1[9]); + MPI_Irecv(recvList_Xy, recvCount_Xy,MPI_INT,rank_xY,recvtag,comm,&req2[9]); - req1[10] = comm.Isend(sendList_xz,sendCount_xz,rank_XZ,sendtag); - req2[10] = comm.Irecv(recvList_XZ,recvCount_XZ,rank_xz,recvtag); - req1[11] = comm.Isend(sendList_XZ,sendCount_XZ,rank_xz,sendtag); - req2[11] = comm.Irecv(recvList_xz,recvCount_xz,rank_XZ,recvtag); - req1[12] = comm.Isend(sendList_Xz,sendCount_Xz,rank_xZ,sendtag); - req2[12] = comm.Irecv(recvList_xZ,recvCount_xZ,rank_Xz,recvtag); - req1[13] = comm.Isend(sendList_xZ,sendCount_xZ,rank_Xz,sendtag); - req2[13] = comm.Irecv(recvList_Xz,recvCount_Xz,rank_xZ,recvtag); + MPI_Isend(sendList_xz, sendCount_xz,MPI_INT,rank_XZ,sendtag,comm,&req1[10]); + MPI_Irecv(recvList_XZ, recvCount_XZ,MPI_INT,rank_xz,recvtag,comm,&req2[10]); + MPI_Isend(sendList_XZ, sendCount_XZ,MPI_INT,rank_xz,sendtag,comm,&req1[11]); + MPI_Irecv(recvList_xz, recvCount_xz,MPI_INT,rank_XZ,recvtag,comm,&req2[11]); + MPI_Isend(sendList_Xz, sendCount_Xz,MPI_INT,rank_xZ,sendtag,comm,&req1[12]); + MPI_Irecv(recvList_xZ, recvCount_xZ,MPI_INT,rank_Xz,recvtag,comm,&req2[12]); + MPI_Isend(sendList_xZ, sendCount_xZ,MPI_INT,rank_Xz,sendtag,comm,&req1[13]); + MPI_Irecv(recvList_Xz, recvCount_Xz,MPI_INT,rank_xZ,recvtag,comm,&req2[13]); - req1[14] = comm.Isend(sendList_yz,sendCount_yz,rank_YZ,sendtag); - req2[14] = comm.Irecv(recvList_YZ,recvCount_YZ,rank_yz,recvtag); - req1[15] = comm.Isend(sendList_YZ,sendCount_YZ,rank_yz,sendtag); - req2[15] = comm.Irecv(recvList_yz,recvCount_yz,rank_YZ,recvtag); - req1[16] = comm.Isend(sendList_Yz,sendCount_Yz,rank_yZ,sendtag); - req2[16] = comm.Irecv(recvList_yZ,recvCount_yZ,rank_Yz,recvtag); - req1[17] = comm.Isend(sendList_yZ,sendCount_yZ,rank_Yz,sendtag); - req2[17] = comm.Irecv(recvList_Yz,recvCount_Yz,rank_yZ,recvtag); - comm.waitAll(18,req1); - comm.waitAll(18,req2); - comm.barrier(); + MPI_Isend(sendList_yz, sendCount_yz,MPI_INT,rank_YZ,sendtag,comm,&req1[14]); + MPI_Irecv(recvList_YZ, recvCount_YZ,MPI_INT,rank_yz,recvtag,comm,&req2[14]); + MPI_Isend(sendList_YZ, sendCount_YZ,MPI_INT,rank_yz,sendtag,comm,&req1[15]); + MPI_Irecv(recvList_yz, recvCount_yz,MPI_INT,rank_YZ,recvtag,comm,&req2[15]); + MPI_Isend(sendList_Yz, sendCount_Yz,MPI_INT,rank_yZ,sendtag,comm,&req1[16]); + MPI_Irecv(recvList_yZ, recvCount_yZ,MPI_INT,rank_Yz,recvtag,comm,&req2[16]); + MPI_Isend(sendList_yZ, sendCount_yZ,MPI_INT,rank_Yz,sendtag,comm,&req1[17]); + MPI_Irecv(recvList_Yz, recvCount_Yz,MPI_INT,rank_yZ,recvtag,comm,&req2[17]); + MPI_Waitall(18,req1,stat1); + MPI_Waitall(18,req2,stat2); + MPI_Barrier(comm); //...................................................................................... 
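A compact sketch of the nonblocking exchange these hunks adopt: post every MPI_Isend/MPI_Irecv pair first, then complete them all with MPI_Waitall. The two-request case below is a stand-in for the 18-direction exchange; exchange_lists and its arguments are placeholder names.

    #include <mpi.h>

    // Exchange integer lists with two neighbors without ordering constraints:
    // all sends and receives are posted, then completed together.
    void exchange_lists(const int *send_a, int count_a, int rank_a,
                        int *recv_b, int count_b, int rank_b,
                        int tag, MPI_Comm comm)
    {
        MPI_Request req[2];
        MPI_Status  stat[2];
        MPI_Isend(send_a, count_a, MPI_INT, rank_a, tag, comm, &req[0]);
        MPI_Irecv(recv_b, count_b, MPI_INT, rank_b, tag, comm, &req[1]);
        MPI_Waitall(2, req, stat);  // MPI_STATUSES_IGNORE also works if the
                                    // statuses are never inspected
    }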
for (int idx=0; idx #include #include -#include "common/MPI.h" +#include #include using namespace std; @@ -64,11 +64,15 @@ inline void UnpackID(int *list, int count, char *recvbuf, char *ID){ int main(int argc, char **argv) { + //***************************************** + // ***** MPI STUFF **************** + //***************************************** // Initialize MPI + int rank,nprocs; MPI_Init(&argc,&argv); - Utilities::MPI comm( MPI_COMM_WORLD ); - int rank = comm.getRank(); - int nprocs = comm.getSize(); + MPI_Comm comm = MPI_COMM_WORLD; + MPI_Comm_rank(comm,&rank); + MPI_Comm_size(comm,&nprocs); // parallel domain size (# of sub-domains) int nprocx,nprocy,nprocz; int iproc,jproc,kproc; @@ -82,6 +86,7 @@ int main(int argc, char **argv) int rank_yz,rank_YZ,rank_yZ,rank_Yz; //********************************** MPI_Request req1[18],req2[18]; + MPI_Status stat1[18],stat2[18]; //********************************** //!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! //!!!!!!!!!!! Random debugging communications!!!!!!!!!!!!!!!!!!!!!!!!!!!!! @@ -131,23 +136,24 @@ int main(int argc, char **argv) // ************************************************************** // Broadcast simulation parameters from rank 0 to all other procs - comm.barrier(); + MPI_Barrier(comm); //................................................. - comm.bcast(&Nz,1,0); - comm.bcast(&nBlocks,1,0); - comm.bcast(&nthreads,1,0); - comm.bcast(&tau,1,0); - comm.bcast(&Fx,1,0); - comm.bcast(&Fy,1,0); - comm.bcast(&Fz,1,0); - comm.bcast(×tepMax,1,0); - comm.bcast(&interval,1,0); - comm.bcast(&tol,1,0); - comm.bcast(&nprocx,1,0); - comm.bcast(&nprocy,1,0); - comm.bcast(&nprocz,1,0); + MPI_Bcast(&Nz,1,MPI_INT,0,comm); + MPI_Bcast(&nBlocks,1,MPI_INT,0,comm); + MPI_Bcast(&nthreads,1,MPI_INT,0,comm); + MPI_Bcast(&tau,1,MPI_DOUBLE,0,comm); + MPI_Bcast(&Fx,1,MPI_DOUBLE,0,comm); + MPI_Bcast(&Fy,1,MPI_DOUBLE,0,comm); + MPI_Bcast(&Fz,1,MPI_DOUBLE,0,comm); + MPI_Bcast(×tepMax,1,MPI_INT,0,comm); + MPI_Bcast(&interval,1,MPI_INT,0,comm); + MPI_Bcast(&tol,1,MPI_DOUBLE,0,comm); + + MPI_Bcast(&nprocx,1,MPI_INT,0,comm); + MPI_Bcast(&nprocy,1,MPI_INT,0,comm); + MPI_Bcast(&nprocz,1,MPI_INT,0,comm); //................................................. - comm.barrier(); + MPI_Barrier(comm); // ************************************************************** double rlx_setA = 1.f/tau; @@ -170,7 +176,7 @@ int main(int argc, char **argv) printf("Sub-domain size = %i x %i x %i\n",Nz,Nz,Nz); } - comm.barrier(); + MPI_Barrier(comm); kproc = rank/(nprocx*nprocy); jproc = (rank-nprocx*nprocy*kproc)/nprocx; iproc = rank-nprocx*nprocy*kproc-nprocz*jproc; @@ -451,7 +457,7 @@ int main(int argc, char **argv) PM.close(); // printf("File porosity = %f\n", double(sum)/N); //........................................................................... - comm.barrier(); + MPI_Barrier(comm); if (rank == 0) cout << "Domain set." << endl; //........................................................................... // Write the communcation structure into a file for debugging @@ -588,7 +594,7 @@ int main(int argc, char **argv) } } } - comm.barrier(); + MPI_Barrier(comm); if (rank==0) printf ("SendLists are ready on host\n"); //...................................................................................... 
// Use MPI to fill in the recvCounts form the associated processes @@ -599,46 +605,46 @@ int main(int argc, char **argv) //********************************************************************************** // Fill in the recieve counts using MPI sendtag = recvtag = 3; - comm.send(&sendCount_x,1,rank_X,sendtag); - comm.recv(&recvCount_X,1,rank_x,recvtag); - comm.send(&sendCount_X,1,rank_x,sendtag); - comm.recv(&recvCount_x,1,rank_X,recvtag); - comm.send(&sendCount_y,1,rank_Y,sendtag); - comm.recv(&recvCount_Y,1,rank_y,recvtag); - comm.send(&sendCount_Y,1,rank_y,sendtag); - comm.recv(&recvCount_y,1,rank_Y,recvtag); - comm.send(&sendCount_z,1,rank_Z,sendtag); - comm.recv(&recvCount_Z,1,rank_z,recvtag); - comm.send(&sendCount_Z,1,rank_z,sendtag); - comm.recv(&recvCount_z,1,rank_Z,recvtag); + MPI_Send(&sendCount_x,1,MPI_INT,rank_X,sendtag,comm); + MPI_Recv(&recvCount_X,1,MPI_INT,rank_x,recvtag,comm,MPI_STATUS_IGNORE); + MPI_Send(&sendCount_X,1,MPI_INT,rank_x,sendtag,comm); + MPI_Recv(&recvCount_x,1,MPI_INT,rank_X,recvtag,comm,MPI_STATUS_IGNORE); + MPI_Send(&sendCount_y,1,MPI_INT,rank_Y,sendtag,comm); + MPI_Recv(&recvCount_Y,1,MPI_INT,rank_y,recvtag,comm,MPI_STATUS_IGNORE); + MPI_Send(&sendCount_Y,1,MPI_INT,rank_y,sendtag,comm); + MPI_Recv(&recvCount_y,1,MPI_INT,rank_Y,recvtag,comm,MPI_STATUS_IGNORE); + MPI_Send(&sendCount_z,1,MPI_INT,rank_Z,sendtag,comm); + MPI_Recv(&recvCount_Z,1,MPI_INT,rank_z,recvtag,comm,MPI_STATUS_IGNORE); + MPI_Send(&sendCount_Z,1,MPI_INT,rank_z,sendtag,comm); + MPI_Recv(&recvCount_z,1,MPI_INT,rank_Z,recvtag,comm,MPI_STATUS_IGNORE); - comm.send(&sendCount_xy,1,rank_XY,sendtag); - comm.recv(&recvCount_XY,1,rank_xy,recvtag); - comm.send(&sendCount_XY,1,rank_xy,sendtag); - comm.recv(&recvCount_xy,1,rank_XY,recvtag); - comm.send(&sendCount_Xy,1,rank_xY,sendtag); - comm.recv(&recvCount_xY,1,rank_Xy,recvtag); - comm.send(&sendCount_xY,1,rank_Xy,sendtag); - comm.recv(&recvCount_Xy,1,rank_xY,recvtag); + MPI_Send(&sendCount_xy,1,MPI_INT,rank_XY,sendtag,comm); + MPI_Recv(&recvCount_XY,1,MPI_INT,rank_xy,recvtag,comm,MPI_STATUS_IGNORE); + MPI_Send(&sendCount_XY,1,MPI_INT,rank_xy,sendtag,comm); + MPI_Recv(&recvCount_xy,1,MPI_INT,rank_XY,recvtag,comm,MPI_STATUS_IGNORE); + MPI_Send(&sendCount_Xy,1,MPI_INT,rank_xY,sendtag,comm); + MPI_Recv(&recvCount_xY,1,MPI_INT,rank_Xy,recvtag,comm,MPI_STATUS_IGNORE); + MPI_Send(&sendCount_xY,1,MPI_INT,rank_Xy,sendtag,comm); + MPI_Recv(&recvCount_Xy,1,MPI_INT,rank_xY,recvtag,comm,MPI_STATUS_IGNORE); - comm.send(&sendCount_xz,1,rank_XZ,sendtag); - comm.recv(&recvCount_XZ,1,rank_xz,recvtag); - comm.send(&sendCount_XZ,1,rank_xz,sendtag); - comm.recv(&recvCount_xz,1,rank_XZ,recvtag); - comm.send(&sendCount_Xz,1,rank_xZ,sendtag); - comm.recv(&recvCount_xZ,1,rank_Xz,recvtag); - comm.send(&sendCount_xZ,1,rank_Xz,sendtag); - comm.recv(&recvCount_Xz,1,rank_xZ,recvtag); + MPI_Send(&sendCount_xz,1,MPI_INT,rank_XZ,sendtag,comm); + MPI_Recv(&recvCount_XZ,1,MPI_INT,rank_xz,recvtag,comm,MPI_STATUS_IGNORE); + MPI_Send(&sendCount_XZ,1,MPI_INT,rank_xz,sendtag,comm); + MPI_Recv(&recvCount_xz,1,MPI_INT,rank_XZ,recvtag,comm,MPI_STATUS_IGNORE); + MPI_Send(&sendCount_Xz,1,MPI_INT,rank_xZ,sendtag,comm); + MPI_Recv(&recvCount_xZ,1,MPI_INT,rank_Xz,recvtag,comm,MPI_STATUS_IGNORE); + MPI_Send(&sendCount_xZ,1,MPI_INT,rank_Xz,sendtag,comm); + MPI_Recv(&recvCount_Xz,1,MPI_INT,rank_xZ,recvtag,comm,MPI_STATUS_IGNORE); - comm.send(&sendCount_yz,1,rank_YZ,sendtag); - comm.recv(&recvCount_YZ,1,rank_yz,recvtag); - comm.send(&sendCount_YZ,1,rank_yz,sendtag); - 
comm.recv(&recvCount_yz,1,rank_YZ,recvtag); - comm.send(&sendCount_Yz,1,rank_yZ,sendtag); - comm.recv(&recvCount_yZ,1,rank_Yz,recvtag); - comm.send(&sendCount_yZ,1,rank_Yz,sendtag); - comm.recv(&recvCount_Yz,1,rank_yZ,recvtag); - comm.barrier(); + MPI_Send(&sendCount_yz,1,MPI_INT,rank_YZ,sendtag,comm); + MPI_Recv(&recvCount_YZ,1,MPI_INT,rank_yz,recvtag,comm,MPI_STATUS_IGNORE); + MPI_Send(&sendCount_YZ,1,MPI_INT,rank_yz,sendtag,comm); + MPI_Recv(&recvCount_yz,1,MPI_INT,rank_YZ,recvtag,comm,MPI_STATUS_IGNORE); + MPI_Send(&sendCount_Yz,1,MPI_INT,rank_yZ,sendtag,comm); + MPI_Recv(&recvCount_yZ,1,MPI_INT,rank_Yz,recvtag,comm,MPI_STATUS_IGNORE); + MPI_Send(&sendCount_yZ,1,MPI_INT,rank_Yz,sendtag,comm); + MPI_Recv(&recvCount_Yz,1,MPI_INT,rank_yZ,recvtag,comm,MPI_STATUS_IGNORE); + MPI_Barrier(comm); //********************************************************************************** //...................................................................................... int *recvList_x, *recvList_y, *recvList_z, *recvList_X, *recvList_Y, *recvList_Z; @@ -669,48 +675,48 @@ int main(int argc, char **argv) // Use MPI to fill in the appropriate values for recvList // Fill in the recieve lists using MPI sendtag = recvtag = 4; - req1[0] = comm.Isend(sendList_x,sendCount_x,rank_X,sendtag); - req2[0] = comm.Irecv(recvList_X,recvCount_X,rank_x,recvtag); - req1[1] = comm.Isend(sendList_X,sendCount_X,rank_x,sendtag); - req2[1] = comm.Irecv(recvList_x,recvCount_x,rank_X,recvtag); - req1[2] = comm.Isend(sendList_y,sendCount_y,rank_Y,sendtag); - req2[2] = comm.Irecv(recvList_Y,recvCount_Y,rank_y,recvtag); - req1[3] = comm.Isend(sendList_Y,sendCount_Y,rank_y,sendtag); - req2[3] = comm.Irecv(recvList_y,recvCount_y,rank_Y,recvtag); - req1[4] = comm.Isend(sendList_z,sendCount_z,rank_Z,sendtag); - req2[4] = comm.Irecv(recvList_Z,recvCount_Z,rank_z,recvtag); - req1[5] = comm.Isend(sendList_Z,sendCount_Z,rank_z,sendtag); - req2[5] = comm.Irecv(recvList_z,recvCount_z,rank_Z,recvtag); + MPI_Isend(sendList_x, sendCount_x,MPI_INT,rank_X,sendtag,comm,&req1[0]); + MPI_Irecv(recvList_X, recvCount_X,MPI_INT,rank_x,recvtag,comm,&req2[0]); + MPI_Isend(sendList_X, sendCount_X,MPI_INT,rank_x,sendtag,comm,&req1[1]); + MPI_Irecv(recvList_x, recvCount_x,MPI_INT,rank_X,recvtag,comm,&req2[1]); + MPI_Isend(sendList_y, sendCount_y,MPI_INT,rank_Y,sendtag,comm,&req1[2]); + MPI_Irecv(recvList_Y, recvCount_Y,MPI_INT,rank_y,recvtag,comm,&req2[2]); + MPI_Isend(sendList_Y, sendCount_Y,MPI_INT,rank_y,sendtag,comm,&req1[3]); + MPI_Irecv(recvList_y, recvCount_y,MPI_INT,rank_Y,recvtag,comm,&req2[3]); + MPI_Isend(sendList_z, sendCount_z,MPI_INT,rank_Z,sendtag,comm,&req1[4]); + MPI_Irecv(recvList_Z, recvCount_Z,MPI_INT,rank_z,recvtag,comm,&req2[4]); + MPI_Isend(sendList_Z, sendCount_Z,MPI_INT,rank_z,sendtag,comm,&req1[5]); + MPI_Irecv(recvList_z, recvCount_z,MPI_INT,rank_Z,recvtag,comm,&req2[5]); - req1[6] = comm.Isend(sendList_xy,sendCount_xy,rank_XY,sendtag); - req2[6] = comm.Irecv(recvList_XY,recvCount_XY,rank_xy,recvtag); - req1[7] = comm.Isend(sendList_XY,sendCount_XY,rank_xy,sendtag); - req2[7] = comm.Irecv(recvList_xy,recvCount_xy,rank_XY,recvtag); - req1[8] = comm.Isend(sendList_Xy,sendCount_Xy,rank_xY,sendtag); - req2[8] = comm.Irecv(recvList_xY,recvCount_xY,rank_Xy,recvtag); - req1[9] = comm.Isend(sendList_xY,sendCount_xY,rank_Xy,sendtag); - req2[9] = comm.Irecv(recvList_Xy,recvCount_Xy,rank_xY,recvtag); + MPI_Isend(sendList_xy, sendCount_xy,MPI_INT,rank_XY,sendtag,comm,&req1[6]); + MPI_Irecv(recvList_XY, 
recvCount_XY,MPI_INT,rank_xy,recvtag,comm,&req2[6]); + MPI_Isend(sendList_XY, sendCount_XY,MPI_INT,rank_xy,sendtag,comm,&req1[7]); + MPI_Irecv(recvList_xy, recvCount_xy,MPI_INT,rank_XY,recvtag,comm,&req2[7]); + MPI_Isend(sendList_Xy, sendCount_Xy,MPI_INT,rank_xY,sendtag,comm,&req1[8]); + MPI_Irecv(recvList_xY, recvCount_xY,MPI_INT,rank_Xy,recvtag,comm,&req2[8]); + MPI_Isend(sendList_xY, sendCount_xY,MPI_INT,rank_Xy,sendtag,comm,&req1[9]); + MPI_Irecv(recvList_Xy, recvCount_Xy,MPI_INT,rank_xY,recvtag,comm,&req2[9]); - req1[10] = comm.Isend(sendList_xz,sendCount_xz,rank_XZ,sendtag); - req2[10] = comm.Irecv(recvList_XZ,recvCount_XZ,rank_xz,recvtag); - req1[11] = comm.Isend(sendList_XZ,sendCount_XZ,rank_xz,sendtag); - req2[11] = comm.Irecv(recvList_xz,recvCount_xz,rank_XZ,recvtag); - req1[12] = comm.Isend(sendList_Xz,sendCount_Xz,rank_xZ,sendtag); - req2[12] = comm.Irecv(recvList_xZ,recvCount_xZ,rank_Xz,recvtag); - req1[13] = comm.Isend(sendList_xZ,sendCount_xZ,rank_Xz,sendtag); - req2[13] = comm.Irecv(recvList_Xz,recvCount_Xz,rank_xZ,recvtag); + MPI_Isend(sendList_xz, sendCount_xz,MPI_INT,rank_XZ,sendtag,comm,&req1[10]); + MPI_Irecv(recvList_XZ, recvCount_XZ,MPI_INT,rank_xz,recvtag,comm,&req2[10]); + MPI_Isend(sendList_XZ, sendCount_XZ,MPI_INT,rank_xz,sendtag,comm,&req1[11]); + MPI_Irecv(recvList_xz, recvCount_xz,MPI_INT,rank_XZ,recvtag,comm,&req2[11]); + MPI_Isend(sendList_Xz, sendCount_Xz,MPI_INT,rank_xZ,sendtag,comm,&req1[12]); + MPI_Irecv(recvList_xZ, recvCount_xZ,MPI_INT,rank_Xz,recvtag,comm,&req2[12]); + MPI_Isend(sendList_xZ, sendCount_xZ,MPI_INT,rank_Xz,sendtag,comm,&req1[13]); + MPI_Irecv(recvList_Xz, recvCount_Xz,MPI_INT,rank_xZ,recvtag,comm,&req2[13]); - req1[14] = comm.Isend(sendList_yz,sendCount_yz,rank_YZ,sendtag); - req2[14] = comm.Irecv(recvList_YZ,recvCount_YZ,rank_yz,recvtag); - req1[15] = comm.Isend(sendList_YZ,sendCount_YZ,rank_yz,sendtag); - req2[15] = comm.Irecv(recvList_yz,recvCount_yz,rank_YZ,recvtag); - req1[16] = comm.Isend(sendList_Yz,sendCount_Yz,rank_yZ,sendtag); - req2[16] = comm.Irecv(recvList_yZ,recvCount_yZ,rank_Yz,recvtag); - req1[17] = comm.Isend(sendList_yZ,sendCount_yZ,rank_Yz,sendtag); - req2[17] = comm.Irecv(recvList_Yz,recvCount_Yz,rank_yZ,recvtag); - comm.waitAll(18,req1); - comm.waitAll(18,req2); - comm.barrier(); + MPI_Isend(sendList_yz, sendCount_yz,MPI_INT,rank_YZ,sendtag,comm,&req1[14]); + MPI_Irecv(recvList_YZ, recvCount_YZ,MPI_INT,rank_yz,recvtag,comm,&req2[14]); + MPI_Isend(sendList_YZ, sendCount_YZ,MPI_INT,rank_yz,sendtag,comm,&req1[15]); + MPI_Irecv(recvList_yz, recvCount_yz,MPI_INT,rank_YZ,recvtag,comm,&req2[15]); + MPI_Isend(sendList_Yz, sendCount_Yz,MPI_INT,rank_yZ,sendtag,comm,&req1[16]); + MPI_Irecv(recvList_yZ, recvCount_yZ,MPI_INT,rank_Yz,recvtag,comm,&req2[16]); + MPI_Isend(sendList_yZ, sendCount_yZ,MPI_INT,rank_Yz,sendtag,comm,&req1[17]); + MPI_Irecv(recvList_Yz, recvCount_Yz,MPI_INT,rank_yZ,recvtag,comm,&req2[17]); + MPI_Waitall(18,req1,stat1); + MPI_Waitall(18,req2,stat2); + MPI_Barrier(comm); //...................................................................................... double *sendbuf_x, *sendbuf_y, *sendbuf_z, *sendbuf_X, *sendbuf_Y, *sendbuf_Z; double *sendbuf_xy, *sendbuf_yz, *sendbuf_xz, *sendbuf_Xy, *sendbuf_Yz, *sendbuf_xZ; @@ -909,24 +915,42 @@ int main(int argc, char **argv) PackID(sendList_yZ, sendCount_yZ ,sendID_yZ, id); PackID(sendList_YZ, sendCount_YZ ,sendID_YZ, id); //...................................................................................... 
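The PackID / MPI_Sendrecv / UnpackID sequence that follows is easiest to read as one unit. Below is a sketch with simplified pack and unpack helpers; the real PackID/UnpackID operate on the per-face send and receive index lists shown above, and halo_update_id is a placeholder name for one direction pair.

    #include <mpi.h>

    // Simplified stand-ins for PackID/UnpackID: gather the ID values named by
    // a list of site indices into a contiguous buffer, and scatter them back.
    static void pack_id(const int *list, int count, char *buf, const char *id) {
        for (int i = 0; i < count; i++) buf[i] = id[list[i]];
    }
    static void unpack_id(const int *list, int count, const char *buf, char *id) {
        for (int i = 0; i < count; i++) id[list[i]] = buf[i];
    }

    void halo_update_id(const int *send_list, int send_count, int dest,
                        const int *recv_list, int recv_count, int source,
                        char *send_buf, char *recv_buf, char *id,
                        int tag, MPI_Comm comm)
    {
        pack_id(send_list, send_count, send_buf, id);
        MPI_Sendrecv(send_buf, send_count, MPI_CHAR, dest,   tag,
                     recv_buf, recv_count, MPI_CHAR, source, tag,
                     comm, MPI_STATUS_IGNORE);
        unpack_id(recv_list, recv_count, recv_buf, id);
    }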
- comm.sendrecv(sendID_x,sendCount_x,rank_X,sendtag,recvID_X,recvCount_X,rank_x,recvtag); - comm.sendrecv(sendID_X,sendCount_X,rank_x,sendtag,recvID_x,recvCount_x,rank_X,recvtag); - comm.sendrecv(sendID_y,sendCount_y,rank_Y,sendtag,recvID_Y,recvCount_Y,rank_y,recvtag); - comm.sendrecv(sendID_Y,sendCount_Y,rank_y,sendtag,recvID_y,recvCount_y,rank_Y,recvtag); - comm.sendrecv(sendID_z,sendCount_z,rank_Z,sendtag,recvID_Z,recvCount_Z,rank_z,recvtag); - comm.sendrecv(sendID_Z,sendCount_Z,rank_z,sendtag,recvID_z,recvCount_z,rank_Z,recvtag); - comm.sendrecv(sendID_xy,sendCount_xy,rank_XY,sendtag,recvID_XY,recvCount_XY,rank_xy,recvtag); - comm.sendrecv(sendID_XY,sendCount_XY,rank_xy,sendtag,recvID_xy,recvCount_xy,rank_XY,recvtag); - comm.sendrecv(sendID_Xy,sendCount_Xy,rank_xY,sendtag,recvID_xY,recvCount_xY,rank_Xy,recvtag); - comm.sendrecv(sendID_xY,sendCount_xY,rank_Xy,sendtag,recvID_Xy,recvCount_Xy,rank_xY,recvtag); - comm.sendrecv(sendID_xz,sendCount_xz,rank_XZ,sendtag,recvID_XZ,recvCount_XZ,rank_xz,recvtag); - comm.sendrecv(sendID_XZ,sendCount_XZ,rank_xz,sendtag,recvID_xz,recvCount_xz,rank_XZ,recvtag); - comm.sendrecv(sendID_Xz,sendCount_Xz,rank_xZ,sendtag,recvID_xZ,recvCount_xZ,rank_Xz,recvtag); - comm.sendrecv(sendID_xZ,sendCount_xZ,rank_Xz,sendtag,recvID_Xz,recvCount_Xz,rank_xZ,recvtag); - comm.sendrecv(sendID_yz,sendCount_yz,rank_YZ,sendtag,recvID_YZ,recvCount_YZ,rank_yz,recvtag); - comm.sendrecv(sendID_YZ,sendCount_YZ,rank_yz,sendtag,recvID_yz,recvCount_yz,rank_YZ,recvtag); - comm.sendrecv(sendID_Yz,sendCount_Yz,rank_yZ,sendtag,recvID_yZ,recvCount_yZ,rank_Yz,recvtag); - comm.sendrecv(sendID_yZ,sendCount_yZ,rank_Yz,sendtag,recvID_Yz,recvCount_Yz,rank_yZ,recvtag); + MPI_Sendrecv(sendID_x,sendCount_x,MPI_CHAR,rank_X,sendtag, + recvID_X,recvCount_X,MPI_CHAR,rank_x,recvtag,comm,MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_X,sendCount_X,MPI_CHAR,rank_x,sendtag, + recvID_x,recvCount_x,MPI_CHAR,rank_X,recvtag,comm,MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_y,sendCount_y,MPI_CHAR,rank_Y,sendtag, + recvID_Y,recvCount_Y,MPI_CHAR,rank_y,recvtag,comm,MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_Y,sendCount_Y,MPI_CHAR,rank_y,sendtag, + recvID_y,recvCount_y,MPI_CHAR,rank_Y,recvtag,comm,MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_z,sendCount_z,MPI_CHAR,rank_Z,sendtag, + recvID_Z,recvCount_Z,MPI_CHAR,rank_z,recvtag,comm,MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_Z,sendCount_Z,MPI_CHAR,rank_z,sendtag, + recvID_z,recvCount_z,MPI_CHAR,rank_Z,recvtag,comm,MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_xy,sendCount_xy,MPI_CHAR,rank_XY,sendtag, + recvID_XY,recvCount_XY,MPI_CHAR,rank_xy,recvtag,comm,MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_XY,sendCount_XY,MPI_CHAR,rank_xy,sendtag, + recvID_xy,recvCount_xy,MPI_CHAR,rank_XY,recvtag,comm,MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_Xy,sendCount_Xy,MPI_CHAR,rank_xY,sendtag, + recvID_xY,recvCount_xY,MPI_CHAR,rank_Xy,recvtag,comm,MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_xY,sendCount_xY,MPI_CHAR,rank_Xy,sendtag, + recvID_Xy,recvCount_Xy,MPI_CHAR,rank_xY,recvtag,comm,MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_xz,sendCount_xz,MPI_CHAR,rank_XZ,sendtag, + recvID_XZ,recvCount_XZ,MPI_CHAR,rank_xz,recvtag,comm,MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_XZ,sendCount_XZ,MPI_CHAR,rank_xz,sendtag, + recvID_xz,recvCount_xz,MPI_CHAR,rank_XZ,recvtag,comm,MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_Xz,sendCount_Xz,MPI_CHAR,rank_xZ,sendtag, + recvID_xZ,recvCount_xZ,MPI_CHAR,rank_Xz,recvtag,comm,MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_xZ,sendCount_xZ,MPI_CHAR,rank_Xz,sendtag, + 
recvID_Xz,recvCount_Xz,MPI_CHAR,rank_xZ,recvtag,comm,MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_yz,sendCount_yz,MPI_CHAR,rank_YZ,sendtag, + recvID_YZ,recvCount_YZ,MPI_CHAR,rank_yz,recvtag,comm,MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_YZ,sendCount_YZ,MPI_CHAR,rank_yz,sendtag, + recvID_yz,recvCount_yz,MPI_CHAR,rank_YZ,recvtag,comm,MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_Yz,sendCount_Yz,MPI_CHAR,rank_yZ,sendtag, + recvID_yZ,recvCount_yZ,MPI_CHAR,rank_Yz,recvtag,comm,MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_yZ,sendCount_yZ,MPI_CHAR,rank_Yz,sendtag, + recvID_Yz,recvCount_Yz,MPI_CHAR,rank_yZ,recvtag,comm,MPI_STATUS_IGNORE); //...................................................................................... UnpackID(recvList_x, recvCount_x ,recvID_x, id); UnpackID(recvList_X, recvCount_X ,recvID_X, id); @@ -959,7 +983,7 @@ int main(int argc, char **argv) free(recvID_yz); free(recvID_YZ); free(recvID_yZ); free(recvID_Yz); //...................................................................................... if (rank==0) printf ("Devices are ready to communicate. \n"); - comm.barrier(); + MPI_Barrier(comm); //...........device phase ID................................................. if (rank==0) printf ("Copying phase ID to device \n"); @@ -999,8 +1023,8 @@ int main(int argc, char **argv) //.......create and start timer............ double starttime,stoptime,cputime; - comm.barrier(); - starttime = Utilities::MPI::time(); + MPI_Barrier(comm); + starttime = MPI_Wtime(); // Old cuda timer is below // cudaEvent_t start, stop; // float time; @@ -1112,48 +1136,48 @@ int main(int argc, char **argv) //................................................................................... // Send all the distributions - req1[0] = comm.Isend(sendbuf_x,5*sendCount_x,rank_X,sendtag); - req2[0] = comm.Irecv(recvbuf_X,5*recvCount_X,rank_x,recvtag); - req1[1] = comm.Isend(sendbuf_X,5*sendCount_X,rank_x,sendtag); - req2[1] = comm.Irecv(recvbuf_x,5*recvCount_x,rank_X,recvtag); - req1[2] = comm.Isend(sendbuf_y,5*sendCount_y,rank_Y,sendtag); - req2[2] = comm.Irecv(recvbuf_Y,5*recvCount_Y,rank_y,recvtag); - req1[3] = comm.Isend(sendbuf_Y,5*sendCount_Y,rank_y,sendtag); - req2[3] = comm.Irecv(recvbuf_y,5*recvCount_y,rank_Y,recvtag); - req1[4] = comm.Isend(sendbuf_z,5*sendCount_z,rank_Z,sendtag); - req2[4] = comm.Irecv(recvbuf_Z,5*recvCount_Z,rank_z,recvtag); - req1[5] = comm.Isend(sendbuf_Z,5*sendCount_Z,rank_z,sendtag); - req2[5] = comm.Irecv(recvbuf_z,5*recvCount_z,rank_Z,recvtag); - req1[6] = comm.Isend(sendbuf_xy,sendCount_xy,rank_XY,sendtag); - req2[6] = comm.Irecv(recvbuf_XY,recvCount_XY,rank_xy,recvtag); - req1[7] = comm.Isend(sendbuf_XY,sendCount_XY,rank_xy,sendtag); - req2[7] = comm.Irecv(recvbuf_xy,recvCount_xy,rank_XY,recvtag); - req1[8] = comm.Isend(sendbuf_Xy,sendCount_Xy,rank_xY,sendtag); - req2[8] = comm.Irecv(recvbuf_xY,recvCount_xY,rank_Xy,recvtag); - req1[9] = comm.Isend(sendbuf_xY,sendCount_xY,rank_Xy,sendtag); - req2[9] = comm.Irecv(recvbuf_Xy,recvCount_Xy,rank_xY,recvtag); - req1[10] = comm.Isend(sendbuf_xz,sendCount_xz,rank_XZ,sendtag); - req2[10] = comm.Irecv(recvbuf_XZ,recvCount_XZ,rank_xz,recvtag); - req1[11] = comm.Isend(sendbuf_XZ,sendCount_XZ,rank_xz,sendtag); - req2[11] = comm.Irecv(recvbuf_xz,recvCount_xz,rank_XZ,recvtag); - req1[12] = comm.Isend(sendbuf_Xz,sendCount_Xz,rank_xZ,sendtag); - req2[12] = comm.Irecv(recvbuf_xZ,recvCount_xZ,rank_Xz,recvtag); - req1[13] = comm.Isend(sendbuf_xZ,sendCount_xZ,rank_Xz,sendtag); - req2[13] = comm.Irecv(recvbuf_Xz,recvCount_Xz,rank_xZ,recvtag); - 
req1[14] = comm.Isend(sendbuf_yz,sendCount_yz,rank_YZ,sendtag); - req2[14] = comm.Irecv(recvbuf_YZ,recvCount_YZ,rank_yz,recvtag); - req1[15] = comm.Isend(sendbuf_YZ,sendCount_YZ,rank_yz,sendtag); - req2[15] = comm.Irecv(recvbuf_yz,recvCount_yz,rank_YZ,recvtag); - req1[16] = comm.Isend(sendbuf_Yz,sendCount_Yz,rank_yZ,sendtag); - req2[16] = comm.Irecv(recvbuf_yZ,recvCount_yZ,rank_Yz,recvtag); - req1[17] = comm.Isend(sendbuf_yZ,sendCount_yZ,rank_Yz,sendtag); - req2[17] = comm.Irecv(recvbuf_Yz,recvCount_Yz,rank_yZ,recvtag); + MPI_Isend(sendbuf_x, 5*sendCount_x,MPI_DOUBLE,rank_X,sendtag,comm,&req1[0]); + MPI_Irecv(recvbuf_X, 5*recvCount_X,MPI_DOUBLE,rank_x,recvtag,comm,&req2[0]); + MPI_Isend(sendbuf_X, 5*sendCount_X,MPI_DOUBLE,rank_x,sendtag,comm,&req1[1]); + MPI_Irecv(recvbuf_x, 5*recvCount_x,MPI_DOUBLE,rank_X,recvtag,comm,&req2[1]); + MPI_Isend(sendbuf_y, 5*sendCount_y,MPI_DOUBLE,rank_Y,sendtag,comm,&req1[2]); + MPI_Irecv(recvbuf_Y, 5*recvCount_Y,MPI_DOUBLE,rank_y,recvtag,comm,&req2[2]); + MPI_Isend(sendbuf_Y, 5*sendCount_Y,MPI_DOUBLE,rank_y,sendtag,comm,&req1[3]); + MPI_Irecv(recvbuf_y, 5*recvCount_y,MPI_DOUBLE,rank_Y,recvtag,comm,&req2[3]); + MPI_Isend(sendbuf_z, 5*sendCount_z,MPI_DOUBLE,rank_Z,sendtag,comm,&req1[4]); + MPI_Irecv(recvbuf_Z, 5*recvCount_Z,MPI_DOUBLE,rank_z,recvtag,comm,&req2[4]); + MPI_Isend(sendbuf_Z, 5*sendCount_Z,MPI_DOUBLE,rank_z,sendtag,comm,&req1[5]); + MPI_Irecv(recvbuf_z, 5*recvCount_z,MPI_DOUBLE,rank_Z,recvtag,comm,&req2[5]); + MPI_Isend(sendbuf_xy, sendCount_xy,MPI_DOUBLE,rank_XY,sendtag,comm,&req1[6]); + MPI_Irecv(recvbuf_XY, recvCount_XY,MPI_DOUBLE,rank_xy,recvtag,comm,&req2[6]); + MPI_Isend(sendbuf_XY, sendCount_XY,MPI_DOUBLE,rank_xy,sendtag,comm,&req1[7]); + MPI_Irecv(recvbuf_xy, recvCount_xy,MPI_DOUBLE,rank_XY,recvtag,comm,&req2[7]); + MPI_Isend(sendbuf_Xy, sendCount_Xy,MPI_DOUBLE,rank_xY,sendtag,comm,&req1[8]); + MPI_Irecv(recvbuf_xY, recvCount_xY,MPI_DOUBLE,rank_Xy,recvtag,comm,&req2[8]); + MPI_Isend(sendbuf_xY, sendCount_xY,MPI_DOUBLE,rank_Xy,sendtag,comm,&req1[9]); + MPI_Irecv(recvbuf_Xy, recvCount_Xy,MPI_DOUBLE,rank_xY,recvtag,comm,&req2[9]); + MPI_Isend(sendbuf_xz, sendCount_xz,MPI_DOUBLE,rank_XZ,sendtag,comm,&req1[10]); + MPI_Irecv(recvbuf_XZ, recvCount_XZ,MPI_DOUBLE,rank_xz,recvtag,comm,&req2[10]); + MPI_Isend(sendbuf_XZ, sendCount_XZ,MPI_DOUBLE,rank_xz,sendtag,comm,&req1[11]); + MPI_Irecv(recvbuf_xz, recvCount_xz,MPI_DOUBLE,rank_XZ,recvtag,comm,&req2[11]); + MPI_Isend(sendbuf_Xz, sendCount_Xz,MPI_DOUBLE,rank_xZ,sendtag,comm,&req1[12]); + MPI_Irecv(recvbuf_xZ, recvCount_xZ,MPI_DOUBLE,rank_Xz,recvtag,comm,&req2[12]); + MPI_Isend(sendbuf_xZ, sendCount_xZ,MPI_DOUBLE,rank_Xz,sendtag,comm,&req1[13]); + MPI_Irecv(recvbuf_Xz, recvCount_Xz,MPI_DOUBLE,rank_xZ,recvtag,comm,&req2[13]); + MPI_Isend(sendbuf_yz, sendCount_yz,MPI_DOUBLE,rank_YZ,sendtag,comm,&req1[14]); + MPI_Irecv(recvbuf_YZ, recvCount_YZ,MPI_DOUBLE,rank_yz,recvtag,comm,&req2[14]); + MPI_Isend(sendbuf_YZ, sendCount_YZ,MPI_DOUBLE,rank_yz,sendtag,comm,&req1[15]); + MPI_Irecv(recvbuf_yz, recvCount_yz,MPI_DOUBLE,rank_YZ,recvtag,comm,&req2[15]); + MPI_Isend(sendbuf_Yz, sendCount_Yz,MPI_DOUBLE,rank_yZ,sendtag,comm,&req1[16]); + MPI_Irecv(recvbuf_yZ, recvCount_yZ,MPI_DOUBLE,rank_Yz,recvtag,comm,&req2[16]); + MPI_Isend(sendbuf_yZ, sendCount_yZ,MPI_DOUBLE,rank_Yz,sendtag,comm,&req1[17]); + MPI_Irecv(recvbuf_Yz, recvCount_Yz,MPI_DOUBLE,rank_yZ,recvtag,comm,&req2[17]); //................................................................................... 
//................................................................................... // Wait for completion of D3Q19 communication - comm.waitAll(18,req1); - comm.waitAll(18,req2); + MPI_Waitall(18,req1,stat1); + MPI_Waitall(18,req2,stat2); //................................................................................... // Unpack the distributions on the device //................................................................................... @@ -1236,7 +1260,7 @@ int main(int argc, char **argv) //***************************************************************************** //***************************************************************************** - comm.barrier(); + MPI_Barrier(comm); // Iteration completed! timestep++; //................................................................... @@ -1245,8 +1269,8 @@ int main(int argc, char **argv) // cudaThreadSynchronize(); dvc_Barrier(); - comm.barrier(); - stoptime = Utilities::MPI::time(); + MPI_Barrier(comm); + stoptime = MPI_Wtime(); // cout << "CPU time: " << (stoptime - starttime) << " seconds" << endl; cputime = stoptime - starttime; // cout << "Lattice update rate: "<< double(Nx*Ny*Nz*timestep)/cputime/1000000 << " MLUPS" << endl; @@ -1280,7 +1304,7 @@ int main(int argc, char **argv) // dvc_CopyToDevice(velocity, vel, 3*dist_mem_size, dvc_CopyToDeviceDeviceToHost); //.............................................................................. // cudaThreadSynchronize(); -// comm.barrier(); +// MPI_Barrier(comm); //............................................................ //....Write the z-velocity to test poiseuille flow............ // double vz,vz_avg; @@ -1309,7 +1333,7 @@ int main(int argc, char **argv) // free (velocity); free(id); // **************************************************** - comm.barrier(); + MPI_Barrier(comm); MPI_Finalize(); // **************************************************** } diff --git a/gpu/exe/lb1_MRT_mpi.cu b/gpu/exe/lb1_MRT_mpi.cu index 776ea29f..0c0863c7 100644 --- a/gpu/exe/lb1_MRT_mpi.cu +++ b/gpu/exe/lb1_MRT_mpi.cu @@ -1,10 +1,8 @@ -#include "common/MPI.h" - #include #include #include #include - +#include inline void PackID(int *list, int count, char *sendbuf, char *ID){ // Fill in the phase ID values from neighboring processors @@ -555,11 +553,15 @@ void Write_Out(double *array, int Nx, int Ny, int Nz){ int main(int argc, char **argv) { + //***************************************** + // ***** MPI STUFF **************** + //***************************************** // Initialize MPI + int rank,nprocs; MPI_Init(&argc,&argv); - Utilities::MPI comm( MPI_COMM_WORLD ); - int rank = comm.getRank(); - int nprocs = comm.getSize(); + MPI_Comm comm = MPI_COMM_WORLD; + MPI_Comm_rank(comm,&rank); + MPI_Comm_size(comm,&nprocs); // parallel domain size (# of sub-domains) int nprocx,nprocy,nprocz; int iproc,jproc,kproc; @@ -573,6 +575,7 @@ int main(int argc, char **argv) int rank_yz,rank_YZ,rank_yZ,rank_Yz; //********************************** MPI_Request req1[18],req2[18]; + MPI_Status stat1[18],stat2[18]; //********************************** //!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! //!!!!!!!!!!! Random debugging communications!!!!!!!!!!!!!!!!!!!!!!!!!!!!! @@ -622,21 +625,24 @@ int main(int argc, char **argv) // ************************************************************** // Broadcast simulation parameters from rank 0 to all other procs - comm.barrier(); + MPI_Barrier(comm); //................................................. 
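Timing now calls MPI_Wtime directly instead of Utilities::MPI::time. A sketch of the barrier-bracketed measurement and the lattice-update-rate figure the drivers report; timed_loop is a placeholder and the loop body stands in for the collision, streaming, and halo-exchange steps.

    #include <mpi.h>
    #include <cstdio>

    // Wall-clock timing of the time-stepping loop, synchronized so every rank
    // starts and stops the clock at the same point.
    double timed_loop(int timesteps, int Nx, int Ny, int Nz, MPI_Comm comm)
    {
        MPI_Barrier(comm);
        double starttime = MPI_Wtime();
        for (int t = 0; t < timesteps; t++) {
            /* ... collision, streaming and halo exchange ... */
        }
        MPI_Barrier(comm);
        double cputime = MPI_Wtime() - starttime;
        // Million lattice-site updates per second for this sub-domain.
        double mlups = double(Nx) * Ny * Nz * timesteps / cputime / 1.0e6;
        if (cputime > 0.0) printf("Lattice update rate: %f MLUPS\n", mlups);
        return cputime;
    }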
- comm.bcast(&Nz,1,0); - comm.bcast(&nBlocks,1,0); - comm.bcast(&nthreads,1,0); - comm.bcast(&tau,1,0); - comm.bcast(&Fx,1,0); - comm.bcast(&Fy,1,0); - comm.bcast(&Fz,1,0); - comm.bcast(&iterMax,1,0); - comm.bcast(&interval,1,0); - comm.bcast(&tol,1,0); - comm.bcast(&nprocx,1,0); - comm.bcast(&nprocy,1,0); - comm.bcast(&nprocz,1,0); + MPI_Bcast(&Nz,1,MPI_INT,0,comm); + MPI_Bcast(&nBlocks,1,MPI_INT,0,comm); + MPI_Bcast(&nthreads,1,MPI_INT,0,comm); + MPI_Bcast(&tau,1,MPI_DOUBLE,0,comm); + MPI_Bcast(&Fx,1,MPI_DOUBLE,0,comm); + MPI_Bcast(&Fy,1,MPI_DOUBLE,0,comm); + MPI_Bcast(&Fz,1,MPI_DOUBLE,0,comm); + MPI_Bcast(&iterMax,1,MPI_INT,0,comm); + MPI_Bcast(&interval,1,MPI_INT,0,comm); + MPI_Bcast(&tol,1,MPI_DOUBLE,0,comm); + + MPI_Bcast(&nprocx,1,MPI_INT,0,comm); + MPI_Bcast(&nprocy,1,MPI_INT,0,comm); + MPI_Bcast(&nprocz,1,MPI_INT,0,comm); + //................................................. + MPI_Barrier(comm); // ************************************************************** double rlx_setA = 1.f/tau; @@ -659,7 +665,7 @@ int main(int argc, char **argv) printf("Sub-domain size = %i x %i x %i\n",Nz,Nz,Nz); } - comm.barrier(); + MPI_Barrier(comm); kproc = rank/(nprocx*nprocy); jproc = (rank-nprocx*nprocy*kproc)/nprocx; iproc = rank-nprocx*nprocy*kproc-nprocz*jproc; @@ -940,7 +946,7 @@ int main(int argc, char **argv) PM.close(); // printf("File porosity = %f\n", double(sum)/N); //........................................................................... - comm.barrier(); + MPI_Barrier(comm); if (rank == 0) cout << "Domain set." << endl; //........................................................................... // Write the communcation structure into a file for debugging @@ -1077,7 +1083,7 @@ int main(int argc, char **argv) } } } - comm.barrier(); + MPI_Barrier(comm); if (rank==0) printf ("SendLists are ready on host\n"); //...................................................................................... 
// Use MPI to fill in the recvCounts form the associated processes @@ -1088,46 +1094,46 @@ int main(int argc, char **argv) //********************************************************************************** // Fill in the recieve counts using MPI sendtag = recvtag = 3; - comm.send(&sendCount_x,1,rank_X,sendtag); - comm.recv(&recvCount_X,1,rank_x,recvtag); - comm.send(&sendCount_X,1,rank_x,sendtag); - comm.recv(&recvCount_x,1,rank_X,recvtag); - comm.send(&sendCount_y,1,rank_Y,sendtag); - comm.recv(&recvCount_Y,1,rank_y,recvtag); - comm.send(&sendCount_Y,1,rank_y,sendtag); - comm.recv(&recvCount_y,1,rank_Y,recvtag); - comm.send(&sendCount_z,1,rank_Z,sendtag); - comm.recv(&recvCount_Z,1,rank_z,recvtag); - comm.send(&sendCount_Z,1,rank_z,sendtag); - comm.recv(&recvCount_z,1,rank_Z,recvtag); + MPI_Send(&sendCount_x,1,MPI_INT,rank_X,sendtag,comm); + MPI_Recv(&recvCount_X,1,MPI_INT,rank_x,recvtag,comm,MPI_STATUS_IGNORE); + MPI_Send(&sendCount_X,1,MPI_INT,rank_x,sendtag,comm); + MPI_Recv(&recvCount_x,1,MPI_INT,rank_X,recvtag,comm,MPI_STATUS_IGNORE); + MPI_Send(&sendCount_y,1,MPI_INT,rank_Y,sendtag,comm); + MPI_Recv(&recvCount_Y,1,MPI_INT,rank_y,recvtag,comm,MPI_STATUS_IGNORE); + MPI_Send(&sendCount_Y,1,MPI_INT,rank_y,sendtag,comm); + MPI_Recv(&recvCount_y,1,MPI_INT,rank_Y,recvtag,comm,MPI_STATUS_IGNORE); + MPI_Send(&sendCount_z,1,MPI_INT,rank_Z,sendtag,comm); + MPI_Recv(&recvCount_Z,1,MPI_INT,rank_z,recvtag,comm,MPI_STATUS_IGNORE); + MPI_Send(&sendCount_Z,1,MPI_INT,rank_z,sendtag,comm); + MPI_Recv(&recvCount_z,1,MPI_INT,rank_Z,recvtag,comm,MPI_STATUS_IGNORE); - comm.send(&sendCount_xy,1,rank_XY,sendtag); - comm.recv(&recvCount_XY,1,rank_xy,recvtag); - comm.send(&sendCount_XY,1,rank_xy,sendtag); - comm.recv(&recvCount_xy,1,rank_XY,recvtag); - comm.send(&sendCount_Xy,1,rank_xY,sendtag); - comm.recv(&recvCount_xY,1,rank_Xy,recvtag); - comm.send(&sendCount_xY,1,rank_Xy,sendtag); - comm.recv(&recvCount_Xy,1,rank_xY,recvtag); + MPI_Send(&sendCount_xy,1,MPI_INT,rank_XY,sendtag,comm); + MPI_Recv(&recvCount_XY,1,MPI_INT,rank_xy,recvtag,comm,MPI_STATUS_IGNORE); + MPI_Send(&sendCount_XY,1,MPI_INT,rank_xy,sendtag,comm); + MPI_Recv(&recvCount_xy,1,MPI_INT,rank_XY,recvtag,comm,MPI_STATUS_IGNORE); + MPI_Send(&sendCount_Xy,1,MPI_INT,rank_xY,sendtag,comm); + MPI_Recv(&recvCount_xY,1,MPI_INT,rank_Xy,recvtag,comm,MPI_STATUS_IGNORE); + MPI_Send(&sendCount_xY,1,MPI_INT,rank_Xy,sendtag,comm); + MPI_Recv(&recvCount_Xy,1,MPI_INT,rank_xY,recvtag,comm,MPI_STATUS_IGNORE); - comm.send(&sendCount_xz,1,rank_XZ,sendtag); - comm.recv(&recvCount_XZ,1,rank_xz,recvtag); - comm.send(&sendCount_XZ,1,rank_xz,sendtag); - comm.recv(&recvCount_xz,1,rank_XZ,recvtag); - comm.send(&sendCount_Xz,1,rank_xZ,sendtag); - comm.recv(&recvCount_xZ,1,rank_Xz,recvtag); - comm.send(&sendCount_xZ,1,rank_Xz,sendtag); - comm.recv(&recvCount_Xz,1,rank_xZ,recvtag); + MPI_Send(&sendCount_xz,1,MPI_INT,rank_XZ,sendtag,comm); + MPI_Recv(&recvCount_XZ,1,MPI_INT,rank_xz,recvtag,comm,MPI_STATUS_IGNORE); + MPI_Send(&sendCount_XZ,1,MPI_INT,rank_xz,sendtag,comm); + MPI_Recv(&recvCount_xz,1,MPI_INT,rank_XZ,recvtag,comm,MPI_STATUS_IGNORE); + MPI_Send(&sendCount_Xz,1,MPI_INT,rank_xZ,sendtag,comm); + MPI_Recv(&recvCount_xZ,1,MPI_INT,rank_Xz,recvtag,comm,MPI_STATUS_IGNORE); + MPI_Send(&sendCount_xZ,1,MPI_INT,rank_Xz,sendtag,comm); + MPI_Recv(&recvCount_Xz,1,MPI_INT,rank_xZ,recvtag,comm,MPI_STATUS_IGNORE); - comm.send(&sendCount_yz,1,rank_YZ,sendtag); - comm.recv(&recvCount_YZ,1,rank_yz,recvtag); - comm.send(&sendCount_YZ,1,rank_yz,sendtag); - 
comm.recv(&recvCount_yz,1,rank_YZ,recvtag); - comm.send(&sendCount_Yz,1,rank_yZ,sendtag); - comm.recv(&recvCount_yZ,1,rank_Yz,recvtag); - comm.send(&sendCount_yZ,1,rank_Yz,sendtag); - comm.recv(&recvCount_Yz,1,rank_yZ,recvtag); - comm.barrier(); + MPI_Send(&sendCount_yz,1,MPI_INT,rank_YZ,sendtag,comm); + MPI_Recv(&recvCount_YZ,1,MPI_INT,rank_yz,recvtag,comm,MPI_STATUS_IGNORE); + MPI_Send(&sendCount_YZ,1,MPI_INT,rank_yz,sendtag,comm); + MPI_Recv(&recvCount_yz,1,MPI_INT,rank_YZ,recvtag,comm,MPI_STATUS_IGNORE); + MPI_Send(&sendCount_Yz,1,MPI_INT,rank_yZ,sendtag,comm); + MPI_Recv(&recvCount_yZ,1,MPI_INT,rank_Yz,recvtag,comm,MPI_STATUS_IGNORE); + MPI_Send(&sendCount_yZ,1,MPI_INT,rank_Yz,sendtag,comm); + MPI_Recv(&recvCount_Yz,1,MPI_INT,rank_yZ,recvtag,comm,MPI_STATUS_IGNORE); + MPI_Barrier(comm); //********************************************************************************** //recvCount_x = sendCount_x; //recvCount_X = sendCount_X; @@ -1151,7 +1157,7 @@ int main(int argc, char **argv) //...................................................................................... // Use MPI to fill in the appropriate values // int tag = 5; - // Mcomm.sendrecv(sendCount_x,1,rank_x,tag,sendCount_X,1); + // MPI_Sendrecv(sendCount_x,1,MPI_INT,rank_x,tag,sendCount_X,1,MPI_INT,comm,req); //...................................................................................... int *recvList_x, *recvList_y, *recvList_z, *recvList_X, *recvList_Y, *recvList_Z; int *recvList_xy, *recvList_yz, *recvList_xz, *recvList_Xy, *recvList_Yz, *recvList_xZ; @@ -1181,48 +1187,48 @@ int main(int argc, char **argv) // Use MPI to fill in the appropriate values for recvList // Fill in the recieve lists using MPI sendtag = recvtag = 4; - req1[0] = comm.Isend(sendList_x,sendCount_x,rank_X,sendtag); - req2[0] = comm.Irecv(recvList_X,recvCount_X,rank_x,recvtag); - req1[1] = comm.Isend(sendList_X,sendCount_X,rank_x,sendtag); - req2[1] = comm.Irecv(recvList_x,recvCount_x,rank_X,recvtag); - req1[2] = comm.Isend(sendList_y,sendCount_y,rank_Y,sendtag); - req2[2] = comm.Irecv(recvList_Y,recvCount_Y,rank_y,recvtag); - req1[3] = comm.Isend(sendList_Y,sendCount_Y,rank_y,sendtag); - req2[3] = comm.Irecv(recvList_y,recvCount_y,rank_Y,recvtag); - req1[4] = comm.Isend(sendList_z,sendCount_z,rank_Z,sendtag); - req2[4] = comm.Irecv(recvList_Z,recvCount_Z,rank_z,recvtag); - req1[5] = comm.Isend(sendList_Z,sendCount_Z,rank_z,sendtag); - req2[5] = comm.Irecv(recvList_z,recvCount_z,rank_Z,recvtag); + MPI_Isend(sendList_x, sendCount_x,MPI_INT,rank_X,sendtag,comm,&req1[0]); + MPI_Irecv(recvList_X, recvCount_X,MPI_INT,rank_x,recvtag,comm,&req2[0]); + MPI_Isend(sendList_X, sendCount_X,MPI_INT,rank_x,sendtag,comm,&req1[1]); + MPI_Irecv(recvList_x, recvCount_x,MPI_INT,rank_X,recvtag,comm,&req2[1]); + MPI_Isend(sendList_y, sendCount_y,MPI_INT,rank_Y,sendtag,comm,&req1[2]); + MPI_Irecv(recvList_Y, recvCount_Y,MPI_INT,rank_y,recvtag,comm,&req2[2]); + MPI_Isend(sendList_Y, sendCount_Y,MPI_INT,rank_y,sendtag,comm,&req1[3]); + MPI_Irecv(recvList_y, recvCount_y,MPI_INT,rank_Y,recvtag,comm,&req2[3]); + MPI_Isend(sendList_z, sendCount_z,MPI_INT,rank_Z,sendtag,comm,&req1[4]); + MPI_Irecv(recvList_Z, recvCount_Z,MPI_INT,rank_z,recvtag,comm,&req2[4]); + MPI_Isend(sendList_Z, sendCount_Z,MPI_INT,rank_z,sendtag,comm,&req1[5]); + MPI_Irecv(recvList_z, recvCount_z,MPI_INT,rank_Z,recvtag,comm,&req2[5]); - req1[6] = comm.Isend(sendList_xy,sendCount_xy,rank_XY,sendtag); - req2[6] = comm.Irecv(recvList_XY,recvCount_XY,rank_xy,recvtag); - req1[7] = 
comm.Isend(sendList_XY,sendCount_XY,rank_xy,sendtag); - req2[7] = comm.Irecv(recvList_xy,recvCount_xy,rank_XY,recvtag); - req1[8] = comm.Isend(sendList_Xy,sendCount_Xy,rank_xY,sendtag); - req2[8] = comm.Irecv(recvList_xY,recvCount_xY,rank_Xy,recvtag); - req1[9] = comm.Isend(sendList_xY,sendCount_xY,rank_Xy,sendtag); - req2[9] = comm.Irecv(recvList_Xy,recvCount_Xy,rank_xY,recvtag); + MPI_Isend(sendList_xy, sendCount_xy,MPI_INT,rank_XY,sendtag,comm,&req1[6]); + MPI_Irecv(recvList_XY, recvCount_XY,MPI_INT,rank_xy,recvtag,comm,&req2[6]); + MPI_Isend(sendList_XY, sendCount_XY,MPI_INT,rank_xy,sendtag,comm,&req1[7]); + MPI_Irecv(recvList_xy, recvCount_xy,MPI_INT,rank_XY,recvtag,comm,&req2[7]); + MPI_Isend(sendList_Xy, sendCount_Xy,MPI_INT,rank_xY,sendtag,comm,&req1[8]); + MPI_Irecv(recvList_xY, recvCount_xY,MPI_INT,rank_Xy,recvtag,comm,&req2[8]); + MPI_Isend(sendList_xY, sendCount_xY,MPI_INT,rank_Xy,sendtag,comm,&req1[9]); + MPI_Irecv(recvList_Xy, recvCount_Xy,MPI_INT,rank_xY,recvtag,comm,&req2[9]); - req1[10] = comm.Isend(sendList_xz,sendCount_xz,rank_XZ,sendtag); - req2[10] = comm.Irecv(recvList_XZ,recvCount_XZ,rank_xz,recvtag); - req1[11] = comm.Isend(sendList_XZ,sendCount_XZ,rank_xz,sendtag); - req2[11] = comm.Irecv(recvList_xz,recvCount_xz,rank_XZ,recvtag); - req1[12] = comm.Isend(sendList_Xz,sendCount_Xz,rank_xZ,sendtag); - req2[12] = comm.Irecv(recvList_xZ,recvCount_xZ,rank_Xz,recvtag); - req1[13] = comm.Isend(sendList_xZ,sendCount_xZ,rank_Xz,sendtag); - req2[13] = comm.Irecv(recvList_Xz,recvCount_Xz,rank_xZ,recvtag); + MPI_Isend(sendList_xz, sendCount_xz,MPI_INT,rank_XZ,sendtag,comm,&req1[10]); + MPI_Irecv(recvList_XZ, recvCount_XZ,MPI_INT,rank_xz,recvtag,comm,&req2[10]); + MPI_Isend(sendList_XZ, sendCount_XZ,MPI_INT,rank_xz,sendtag,comm,&req1[11]); + MPI_Irecv(recvList_xz, recvCount_xz,MPI_INT,rank_XZ,recvtag,comm,&req2[11]); + MPI_Isend(sendList_Xz, sendCount_Xz,MPI_INT,rank_xZ,sendtag,comm,&req1[12]); + MPI_Irecv(recvList_xZ, recvCount_xZ,MPI_INT,rank_Xz,recvtag,comm,&req2[12]); + MPI_Isend(sendList_xZ, sendCount_xZ,MPI_INT,rank_Xz,sendtag,comm,&req1[13]); + MPI_Irecv(recvList_Xz, recvCount_Xz,MPI_INT,rank_xZ,recvtag,comm,&req2[13]); - req1[14] = comm.Isend(sendList_yz,sendCount_yz,rank_YZ,sendtag); - req2[14] = comm.Irecv(recvList_YZ,recvCount_YZ,rank_yz,recvtag); - req1[15] = comm.Isend(sendList_YZ,sendCount_YZ,rank_yz,sendtag); - req2[15] = comm.Irecv(recvList_yz,recvCount_yz,rank_YZ,recvtag); - req1[16] = comm.Isend(sendList_Yz,sendCount_Yz,rank_yZ,sendtag); - req2[16] = comm.Irecv(recvList_yZ,recvCount_yZ,rank_Yz,recvtag); - req1[17] = comm.Isend(sendList_yZ,sendCount_yZ,rank_Yz,sendtag); - req2[17] = comm.Irecv(recvList_Yz,recvCount_Yz,rank_yZ,recvtag); - comm.waitAll(18,req1); - comm.waitAll(18,req2); - comm.barrier(); + MPI_Isend(sendList_yz, sendCount_yz,MPI_INT,rank_YZ,sendtag,comm,&req1[14]); + MPI_Irecv(recvList_YZ, recvCount_YZ,MPI_INT,rank_yz,recvtag,comm,&req2[14]); + MPI_Isend(sendList_YZ, sendCount_YZ,MPI_INT,rank_yz,sendtag,comm,&req1[15]); + MPI_Irecv(recvList_yz, recvCount_yz,MPI_INT,rank_YZ,recvtag,comm,&req2[15]); + MPI_Isend(sendList_Yz, sendCount_Yz,MPI_INT,rank_yZ,sendtag,comm,&req1[16]); + MPI_Irecv(recvList_yZ, recvCount_yZ,MPI_INT,rank_Yz,recvtag,comm,&req2[16]); + MPI_Isend(sendList_yZ, sendCount_yZ,MPI_INT,rank_Yz,sendtag,comm,&req1[17]); + MPI_Irecv(recvList_Yz, recvCount_Yz,MPI_INT,rank_yZ,recvtag,comm,&req2[17]); + MPI_Waitall(18,req1,stat1); + MPI_Waitall(18,req2,stat2); + MPI_Barrier(comm); 
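The request-returning wrapper calls (req1[i] = comm.Isend(...)) become MPI_Isend/MPI_Irecv with the request passed by address, and comm.waitAll(18,req) becomes MPI_Waitall, which additionally takes an array of MPI_Status (the stat1/stat2 arrays used above) or MPI_STATUSES_IGNORE. A reduced sketch of the same pattern for a single neighbor pair, with illustrative names:

    #include <mpi.h>

    // Post the send and the receive for one neighbor pair, then wait on both.
    void exchange_lists(MPI_Comm comm, int tag,
                        int *sendList, int sendCount, int rank_send_to,
                        int *recvList, int recvCount, int rank_recv_from)
    {
        MPI_Request req[2];
        MPI_Status  stat[2];
        MPI_Isend(sendList, sendCount, MPI_INT, rank_send_to,   tag, comm, &req[0]);
        MPI_Irecv(recvList, recvCount, MPI_INT, rank_recv_from, tag, comm, &req[1]);
        MPI_Waitall(2, req, stat);   // MPI_STATUSES_IGNORE is equally valid here
    }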
//...................................................................................... double *sendbuf_x, *sendbuf_y, *sendbuf_z, *sendbuf_X, *sendbuf_Y, *sendbuf_Z; double *sendbuf_xy, *sendbuf_yz, *sendbuf_xz, *sendbuf_Xy, *sendbuf_Yz, *sendbuf_xZ; @@ -1421,24 +1427,42 @@ int main(int argc, char **argv) PackID(sendList_yZ, sendCount_yZ ,sendID_yZ, id); PackID(sendList_YZ, sendCount_YZ ,sendID_YZ, id); //...................................................................................... - comm.sendrecv(sendID_x,sendCount_x,rank_X,sendtag,recvID_X,recvCount_X,rank_x,recvtag); - comm.sendrecv(sendID_X,sendCount_X,rank_x,sendtag,recvID_x,recvCount_x,rank_X,recvtag); - comm.sendrecv(sendID_y,sendCount_y,rank_Y,sendtag,recvID_Y,recvCount_Y,rank_y,recvtag); - comm.sendrecv(sendID_Y,sendCount_Y,rank_y,sendtag,recvID_y,recvCount_y,rank_Y,recvtag); - comm.sendrecv(sendID_z,sendCount_z,rank_Z,sendtag,recvID_Z,recvCount_Z,rank_z,recvtag); - comm.sendrecv(sendID_Z,sendCount_Z,rank_z,sendtag,recvID_z,recvCount_z,rank_Z,recvtag); - comm.sendrecv(sendID_xy,sendCount_xy,rank_XY,sendtag,recvID_XY,recvCount_XY,rank_xy,recvtag); - comm.sendrecv(sendID_XY,sendCount_XY,rank_xy,sendtag,recvID_xy,recvCount_xy,rank_XY,recvtag); - comm.sendrecv(sendID_Xy,sendCount_Xy,rank_xY,sendtag,recvID_xY,recvCount_xY,rank_Xy,recvtag); - comm.sendrecv(sendID_xY,sendCount_xY,rank_Xy,sendtag,recvID_Xy,recvCount_Xy,rank_xY,recvtag); - comm.sendrecv(sendID_xz,sendCount_xz,rank_XZ,sendtag,recvID_XZ,recvCount_XZ,rank_xz,recvtag); - comm.sendrecv(sendID_XZ,sendCount_XZ,rank_xz,sendtag,recvID_xz,recvCount_xz,rank_XZ,recvtag); - comm.sendrecv(sendID_Xz,sendCount_Xz,rank_xZ,sendtag,recvID_xZ,recvCount_xZ,rank_Xz,recvtag); - comm.sendrecv(sendID_xZ,sendCount_xZ,rank_Xz,sendtag,recvID_Xz,recvCount_Xz,rank_xZ,recvtag); - comm.sendrecv(sendID_yz,sendCount_yz,rank_YZ,sendtag,recvID_YZ,recvCount_YZ,rank_yz,recvtag); - comm.sendrecv(sendID_YZ,sendCount_YZ,rank_yz,sendtag,recvID_yz,recvCount_yz,rank_YZ,recvtag); - comm.sendrecv(sendID_Yz,sendCount_Yz,rank_yZ,sendtag,recvID_yZ,recvCount_yZ,rank_Yz,recvtag); - comm.sendrecv(sendID_yZ,sendCount_yZ,rank_Yz,sendtag,recvID_Yz,recvCount_Yz,rank_yZ,recvtag); + MPI_Sendrecv(sendID_x,sendCount_x,MPI_CHAR,rank_X,sendtag, + recvID_X,recvCount_X,MPI_CHAR,rank_x,recvtag,comm,MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_X,sendCount_X,MPI_CHAR,rank_x,sendtag, + recvID_x,recvCount_x,MPI_CHAR,rank_X,recvtag,comm,MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_y,sendCount_y,MPI_CHAR,rank_Y,sendtag, + recvID_Y,recvCount_Y,MPI_CHAR,rank_y,recvtag,comm,MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_Y,sendCount_Y,MPI_CHAR,rank_y,sendtag, + recvID_y,recvCount_y,MPI_CHAR,rank_Y,recvtag,comm,MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_z,sendCount_z,MPI_CHAR,rank_Z,sendtag, + recvID_Z,recvCount_Z,MPI_CHAR,rank_z,recvtag,comm,MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_Z,sendCount_Z,MPI_CHAR,rank_z,sendtag, + recvID_z,recvCount_z,MPI_CHAR,rank_Z,recvtag,comm,MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_xy,sendCount_xy,MPI_CHAR,rank_XY,sendtag, + recvID_XY,recvCount_XY,MPI_CHAR,rank_xy,recvtag,comm,MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_XY,sendCount_XY,MPI_CHAR,rank_xy,sendtag, + recvID_xy,recvCount_xy,MPI_CHAR,rank_XY,recvtag,comm,MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_Xy,sendCount_Xy,MPI_CHAR,rank_xY,sendtag, + recvID_xY,recvCount_xY,MPI_CHAR,rank_Xy,recvtag,comm,MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_xY,sendCount_xY,MPI_CHAR,rank_Xy,sendtag, + recvID_Xy,recvCount_Xy,MPI_CHAR,rank_xY,recvtag,comm,MPI_STATUS_IGNORE); + 
MPI_Sendrecv(sendID_xz,sendCount_xz,MPI_CHAR,rank_XZ,sendtag, + recvID_XZ,recvCount_XZ,MPI_CHAR,rank_xz,recvtag,comm,MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_XZ,sendCount_XZ,MPI_CHAR,rank_xz,sendtag, + recvID_xz,recvCount_xz,MPI_CHAR,rank_XZ,recvtag,comm,MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_Xz,sendCount_Xz,MPI_CHAR,rank_xZ,sendtag, + recvID_xZ,recvCount_xZ,MPI_CHAR,rank_Xz,recvtag,comm,MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_xZ,sendCount_xZ,MPI_CHAR,rank_Xz,sendtag, + recvID_Xz,recvCount_Xz,MPI_CHAR,rank_xZ,recvtag,comm,MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_yz,sendCount_yz,MPI_CHAR,rank_YZ,sendtag, + recvID_YZ,recvCount_YZ,MPI_CHAR,rank_yz,recvtag,comm,MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_YZ,sendCount_YZ,MPI_CHAR,rank_yz,sendtag, + recvID_yz,recvCount_yz,MPI_CHAR,rank_YZ,recvtag,comm,MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_Yz,sendCount_Yz,MPI_CHAR,rank_yZ,sendtag, + recvID_yZ,recvCount_yZ,MPI_CHAR,rank_Yz,recvtag,comm,MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_yZ,sendCount_yZ,MPI_CHAR,rank_Yz,sendtag, + recvID_Yz,recvCount_Yz,MPI_CHAR,rank_yZ,recvtag,comm,MPI_STATUS_IGNORE); //...................................................................................... UnpackID(recvList_x, recvCount_x ,recvID_x, id); UnpackID(recvList_X, recvCount_X ,recvID_X, id); @@ -1471,7 +1495,7 @@ int main(int argc, char **argv) free(recvID_yz); free(recvID_YZ); free(recvID_yZ); free(recvID_Yz); //...................................................................................... if (rank==0) printf ("Devices are ready to communicate. \n"); - comm.barrier(); + MPI_Barrier(comm); //...........device phase ID................................................. if (rank==0) printf ("Copying phase ID to device \n"); @@ -1511,8 +1535,8 @@ int main(int argc, char **argv) //.......create and start timer............ double starttime,stoptime,cputime; - comm.barrier(); - starttime = Utilities::MPI::time(); + MPI_Barrier(comm); + starttime = MPI_Wtime(); // Old cuda timer is below // cudaEvent_t start, stop; // float time; @@ -1609,48 +1633,48 @@ int main(int argc, char **argv) //................................................................................... 
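The packed ID halos above are exchanged with MPI_Sendrecv, which couples the send and the matching receive in a single deadlock-free call; MPI_CHAR matches the char ID buffers and MPI_STATUS_IGNORE discards the status. One direction of that exchange, reduced to a sketch with illustrative names:

    #include <mpi.h>

    // Swap one face of packed char IDs: send toward +x, receive from -x.
    void swap_ids_x(MPI_Comm comm, int tag,
                    char *send_ids, int send_count, int rank_plus_x,
                    char *recv_ids, int recv_count, int rank_minus_x)
    {
        MPI_Sendrecv(send_ids, send_count, MPI_CHAR, rank_plus_x,  tag,
                     recv_ids, recv_count, MPI_CHAR, rank_minus_x, tag,
                     comm, MPI_STATUS_IGNORE);
    }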
// Send all the distributions - req1[0] = comm.Isend(sendbuf_x,5*sendCount_x,rank_X,sendtag); - req2[0] = comm.Irecv(recvbuf_X,5*recvCount_X,rank_x,recvtag); - req1[1] = comm.Isend(sendbuf_X,5*sendCount_X,rank_x,sendtag); - req2[1] = comm.Irecv(recvbuf_x,5*recvCount_x,rank_X,recvtag); - req1[2] = comm.Isend(sendbuf_y,5*sendCount_y,rank_Y,sendtag); - req2[2] = comm.Irecv(recvbuf_Y,5*recvCount_Y,rank_y,recvtag); - req1[3] = comm.Isend(sendbuf_Y,5*sendCount_Y,rank_y,sendtag); - req2[3] = comm.Irecv(recvbuf_y,5*recvCount_y,rank_Y,recvtag); - req1[4] = comm.Isend(sendbuf_z,5*sendCount_z,rank_Z,sendtag); - req2[4] = comm.Irecv(recvbuf_Z,5*recvCount_Z,rank_z,recvtag); - req1[5] = comm.Isend(sendbuf_Z,5*sendCount_Z,rank_z,sendtag); - req2[5] = comm.Irecv(recvbuf_z,5*recvCount_z,rank_Z,recvtag); - req1[6] = comm.Isend(sendbuf_xy,sendCount_xy,rank_XY,sendtag); - req2[6] = comm.Irecv(recvbuf_XY,recvCount_XY,rank_xy,recvtag); - req1[7] = comm.Isend(sendbuf_XY,sendCount_XY,rank_xy,sendtag); - req2[7] = comm.Irecv(recvbuf_xy,recvCount_xy,rank_XY,recvtag); - req1[8] = comm.Isend(sendbuf_Xy,sendCount_Xy,rank_xY,sendtag); - req2[8] = comm.Irecv(recvbuf_xY,recvCount_xY,rank_Xy,recvtag); - req1[9] = comm.Isend(sendbuf_xY,sendCount_xY,rank_Xy,sendtag); - req2[9] = comm.Irecv(recvbuf_Xy,recvCount_Xy,rank_xY,recvtag); - req1[10] = comm.Isend(sendbuf_xz,sendCount_xz,rank_XZ,sendtag); - req2[10] = comm.Irecv(recvbuf_XZ,recvCount_XZ,rank_xz,recvtag); - req1[11] = comm.Isend(sendbuf_XZ,sendCount_XZ,rank_xz,sendtag); - req2[11] = comm.Irecv(recvbuf_xz,recvCount_xz,rank_XZ,recvtag); - req1[12] = comm.Isend(sendbuf_Xz,sendCount_Xz,rank_xZ,sendtag); - req2[12] = comm.Irecv(recvbuf_xZ,recvCount_xZ,rank_Xz,recvtag); - req1[13] = comm.Isend(sendbuf_xZ,sendCount_xZ,rank_Xz,sendtag); - req2[13] = comm.Irecv(recvbuf_Xz,recvCount_Xz,rank_xZ,recvtag); - req1[14] = comm.Isend(sendbuf_yz,sendCount_yz,rank_YZ,sendtag); - req2[14] = comm.Irecv(recvbuf_YZ,recvCount_YZ,rank_yz,recvtag); - req1[15] = comm.Isend(sendbuf_YZ,sendCount_YZ,rank_yz,sendtag); - req2[15] = comm.Irecv(recvbuf_yz,recvCount_yz,rank_YZ,recvtag); - req1[16] = comm.Isend(sendbuf_Yz,sendCount_Yz,rank_yZ,sendtag); - req2[16] = comm.Irecv(recvbuf_yZ,recvCount_yZ,rank_Yz,recvtag); - req1[17] = comm.Isend(sendbuf_yZ,sendCount_yZ,rank_Yz,sendtag); - req2[17] = comm.Irecv(recvbuf_Yz,recvCount_Yz,rank_yZ,recvtag); + MPI_Isend(sendbuf_x, 5*sendCount_x,MPI_DOUBLE,rank_X,sendtag,comm,&req1[0]); + MPI_Irecv(recvbuf_X, 5*recvCount_X,MPI_DOUBLE,rank_x,recvtag,comm,&req2[0]); + MPI_Isend(sendbuf_X, 5*sendCount_X,MPI_DOUBLE,rank_x,sendtag,comm,&req1[1]); + MPI_Irecv(recvbuf_x, 5*recvCount_x,MPI_DOUBLE,rank_X,recvtag,comm,&req2[1]); + MPI_Isend(sendbuf_y, 5*sendCount_y,MPI_DOUBLE,rank_Y,sendtag,comm,&req1[2]); + MPI_Irecv(recvbuf_Y, 5*recvCount_Y,MPI_DOUBLE,rank_y,recvtag,comm,&req2[2]); + MPI_Isend(sendbuf_Y, 5*sendCount_Y,MPI_DOUBLE,rank_y,sendtag,comm,&req1[3]); + MPI_Irecv(recvbuf_y, 5*recvCount_y,MPI_DOUBLE,rank_Y,recvtag,comm,&req2[3]); + MPI_Isend(sendbuf_z, 5*sendCount_z,MPI_DOUBLE,rank_Z,sendtag,comm,&req1[4]); + MPI_Irecv(recvbuf_Z, 5*recvCount_Z,MPI_DOUBLE,rank_z,recvtag,comm,&req2[4]); + MPI_Isend(sendbuf_Z, 5*sendCount_Z,MPI_DOUBLE,rank_z,sendtag,comm,&req1[5]); + MPI_Irecv(recvbuf_z, 5*recvCount_z,MPI_DOUBLE,rank_Z,recvtag,comm,&req2[5]); + MPI_Isend(sendbuf_xy, sendCount_xy,MPI_DOUBLE,rank_XY,sendtag,comm,&req1[6]); + MPI_Irecv(recvbuf_XY, recvCount_XY,MPI_DOUBLE,rank_xy,recvtag,comm,&req2[6]); + MPI_Isend(sendbuf_XY, 
sendCount_XY,MPI_DOUBLE,rank_xy,sendtag,comm,&req1[7]); + MPI_Irecv(recvbuf_xy, recvCount_xy,MPI_DOUBLE,rank_XY,recvtag,comm,&req2[7]); + MPI_Isend(sendbuf_Xy, sendCount_Xy,MPI_DOUBLE,rank_xY,sendtag,comm,&req1[8]); + MPI_Irecv(recvbuf_xY, recvCount_xY,MPI_DOUBLE,rank_Xy,recvtag,comm,&req2[8]); + MPI_Isend(sendbuf_xY, sendCount_xY,MPI_DOUBLE,rank_Xy,sendtag,comm,&req1[9]); + MPI_Irecv(recvbuf_Xy, recvCount_Xy,MPI_DOUBLE,rank_xY,recvtag,comm,&req2[9]); + MPI_Isend(sendbuf_xz, sendCount_xz,MPI_DOUBLE,rank_XZ,sendtag,comm,&req1[10]); + MPI_Irecv(recvbuf_XZ, recvCount_XZ,MPI_DOUBLE,rank_xz,recvtag,comm,&req2[10]); + MPI_Isend(sendbuf_XZ, sendCount_XZ,MPI_DOUBLE,rank_xz,sendtag,comm,&req1[11]); + MPI_Irecv(recvbuf_xz, recvCount_xz,MPI_DOUBLE,rank_XZ,recvtag,comm,&req2[11]); + MPI_Isend(sendbuf_Xz, sendCount_Xz,MPI_DOUBLE,rank_xZ,sendtag,comm,&req1[12]); + MPI_Irecv(recvbuf_xZ, recvCount_xZ,MPI_DOUBLE,rank_Xz,recvtag,comm,&req2[12]); + MPI_Isend(sendbuf_xZ, sendCount_xZ,MPI_DOUBLE,rank_Xz,sendtag,comm,&req1[13]); + MPI_Irecv(recvbuf_Xz, recvCount_Xz,MPI_DOUBLE,rank_xZ,recvtag,comm,&req2[13]); + MPI_Isend(sendbuf_yz, sendCount_yz,MPI_DOUBLE,rank_YZ,sendtag,comm,&req1[14]); + MPI_Irecv(recvbuf_YZ, recvCount_YZ,MPI_DOUBLE,rank_yz,recvtag,comm,&req2[14]); + MPI_Isend(sendbuf_YZ, sendCount_YZ,MPI_DOUBLE,rank_yz,sendtag,comm,&req1[15]); + MPI_Irecv(recvbuf_yz, recvCount_yz,MPI_DOUBLE,rank_YZ,recvtag,comm,&req2[15]); + MPI_Isend(sendbuf_Yz, sendCount_Yz,MPI_DOUBLE,rank_yZ,sendtag,comm,&req1[16]); + MPI_Irecv(recvbuf_yZ, recvCount_yZ,MPI_DOUBLE,rank_Yz,recvtag,comm,&req2[16]); + MPI_Isend(sendbuf_yZ, sendCount_yZ,MPI_DOUBLE,rank_Yz,sendtag,comm,&req1[17]); + MPI_Irecv(recvbuf_Yz, recvCount_Yz,MPI_DOUBLE,rank_yZ,recvtag,comm,&req2[17]); //................................................................................... //................................................................................... // Wait for completion of D3Q19 communication - comm.waitAll(18,req1); - comm.waitAll(18,req2); + MPI_Waitall(18,req1,stat1); + MPI_Waitall(18,req2,stat2); //................................................................................... // Unpack the distributions on the device //................................................................................... @@ -1734,7 +1758,7 @@ int main(int argc, char **argv) //***************************************************************************** //***************************************************************************** - comm.barrier(); + MPI_Barrier(comm); // Iteration completed! iter++; //................................................................... @@ -1742,8 +1766,8 @@ int main(int argc, char **argv) //************************************************************************/ cudaThreadSynchronize(); - comm.barrier(); - stoptime = Utilities::MPI::time(); + MPI_Barrier(comm); + stoptime = MPI_Wtime(); // cout << "CPU time: " << (stoptime - starttime) << " seconds" << endl; cputime = stoptime - starttime; // cout << "Lattice update rate: "<< double(Nx*Ny*Nz*iter)/cputime/1000000 << " MLUPS" << endl; @@ -1778,7 +1802,7 @@ int main(int argc, char **argv) cudaMemcpy(velocity, vel, 3*dist_mem_size, cudaMemcpyDeviceToHost); //.............................................................................. cudaThreadSynchronize(); - comm.barrier(); + MPI_Barrier(comm); //............................................................ //....Write the z-velocity to test poiseuille flow............ 
double vz,vz_avg; @@ -1807,7 +1831,7 @@ int main(int argc, char **argv) free (velocity); free(id); // **************************************************** - comm.barrier(); + MPI_Barrier(comm); MPI_Finalize(); // **************************************************** } diff --git a/gpu/exe/lb2_Color.cu b/gpu/exe/lb2_Color.cu index 1f227d08..1871b23c 100644 --- a/gpu/exe/lb2_Color.cu +++ b/gpu/exe/lb2_Color.cu @@ -1,4 +1,6 @@ -#include "common/MPI.h" +#ifdef useMPI +#include +#endif #include #include @@ -60,10 +62,18 @@ int main(int argc, char *argv[]) { //********** Initialize MPI **************** + int numprocs,rank; +#ifdef useMPI + MPI_Status stat; MPI_Init(&argc,&argv); - Utilities::MPI comm( MPI_COMM_WORLD ); - int rank = comm.getRank(); - int numprocs = comm.getSize(); + MPI_Comm comm = MPI_COMM_WORLD; + MPI_Comm_size(comm,&numprocs); + MPI_Comm_rank(comm,&rank); +#else + MPI_Comm comm = MPI_COMM_WORLD; + numprocs = 1; + rank = 0; +#endif //****************************************** if (rank == 0){ @@ -113,31 +123,32 @@ int main(int argc, char *argv[]) input >> tol; // error tolerance //............................................................. } +#ifdef useMPI // ************************************************************** // Broadcast simulation parameters from rank 0 to all other procs - comm.barrier(); + MPI_Barrier(comm); //................................................. - comm.bcast(&Nz,1,0); - comm.bcast(&nBlocks,1,0); - comm.bcast(&nthreads,1,0); - comm.bcast(&Fx,1,0); - comm.bcast(&Fy,1,0); - comm.bcast(&Fz,1,0); - comm.bcast(&tau,1,0); - comm.bcast(&alpha,1,0); - comm.bcast(&beta,1,0); - comm.bcast(&das,1,0); - comm.bcast(&dbs,1,0); - comm.bcast(&pBC,1,0); - comm.bcast(&din,1,0); - comm.bcast(&dout,1,0); - - comm.bcast(×tepMax,1,0); - comm.bcast(&interval,1,0); - comm.bcast(&tol,1,0); + MPI_Bcast(&Nz,1,MPI_INT,0,comm); + MPI_Bcast(&nBlocks,1,MPI_INT,0,comm); + MPI_Bcast(&nthreads,1,MPI_INT,0,comm); + MPI_Bcast(&Fx,1,MPI_DOUBLE,0,comm); + MPI_Bcast(&Fy,1,MPI_DOUBLE,0,comm); + MPI_Bcast(&Fz,1,MPI_DOUBLE,0,comm); + MPI_Bcast(&tau,1,MPI_DOUBLE,0,comm); + MPI_Bcast(&alpha,1,MPI_DOUBLE,0,comm); + MPI_Bcast(&beta,1,MPI_DOUBLE,0,comm); + MPI_Bcast(&das,1,MPI_DOUBLE,0,comm); + MPI_Bcast(&dbs,1,MPI_DOUBLE,0,comm); + MPI_Bcast(&pBC,1,MPI_LOGICAL,0,comm); + MPI_Bcast(&din,1,MPI_DOUBLE,0,comm); + MPI_Bcast(&dout,1,MPI_DOUBLE,0,comm); + MPI_Bcast(×tepMax,1,MPI_INT,0,comm); + MPI_Bcast(&interval,1,MPI_INT,0,comm); + MPI_Bcast(&tol,1,MPI_DOUBLE,0,comm); //................................................. - comm.barrier(); + MPI_Barrier(comm); // ************************************************************** +#endif double rlxA = 1.f/tau; double rlxB = 8.f*(2.f-rlxA)/(8.f-rlxA); @@ -232,7 +243,11 @@ int main(int argc, char *argv[]) if (k==4) k=Nz-5; } } - comm.bcast(&id[0],N,0); +#ifdef useMPI //............................................................ + MPI_Barrier(comm); + MPI_Bcast(&id[0],N,MPI_CHAR,0,comm); + MPI_Barrier(comm); +#endif if (rank == 0) printf("Domain set.\n"); //........................................................................... 
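In lb2_Color.cu the MPI calls are additionally wrapped in a useMPI guard so the same source still builds without MPI; with the macro undefined, rank and numprocs simply fall back to a single process. A trimmed sketch of that initialization (not the verbatim hunk; it assumes the same useMPI macro):

    #ifdef useMPI
    #include <mpi.h>
    #endif

    // Inside main(argc, argv): serial defaults, overridden when built with MPI.
    int rank = 0, numprocs = 1;
    #ifdef useMPI
    MPI_Init(&argc, &argv);
    MPI_Comm comm = MPI_COMM_WORLD;
    MPI_Comm_rank(comm, &rank);
    MPI_Comm_size(comm, &numprocs);
    #endif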
diff --git a/gpu/exe/lb2_Color_mpi.cpp b/gpu/exe/lb2_Color_mpi.cpp index a2f3d8a9..fe11d32f 100644 --- a/gpu/exe/lb2_Color_mpi.cpp +++ b/gpu/exe/lb2_Color_mpi.cpp @@ -2,7 +2,7 @@ #include #include #include -#include "common/MPI.h" +#include using namespace std; @@ -98,11 +98,15 @@ inline void UnpackID(int *list, int count, char *recvbuf, char *ID){ int main(int argc, char **argv) { + //***************************************** + // ***** MPI STUFF **************** + //***************************************** // Initialize MPI + int rank,nprocs; MPI_Init(&argc,&argv); - Utilities::MPI comm( MPI_COMM_WORLD ); - int rank = comm.getRank(); - int nprocs = comm.getSize(); + MPI_Comm comm = MPI_COMM_WORLD; + MPI_Comm_rank(comm,&rank); + MPI_Comm_size(comm,&nprocs); // parallel domain size (# of sub-domains) int nprocx,nprocy,nprocz; int iproc,jproc,kproc; @@ -116,6 +120,7 @@ int main(int argc, char **argv) int rank_yz,rank_YZ,rank_yZ,rank_Yz; //********************************** MPI_Request req1[18],req2[18]; + MPI_Status stat1[18],stat2[18]; if (rank == 0){ printf("********************************************************\n"); @@ -172,30 +177,31 @@ int main(int argc, char **argv) } // ************************************************************** // Broadcast simulation parameters from rank 0 to all other procs - comm.barrier(); + MPI_Barrier(comm); //................................................. - comm.bcast(&Nz,1,0); - comm.bcast(&nBlocks,1,0); - comm.bcast(&nthreads,1,0); - comm.bcast(&Fx,1,0); - comm.bcast(&Fy,1,0); - comm.bcast(&Fz,1,0); - comm.bcast(&tau,1,0); - comm.bcast(&alpha,1,0); - comm.bcast(&beta,1,0); - comm.bcast(&das,1,0); - comm.bcast(&dbs,1,0); - comm.bcast(&pBC,1,0); - comm.bcast(&din,1,0); - comm.bcast(&dout,1,0); - comm.bcast(×tepMax,1,0); - comm.bcast(&interval,1,0); - comm.bcast(&tol,1,0); - comm.bcast(&nprocx,1,0); - comm.bcast(&nprocy,1,0); - comm.bcast(&nprocz,1,0); + MPI_Bcast(&Nz,1,MPI_INT,0,comm); + MPI_Bcast(&nBlocks,1,MPI_INT,0,comm); + MPI_Bcast(&nthreads,1,MPI_INT,0,comm); + MPI_Bcast(&Fx,1,MPI_DOUBLE,0,comm); + MPI_Bcast(&Fy,1,MPI_DOUBLE,0,comm); + MPI_Bcast(&Fz,1,MPI_DOUBLE,0,comm); + MPI_Bcast(&tau,1,MPI_DOUBLE,0,comm); + MPI_Bcast(&alpha,1,MPI_DOUBLE,0,comm); + MPI_Bcast(&beta,1,MPI_DOUBLE,0,comm); + MPI_Bcast(&das,1,MPI_DOUBLE,0,comm); + MPI_Bcast(&dbs,1,MPI_DOUBLE,0,comm); + MPI_Bcast(&pBC,1,MPI_LOGICAL,0,comm); + MPI_Bcast(&din,1,MPI_DOUBLE,0,comm); + MPI_Bcast(&dout,1,MPI_DOUBLE,0,comm); + MPI_Bcast(×tepMax,1,MPI_INT,0,comm); + MPI_Bcast(&interval,1,MPI_INT,0,comm); + MPI_Bcast(&tol,1,MPI_DOUBLE,0,comm); + + MPI_Bcast(&nprocx,1,MPI_INT,0,comm); + MPI_Bcast(&nprocy,1,MPI_INT,0,comm); + MPI_Bcast(&nprocz,1,MPI_INT,0,comm); //................................................. - comm.barrier(); + MPI_Barrier(comm); // ************************************************************** // ************************************************************** @@ -225,7 +231,7 @@ int main(int argc, char **argv) } - comm.barrier(); + MPI_Barrier(comm); kproc = rank/(nprocx*nprocy); jproc = (rank-nprocx*nprocy*kproc)/nprocx; iproc = rank-nprocx*nprocy*kproc-nprocz*jproc; @@ -507,7 +513,7 @@ int main(int argc, char **argv) PM.close(); // printf("File porosity = %f\n", double(sum)/N); //........................................................................... - comm.barrier(); + MPI_Barrier(comm); if (rank == 0) cout << "Domain set." << endl; //........................................................................... 
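lb2_Color_mpi.cpp gets the usual standalone-test boilerplate: rank and size are queried from MPI_COMM_WORLD, and request/status arrays are reserved for the 18 non-rest directions of the D3Q19 halo exchange. A minimal sketch of that skeleton (illustrative, not the full test):

    #include <mpi.h>

    int main(int argc, char **argv)
    {
        int rank, nprocs;
        MPI_Init(&argc, &argv);
        MPI_Comm comm = MPI_COMM_WORLD;
        MPI_Comm_rank(comm, &rank);
        MPI_Comm_size(comm, &nprocs);

        // One request/status slot per communication direction (6 faces + 12 edges).
        MPI_Request req1[18], req2[18];
        MPI_Status  stat1[18], stat2[18];

        // ... read input on rank 0, broadcast, exchange halos, run timesteps ...

        MPI_Finalize();
        return 0;
    }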
// Write the communcation structure into a file for debugging @@ -644,7 +650,7 @@ int main(int argc, char **argv) } } } - comm.barrier(); + MPI_Barrier(comm); if (rank==0) printf ("SendLists are ready on host\n"); //...................................................................................... // Use MPI to fill in the recvCounts form the associated processes @@ -655,46 +661,46 @@ int main(int argc, char **argv) //********************************************************************************** // Fill in the recieve counts using MPI sendtag = recvtag = 3; - comm.Send(&sendCount_x,1,rank_X,sendtag); - comm.Recv(&recvCount_X,1,rank_x,recvtag); - comm.Send(&sendCount_X,1,rank_x,sendtag); - comm.Recv(&recvCount_x,1,rank_X,recvtag); - comm.Send(&sendCount_y,1,rank_Y,sendtag); - comm.Recv(&recvCount_Y,1,rank_y,recvtag); - comm.Send(&sendCount_Y,1,rank_y,sendtag); - comm.Recv(&recvCount_y,1,rank_Y,recvtag); - comm.Send(&sendCount_z,1,rank_Z,sendtag); - comm.Recv(&recvCount_Z,1,rank_z,recvtag); - comm.Send(&sendCount_Z,1,rank_z,sendtag); - comm.Recv(&recvCount_z,1,rank_Z,recvtag); + MPI_Send(&sendCount_x,1,MPI_INT,rank_X,sendtag,comm); + MPI_Recv(&recvCount_X,1,MPI_INT,rank_x,recvtag,comm,MPI_STATUS_IGNORE); + MPI_Send(&sendCount_X,1,MPI_INT,rank_x,sendtag,comm); + MPI_Recv(&recvCount_x,1,MPI_INT,rank_X,recvtag,comm,MPI_STATUS_IGNORE); + MPI_Send(&sendCount_y,1,MPI_INT,rank_Y,sendtag,comm); + MPI_Recv(&recvCount_Y,1,MPI_INT,rank_y,recvtag,comm,MPI_STATUS_IGNORE); + MPI_Send(&sendCount_Y,1,MPI_INT,rank_y,sendtag,comm); + MPI_Recv(&recvCount_y,1,MPI_INT,rank_Y,recvtag,comm,MPI_STATUS_IGNORE); + MPI_Send(&sendCount_z,1,MPI_INT,rank_Z,sendtag,comm); + MPI_Recv(&recvCount_Z,1,MPI_INT,rank_z,recvtag,comm,MPI_STATUS_IGNORE); + MPI_Send(&sendCount_Z,1,MPI_INT,rank_z,sendtag,comm); + MPI_Recv(&recvCount_z,1,MPI_INT,rank_Z,recvtag,comm,MPI_STATUS_IGNORE); - comm.Send(&sendCount_xy,1,rank_XY,sendtag); - comm.Recv(&recvCount_XY,1,rank_xy,recvtag); - comm.Send(&sendCount_XY,1,rank_xy,sendtag); - comm.Recv(&recvCount_xy,1,rank_XY,recvtag); - comm.Send(&sendCount_Xy,1,rank_xY,sendtag); - comm.Recv(&recvCount_xY,1,rank_Xy,recvtag); - comm.Send(&sendCount_xY,1,rank_Xy,sendtag); - comm.Recv(&recvCount_Xy,1,rank_xY,recvtag); + MPI_Send(&sendCount_xy,1,MPI_INT,rank_XY,sendtag,comm); + MPI_Recv(&recvCount_XY,1,MPI_INT,rank_xy,recvtag,comm,MPI_STATUS_IGNORE); + MPI_Send(&sendCount_XY,1,MPI_INT,rank_xy,sendtag,comm); + MPI_Recv(&recvCount_xy,1,MPI_INT,rank_XY,recvtag,comm,MPI_STATUS_IGNORE); + MPI_Send(&sendCount_Xy,1,MPI_INT,rank_xY,sendtag,comm); + MPI_Recv(&recvCount_xY,1,MPI_INT,rank_Xy,recvtag,comm,MPI_STATUS_IGNORE); + MPI_Send(&sendCount_xY,1,MPI_INT,rank_Xy,sendtag,comm); + MPI_Recv(&recvCount_Xy,1,MPI_INT,rank_xY,recvtag,comm,MPI_STATUS_IGNORE); - comm.Send(&sendCount_xz,1,rank_XZ,sendtag); - comm.Recv(&recvCount_XZ,1,rank_xz,recvtag); - comm.Send(&sendCount_XZ,1,rank_xz,sendtag); - comm.Recv(&recvCount_xz,1,rank_XZ,recvtag); - comm.Send(&sendCount_Xz,1,rank_xZ,sendtag); - comm.Recv(&recvCount_xZ,1,rank_Xz,recvtag); - comm.Send(&sendCount_xZ,1,rank_Xz,sendtag); - comm.Recv(&recvCount_Xz,1,rank_xZ,recvtag); + MPI_Send(&sendCount_xz,1,MPI_INT,rank_XZ,sendtag,comm); + MPI_Recv(&recvCount_XZ,1,MPI_INT,rank_xz,recvtag,comm,MPI_STATUS_IGNORE); + MPI_Send(&sendCount_XZ,1,MPI_INT,rank_xz,sendtag,comm); + MPI_Recv(&recvCount_xz,1,MPI_INT,rank_XZ,recvtag,comm,MPI_STATUS_IGNORE); + MPI_Send(&sendCount_Xz,1,MPI_INT,rank_xZ,sendtag,comm); + MPI_Recv(&recvCount_xZ,1,MPI_INT,rank_Xz,recvtag,comm,MPI_STATUS_IGNORE); 
+ MPI_Send(&sendCount_xZ,1,MPI_INT,rank_Xz,sendtag,comm); + MPI_Recv(&recvCount_Xz,1,MPI_INT,rank_xZ,recvtag,comm,MPI_STATUS_IGNORE); - comm.Send(&sendCount_yz,1,rank_YZ,sendtag); - comm.Recv(&recvCount_YZ,1,rank_yz,recvtag); - comm.Send(&sendCount_YZ,1,rank_yz,sendtag); - comm.Recv(&recvCount_yz,1,rank_YZ,recvtag); - comm.Send(&sendCount_Yz,1,rank_yZ,sendtag); - comm.Recv(&recvCount_yZ,1,rank_Yz,recvtag); - comm.Send(&sendCount_yZ,1,rank_Yz,sendtag); - comm.Recv(&recvCount_Yz,1,rank_yZ,recvtag); - comm.barrier(); + MPI_Send(&sendCount_yz,1,MPI_INT,rank_YZ,sendtag,comm); + MPI_Recv(&recvCount_YZ,1,MPI_INT,rank_yz,recvtag,comm,MPI_STATUS_IGNORE); + MPI_Send(&sendCount_YZ,1,MPI_INT,rank_yz,sendtag,comm); + MPI_Recv(&recvCount_yz,1,MPI_INT,rank_YZ,recvtag,comm,MPI_STATUS_IGNORE); + MPI_Send(&sendCount_Yz,1,MPI_INT,rank_yZ,sendtag,comm); + MPI_Recv(&recvCount_yZ,1,MPI_INT,rank_Yz,recvtag,comm,MPI_STATUS_IGNORE); + MPI_Send(&sendCount_yZ,1,MPI_INT,rank_Yz,sendtag,comm); + MPI_Recv(&recvCount_Yz,1,MPI_INT,rank_yZ,recvtag,comm,MPI_STATUS_IGNORE); + MPI_Barrier(comm); //********************************************************************************** //...................................................................................... int *recvList_x, *recvList_y, *recvList_z, *recvList_X, *recvList_Y, *recvList_Z; @@ -725,48 +731,48 @@ int main(int argc, char **argv) // Use MPI to fill in the appropriate values for recvList // Fill in the recieve lists using MPI sendtag = recvtag = 4; - req1[0] = comm.Isend(sendList_x,sendCount_x,rank_X,sendtag); - req2[0] = comm.Irecv(recvList_X,recvCount_X,rank_x,recvtag); - req1[1] = comm.Isend(sendList_X,sendCount_X,rank_x,sendtag); - req2[1] = comm.Irecv(recvList_x,recvCount_x,rank_X,recvtag); - req1[2] = comm.Isend(sendList_y,sendCount_y,rank_Y,sendtag); - req2[2] = comm.Irecv(recvList_Y,recvCount_Y,rank_y,recvtag); - req1[3] = comm.Isend(sendList_Y,sendCount_Y,rank_y,sendtag); - req2[3] = comm.Irecv(recvList_y,recvCount_y,rank_Y,recvtag); - req1[4] = comm.Isend(sendList_z,sendCount_z,rank_Z,sendtag); - req2[4] = comm.Irecv(recvList_Z,recvCount_Z,rank_z,recvtag); - req1[5] = comm.Isend(sendList_Z,sendCount_Z,rank_z,sendtag); - req2[5] = comm.Irecv(recvList_z,recvCount_z,rank_Z,recvtag); + MPI_Isend(sendList_x, sendCount_x,MPI_INT,rank_X,sendtag,comm,&req1[0]); + MPI_Irecv(recvList_X, recvCount_X,MPI_INT,rank_x,recvtag,comm,&req2[0]); + MPI_Isend(sendList_X, sendCount_X,MPI_INT,rank_x,sendtag,comm,&req1[1]); + MPI_Irecv(recvList_x, recvCount_x,MPI_INT,rank_X,recvtag,comm,&req2[1]); + MPI_Isend(sendList_y, sendCount_y,MPI_INT,rank_Y,sendtag,comm,&req1[2]); + MPI_Irecv(recvList_Y, recvCount_Y,MPI_INT,rank_y,recvtag,comm,&req2[2]); + MPI_Isend(sendList_Y, sendCount_Y,MPI_INT,rank_y,sendtag,comm,&req1[3]); + MPI_Irecv(recvList_y, recvCount_y,MPI_INT,rank_Y,recvtag,comm,&req2[3]); + MPI_Isend(sendList_z, sendCount_z,MPI_INT,rank_Z,sendtag,comm,&req1[4]); + MPI_Irecv(recvList_Z, recvCount_Z,MPI_INT,rank_z,recvtag,comm,&req2[4]); + MPI_Isend(sendList_Z, sendCount_Z,MPI_INT,rank_z,sendtag,comm,&req1[5]); + MPI_Irecv(recvList_z, recvCount_z,MPI_INT,rank_Z,recvtag,comm,&req2[5]); - req1[6] = comm.Isend(sendList_xy,sendCount_xy,rank_XY,sendtag); - req2[6] = comm.Irecv(recvList_XY,recvCount_XY,rank_xy,recvtag); - req1[7] = comm.Isend(sendList_XY,sendCount_XY,rank_xy,sendtag); - req2[7] = comm.Irecv(recvList_xy,recvCount_xy,rank_XY,recvtag); - req1[8] = comm.Isend(sendList_Xy,sendCount_Xy,rank_xY,sendtag); - req2[8] = 
comm.Irecv(recvList_xY,recvCount_xY,rank_Xy,recvtag); - req1[9] = comm.Isend(sendList_xY,sendCount_xY,rank_Xy,sendtag); - req2[9] = comm.Irecv(recvList_Xy,recvCount_Xy,rank_xY,recvtag); + MPI_Isend(sendList_xy, sendCount_xy,MPI_INT,rank_XY,sendtag,comm,&req1[6]); + MPI_Irecv(recvList_XY, recvCount_XY,MPI_INT,rank_xy,recvtag,comm,&req2[6]); + MPI_Isend(sendList_XY, sendCount_XY,MPI_INT,rank_xy,sendtag,comm,&req1[7]); + MPI_Irecv(recvList_xy, recvCount_xy,MPI_INT,rank_XY,recvtag,comm,&req2[7]); + MPI_Isend(sendList_Xy, sendCount_Xy,MPI_INT,rank_xY,sendtag,comm,&req1[8]); + MPI_Irecv(recvList_xY, recvCount_xY,MPI_INT,rank_Xy,recvtag,comm,&req2[8]); + MPI_Isend(sendList_xY, sendCount_xY,MPI_INT,rank_Xy,sendtag,comm,&req1[9]); + MPI_Irecv(recvList_Xy, recvCount_Xy,MPI_INT,rank_xY,recvtag,comm,&req2[9]); - req1[10] = comm.Isend(sendList_xz,sendCount_xz,rank_XZ,sendtag); - req2[10] = comm.Irecv(recvList_XZ,recvCount_XZ,rank_xz,recvtag); - req1[11] = comm.Isend(sendList_XZ,sendCount_XZ,rank_xz,sendtag); - req2[11] = comm.Irecv(recvList_xz,recvCount_xz,rank_XZ,recvtag); - req1[12] = comm.Isend(sendList_Xz,sendCount_Xz,rank_xZ,sendtag); - req2[12] = comm.Irecv(recvList_xZ,recvCount_xZ,rank_Xz,recvtag); - req1[13] = comm.Isend(sendList_xZ,sendCount_xZ,rank_Xz,sendtag); - req2[13] = comm.Irecv(recvList_Xz,recvCount_Xz,rank_xZ,recvtag); + MPI_Isend(sendList_xz, sendCount_xz,MPI_INT,rank_XZ,sendtag,comm,&req1[10]); + MPI_Irecv(recvList_XZ, recvCount_XZ,MPI_INT,rank_xz,recvtag,comm,&req2[10]); + MPI_Isend(sendList_XZ, sendCount_XZ,MPI_INT,rank_xz,sendtag,comm,&req1[11]); + MPI_Irecv(recvList_xz, recvCount_xz,MPI_INT,rank_XZ,recvtag,comm,&req2[11]); + MPI_Isend(sendList_Xz, sendCount_Xz,MPI_INT,rank_xZ,sendtag,comm,&req1[12]); + MPI_Irecv(recvList_xZ, recvCount_xZ,MPI_INT,rank_Xz,recvtag,comm,&req2[12]); + MPI_Isend(sendList_xZ, sendCount_xZ,MPI_INT,rank_Xz,sendtag,comm,&req1[13]); + MPI_Irecv(recvList_Xz, recvCount_Xz,MPI_INT,rank_xZ,recvtag,comm,&req2[13]); - req1[14] = comm.Isend(sendList_yz,sendCount_yz,rank_YZ,sendtag); - req2[14] = comm.Irecv(recvList_YZ,recvCount_YZ,rank_yz,recvtag); - req1[15] = comm.Isend(sendList_YZ,sendCount_YZ,rank_yz,sendtag); - req2[15] = comm.Irecv(recvList_yz,recvCount_yz,rank_YZ,recvtag); - req1[16] = comm.Isend(sendList_Yz,sendCount_Yz,rank_yZ,sendtag); - req2[16] = comm.Irecv(recvList_yZ,recvCount_yZ,rank_Yz,recvtag); - req1[17] = comm.Isend(sendList_yZ,sendCount_yZ,rank_Yz,sendtag); - req2[17] = comm.Irecv(recvList_Yz,recvCount_Yz,rank_yZ,recvtag); - comm.waitAll(18,req1); - comm.waitAll(18,req2); - comm.barrier(); + MPI_Isend(sendList_yz, sendCount_yz,MPI_INT,rank_YZ,sendtag,comm,&req1[14]); + MPI_Irecv(recvList_YZ, recvCount_YZ,MPI_INT,rank_yz,recvtag,comm,&req2[14]); + MPI_Isend(sendList_YZ, sendCount_YZ,MPI_INT,rank_yz,sendtag,comm,&req1[15]); + MPI_Irecv(recvList_yz, recvCount_yz,MPI_INT,rank_YZ,recvtag,comm,&req2[15]); + MPI_Isend(sendList_Yz, sendCount_Yz,MPI_INT,rank_yZ,sendtag,comm,&req1[16]); + MPI_Irecv(recvList_yZ, recvCount_yZ,MPI_INT,rank_Yz,recvtag,comm,&req2[16]); + MPI_Isend(sendList_yZ, sendCount_yZ,MPI_INT,rank_Yz,sendtag,comm,&req1[17]); + MPI_Irecv(recvList_Yz, recvCount_Yz,MPI_INT,rank_yZ,recvtag,comm,&req2[17]); + MPI_Waitall(18,req1,stat1); + MPI_Waitall(18,req2,stat2); + MPI_Barrier(comm); //...................................................................................... 
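Two exchange idioms appear side by side in this file: the single-int neighbor counts go through blocking MPI_Send/MPI_Recv pairs, while the larger index lists above use MPI_Isend/MPI_Irecv plus MPI_Waitall so that all receives are posted up front. For the counts, MPI_Sendrecv would be the equivalent one-call, deadlock-safe form; it is sketched here only for comparison and is not what the patch does:

    #include <mpi.h>

    // Exchange a single int count with opposite neighbors in one call.
    void exchange_count(MPI_Comm comm, int tag,
                        int send_count, int rank_send_to,
                        int *recv_count, int rank_recv_from)
    {
        MPI_Sendrecv(&send_count, 1, MPI_INT, rank_send_to,   tag,
                     recv_count,  1, MPI_INT, rank_recv_from, tag,
                     comm, MPI_STATUS_IGNORE);
    }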
for (int idx=0; idx #include #include -#include "common/MPI.h" +#include #include "pmmc.h" #include "Domain.h" @@ -101,11 +101,15 @@ inline void UnpackID(int *list, int count, char *recvbuf, char *ID){ int main(int argc, char **argv) { + //***************************************** + // ***** MPI STUFF **************** + //***************************************** // Initialize MPI + int rank,nprocs; MPI_Init(&argc,&argv); - Utilities::MPI comm( MPI_COMM_WORLD ); - int rank = comm.getRank(); - int nprocs = comm.getSize(); + MPI_Comm comm = MPI_COMM_WORLD; + MPI_Comm_rank(comm,&rank); + MPI_Comm_size(comm,&nprocs); // parallel domain size (# of sub-domains) int nprocx,nprocy,nprocz; int iproc,jproc,kproc; @@ -119,6 +123,7 @@ int main(int argc, char **argv) int rank_yz,rank_YZ,rank_yZ,rank_Yz; //********************************** MPI_Request req1[18],req2[18]; + MPI_Status stat1[18],stat2[18]; if (rank == 0){ printf("********************************************************\n"); @@ -198,35 +203,35 @@ int main(int argc, char **argv) } // ************************************************************** // Broadcast simulation parameters from rank 0 to all other procs - comm.barrier(); + MPI_Barrier(comm); //................................................. - comm.bcast(&tau,1,0); - comm.bcast(&alpha,1,0); - comm.bcast(&beta,1,0); - comm.bcast(&das,1,0); - comm.bcast(&dbs,1,0); - comm.bcast(&pBC,1,0); - comm.bcast(&din,1,0); - comm.bcast(&dout,1,0); - comm.bcast(&Fx,1,0); - comm.bcast(&Fy,1,0); - comm.bcast(&Fz,1,0); - comm.bcast(×tepMax,1,0); - comm.bcast(&interval,1,0); - comm.bcast(&tol,1,0); + MPI_Bcast(&tau,1,MPI_DOUBLE,0,comm); + MPI_Bcast(&alpha,1,MPI_DOUBLE,0,comm); + MPI_Bcast(&beta,1,MPI_DOUBLE,0,comm); + MPI_Bcast(&das,1,MPI_DOUBLE,0,comm); + MPI_Bcast(&dbs,1,MPI_DOUBLE,0,comm); + MPI_Bcast(&pBC,1,MPI_LOGICAL,0,comm); + MPI_Bcast(&din,1,MPI_DOUBLE,0,comm); + MPI_Bcast(&dout,1,MPI_DOUBLE,0,comm); + MPI_Bcast(&Fx,1,MPI_DOUBLE,0,comm); + MPI_Bcast(&Fy,1,MPI_DOUBLE,0,comm); + MPI_Bcast(&Fz,1,MPI_DOUBLE,0,comm); + MPI_Bcast(×tepMax,1,MPI_INT,0,comm); + MPI_Bcast(&interval,1,MPI_INT,0,comm); + MPI_Bcast(&tol,1,MPI_DOUBLE,0,comm); // Computational domain - comm.bcast(&Nz,1,0); - comm.bcast(&nBlocks,1,0); - comm.bcast(&nthreads,1,0); - comm.bcast(&nprocx,1,0); - comm.bcast(&nprocy,1,0); - comm.bcast(&nprocz,1,0); - comm.bcast(&nspheres,1,0); - comm.bcast(&Lx,1,0); - comm.bcast(&Ly,1,0); - comm.bcast(&Lz,1,0); + MPI_Bcast(&Nz,1,MPI_INT,0,comm); + MPI_Bcast(&nBlocks,1,MPI_INT,0,comm); + MPI_Bcast(&nthreads,1,MPI_INT,0,comm); + MPI_Bcast(&nprocx,1,MPI_INT,0,comm); + MPI_Bcast(&nprocy,1,MPI_INT,0,comm); + MPI_Bcast(&nprocz,1,MPI_INT,0,comm); + MPI_Bcast(&nspheres,1,MPI_INT,0,comm); + MPI_Bcast(&Lx,1,MPI_DOUBLE,0,comm); + MPI_Bcast(&Ly,1,MPI_DOUBLE,0,comm); + MPI_Bcast(&Lz,1,MPI_DOUBLE,0,comm); //................................................. - comm.barrier(); + MPI_Barrier(comm); // ************************************************************** // ************************************************************** double Ps = -(das-dbs)/(das+dbs); @@ -258,7 +263,7 @@ int main(int argc, char **argv) printf("********************************************************\n"); } - comm.barrier(); + MPI_Barrier(comm); kproc = rank/(nprocx*nprocy); jproc = (rank-nprocx*nprocy*kproc)/nprocx; iproc = rank-nprocx*nprocy*kproc-nprocz*jproc; @@ -556,14 +561,14 @@ int main(int argc, char **argv) //....................................................................... 
if (rank == 0) printf("Reading the sphere packing \n"); if (rank == 0) ReadSpherePacking(nspheres,cx,cy,cz,rad); - comm.barrier(); + MPI_Barrier(comm); // Broadcast the sphere packing to all processes - comm.bcast(cx,nspheres,0); - comm.bcast(cy,nspheres,0); - comm.bcast(cz,nspheres,0); - comm.bcast(rad,nspheres,0); + MPI_Bcast(cx,nspheres,MPI_DOUBLE,0,comm); + MPI_Bcast(cy,nspheres,MPI_DOUBLE,0,comm); + MPI_Bcast(cz,nspheres,MPI_DOUBLE,0,comm); + MPI_Bcast(rad,nspheres,MPI_DOUBLE,0,comm); //........................................................................... - comm.barrier(); + MPI_Barrier(comm); if (rank == 0) cout << "Domain set." << endl; //....................................................................... // sprintf(LocalRankString,"%05d",rank); @@ -713,7 +718,7 @@ int main(int argc, char **argv) } } } - comm.barrier(); + MPI_Barrier(comm); if (rank==0) printf ("SendLists are ready on host\n"); //...................................................................................... // Use MPI to fill in the recvCounts form the associated processes @@ -724,49 +729,89 @@ int main(int argc, char **argv) //********************************************************************************** // Fill in the recieve counts using MPI sendtag = recvtag = 3; - req1[0] = comm.Isend(&sendCount_x,1,rank_X,sendtag); - req2[0] = comm.Irecv(&recvCount_X,1,rank_x,recvtag); - req1[1] = comm.Isend(&sendCount_X,1,rank_x,sendtag); - req2[1] = comm.Irecv(&recvCount_x,1,rank_X,recvtag); - req1[2] = comm.Isend(&sendCount_y,1,rank_Y,sendtag); - req2[2] = comm.Irecv(&recvCount_Y,1,rank_y,recvtag); - req1[3] = comm.Isend(&sendCount_Y,1,rank_y,sendtag); - req2[3] = comm.Irecv(&recvCount_y,1,rank_Y,recvtag); - req1[4] = comm.Isend(&sendCount_z,1,rank_Z,sendtag); - req2[4] = comm.Irecv(&recvCount_Z,1,rank_z,recvtag); - req1[5] = comm.Isend(&sendCount_Z,1,rank_z,sendtag); - req2[5] = comm.Irecv(&recvCount_z,1,rank_Z,recvtag); + MPI_Isend(&sendCount_x, 1,MPI_INT,rank_X,sendtag,comm,&req1[0]); + MPI_Irecv(&recvCount_X, 1,MPI_INT,rank_x,recvtag,comm,&req2[0]); + MPI_Isend(&sendCount_X, 1,MPI_INT,rank_x,sendtag,comm,&req1[1]); + MPI_Irecv(&recvCount_x, 1,MPI_INT,rank_X,recvtag,comm,&req2[1]); + MPI_Isend(&sendCount_y, 1,MPI_INT,rank_Y,sendtag,comm,&req1[2]); + MPI_Irecv(&recvCount_Y, 1,MPI_INT,rank_y,recvtag,comm,&req2[2]); + MPI_Isend(&sendCount_Y, 1,MPI_INT,rank_y,sendtag,comm,&req1[3]); + MPI_Irecv(&recvCount_y, 1,MPI_INT,rank_Y,recvtag,comm,&req2[3]); + MPI_Isend(&sendCount_z, 1,MPI_INT,rank_Z,sendtag,comm,&req1[4]); + MPI_Irecv(&recvCount_Z, 1,MPI_INT,rank_z,recvtag,comm,&req2[4]); + MPI_Isend(&sendCount_Z, 1,MPI_INT,rank_z,sendtag,comm,&req1[5]); + MPI_Irecv(&recvCount_z, 1,MPI_INT,rank_Z,recvtag,comm,&req2[5]); - req1[6] = comm.Isend(&sendCount_xy,1,rank_XY,sendtag); - req2[6] = comm.Irecv(&recvCount_XY,1,rank_xy,recvtag); - req1[7] = comm.Isend(&sendCount_XY,1,rank_xy,sendtag); - req2[7] = comm.Irecv(&recvCount_xy,1,rank_XY,recvtag); - req1[8] = comm.Isend(&sendCount_Xy,1,rank_xY,sendtag); - req2[8] = comm.Irecv(&recvCount_xY,1,rank_Xy,recvtag); - req1[9] = comm.Isend(&sendCount_xY,1,rank_Xy,sendtag); - req2[9] = comm.Irecv(&recvCount_Xy,1,rank_xY,recvtag); + MPI_Isend(&sendCount_xy, 1,MPI_INT,rank_XY,sendtag,comm,&req1[6]); + MPI_Irecv(&recvCount_XY, 1,MPI_INT,rank_xy,recvtag,comm,&req2[6]); + MPI_Isend(&sendCount_XY, 1,MPI_INT,rank_xy,sendtag,comm,&req1[7]); + MPI_Irecv(&recvCount_xy, 1,MPI_INT,rank_XY,recvtag,comm,&req2[7]); + MPI_Isend(&sendCount_Xy, 1,MPI_INT,rank_xY,sendtag,comm,&req1[8]); + 
MPI_Irecv(&recvCount_xY, 1,MPI_INT,rank_Xy,recvtag,comm,&req2[8]); + MPI_Isend(&sendCount_xY, 1,MPI_INT,rank_Xy,sendtag,comm,&req1[9]); + MPI_Irecv(&recvCount_Xy, 1,MPI_INT,rank_xY,recvtag,comm,&req2[9]); - req1[10] = comm.Isend(&sendCount_xz,1,rank_XZ,sendtag); - req2[10] = comm.Irecv(&recvCount_XZ,1,rank_xz,recvtag); - req1[11] = comm.Isend(&sendCount_XZ,1,rank_xz,sendtag); - req2[11] = comm.Irecv(&recvCount_xz,1,rank_XZ,recvtag); - req1[12] = comm.Isend(&sendCount_Xz,1,rank_xZ,sendtag); - req2[12] = comm.Irecv(&recvCount_xZ,1,rank_Xz,recvtag); - req1[13] = comm.Isend(&sendCount_xZ,1,rank_Xz,sendtag); - req2[13] = comm.Irecv(&recvCount_Xz,1,rank_xZ,recvtag); + MPI_Isend(&sendCount_xz, 1,MPI_INT,rank_XZ,sendtag,comm,&req1[10]); + MPI_Irecv(&recvCount_XZ, 1,MPI_INT,rank_xz,recvtag,comm,&req2[10]); + MPI_Isend(&sendCount_XZ, 1,MPI_INT,rank_xz,sendtag,comm,&req1[11]); + MPI_Irecv(&recvCount_xz, 1,MPI_INT,rank_XZ,recvtag,comm,&req2[11]); + MPI_Isend(&sendCount_Xz, 1,MPI_INT,rank_xZ,sendtag,comm,&req1[12]); + MPI_Irecv(&recvCount_xZ, 1,MPI_INT,rank_Xz,recvtag,comm,&req2[12]); + MPI_Isend(&sendCount_xZ, 1,MPI_INT,rank_Xz,sendtag,comm,&req1[13]); + MPI_Irecv(&recvCount_Xz, 1,MPI_INT,rank_xZ,recvtag,comm,&req2[13]); - req1[14] = comm.Isend(&sendCount_yz,1,rank_YZ,sendtag); - req2[14] = comm.Irecv(&recvCount_YZ,1,rank_yz,recvtag); - req1[15] = comm.Isend(&sendCount_YZ,1,rank_yz,sendtag); - req2[15] = comm.Irecv(&recvCount_yz,1,rank_YZ,recvtag); - req1[16] = comm.Isend(&sendCount_Yz,1,rank_yZ,sendtag); - req2[16] = comm.Irecv(&recvCount_yZ,1,rank_Yz,recvtag); - req1[17] = comm.Isend(&sendCount_yZ,1,rank_Yz,sendtag); - req2[17] = comm.Irecv(&recvCount_Yz,1,rank_yZ,recvtag); - comm.waitAll(18,req1); - comm.waitAll(18,req2); - comm.barrier(); - //********************************************************************************** + MPI_Isend(&sendCount_yz, 1,MPI_INT,rank_YZ,sendtag,comm,&req1[14]); + MPI_Irecv(&recvCount_YZ, 1,MPI_INT,rank_yz,recvtag,comm,&req2[14]); + MPI_Isend(&sendCount_YZ, 1,MPI_INT,rank_yz,sendtag,comm,&req1[15]); + MPI_Irecv(&recvCount_yz, 1,MPI_INT,rank_YZ,recvtag,comm,&req2[15]); + MPI_Isend(&sendCount_Yz, 1,MPI_INT,rank_yZ,sendtag,comm,&req1[16]); + MPI_Irecv(&recvCount_yZ, 1,MPI_INT,rank_Yz,recvtag,comm,&req2[16]); + MPI_Isend(&sendCount_yZ, 1,MPI_INT,rank_Yz,sendtag,comm,&req1[17]); + MPI_Irecv(&recvCount_Yz, 1,MPI_INT,rank_yZ,recvtag,comm,&req2[17]); + MPI_Waitall(18,req1,stat1); + MPI_Waitall(18,req2,stat2); + MPI_Barrier(comm); +/* MPI_Send(&sendCount_x,1,MPI_INT,rank_X,sendtag,comm); + MPI_Recv(&recvCount_X,1,MPI_INT,rank_x,recvtag,comm,MPI_STATUS_IGNORE); + MPI_Send(&sendCount_X,1,MPI_INT,rank_x,sendtag,comm); + MPI_Recv(&recvCount_x,1,MPI_INT,rank_X,recvtag,comm,MPI_STATUS_IGNORE); + MPI_Send(&sendCount_y,1,MPI_INT,rank_Y,sendtag,comm); + MPI_Recv(&recvCount_Y,1,MPI_INT,rank_y,recvtag,comm,MPI_STATUS_IGNORE); + MPI_Send(&sendCount_Y,1,MPI_INT,rank_y,sendtag,comm); + MPI_Recv(&recvCount_y,1,MPI_INT,rank_Y,recvtag,comm,MPI_STATUS_IGNORE); + MPI_Send(&sendCount_z,1,MPI_INT,rank_Z,sendtag,comm); + MPI_Recv(&recvCount_Z,1,MPI_INT,rank_z,recvtag,comm,MPI_STATUS_IGNORE); + MPI_Send(&sendCount_Z,1,MPI_INT,rank_z,sendtag,comm); + MPI_Recv(&recvCount_z,1,MPI_INT,rank_Z,recvtag,comm,MPI_STATUS_IGNORE); + + MPI_Send(&sendCount_xy,1,MPI_INT,rank_XY,sendtag,comm); + MPI_Recv(&recvCount_XY,1,MPI_INT,rank_xy,recvtag,comm,MPI_STATUS_IGNORE); + MPI_Send(&sendCount_XY,1,MPI_INT,rank_xy,sendtag,comm); + MPI_Recv(&recvCount_xy,1,MPI_INT,rank_XY,recvtag,comm,MPI_STATUS_IGNORE); + 
MPI_Send(&sendCount_Xy,1,MPI_INT,rank_xY,sendtag,comm); + MPI_Recv(&recvCount_xY,1,MPI_INT,rank_Xy,recvtag,comm,MPI_STATUS_IGNORE); + MPI_Send(&sendCount_xY,1,MPI_INT,rank_Xy,sendtag,comm); + MPI_Recv(&recvCount_Xy,1,MPI_INT,rank_xY,recvtag,comm,MPI_STATUS_IGNORE); + + MPI_Send(&sendCount_xz,1,MPI_INT,rank_XZ,sendtag,comm); + MPI_Recv(&recvCount_XZ,1,MPI_INT,rank_xz,recvtag,comm,MPI_STATUS_IGNORE); + MPI_Send(&sendCount_XZ,1,MPI_INT,rank_xz,sendtag,comm); + MPI_Recv(&recvCount_xz,1,MPI_INT,rank_XZ,recvtag,comm,MPI_STATUS_IGNORE); + MPI_Send(&sendCount_Xz,1,MPI_INT,rank_xZ,sendtag,comm); + MPI_Recv(&recvCount_xZ,1,MPI_INT,rank_Xz,recvtag,comm,MPI_STATUS_IGNORE); + MPI_Send(&sendCount_xZ,1,MPI_INT,rank_Xz,sendtag,comm); + MPI_Recv(&recvCount_Xz,1,MPI_INT,rank_xZ,recvtag,comm,MPI_STATUS_IGNORE); + + MPI_Send(&sendCount_yz,1,MPI_INT,rank_YZ,sendtag,comm); + MPI_Recv(&recvCount_YZ,1,MPI_INT,rank_yz,recvtag,comm,MPI_STATUS_IGNORE); + MPI_Send(&sendCount_YZ,1,MPI_INT,rank_yz,sendtag,comm); + MPI_Recv(&recvCount_yz,1,MPI_INT,rank_YZ,recvtag,comm,MPI_STATUS_IGNORE); + MPI_Send(&sendCount_Yz,1,MPI_INT,rank_yZ,sendtag,comm); + MPI_Recv(&recvCount_yZ,1,MPI_INT,rank_Yz,recvtag,comm,MPI_STATUS_IGNORE); + MPI_Send(&sendCount_yZ,1,MPI_INT,rank_Yz,sendtag,comm); + MPI_Recv(&recvCount_Yz,1,MPI_INT,rank_yZ,recvtag,comm,MPI_STATUS_IGNORE); + MPI_Barrier(comm); +*/ //********************************************************************************** //...................................................................................... int *recvList_x, *recvList_y, *recvList_z, *recvList_X, *recvList_Y, *recvList_Z; int *recvList_xy, *recvList_yz, *recvList_xz, *recvList_Xy, *recvList_Yz, *recvList_xZ; @@ -796,48 +841,48 @@ int main(int argc, char **argv) // Use MPI to fill in the appropriate values for recvList // Fill in the recieve lists using MPI sendtag = recvtag = 4; - req1[0] = comm.Isend(sendList_x,sendCount_x,rank_X,sendtag); - req2[0] = comm.Irecv(recvList_X,recvCount_X,rank_x,recvtag); - req1[1] = comm.Isend(sendList_X,sendCount_X,rank_x,sendtag); - req2[1] = comm.Irecv(recvList_x,recvCount_x,rank_X,recvtag); - req1[2] = comm.Isend(sendList_y,sendCount_y,rank_Y,sendtag); - req2[2] = comm.Irecv(recvList_Y,recvCount_Y,rank_y,recvtag); - req1[3] = comm.Isend(sendList_Y,sendCount_Y,rank_y,sendtag); - req2[3] = comm.Irecv(recvList_y,recvCount_y,rank_Y,recvtag); - req1[4] = comm.Isend(sendList_z,sendCount_z,rank_Z,sendtag); - req2[4] = comm.Irecv(recvList_Z,recvCount_Z,rank_z,recvtag); - req1[5] = comm.Isend(sendList_Z,sendCount_Z,rank_z,sendtag); - req2[5] = comm.Irecv(recvList_z,recvCount_z,rank_Z,recvtag); + MPI_Isend(sendList_x, sendCount_x,MPI_INT,rank_X,sendtag,comm,&req1[0]); + MPI_Irecv(recvList_X, recvCount_X,MPI_INT,rank_x,recvtag,comm,&req2[0]); + MPI_Isend(sendList_X, sendCount_X,MPI_INT,rank_x,sendtag,comm,&req1[1]); + MPI_Irecv(recvList_x, recvCount_x,MPI_INT,rank_X,recvtag,comm,&req2[1]); + MPI_Isend(sendList_y, sendCount_y,MPI_INT,rank_Y,sendtag,comm,&req1[2]); + MPI_Irecv(recvList_Y, recvCount_Y,MPI_INT,rank_y,recvtag,comm,&req2[2]); + MPI_Isend(sendList_Y, sendCount_Y,MPI_INT,rank_y,sendtag,comm,&req1[3]); + MPI_Irecv(recvList_y, recvCount_y,MPI_INT,rank_Y,recvtag,comm,&req2[3]); + MPI_Isend(sendList_z, sendCount_z,MPI_INT,rank_Z,sendtag,comm,&req1[4]); + MPI_Irecv(recvList_Z, recvCount_Z,MPI_INT,rank_z,recvtag,comm,&req2[4]); + MPI_Isend(sendList_Z, sendCount_Z,MPI_INT,rank_z,sendtag,comm,&req1[5]); + MPI_Irecv(recvList_z, recvCount_z,MPI_INT,rank_Z,recvtag,comm,&req2[5]); - req1[6] = 
comm.Isend(sendList_xy,sendCount_xy,rank_XY,sendtag); - req2[6] = comm.Irecv(recvList_XY,recvCount_XY,rank_xy,recvtag); - req1[7] = comm.Isend(sendList_XY,sendCount_XY,rank_xy,sendtag); - req2[7] = comm.Irecv(recvList_xy,recvCount_xy,rank_XY,recvtag); - req1[8] = comm.Isend(sendList_Xy,sendCount_Xy,rank_xY,sendtag); - req2[8] = comm.Irecv(recvList_xY,recvCount_xY,rank_Xy,recvtag); - req1[9] = comm.Isend(sendList_xY,sendCount_xY,rank_Xy,sendtag); - req2[9] = comm.Irecv(recvList_Xy,recvCount_Xy,rank_xY,recvtag); + MPI_Isend(sendList_xy, sendCount_xy,MPI_INT,rank_XY,sendtag,comm,&req1[6]); + MPI_Irecv(recvList_XY, recvCount_XY,MPI_INT,rank_xy,recvtag,comm,&req2[6]); + MPI_Isend(sendList_XY, sendCount_XY,MPI_INT,rank_xy,sendtag,comm,&req1[7]); + MPI_Irecv(recvList_xy, recvCount_xy,MPI_INT,rank_XY,recvtag,comm,&req2[7]); + MPI_Isend(sendList_Xy, sendCount_Xy,MPI_INT,rank_xY,sendtag,comm,&req1[8]); + MPI_Irecv(recvList_xY, recvCount_xY,MPI_INT,rank_Xy,recvtag,comm,&req2[8]); + MPI_Isend(sendList_xY, sendCount_xY,MPI_INT,rank_Xy,sendtag,comm,&req1[9]); + MPI_Irecv(recvList_Xy, recvCount_Xy,MPI_INT,rank_xY,recvtag,comm,&req2[9]); - req1[10] = comm.Isend(sendList_xz,sendCount_xz,rank_XZ,sendtag); - req2[10] = comm.Irecv(recvList_XZ,recvCount_XZ,rank_xz,recvtag); - req1[11] = comm.Isend(sendList_XZ,sendCount_XZ,rank_xz,sendtag); - req2[11] = comm.Irecv(recvList_xz,recvCount_xz,rank_XZ,recvtag); - req1[12] = comm.Isend(sendList_Xz,sendCount_Xz,rank_xZ,sendtag); - req2[12] = comm.Irecv(recvList_xZ,recvCount_xZ,rank_Xz,recvtag); - req1[13] = comm.Isend(sendList_xZ,sendCount_xZ,rank_Xz,sendtag); - req2[13] = comm.Irecv(recvList_Xz,recvCount_Xz,rank_xZ,recvtag); + MPI_Isend(sendList_xz, sendCount_xz,MPI_INT,rank_XZ,sendtag,comm,&req1[10]); + MPI_Irecv(recvList_XZ, recvCount_XZ,MPI_INT,rank_xz,recvtag,comm,&req2[10]); + MPI_Isend(sendList_XZ, sendCount_XZ,MPI_INT,rank_xz,sendtag,comm,&req1[11]); + MPI_Irecv(recvList_xz, recvCount_xz,MPI_INT,rank_XZ,recvtag,comm,&req2[11]); + MPI_Isend(sendList_Xz, sendCount_Xz,MPI_INT,rank_xZ,sendtag,comm,&req1[12]); + MPI_Irecv(recvList_xZ, recvCount_xZ,MPI_INT,rank_Xz,recvtag,comm,&req2[12]); + MPI_Isend(sendList_xZ, sendCount_xZ,MPI_INT,rank_Xz,sendtag,comm,&req1[13]); + MPI_Irecv(recvList_Xz, recvCount_Xz,MPI_INT,rank_xZ,recvtag,comm,&req2[13]); - req1[14] = comm.Isend(sendList_yz,sendCount_yz,rank_YZ,sendtag); - req2[14] = comm.Irecv(recvList_YZ,recvCount_YZ,rank_yz,recvtag); - req1[15] = comm.Isend(sendList_YZ,sendCount_YZ,rank_yz,sendtag); - req2[15] = comm.Irecv(recvList_yz,recvCount_yz,rank_YZ,recvtag); - req1[16] = comm.Isend(sendList_Yz,sendCount_Yz,rank_yZ,sendtag); - req2[16] = comm.Irecv(recvList_yZ,recvCount_yZ,rank_Yz,recvtag); - req1[17] = comm.Isend(sendList_yZ,sendCount_yZ,rank_Yz,sendtag); - req2[17] = comm.Irecv(recvList_Yz,recvCount_Yz,rank_yZ,recvtag); - comm.waitAll(18,req1); - comm.waitAll(18,req2); - comm.barrier(); + MPI_Isend(sendList_yz, sendCount_yz,MPI_INT,rank_YZ,sendtag,comm,&req1[14]); + MPI_Irecv(recvList_YZ, recvCount_YZ,MPI_INT,rank_yz,recvtag,comm,&req2[14]); + MPI_Isend(sendList_YZ, sendCount_YZ,MPI_INT,rank_yz,sendtag,comm,&req1[15]); + MPI_Irecv(recvList_yz, recvCount_yz,MPI_INT,rank_YZ,recvtag,comm,&req2[15]); + MPI_Isend(sendList_Yz, sendCount_Yz,MPI_INT,rank_yZ,sendtag,comm,&req1[16]); + MPI_Irecv(recvList_yZ, recvCount_yZ,MPI_INT,rank_Yz,recvtag,comm,&req2[16]); + MPI_Isend(sendList_yZ, sendCount_yZ,MPI_INT,rank_Yz,sendtag,comm,&req1[17]); + MPI_Irecv(recvList_Yz, recvCount_Yz,MPI_INT,rank_yZ,recvtag,comm,&req2[17]); + 
MPI_Waitall(18,req1,stat1); + MPI_Waitall(18,req2,stat2); + MPI_Barrier(comm); //...................................................................................... for (int idx=0; idxkeyExists( "GridFile" )){ // Read the local domain data - auto input_id = readMicroCT( *domain_db, comm ); + auto input_id = readMicroCT( *domain_db, MPI_COMM_WORLD ); // Fill the halo (assuming GCW of 1) array size0 = { (int) input_id.size(0), (int) input_id.size(1), (int) input_id.size(2) }; ArraySize size1 = { (size_t) Mask->Nx, (size_t) Mask->Ny, (size_t) Mask->Nz }; ASSERT( (int) size1[0] == size0[0]+2 && (int) size1[1] == size0[1]+2 && (int) size1[2] == size0[2]+2 ); - fillHalo fill( comm, Mask->rank_info, size0, { 1, 1, 1 }, 0, 1 ); + fillHalo fill( MPI_COMM_WORLD, Mask->rank_info, size0, { 1, 1, 1 }, 0, 1 ); Array id_view; id_view.viewRaw( size1, Mask->id ); fill.copy( input_id, id_view ); @@ -652,7 +652,7 @@ void ScaLBL_ColorModel::Run(){ double starttime,stoptime,cputime; ScaLBL_DeviceBarrier(); comm.barrier(); - starttime = Utilities::MPI::time(); + starttime = MPI_Wtime(); //......................................... //************ MAIN ITERATION LOOP ***************************************/ @@ -991,7 +991,7 @@ void ScaLBL_ColorModel::Run(){ //************************************************************************ ScaLBL_DeviceBarrier(); comm.barrier(); - stoptime = Utilities::MPI::time(); + stoptime = MPI_Wtime(); if (rank==0) printf("-------------------------------------------------------------------\n"); // Compute the walltime per timestep cputime = (stoptime - starttime)/timestep; diff --git a/models/DFHModel.cpp b/models/DFHModel.cpp index 9709b107..ced5853f 100644 --- a/models/DFHModel.cpp +++ b/models/DFHModel.cpp @@ -487,7 +487,7 @@ void ScaLBL_DFHModel::Run(){ double starttime,stoptime,cputime; ScaLBL_DeviceBarrier(); comm.barrier(); - starttime = Utilities::MPI::time(); + starttime = MPI_Wtime(); //......................................... 
//************ MAIN ITERATION LOOP ***************************************/ @@ -583,7 +583,7 @@ void ScaLBL_DFHModel::Run(){ //************************************************************************ ScaLBL_DeviceBarrier(); comm.barrier(); - stoptime = Utilities::MPI::time(); + stoptime = MPI_Wtime(); if (rank==0) printf("-------------------------------------------------------------------\n"); // Compute the walltime per timestep cputime = (stoptime - starttime)/timestep; diff --git a/models/MRTModel.cpp b/models/MRTModel.cpp index 60847e54..23925930 100644 --- a/models/MRTModel.cpp +++ b/models/MRTModel.cpp @@ -227,7 +227,7 @@ void ScaLBL_MRTModel::Run(){ double starttime,stoptime,cputime; ScaLBL_DeviceBarrier(); comm.barrier(); - starttime = Utilities::MPI::time(); + starttime = MPI_Wtime(); if (rank==0) printf("Beginning AA timesteps, timestepMax = %i \n", timestepMax); if (rank==0) printf("********************************************************\n"); timestep=0; @@ -325,7 +325,7 @@ void ScaLBL_MRTModel::Run(){ } } //************************************************************************/ - stoptime = Utilities::MPI::time(); + stoptime = MPI_Wtime(); if (rank==0) printf("-------------------------------------------------------------------\n"); // Compute the walltime per timestep cputime = (stoptime - starttime)/timestep; diff --git a/tests/BlobAnalyzeParallel.cpp b/tests/BlobAnalyzeParallel.cpp index 773309f9..48e9e230 100644 --- a/tests/BlobAnalyzeParallel.cpp +++ b/tests/BlobAnalyzeParallel.cpp @@ -138,16 +138,16 @@ int main(int argc, char **argv) } comm.barrier(); // Computational domain - comm.bcast(&nx,1,0); - comm.bcast(&ny,1,0); - comm.bcast(&nz,1,0); - comm.bcast(&nprocx,1,0); - comm.bcast(&nprocy,1,0); - comm.bcast(&nprocz,1,0); - comm.bcast(&nspheres,1,0); - comm.bcast(&Lx,1,0); - comm.bcast(&Ly,1,0); - comm.bcast(&Lz,1,0); + MPI_Bcast(&nx,1,MPI_INT,0,comm); + MPI_Bcast(&ny,1,MPI_INT,0,comm); + MPI_Bcast(&nz,1,MPI_INT,0,comm); + MPI_Bcast(&nprocx,1,MPI_INT,0,comm); + MPI_Bcast(&nprocy,1,MPI_INT,0,comm); + MPI_Bcast(&nprocz,1,MPI_INT,0,comm); + MPI_Bcast(&nspheres,1,MPI_INT,0,comm); + MPI_Bcast(&Lx,1,MPI_DOUBLE,0,comm); + MPI_Bcast(&Ly,1,MPI_DOUBLE,0,comm); + MPI_Bcast(&Lz,1,MPI_DOUBLE,0,comm); //................................................. comm.barrier(); @@ -291,7 +291,7 @@ int main(int argc, char **argv) } Dm.CommInit(); // Initialize communications for domains - sum_global = comm.sumReduce( sum ); + MPI_Allreduce(&sum,&sum_global,1,MPI_DOUBLE,MPI_SUM,comm); porosity = sum_global/Dm.Volume; if (rank==0) printf("Porosity = %f \n",porosity); diff --git a/tests/GenerateSphereTest.cpp b/tests/GenerateSphereTest.cpp index d4340964..43434092 100644 --- a/tests/GenerateSphereTest.cpp +++ b/tests/GenerateSphereTest.cpp @@ -213,24 +213,42 @@ inline void MorphOpen(DoubleArray SignDist, char *id, Domain &Dm, int nx, int ny PackID(Dm.sendList_yZ, Dm.sendCount_yZ ,sendID_yZ, id); PackID(Dm.sendList_YZ, Dm.sendCount_YZ ,sendID_YZ, id); //...................................................................................... 
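The model and test hunks above also swap the wrapper's reductions and timers for their MPI equivalents: comm.sumReduce(sum) becomes an explicit MPI_Allreduce with MPI_SUM (so every rank holds the global sum used for the porosity), and Utilities::MPI::time() becomes MPI_Wtime() in the model Run() methods. A minimal sketch of the reduction (illustrative wrapper function):

    #include <mpi.h>

    // Global sum over all ranks; the result is available on every rank.
    double global_sum(MPI_Comm comm, double local_sum)
    {
        double total = 0.0;
        MPI_Allreduce(&local_sum, &total, 1, MPI_DOUBLE, MPI_SUM, comm);
        return total;   // e.g. porosity = global_sum(comm, sum) / Dm.Volume
    }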
- Dm.Comm.sendrecv(sendID_x,Dm.sendCount_x,Dm.rank_x(),sendtag,recvID_X,Dm.recvCount_X,Dm.rank_X(),recvtag); - Dm.Comm.sendrecv(sendID_X,Dm.sendCount_X,Dm.rank_X(),sendtag,recvID_x,Dm.recvCount_x,Dm.rank_x(),recvtag); - Dm.Comm.sendrecv(sendID_y,Dm.sendCount_y,Dm.rank_y(),sendtag,recvID_Y,Dm.recvCount_Y,Dm.rank_Y(),recvtag); - Dm.Comm.sendrecv(sendID_Y,Dm.sendCount_Y,Dm.rank_Y(),sendtag,recvID_y,Dm.recvCount_y,Dm.rank_y(),recvtag); - Dm.Comm.sendrecv(sendID_z,Dm.sendCount_z,Dm.rank_z(),sendtag,recvID_Z,Dm.recvCount_Z,Dm.rank_Z(),recvtag); - Dm.Comm.sendrecv(sendID_Z,Dm.sendCount_Z,Dm.rank_Z(),sendtag,recvID_z,Dm.recvCount_z,Dm.rank_z(),recvtag); - Dm.Comm.sendrecv(sendID_xy,Dm.sendCount_xy,Dm.rank_xy(),sendtag,recvID_XY,Dm.recvCount_XY,Dm.rank_XY(),recvtag); - Dm.Comm.sendrecv(sendID_XY,Dm.sendCount_XY,Dm.rank_XY(),sendtag,recvID_xy,Dm.recvCount_xy,Dm.rank_xy(),recvtag); - Dm.Comm.sendrecv(sendID_Xy,Dm.sendCount_Xy,Dm.rank_Xy(),sendtag,recvID_xY,Dm.recvCount_xY,Dm.rank_xY(),recvtag); - Dm.Comm.sendrecv(sendID_xY,Dm.sendCount_xY,Dm.rank_xY(),sendtag,recvID_Xy,Dm.recvCount_Xy,Dm.rank_Xy(),recvtag); - Dm.Comm.sendrecv(sendID_xz,Dm.sendCount_xz,Dm.rank_xz(),sendtag,recvID_XZ,Dm.recvCount_XZ,Dm.rank_XZ(),recvtag); - Dm.Comm.sendrecv(sendID_XZ,Dm.sendCount_XZ,Dm.rank_XZ(),sendtag,recvID_xz,Dm.recvCount_xz,Dm.rank_xz(),recvtag); - Dm.Comm.sendrecv(sendID_Xz,Dm.sendCount_Xz,Dm.rank_Xz(),sendtag,recvID_xZ,Dm.recvCount_xZ,Dm.rank_xZ(),recvtag); - Dm.Comm.sendrecv(sendID_xZ,Dm.sendCount_xZ,Dm.rank_xZ(),sendtag,recvID_Xz,Dm.recvCount_Xz,Dm.rank_Xz(),recvtag); - Dm.Comm.sendrecv(sendID_yz,Dm.sendCount_yz,Dm.rank_yz(),sendtag,recvID_YZ,Dm.recvCount_YZ,Dm.rank_YZ(),recvtag); - Dm.Comm.sendrecv(sendID_YZ,Dm.sendCount_YZ,Dm.rank_YZ(),sendtag,recvID_yz,Dm.recvCount_yz,Dm.rank_yz(),recvtag); - Dm.Comm.sendrecv(sendID_Yz,Dm.sendCount_Yz,Dm.rank_Yz(),sendtag,recvID_yZ,Dm.recvCount_yZ,Dm.rank_yZ(),recvtag); - Dm.Comm.sendrecv(sendID_yZ,Dm.sendCount_yZ,Dm.rank_yZ(),sendtag,recvID_Yz,Dm.recvCount_Yz,Dm.rank_Yz(),recvtag); + MPI_Sendrecv(sendID_x,Dm.sendCount_x,MPI_CHAR,Dm.rank_x(),sendtag, + recvID_X,Dm.recvCount_X,MPI_CHAR,Dm.rank_X(),recvtag,Dm.Comm.getCommunicator(),MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_X,Dm.sendCount_X,MPI_CHAR,Dm.rank_X(),sendtag, + recvID_x,Dm.recvCount_x,MPI_CHAR,Dm.rank_x(),recvtag,Dm.Comm.getCommunicator(),MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_y,Dm.sendCount_y,MPI_CHAR,Dm.rank_y(),sendtag, + recvID_Y,Dm.recvCount_Y,MPI_CHAR,Dm.rank_Y(),recvtag,Dm.Comm.getCommunicator(),MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_Y,Dm.sendCount_Y,MPI_CHAR,Dm.rank_Y(),sendtag, + recvID_y,Dm.recvCount_y,MPI_CHAR,Dm.rank_y(),recvtag,Dm.Comm.getCommunicator(),MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_z,Dm.sendCount_z,MPI_CHAR,Dm.rank_z(),sendtag, + recvID_Z,Dm.recvCount_Z,MPI_CHAR,Dm.rank_Z(),recvtag,Dm.Comm.getCommunicator(),MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_Z,Dm.sendCount_Z,MPI_CHAR,Dm.rank_Z(),sendtag, + recvID_z,Dm.recvCount_z,MPI_CHAR,Dm.rank_z(),recvtag,Dm.Comm.getCommunicator(),MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_xy,Dm.sendCount_xy,MPI_CHAR,Dm.rank_xy(),sendtag, + recvID_XY,Dm.recvCount_XY,MPI_CHAR,Dm.rank_XY(),recvtag,Dm.Comm.getCommunicator(),MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_XY,Dm.sendCount_XY,MPI_CHAR,Dm.rank_XY(),sendtag, + recvID_xy,Dm.recvCount_xy,MPI_CHAR,Dm.rank_xy(),recvtag,Dm.Comm.getCommunicator(),MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_Xy,Dm.sendCount_Xy,MPI_CHAR,Dm.rank_Xy(),sendtag, + 
recvID_xY,Dm.recvCount_xY,MPI_CHAR,Dm.rank_xY(),recvtag,Dm.Comm.getCommunicator(),MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_xY,Dm.sendCount_xY,MPI_CHAR,Dm.rank_xY(),sendtag, + recvID_Xy,Dm.recvCount_Xy,MPI_CHAR,Dm.rank_Xy(),recvtag,Dm.Comm.getCommunicator(),MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_xz,Dm.sendCount_xz,MPI_CHAR,Dm.rank_xz(),sendtag, + recvID_XZ,Dm.recvCount_XZ,MPI_CHAR,Dm.rank_XZ(),recvtag,Dm.Comm.getCommunicator(),MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_XZ,Dm.sendCount_XZ,MPI_CHAR,Dm.rank_XZ(),sendtag, + recvID_xz,Dm.recvCount_xz,MPI_CHAR,Dm.rank_xz(),recvtag,Dm.Comm.getCommunicator(),MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_Xz,Dm.sendCount_Xz,MPI_CHAR,Dm.rank_Xz(),sendtag, + recvID_xZ,Dm.recvCount_xZ,MPI_CHAR,Dm.rank_xZ(),recvtag,Dm.Comm.getCommunicator(),MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_xZ,Dm.sendCount_xZ,MPI_CHAR,Dm.rank_xZ(),sendtag, + recvID_Xz,Dm.recvCount_Xz,MPI_CHAR,Dm.rank_Xz(),recvtag,Dm.Comm.getCommunicator(),MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_yz,Dm.sendCount_yz,MPI_CHAR,Dm.rank_yz(),sendtag, + recvID_YZ,Dm.recvCount_YZ,MPI_CHAR,Dm.rank_YZ(),recvtag,Dm.Comm.getCommunicator(),MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_YZ,Dm.sendCount_YZ,MPI_CHAR,Dm.rank_YZ(),sendtag, + recvID_yz,Dm.recvCount_yz,MPI_CHAR,Dm.rank_yz(),recvtag,Dm.Comm.getCommunicator(),MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_Yz,Dm.sendCount_Yz,MPI_CHAR,Dm.rank_Yz(),sendtag, + recvID_yZ,Dm.recvCount_yZ,MPI_CHAR,Dm.rank_yZ(),recvtag,Dm.Comm.getCommunicator(),MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_yZ,Dm.sendCount_yZ,MPI_CHAR,Dm.rank_yZ(),sendtag, + recvID_Yz,Dm.recvCount_Yz,MPI_CHAR,Dm.rank_Yz(),recvtag,Dm.Comm.getCommunicator(),MPI_STATUS_IGNORE); //...................................................................................... UnpackID(Dm.recvList_x, Dm.recvCount_x ,recvID_x, id); UnpackID(Dm.recvList_X, Dm.recvCount_X ,recvID_X, id); diff --git a/tests/TestBlobAnalyze.cpp b/tests/TestBlobAnalyze.cpp index 19360fe3..63d928c1 100644 --- a/tests/TestBlobAnalyze.cpp +++ b/tests/TestBlobAnalyze.cpp @@ -190,16 +190,16 @@ int main(int argc, char **argv) } comm.barrier(); // Computational domain - comm.bcast(&nx,1,0); - comm.bcast(&ny,1,0); - comm.bcast(&nz,1,0); - comm.bcast(&nprocx,1,0); - comm.bcast(&nprocy,1,0); - comm.bcast(&nprocz,1,0); - comm.bcast(&nspheres,1,0); - comm.bcast(&Lx,1,0); - comm.bcast(&Ly,1,0); - comm.bcast(&Lz,1,0); + MPI_Bcast(&nx,1,MPI_INT,0,comm); + MPI_Bcast(&ny,1,MPI_INT,0,comm); + MPI_Bcast(&nz,1,MPI_INT,0,comm); + MPI_Bcast(&nprocx,1,MPI_INT,0,comm); + MPI_Bcast(&nprocy,1,MPI_INT,0,comm); + MPI_Bcast(&nprocz,1,MPI_INT,0,comm); + MPI_Bcast(&nspheres,1,MPI_INT,0,comm); + MPI_Bcast(&Lx,1,MPI_DOUBLE,0,comm); + MPI_Bcast(&Ly,1,MPI_DOUBLE,0,comm); + MPI_Bcast(&Lz,1,MPI_DOUBLE,0,comm); //................................................. comm.barrier(); @@ -255,10 +255,10 @@ int main(int argc, char **argv) comm.barrier(); // Broadcast the sphere packing to all processes - comm.bcast(cx,nspheres,0); - comm.bcast(cy,nspheres,0); - comm.bcast(cz,nspheres,0); - comm.bcast(rad,nspheres,0); + MPI_Bcast(cx,nspheres,MPI_DOUBLE,0,comm); + MPI_Bcast(cy,nspheres,MPI_DOUBLE,0,comm); + MPI_Bcast(cz,nspheres,MPI_DOUBLE,0,comm); + MPI_Bcast(rad,nspheres,MPI_DOUBLE,0,comm); //........................................................................... comm.barrier(); //....................................................................... 
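The wrapper form comm.bcast(ptr, n, root) deduced the MPI datatype from the pointer type; the raw MPI_Bcast calls above have to state it explicitly (MPI_INT for grid and process counts, MPI_DOUBLE for domain lengths and sphere data). A minimal sketch of the pattern, assuming rank 0 has already read the scalars and the sphere-packing arrays from disk (variable names follow the tests, the helper itself is hypothetical):

    #include <mpi.h>

    // Hedged sketch: broadcast setup parameters and packed sphere data from rank 0.
    void BroadcastSetup(MPI_Comm comm, int &nx, int &ny, int &nz,
                        double *cx, double *cy, double *cz, double *rad, int nspheres) {
        // scalars: count 1, explicit datatype, root rank 0
        MPI_Bcast(&nx, 1, MPI_INT, 0, comm);
        MPI_Bcast(&ny, 1, MPI_INT, 0, comm);
        MPI_Bcast(&nz, 1, MPI_INT, 0, comm);
        // arrays: count is the number of elements, not bytes
        MPI_Bcast(cx,  nspheres, MPI_DOUBLE, 0, comm);
        MPI_Bcast(cy,  nspheres, MPI_DOUBLE, 0, comm);
        MPI_Bcast(cz,  nspheres, MPI_DOUBLE, 0, comm);
        MPI_Bcast(rad, nspheres, MPI_DOUBLE, 0, comm);
    }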
diff --git a/tests/TestBubble.cpp b/tests/TestBubble.cpp index 6eb74b37..e7e0ced8 100644 --- a/tests/TestBubble.cpp +++ b/tests/TestBubble.cpp @@ -45,6 +45,7 @@ int main(int argc, char **argv) int nprocx,nprocy,nprocz; MPI_Request req1[18],req2[18]; + MPI_Status stat1[18],stat2[18]; if (rank == 0){ printf("********************************************************\n"); @@ -433,7 +434,7 @@ int main(int argc, char **argv) //.......create and start timer............ double starttime,stoptime,cputime; comm.barrier(); - starttime = Utilities::MPI::time(); + starttime = MPI_Wtime(); //......................................... //........................................................................... // MAIN VARIABLES INITIALIZED HERE @@ -808,25 +809,25 @@ int main(int argc, char **argv) } //........................................................................... comm.barrier(); - nwp_volume_global = comm.sumReduce( nwp_volume ); - awn_global = comm.sumReduce( awn ); - ans_global = comm.sumReduce( ans ); - aws_global = comm.sumReduce( aws ); - lwns_global = comm.sumReduce( lwns ); - As_global = comm.sumReduce( As ); - Jwn_global = comm.sumReduce( Jwn ); - efawns_global = comm.sumReduce( efawns ); + MPI_Allreduce(&nwp_volume,&nwp_volume_global,1,MPI_DOUBLE,MPI_SUM,comm); + MPI_Allreduce(&awn,&awn_global,1,MPI_DOUBLE,MPI_SUM,comm); + MPI_Allreduce(&ans,&ans_global,1,MPI_DOUBLE,MPI_SUM,comm); + MPI_Allreduce(&aws,&aws_global,1,MPI_DOUBLE,MPI_SUM,comm); + MPI_Allreduce(&lwns,&lwns_global,1,MPI_DOUBLE,MPI_SUM,comm); + MPI_Allreduce(&As,&As_global,1,MPI_DOUBLE,MPI_SUM,comm); + MPI_Allreduce(&Jwn,&Jwn_global,1,MPI_DOUBLE,MPI_SUM,comm); + MPI_Allreduce(&efawns,&efawns_global,1,MPI_DOUBLE,MPI_SUM,comm); // Phase averages - vol_w_global = comm.sumReduce( vol_w ); - vol_n_global = comm.sumReduce( vol_n ); - paw_global = comm.sumReduce( paw ); - pan_global = comm.sumReduce( pan ); - vaw_global(0) = comm.sumReduce( vaw(0) ); - van_global(0) = comm.sumReduce( van(0) ); - vawn_global(0) = comm.sumReduce( vawn(0) ); - Gwn_global(0) = comm.sumReduce( Gwn(0) ); - Gns_global(0) = comm.sumReduce( Gns(0) ); - Gws_global(0) = comm.sumReduce( Gws(0) ); + MPI_Allreduce(&vol_w,&vol_w_global,1,MPI_DOUBLE,MPI_SUM,comm); + MPI_Allreduce(&vol_n,&vol_n_global,1,MPI_DOUBLE,MPI_SUM,comm); + MPI_Allreduce(&paw,&paw_global,1,MPI_DOUBLE,MPI_SUM,comm); + MPI_Allreduce(&pan,&pan_global,1,MPI_DOUBLE,MPI_SUM,comm); + MPI_Allreduce(&vaw(0),&vaw_global(0),3,MPI_DOUBLE,MPI_SUM,comm); + MPI_Allreduce(&van(0),&van_global(0),3,MPI_DOUBLE,MPI_SUM,comm); + MPI_Allreduce(&vawn(0),&vawn_global(0),3,MPI_DOUBLE,MPI_SUM,comm); + MPI_Allreduce(&Gwn(0),&Gwn_global(0),6,MPI_DOUBLE,MPI_SUM,comm); + MPI_Allreduce(&Gns(0),&Gns_global(0),6,MPI_DOUBLE,MPI_SUM,comm); + MPI_Allreduce(&Gws(0),&Gws_global(0),6,MPI_DOUBLE,MPI_SUM,comm); comm.barrier(); //......................................................................... 
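The comm.sumReduce() calls reduced one element at a time; the raw replacements reduce a scalar with count 1 and reduce the vector and tensor accumulators (vaw, van, Gwn, ...) in a single call by passing the address of element 0 with count 3 or 6. A small sketch of the same pattern, written with plain contiguous arrays rather than the DoubleArray operator() used in the tests (that substitution is an assumption of the sketch):

    #include <mpi.h>

    // Hedged sketch of the sum-reduction pattern: every rank contributes its
    // local partial sums and every rank receives the global totals (MPI_Allreduce).
    void ReduceAverages(MPI_Comm comm, double awn, double &awn_global,
                        const double vaw[3], double vaw_global[3],
                        const double Gwn[6], double Gwn_global[6]) {
        MPI_Allreduce(&awn, &awn_global, 1, MPI_DOUBLE, MPI_SUM, comm);   // scalar
        MPI_Allreduce(vaw,  vaw_global,  3, MPI_DOUBLE, MPI_SUM, comm);   // velocity vector
        MPI_Allreduce(Gwn,  Gwn_global,  6, MPI_DOUBLE, MPI_SUM, comm);   // symmetric tensor
    }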
// Compute the change in the total surface energy based on the defined interval @@ -951,7 +952,7 @@ int main(int argc, char **argv) //************************************************************************/ ScaLBL_DeviceBarrier(); comm.barrier(); - stoptime = Utilities::MPI::time(); + stoptime = MPI_Wtime(); if (rank==0) printf("-------------------------------------------------------------------\n"); // Compute the walltime per timestep cputime = (stoptime - starttime)/timestep; diff --git a/tests/TestBubbleDFH.cpp b/tests/TestBubbleDFH.cpp index 8b4f1a9b..7f5d0047 100644 --- a/tests/TestBubbleDFH.cpp +++ b/tests/TestBubbleDFH.cpp @@ -387,7 +387,7 @@ int main(int argc, char **argv) double starttime,stoptime,cputime; ScaLBL_DeviceBarrier(); comm.barrier(); - starttime = Utilities::MPI::time(); + starttime = MPI_Wtime(); //......................................... err = 1.0; @@ -487,7 +487,7 @@ int main(int argc, char **argv) //************************************************************************ ScaLBL_DeviceBarrier(); comm.barrier(); - stoptime = Utilities::MPI::time(); + stoptime = MPI_Wtime(); if (rank==0) printf("-------------------------------------------------------------------\n"); // Compute the walltime per timestep cputime = (stoptime - starttime)/timestep; diff --git a/tests/TestColorGrad.cpp b/tests/TestColorGrad.cpp index 2566f8c0..df1c1daf 100644 --- a/tests/TestColorGrad.cpp +++ b/tests/TestColorGrad.cpp @@ -114,16 +114,16 @@ int main(int argc, char **argv) // Broadcast simulation parameters from rank 0 to all other procs comm.barrier(); //................................................. - comm.bcast(&Nx,1,0); - comm.bcast(&Ny,1,0); - comm.bcast(&Nz,1,0); - comm.bcast(&nprocx,1,0); - comm.bcast(&nprocy,1,0); - comm.bcast(&nprocz,1,0); - comm.bcast(&nspheres,1,0); - comm.bcast(&Lx,1,0); - comm.bcast(&Ly,1,0); - comm.bcast(&Lz,1,0); + MPI_Bcast(&Nx,1,MPI_INT,0,comm); + MPI_Bcast(&Ny,1,MPI_INT,0,comm); + MPI_Bcast(&Nz,1,MPI_INT,0,comm); + MPI_Bcast(&nprocx,1,MPI_INT,0,comm); + MPI_Bcast(&nprocy,1,MPI_INT,0,comm); + MPI_Bcast(&nprocz,1,MPI_INT,0,comm); + MPI_Bcast(&nspheres,1,MPI_INT,0,comm); + MPI_Bcast(&Lx,1,MPI_DOUBLE,0,comm); + MPI_Bcast(&Ly,1,MPI_DOUBLE,0,comm); + MPI_Bcast(&Lz,1,MPI_DOUBLE,0,comm); //................................................. comm.barrier(); // ************************************************************** diff --git a/tests/TestCommD3Q19.cpp b/tests/TestCommD3Q19.cpp index c4a045ae..d2799355 100644 --- a/tests/TestCommD3Q19.cpp +++ b/tests/TestCommD3Q19.cpp @@ -378,7 +378,7 @@ int main(int argc, char **argv) //.......create and start timer............ double starttime,stoptime,cputime; comm.barrier(); - starttime = Utilities::MPI::time(); + starttime = MPI_Wtime(); //......................................... @@ -403,7 +403,7 @@ int main(int argc, char **argv) //................................................................... 
} //************************************************************************/ - stoptime = Utilities::MPI::time(); + stoptime = MPI_Wtime(); // cout << "CPU time: " << (stoptime - starttime) << " seconds" << endl; cputime = stoptime - starttime; // cout << "Lattice update rate: "<< double(Nx*Ny*Nz*timestep)/cputime/1000000 << " MLUPS" << endl; diff --git a/tests/TestForceD3Q19.cpp b/tests/TestForceD3Q19.cpp index 31151584..f8569624 100644 --- a/tests/TestForceD3Q19.cpp +++ b/tests/TestForceD3Q19.cpp @@ -450,7 +450,7 @@ int main (int argc, char **argv) for (int i=0; iSendD3Q19(dist, &dist[10*Np]); @@ -244,7 +244,7 @@ int main(int argc, char **argv) //************************************************************************/ - stoptime = Utilities::MPI::time(); + stoptime = MPI_Wtime(); // cout << "CPU time: " << (stoptime - starttime) << " seconds" << endl; cputime = stoptime - starttime; // cout << "Lattice update rate: "<< double(Nx*Ny*Nz*timestep)/cputime/1000000 << " MLUPS" << endl; diff --git a/tests/TestMRT.cpp b/tests/TestMRT.cpp index e4acba99..5f2c4449 100644 --- a/tests/TestMRT.cpp +++ b/tests/TestMRT.cpp @@ -580,16 +580,16 @@ int main(int argc, char **argv) // Broadcast simulation parameters from rank 0 to all other procs comm.barrier(); //................................................. - comm.bcast(&Nx,1,0); - comm.bcast(&Ny,1,0); - comm.bcast(&Nz,1,0); - comm.bcast(&nprocx,1,0); - comm.bcast(&nprocy,1,0); - comm.bcast(&nprocz,1,0); - comm.bcast(&nspheres,1,0); - comm.bcast(&Lx,1,0); - comm.bcast(&Ly,1,0); - comm.bcast(&Lz,1,0); + MPI_Bcast(&Nx,1,MPI_INT,0,comm); + MPI_Bcast(&Ny,1,MPI_INT,0,comm); + MPI_Bcast(&Nz,1,MPI_INT,0,comm); + MPI_Bcast(&nprocx,1,MPI_INT,0,comm); + MPI_Bcast(&nprocy,1,MPI_INT,0,comm); + MPI_Bcast(&nprocz,1,MPI_INT,0,comm); + MPI_Bcast(&nspheres,1,MPI_INT,0,comm); + MPI_Bcast(&Lx,1,MPI_DOUBLE,0,comm); + MPI_Bcast(&Ly,1,MPI_DOUBLE,0,comm); + MPI_Bcast(&Lz,1,MPI_DOUBLE,0,comm); //................................................. 
comm.barrier(); // ************************************************************** @@ -668,7 +668,7 @@ int main(int argc, char **argv) } } comm.barrier(); - sum = comm.sumReduce( sum_local ); + MPI_Allreduce(&sum_local,&sum,1,MPI_DOUBLE,MPI_SUM,comm); porosity = sum*iVol_global; if (rank==0) printf("Media porosity = %f \n",porosity); @@ -731,7 +731,7 @@ int main(int argc, char **argv) double starttime,stoptime,cputime; ScaLBL_DeviceBarrier(); comm.barrier(); - starttime = Utilities::MPI::time(); + starttime = MPI_Wtime(); while (timestep < timesteps) { @@ -752,7 +752,7 @@ int main(int argc, char **argv) } //************************************************************************/ - stoptime = Utilities::MPI::time(); + stoptime = MPI_Wtime(); // cout << "CPU time: " << (stoptime - starttime) << " seconds" << endl; cputime = stoptime - starttime; // cout << "Lattice update rate: "<< double(Nx*Ny*Nz*timestep)/cputime/1000000 << " MLUPS" << endl; @@ -795,7 +795,7 @@ int main(int argc, char **argv) } } } - sum = comm.sumReduce( sum_local ); + MPI_Allreduce(&sum_local,&sum,1,MPI_DOUBLE,MPI_SUM,comm); double PoreVel = sum*iVol_global; if (rank==0) printf("Velocity = %f \n",PoreVel); diff --git a/tests/TestMicroCTReader.cpp b/tests/TestMicroCTReader.cpp index 52a5b9d3..9a54610c 100644 --- a/tests/TestMicroCTReader.cpp +++ b/tests/TestMicroCTReader.cpp @@ -62,6 +62,7 @@ int main(int argc, char **argv) int N_errors = ut.NumFailGlobal(); // Close MPI + MPI_Barrier(MPI_COMM_WORLD); MPI_Finalize(); return N_errors; } diff --git a/tests/TestMomentsD3Q19.cpp b/tests/TestMomentsD3Q19.cpp index 2660ed26..6bd3e8ff 100644 --- a/tests/TestMomentsD3Q19.cpp +++ b/tests/TestMomentsD3Q19.cpp @@ -539,7 +539,7 @@ int main (int argc, char **argv) error=count; // Finished - comm.barrier(); + MPI_Barrier(MPI_COMM_WORLD); MPI_Finalize(); return error; } diff --git a/tests/TestNetcdf.cpp b/tests/TestNetcdf.cpp index 3d0498d2..8768c9ea 100644 --- a/tests/TestNetcdf.cpp +++ b/tests/TestNetcdf.cpp @@ -116,7 +116,7 @@ int main(int argc, char **argv) PROFILE_SAVE("TestNetcdf"); // Close MPI - comm.barrier(); + MPI_Barrier(MPI_COMM_WORLD); MPI_Finalize(); return N_errors; } diff --git a/tests/TestSegDist.cpp b/tests/TestSegDist.cpp index ecb6d6b9..b5e23ec8 100644 --- a/tests/TestSegDist.cpp +++ b/tests/TestSegDist.cpp @@ -100,10 +100,10 @@ int main(int argc, char **argv) comm.barrier(); if (rank==0) printf("Initialized! Converting to Signed Distance function \n"); - double t1 = Utilities::MPI::time(); + double t1 = MPI_Wtime(); DoubleArray Distance(nx,ny,nz); CalcDist(Distance,id,Dm,{false,false,false}); - double t2 = Utilities::MPI::time(); + double t2 = MPI_Wtime(); if (rank==0) printf("Total time: %f seconds \n",t2-t1); diff --git a/tests/lb2_CMT_wia.cpp b/tests/lb2_CMT_wia.cpp index 389bc8a8..820428a3 100644 --- a/tests/lb2_CMT_wia.cpp +++ b/tests/lb2_CMT_wia.cpp @@ -292,18 +292,18 @@ int main(int argc, char **argv) //................................................................................... 
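Several of the test drivers above now place an MPI_Barrier on MPI_COMM_WORLD immediately before MPI_Finalize, so that no rank tears down the MPI library while another rank is still communicating. A minimal sketch of that shutdown sequence (the test body is elided):

    #include <mpi.h>

    int main(int argc, char **argv) {
        MPI_Init(&argc, &argv);
        // ... test body ...
        MPI_Barrier(MPI_COMM_WORLD);   // let every rank reach the end before shutdown
        MPI_Finalize();
        return 0;
    }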
// Send all the distributions - req1[0] = comm.Isend(sendbuf_x,2*sendCount_x,rank_x,sendtag); - req2[0] = comm.Irecv(recvbuf_X,2*recvCount_X,rank_X,recvtag); - req1[1] = comm.Isend(sendbuf_X,2*sendCount_X,rank_X,sendtag); - req2[1] = comm.Irecv(recvbuf_x,2*recvCount_x,rank_x,recvtag); - req1[2] = comm.Isend(sendbuf_y,2*sendCount_y,rank_y,sendtag); - req2[2] = comm.Irecv(recvbuf_Y,2*recvCount_Y,rank_Y,recvtag); - req1[3] = comm.Isend(sendbuf_Y,2*sendCount_Y,rank_Y,sendtag); - req2[3] = comm.Irecv(recvbuf_y,2*recvCount_y,rank_y,recvtag); - req1[4] = comm.Isend(sendbuf_z,2*sendCount_z,rank_z,sendtag); - req2[4] = comm.Irecv(recvbuf_Z,2*recvCount_Z,rank_Z,recvtag); - req1[5] = comm.Isend(sendbuf_Z,2*sendCount_Z,rank_Z,sendtag); - req2[5] = comm.Irecv(recvbuf_z,2*recvCount_z,rank_z,recvtag); + MPI_Isend(sendbuf_x, 2*sendCount_x,MPI_DOUBLE,rank_x,sendtag,comm,&req1[0]); + MPI_Irecv(recvbuf_X, 2*recvCount_X,MPI_DOUBLE,rank_X,recvtag,comm,&req2[0]); + MPI_Isend(sendbuf_X, 2*sendCount_X,MPI_DOUBLE,rank_X,sendtag,comm,&req1[1]); + MPI_Irecv(recvbuf_x, 2*recvCount_x,MPI_DOUBLE,rank_x,recvtag,comm,&req2[1]); + MPI_Isend(sendbuf_y, 2*sendCount_y,MPI_DOUBLE,rank_y,sendtag,comm,&req1[2]); + MPI_Irecv(recvbuf_Y, 2*recvCount_Y,MPI_DOUBLE,rank_Y,recvtag,comm,&req2[2]); + MPI_Isend(sendbuf_Y, 2*sendCount_Y,MPI_DOUBLE,rank_Y,sendtag,comm,&req1[3]); + MPI_Irecv(recvbuf_y, 2*recvCount_y,MPI_DOUBLE,rank_y,recvtag,comm,&req2[3]); + MPI_Isend(sendbuf_z, 2*sendCount_z,MPI_DOUBLE,rank_z,sendtag,comm,&req1[4]); + MPI_Irecv(recvbuf_Z, 2*recvCount_Z,MPI_DOUBLE,rank_Z,recvtag,comm,&req2[4]); + MPI_Isend(sendbuf_Z, 2*sendCount_Z,MPI_DOUBLE,rank_Z,sendtag,comm,&req1[5]); + MPI_Irecv(recvbuf_z, 2*recvCount_z,MPI_DOUBLE,rank_z,recvtag,comm,&req2[5]); */ //................................................................................... ScaLBL_D3Q7_Swap(ID, &packed_even[0], &packed_odd[0], Nx, Ny, Nz); @@ -311,8 +311,8 @@ int main(int argc, char **argv) /* //................................................................................... // Wait for completion of D3Q19 communication - comm.waitAll(6,req1); - comm.waitAll(6,req2); + MPI_Waitall(6,req1,stat1); + MPI_Waitall(6,req2,stat2); //................................................................................... // Unpack the distributions on the device //................................................................................... @@ -358,7 +358,7 @@ int main(int argc, char **argv) fclose(PHASE); // Close MPI - comm.barrier(); + MPI_Barrier(MPI_COMM_WORLD); MPI_Finalize(); return 0; } diff --git a/tests/lb2_Color_blob_wia_mpi.cpp b/tests/lb2_Color_blob_wia_mpi.cpp index e3323612..70342176 100644 --- a/tests/lb2_Color_blob_wia_mpi.cpp +++ b/tests/lb2_Color_blob_wia_mpi.cpp @@ -114,6 +114,7 @@ int main(int argc, char **argv) int rank_yz,rank_YZ,rank_yZ,rank_Yz; //********************************** MPI_Request req1[18],req2[18]; + MPI_Status stat1[18],stat2[18]; if (rank == 0){ printf("********************************************************\n"); @@ -206,36 +207,36 @@ int main(int argc, char **argv) // Broadcast simulation parameters from rank 0 to all other procs comm.barrier(); //................................................. 
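The wrapper returned a request object from Isend/Irecv; the raw calls instead fill caller-owned MPI_Request arrays, and the matching MPI_Waitall needs either an MPI_Status array of the same length (the stat1/stat2 arrays declared at the top of these tests) or MPI_STATUSES_IGNORE. A minimal sketch for one send/receive pair per face direction, assuming six neighbors and equal send and receive counts per direction (the function and argument names are illustrative only):

    #include <mpi.h>

    // Hedged sketch of the nonblocking halo exchange: post all sends and receives,
    // then wait on both request arrays. nbr_send[i]/nbr_recv[i] are the outgoing
    // and opposite (incoming) neighbor ranks for direction i, as in rank_x/rank_X.
    void ExchangeFaces(MPI_Comm comm, const int nbr_send[6], const int nbr_recv[6],
                       double *sendbuf[6], double *recvbuf[6], const int count[6], int tag)
    {
        MPI_Request req1[6], req2[6];
        MPI_Status  stat1[6], stat2[6];
        for (int i = 0; i < 6; i++) {
            MPI_Isend(sendbuf[i], count[i], MPI_DOUBLE, nbr_send[i], tag, comm, &req1[i]);
            MPI_Irecv(recvbuf[i], count[i], MPI_DOUBLE, nbr_recv[i], tag, comm, &req2[i]);
        }
        MPI_Waitall(6, req1, stat1);   // MPI_STATUSES_IGNORE also works if statuses are unused
        MPI_Waitall(6, req2, stat2);
    }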
- comm.bcast(&tau,1,0); - comm.bcast(&alpha,1,0); - comm.bcast(&beta,1,0); - comm.bcast(&das,1,0); - comm.bcast(&dbs,1,0); - comm.bcast(&phi_s,1,0); - comm.bcast(&wp_saturation,1,0); - comm.bcast(&pBC,1,0); - comm.bcast(&Restart,1,0); - comm.bcast(&din,1,0); - comm.bcast(&dout,1,0); - comm.bcast(&Fx,1,0); - comm.bcast(&Fy,1,0); - comm.bcast(&Fz,1,0); - comm.bcast(×tepMax,1,0); - comm.bcast(&interval,1,0); - comm.bcast(&tol,1,0); + MPI_Bcast(&tau,1,MPI_DOUBLE,0,comm); + MPI_Bcast(&alpha,1,MPI_DOUBLE,0,comm); + MPI_Bcast(&beta,1,MPI_DOUBLE,0,comm); + MPI_Bcast(&das,1,MPI_DOUBLE,0,comm); + MPI_Bcast(&dbs,1,MPI_DOUBLE,0,comm); + MPI_Bcast(&phi_s,1,MPI_DOUBLE,0,comm); + MPI_Bcast(&wp_saturation,1,MPI_DOUBLE,0,comm); + MPI_Bcast(&pBC,1,MPI_LOGICAL,0,comm); + MPI_Bcast(&Restart,1,MPI_LOGICAL,0,comm); + MPI_Bcast(&din,1,MPI_DOUBLE,0,comm); + MPI_Bcast(&dout,1,MPI_DOUBLE,0,comm); + MPI_Bcast(&Fx,1,MPI_DOUBLE,0,comm); + MPI_Bcast(&Fy,1,MPI_DOUBLE,0,comm); + MPI_Bcast(&Fz,1,MPI_DOUBLE,0,comm); + MPI_Bcast(×tepMax,1,MPI_INT,0,comm); + MPI_Bcast(&interval,1,MPI_INT,0,comm); + MPI_Bcast(&tol,1,MPI_DOUBLE,0,comm); // Computational domain - comm.bcast(&Nx,1,0); - comm.bcast(&Ny,1,0); - comm.bcast(&Nz,1,0); -// comm.bcast(&nBlocks,1,0); -// comm.bcast(&nthreads,1,0); - comm.bcast(&nprocx,1,0); - comm.bcast(&nprocy,1,0); - comm.bcast(&nprocz,1,0); - comm.bcast(&nspheres,1,0); - comm.bcast(&Lx,1,0); - comm.bcast(&Ly,1,0); - comm.bcast(&Lz,1,0); + MPI_Bcast(&Nx,1,MPI_INT,0,comm); + MPI_Bcast(&Ny,1,MPI_INT,0,comm); + MPI_Bcast(&Nz,1,MPI_INT,0,comm); +// MPI_Bcast(&nBlocks,1,MPI_INT,0,comm); +// MPI_Bcast(&nthreads,1,MPI_INT,0,comm); + MPI_Bcast(&nprocx,1,MPI_INT,0,comm); + MPI_Bcast(&nprocy,1,MPI_INT,0,comm); + MPI_Bcast(&nprocz,1,MPI_INT,0,comm); + MPI_Bcast(&nspheres,1,MPI_INT,0,comm); + MPI_Bcast(&Lx,1,MPI_DOUBLE,0,comm); + MPI_Bcast(&Ly,1,MPI_DOUBLE,0,comm); + MPI_Bcast(&Lz,1,MPI_DOUBLE,0,comm); //................................................. comm.barrier(); @@ -398,10 +399,10 @@ int main(int argc, char **argv) if (rank == 0) ReadSpherePacking(nspheres,cx,cy,cz,rad); comm.barrier(); // Broadcast the sphere packing to all processes - comm.bcast(cx,nspheres,0); - comm.bcast(cy,nspheres,0); - comm.bcast(cz,nspheres,0); - comm.bcast(rad,nspheres,0); + MPI_Bcast(cx,nspheres,MPI_DOUBLE,0,comm); + MPI_Bcast(cy,nspheres,MPI_DOUBLE,0,comm); + MPI_Bcast(cz,nspheres,MPI_DOUBLE,0,comm); + MPI_Bcast(rad,nspheres,MPI_DOUBLE,0,comm); //........................................................................... comm.barrier(); if (rank == 0) cout << "Domain set." << endl; @@ -417,7 +418,7 @@ int main(int argc, char **argv) D = 6.0*(Nx-2)*nprocx*totVol / totArea / Lx; printf("Sauter Mean Diameter (computed from sphere packing) = %f \n ",D); } - comm.bcast(&D,1,0); + MPI_Bcast(&D,1,MPI_DOUBLE,0,comm); //....................................................................... // sprintf(LocalRankString,"%05d",rank); @@ -477,7 +478,7 @@ int main(int argc, char **argv) id[(Nz-1)*Nx*Ny] = id[(Nz-1)*Nx*Ny+Nx-1] = id[(Nz-1)*Nx*Ny+(Ny-1)*Nx] = id[(Nz-1)*Nx*Ny+(Ny-1)*Nx + Nx-1] = 0; //......................................................... 
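The flags pBC and Restart above are broadcast with MPI_LOGICAL, which is the Fortran logical datatype; from C++ a more portable choice is usually MPI_CXX_BOOL (MPI-3) or round-tripping through an int. This is only an aside on the datatype choice, not what the patch does; a hedged sketch of the alternative:

    #include <mpi.h>

    // Sketch only: a C++-native broadcast of a boolean flag from rank 0.
    // MPI_CXX_BOOL requires MPI-3; copying the flag into an int and broadcasting
    // with MPI_INT is the lowest-common-denominator alternative.
    void BroadcastFlag(MPI_Comm comm, bool &pBC) {
        MPI_Bcast(&pBC, 1, MPI_CXX_BOOL, 0, comm);
    }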
sum_local = 1.0*sum; - porosity = comm.sumReduce( sum_local ); + MPI_Allreduce(&sum_local,&porosity,1,MPI_DOUBLE,MPI_SUM,comm); porosity = porosity*iVol_global; if (rank==0) printf("Media porosity = %f \n",porosity); @@ -885,24 +886,42 @@ int main(int argc, char **argv) PackID(sendList_yZ, sendCount_yZ ,sendID_yZ, id); PackID(sendList_YZ, sendCount_YZ ,sendID_YZ, id); //...................................................................................... - comm.sendrecv(sendID_x,sendCount_x,rank_x,sendtag,recvID_X,recvCount_X,rank_X,recvtag); - comm.sendrecv(sendID_X,sendCount_X,rank_X,sendtag,recvID_x,recvCount_x,rank_x,recvtag); - comm.sendrecv(sendID_y,sendCount_y,rank_y,sendtag,recvID_Y,recvCount_Y,rank_Y,recvtag); - comm.sendrecv(sendID_Y,sendCount_Y,rank_Y,sendtag,recvID_y,recvCount_y,rank_y,recvtag); - comm.sendrecv(sendID_z,sendCount_z,rank_z,sendtag,recvID_Z,recvCount_Z,rank_Z,recvtag); - comm.sendrecv(sendID_Z,sendCount_Z,rank_Z,sendtag,recvID_z,recvCount_z,rank_z,recvtag); - comm.sendrecv(sendID_xy,sendCount_xy,rank_xy,sendtag,recvID_XY,recvCount_XY,rank_XY,recvtag); - comm.sendrecv(sendID_XY,sendCount_XY,rank_XY,sendtag,recvID_xy,recvCount_xy,rank_xy,recvtag); - comm.sendrecv(sendID_Xy,sendCount_Xy,rank_Xy,sendtag,recvID_xY,recvCount_xY,rank_xY,recvtag); - comm.sendrecv(sendID_xY,sendCount_xY,rank_xY,sendtag,recvID_Xy,recvCount_Xy,rank_Xy,recvtag); - comm.sendrecv(sendID_xz,sendCount_xz,rank_xz,sendtag,recvID_XZ,recvCount_XZ,rank_XZ,recvtag); - comm.sendrecv(sendID_XZ,sendCount_XZ,rank_XZ,sendtag,recvID_xz,recvCount_xz,rank_xz,recvtag); - comm.sendrecv(sendID_Xz,sendCount_Xz,rank_Xz,sendtag,recvID_xZ,recvCount_xZ,rank_xZ,recvtag); - comm.sendrecv(sendID_xZ,sendCount_xZ,rank_xZ,sendtag,recvID_Xz,recvCount_Xz,rank_Xz,recvtag); - comm.sendrecv(sendID_yz,sendCount_yz,rank_yz,sendtag,recvID_YZ,recvCount_YZ,rank_YZ,recvtag); - comm.sendrecv(sendID_YZ,sendCount_YZ,rank_YZ,sendtag,recvID_yz,recvCount_yz,rank_yz,recvtag); - comm.sendrecv(sendID_Yz,sendCount_Yz,rank_Yz,sendtag,recvID_yZ,recvCount_yZ,rank_yZ,recvtag); - comm.sendrecv(sendID_yZ,sendCount_yZ,rank_yZ,sendtag,recvID_Yz,recvCount_Yz,rank_Yz,recvtag); + MPI_Sendrecv(sendID_x,sendCount_x,MPI_CHAR,rank_x,sendtag, + recvID_X,recvCount_X,MPI_CHAR,rank_X,recvtag,comm,MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_X,sendCount_X,MPI_CHAR,rank_X,sendtag, + recvID_x,recvCount_x,MPI_CHAR,rank_x,recvtag,comm,MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_y,sendCount_y,MPI_CHAR,rank_y,sendtag, + recvID_Y,recvCount_Y,MPI_CHAR,rank_Y,recvtag,comm,MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_Y,sendCount_Y,MPI_CHAR,rank_Y,sendtag, + recvID_y,recvCount_y,MPI_CHAR,rank_y,recvtag,comm,MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_z,sendCount_z,MPI_CHAR,rank_z,sendtag, + recvID_Z,recvCount_Z,MPI_CHAR,rank_Z,recvtag,comm,MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_Z,sendCount_Z,MPI_CHAR,rank_Z,sendtag, + recvID_z,recvCount_z,MPI_CHAR,rank_z,recvtag,comm,MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_xy,sendCount_xy,MPI_CHAR,rank_xy,sendtag, + recvID_XY,recvCount_XY,MPI_CHAR,rank_XY,recvtag,comm,MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_XY,sendCount_XY,MPI_CHAR,rank_XY,sendtag, + recvID_xy,recvCount_xy,MPI_CHAR,rank_xy,recvtag,comm,MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_Xy,sendCount_Xy,MPI_CHAR,rank_Xy,sendtag, + recvID_xY,recvCount_xY,MPI_CHAR,rank_xY,recvtag,comm,MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_xY,sendCount_xY,MPI_CHAR,rank_xY,sendtag, + recvID_Xy,recvCount_Xy,MPI_CHAR,rank_Xy,recvtag,comm,MPI_STATUS_IGNORE); + 
MPI_Sendrecv(sendID_xz,sendCount_xz,MPI_CHAR,rank_xz,sendtag, + recvID_XZ,recvCount_XZ,MPI_CHAR,rank_XZ,recvtag,comm,MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_XZ,sendCount_XZ,MPI_CHAR,rank_XZ,sendtag, + recvID_xz,recvCount_xz,MPI_CHAR,rank_xz,recvtag,comm,MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_Xz,sendCount_Xz,MPI_CHAR,rank_Xz,sendtag, + recvID_xZ,recvCount_xZ,MPI_CHAR,rank_xZ,recvtag,comm,MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_xZ,sendCount_xZ,MPI_CHAR,rank_xZ,sendtag, + recvID_Xz,recvCount_Xz,MPI_CHAR,rank_Xz,recvtag,comm,MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_yz,sendCount_yz,MPI_CHAR,rank_yz,sendtag, + recvID_YZ,recvCount_YZ,MPI_CHAR,rank_YZ,recvtag,comm,MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_YZ,sendCount_YZ,MPI_CHAR,rank_YZ,sendtag, + recvID_yz,recvCount_yz,MPI_CHAR,rank_yz,recvtag,comm,MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_Yz,sendCount_Yz,MPI_CHAR,rank_Yz,sendtag, + recvID_yZ,recvCount_yZ,MPI_CHAR,rank_yZ,recvtag,comm,MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_yZ,sendCount_yZ,MPI_CHAR,rank_yZ,sendtag, + recvID_Yz,recvCount_Yz,MPI_CHAR,rank_Yz,recvtag,comm,MPI_STATUS_IGNORE); //...................................................................................... UnpackID(recvList_x, recvCount_x ,recvID_x, id); UnpackID(recvList_X, recvCount_X ,recvID_X, id); @@ -1361,48 +1380,48 @@ int main(int argc, char **argv) //................................................................................... // Send / Recv all the phase indcator field values //................................................................................... - req1[0] = comm.Isend(sendbuf_x, sendCount_x,rank_x,sendtag); - req2[0] = comm.Irecv(recvbuf_X, recvCount_X,rank_X,recvtag); - req1[1] = comm.Isend(sendbuf_X, sendCount_X,rank_X,sendtag); - req2[1] = comm.Irecv(recvbuf_x, recvCount_x,rank_x,recvtag); - req1[2] = comm.Isend(sendbuf_y, sendCount_y,rank_y,sendtag); - req2[2] = comm.Irecv(recvbuf_Y, recvCount_Y,rank_Y,recvtag); - req1[3] = comm.Isend(sendbuf_Y, sendCount_Y,rank_Y,sendtag); - req2[3] = comm.Irecv(recvbuf_y, recvCount_y,rank_y,recvtag); - req1[4] = comm.Isend(sendbuf_z, sendCount_z,rank_z,sendtag); - req2[4] = comm.Irecv(recvbuf_Z, recvCount_Z,rank_Z,recvtag); - req1[5] = comm.Isend(sendbuf_Z, sendCount_Z,rank_Z,sendtag); - req2[5] = comm.Irecv(recvbuf_z, recvCount_z,rank_z,recvtag); - req1[6] = comm.Isend(sendbuf_xy, sendCount_xy,rank_xy,sendtag); - req2[6] = comm.Irecv(recvbuf_XY, recvCount_XY,rank_XY,recvtag); - req1[7] = comm.Isend(sendbuf_XY, sendCount_XY,rank_XY,sendtag); - req2[7] = comm.Irecv(recvbuf_xy, recvCount_xy,rank_xy,recvtag); - req1[8] = comm.Isend(sendbuf_Xy, sendCount_Xy,rank_Xy,sendtag); - req2[8] = comm.Irecv(recvbuf_xY, recvCount_xY,rank_xY,recvtag); - req1[9] = comm.Isend(sendbuf_xY, sendCount_xY,rank_xY,sendtag); - req2[9] = comm.Irecv(recvbuf_Xy, recvCount_Xy,rank_Xy,recvtag); - req1[10] = comm.Isend(sendbuf_xz, sendCount_xz,rank_xz,sendtag); - req2[10] = comm.Irecv(recvbuf_XZ, recvCount_XZ,rank_XZ,recvtag); - req1[11] = comm.Isend(sendbuf_XZ, sendCount_XZ,rank_XZ,sendtag); - req2[11] = comm.Irecv(recvbuf_xz, recvCount_xz,rank_xz,recvtag); - req1[12] = comm.Isend(sendbuf_Xz, sendCount_Xz,rank_Xz,sendtag); - req2[12] = comm.Irecv(recvbuf_xZ, recvCount_xZ,rank_xZ,recvtag); - req1[13] = comm.Isend(sendbuf_xZ, sendCount_xZ,rank_xZ,sendtag); - req2[13] = comm.Irecv(recvbuf_Xz, recvCount_Xz,rank_Xz,recvtag); - req1[14] = comm.Isend(sendbuf_yz, sendCount_yz,rank_yz,sendtag); - req2[14] = comm.Irecv(recvbuf_YZ, recvCount_YZ,rank_YZ,recvtag); - req1[15] = 
comm.Isend(sendbuf_YZ, sendCount_YZ,rank_YZ,sendtag); - req2[15] = comm.Irecv(recvbuf_yz, recvCount_yz,rank_yz,recvtag); - req1[16] = comm.Isend(sendbuf_Yz, sendCount_Yz,rank_Yz,sendtag); - req2[16] = comm.Irecv(recvbuf_yZ, recvCount_yZ,rank_yZ,recvtag); - req1[17] = comm.Isend(sendbuf_yZ, sendCount_yZ,rank_yZ,sendtag); - req2[17] = comm.Irecv(recvbuf_Yz, recvCount_Yz,rank_Yz,recvtag); + MPI_Isend(sendbuf_x, sendCount_x,MPI_DOUBLE,rank_x,sendtag,comm,&req1[0]); + MPI_Irecv(recvbuf_X, recvCount_X,MPI_DOUBLE,rank_X,recvtag,comm,&req2[0]); + MPI_Isend(sendbuf_X, sendCount_X,MPI_DOUBLE,rank_X,sendtag,comm,&req1[1]); + MPI_Irecv(recvbuf_x, recvCount_x,MPI_DOUBLE,rank_x,recvtag,comm,&req2[1]); + MPI_Isend(sendbuf_y, sendCount_y,MPI_DOUBLE,rank_y,sendtag,comm,&req1[2]); + MPI_Irecv(recvbuf_Y, recvCount_Y,MPI_DOUBLE,rank_Y,recvtag,comm,&req2[2]); + MPI_Isend(sendbuf_Y, sendCount_Y,MPI_DOUBLE,rank_Y,sendtag,comm,&req1[3]); + MPI_Irecv(recvbuf_y, recvCount_y,MPI_DOUBLE,rank_y,recvtag,comm,&req2[3]); + MPI_Isend(sendbuf_z, sendCount_z,MPI_DOUBLE,rank_z,sendtag,comm,&req1[4]); + MPI_Irecv(recvbuf_Z, recvCount_Z,MPI_DOUBLE,rank_Z,recvtag,comm,&req2[4]); + MPI_Isend(sendbuf_Z, sendCount_Z,MPI_DOUBLE,rank_Z,sendtag,comm,&req1[5]); + MPI_Irecv(recvbuf_z, recvCount_z,MPI_DOUBLE,rank_z,recvtag,comm,&req2[5]); + MPI_Isend(sendbuf_xy, sendCount_xy,MPI_DOUBLE,rank_xy,sendtag,comm,&req1[6]); + MPI_Irecv(recvbuf_XY, recvCount_XY,MPI_DOUBLE,rank_XY,recvtag,comm,&req2[6]); + MPI_Isend(sendbuf_XY, sendCount_XY,MPI_DOUBLE,rank_XY,sendtag,comm,&req1[7]); + MPI_Irecv(recvbuf_xy, recvCount_xy,MPI_DOUBLE,rank_xy,recvtag,comm,&req2[7]); + MPI_Isend(sendbuf_Xy, sendCount_Xy,MPI_DOUBLE,rank_Xy,sendtag,comm,&req1[8]); + MPI_Irecv(recvbuf_xY, recvCount_xY,MPI_DOUBLE,rank_xY,recvtag,comm,&req2[8]); + MPI_Isend(sendbuf_xY, sendCount_xY,MPI_DOUBLE,rank_xY,sendtag,comm,&req1[9]); + MPI_Irecv(recvbuf_Xy, recvCount_Xy,MPI_DOUBLE,rank_Xy,recvtag,comm,&req2[9]); + MPI_Isend(sendbuf_xz, sendCount_xz,MPI_DOUBLE,rank_xz,sendtag,comm,&req1[10]); + MPI_Irecv(recvbuf_XZ, recvCount_XZ,MPI_DOUBLE,rank_XZ,recvtag,comm,&req2[10]); + MPI_Isend(sendbuf_XZ, sendCount_XZ,MPI_DOUBLE,rank_XZ,sendtag,comm,&req1[11]); + MPI_Irecv(recvbuf_xz, recvCount_xz,MPI_DOUBLE,rank_xz,recvtag,comm,&req2[11]); + MPI_Isend(sendbuf_Xz, sendCount_Xz,MPI_DOUBLE,rank_Xz,sendtag,comm,&req1[12]); + MPI_Irecv(recvbuf_xZ, recvCount_xZ,MPI_DOUBLE,rank_xZ,recvtag,comm,&req2[12]); + MPI_Isend(sendbuf_xZ, sendCount_xZ,MPI_DOUBLE,rank_xZ,sendtag,comm,&req1[13]); + MPI_Irecv(recvbuf_Xz, recvCount_Xz,MPI_DOUBLE,rank_Xz,recvtag,comm,&req2[13]); + MPI_Isend(sendbuf_yz, sendCount_yz,MPI_DOUBLE,rank_yz,sendtag,comm,&req1[14]); + MPI_Irecv(recvbuf_YZ, recvCount_YZ,MPI_DOUBLE,rank_YZ,recvtag,comm,&req2[14]); + MPI_Isend(sendbuf_YZ, sendCount_YZ,MPI_DOUBLE,rank_YZ,sendtag,comm,&req1[15]); + MPI_Irecv(recvbuf_yz, recvCount_yz,MPI_DOUBLE,rank_yz,recvtag,comm,&req2[15]); + MPI_Isend(sendbuf_Yz, sendCount_Yz,MPI_DOUBLE,rank_Yz,sendtag,comm,&req1[16]); + MPI_Irecv(recvbuf_yZ, recvCount_yZ,MPI_DOUBLE,rank_yZ,recvtag,comm,&req2[16]); + MPI_Isend(sendbuf_yZ, sendCount_yZ,MPI_DOUBLE,rank_yZ,sendtag,comm,&req1[17]); + MPI_Irecv(recvbuf_Yz, recvCount_Yz,MPI_DOUBLE,rank_Yz,recvtag,comm,&req2[17]); //................................................................................... //................................................................................... 
// Wait for completion of Indicator Field communication //................................................................................... - comm.waitAll(18,req1); - comm.waitAll(18,req2); + MPI_Waitall(18,req1,stat1); + MPI_Waitall(18,req2,stat2); ScaLBL_DeviceBarrier(); //................................................................................... //................................................................................... @@ -1478,7 +1497,7 @@ int main(int argc, char **argv) //.......create and start timer............ double starttime,stoptime,cputime; comm.barrier(); - starttime = Utilities::MPI::time(); + starttime = MPI_Wtime(); //......................................... sendtag = recvtag = 5; @@ -1574,42 +1593,42 @@ int main(int argc, char **argv) //................................................................................... // Send all the distributions - req1[0] = comm.Isend(sendbuf_x, 5*sendCount_x,rank_x,sendtag); - req2[0] = comm.Irecv(recvbuf_X, 5*recvCount_X,rank_X,recvtag); - req1[1] = comm.Isend(sendbuf_X, 5*sendCount_X,rank_X,sendtag); - req2[1] = comm.Irecv(recvbuf_x, 5*recvCount_x,rank_x,recvtag); - req1[2] = comm.Isend(sendbuf_y, 5*sendCount_y,rank_y,sendtag); - req2[2] = comm.Irecv(recvbuf_Y, 5*recvCount_Y,rank_Y,recvtag); - req1[3] = comm.Isend(sendbuf_Y, 5*sendCount_Y,rank_Y,sendtag); - req2[3] = comm.Irecv(recvbuf_y, 5*recvCount_y,rank_y,recvtag); - req1[4] = comm.Isend(sendbuf_z, 5*sendCount_z,rank_z,sendtag); - req2[4] = comm.Irecv(recvbuf_Z, 5*recvCount_Z,rank_Z,recvtag); - req1[5] = comm.Isend(sendbuf_Z, 5*sendCount_Z,rank_Z,sendtag); - req2[5] = comm.Irecv(recvbuf_z, 5*recvCount_z,rank_z,recvtag); - req1[6] = comm.Isend(sendbuf_xy, sendCount_xy,rank_xy,sendtag); - req2[6] = comm.Irecv(recvbuf_XY, recvCount_XY,rank_XY,recvtag); - req1[7] = comm.Isend(sendbuf_XY, sendCount_XY,rank_XY,sendtag); - req2[7] = comm.Irecv(recvbuf_xy, recvCount_xy,rank_xy,recvtag); - req1[8] = comm.Isend(sendbuf_Xy, sendCount_Xy,rank_Xy,sendtag); - req2[8] = comm.Irecv(recvbuf_xY, recvCount_xY,rank_xY,recvtag); - req1[9] = comm.Isend(sendbuf_xY, sendCount_xY,rank_xY,sendtag); - req2[9] = comm.Irecv(recvbuf_Xy, recvCount_Xy,rank_Xy,recvtag); - req1[10] = comm.Isend(sendbuf_xz, sendCount_xz,rank_xz,sendtag); - req2[10] = comm.Irecv(recvbuf_XZ, recvCount_XZ,rank_XZ,recvtag); - req1[11] = comm.Isend(sendbuf_XZ, sendCount_XZ,rank_XZ,sendtag); - req2[11] = comm.Irecv(recvbuf_xz, recvCount_xz,rank_xz,recvtag); - req1[12] = comm.Isend(sendbuf_Xz, sendCount_Xz,rank_Xz,sendtag); - req2[12] = comm.Irecv(recvbuf_xZ, recvCount_xZ,rank_xZ,recvtag); - req1[13] = comm.Isend(sendbuf_xZ, sendCount_xZ,rank_xZ,sendtag); - req2[13] = comm.Irecv(recvbuf_Xz, recvCount_Xz,rank_Xz,recvtag); - req1[14] = comm.Isend(sendbuf_yz, sendCount_yz,rank_yz,sendtag); - req2[14] = comm.Irecv(recvbuf_YZ, recvCount_YZ,rank_YZ,recvtag); - req1[15] = comm.Isend(sendbuf_YZ, sendCount_YZ,rank_YZ,sendtag); - req2[15] = comm.Irecv(recvbuf_yz, recvCount_yz,rank_yz,recvtag); - req1[16] = comm.Isend(sendbuf_Yz, sendCount_Yz,rank_Yz,sendtag); - req2[16] = comm.Irecv(recvbuf_yZ, recvCount_yZ,rank_yZ,recvtag); - req1[17] = comm.Isend(sendbuf_yZ, sendCount_yZ,rank_yZ,sendtag); - req2[17] = comm.Irecv(recvbuf_Yz, recvCount_Yz,rank_Yz,recvtag); + MPI_Isend(sendbuf_x, 5*sendCount_x,MPI_DOUBLE,rank_x,sendtag,comm,&req1[0]); + MPI_Irecv(recvbuf_X, 5*recvCount_X,MPI_DOUBLE,rank_X,recvtag,comm,&req2[0]); + MPI_Isend(sendbuf_X, 5*sendCount_X,MPI_DOUBLE,rank_X,sendtag,comm,&req1[1]); + MPI_Irecv(recvbuf_x, 
5*recvCount_x,MPI_DOUBLE,rank_x,recvtag,comm,&req2[1]); + MPI_Isend(sendbuf_y, 5*sendCount_y,MPI_DOUBLE,rank_y,sendtag,comm,&req1[2]); + MPI_Irecv(recvbuf_Y, 5*recvCount_Y,MPI_DOUBLE,rank_Y,recvtag,comm,&req2[2]); + MPI_Isend(sendbuf_Y, 5*sendCount_Y,MPI_DOUBLE,rank_Y,sendtag,comm,&req1[3]); + MPI_Irecv(recvbuf_y, 5*recvCount_y,MPI_DOUBLE,rank_y,recvtag,comm,&req2[3]); + MPI_Isend(sendbuf_z, 5*sendCount_z,MPI_DOUBLE,rank_z,sendtag,comm,&req1[4]); + MPI_Irecv(recvbuf_Z, 5*recvCount_Z,MPI_DOUBLE,rank_Z,recvtag,comm,&req2[4]); + MPI_Isend(sendbuf_Z, 5*sendCount_Z,MPI_DOUBLE,rank_Z,sendtag,comm,&req1[5]); + MPI_Irecv(recvbuf_z, 5*recvCount_z,MPI_DOUBLE,rank_z,recvtag,comm,&req2[5]); + MPI_Isend(sendbuf_xy, sendCount_xy,MPI_DOUBLE,rank_xy,sendtag,comm,&req1[6]); + MPI_Irecv(recvbuf_XY, recvCount_XY,MPI_DOUBLE,rank_XY,recvtag,comm,&req2[6]); + MPI_Isend(sendbuf_XY, sendCount_XY,MPI_DOUBLE,rank_XY,sendtag,comm,&req1[7]); + MPI_Irecv(recvbuf_xy, recvCount_xy,MPI_DOUBLE,rank_xy,recvtag,comm,&req2[7]); + MPI_Isend(sendbuf_Xy, sendCount_Xy,MPI_DOUBLE,rank_Xy,sendtag,comm,&req1[8]); + MPI_Irecv(recvbuf_xY, recvCount_xY,MPI_DOUBLE,rank_xY,recvtag,comm,&req2[8]); + MPI_Isend(sendbuf_xY, sendCount_xY,MPI_DOUBLE,rank_xY,sendtag,comm,&req1[9]); + MPI_Irecv(recvbuf_Xy, recvCount_Xy,MPI_DOUBLE,rank_Xy,recvtag,comm,&req2[9]); + MPI_Isend(sendbuf_xz, sendCount_xz,MPI_DOUBLE,rank_xz,sendtag,comm,&req1[10]); + MPI_Irecv(recvbuf_XZ, recvCount_XZ,MPI_DOUBLE,rank_XZ,recvtag,comm,&req2[10]); + MPI_Isend(sendbuf_XZ, sendCount_XZ,MPI_DOUBLE,rank_XZ,sendtag,comm,&req1[11]); + MPI_Irecv(recvbuf_xz, recvCount_xz,MPI_DOUBLE,rank_xz,recvtag,comm,&req2[11]); + MPI_Isend(sendbuf_Xz, sendCount_Xz,MPI_DOUBLE,rank_Xz,sendtag,comm,&req1[12]); + MPI_Irecv(recvbuf_xZ, recvCount_xZ,MPI_DOUBLE,rank_xZ,recvtag,comm,&req2[12]); + MPI_Isend(sendbuf_xZ, sendCount_xZ,MPI_DOUBLE,rank_xZ,sendtag,comm,&req1[13]); + MPI_Irecv(recvbuf_Xz, recvCount_Xz,MPI_DOUBLE,rank_Xz,recvtag,comm,&req2[13]); + MPI_Isend(sendbuf_yz, sendCount_yz,MPI_DOUBLE,rank_yz,sendtag,comm,&req1[14]); + MPI_Irecv(recvbuf_YZ, recvCount_YZ,MPI_DOUBLE,rank_YZ,recvtag,comm,&req2[14]); + MPI_Isend(sendbuf_YZ, sendCount_YZ,MPI_DOUBLE,rank_YZ,sendtag,comm,&req1[15]); + MPI_Irecv(recvbuf_yz, recvCount_yz,MPI_DOUBLE,rank_yz,recvtag,comm,&req2[15]); + MPI_Isend(sendbuf_Yz, sendCount_Yz,MPI_DOUBLE,rank_Yz,sendtag,comm,&req1[16]); + MPI_Irecv(recvbuf_yZ, recvCount_yZ,MPI_DOUBLE,rank_yZ,recvtag,comm,&req2[16]); + MPI_Isend(sendbuf_yZ, sendCount_yZ,MPI_DOUBLE,rank_yZ,sendtag,comm,&req1[17]); + MPI_Irecv(recvbuf_Yz, recvCount_Yz,MPI_DOUBLE,rank_Yz,recvtag,comm,&req2[17]); //................................................................................... //************************************************************************* @@ -1629,8 +1648,8 @@ int main(int argc, char **argv) //................................................................................... // Wait for completion of D3Q19 communication - comm.waitAll(18,req1); - comm.waitAll(18,req2); + MPI_Waitall(18,req1,stat1); + MPI_Waitall(18,req2,stat2); //................................................................................... // Unpack the distributions on the device @@ -1724,18 +1743,18 @@ int main(int argc, char **argv) //................................................................................... 
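In the distribution exchange above, the face buffers are sent with 5*sendCount_* doubles while the edge buffers use sendCount_* directly. That matches the D3Q19 stencil: five of the nineteen discrete velocities leave through each face of the subdomain, and exactly one leaves through each edge, so the message holds five (or one) distribution values per boundary site. A trivial sketch of how those message sizes follow from the boundary-site counts (the helper names are hypothetical):

    // Hedged sketch: message sizes for a D3Q19 halo exchange, where sendCount_*
    // is the number of boundary sites on a given face or edge of the subdomain.
    int faceMessageSize(int sendCount_face) { return 5 * sendCount_face; }  // 5 velocities cross a face
    int edgeMessageSize(int sendCount_edge) { return 1 * sendCount_edge; }  // 1 velocity crosses an edge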
// Send all the distributions - req1[0] = comm.Isend(sendbuf_x, 2*sendCount_x,rank_x,sendtag); - req2[0] = comm.Irecv(recvbuf_X, 2*recvCount_X,rank_X,recvtag); - req1[1] = comm.Isend(sendbuf_X, 2*sendCount_X,rank_X,sendtag); - req2[1] = comm.Irecv(recvbuf_x, 2*recvCount_x,rank_x,recvtag); - req1[2] = comm.Isend(sendbuf_y, 2*sendCount_y,rank_y,sendtag); - req2[2] = comm.Irecv(recvbuf_Y, 2*recvCount_Y,rank_Y,recvtag); - req1[3] = comm.Isend(sendbuf_Y, 2*sendCount_Y,rank_Y,sendtag); - req2[3] = comm.Irecv(recvbuf_y, 2*recvCount_y,rank_y,recvtag); - req1[4] = comm.Isend(sendbuf_z, 2*sendCount_z,rank_z,sendtag); - req2[4] = comm.Irecv(recvbuf_Z, 2*recvCount_Z,rank_Z,recvtag); - req1[5] = comm.Isend(sendbuf_Z, 2*sendCount_Z,rank_Z,sendtag); - req2[5] = comm.Irecv(recvbuf_z, 2*recvCount_z,rank_z,recvtag); + MPI_Isend(sendbuf_x, 2*sendCount_x,MPI_DOUBLE,rank_x,sendtag,comm,&req1[0]); + MPI_Irecv(recvbuf_X, 2*recvCount_X,MPI_DOUBLE,rank_X,recvtag,comm,&req2[0]); + MPI_Isend(sendbuf_X, 2*sendCount_X,MPI_DOUBLE,rank_X,sendtag,comm,&req1[1]); + MPI_Irecv(recvbuf_x, 2*recvCount_x,MPI_DOUBLE,rank_x,recvtag,comm,&req2[1]); + MPI_Isend(sendbuf_y, 2*sendCount_y,MPI_DOUBLE,rank_y,sendtag,comm,&req1[2]); + MPI_Irecv(recvbuf_Y, 2*recvCount_Y,MPI_DOUBLE,rank_Y,recvtag,comm,&req2[2]); + MPI_Isend(sendbuf_Y, 2*sendCount_Y,MPI_DOUBLE,rank_Y,sendtag,comm,&req1[3]); + MPI_Irecv(recvbuf_y, 2*recvCount_y,MPI_DOUBLE,rank_y,recvtag,comm,&req2[3]); + MPI_Isend(sendbuf_z, 2*sendCount_z,MPI_DOUBLE,rank_z,sendtag,comm,&req1[4]); + MPI_Irecv(recvbuf_Z, 2*recvCount_Z,MPI_DOUBLE,rank_Z,recvtag,comm,&req2[4]); + MPI_Isend(sendbuf_Z, 2*sendCount_Z,MPI_DOUBLE,rank_Z,sendtag,comm,&req1[5]); + MPI_Irecv(recvbuf_z, 2*recvCount_z,MPI_DOUBLE,rank_z,recvtag,comm,&req2[5]); //................................................................................... ScaLBL_D3Q7_Swap(ID, A_even, A_odd, Nx, Ny, Nz); @@ -1743,8 +1762,8 @@ int main(int argc, char **argv) //................................................................................... // Wait for completion of D3Q19 communication - comm.waitAll(6,req1); - comm.waitAll(6,req2); + MPI_Waitall(6,req1,stat1); + MPI_Waitall(6,req2,stat2); //................................................................................... // Unpack the distributions on the device //................................................................................... @@ -1805,48 +1824,48 @@ int main(int argc, char **argv) //................................................................................... // Send / Recv all the phase indcator field values //................................................................................... 
- req1[0] = comm.Isend(sendbuf_x, sendCount_x,rank_x,sendtag,comm,&req1[0]); - req2[0] = comm.Irecv(recvbuf_X, recvCount_X,rank_X,recvtag,comm,&req2[0]); - req1[1] = comm.Isend(sendbuf_X, sendCount_X,rank_X,sendtag,comm,&req1[1]); - req2[1] = comm.Irecv(recvbuf_x, recvCount_x,rank_x,recvtag,comm,&req2[1]); - req1[2] = comm.Isend(sendbuf_y, sendCount_y,rank_y,sendtag,comm,&req1[2]); - req2[2] = comm.Irecv(recvbuf_Y, recvCount_Y,rank_Y,recvtag,comm,&req2[2]); - req1[3] = comm.Isend(sendbuf_Y, sendCount_Y,rank_Y,sendtag,comm,&req1[3]); - req2[3] = comm.Irecv(recvbuf_y, recvCount_y,rank_y,recvtag,comm,&req2[3]); - req1[4] = comm.Isend(sendbuf_z, sendCount_z,rank_z,sendtag,comm,&req1[4]); - req2[4] = comm.Irecv(recvbuf_Z, recvCount_Z,rank_Z,recvtag,comm,&req2[4]); - req1[5] = comm.Isend(sendbuf_Z, sendCount_Z,rank_Z,sendtag,comm,&req1[5]); - req2[5] = comm.Irecv(recvbuf_z, recvCount_z,rank_z,recvtag,comm,&req2[5]); - req1[6] = comm.Isend(sendbuf_xy, sendCount_xy,rank_xy,sendtag,comm,&req1[6]); - req2[6] = comm.Irecv(recvbuf_XY, recvCount_XY,rank_XY,recvtag,comm,&req2[6]); - req1[7] = comm.Isend(sendbuf_XY, sendCount_XY,rank_XY,sendtag,comm,&req1[7]); - req2[7] = comm.Irecv(recvbuf_xy, recvCount_xy,rank_xy,recvtag,comm,&req2[7]); - req1[8] = comm.Isend(sendbuf_Xy, sendCount_Xy,rank_Xy,sendtag,comm,&req1[8]); - req2[8] = comm.Irecv(recvbuf_xY, recvCount_xY,rank_xY,recvtag,comm,&req2[8]); - req1[9] = comm.Isend(sendbuf_xY, sendCount_xY,rank_xY,sendtag,comm,&req1[9]); - req2[9] = comm.Irecv(recvbuf_Xy, recvCount_Xy,rank_Xy,recvtag,comm,&req2[9]); - req1[10] = comm.Isend(sendbuf_xz, sendCount_xz,rank_xz,sendtag,comm,&req1[10]); - req2[10] = comm.Irecv(recvbuf_XZ, recvCount_XZ,rank_XZ,recvtag,comm,&req2[10]); - req1[11] = comm.Isend(sendbuf_XZ, sendCount_XZ,rank_XZ,sendtag,comm,&req1[11]); - req2[11] = comm.Irecv(recvbuf_xz, recvCount_xz,rank_xz,recvtag,comm,&req2[11]); - req1[12] = comm.Isend(sendbuf_Xz, sendCount_Xz,rank_Xz,sendtag,comm,&req1[12]); - req2[12] = comm.Irecv(recvbuf_xZ, recvCount_xZ,rank_xZ,recvtag,comm,&req2[12]); - req1[13] = comm.Isend(sendbuf_xZ, sendCount_xZ,rank_xZ,sendtag,comm,&req1[13]); - req2[13] = comm.Irecv(recvbuf_Xz, recvCount_Xz,rank_Xz,recvtag,comm,&req2[13]); - req1[14] = comm.Isend(sendbuf_yz, sendCount_yz,rank_yz,sendtag,comm,&req1[14]); - req2[14] = comm.Irecv(recvbuf_YZ, recvCount_YZ,rank_YZ,recvtag,comm,&req2[14]); - req1[15] = comm.Isend(sendbuf_YZ, sendCount_YZ,rank_YZ,sendtag,comm,&req1[15]); - req2[15] = comm.Irecv(recvbuf_yz, recvCount_yz,rank_yz,recvtag,comm,&req2[15]); - req1[16] = comm.Isend(sendbuf_Yz, sendCount_Yz,rank_Yz,sendtag,comm,&req1[16]); - req2[16] = comm.Irecv(recvbuf_yZ, recvCount_yZ,rank_yZ,recvtag,comm,&req2[16]); - req1[17] = comm.Isend(sendbuf_yZ, sendCount_yZ,rank_yZ,sendtag,comm,&req1[17]); - req2[17] = comm.Irecv(recvbuf_Yz, recvCount_Yz,rank_Yz,recvtag,comm,&req2[17]); + MPI_Isend(sendbuf_x, sendCount_x,MPI_DOUBLE,rank_x,sendtag,comm,&req1[0]); + MPI_Irecv(recvbuf_X, recvCount_X,MPI_DOUBLE,rank_X,recvtag,comm,&req2[0]); + MPI_Isend(sendbuf_X, sendCount_X,MPI_DOUBLE,rank_X,sendtag,comm,&req1[1]); + MPI_Irecv(recvbuf_x, recvCount_x,MPI_DOUBLE,rank_x,recvtag,comm,&req2[1]); + MPI_Isend(sendbuf_y, sendCount_y,MPI_DOUBLE,rank_y,sendtag,comm,&req1[2]); + MPI_Irecv(recvbuf_Y, recvCount_Y,MPI_DOUBLE,rank_Y,recvtag,comm,&req2[2]); + MPI_Isend(sendbuf_Y, sendCount_Y,MPI_DOUBLE,rank_Y,sendtag,comm,&req1[3]); + MPI_Irecv(recvbuf_y, recvCount_y,MPI_DOUBLE,rank_y,recvtag,comm,&req2[3]); + MPI_Isend(sendbuf_z, 
sendCount_z,MPI_DOUBLE,rank_z,sendtag,comm,&req1[4]); + MPI_Irecv(recvbuf_Z, recvCount_Z,MPI_DOUBLE,rank_Z,recvtag,comm,&req2[4]); + MPI_Isend(sendbuf_Z, sendCount_Z,MPI_DOUBLE,rank_Z,sendtag,comm,&req1[5]); + MPI_Irecv(recvbuf_z, recvCount_z,MPI_DOUBLE,rank_z,recvtag,comm,&req2[5]); + MPI_Isend(sendbuf_xy, sendCount_xy,MPI_DOUBLE,rank_xy,sendtag,comm,&req1[6]); + MPI_Irecv(recvbuf_XY, recvCount_XY,MPI_DOUBLE,rank_XY,recvtag,comm,&req2[6]); + MPI_Isend(sendbuf_XY, sendCount_XY,MPI_DOUBLE,rank_XY,sendtag,comm,&req1[7]); + MPI_Irecv(recvbuf_xy, recvCount_xy,MPI_DOUBLE,rank_xy,recvtag,comm,&req2[7]); + MPI_Isend(sendbuf_Xy, sendCount_Xy,MPI_DOUBLE,rank_Xy,sendtag,comm,&req1[8]); + MPI_Irecv(recvbuf_xY, recvCount_xY,MPI_DOUBLE,rank_xY,recvtag,comm,&req2[8]); + MPI_Isend(sendbuf_xY, sendCount_xY,MPI_DOUBLE,rank_xY,sendtag,comm,&req1[9]); + MPI_Irecv(recvbuf_Xy, recvCount_Xy,MPI_DOUBLE,rank_Xy,recvtag,comm,&req2[9]); + MPI_Isend(sendbuf_xz, sendCount_xz,MPI_DOUBLE,rank_xz,sendtag,comm,&req1[10]); + MPI_Irecv(recvbuf_XZ, recvCount_XZ,MPI_DOUBLE,rank_XZ,recvtag,comm,&req2[10]); + MPI_Isend(sendbuf_XZ, sendCount_XZ,MPI_DOUBLE,rank_XZ,sendtag,comm,&req1[11]); + MPI_Irecv(recvbuf_xz, recvCount_xz,MPI_DOUBLE,rank_xz,recvtag,comm,&req2[11]); + MPI_Isend(sendbuf_Xz, sendCount_Xz,MPI_DOUBLE,rank_Xz,sendtag,comm,&req1[12]); + MPI_Irecv(recvbuf_xZ, recvCount_xZ,MPI_DOUBLE,rank_xZ,recvtag,comm,&req2[12]); + MPI_Isend(sendbuf_xZ, sendCount_xZ,MPI_DOUBLE,rank_xZ,sendtag,comm,&req1[13]); + MPI_Irecv(recvbuf_Xz, recvCount_Xz,MPI_DOUBLE,rank_Xz,recvtag,comm,&req2[13]); + MPI_Isend(sendbuf_yz, sendCount_yz,MPI_DOUBLE,rank_yz,sendtag,comm,&req1[14]); + MPI_Irecv(recvbuf_YZ, recvCount_YZ,MPI_DOUBLE,rank_YZ,recvtag,comm,&req2[14]); + MPI_Isend(sendbuf_YZ, sendCount_YZ,MPI_DOUBLE,rank_YZ,sendtag,comm,&req1[15]); + MPI_Irecv(recvbuf_yz, recvCount_yz,MPI_DOUBLE,rank_yz,recvtag,comm,&req2[15]); + MPI_Isend(sendbuf_Yz, sendCount_Yz,MPI_DOUBLE,rank_Yz,sendtag,comm,&req1[16]); + MPI_Irecv(recvbuf_yZ, recvCount_yZ,MPI_DOUBLE,rank_yZ,recvtag,comm,&req2[16]); + MPI_Isend(sendbuf_yZ, sendCount_yZ,MPI_DOUBLE,rank_yZ,sendtag,comm,&req1[17]); + MPI_Irecv(recvbuf_Yz, recvCount_Yz,MPI_DOUBLE,rank_Yz,recvtag,comm,&req2[17]); //................................................................................... //................................................................................... // Wait for completion of Indicator Field communication //................................................................................... - comm.waitAll(18,req1); - comm.waitAll(18,req2); + MPI_Waitall(18,req1,stat1); + MPI_Waitall(18,req2,stat2); ScaLBL_DeviceBarrier(); //................................................................................... //................................................................................... @@ -2423,28 +2442,28 @@ int main(int argc, char **argv) //........................................................................... 
comm.barrier(); - nwp_volume_global = comm.sumReduce( nwp_volume ); - awn_global = comm.sumReduce( awn ); - ans_global = comm.sumReduce( ans ); - aws_global = comm.sumReduce( aws ); - lwns_global = comm.sumReduce( lwns ); - As_global = comm.sumReduce( As ); - Jwn_global = comm.sumReduce( Jwn ); - Kwn_global = comm.sumReduce( Kwn ); - efawns_global = comm.sumReduce( efawns ); + MPI_Allreduce(&nwp_volume,&nwp_volume_global,1,MPI_DOUBLE,MPI_SUM,comm); + MPI_Allreduce(&awn,&awn_global,1,MPI_DOUBLE,MPI_SUM,comm); + MPI_Allreduce(&ans,&ans_global,1,MPI_DOUBLE,MPI_SUM,comm); + MPI_Allreduce(&aws,&aws_global,1,MPI_DOUBLE,MPI_SUM,comm); + MPI_Allreduce(&lwns,&lwns_global,1,MPI_DOUBLE,MPI_SUM,comm); + MPI_Allreduce(&As,&As_global,1,MPI_DOUBLE,MPI_SUM,comm); + MPI_Allreduce(&Jwn,&Jwn_global,1,MPI_DOUBLE,MPI_SUM,comm); + MPI_Allreduce(&Kwn,&Kwn_global,1,MPI_DOUBLE,MPI_SUM,comm); + MPI_Allreduce(&efawns,&efawns_global,1,MPI_DOUBLE,MPI_SUM,comm); // Phase averages - vol_w_global = comm.sumReduce( vol_w ); - vol_n_global = comm.sumReduce( vol_n ); - paw_global = comm.sumReduce( paw ); - pan_global = comm.sumReduce( pan ); - vaw_global(0) = comm.sumReduce( vaw(0) ); - van_global(0) = comm.sumReduce( van(0) ); - vawn_global(0) = comm.sumReduce( vawn(0) ); - Gwn_global(0) = comm.sumReduce( Gwn(0) ); - Gns_global(0) = comm.sumReduce( Gns(0) ); - Gws_global(0) = comm.sumReduce( Gws(0) ); - trawn_global = comm.sumReduce( trawn ); - trJwn_global = comm.sumReduce( trJwn ); + MPI_Allreduce(&vol_w,&vol_w_global,1,MPI_DOUBLE,MPI_SUM,comm); + MPI_Allreduce(&vol_n,&vol_n_global,1,MPI_DOUBLE,MPI_SUM,comm); + MPI_Allreduce(&paw,&paw_global,1,MPI_DOUBLE,MPI_SUM,comm); + MPI_Allreduce(&pan,&pan_global,1,MPI_DOUBLE,MPI_SUM,comm); + MPI_Allreduce(&vaw(0),&vaw_global(0),3,MPI_DOUBLE,MPI_SUM,comm); + MPI_Allreduce(&van(0),&van_global(0),3,MPI_DOUBLE,MPI_SUM,comm); + MPI_Allreduce(&vawn(0),&vawn_global(0),3,MPI_DOUBLE,MPI_SUM,comm); + MPI_Allreduce(&Gwn(0),&Gwn_global(0),6,MPI_DOUBLE,MPI_SUM,comm); + MPI_Allreduce(&Gns(0),&Gns_global(0),6,MPI_DOUBLE,MPI_SUM,comm); + MPI_Allreduce(&Gws(0),&Gws_global(0),6,MPI_DOUBLE,MPI_SUM,comm); + MPI_Allreduce(&trawn,&trawn_global,1,MPI_DOUBLE,MPI_SUM,comm); + MPI_Allreduce(&trJwn,&trJwn_global,1,MPI_DOUBLE,MPI_SUM,comm); comm.barrier(); //......................................................................... // Compute the change in the total surface energy based on the defined interval @@ -2670,7 +2689,7 @@ int main(int argc, char **argv) //************************************************************************/ ScaLBL_DeviceBarrier(); comm.barrier(); - stoptime = Utilities::MPI::time(); + stoptime = MPI_Wtime(); if (rank==0) printf("-------------------------------------------------------------------\n"); // Compute the walltime per timestep cputime = (stoptime - starttime)/timestep; diff --git a/tests/lbpm_BGK_simulator.cpp b/tests/lbpm_BGK_simulator.cpp index 1ac61853..8b079900 100644 --- a/tests/lbpm_BGK_simulator.cpp +++ b/tests/lbpm_BGK_simulator.cpp @@ -97,28 +97,28 @@ int main(int argc, char **argv) // Broadcast simulation parameters from rank 0 to all other procs comm.barrier(); //................................................. 
- comm.bcast(&tau,1,0); - //comm.bcast(&pBC,1,0); - //comm.bcast(&Restart,1,0); - comm.bcast(&din,1,0); - comm.bcast(&dout,1,0); - comm.bcast(&Fx,1,0); - comm.bcast(&Fy,1,0); - comm.bcast(&Fz,1,0); - comm.bcast(×tepMax,1,0); - comm.bcast(&interval,1,0); - comm.bcast(&tol,1,0); + MPI_Bcast(&tau,1,MPI_DOUBLE,0,comm); + //MPI_Bcast(&pBC,1,MPI_LOGICAL,0,comm); + // MPI_Bcast(&Restart,1,MPI_LOGICAL,0,comm); + MPI_Bcast(&din,1,MPI_DOUBLE,0,comm); + MPI_Bcast(&dout,1,MPI_DOUBLE,0,comm); + MPI_Bcast(&Fx,1,MPI_DOUBLE,0,comm); + MPI_Bcast(&Fy,1,MPI_DOUBLE,0,comm); + MPI_Bcast(&Fz,1,MPI_DOUBLE,0,comm); + MPI_Bcast(×tepMax,1,MPI_INT,0,comm); + MPI_Bcast(&interval,1,MPI_INT,0,comm); + MPI_Bcast(&tol,1,MPI_DOUBLE,0,comm); // Computational domain - comm.bcast(&Nx,1,0); - comm.bcast(&Ny,1,0); - comm.bcast(&Nz,1,0); - comm.bcast(&nprocx,1,0); - comm.bcast(&nprocy,1,0); - comm.bcast(&nprocz,1,0); - //comm.bcast(&nspheres,1,0); - comm.bcast(&Lx,1,0); - comm.bcast(&Ly,1,0); - comm.bcast(&Lz,1,0); + MPI_Bcast(&Nx,1,MPI_INT,0,comm); + MPI_Bcast(&Ny,1,MPI_INT,0,comm); + MPI_Bcast(&Nz,1,MPI_INT,0,comm); + MPI_Bcast(&nprocx,1,MPI_INT,0,comm); + MPI_Bcast(&nprocy,1,MPI_INT,0,comm); + MPI_Bcast(&nprocz,1,MPI_INT,0,comm); + //MPI_Bcast(&nspheres,1,MPI_INT,0,comm); + MPI_Bcast(&Lx,1,MPI_DOUBLE,0,comm); + MPI_Bcast(&Ly,1,MPI_DOUBLE,0,comm); + MPI_Bcast(&Lz,1,MPI_DOUBLE,0,comm); //................................................. comm.barrier(); @@ -249,7 +249,7 @@ int main(int argc, char **argv) } } } - sum = comm.sumReduce( sum_local ); + MPI_Allreduce(&sum_local,&sum,1,MPI_DOUBLE,MPI_SUM,comm); porosity = sum*iVol_global; if (rank==0) printf("Media porosity = %f \n",porosity); @@ -331,7 +331,7 @@ int main(int argc, char **argv) //.......create and start timer............ double starttime,stoptime,cputime; comm.barrier(); - starttime = Utilities::MPI::time(); + starttime = MPI_Wtime(); //......................................... double D32,Fo,Re,velocity,err1D,mag_force,vel_prev; @@ -410,7 +410,7 @@ int main(int argc, char **argv) //************************************************************************/ ScaLBL_DeviceBarrier(); comm.barrier(); - stoptime = Utilities::MPI::time(); + stoptime = MPI_Wtime(); if (rank==0) printf("-------------------------------------------------------------------\n"); // Compute the walltime per timestep cputime = (stoptime - starttime)/timestep; diff --git a/tests/lbpm_color_macro_simulator.cpp b/tests/lbpm_color_macro_simulator.cpp index c92b0c45..97df6812 100644 --- a/tests/lbpm_color_macro_simulator.cpp +++ b/tests/lbpm_color_macro_simulator.cpp @@ -39,6 +39,9 @@ int main(int argc, char **argv) int nprocx,nprocy,nprocz; int iproc,jproc,kproc; + MPI_Request req1[18],req2[18]; + MPI_Status stat1[18],stat2[18]; + if (rank == 0){ printf("********************************************************\n"); printf("Running Color LBM \n"); @@ -169,32 +172,32 @@ int main(int argc, char **argv) // Broadcast simulation parameters from rank 0 to all other procs comm.barrier(); //................................................. 
- comm.bcast(&tauA,1,0); - comm.bcast(&tauB,1,0); - comm.bcast(&rhoA,1,0); - comm.bcast(&rhoB,1,0); - comm.bcast(&alpha,1,0); - comm.bcast(&beta,1,0); - comm.bcast(&BoundaryCondition,1,0); - comm.bcast(&InitialCondition,1,0); - comm.bcast(&din,1,0); - comm.bcast(&dout,1,0); - comm.bcast(&Fx,1,0); - comm.bcast(&Fy,1,0); - comm.bcast(&Fz,1,0); - comm.bcast(×tepMax,1,0); - comm.bcast(&RESTART_INTERVAL,1,0); - comm.bcast(&tol,1,0); + MPI_Bcast(&tauA,1,MPI_DOUBLE,0,comm); + MPI_Bcast(&tauB,1,MPI_DOUBLE,0,comm); + MPI_Bcast(&rhoA,1,MPI_DOUBLE,0,comm); + MPI_Bcast(&rhoB,1,MPI_DOUBLE,0,comm); + MPI_Bcast(&alpha,1,MPI_DOUBLE,0,comm); + MPI_Bcast(&beta,1,MPI_DOUBLE,0,comm); + MPI_Bcast(&BoundaryCondition,1,MPI_INT,0,comm); + MPI_Bcast(&InitialCondition,1,MPI_INT,0,comm); + MPI_Bcast(&din,1,MPI_DOUBLE,0,comm); + MPI_Bcast(&dout,1,MPI_DOUBLE,0,comm); + MPI_Bcast(&Fx,1,MPI_DOUBLE,0,comm); + MPI_Bcast(&Fy,1,MPI_DOUBLE,0,comm); + MPI_Bcast(&Fz,1,MPI_DOUBLE,0,comm); + MPI_Bcast(×tepMax,1,MPI_INT,0,comm); + MPI_Bcast(&RESTART_INTERVAL,1,MPI_INT,0,comm); + MPI_Bcast(&tol,1,MPI_DOUBLE,0,comm); // Computational domain - comm.bcast(&Nx,1,0); - comm.bcast(&Ny,1,0); - comm.bcast(&Nz,1,0); - comm.bcast(&nprocx,1,0); - comm.bcast(&nprocy,1,0); - comm.bcast(&nprocz,1,0); - comm.bcast(&Lx,1,0); - comm.bcast(&Ly,1,0); - comm.bcast(&Lz,1,0); + MPI_Bcast(&Nx,1,MPI_INT,0,comm); + MPI_Bcast(&Ny,1,MPI_INT,0,comm); + MPI_Bcast(&Nz,1,MPI_INT,0,comm); + MPI_Bcast(&nprocx,1,MPI_INT,0,comm); + MPI_Bcast(&nprocy,1,MPI_INT,0,comm); + MPI_Bcast(&nprocz,1,MPI_INT,0,comm); + MPI_Bcast(&Lx,1,MPI_DOUBLE,0,comm); + MPI_Bcast(&Ly,1,MPI_DOUBLE,0,comm); + MPI_Bcast(&Lz,1,MPI_DOUBLE,0,comm); //................................................. flux = 0.f; @@ -319,7 +322,7 @@ int main(int argc, char **argv) timestep=0; } } - comm.bcast(×tep,1,0); + MPI_Bcast(×tep,1,MPI_INT,0,comm); FILE *RESTART = fopen(LocalRestartFile,"rb"); if (IDFILE==NULL) ERROR("lbpm_color_simulator: Error opening file: Restart.xxxxx"); readID=fread(id,1,N,RESTART); @@ -358,7 +361,7 @@ int main(int argc, char **argv) } } } - sum - comm.sumReduce( sum_local ); + MPI_Allreduce(&sum_local,&sum,1,MPI_DOUBLE,MPI_SUM,comm); porosity = sum*iVol_global; if (rank==0) printf("Media porosity = %f \n",porosity); //......................................................... @@ -534,7 +537,7 @@ int main(int argc, char **argv) double starttime,stoptime,cputime; ScaLBL_DeviceBarrier(); comm.barrier(); - starttime = Utilities::MPI::time(); + starttime = MPI_Wtime(); //......................................... 
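The timer change here repeats in every driver below: the wrapper call Utilities::MPI::time() is replaced by the raw MPI_Wtime(), and the per-step cost is reported as elapsed wall time divided by the number of completed timesteps. A minimal sketch of that convention, assuming a communicator named comm and a loop counter timestep (illustrative only):

    double starttime, stoptime, cputime;
    MPI_Barrier(comm);                    // synchronize before starting the clock
    starttime = MPI_Wtime();              // wall-clock seconds from an arbitrary origin
    // ... run `timestep` lattice Boltzmann iterations ...
    MPI_Barrier(comm);
    stoptime = MPI_Wtime();
    cputime = (stoptime - starttime) / timestep;   // average wall time per timestep
    if (rank == 0) printf("CPU time = %f per timestep \n", cputime);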
err = 1.0; @@ -634,7 +637,7 @@ int main(int argc, char **argv) //************************************************************************ ScaLBL_DeviceBarrier(); comm.barrier(); - stoptime = Utilities::MPI::time(); + stoptime = MPI_Wtime(); if (rank==0) printf("-------------------------------------------------------------------\n"); // Compute the walltime per timestep cputime = (stoptime - starttime)/timestep; diff --git a/tests/lbpm_disc_pp.cpp b/tests/lbpm_disc_pp.cpp index 41825c7d..20d41884 100644 --- a/tests/lbpm_disc_pp.cpp +++ b/tests/lbpm_disc_pp.cpp @@ -9,7 +9,7 @@ #include "analysis/pmmc.h" #include "common/Domain.h" #include "common/Communication.h" -#include "common/MPI.h" +#include "common/MPI.h" // This includes mpi.h #include "common/SpherePack.h" /* @@ -147,6 +147,8 @@ int main(int argc, char **argv) int rank_xz,rank_XZ,rank_xZ,rank_Xz; int rank_yz,rank_YZ,rank_yZ,rank_Yz; //********************************** + MPI_Request req1[18],req2[18]; + MPI_Status stat1[18],stat2[18]; int depth; @@ -187,16 +189,16 @@ int main(int argc, char **argv) comm.barrier(); //................................................. // Computational domain - comm.bcast(&Nx,1,0); - comm.bcast(&Ny,1,0); - comm.bcast(&Nz,1,0); - comm.bcast(&nprocx,1,0); - comm.bcast(&nprocy,1,0); - comm.bcast(&nprocz,1,0); - comm.bcast(&ndiscs,1,0); - comm.bcast(&Lx,1,0); - comm.bcast(&Ly,1,0); - comm.bcast(&Lz,1,0); + MPI_Bcast(&Nx,1,MPI_INT,0,comm); + MPI_Bcast(&Ny,1,MPI_INT,0,comm); + MPI_Bcast(&Nz,1,MPI_INT,0,comm); + MPI_Bcast(&nprocx,1,MPI_INT,0,comm); + MPI_Bcast(&nprocy,1,MPI_INT,0,comm); + MPI_Bcast(&nprocz,1,MPI_INT,0,comm); + MPI_Bcast(&ndiscs,1,MPI_INT,0,comm); + MPI_Bcast(&Lx,1,MPI_DOUBLE,0,comm); + MPI_Bcast(&Ly,1,MPI_DOUBLE,0,comm); + MPI_Bcast(&Lz,1,MPI_DOUBLE,0,comm); //................................................. comm.barrier(); @@ -273,9 +275,9 @@ int main(int argc, char **argv) if (rank == 0) ReadDiscPacking(ndiscs,cx,cy,rad); comm.barrier(); // Broadcast the sphere packing to all processes - comm.bcast(cx,ndiscs,0); - comm.bcast(cy,ndiscs,0); - comm.bcast(rad,ndiscs,0); + MPI_Bcast(cx,ndiscs,MPI_DOUBLE,0,comm); + MPI_Bcast(cy,ndiscs,MPI_DOUBLE,0,comm); + MPI_Bcast(rad,ndiscs,MPI_DOUBLE,0,comm); //........................................................................... comm.barrier(); if (rank == 0){ @@ -344,7 +346,7 @@ int main(int argc, char **argv) } } sum_local = 1.0*sum; - porosity = comm.sumReduce( sum_local ); + MPI_Allreduce(&sum_local,&porosity,1,MPI_DOUBLE,MPI_SUM,comm); porosity = porosity*iVol_global; if (rank==0) printf("Media porosity = %f \n",porosity); @@ -360,7 +362,7 @@ int main(int argc, char **argv) } } } - pore_vol = comm.sumReduce( sum_local ); + MPI_Allreduce(&sum_local,&pore_vol,1,MPI_DOUBLE,MPI_SUM,comm); //......................................................... 
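The reductions in these hunks all translate the same way: comm.sumReduce(local) becomes an explicit MPI_Allreduce with the count, datatype, and reduction operator spelled out, so every rank receives the global value. Sketched in isolation, assuming a double-valued local contribution and an MPI_Comm named comm:

    double sum_local = 0.0, sum_global = 0.0;
    // ... accumulate the local pore-volume / porosity contribution into sum_local ...
    MPI_Allreduce(&sum_local, &sum_global, 1, MPI_DOUBLE, MPI_SUM, comm);
    // integer counters use MPI_INT in place of MPI_DOUBLE, as in the saturation counts below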
// don't perform computations at the eight corners diff --git a/tests/lbpm_inkbottle_pp.cpp b/tests/lbpm_inkbottle_pp.cpp index ca188633..669ab8c0 100644 --- a/tests/lbpm_inkbottle_pp.cpp +++ b/tests/lbpm_inkbottle_pp.cpp @@ -81,16 +81,16 @@ int main(int argc, char **argv) // Broadcast simulation parameters from rank 0 to all other procs comm.barrier(); // Computational domain - comm.bcast(&Nx,1,0); - comm.bcast(&Ny,1,0); - comm.bcast(&Nz,1,0); - comm.bcast(&nprocx,1,0); - comm.bcast(&nprocy,1,0); - comm.bcast(&nprocz,1,0); - comm.bcast(&nspheres,1,0); - comm.bcast(&Lx,1,0); - comm.bcast(&Ly,1,0); - comm.bcast(&Lz,1,0); + MPI_Bcast(&Nx,1,MPI_INT,0,comm); + MPI_Bcast(&Ny,1,MPI_INT,0,comm); + MPI_Bcast(&Nz,1,MPI_INT,0,comm); + MPI_Bcast(&nprocx,1,MPI_INT,0,comm); + MPI_Bcast(&nprocy,1,MPI_INT,0,comm); + MPI_Bcast(&nprocz,1,MPI_INT,0,comm); + MPI_Bcast(&nspheres,1,MPI_INT,0,comm); + MPI_Bcast(&Lx,1,MPI_DOUBLE,0,comm); + MPI_Bcast(&Ly,1,MPI_DOUBLE,0,comm); + MPI_Bcast(&Lz,1,MPI_DOUBLE,0,comm); //................................................. comm.barrier(); @@ -197,7 +197,7 @@ int main(int argc, char **argv) } } } - pore_vol = comm.sumReduce( sum_local ); + MPI_Allreduce(&sum_local,&pore_vol,1,MPI_DOUBLE,MPI_SUM,comm); //......................................................... // don't perform computations at the eight corners diff --git a/tests/lbpm_juanes_bench_disc_pp.cpp b/tests/lbpm_juanes_bench_disc_pp.cpp index a90d43f8..47d8cb84 100644 --- a/tests/lbpm_juanes_bench_disc_pp.cpp +++ b/tests/lbpm_juanes_bench_disc_pp.cpp @@ -9,7 +9,7 @@ #include "analysis/pmmc.h" #include "common/Domain.h" #include "common/Communication.h" -#include "common/MPI.h" +#include "common/MPI.h" // This includes mpi.h #include "common/SpherePack.h" /* @@ -147,6 +147,9 @@ int main(int argc, char **argv) int rank_xz,rank_XZ,rank_xZ,rank_Xz; int rank_yz,rank_YZ,rank_yZ,rank_Yz; //********************************** + MPI_Request req1[18],req2[18]; + MPI_Status stat1[18],stat2[18]; + if (rank == 0){ printf("********************************************************\n"); @@ -190,16 +193,16 @@ int main(int argc, char **argv) comm.barrier(); //................................................. // Computational domain - comm.bcast(&Nx,1,0); - comm.bcast(&Ny,1,0); - comm.bcast(&Nz,1,0); - comm.bcast(&nprocx,1,0); - comm.bcast(&nprocy,1,0); - comm.bcast(&nprocz,1,0); - comm.bcast(&ndiscs,1,0); - comm.bcast(&Lx,1,0); - comm.bcast(&Ly,1,0); - comm.bcast(&Lz,1,0); + MPI_Bcast(&Nx,1,MPI_INT,0,comm); + MPI_Bcast(&Ny,1,MPI_INT,0,comm); + MPI_Bcast(&Nz,1,MPI_INT,0,comm); + MPI_Bcast(&nprocx,1,MPI_INT,0,comm); + MPI_Bcast(&nprocy,1,MPI_INT,0,comm); + MPI_Bcast(&nprocz,1,MPI_INT,0,comm); + MPI_Bcast(&ndiscs,1,MPI_INT,0,comm); + MPI_Bcast(&Lx,1,MPI_DOUBLE,0,comm); + MPI_Bcast(&Ly,1,MPI_DOUBLE,0,comm); + MPI_Bcast(&Lz,1,MPI_DOUBLE,0,comm); //................................................. comm.barrier(); @@ -289,9 +292,9 @@ int main(int argc, char **argv) if (rank == 0) ReadDiscPacking(ndiscs,cx,cy,rad); comm.barrier(); // Broadcast the sphere packing to all processes - comm.bcast(cx,ndiscs,0); - comm.bcast(cy,ndiscs,0); - comm.bcast(rad,ndiscs,0); + MPI_Bcast(cx,ndiscs,MPI_DOUBLE,0,comm); + MPI_Bcast(cy,ndiscs,MPI_DOUBLE,0,comm); + MPI_Bcast(rad,ndiscs,MPI_DOUBLE,0,comm); //........................................................................... 
comm.barrier(); /* if (rank == 0){ @@ -433,7 +436,7 @@ int main(int argc, char **argv) } } sum_local = 1.0*sum; - porosity = comm.sumReduce( sum_local ); + MPI_Allreduce(&sum_local,&porosity,1,MPI_DOUBLE,MPI_SUM,comm); porosity = porosity*iVol_global; if (rank==0) printf("Media porosity = %f \n",porosity); @@ -449,7 +452,7 @@ int main(int argc, char **argv) } } } - pore_vol = comm.sumReduce( sum_local ); + MPI_Allreduce(&sum_local,&pore_vol,1,MPI_DOUBLE,MPI_SUM,comm); //......................................................... // don't perform computations at the eight corners diff --git a/tests/lbpm_nondarcy_simulator.cpp b/tests/lbpm_nondarcy_simulator.cpp index a25fef69..096dc790 100644 --- a/tests/lbpm_nondarcy_simulator.cpp +++ b/tests/lbpm_nondarcy_simulator.cpp @@ -94,6 +94,8 @@ int main(int argc, char **argv) int rank_xz,rank_XZ,rank_xZ,rank_Xz; int rank_yz,rank_YZ,rank_yZ,rank_Yz; //********************************** + MPI_Request req1[18],req2[18]; + MPI_Status stat1[18],stat2[18]; double REYNOLDS_NUMBER = 100.f; if (argc > 1){ @@ -156,28 +158,28 @@ int main(int argc, char **argv) // Broadcast simulation parameters from rank 0 to all other procs comm.barrier(); //................................................. - comm.bcast(&tau,1,0); - //comm.bcast(&pBC,1,0); - //comm.bcast(&Restart,1,0); - comm.bcast(&din,1,0); - comm.bcast(&dout,1,0); - comm.bcast(&Fx,1,0); - comm.bcast(&Fy,1,0); - comm.bcast(&Fz,1,0); - comm.bcast(×tepMax,1,0); - comm.bcast(&interval,1,0); - comm.bcast(&tol,1,0); + MPI_Bcast(&tau,1,MPI_DOUBLE,0,comm); + //MPI_Bcast(&pBC,1,MPI_LOGICAL,0,comm); + // MPI_Bcast(&Restart,1,MPI_LOGICAL,0,comm); + MPI_Bcast(&din,1,MPI_DOUBLE,0,comm); + MPI_Bcast(&dout,1,MPI_DOUBLE,0,comm); + MPI_Bcast(&Fx,1,MPI_DOUBLE,0,comm); + MPI_Bcast(&Fy,1,MPI_DOUBLE,0,comm); + MPI_Bcast(&Fz,1,MPI_DOUBLE,0,comm); + MPI_Bcast(×tepMax,1,MPI_INT,0,comm); + MPI_Bcast(&interval,1,MPI_INT,0,comm); + MPI_Bcast(&tol,1,MPI_DOUBLE,0,comm); // Computational domain - comm.bcast(&Nx,1,0); - comm.bcast(&Ny,1,0); - comm.bcast(&Nz,1,0); - comm.bcast(&nprocx,1,0); - comm.bcast(&nprocy,1,0); - comm.bcast(&nprocz,1,0); - comm.bcast(&nspheres,1,0); - comm.bcast(&Lx,1,0); - comm.bcast(&Ly,1,0); - comm.bcast(&Lz,1,0); + MPI_Bcast(&Nx,1,MPI_INT,0,comm); + MPI_Bcast(&Ny,1,MPI_INT,0,comm); + MPI_Bcast(&Nz,1,MPI_INT,0,comm); + MPI_Bcast(&nprocx,1,MPI_INT,0,comm); + MPI_Bcast(&nprocy,1,MPI_INT,0,comm); + MPI_Bcast(&nprocz,1,MPI_INT,0,comm); + MPI_Bcast(&nspheres,1,MPI_INT,0,comm); + MPI_Bcast(&Lx,1,MPI_DOUBLE,0,comm); + MPI_Bcast(&Ly,1,MPI_DOUBLE,0,comm); + MPI_Bcast(&Lz,1,MPI_DOUBLE,0,comm); //................................................. comm.barrier(); @@ -306,8 +308,8 @@ int main(int argc, char **argv) } } } - por_vol = comm.sumReduce( sum_local ); - //porosity = comm.sumReduce( sum_local ); + MPI_Allreduce(&sum_local,&pore_vol,1,MPI_DOUBLE,MPI_SUM,comm); + // MPI_Allreduce(&sum_local,&porosity,1,MPI_DOUBLE,MPI_SUM,comm); porosity = pore_vol*iVol_global; if (rank==0) printf("Media porosity = %f \n",porosity); //......................................................... @@ -431,7 +433,7 @@ int main(int argc, char **argv) //.......create and start timer............ double starttime,stoptime,cputime; comm.barrier(); - starttime = Utilities::MPI::time(); + starttime = MPI_Wtime(); //......................................... 
double D32,vawx,vawy,vawz,Fo,Re,velocity,err1D,mag_force,vel_prev; @@ -552,7 +554,7 @@ int main(int argc, char **argv) fclose(NONDARCY); ScaLBL_DeviceBarrier(); comm.barrier(); - stoptime = Utilities::MPI::time(); + stoptime = MPI_Wtime(); if (rank==0) printf("-------------------------------------------------------------------\n"); // Compute the walltime per timestep cputime = (stoptime - starttime)/timestep; diff --git a/tests/lbpm_nonnewtonian_simulator.cpp b/tests/lbpm_nonnewtonian_simulator.cpp index bea3a814..ff8792e7 100644 --- a/tests/lbpm_nonnewtonian_simulator.cpp +++ b/tests/lbpm_nonnewtonian_simulator.cpp @@ -124,6 +124,8 @@ int main(int argc, char **argv) // int rank_xz,rank_XZ,rank_xZ,rank_Xz; // int rank_yz,rank_YZ,rank_yZ,rank_Yz; //********************************** + MPI_Request req1[18],req2[18]; + MPI_Status stat1[18],stat2[18]; if (rank == 0){ printf("********************************************************\n"); @@ -426,8 +428,8 @@ int main(int argc, char **argv) } } - pore_vol = comm.sumReduce( sum_local ); /* 6 */ - //porosity = comm.sumReduce( sum_local ); + MPI_Allreduce(&sum_local,&pore_vol,1,MPI_DOUBLE,MPI_SUM,comm); /* 6 */ + //MPI_Allreduce(&sum_local,&porosity,1,MPI_DOUBLE,MPI_SUM,comm); porosity = pore_vol*iVol_global; if (rank==0) printf("Media porosity = %f \n",porosity); @@ -572,7 +574,7 @@ int main(int argc, char **argv) timestep=5; } } - comm.bcast(×tep,1,0); + MPI_Bcast(×tep,1,MPI_INT,0,comm); // Read in the restart file to CPU buffers double *cDen = new double[2*N]; @@ -660,7 +662,7 @@ int main(int argc, char **argv) //.......create and start timer............ double starttime,stoptime,cputime; comm.barrier(); - starttime = Utilities::MPI::time(); + starttime = MPI_Wtime(); /* * Create the thread pool @@ -808,7 +810,7 @@ int main(int argc, char **argv) //************************************************************************/ ScaLBL_DeviceBarrier(); comm.barrier(); - stoptime = Utilities::MPI::time(); + stoptime = MPI_Wtime(); if (rank==0) printf("-------------------------------------------------------------------\n"); // Compute the walltime per timestep cputime = (stoptime - starttime)/timestep; @@ -833,6 +835,20 @@ int main(int argc, char **argv) + + + + + + + + + + + + + + // Scrap // if (rank==0){ diff --git a/tests/lbpm_plates_pp.cpp b/tests/lbpm_plates_pp.cpp index 37191979..acd64f52 100644 --- a/tests/lbpm_plates_pp.cpp +++ b/tests/lbpm_plates_pp.cpp @@ -31,6 +31,8 @@ int main(int argc, char **argv) int rank_xz,rank_XZ,rank_xZ,rank_Xz; int rank_yz,rank_YZ,rank_yZ,rank_Yz; //********************************** + MPI_Request req1[18],req2[18]; + MPI_Status stat1[18],stat2[18]; double TubeRadius =15.0; double WIDTH; @@ -75,16 +77,16 @@ int main(int argc, char **argv) // Broadcast simulation parameters from rank 0 to all other procs comm.barrier(); // Computational domain - comm.bcast(&Nx,1,0); - comm.bcast(&Ny,1,0); - comm.bcast(&Nz,1,0); - comm.bcast(&nprocx,1,0); - comm.bcast(&nprocy,1,0); - comm.bcast(&nprocz,1,0); - comm.bcast(&nspheres,1,0); - comm.bcast(&Lx,1,0); - comm.bcast(&Ly,1,0); - comm.bcast(&Lz,1,0); + MPI_Bcast(&Nx,1,MPI_INT,0,comm); + MPI_Bcast(&Ny,1,MPI_INT,0,comm); + MPI_Bcast(&Nz,1,MPI_INT,0,comm); + MPI_Bcast(&nprocx,1,MPI_INT,0,comm); + MPI_Bcast(&nprocy,1,MPI_INT,0,comm); + MPI_Bcast(&nprocz,1,MPI_INT,0,comm); + MPI_Bcast(&nspheres,1,MPI_INT,0,comm); + MPI_Bcast(&Lx,1,MPI_DOUBLE,0,comm); + MPI_Bcast(&Ly,1,MPI_DOUBLE,0,comm); + MPI_Bcast(&Lz,1,MPI_DOUBLE,0,comm); //................................................. 
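The input broadcasts follow the same one-for-one mapping throughout: comm.bcast(&x,1,0) maps to MPI_Bcast(&x,1,<type>,0,comm), with MPI_INT for integer domain parameters, MPI_DOUBLE for physical lengths, and the array length as the count when whole arrays (e.g. the disc or sphere packing) are sent. A reduced sketch, assuming rank 0 has already read the input and comm is the communicator:

    MPI_Bcast(&Nx, 1, MPI_INT,    0, comm);            // scalar integer parameter
    MPI_Bcast(&Lx, 1, MPI_DOUBLE, 0, comm);            // scalar double parameter
    MPI_Bcast(cx,  nspheres, MPI_DOUBLE, 0, comm);     // full array: count = array length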
comm.barrier(); @@ -174,7 +176,7 @@ int main(int argc, char **argv) } } } - pore_vol = comm.sumReduce( sum_local ); + MPI_Allreduce(&sum_local,&pore_vol,1,MPI_DOUBLE,MPI_SUM,comm); //......................................................... // don't perform computations at the eight corners diff --git a/tests/lbpm_porenetwork_pp.cpp b/tests/lbpm_porenetwork_pp.cpp index 1715811f..4a6ccda7 100644 --- a/tests/lbpm_porenetwork_pp.cpp +++ b/tests/lbpm_porenetwork_pp.cpp @@ -24,6 +24,9 @@ int main(int argc, char **argv) int iproc,jproc,kproc; int sendtag,recvtag; //***************************************** + MPI_Request req1[18],req2[18]; + MPI_Status stat1[18],stat2[18]; + //********************************** int nsph,ncyl, BC; nsph = atoi(argv[1]); @@ -64,16 +67,16 @@ int main(int argc, char **argv) // Broadcast simulation parameters from rank 0 to all other procs comm.barrier(); // Computational domain - comm.bcast(&Nx,1,0); - comm.bcast(&Ny,1,0); - comm.bcast(&Nz,1,0); - comm.bcast(&nprocx,1,0); - comm.bcast(&nprocy,1,0); - comm.bcast(&nprocz,1,0); - comm.bcast(&nspheres,1,0); - comm.bcast(&Lx,1,0); - comm.bcast(&Ly,1,0); - comm.bcast(&Lz,1,0); + MPI_Bcast(&Nx,1,MPI_INT,0,comm); + MPI_Bcast(&Ny,1,MPI_INT,0,comm); + MPI_Bcast(&Nz,1,MPI_INT,0,comm); + MPI_Bcast(&nprocx,1,MPI_INT,0,comm); + MPI_Bcast(&nprocy,1,MPI_INT,0,comm); + MPI_Bcast(&nprocz,1,MPI_INT,0,comm); + MPI_Bcast(&nspheres,1,MPI_INT,0,comm); + MPI_Bcast(&Lx,1,MPI_DOUBLE,0,comm); + MPI_Bcast(&Ly,1,MPI_DOUBLE,0,comm); + MPI_Bcast(&Lz,1,MPI_DOUBLE,0,comm); //................................................. comm.barrier(); @@ -266,7 +269,7 @@ int main(int argc, char **argv) } } } - pore_vol = comm.sumReduce( sum_local ); + MPI_Allreduce(&sum_local,&pore_vol,1,MPI_DOUBLE,MPI_SUM,comm); if (rank==0) printf("Pore volume = %f \n",pore_vol/double(Nx*Ny*Nz)); //......................................................... // don't perform computations at the eight corners diff --git a/tests/lbpm_random_pp.cpp b/tests/lbpm_random_pp.cpp index 8318f50f..ad4b83cc 100644 --- a/tests/lbpm_random_pp.cpp +++ b/tests/lbpm_random_pp.cpp @@ -98,16 +98,16 @@ int main(int argc, char **argv) } comm.barrier(); // Computational domain - comm.bcast(&nx,1,0); - comm.bcast(&ny,1,0); - comm.bcast(&nz,1,0); - comm.bcast(&nprocx,1,0); - comm.bcast(&nprocy,1,0); - comm.bcast(&nprocz,1,0); - comm.bcast(&nspheres,1,0); - comm.bcast(&Lx,1,0); - comm.bcast(&Ly,1,0); - comm.bcast(&Lz,1,0); + MPI_Bcast(&nx,1,MPI_INT,0,comm); + MPI_Bcast(&ny,1,MPI_INT,0,comm); + MPI_Bcast(&nz,1,MPI_INT,0,comm); + MPI_Bcast(&nprocx,1,MPI_INT,0,comm); + MPI_Bcast(&nprocy,1,MPI_INT,0,comm); + MPI_Bcast(&nprocz,1,MPI_INT,0,comm); + MPI_Bcast(&nspheres,1,MPI_INT,0,comm); + MPI_Bcast(&Lx,1,MPI_DOUBLE,0,comm); + MPI_Bcast(&Ly,1,MPI_DOUBLE,0,comm); + MPI_Bcast(&Lz,1,MPI_DOUBLE,0,comm); //................................................. 
comm.barrier(); @@ -166,7 +166,7 @@ int main(int argc, char **argv) } } // total Global is the number of nodes in the pore-space - totalGlobal = sumReduce( count ); + MPI_Allreduce(&count,&totalGlobal,1,MPI_INT,MPI_SUM,comm); float porosity=float(totalGlobal)/(nprocx*nprocy*nprocz*(nx-2)*(ny-2)*(nz-2)); if (rank==0) printf("Media Porosity: %f \n",porosity); @@ -216,12 +216,12 @@ int main(int argc, char **argv) sizeY = SizeY[bin]; sizeZ = SizeZ[bin]; } - comm.bcast(&x,1,0); - comm.bcast(&y,1,0); - comm.bcast(&z,1,0); - comm.bcast(&sizeX,1,0); - comm.bcast(&sizeY,1,0); - comm.bcast(&sizeZ,1,0); + MPI_Bcast(&x,1,MPI_INT,0,comm); + MPI_Bcast(&y,1,MPI_INT,0,comm); + MPI_Bcast(&z,1,MPI_INT,0,comm); + MPI_Bcast(&sizeX,1,MPI_INT,0,comm); + MPI_Bcast(&sizeY,1,MPI_INT,0,comm); + MPI_Bcast(&sizeZ,1,MPI_INT,0,comm); //if (rank==0) printf("Broadcast block at %i,%i,%i \n",x,y,z); @@ -269,7 +269,7 @@ int main(int argc, char **argv) } } } - countGlobal = sumReduce( count ); + MPI_Allreduce(&count,&countGlobal,1,MPI_INT,MPI_SUM,comm); sat = float(countGlobal)/totalGlobal; //if (rank==0) printf("New count=%i\n",countGlobal); //if (rank==0) printf("New saturation=%f\n",sat); @@ -345,24 +345,42 @@ int main(int argc, char **argv) PackID(Dm.sendList_yZ, Dm.sendCount_yZ ,sendID_yZ, id); PackID(Dm.sendList_YZ, Dm.sendCount_YZ ,sendID_YZ, id); //...................................................................................... - comm.sendrecv(sendID_x,Dm.sendCount_x,Dm.rank_x(),sendtag,recvID_X,Dm.recvCount_X,Dm.rank_X(),recvtag); - comm.sendrecv(sendID_X,Dm.sendCount_X,Dm.rank_X(),sendtag,recvID_x,Dm.recvCount_x,Dm.rank_x(),recvtag); - comm.sendrecv(sendID_y,Dm.sendCount_y,Dm.rank_y(),sendtag,recvID_Y,Dm.recvCount_Y,Dm.rank_Y(),recvtag); - comm.sendrecv(sendID_Y,Dm.sendCount_Y,Dm.rank_Y(),sendtag,recvID_y,Dm.recvCount_y,Dm.rank_y(),recvtag); - comm.sendrecv(sendID_z,Dm.sendCount_z,Dm.rank_z(),sendtag,recvID_Z,Dm.recvCount_Z,Dm.rank_Z(),recvtag); - comm.sendrecv(sendID_Z,Dm.sendCount_Z,Dm.rank_Z(),sendtag,recvID_z,Dm.recvCount_z,Dm.rank_z(),recvtag); - comm.sendrecv(sendID_xy,Dm.sendCount_xy,Dm.rank_xy(),sendtag,recvID_XY,Dm.recvCount_XY,Dm.rank_XY(),recvtag); - comm.sendrecv(sendID_XY,Dm.sendCount_XY,Dm.rank_XY(),sendtag,recvID_xy,Dm.recvCount_xy,Dm.rank_xy(),recvtag); - comm.sendrecv(sendID_Xy,Dm.sendCount_Xy,Dm.rank_Xy(),sendtag,recvID_xY,Dm.recvCount_xY,Dm.rank_xY(),recvtag); - comm.sendrecv(sendID_xY,Dm.sendCount_xY,Dm.rank_xY(),sendtag,recvID_Xy,Dm.recvCount_Xy,Dm.rank_Xy(),recvtag); - comm.sendrecv(sendID_xz,Dm.sendCount_xz,Dm.rank_xz(),sendtag,recvID_XZ,Dm.recvCount_XZ,Dm.rank_XZ(),recvtag); - comm.sendrecv(sendID_XZ,Dm.sendCount_XZ,Dm.rank_XZ(),sendtag,recvID_xz,Dm.recvCount_xz,Dm.rank_xz(),recvtag); - comm.sendrecv(sendID_Xz,Dm.sendCount_Xz,Dm.rank_Xz(),sendtag,recvID_xZ,Dm.recvCount_xZ,Dm.rank_xZ(),recvtag); - comm.sendrecv(sendID_xZ,Dm.sendCount_xZ,Dm.rank_xZ(),sendtag,recvID_Xz,Dm.recvCount_Xz,Dm.rank_Xz(),recvtag); - comm.sendrecv(sendID_yz,Dm.sendCount_yz,Dm.rank_yz(),sendtag,recvID_YZ,Dm.recvCount_YZ,Dm.rank_YZ(),recvtag); - comm.sendrecv(sendID_YZ,Dm.sendCount_YZ,Dm.rank_YZ(),sendtag,recvID_yz,Dm.recvCount_yz,Dm.rank_yz(),recvtag); - comm.sendrecv(sendID_Yz,Dm.sendCount_Yz,Dm.rank_Yz(),sendtag,recvID_yZ,Dm.recvCount_yZ,Dm.rank_yZ(),recvtag); - comm.sendrecv(sendID_yZ,Dm.sendCount_yZ,Dm.rank_yZ(),sendtag,recvID_Yz,Dm.recvCount_Yz,Dm.rank_Yz(),recvtag); + MPI_Sendrecv(sendID_x,Dm.sendCount_x,MPI_CHAR,Dm.rank_x(),sendtag, + 
recvID_X,Dm.recvCount_X,MPI_CHAR,Dm.rank_X(),recvtag,comm,MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_X,Dm.sendCount_X,MPI_CHAR,Dm.rank_X(),sendtag, + recvID_x,Dm.recvCount_x,MPI_CHAR,Dm.rank_x(),recvtag,comm,MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_y,Dm.sendCount_y,MPI_CHAR,Dm.rank_y(),sendtag, + recvID_Y,Dm.recvCount_Y,MPI_CHAR,Dm.rank_Y(),recvtag,comm,MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_Y,Dm.sendCount_Y,MPI_CHAR,Dm.rank_Y(),sendtag, + recvID_y,Dm.recvCount_y,MPI_CHAR,Dm.rank_y(),recvtag,comm,MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_z,Dm.sendCount_z,MPI_CHAR,Dm.rank_z(),sendtag, + recvID_Z,Dm.recvCount_Z,MPI_CHAR,Dm.rank_Z(),recvtag,comm,MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_Z,Dm.sendCount_Z,MPI_CHAR,Dm.rank_Z(),sendtag, + recvID_z,Dm.recvCount_z,MPI_CHAR,Dm.rank_z(),recvtag,comm,MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_xy,Dm.sendCount_xy,MPI_CHAR,Dm.rank_xy(),sendtag, + recvID_XY,Dm.recvCount_XY,MPI_CHAR,Dm.rank_XY(),recvtag,comm,MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_XY,Dm.sendCount_XY,MPI_CHAR,Dm.rank_XY(),sendtag, + recvID_xy,Dm.recvCount_xy,MPI_CHAR,Dm.rank_xy(),recvtag,comm,MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_Xy,Dm.sendCount_Xy,MPI_CHAR,Dm.rank_Xy(),sendtag, + recvID_xY,Dm.recvCount_xY,MPI_CHAR,Dm.rank_xY(),recvtag,comm,MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_xY,Dm.sendCount_xY,MPI_CHAR,Dm.rank_xY(),sendtag, + recvID_Xy,Dm.recvCount_Xy,MPI_CHAR,Dm.rank_Xy(),recvtag,comm,MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_xz,Dm.sendCount_xz,MPI_CHAR,Dm.rank_xz(),sendtag, + recvID_XZ,Dm.recvCount_XZ,MPI_CHAR,Dm.rank_XZ(),recvtag,comm,MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_XZ,Dm.sendCount_XZ,MPI_CHAR,Dm.rank_XZ(),sendtag, + recvID_xz,Dm.recvCount_xz,MPI_CHAR,Dm.rank_xz(),recvtag,comm,MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_Xz,Dm.sendCount_Xz,MPI_CHAR,Dm.rank_Xz(),sendtag, + recvID_xZ,Dm.recvCount_xZ,MPI_CHAR,Dm.rank_xZ(),recvtag,comm,MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_xZ,Dm.sendCount_xZ,MPI_CHAR,Dm.rank_xZ(),sendtag, + recvID_Xz,Dm.recvCount_Xz,MPI_CHAR,Dm.rank_Xz(),recvtag,comm,MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_yz,Dm.sendCount_yz,MPI_CHAR,Dm.rank_yz(),sendtag, + recvID_YZ,Dm.recvCount_YZ,MPI_CHAR,Dm.rank_YZ(),recvtag,comm,MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_YZ,Dm.sendCount_YZ,MPI_CHAR,Dm.rank_YZ(),sendtag, + recvID_yz,Dm.recvCount_yz,MPI_CHAR,Dm.rank_yz(),recvtag,comm,MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_Yz,Dm.sendCount_Yz,MPI_CHAR,Dm.rank_Yz(),sendtag, + recvID_yZ,Dm.recvCount_yZ,MPI_CHAR,Dm.rank_yZ(),recvtag,comm,MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_yZ,Dm.sendCount_yZ,MPI_CHAR,Dm.rank_yZ(),sendtag, + recvID_Yz,Dm.recvCount_Yz,MPI_CHAR,Dm.rank_Yz(),recvtag,comm,MPI_STATUS_IGNORE); //...................................................................................... UnpackID(Dm.recvList_x, Dm.recvCount_x ,recvID_x, id); UnpackID(Dm.recvList_X, Dm.recvCount_X ,recvID_X, id); @@ -394,7 +412,7 @@ int main(int argc, char **argv) } } } - countGlobal = comm.sumReduce( count ); + MPI_Allreduce(&count,&countGlobal,1,MPI_INT,MPI_SUM,comm); sat = float(countGlobal)/totalGlobal; if (rank==0) printf("Final saturation=%f\n",sat); diff --git a/tests/lbpm_segmented_decomp.cpp b/tests/lbpm_segmented_decomp.cpp index 65b8576f..1bc89adb 100644 --- a/tests/lbpm_segmented_decomp.cpp +++ b/tests/lbpm_segmented_decomp.cpp @@ -85,23 +85,23 @@ int main(int argc, char **argv) comm.barrier(); // Computational domain //................................................. 
- comm.bcast(&nx,1,0); - comm.bcast(&ny,1,0); - comm.bcast(&nz,1,0); - comm.bcast(&nprocx,1,0); - comm.bcast(&nprocy,1,0); - comm.bcast(&nprocz,1,0); - comm.bcast(&nspheres,1,0); - comm.bcast(&Lx,1,0); - comm.bcast(&Ly,1,0); - comm.bcast(&Lz,1,0); + MPI_Bcast(&nx,1,MPI_INT,0,comm); + MPI_Bcast(&ny,1,MPI_INT,0,comm); + MPI_Bcast(&nz,1,MPI_INT,0,comm); + MPI_Bcast(&nprocx,1,MPI_INT,0,comm); + MPI_Bcast(&nprocy,1,MPI_INT,0,comm); + MPI_Bcast(&nprocz,1,MPI_INT,0,comm); + MPI_Bcast(&nspheres,1,MPI_INT,0,comm); + MPI_Bcast(&Lx,1,MPI_DOUBLE,0,comm); + MPI_Bcast(&Ly,1,MPI_DOUBLE,0,comm); + MPI_Bcast(&Lz,1,MPI_DOUBLE,0,comm); //................................................. - comm.bcast(&Nx,1,0); - comm.bcast(&Ny,1,0); - comm.bcast(&Nz,1,0); - comm.bcast(&xStart,1,0); - comm.bcast(&yStart,1,0); - comm.bcast(&zStart,1,0); + MPI_Bcast(&Nx,1,MPI_INT,0,comm); + MPI_Bcast(&Ny,1,MPI_INT,0,comm); + MPI_Bcast(&Nz,1,MPI_INT,0,comm); + MPI_Bcast(&xStart,1,MPI_INT,0,comm); + MPI_Bcast(&yStart,1,MPI_INT,0,comm); + MPI_Bcast(&zStart,1,MPI_INT,0,comm); //................................................. comm.barrier(); @@ -191,7 +191,7 @@ int main(int argc, char **argv) } else{ printf("Sending data to process %i \n", rnk); - comm.send(tmp,N,rnk,15); + MPI_Send(tmp,N,MPI_CHAR,rnk,15,comm); } } } @@ -200,7 +200,7 @@ int main(int argc, char **argv) else{ // Recieve the subdomain from rank = 0 printf("Ready to recieve data %i at process %i \n", N,rank); - comm.recv(Dm.id,N,0,15); + MPI_Recv(Dm.id,N,MPI_CHAR,0,15,comm,MPI_STATUS_IGNORE); } comm.barrier(); @@ -243,8 +243,8 @@ int main(int argc, char **argv) printf("Original label=%i, New label=%i \n",oldlabel,newlabel); } } - comm.barrier(); - comm.bcast(LabelList,2*NLABELS,0); + MPI_Barrier(MPI_COMM_WORLD); + MPI_Bcast(LabelList,2*NLABELS,MPI_INT,0,MPI_COMM_WORLD); char *newIDs; newIDs= new char [nx*ny*nz]; @@ -278,8 +278,8 @@ int main(int argc, char **argv) } } } - countGlobal = comm.sumReduce( count ); - totalGlobal = comm.sumReduce( total ); + MPI_Allreduce(&count,&countGlobal,1,MPI_INT,MPI_SUM,comm); + MPI_Allreduce(&total,&totalGlobal,1,MPI_INT,MPI_SUM,comm); float porosity = float(totalGlobal-countGlobal)/totalGlobal; @@ -321,8 +321,8 @@ int main(int argc, char **argv) } } } - countGlobal = comm.sumReduce( count ); - totalGlobal = comm.sumReduce( total ); + MPI_Allreduce(&count,&countGlobal,1,MPI_INT,MPI_SUM,comm); + MPI_Allreduce(&total,&totalGlobal,1,MPI_INT,MPI_SUM,comm); float saturation = float(countGlobal)/totalGlobal; if (rank==0) printf("wetting phase saturation=%f\n",saturation); diff --git a/tests/lbpm_segmented_pp.cpp b/tests/lbpm_segmented_pp.cpp index 484a11e2..39cf0bd1 100644 --- a/tests/lbpm_segmented_pp.cpp +++ b/tests/lbpm_segmented_pp.cpp @@ -180,7 +180,7 @@ int main(int argc, char **argv) fflush(stdout); porosity = ReadFromBlock(Dm->id,Dm->iproc(),Dm->jproc(),Dm->kproc(),nx,ny,nz); - comm.barrier(); + MPI_Barrier(MPI_COMM_WORLD); if (rank==0) printf("Writing local ID files (poros=%f) \n",porosity); fflush(stdout); FILE *ID = fopen(LocalRankFilename,"wb"); diff --git a/tests/lbpm_sphere_pp.cpp b/tests/lbpm_sphere_pp.cpp index 0df11b96..2e053eed 100644 --- a/tests/lbpm_sphere_pp.cpp +++ b/tests/lbpm_sphere_pp.cpp @@ -38,6 +38,8 @@ int main(int argc, char **argv) int rank_xz,rank_XZ,rank_xZ,rank_Xz; int rank_yz,rank_YZ,rank_yZ,rank_Yz; //********************************** + MPI_Request req1[18],req2[18]; + MPI_Status stat1[18],stat2[18]; if (rank == 0){ printf("********************************************************\n"); @@ -123,10 +125,10 
@@ int main(int argc, char **argv) if (rank == 0) ReadSpherePacking(nspheres,cx,cy,cz,rad); comm.barrier(); // Broadcast the sphere packing to all processes - comm.bcast(cx,nspheres,0); - comm.bcast(cy,nspheres,0); - comm.bcast(cz,nspheres,0); - comm.bcast(rad,nspheres,0); + MPI_Bcast(cx,nspheres,MPI_DOUBLE,0,comm); + MPI_Bcast(cy,nspheres,MPI_DOUBLE,0,comm); + MPI_Bcast(cz,nspheres,MPI_DOUBLE,0,comm); + MPI_Bcast(rad,nspheres,MPI_DOUBLE,0,comm); //........................................................................... comm.barrier(); if (rank == 0) cout << "Domain set." << endl; @@ -142,7 +144,7 @@ int main(int argc, char **argv) D = 6.0*(Nx-2)*nprocx*totVol / totArea / Lx; printf("Sauter Mean Diameter (computed from sphere packing) = %f \n",D); } - comm.bcast(&D,1,0); + MPI_Bcast(&D,1,MPI_DOUBLE,0,comm); //....................................................................... SignedDistance(SignDist.data(),nspheres,cx,cy,cz,rad,Lx,Ly,Lz,Nx,Ny,Nz, @@ -175,7 +177,7 @@ int main(int argc, char **argv) } } sum_local = 1.0*sum; - porosity = comm.sumReduce( sum_local ); + MPI_Allreduce(&sum_local,&porosity,1,MPI_DOUBLE,MPI_SUM,comm); porosity = porosity*iVol_global; if (rank==0) printf("Media porosity = %f \n",porosity); @@ -191,7 +193,7 @@ int main(int argc, char **argv) } } } - pore_vol = comm.sumReduce( sum_local ); + MPI_Allreduce(&sum_local,&pore_vol,1,MPI_DOUBLE,MPI_SUM,comm); //......................................................... // don't perform computations at the eight corners diff --git a/tests/lbpm_squaretube_pp.cpp b/tests/lbpm_squaretube_pp.cpp index a4ee5f60..c1f05aee 100644 --- a/tests/lbpm_squaretube_pp.cpp +++ b/tests/lbpm_squaretube_pp.cpp @@ -30,6 +30,9 @@ int main(int argc, char **argv) int rank_xy,rank_XY,rank_xY,rank_Xy; int rank_xz,rank_XZ,rank_xZ,rank_Xz; int rank_yz,rank_YZ,rank_yZ,rank_Yz; + //********************************** + MPI_Request req1[18],req2[18]; + MPI_Status stat1[18],stat2[18]; int ORIENTATION=2; //default: the tube is aligned with Z axis //ORIENTATION = 0: tube is aligned with X axis @@ -80,16 +83,16 @@ int main(int argc, char **argv) // Broadcast simulation parameters from rank 0 to all other procs comm.barrier(); // Computational domain - comm.bcast(&Nx,1,0); - comm.bcast(&Ny,1,0); - comm.bcast(&Nz,1,0); - comm.bcast(&nprocx,1,0); - comm.bcast(&nprocy,1,0); - comm.bcast(&nprocz,1,0); - comm.bcast(&nspheres,1,0); - comm.bcast(&Lx,1,0); - comm.bcast(&Ly,1,0); - comm.bcast(&Lz,1,0); + MPI_Bcast(&Nx,1,MPI_INT,0,comm); + MPI_Bcast(&Ny,1,MPI_INT,0,comm); + MPI_Bcast(&Nz,1,MPI_INT,0,comm); + MPI_Bcast(&nprocx,1,MPI_INT,0,comm); + MPI_Bcast(&nprocy,1,MPI_INT,0,comm); + MPI_Bcast(&nprocz,1,MPI_INT,0,comm); + MPI_Bcast(&nspheres,1,MPI_INT,0,comm); + MPI_Bcast(&Lx,1,MPI_DOUBLE,0,comm); + MPI_Bcast(&Ly,1,MPI_DOUBLE,0,comm); + MPI_Bcast(&Lz,1,MPI_DOUBLE,0,comm); //................................................. comm.barrier(); @@ -232,7 +235,7 @@ int main(int argc, char **argv) } } } - pore_vol = comm.sumReduce( sum_local ); + MPI_Allreduce(&sum_local,&pore_vol,1,MPI_DOUBLE,MPI_SUM,comm); //......................................................... 
// don't perform computations at the eight corners From 05cafcb525c1a82a922df455eb6e207b430560c9 Mon Sep 17 00:00:00 2001 From: JamesEMcclure Date: Tue, 17 Mar 2020 21:44:45 -0400 Subject: [PATCH 058/121] fix failed merge --- CMakeLists.txt | 344 +- IO/MeshDatabase.cpp | 145 +- IO/MeshDatabase.h | 4 +- IO/PIO.cpp | 12 +- IO/PackData.cpp | 105 - IO/PackData.h | 78 - IO/Writer.cpp | 20 +- IO/Writer.h | 4 +- IO/netcdf.cpp | 12 +- IO/netcdf.h | 4 +- IO/silo.cpp | 2 +- IO/silo.h | 2 +- IO/silo.hpp | 2 +- analysis/Minkowski.cpp | 16 +- analysis/Minkowski.h | 2 +- analysis/SubPhase.cpp | 154 +- analysis/SubPhase.h | 2 +- analysis/TwoPhase.cpp | 89 +- analysis/TwoPhase.h | 2 +- analysis/analysis.cpp | 83 +- analysis/analysis.h | 8 +- analysis/distance.cpp | 2 +- analysis/morphology.cpp | 106 +- analysis/runAnalysis.cpp | 49 +- analysis/runAnalysis.h | 8 +- analysis/uCT.cpp | 11 +- cmake/FindHIP.cmake | 579 ---- common/Communication.h | 198 +- common/Communication.hpp | 53 +- common/Domain.cpp | 230 +- common/Domain.h | 6 +- common/MPI.I | 1143 ------- common/MPI.cpp | 3758 --------------------- common/MPI.h | 1152 ------- common/MPI_Helpers.cpp | 266 ++ common/MPI_Helpers.h | 239 ++ IO/PackData.hpp => common/MPI_Helpers.hpp | 9 +- common/ReadMicroCT.cpp | 4 +- common/ReadMicroCT.h | 3 +- common/ScaLBL.cpp | 226 +- common/ScaLBL.h | 3 +- common/SpherePack.cpp | 1 + common/SpherePack.h | 1 + common/UnitTest.cpp | 211 +- common/UnitTest.h | 71 +- common/UtilityMacros.h | 28 +- cpu/BGK.cpp | 5 +- cpu/Color.cpp | 51 +- cpu/exe/lb2_Color_mpi.cpp | 2 +- cpu/exe/lb2_Color_wia_mpi_bubble.cpp | 2 +- models/ColorModel.cpp | 57 +- models/ColorModel.h | 6 +- models/DFHModel.cpp | 35 +- models/DFHModel.h | 6 +- models/MRTModel.cpp | 49 +- models/MRTModel.h | 6 +- tests/BlobAnalyzeParallel.cpp | 21 +- tests/BlobIdentifyParallel.cpp | 9 +- tests/ColorToBinary.cpp | 9 +- tests/ComponentLabel.cpp | 9 +- tests/GenerateSphereTest.cpp | 75 +- tests/TestBlobAnalyze.cpp | 17 +- tests/TestBlobIdentify.cpp | 37 +- tests/TestBlobIdentifyCorners.cpp | 5 +- tests/TestBubble.cpp | 46 +- tests/TestBubbleDFH.cpp | 32 +- tests/TestColorBubble.cpp | 14 +- tests/TestColorGrad.cpp | 24 +- tests/TestColorGradDFH.cpp | 18 +- tests/TestColorMassBounceback.cpp | 32 +- tests/TestColorSquareTube.cpp | 14 +- tests/TestCommD3Q19.cpp | 23 +- tests/TestDatabase.cpp | 9 +- tests/TestFluxBC.cpp | 18 +- tests/TestForceD3Q19.cpp | 7 +- tests/TestForceMoments.cpp | 30 +- tests/TestInterfaceSpeed.cpp | 32 +- tests/TestMRT.cpp | 38 +- tests/TestMap.cpp | 17 +- tests/TestMassConservationD3Q7.cpp | 11 +- tests/TestMicroCTReader.cpp | 10 +- tests/TestMomentsD3Q19.cpp | 9 +- tests/TestNetcdf.cpp | 10 +- tests/TestPoiseuille.cpp | 18 +- tests/TestPressVel.cpp | 23 +- tests/TestSegDist.cpp | 13 +- tests/TestSubphase.cpp | 9 +- tests/TestTopo3D.cpp | 9 +- tests/TestTorus.cpp | 9 +- tests/TestTorusEvolve.cpp | 9 +- tests/TestTwoPhase.cpp | 11 +- tests/TestWriter.cpp | 21 +- tests/convertIO.cpp | 15 +- tests/hello_world.cpp | 11 +- tests/lb2_CMT_wia.cpp | 2 +- tests/lb2_Color_blob_wia_mpi.cpp | 48 +- tests/lbpm_BGK_simulator.cpp | 33 +- tests/lbpm_captube_pp.cpp | 16 +- tests/lbpm_color_macro_simulator.cpp | 36 +- tests/lbpm_color_simulator.cpp | 14 +- tests/lbpm_dfh_simulator.cpp | 12 +- tests/lbpm_disc_pp.cpp | 24 +- tests/lbpm_inkbottle_pp.cpp | 20 +- tests/lbpm_juanes_bench_disc_pp.cpp | 26 +- tests/lbpm_minkowski_scalar.cpp | 23 +- tests/lbpm_morph_pp.cpp | 22 +- tests/lbpm_morphdrain_pp.cpp | 12 +- tests/lbpm_morphopen_pp.cpp | 12 +- 
tests/lbpm_nondarcy_simulator.cpp | 28 +- tests/lbpm_nonnewtonian_simulator.cpp | 83 +- tests/lbpm_nonnewtonian_simulator.h | 40 +- tests/lbpm_permeability_simulator.cpp | 13 +- tests/lbpm_plates_pp.cpp | 20 +- tests/lbpm_porenetwork_pp.cpp | 20 +- tests/lbpm_random_pp.cpp | 13 +- tests/lbpm_refine_pp.cpp | 9 +- tests/lbpm_segmented_decomp.cpp | 20 +- tests/lbpm_segmented_pp.cpp | 9 +- tests/lbpm_sphere_pp.cpp | 18 +- tests/lbpm_squaretube_pp.cpp | 20 +- tests/lbpm_uCT_maskfilter.cpp | 16 +- tests/lbpm_uCT_pp.cpp | 37 +- tests/testCommunication.cpp | 34 +- tests/test_dcel_minkowski.cpp | 8 +- tests/test_dcel_tri_normal.cpp | 4 +- 125 files changed, 2544 insertions(+), 8538 deletions(-) delete mode 100644 IO/PackData.cpp delete mode 100644 IO/PackData.h delete mode 100644 cmake/FindHIP.cmake delete mode 100644 common/MPI.I delete mode 100644 common/MPI.cpp delete mode 100644 common/MPI.h create mode 100644 common/MPI_Helpers.cpp create mode 100644 common/MPI_Helpers.h rename IO/PackData.hpp => common/MPI_Helpers.hpp (95%) diff --git a/CMakeLists.txt b/CMakeLists.txt index 1e7eeaea..acc2c2dc 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,174 +1,170 @@ -# Set some CMake properties -CMAKE_MINIMUM_REQUIRED( VERSION 3.9 ) - - -MESSAGE("====================") -MESSAGE("Configuring LBPM-WIA") -MESSAGE("====================") - - -# Set the project name -SET( PROJ LBPM ) # Set the project name for CMake -SET( LBPM_LIB lbpm-wia ) # Set the final library name -SET( LBPM_INC ) # Set an optional subfolder for includes (e.g. include/name/...) -SET( TEST_MAX_PROCS 16 ) - - -# Initialize the project -PROJECT( ${PROJ} LANGUAGES CXX ) - - -# Prevent users from building in place -IF ("${CMAKE_CURRENT_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_BINARY_DIR}" ) - MESSAGE( FATAL_ERROR "Building code in place is a bad idea" ) -ENDIF() - - -# Set the default C++ standard -SET( CMAKE_CXX_EXTENSIONS OFF ) -IF ( NOT CMAKE_CXX_STANDARD ) - IF ( CXX_STD ) - MESSAGE( FATAL_ERROR "CXX_STD is obsolete, please set CMAKE_CXX_STANDARD" ) - ENDIF() - SET( CMAKE_CXX_STANDARD 14 ) -ENDIF() -IF ( ( "${CMAKE_CXX_STANDARD}" GREATER "90" ) OR ( "${CMAKE_CXX_STANDARD}" LESS "14" ) ) - MESSAGE( FATAL_ERROR "C++14 or newer required" ) -ENDIF() - - -# Set source/install paths -SET( ${PROJ}_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}" ) -SET( ${PROJ}_BUILD_DIR "${CMAKE_CURRENT_BINARY_DIR}" ) -IF( ${PROJ}_INSTALL_DIR ) - SET( ${PROJ}_INSTALL_DIR "${${PROJ}_INSTALL_DIR}" ) -ELSEIF( PREFIX ) - SET( ${PROJ}_INSTALL_DIR "${PREFIX}" ) -ELSEIF( NOT ${PROJ}_INSTALL_DIR ) - SET( ${PROJ}_INSTALL_DIR "${CMAKE_CURRENT_BINARY_DIR}" ) -ENDIF() -INCLUDE_DIRECTORIES( "${${PROJ}_INSTALL_DIR}/include" ) -SET( CMAKE_MODULE_PATH ${${PROJ}_SOURCE_DIR} ${${PROJ}_SOURCE_DIR}/cmake ) - - -# Include macros -INCLUDE( "${CMAKE_CURRENT_SOURCE_DIR}/cmake/macros.cmake" ) -INCLUDE( "${CMAKE_CURRENT_SOURCE_DIR}/cmake/libraries.cmake" ) -INCLUDE( "${CMAKE_CURRENT_SOURCE_DIR}/cmake/LBPM-macros.cmake" ) - - -# Check if we are only compiling docs -CHECK_ENABLE_FLAG( ONLY_BUILD_DOCS 0 ) - - -# Set testing paramaters -SET( DROP_METHOD "http" ) -SET( DROP_SITE "" ) -SET( DROP_LOCATION "/CDash/submit.php?project=LBPM-WIA" ) -SET( TRIGGER_SITE "" ) -SET( DROP_SITE_CDASH TRUE ) -ENABLE_TESTING() -INCLUDE( CTest ) - - -# Check the compile mode and compile flags -IF ( NOT ONLY_BUILD_DOCS ) - CONFIGURE_SYSTEM() -ENDIF() - - -# Add some directories to include -INCLUDE_DIRECTORIES( "${${PROJ}_INSTALL_DIR}/include" ) - - -# Create the target for documentation -ADD_CUSTOM_TARGET( doc ) 
-ADD_CUSTOM_TARGET( latex_docs ) -CHECK_ENABLE_FLAG( USE_DOXYGEN 1 ) -CHECK_ENABLE_FLAG( USE_LATEX 1 ) -FILE( MAKE_DIRECTORY "${${PROJ}_INSTALL_DIR}/doc" ) -IF ( USE_DOXYGEN ) - SET( DOXYFILE_LATEX YES ) - SET( DOXYFILE_IN "${${PROJ}_SOURCE_DIR}/doxygen/Doxyfile.in" ) - SET( DOXY_HEADER_FILE "${${PROJ}_SOURCE_DIR}/doxygen/html/header.html" ) - SET( DOXY_FOOTER_FILE "${${PROJ}_SOURCE_DIR}/doxygen/html/footer.html" ) - SET( DOXYFILE_OUTPUT_DIR "${${PROJ}_INSTALL_DIR}/doc" ) - SET( DOXYFILE_SRC_HTML_DIR "${${PROJ}_SOURCE_DIR}/doxygen/html" ) - SET( DOXYFILE_SOURCE_DIR "${${PROJ}_SOURCE_DIR}" ) - SET( REL_PACKAGE_HTML "" ) - SET( DOXYGEN_MACROS "" ) - MESSAGE("DOXYGEN_MACROS = ${DOXYGEN_MACROS}") - INCLUDE( "${${PROJ}_SOURCE_DIR}/cmake/UseDoxygen.cmake" ) - IF ( DOXYGEN_FOUND ) - ADD_DEPENDENCIES( doxygen latex_docs ) - ADD_DEPENDENCIES( doc latex_docs doxygen ) - ELSE() - SET( USE_DOXYGEN 0 ) - ENDIF() -ENDIF() - - -# Create custom targets for build-test, check, and distclean -ADD_CUSTOM_TARGET( build-test ) -ADD_CUSTOM_TARGET( build-examples ) -ADD_CUSTOM_TARGET( check COMMAND make test ) -ADD_DISTCLEAN( analysis null_timer tests liblbpm-wia.* cpu gpu example common IO threadpool StackTrace ) - - -# Check for CUDA -CHECK_ENABLE_FLAG( USE_CUDA 0 ) -CHECK_ENABLE_FLAG( USE_HIP 0 ) -NULL_USE( CMAKE_CUDA_FLAGS ) -IF ( USE_CUDA ) - ADD_DEFINITIONS( -DUSE_CUDA ) - ENABLE_LANGUAGE( CUDA ) -ELSEIF ( USE_HIP ) - FIND_PACKAGE( HIP ) - MESSAGE( FATAL_ERROR "STOP" ) -ENDIF() - - -# Configure external packages -IF ( NOT ONLY_BUILD_DOCS ) - CONFIGURE_MPI() # MPI must be before other libraries - CONFIGURE_MIC() - CONFIGURE_NETCDF() - CONFIGURE_SILO() - CONFIGURE_LBPM() - CONFIGURE_TIMER( 0 "${${PROJ}_INSTALL_DIR}/null_timer" ) - CONFIGURE_LINE_COVERAGE() - # Set the external library link list - SET( EXTERNAL_LIBS ${EXTERNAL_LIBS} ${TIMER_LIBS} ) -ENDIF() - - - -# Macro to create 1,2,4 processor tests -MACRO( ADD_LBPM_TEST_1_2_4 EXENAME ${ARGN} ) - ADD_LBPM_TEST( ${EXENAME} ${ARGN} ) - ADD_LBPM_TEST_PARALLEL( ${EXENAME} 2 ${ARGN} ) - ADD_LBPM_TEST_PARALLEL( ${EXENAME} 4 ${ARGN} ) -ENDMACRO() - - -# Add the src directories -IF ( NOT ONLY_BUILD_DOCS ) - BEGIN_PACKAGE_CONFIG( lbpm-wia-library ) - ADD_PACKAGE_SUBDIRECTORY( common ) - ADD_PACKAGE_SUBDIRECTORY( analysis ) - ADD_PACKAGE_SUBDIRECTORY( IO ) - ADD_PACKAGE_SUBDIRECTORY( threadpool ) - ADD_PACKAGE_SUBDIRECTORY( StackTrace ) - ADD_PACKAGE_SUBDIRECTORY( models ) - IF ( USE_CUDA ) - ADD_PACKAGE_SUBDIRECTORY( gpu ) - ELSE() - ADD_PACKAGE_SUBDIRECTORY( cpu ) - ENDIF() - INSTALL_LBPM_TARGET( lbpm-wia-library ) - ADD_SUBDIRECTORY( tests ) - ADD_SUBDIRECTORY( example ) - #ADD_SUBDIRECTORY( workflows ) - INSTALL_PROJ_LIB() -ENDIF() - +# Set some CMake properties +CMAKE_MINIMUM_REQUIRED( VERSION 3.9 ) + + +MESSAGE("====================") +MESSAGE("Configuring LBPM-WIA") +MESSAGE("====================") + + +# Set the project name +SET( PROJ LBPM ) # Set the project name for CMake +SET( LBPM_LIB lbpm-wia ) # Set the final library name +SET( LBPM_INC ) # Set an optional subfolder for includes (e.g. include/name/...) 
+SET( TEST_MAX_PROCS 16 ) + + +# Initialize the project +PROJECT( ${PROJ} LANGUAGES CXX ) + + +# Prevent users from building in place +IF ("${CMAKE_CURRENT_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_BINARY_DIR}" ) + MESSAGE( FATAL_ERROR "Building code in place is a bad idea" ) +ENDIF() + + +# Set the default C++ standard +SET( CMAKE_CXX_EXTENSIONS OFF ) +IF ( NOT CMAKE_CXX_STANDARD ) + IF ( CXX_STD ) + MESSAGE( FATAL_ERROR "CXX_STD is obsolete, please set CMAKE_CXX_STANDARD" ) + ENDIF() + SET( CMAKE_CXX_STANDARD 14 ) +ENDIF() +IF ( ( "${CMAKE_CXX_STANDARD}" GREATER "90" ) OR ( "${CMAKE_CXX_STANDARD}" LESS "14" ) ) + MESSAGE( FATAL_ERROR "C++14 or newer required" ) +ENDIF() + + +# Set source/install paths +SET( ${PROJ}_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}" ) +SET( ${PROJ}_BUILD_DIR "${CMAKE_CURRENT_BINARY_DIR}" ) +IF( ${PROJ}_INSTALL_DIR ) + SET( ${PROJ}_INSTALL_DIR "${${PROJ}_INSTALL_DIR}" ) +ELSEIF( PREFIX ) + SET( ${PROJ}_INSTALL_DIR "${PREFIX}" ) +ELSEIF( NOT ${PROJ}_INSTALL_DIR ) + SET( ${PROJ}_INSTALL_DIR "${CMAKE_CURRENT_BINARY_DIR}" ) +ENDIF() +INCLUDE_DIRECTORIES( "${${PROJ}_INSTALL_DIR}/include" ) +SET( CMAKE_MODULE_PATH ${${PROJ}_SOURCE_DIR} ${${PROJ}_SOURCE_DIR}/cmake ) + + +# Include macros +INCLUDE( "${CMAKE_CURRENT_SOURCE_DIR}/cmake/macros.cmake" ) +INCLUDE( "${CMAKE_CURRENT_SOURCE_DIR}/cmake/libraries.cmake" ) +INCLUDE( "${CMAKE_CURRENT_SOURCE_DIR}/cmake/LBPM-macros.cmake" ) + + +# Check if we are only compiling docs +CHECK_ENABLE_FLAG( ONLY_BUILD_DOCS 0 ) + + +# Set testing paramaters +SET( DROP_METHOD "http" ) +SET( DROP_SITE "" ) +SET( DROP_LOCATION "/CDash/submit.php?project=LBPM-WIA" ) +SET( TRIGGER_SITE "" ) +SET( DROP_SITE_CDASH TRUE ) +ENABLE_TESTING() +INCLUDE( CTest ) + + +# Check the compile mode and compile flags +IF ( NOT ONLY_BUILD_DOCS ) + CONFIGURE_SYSTEM() +ENDIF() + + +# Add some directories to include +INCLUDE_DIRECTORIES( "${${PROJ}_INSTALL_DIR}/include" ) + + +# Create the target for documentation +ADD_CUSTOM_TARGET( doc ) +ADD_CUSTOM_TARGET( latex_docs ) +CHECK_ENABLE_FLAG( USE_DOXYGEN 1 ) +CHECK_ENABLE_FLAG( USE_LATEX 1 ) +FILE( MAKE_DIRECTORY "${${PROJ}_INSTALL_DIR}/doc" ) +IF ( USE_DOXYGEN ) + SET( DOXYFILE_LATEX YES ) + SET( DOXYFILE_IN "${${PROJ}_SOURCE_DIR}/doxygen/Doxyfile.in" ) + SET( DOXY_HEADER_FILE "${${PROJ}_SOURCE_DIR}/doxygen/html/header.html" ) + SET( DOXY_FOOTER_FILE "${${PROJ}_SOURCE_DIR}/doxygen/html/footer.html" ) + SET( DOXYFILE_OUTPUT_DIR "${${PROJ}_INSTALL_DIR}/doc" ) + SET( DOXYFILE_SRC_HTML_DIR "${${PROJ}_SOURCE_DIR}/doxygen/html" ) + SET( DOXYFILE_SOURCE_DIR "${${PROJ}_SOURCE_DIR}" ) + SET( REL_PACKAGE_HTML "" ) + SET( DOXYGEN_MACROS "" ) + MESSAGE("DOXYGEN_MACROS = ${DOXYGEN_MACROS}") + INCLUDE( "${${PROJ}_SOURCE_DIR}/cmake/UseDoxygen.cmake" ) + IF ( DOXYGEN_FOUND ) + ADD_DEPENDENCIES( doxygen latex_docs ) + ADD_DEPENDENCIES( doc latex_docs doxygen ) + ELSE() + SET( USE_DOXYGEN 0 ) + ENDIF() +ENDIF() + + +# Create custom targets for build-test, check, and distclean +ADD_CUSTOM_TARGET( build-test ) +ADD_CUSTOM_TARGET( build-examples ) +ADD_CUSTOM_TARGET( check COMMAND make test ) +ADD_DISTCLEAN( analysis null_timer tests liblbpm-wia.* cpu gpu example common IO threadpool StackTrace ) + + +# Check for CUDA +CHECK_ENABLE_FLAG( USE_CUDA 0 ) +NULL_USE( CMAKE_CUDA_FLAGS ) +IF ( USE_CUDA ) + ADD_DEFINITIONS( -DUSE_CUDA ) + ENABLE_LANGUAGE( CUDA ) +ENDIF() + + +# Configure external packages +IF ( NOT ONLY_BUILD_DOCS ) + CONFIGURE_MPI() # MPI must be before other libraries + CONFIGURE_MIC() + CONFIGURE_NETCDF() + CONFIGURE_SILO() + 
CONFIGURE_LBPM() + CONFIGURE_TIMER( 0 "${${PROJ}_INSTALL_DIR}/null_timer" ) + CONFIGURE_LINE_COVERAGE() + # Set the external library link list + SET( EXTERNAL_LIBS ${EXTERNAL_LIBS} ${TIMER_LIBS} ) +ENDIF() + + + +# Macro to create 1,2,4 processor tests +MACRO( ADD_LBPM_TEST_1_2_4 EXENAME ${ARGN} ) + ADD_LBPM_TEST( ${EXENAME} ${ARGN} ) + ADD_LBPM_TEST_PARALLEL( ${EXENAME} 2 ${ARGN} ) + ADD_LBPM_TEST_PARALLEL( ${EXENAME} 4 ${ARGN} ) +ENDMACRO() + + +# Add the src directories +IF ( NOT ONLY_BUILD_DOCS ) + BEGIN_PACKAGE_CONFIG( lbpm-wia-library ) + ADD_PACKAGE_SUBDIRECTORY( common ) + ADD_PACKAGE_SUBDIRECTORY( analysis ) + ADD_PACKAGE_SUBDIRECTORY( IO ) + ADD_PACKAGE_SUBDIRECTORY( threadpool ) + ADD_PACKAGE_SUBDIRECTORY( StackTrace ) + ADD_PACKAGE_SUBDIRECTORY( models ) + IF ( USE_CUDA ) + ADD_PACKAGE_SUBDIRECTORY( gpu ) + ELSE() + ADD_PACKAGE_SUBDIRECTORY( cpu ) + ENDIF() + INSTALL_LBPM_TARGET( lbpm-wia-library ) + ADD_SUBDIRECTORY( tests ) + ADD_SUBDIRECTORY( example ) + #ADD_SUBDIRECTORY( workflows ) + INSTALL_PROJ_LIB() +ENDIF() + diff --git a/IO/MeshDatabase.cpp b/IO/MeshDatabase.cpp index 2c03ddde..1fad9231 100644 --- a/IO/MeshDatabase.cpp +++ b/IO/MeshDatabase.cpp @@ -1,8 +1,7 @@ #include "IO/MeshDatabase.h" #include "IO/Mesh.h" -#include "IO/PackData.h" #include "IO/IOHelpers.h" -#include "common/MPI.h" +#include "common/MPI_Helpers.h" #include "common/Utilities.h" #include @@ -14,6 +13,8 @@ +/**************************************************** +****************************************************/ // MeshType template<> size_t packsize( const IO::MeshType& rhs ) @@ -246,76 +247,80 @@ void DatabaseEntry::read( const std::string& line ) // Gather the mesh databases from all processors inline int tod( int N ) { return (N+7)/sizeof(double); } -std::vector gatherAll( const std::vector& meshes, const Utilities::MPI& comm ) +std::vector gatherAll( const std::vector& meshes, MPI_Comm comm ) { - if ( comm.getSize() == 1 ) - return meshes; - PROFILE_START("gatherAll"); - PROFILE_START("gatherAll-pack",2); - int size = comm.getSize(); - // First pack the mesh data to local buffers - int localsize = 0; - for (size_t i=0; i data; - pos = 0; - while ( pos < globalsize ) { - MeshDatabase tmp; - unpack(tmp,(char*)&globalbuf[pos]); - pos += tod(packsize(tmp)); - std::map::iterator it = data.find(tmp.name); - if ( it==data.end() ) { - data[tmp.name] = tmp; - } else { - for (size_t i=0; isecond.domains.push_back(tmp.domains[i]); - for (size_t i=0; isecond.variables.push_back(tmp.variables[i]); - it->second.variable_data.insert(tmp.variable_data.begin(),tmp.variable_data.end()); + #ifdef USE_MPI + PROFILE_START("gatherAll"); + PROFILE_START("gatherAll-pack",2); + int size = MPI_WORLD_SIZE(); + // First pack the mesh data to local buffers + int localsize = 0; + for (size_t i=0; i data2(it->second.variables.begin(),it->second.variables.end()); - it->second.variables = std::vector(data2.begin(),data2.end()); - } - // Free temporary memory - delete [] localbuf; - delete [] disp; - delete [] globalbuf; - // Return the results - std::vector data2(data.size()); - size_t i=0; - for (std::map::iterator it=data.begin(); it!=data.end(); ++it, ++i) - data2[i] = it->second; - PROFILE_STOP("gatherAll-unpack",2); - PROFILE_STOP("gatherAll"); - return data2; + PROFILE_STOP("gatherAll-pack",2); + // Get the number of bytes each processor will be sending/recieving + PROFILE_START("gatherAll-send1",2); + auto recvsize = new int[size]; + MPI_Allgather(&localsize,1,MPI_INT,recvsize,1,MPI_INT,comm); + int globalsize = 
recvsize[0]; + auto disp = new int[size]; + disp[0] = 0; + for (int i=1; i data; + pos = 0; + while ( pos < globalsize ) { + MeshDatabase tmp; + unpack(tmp,(char*)&globalbuf[pos]); + pos += tod(packsize(tmp)); + std::map::iterator it = data.find(tmp.name); + if ( it==data.end() ) { + data[tmp.name] = tmp; + } else { + for (size_t i=0; isecond.domains.push_back(tmp.domains[i]); + for (size_t i=0; isecond.variables.push_back(tmp.variables[i]); + it->second.variable_data.insert(tmp.variable_data.begin(),tmp.variable_data.end()); + } + } + for (std::map::iterator it=data.begin(); it!=data.end(); ++it) { + // Get the unique variables + std::set data2(it->second.variables.begin(),it->second.variables.end()); + it->second.variables = std::vector(data2.begin(),data2.end()); + } + // Free temporary memory + delete [] localbuf; + delete [] recvsize; + delete [] disp; + delete [] globalbuf; + // Return the results + std::vector data2(data.size()); + size_t i=0; + for (std::map::iterator it=data.begin(); it!=data.end(); ++it, ++i) + data2[i] = it->second; + PROFILE_STOP("gatherAll-unpack",2); + PROFILE_STOP("gatherAll"); + return data2; + #else + return meshes; + #endif } diff --git a/IO/MeshDatabase.h b/IO/MeshDatabase.h index 8e501624..9f544925 100644 --- a/IO/MeshDatabase.h +++ b/IO/MeshDatabase.h @@ -2,7 +2,7 @@ #define MeshDatabase_INC #include "IO/Mesh.h" -#include "common/MPI.h" +#include "common/MPI_Helpers.h" #include #include @@ -70,7 +70,7 @@ public: //! Gather the mesh databases from all processors -std::vector gatherAll( const std::vector& meshes, const Utilities::MPI& comm ); +std::vector gatherAll( const std::vector& meshes, MPI_Comm comm ); //! Write the mesh databases to a file diff --git a/IO/PIO.cpp b/IO/PIO.cpp index 3c2f3934..6c6ece2d 100644 --- a/IO/PIO.cpp +++ b/IO/PIO.cpp @@ -1,6 +1,6 @@ #include "IO/PIO.h" #include "common/Utilities.h" -#include "common/MPI.h" +#include "common/MPI_Helpers.h" #include #include @@ -36,7 +36,10 @@ static void shutdownFilestream( ) } void Utilities::logOnlyNodeZero( const std::string &filename ) { - int rank = ::Utilities::MPI( MPI_COMM_WORLD ).getRank(); + int rank = 0; + #ifdef USE_MPI + MPI_Comm_rank( MPI_COMM_WORLD, &rank ); + #endif if ( rank == 0 ) logAllNodes(filename,true); } @@ -51,7 +54,10 @@ void Utilities::logAllNodes( const std::string &filename, bool singleStream ) // Open the log stream and redirect output std::string full_filename = filename; if ( !singleStream ) { - int rank = ::Utilities::MPI( MPI_COMM_WORLD ).getRank(); + int rank = 0; + #ifdef USE_MPI + MPI_Comm_rank( MPI_COMM_WORLD, &rank ); + #endif char tmp[100]; sprintf(tmp,".%04i",rank); full_filename += std::string(tmp); diff --git a/IO/PackData.cpp b/IO/PackData.cpp deleted file mode 100644 index f10d9ca7..00000000 --- a/IO/PackData.cpp +++ /dev/null @@ -1,105 +0,0 @@ -#include "IO/PackData.h" - -#include - - -/******************************************************** -* Concrete implimentations for packing/unpacking * -********************************************************/ -// unsigned char -template<> -size_t packsize( const unsigned char& rhs ) -{ - return sizeof(unsigned char); -} -template<> -void pack( const unsigned char& rhs, char *buffer ) -{ - memcpy(buffer,&rhs,sizeof(unsigned char)); -} -template<> -void unpack( unsigned char& data, const char *buffer ) -{ - memcpy(&data,buffer,sizeof(unsigned char)); -} -// char -template<> -size_t packsize( const char& rhs ) -{ - return sizeof(char); -} -template<> -void pack( const char& rhs, char *buffer ) -{ - 
memcpy(buffer,&rhs,sizeof(char)); -} -template<> -void unpack( char& data, const char *buffer ) -{ - memcpy(&data,buffer,sizeof(char)); -} -// int -template<> -size_t packsize( const int& rhs ) -{ - return sizeof(int); -} -template<> -void pack( const int& rhs, char *buffer ) -{ - memcpy(buffer,&rhs,sizeof(int)); -} -template<> -void unpack( int& data, const char *buffer ) -{ - memcpy(&data,buffer,sizeof(int)); -} -// unsigned int -template<> -size_t packsize( const unsigned int& rhs ) -{ - return sizeof(unsigned int); -} -template<> -void pack( const unsigned int& rhs, char *buffer ) -{ - memcpy(buffer,&rhs,sizeof(int)); -} -template<> -void unpack( unsigned int& data, const char *buffer ) -{ - memcpy(&data,buffer,sizeof(int)); -} -// size_t -template<> -size_t packsize( const size_t& rhs ) -{ - return sizeof(size_t); -} -template<> -void pack( const size_t& rhs, char *buffer ) -{ - memcpy(buffer,&rhs,sizeof(size_t)); -} -template<> -void unpack( size_t& data, const char *buffer ) -{ - memcpy(&data,buffer,sizeof(size_t)); -} -// std::string -template<> -size_t packsize( const std::string& rhs ) -{ - return rhs.size()+1; -} -template<> -void pack( const std::string& rhs, char *buffer ) -{ - memcpy(buffer,rhs.c_str(),rhs.size()+1); -} -template<> -void unpack( std::string& data, const char *buffer ) -{ - data = std::string(buffer); -} - diff --git a/IO/PackData.h b/IO/PackData.h deleted file mode 100644 index 85326c0b..00000000 --- a/IO/PackData.h +++ /dev/null @@ -1,78 +0,0 @@ -// This file contains unctions to pack/unpack data structures -#ifndef included_PackData -#define included_PackData - -#include -#include -#include - - -//! Template function to return the buffer size required to pack a class -template -size_t packsize( const TYPE& rhs ); - -//! Template function to pack a class to a buffer -template -void pack( const TYPE& rhs, char *buffer ); - -//! Template function to unpack a class from a buffer -template -void unpack( TYPE& data, const char *buffer ); - - -//! Template function to return the buffer size required to pack a std::vector -template -size_t packsize( const std::vector& rhs ); - -//! Template function to pack a class to a buffer -template -void pack( const std::vector& rhs, char *buffer ); - -//! Template function to pack a class to a buffer -template -void unpack( std::vector& data, const char *buffer ); - - -//! Template function to return the buffer size required to pack a std::pair -template -size_t packsize( const std::pair& rhs ); - -//! Template function to pack a class to a buffer -template -void pack( const std::pair& rhs, char *buffer ); - -//! Template function to pack a class to a buffer -template -void unpack( std::pair& data, const char *buffer ); - - -//! Template function to return the buffer size required to pack a std::map -template -size_t packsize( const std::map& rhs ); - -//! Template function to pack a class to a buffer -template -void pack( const std::map& rhs, char *buffer ); - -//! Template function to pack a class to a buffer -template -void unpack( std::map& data, const char *buffer ); - - -//! Template function to return the buffer size required to pack a std::set -template -size_t packsize( const std::set& rhs ); - -//! Template function to pack a class to a buffer -template -void pack( const std::set& rhs, char *buffer ); - -//! 
Template function to pack a class to a buffer -template -void unpack( std::set& data, const char *buffer ); - - -#include "IO/PackData.hpp" - -#endif - diff --git a/IO/Writer.cpp b/IO/Writer.cpp index 61c333af..6581ad42 100644 --- a/IO/Writer.cpp +++ b/IO/Writer.cpp @@ -2,7 +2,7 @@ #include "IO/MeshDatabase.h" #include "IO/IOHelpers.h" #include "IO/silo.h" -#include "common/MPI.h" +#include "common/MPI_Helpers.h" #include "common/Utilities.h" #include @@ -36,7 +36,7 @@ void IO::initialize( const std::string& path, const std::string& format, bool ap global_IO_format = Format::SILO; else ERROR("Unknown format"); - int rank = Utilities::MPI(MPI_COMM_WORLD).getRank(); + int rank = comm_rank(MPI_COMM_WORLD); if ( !append && rank==0 ) { mkdir(path.c_str(),S_IRWXU|S_IRGRP); std::string filename; @@ -55,7 +55,7 @@ void IO::initialize( const std::string& path, const std::string& format, bool ap // Write the mesh data in the original format static std::vector writeMeshesOrigFormat( const std::vector& meshData, const std::string& path ) { - int rank = Utilities::MPI(MPI_COMM_WORLD).getRank(); + int rank = MPI_WORLD_RANK(); std::vector meshes_written; for (size_t i=0; i writeMeshesOrigFormat( const std::vector& meshes_written, cons static std::vector writeMeshesNewFormat( const std::vector& meshData, const std::string& path, int format ) { - int rank = Utilities::MPI(MPI_COMM_WORLD).getRank(); + int rank = MPI_WORLD_RANK(); std::vector meshes_written; char filename[100], fullpath[200]; sprintf(filename,"%05i",rank); @@ -419,7 +419,7 @@ static std::vector writeMeshesSilo( const std::vector& meshData, const std::string& path, int format ) { #ifdef USE_SILO - int rank = Utilities::MPI(MPI_COMM_WORLD).getRank(); + int rank = MPI_WORLD_RANK(); std::vector meshes_written; char filename[100], fullpath[200]; sprintf(filename,"%05i.silo",rank); @@ -441,12 +441,12 @@ static std::vector writeMeshesSilo( /**************************************************** * Write the mesh data * ****************************************************/ -void IO::writeData( const std::string& subdir, const std::vector& meshData, const Utilities::MPI& comm ) +void IO::writeData( const std::string& subdir, const std::vector& meshData, MPI_Comm comm ) { if ( global_IO_path.empty() ) IO::initialize( ); PROFILE_START("writeData"); - int rank = Utilities::MPI(MPI_COMM_WORLD).getRank(); + int rank = comm_rank(comm); // Check the meshData before writing for ( const auto& data : meshData ) { if ( !data.check() ) @@ -457,7 +457,7 @@ void IO::writeData( const std::string& subdir, const std::vector meshes_written; if ( global_IO_format == Format::OLD ) { diff --git a/IO/Writer.h b/IO/Writer.h index dfc22db8..710fa0d8 100644 --- a/IO/Writer.h +++ b/IO/Writer.h @@ -34,7 +34,7 @@ void initialize( const std::string& path="", const std::string& format="silo", b * @param[in] meshData The data to write * @param[in] comm The comm to use for writing (usually MPI_COMM_WORLD or a dup thereof) */ -void writeData( const std::string& subdir, const std::vector& meshData, const Utilities::MPI& comm ); +void writeData( const std::string& subdir, const std::vector& meshData, MPI_Comm comm ); /*! 
@@ -44,7 +44,7 @@ void writeData( const std::string& subdir, const std::vector * @param[in] meshData The data to write * @param[in] comm The comm to use for writing (usually MPI_COMM_WORLD or a dup thereof) */ -inline void writeData( int timestep, const std::vector& meshData, const Utilities::MPI& comm ) +inline void writeData( int timestep, const std::vector& meshData, MPI_Comm comm ) { char subdir[100]; sprintf(subdir,"vis%03i",timestep); diff --git a/IO/netcdf.cpp b/IO/netcdf.cpp index e061579a..b36bb6d6 100644 --- a/IO/netcdf.cpp +++ b/IO/netcdf.cpp @@ -1,6 +1,6 @@ #include "IO/netcdf.h" #include "common/Utilities.h" -#include "common/MPI.h" +#include "common/MPI_Helpers.h" #include "ProfilerApp.h" @@ -116,7 +116,7 @@ std::string VariableTypeName( VariableType type ) /**************************************************** * Open/close a file * ****************************************************/ -int open( const std::string& filename, FileMode mode, const Utilities::MPI& comm ) +int open( const std::string& filename, FileMode mode, MPI_Comm comm ) { int fid = 0; if ( comm == MPI_COMM_NULL ) { @@ -134,13 +134,13 @@ int open( const std::string& filename, FileMode mode, const Utilities::MPI& comm } } else { if ( mode == READ ) { - int err = nc_open_par( filename.c_str(), NC_MPIPOSIX, comm.getCommunicator(), MPI_INFO_NULL, &fid ); + int err = nc_open_par( filename.c_str(), NC_MPIPOSIX, comm, MPI_INFO_NULL, &fid ); CHECK_NC_ERR( err ); } else if ( mode == WRITE ) { - int err = nc_open_par( filename.c_str(), NC_WRITE|NC_MPIPOSIX, comm.getCommunicator(), MPI_INFO_NULL, &fid ); + int err = nc_open_par( filename.c_str(), NC_WRITE|NC_MPIPOSIX, comm, MPI_INFO_NULL, &fid ); CHECK_NC_ERR( err ); } else if ( mode == CREATE ) { - int err = nc_create_par( filename.c_str(), NC_NETCDF4|NC_MPIIO, comm.getCommunicator(), MPI_INFO_NULL, &fid ); + int err = nc_create_par( filename.c_str(), NC_NETCDF4|NC_MPIIO, comm, MPI_INFO_NULL, &fid ); CHECK_NC_ERR( err ); } else { ERROR("Unknown file mode"); @@ -375,7 +375,7 @@ Array getVar( int fid, const std::string& var, const std::vector& sta std::vector var_size = getVarDim( fid, var ); for (int d=0; d<(int)var_size.size(); d++) { if ( start[d]<0 || start[d]+stride[d]*(count[d]-1)>(int)var_size[d] ) { - int rank = Utilities::MPI(MPI_COMM_WORLD).getRank(); + int rank = comm_rank(MPI_COMM_WORLD); char tmp[1000]; sprintf(tmp,"%i: Range exceeded array dimension:\n" " start[%i]=%i, count[%i]=%i, stride[%i]=%i, var_size[%i]=%i", diff --git a/IO/netcdf.h b/IO/netcdf.h index b4559e51..657747bf 100644 --- a/IO/netcdf.h +++ b/IO/netcdf.h @@ -5,7 +5,7 @@ #include #include "common/Array.h" -#include "common/MPI.h" +#include "common/MPI_Helpers.h" #include "common/Communication.h" @@ -32,7 +32,7 @@ std::string VariableTypeName( VariableType type ); * @param mode Open the file for reading or writing * @param comm MPI communicator to use (MPI_COMM_WORLD: don't use parallel netcdf) */ -int open( const std::string& filename, FileMode mode, const Utilities::MPI& comm=MPI_COMM_NULL ); +int open( const std::string& filename, FileMode mode, MPI_Comm comm=MPI_COMM_NULL ); /*! 
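The Writer and netcdf changes above replace the Utilities::MPI wrapper with comm_rank()/MPI_WORLD_RANK() from common/MPI_Helpers.h. That header is not part of this hunk; a minimal sketch of what such helpers are assumed to look like:

    // Thin MPI helpers in the spirit of common/MPI_Helpers.h (assumed; the real
    // header is not shown in this patch).
    #include <mpi.h>

    inline int comm_rank(MPI_Comm comm) {
        int rank = 0;
        MPI_Comm_rank(comm, &rank);
        return rank;
    }
    inline int comm_size(MPI_Comm comm) {
        int size = 1;
        MPI_Comm_size(comm, &size);
        return size;
    }
    // Convenience used by the writer and netcdf code paths above.
    #define MPI_WORLD_RANK() comm_rank(MPI_COMM_WORLD)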
diff --git a/IO/silo.cpp b/IO/silo.cpp index ddf3646a..eece8583 100644 --- a/IO/silo.cpp +++ b/IO/silo.cpp @@ -1,6 +1,6 @@ #include "IO/silo.h" #include "common/Utilities.h" -#include "common/MPI.h" +#include "common/MPI_Helpers.h" #include "ProfilerApp.h" diff --git a/IO/silo.h b/IO/silo.h index 339a5c34..4c7081e5 100644 --- a/IO/silo.h +++ b/IO/silo.h @@ -6,7 +6,7 @@ #include #include "common/Array.h" -#include "common/MPI.h" +#include "common/MPI_Helpers.h" #include "common/Communication.h" diff --git a/IO/silo.hpp b/IO/silo.hpp index 35852004..312f32d8 100644 --- a/IO/silo.hpp +++ b/IO/silo.hpp @@ -3,7 +3,7 @@ #include "IO/silo.h" #include "common/Utilities.h" -#include "common/MPI.h" +#include "common/MPI_Helpers.h" #include "ProfilerApp.h" diff --git a/analysis/Minkowski.cpp b/analysis/Minkowski.cpp index 3e3fb35e..faac6142 100644 --- a/analysis/Minkowski.cpp +++ b/analysis/Minkowski.cpp @@ -4,7 +4,7 @@ #include "common/Domain.h" #include "common/Communication.h" #include "common/Utilities.h" -#include "common/MPI.h" +#include "common/MPI_Helpers.h" #include "IO/MeshDatabase.h" #include "IO/Reader.h" #include "IO/Writer.h" @@ -109,13 +109,13 @@ void Minkowski::ComputeScalar(const DoubleArray& Field, const double isovalue) // convert X for 2D manifold to 3D object Xi *= 0.5; - Dm->Comm.barrier(); + MPI_Barrier(Dm->Comm); // Phase averages - Vi_global = Dm->Comm.sumReduce( Vi ); - Xi_global = Dm->Comm.sumReduce( Xi ); - Ai_global = Dm->Comm.sumReduce( Ai ); - Ji_global = Dm->Comm.sumReduce( Ji ); - Dm->Comm.barrier(); + MPI_Allreduce(&Vi,&Vi_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm); + MPI_Allreduce(&Xi,&Xi_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm); + MPI_Allreduce(&Ai,&Ai_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm); + MPI_Allreduce(&Ji,&Ji_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm); + MPI_Barrier(Dm->Comm); PROFILE_STOP("ComputeScalar"); } @@ -168,7 +168,7 @@ int Minkowski::MeasureConnectedPathway(){ double vF=0.0; n_connected_components = ComputeGlobalBlobIDs(Nx-2,Ny-2,Nz-2,Dm->rank_info,distance,distance,vF,vF,label,Dm->Comm); // int n_connected_components = ComputeGlobalPhaseComponent(Nx-2,Ny-2,Nz-2,Dm->rank_info,const IntArray &PhaseID, int &VALUE, BlobIDArray &GlobalBlobID, Dm->Comm ) - Dm->Comm.barrier(); + MPI_Barrier(Dm->Comm); for (int k=0; kComm.sumReduce( wb.V); - gnb.V = Dm->Comm.sumReduce( nb.V); - gwb.M = Dm->Comm.sumReduce( wb.M); - gnb.M = Dm->Comm.sumReduce( nb.M); - gwb.Px = Dm->Comm.sumReduce( wb.Px); - gwb.Py = Dm->Comm.sumReduce( wb.Py); - gwb.Pz = Dm->Comm.sumReduce( wb.Pz); - gnb.Px = Dm->Comm.sumReduce( nb.Px); - gnb.Py = Dm->Comm.sumReduce( nb.Py); - gnb.Pz = Dm->Comm.sumReduce( nb.Pz); + gwb.V=sumReduce( Dm->Comm, wb.V); + gnb.V=sumReduce( Dm->Comm, nb.V); + gwb.M=sumReduce( Dm->Comm, wb.M); + gnb.M=sumReduce( Dm->Comm, nb.M); + gwb.Px=sumReduce( Dm->Comm, wb.Px); + gwb.Py=sumReduce( Dm->Comm, wb.Py); + gwb.Pz=sumReduce( Dm->Comm, wb.Pz); + gnb.Px=sumReduce( Dm->Comm, nb.Px); + gnb.Py=sumReduce( Dm->Comm, nb.Py); + gnb.Pz=sumReduce( Dm->Comm, nb.Pz); - count_w = Dm->Comm.sumReduce( count_w); - count_n = Dm->Comm.sumReduce( count_n); + count_w=sumReduce( Dm->Comm, count_w); + count_n=sumReduce( Dm->Comm, count_n); if (count_w > 0.0) - gwb.p = Dm->Comm.sumReduce(wb.p) / count_w; + gwb.p=sumReduce( Dm->Comm, wb.p) / count_w; else gwb.p = 0.0; if (count_n > 0.0) - gnb.p = Dm->Comm.sumReduce( nb.p) / count_n; + gnb.p=sumReduce( Dm->Comm, nb.p) / count_n; else gnb.p = 0.0; @@ -444,14 +444,14 @@ void SubPhase::Full(){ nd.X -= nc.X; // compute global entities - gnc.V = 
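The Minkowski and SubPhase hunks here interchange two equivalent spellings of the same reduction: the Dm->Comm.sumReduce()/maxReduce() member calls and explicit MPI_Allreduce calls (or the free-function form sumReduce(Dm->Comm, x)). Both amount to thin wrappers like the following sketch:

    // Equivalence illustrated by the hunks above: a sumReduce-style helper is
    // just an MPI_Allreduce over a single double (sketch, not part of the patch).
    #include <mpi.h>

    inline double sumReduce(MPI_Comm comm, double x) {
        double result = 0.0;
        MPI_Allreduce(&x, &result, 1, MPI_DOUBLE, MPI_SUM, comm);
        return result;
    }
    inline double maxReduce(MPI_Comm comm, double x) {
        double result = 0.0;
        MPI_Allreduce(&x, &result, 1, MPI_DOUBLE, MPI_MAX, comm);
        return result;
    }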
Dm->Comm.sumReduce( nc.V ); - gnc.A = Dm->Comm.sumReduce( nc.A ); - gnc.H = Dm->Comm.sumReduce( nc.H ); - gnc.X = Dm->Comm.sumReduce( nc.X ); - gnd.V = Dm->Comm.sumReduce( nd.V ); - gnd.A = Dm->Comm.sumReduce( nd.A ); - gnd.H = Dm->Comm.sumReduce( nd.H ); - gnd.X = Dm->Comm.sumReduce( nd.X ); + gnc.V=sumReduce( Dm->Comm, nc.V); + gnc.A=sumReduce( Dm->Comm, nc.A); + gnc.H=sumReduce( Dm->Comm, nc.H); + gnc.X=sumReduce( Dm->Comm, nc.X); + gnd.V=sumReduce( Dm->Comm, nd.V); + gnd.A=sumReduce( Dm->Comm, nd.A); + gnd.H=sumReduce( Dm->Comm, nd.H); + gnd.X=sumReduce( Dm->Comm, nd.X); gnd.Nc = nd.Nc; // wetting for (k=0; kComm.sumReduce( wc.V ); - gwc.A = Dm->Comm.sumReduce( wc.A ); - gwc.H = Dm->Comm.sumReduce( wc.H ); - gwc.X = Dm->Comm.sumReduce( wc.X ); - gwd.V = Dm->Comm.sumReduce( wd.V ); - gwd.A = Dm->Comm.sumReduce( wd.A ); - gwd.H = Dm->Comm.sumReduce( wd.H ); - gwd.X = Dm->Comm.sumReduce( wd.X ); + gwc.V=sumReduce( Dm->Comm, wc.V); + gwc.A=sumReduce( Dm->Comm, wc.A); + gwc.H=sumReduce( Dm->Comm, wc.H); + gwc.X=sumReduce( Dm->Comm, wc.X); + gwd.V=sumReduce( Dm->Comm, wd.V); + gwd.A=sumReduce( Dm->Comm, wd.A); + gwd.H=sumReduce( Dm->Comm, wd.H); + gwd.X=sumReduce( Dm->Comm, wd.X); gwd.Nc = wd.Nc; /* Set up geometric analysis of interface region */ @@ -526,20 +526,20 @@ void SubPhase::Full(){ iwn.A = morph_i->A(); iwn.H = morph_i->H(); iwn.X = morph_i->X(); - giwn.V = Dm->Comm.sumReduce( iwn.V ); - giwn.A = Dm->Comm.sumReduce( iwn.A ); - giwn.H = Dm->Comm.sumReduce( iwn.H ); - giwn.X = Dm->Comm.sumReduce( iwn.X ); + giwn.V=sumReduce( Dm->Comm, iwn.V); + giwn.A=sumReduce( Dm->Comm, iwn.A); + giwn.H=sumReduce( Dm->Comm, iwn.H); + giwn.X=sumReduce( Dm->Comm, iwn.X); // measure only the connected part iwnc.Nc = morph_i->MeasureConnectedPathway(); iwnc.V = morph_i->V(); iwnc.A = morph_i->A(); iwnc.H = morph_i->H(); iwnc.X = morph_i->X(); - giwnc.V = Dm->Comm.sumReduce( iwnc.V ); - giwnc.A = Dm->Comm.sumReduce( iwnc.A ); - giwnc.H = Dm->Comm.sumReduce( iwnc.H ); - giwnc.X = Dm->Comm.sumReduce( iwnc.X ); + giwnc.V=sumReduce( Dm->Comm, iwnc.V); + giwnc.A=sumReduce( Dm->Comm, iwnc.A); + giwnc.H=sumReduce( Dm->Comm, iwnc.H); + giwnc.X=sumReduce( Dm->Comm, iwnc.X); giwnc.Nc = iwnc.Nc; double vol_nc_bulk = 0.0; @@ -630,46 +630,46 @@ void SubPhase::Full(){ } } - gnd.M = Dm->Comm.sumReduce( nd.M ); - gnd.Px = Dm->Comm.sumReduce( nd.Px ); - gnd.Py = Dm->Comm.sumReduce( nd.Py ); - gnd.Pz = Dm->Comm.sumReduce( nd.Pz ); - gnd.K = Dm->Comm.sumReduce( nd.K ); + gnd.M=sumReduce( Dm->Comm, nd.M); + gnd.Px=sumReduce( Dm->Comm, nd.Px); + gnd.Py=sumReduce( Dm->Comm, nd.Py); + gnd.Pz=sumReduce( Dm->Comm, nd.Pz); + gnd.K=sumReduce( Dm->Comm, nd.K); - gwd.M = Dm->Comm.sumReduce( wd.M ); - gwd.Px = Dm->Comm.sumReduce( wd.Px ); - gwd.Py = Dm->Comm.sumReduce( wd.Py ); - gwd.Pz = Dm->Comm.sumReduce( wd.Pz ); - gwd.K = Dm->Comm.sumReduce( wd.K ); + gwd.M=sumReduce( Dm->Comm, wd.M); + gwd.Px=sumReduce( Dm->Comm, wd.Px); + gwd.Py=sumReduce( Dm->Comm, wd.Py); + gwd.Pz=sumReduce( Dm->Comm, wd.Pz); + gwd.K=sumReduce( Dm->Comm, wd.K); - gnc.M = Dm->Comm.sumReduce( nc.M ); - gnc.Px = Dm->Comm.sumReduce( nc.Px ); - gnc.Py = Dm->Comm.sumReduce( nc.Py ); - gnc.Pz = Dm->Comm.sumReduce( nc.Pz ); - gnc.K = Dm->Comm.sumReduce( nc.K ); + gnc.M=sumReduce( Dm->Comm, nc.M); + gnc.Px=sumReduce( Dm->Comm, nc.Px); + gnc.Py=sumReduce( Dm->Comm, nc.Py); + gnc.Pz=sumReduce( Dm->Comm, nc.Pz); + gnc.K=sumReduce( Dm->Comm, nc.K); - gwc.M = Dm->Comm.sumReduce( wc.M ); - gwc.Px = Dm->Comm.sumReduce( wc.Px ); - gwc.Py = Dm->Comm.sumReduce( wc.Py ); - 
gwc.Pz = Dm->Comm.sumReduce( wc.Pz ); - gwc.K = Dm->Comm.sumReduce( wc.K ); + gwc.M=sumReduce( Dm->Comm, wc.M); + gwc.Px=sumReduce( Dm->Comm, wc.Px); + gwc.Py=sumReduce( Dm->Comm, wc.Py); + gwc.Pz=sumReduce( Dm->Comm, wc.Pz); + gwc.K=sumReduce( Dm->Comm, wc.K); - giwn.Mn = Dm->Comm.sumReduce( iwn.Mn ); - giwn.Pnx = Dm->Comm.sumReduce( iwn.Pnx ); - giwn.Pny = Dm->Comm.sumReduce( iwn.Pny ); - giwn.Pnz = Dm->Comm.sumReduce( iwn.Pnz ); - giwn.Kn = Dm->Comm.sumReduce( iwn.Kn ); - giwn.Mw = Dm->Comm.sumReduce( iwn.Mw ); - giwn.Pwx = Dm->Comm.sumReduce( iwn.Pwx ); - giwn.Pwy = Dm->Comm.sumReduce( iwn.Pwy ); - giwn.Pwz = Dm->Comm.sumReduce( iwn.Pwz ); - giwn.Kw = Dm->Comm.sumReduce( iwn.Kw ); + giwn.Mn=sumReduce( Dm->Comm, iwn.Mn); + giwn.Pnx=sumReduce( Dm->Comm, iwn.Pnx); + giwn.Pny=sumReduce( Dm->Comm, iwn.Pny); + giwn.Pnz=sumReduce( Dm->Comm, iwn.Pnz); + giwn.Kn=sumReduce( Dm->Comm, iwn.Kn); + giwn.Mw=sumReduce( Dm->Comm, iwn.Mw); + giwn.Pwx=sumReduce( Dm->Comm, iwn.Pwx); + giwn.Pwy=sumReduce( Dm->Comm, iwn.Pwy); + giwn.Pwz=sumReduce( Dm->Comm, iwn.Pwz); + giwn.Kw=sumReduce( Dm->Comm, iwn.Kw); // pressure averaging - gnc.p = Dm->Comm.sumReduce( nc.p ); - gnd.p = Dm->Comm.sumReduce( nd.p ); - gwc.p = Dm->Comm.sumReduce( wc.p ); - gwd.p = Dm->Comm.sumReduce( wd.p ); + gnc.p=sumReduce( Dm->Comm, nc.p); + gnd.p=sumReduce( Dm->Comm, nd.p); + gwc.p=sumReduce( Dm->Comm, wc.p); + gwd.p=sumReduce( Dm->Comm, wd.p); if (vol_wc_bulk > 0.0) wc.p = wc.p /vol_wc_bulk; @@ -680,10 +680,10 @@ void SubPhase::Full(){ if (vol_nd_bulk > 0.0) nd.p = nd.p /vol_nd_bulk; - vol_wc_bulk = Dm->Comm.sumReduce( vol_wc_bulk ); - vol_wd_bulk = Dm->Comm.sumReduce( vol_wd_bulk ); - vol_nc_bulk = Dm->Comm.sumReduce( vol_nc_bulk ); - vol_nd_bulk = Dm->Comm.sumReduce( vol_nd_bulk ); + vol_wc_bulk=sumReduce( Dm->Comm, vol_wc_bulk); + vol_wd_bulk=sumReduce( Dm->Comm, vol_wd_bulk); + vol_nc_bulk=sumReduce( Dm->Comm, vol_nc_bulk); + vol_nd_bulk=sumReduce( Dm->Comm, vol_nd_bulk); if (vol_wc_bulk > 0.0) gwc.p = gwc.p /vol_wc_bulk; @@ -719,7 +719,7 @@ void SubPhase::AggregateLabels( const std::string& filename ) } } } - Dm->Comm.barrier(); + MPI_Barrier(Dm->Comm); Dm->AggregateLabels( filename ); diff --git a/analysis/SubPhase.h b/analysis/SubPhase.h index 691c654f..71b87ef0 100644 --- a/analysis/SubPhase.h +++ b/analysis/SubPhase.h @@ -12,7 +12,7 @@ #include "analysis/distance.h" #include "analysis/Minkowski.h" #include "common/Utilities.h" -#include "common/MPI.h" +#include "common/MPI_Helpers.h" #include "IO/MeshDatabase.h" #include "IO/Reader.h" #include "IO/Writer.h" diff --git a/analysis/TwoPhase.cpp b/analysis/TwoPhase.cpp index 812490e7..9b2e5fd8 100644 --- a/analysis/TwoPhase.cpp +++ b/analysis/TwoPhase.cpp @@ -5,7 +5,7 @@ #include "common/Domain.h" #include "common/Communication.h" #include "common/Utilities.h" -#include "common/MPI.h" +#include "common/MPI_Helpers.h" #include "IO/MeshDatabase.h" #include "IO/Reader.h" #include "IO/Writer.h" @@ -882,7 +882,7 @@ void TwoPhase::ComponentAverages() } } - Dm->Comm.barrier(); + MPI_Barrier(Dm->Comm); if (Dm->rank()==0){ printf("Component averages computed locally -- reducing result... 
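SubPhase::Full() above issues one reduction per scalar. An alternative pattern, not used by this patch, is to pack related scalars into a small array and reduce them in a single call, trading a few copies for fewer latency-bound collectives (the variable names in the usage comment are hypothetical):

    // Alternative pattern (not what the patch does): batch several scalars into
    // one MPI_Allreduce instead of issuing one collective per value.
    #include <mpi.h>

    inline void sumReduceBatch(MPI_Comm comm, const double *local, double *global, int n) {
        MPI_Allreduce(local, global, n, MPI_DOUBLE, MPI_SUM, comm);
    }

    // Usage sketch: pack {V, A, H, X} for one phase and reduce them once.
    //   double local[4]  = {nc_V, nc_A, nc_H, nc_X};   // hypothetical names
    //   double global[4] = {0.0, 0.0, 0.0, 0.0};
    //   sumReduceBatch(comm, local, global, 4);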
\n"); } @@ -895,8 +895,8 @@ void TwoPhase::ComponentAverages() for (int idx=0; idxComm.barrier(); - Dm->Comm.sumReduce(ComponentAverages_NWP.data(),RecvBuffer.data(),BLOB_AVG_COUNT*NumberComponents_NWP); + MPI_Barrier(Dm->Comm); + MPI_Allreduce(ComponentAverages_NWP.data(),RecvBuffer.data(),BLOB_AVG_COUNT*NumberComponents_NWP, MPI_DOUBLE,MPI_SUM,Dm->Comm); // MPI_Reduce(ComponentAverages_NWP.data(),RecvBuffer.data(),BLOB_AVG_COUNT,MPI_DOUBLE,MPI_SUM,0,Dm->Comm); if (Dm->rank()==0){ @@ -993,9 +993,9 @@ void TwoPhase::ComponentAverages() // reduce the wetting phase averages for (int b=0; bComm.barrier(); + MPI_Barrier(Dm->Comm); // MPI_Allreduce(&ComponentAverages_WP(0,b),RecvBuffer.data(),BLOB_AVG_COUNT,MPI_DOUBLE,MPI_SUM,Dm->Comm); - Dm->Comm.sumReduce(&ComponentAverages_WP(0,b),RecvBuffer.data(),BLOB_AVG_COUNT); + MPI_Reduce(&ComponentAverages_WP(0,b),RecvBuffer.data(),BLOB_AVG_COUNT,MPI_DOUBLE,MPI_SUM,0,Dm->Comm); for (int idx=0; idxComm.barrier(); - nwp_volume_global = Dm->Comm.sumReduce( nwp_volume ); - wp_volume_global = Dm->Comm.sumReduce( wp_volume ); - awn_global = Dm->Comm.sumReduce( awn ); - ans_global = Dm->Comm.sumReduce( ans ); - aws_global = Dm->Comm.sumReduce( aws ); - lwns_global = Dm->Comm.sumReduce( lwns ); - As_global = Dm->Comm.sumReduce( As ); - Jwn_global = Dm->Comm.sumReduce( Jwn ); - Kwn_global = Dm->Comm.sumReduce( Kwn ); - KGwns_global = Dm->Comm.sumReduce( KGwns ); - KNwns_global = Dm->Comm.sumReduce( KNwns ); - efawns_global = Dm->Comm.sumReduce( efawns ); - wwndnw_global = Dm->Comm.sumReduce( wwndnw ); - wwnsdnwn_global = Dm->Comm.sumReduce( wwnsdnwn ); - Jwnwwndnw_global = Dm->Comm.sumReduce( Jwnwwndnw ); + MPI_Barrier(Dm->Comm); + MPI_Allreduce(&nwp_volume,&nwp_volume_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm); + MPI_Allreduce(&wp_volume,&wp_volume_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm); + MPI_Allreduce(&awn,&awn_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm); + MPI_Allreduce(&ans,&ans_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm); + MPI_Allreduce(&aws,&aws_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm); + MPI_Allreduce(&lwns,&lwns_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm); + MPI_Allreduce(&As,&As_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm); + MPI_Allreduce(&Jwn,&Jwn_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm); + MPI_Allreduce(&Kwn,&Kwn_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm); + MPI_Allreduce(&KGwns,&KGwns_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm); + MPI_Allreduce(&KNwns,&KNwns_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm); + MPI_Allreduce(&efawns,&efawns_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm); + MPI_Allreduce(&wwndnw,&wwndnw_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm); + MPI_Allreduce(&wwnsdnwn,&wwnsdnwn_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm); + MPI_Allreduce(&Jwnwwndnw,&Jwnwwndnw_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm); // Phase averages - vol_w_global = Dm->Comm.sumReduce( vol_w ); - vol_n_global = Dm->Comm.sumReduce( vol_n ); - paw_global = Dm->Comm.sumReduce( paw ); - pan_global = Dm->Comm.sumReduce( pan ); - for (int idx=0; idx<3; idx++) - vaw_global(idx) = Dm->Comm.sumReduce( vaw(idx) ); - for (int idx=0; idx<3; idx++) - van_global(idx) = Dm->Comm.sumReduce( van(idx)); - for (int idx=0; idx<3; idx++) - vawn_global(idx) = Dm->Comm.sumReduce( vawn(idx) ); - for (int idx=0; idx<3; idx++) - vawns_global(idx) = Dm->Comm.sumReduce( vawns(idx) ); - for (int idx=0; idx<6; idx++){ - Gwn_global(idx) = Dm->Comm.sumReduce( Gwn(idx) ); - Gns_global(idx) = Dm->Comm.sumReduce( Gns(idx) ); - Gws_global(idx) = Dm->Comm.sumReduce( Gws(idx) ); - } - trawn_global = Dm->Comm.sumReduce( trawn ); - trJwn_global = Dm->Comm.sumReduce( trJwn ); - 
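In ComponentAverages() above, the non-wetting-phase table is combined with MPI_Allreduce (every rank receives the sum) while the wetting-phase table now uses MPI_Reduce to rank 0 only. A standalone sketch of that distinction:

    // Allreduce vs Reduce, as used for the component-average tables above.
    #include <mpi.h>
    #include <vector>

    void reduceTables(MPI_Comm comm, std::vector<double> &nwp, std::vector<double> &wp) {
        // NWP table: the summed result is valid on every rank.
        std::vector<double> nwp_sum(nwp.size(), 0.0);
        MPI_Allreduce(nwp.data(), nwp_sum.data(), (int)nwp.size(), MPI_DOUBLE, MPI_SUM, comm);
        nwp = nwp_sum;
        // WP table: the summed result lands on rank 0 only.
        std::vector<double> wp_sum(wp.size(), 0.0);
        MPI_Reduce(wp.data(), wp_sum.data(), (int)wp.size(), MPI_DOUBLE, MPI_SUM, 0, comm);
        int rank = 0;
        MPI_Comm_rank(comm, &rank);
        if (rank == 0) wp = wp_sum;
    }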
trRwn_global = Dm->Comm.sumReduce( trRwn ); - euler_global = Dm->Comm.sumReduce( euler ); - An_global = Dm->Comm.sumReduce( An ); - Jn_global = Dm->Comm.sumReduce( Jn ); - Kn_global = Dm->Comm.sumReduce( Kn ); - Dm->Comm.barrier(); + MPI_Allreduce(&vol_w,&vol_w_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm); + MPI_Allreduce(&vol_n,&vol_n_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm); + MPI_Allreduce(&paw,&paw_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm); + MPI_Allreduce(&pan,&pan_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm); + MPI_Allreduce(&vaw(0),&vaw_global(0),3,MPI_DOUBLE,MPI_SUM,Dm->Comm); + MPI_Allreduce(&van(0),&van_global(0),3,MPI_DOUBLE,MPI_SUM,Dm->Comm); + MPI_Allreduce(&vawn(0),&vawn_global(0),3,MPI_DOUBLE,MPI_SUM,Dm->Comm); + MPI_Allreduce(&vawns(0),&vawns_global(0),3,MPI_DOUBLE,MPI_SUM,Dm->Comm); + MPI_Allreduce(&Gwn(0),&Gwn_global(0),6,MPI_DOUBLE,MPI_SUM,Dm->Comm); + MPI_Allreduce(&Gns(0),&Gns_global(0),6,MPI_DOUBLE,MPI_SUM,Dm->Comm); + MPI_Allreduce(&Gws(0),&Gws_global(0),6,MPI_DOUBLE,MPI_SUM,Dm->Comm); + MPI_Allreduce(&trawn,&trawn_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm); + MPI_Allreduce(&trJwn,&trJwn_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm); + MPI_Allreduce(&trRwn,&trRwn_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm); + MPI_Allreduce(&euler,&euler_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm); + MPI_Allreduce(&An,&An_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm); + MPI_Allreduce(&Jn,&Jn_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm); + MPI_Allreduce(&Kn,&Kn_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm); + + MPI_Barrier(Dm->Comm); // Normalize the phase averages // (density of both components = 1.0) diff --git a/analysis/TwoPhase.h b/analysis/TwoPhase.h index 4d500a89..fddd04e8 100644 --- a/analysis/TwoPhase.h +++ b/analysis/TwoPhase.h @@ -12,7 +12,7 @@ #include "common/Domain.h" #include "common/Communication.h" #include "common/Utilities.h" -#include "common/MPI.h" +#include "common/MPI_Helpers.h" #include "IO/MeshDatabase.h" #include "IO/Reader.h" #include "IO/Writer.h" diff --git a/analysis/analysis.cpp b/analysis/analysis.cpp index 4298750e..7587f3c5 100644 --- a/analysis/analysis.cpp +++ b/analysis/analysis.cpp @@ -188,7 +188,7 @@ int ComputeLocalPhaseComponent(const IntArray &PhaseID, int &VALUE, BlobIDArray /****************************************************************** * Reorder the global blob ids * ******************************************************************/ -static int ReorderBlobIDs2( BlobIDArray& ID, int N_blobs, int ngx, int ngy, int ngz, const Utilities::MPI& comm ) +static int ReorderBlobIDs2( BlobIDArray& ID, int N_blobs, int ngx, int ngy, int ngz, MPI_Comm comm ) { if ( N_blobs==0 ) return 0; @@ -212,7 +212,7 @@ static int ReorderBlobIDs2( BlobIDArray& ID, int N_blobs, int ngx, int ngy, int } } ASSERT(max_id > map1(N_blobs); int N_blobs2 = 0; for (int i=0; i& N_recv, int64_t *send_buf, std::vector& recv_buf, std::map& remote_map, - const Utilities::MPI& comm ) + MPI_Comm comm ) { std::vector send_req(neighbors.size()); std::vector recv_req(neighbors.size()); - auto it = map.begin(); + std::vector status(neighbors.size()); + std::map::const_iterator it = map.begin(); ASSERT(N_send==(int)map.size()); for (size_t i=0; ifirst; send_buf[2*i+1] = it->second.new_id; } for (size_t i=0; ifirst] = it->second.new_id; } for (size_t i=0; i& remote_map, @@ -303,18 +304,18 @@ static bool updateLocalIds( const std::map& remote_map, return changed; } static int LocalToGlobalIDs( int nx, int ny, int nz, const RankInfoStruct& rank_info, - int nblobs, BlobIDArray& IDs, const Utilities::MPI& comm ) + int nblobs, BlobIDArray& IDs, MPI_Comm 
comm ) { PROFILE_START("LocalToGlobalIDs",1); const int rank = rank_info.rank[1][1][1]; - int nprocs = comm.getSize(); + int nprocs = comm_size(comm); const int ngx = (IDs.size(0)-nx)/2; const int ngy = (IDs.size(1)-ny)/2; const int ngz = (IDs.size(2)-nz)/2; // Get the number of blobs for each rank std::vector N_blobs(nprocs,0); PROFILE_START("LocalToGlobalIDs-Allgather",1); - comm.allGather(nblobs,getPtr(N_blobs)); + MPI_Allgather(&nblobs,1,MPI_INT,getPtr(N_blobs),1,MPI_INT,comm); PROFILE_STOP("LocalToGlobalIDs-Allgather",1); int64_t N_blobs_tot = 0; int offset = 0; @@ -362,12 +363,13 @@ static int LocalToGlobalIDs( int nx, int ny, int nz, const RankInfoStruct& rank_ std::vector N_recv(neighbors.size(),0); std::vector send_req(neighbors.size()); std::vector recv_req(neighbors.size()); + std::vector status(neighbors.size()); for (size_t i=0; i recv_buf(neighbors.size()); @@ -396,7 +398,8 @@ static int LocalToGlobalIDs( int nx, int ny, int nz, const RankInfoStruct& rank_ bool changed = updateLocalIds( remote_map, map ); // Check if we are finished int test = changed ? 1:0; - int result = comm.sumReduce( test ); + int result = 0; + MPI_Allreduce(&test,&result,1,MPI_INT,MPI_SUM,comm); if ( result==0 ) break; } @@ -432,7 +435,7 @@ static int LocalToGlobalIDs( int nx, int ny, int nz, const RankInfoStruct& rank_ } int ComputeGlobalBlobIDs( int nx, int ny, int nz, const RankInfoStruct& rank_info, const DoubleArray& Phase, const DoubleArray& SignDist, double vF, double vS, - BlobIDArray& GlobalBlobID, const Utilities::MPI& comm ) + BlobIDArray& GlobalBlobID, MPI_Comm comm ) { PROFILE_START("ComputeGlobalBlobIDs"); // First compute the local ids @@ -443,7 +446,7 @@ int ComputeGlobalBlobIDs( int nx, int ny, int nz, const RankInfoStruct& rank_inf return nglobal; } int ComputeGlobalPhaseComponent( int nx, int ny, int nz, const RankInfoStruct& rank_info, - const IntArray &PhaseID, int &VALUE, BlobIDArray &GlobalBlobID, const Utilities::MPI& comm ) + const IntArray &PhaseID, int &VALUE, BlobIDArray &GlobalBlobID, MPI_Comm comm ) { PROFILE_START("ComputeGlobalPhaseComponent"); // First compute the local ids @@ -459,27 +462,37 @@ int ComputeGlobalPhaseComponent( int nx, int ny, int nz, const RankInfoStruct& r * Compute the mapping of blob ids between timesteps * ******************************************************************/ typedef std::map > map_type; +template inline MPI_Datatype getMPIType(); +template<> inline MPI_Datatype getMPIType() { return MPI_INT; } +template<> inline MPI_Datatype getMPIType() { + if ( sizeof(int64_t)==sizeof(long int) ) + return MPI_LONG; + else if ( sizeof(int64_t)==sizeof(double) ) + return MPI_DOUBLE; +} template -void gatherSet( std::set& set, const Utilities::MPI& comm ) +void gatherSet( std::set& set, MPI_Comm comm ) { - int nprocs = comm.getSize(); + int nprocs = comm_size(comm); + MPI_Datatype type = getMPIType(); std::vector send_data(set.begin(),set.end()); int send_count = send_data.size(); std::vector recv_count(nprocs,0), recv_disp(nprocs,0); - comm.allGather( send_count, getPtr(recv_count) ); + MPI_Allgather(&send_count,1,MPI_INT,getPtr(recv_count),1,MPI_INT,comm); for (int i=1; i recv_data(recv_disp[nprocs-1]+recv_count[nprocs-1]); - comm.allGather( getPtr(send_data), send_count, getPtr(recv_data), - getPtr(recv_count), getPtr(recv_disp), true ); + MPI_Allgatherv(getPtr(send_data),send_count,type, + getPtr(recv_data),getPtr(recv_count),getPtr(recv_disp),type,comm); for (size_t i=0; i(); std::vector send_data; - for (auto it=src_map.begin(); 
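gatherSet() above collects each rank's set with an MPI_Allgather of the counts followed by an MPI_Allgatherv of the payloads. The same pattern, written out for int as a standalone sketch:

    // Allgather/Allgatherv pattern used by gatherSet: gather per-rank counts,
    // then gather the variable-length payloads and take the union.
    #include <mpi.h>
    #include <set>
    #include <vector>

    void gatherSetInt(std::set<int> &s, MPI_Comm comm) {
        int nprocs = 1;
        MPI_Comm_size(comm, &nprocs);
        std::vector<int> send(s.begin(), s.end());
        int send_count = (int)send.size();
        std::vector<int> recv_count(nprocs, 0), recv_disp(nprocs, 0);
        MPI_Allgather(&send_count, 1, MPI_INT, recv_count.data(), 1, MPI_INT, comm);
        for (int i = 1; i < nprocs; i++)
            recv_disp[i] = recv_disp[i - 1] + recv_count[i - 1];
        std::vector<int> recv(recv_disp[nprocs - 1] + recv_count[nprocs - 1]);
        MPI_Allgatherv(send.data(), send_count, MPI_INT,
                       recv.data(), recv_count.data(), recv_disp.data(), MPI_INT, comm);
        for (int x : recv)
            s.insert(x);   // union of all ranks' sets
    }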
it!=src_map.end(); ++it) { + for (map_type::const_iterator it=src_map.begin(); it!=src_map.end(); ++it) { int id = it->first; const std::map& src_ids = it->second; send_data.push_back(id); @@ -492,21 +505,21 @@ void gatherSrcIDMap( map_type& src_map, const Utilities::MPI& comm ) } int send_count = send_data.size(); std::vector recv_count(nprocs,0), recv_disp(nprocs,0); - comm.allGather(send_count,getPtr(recv_count)); + MPI_Allgather(&send_count,1,MPI_INT,getPtr(recv_count),1,MPI_INT,comm); for (int i=1; i recv_data(recv_disp[nprocs-1]+recv_count[nprocs-1]); - comm.allGather(getPtr(send_data),send_count, - getPtr(recv_data),getPtr(recv_count),getPtr(recv_disp),true); + MPI_Allgatherv(getPtr(send_data),send_count,type, + getPtr(recv_data),getPtr(recv_count),getPtr(recv_disp),type,comm); size_t i=0; src_map.clear(); while ( i < recv_data.size() ) { BlobIDType id = recv_data[i]; size_t count = recv_data[i+1]; i += 2; - auto& src_ids = src_map[id]; + std::map& src_ids = src_map[id]; for (size_t j=0; j::iterator it = src_ids.find(recv_data[i]); if ( it == src_ids.end() ) src_ids.insert(std::pair(recv_data[i],recv_data[i+1])); else @@ -525,7 +538,7 @@ void addSrcDstIDs( BlobIDType src_id, map_type& src_map, map_type& dst_map, } } ID_map_struct computeIDMap( int nx, int ny, int nz, - const BlobIDArray& ID1, const BlobIDArray& ID2, const Utilities::MPI& comm ) + const BlobIDArray& ID1, const BlobIDArray& ID2, MPI_Comm comm ) { ASSERT(ID1.size()==ID2.size()); PROFILE_START("computeIDMap"); @@ -767,7 +780,7 @@ void renumberIDs( const std::vector& new_ids, BlobIDArray& IDs ) ******************************************************************/ void writeIDMap( const ID_map_struct& map, long long int timestep, const std::string& filename ) { - int rank = Utilities::MPI( MPI_COMM_WORLD ).getRank(); + int rank = MPI_WORLD_RANK(); if ( rank!=0 ) return; bool empty = map.created.empty() && map.destroyed.empty() && diff --git a/analysis/analysis.h b/analysis/analysis.h index ec377995..2ce531b1 100644 --- a/analysis/analysis.h +++ b/analysis/analysis.h @@ -58,7 +58,7 @@ int ComputeLocalPhaseComponent( const IntArray &PhaseID, int &VALUE, IntArray &C */ int ComputeGlobalBlobIDs( int nx, int ny, int nz, const RankInfoStruct& rank_info, const DoubleArray& Phase, const DoubleArray& SignDist, double vF, double vS, - BlobIDArray& GlobalBlobID, const Utilities::MPI& comm ); + BlobIDArray& GlobalBlobID, MPI_Comm comm ); /*! @@ -75,7 +75,7 @@ int ComputeGlobalBlobIDs( int nx, int ny, int nz, const RankInfoStruct& rank_inf * @return Return the number of components in the specified phase */ int ComputeGlobalPhaseComponent( int nx, int ny, int nz, const RankInfoStruct& rank_info, - const IntArray &PhaseID, int &VALUE, BlobIDArray &GlobalBlobID, const Utilities::MPI& comm ); + const IntArray &PhaseID, int &VALUE, BlobIDArray &GlobalBlobID, MPI_Comm comm ); /*! 
@@ -87,7 +87,7 @@ int ComputeGlobalPhaseComponent( int nx, int ny, int nz, const RankInfoStruct& r * @param[in] nz Number of elements in the z-direction * @param[in/out] ID The ids of the blobs */ -void ReorderBlobIDs( BlobIDArray& ID, const Utilities::MPI& comm ); +void ReorderBlobIDs( BlobIDArray& ID, MPI_Comm comm ); typedef std::pair > BlobIDSplitStruct; @@ -120,7 +120,7 @@ struct ID_map_struct { * @param[in] ID1 The blob ids at the first timestep * @param[in] ID2 The blob ids at the second timestep */ -ID_map_struct computeIDMap( int nx, int ny, int nz, const BlobIDArray& ID1, const BlobIDArray& ID2, const Utilities::MPI& comm ); +ID_map_struct computeIDMap( int nx, int ny, int nz, const BlobIDArray& ID1, const BlobIDArray& ID2, MPI_Comm comm ); /*! diff --git a/analysis/distance.cpp b/analysis/distance.cpp index 9c605e1e..e297b435 100644 --- a/analysis/distance.cpp +++ b/analysis/distance.cpp @@ -176,7 +176,7 @@ void CalcVecDist( Array &d, const Array &ID0, const Domain &Dm, // Update distance double err = calcVecUpdateInterior( d, dx[0], dx[1], dx[2] ); // Check if we are finished - err = Dm.Comm.maxReduce( err ); + err = maxReduce( Dm.Comm, err ); if ( err < tol ) break; } diff --git a/analysis/morphology.cpp b/analysis/morphology.cpp index ab4312f8..72a17892 100644 --- a/analysis/morphology.cpp +++ b/analysis/morphology.cpp @@ -58,11 +58,11 @@ double MorphOpen(DoubleArray &SignDist, signed char *id, std::shared_ptr } } } - Dm->Comm.barrier(); + MPI_Barrier(Dm->Comm); // total Global is the number of nodes in the pore-space - totalGlobal = Dm->Comm.sumReduce( count ); - maxdistGlobal = Dm->Comm.sumReduce( maxdist ); + MPI_Allreduce(&count,&totalGlobal,1,MPI_DOUBLE,MPI_SUM,Dm->Comm); + MPI_Allreduce(&maxdist,&maxdistGlobal,1,MPI_DOUBLE,MPI_MAX,Dm->Comm); double volume=double(nprocx*nprocy*nprocz)*double(nx-2)*double(ny-2)*double(nz-2); double volume_fraction=totalGlobal/volume; if (rank==0) printf("Volume fraction for morphological opening: %f \n",volume_fraction); @@ -133,6 +133,7 @@ double MorphOpen(DoubleArray &SignDist, signed char *id, std::shared_ptr double deltaR=0.05; // amount to change the radius in voxel units double Rcrit_old=0.0; + double GlobalNumber = 1.f; int imin,jmin,kmin,imax,jmax,kmax; if (ErodeLabel == 1){ @@ -202,41 +203,41 @@ double MorphOpen(DoubleArray &SignDist, signed char *id, std::shared_ptr PackID(Dm->sendList_YZ, Dm->sendCount_YZ ,sendID_YZ, id); //...................................................................................... 
MPI_Sendrecv(sendID_x,Dm->sendCount_x,MPI_CHAR,Dm->rank_x(),sendtag, - recvID_X,Dm->recvCount_X,MPI_CHAR,Dm->rank_X(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); + recvID_X,Dm->recvCount_X,MPI_CHAR,Dm->rank_X(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); MPI_Sendrecv(sendID_X,Dm->sendCount_X,MPI_CHAR,Dm->rank_X(),sendtag, - recvID_x,Dm->recvCount_x,MPI_CHAR,Dm->rank_x(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); + recvID_x,Dm->recvCount_x,MPI_CHAR,Dm->rank_x(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); MPI_Sendrecv(sendID_y,Dm->sendCount_y,MPI_CHAR,Dm->rank_y(),sendtag, - recvID_Y,Dm->recvCount_Y,MPI_CHAR,Dm->rank_Y(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); + recvID_Y,Dm->recvCount_Y,MPI_CHAR,Dm->rank_Y(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); MPI_Sendrecv(sendID_Y,Dm->sendCount_Y,MPI_CHAR,Dm->rank_Y(),sendtag, - recvID_y,Dm->recvCount_y,MPI_CHAR,Dm->rank_y(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); + recvID_y,Dm->recvCount_y,MPI_CHAR,Dm->rank_y(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); MPI_Sendrecv(sendID_z,Dm->sendCount_z,MPI_CHAR,Dm->rank_z(),sendtag, - recvID_Z,Dm->recvCount_Z,MPI_CHAR,Dm->rank_Z(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); + recvID_Z,Dm->recvCount_Z,MPI_CHAR,Dm->rank_Z(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); MPI_Sendrecv(sendID_Z,Dm->sendCount_Z,MPI_CHAR,Dm->rank_Z(),sendtag, - recvID_z,Dm->recvCount_z,MPI_CHAR,Dm->rank_z(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); + recvID_z,Dm->recvCount_z,MPI_CHAR,Dm->rank_z(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); MPI_Sendrecv(sendID_xy,Dm->sendCount_xy,MPI_CHAR,Dm->rank_xy(),sendtag, - recvID_XY,Dm->recvCount_XY,MPI_CHAR,Dm->rank_XY(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); + recvID_XY,Dm->recvCount_XY,MPI_CHAR,Dm->rank_XY(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); MPI_Sendrecv(sendID_XY,Dm->sendCount_XY,MPI_CHAR,Dm->rank_XY(),sendtag, - recvID_xy,Dm->recvCount_xy,MPI_CHAR,Dm->rank_xy(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); + recvID_xy,Dm->recvCount_xy,MPI_CHAR,Dm->rank_xy(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); MPI_Sendrecv(sendID_Xy,Dm->sendCount_Xy,MPI_CHAR,Dm->rank_Xy(),sendtag, - recvID_xY,Dm->recvCount_xY,MPI_CHAR,Dm->rank_xY(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); + recvID_xY,Dm->recvCount_xY,MPI_CHAR,Dm->rank_xY(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); MPI_Sendrecv(sendID_xY,Dm->sendCount_xY,MPI_CHAR,Dm->rank_xY(),sendtag, - recvID_Xy,Dm->recvCount_Xy,MPI_CHAR,Dm->rank_Xy(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); + recvID_Xy,Dm->recvCount_Xy,MPI_CHAR,Dm->rank_Xy(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); MPI_Sendrecv(sendID_xz,Dm->sendCount_xz,MPI_CHAR,Dm->rank_xz(),sendtag, - recvID_XZ,Dm->recvCount_XZ,MPI_CHAR,Dm->rank_XZ(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); + recvID_XZ,Dm->recvCount_XZ,MPI_CHAR,Dm->rank_XZ(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); MPI_Sendrecv(sendID_XZ,Dm->sendCount_XZ,MPI_CHAR,Dm->rank_XZ(),sendtag, - recvID_xz,Dm->recvCount_xz,MPI_CHAR,Dm->rank_xz(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); + recvID_xz,Dm->recvCount_xz,MPI_CHAR,Dm->rank_xz(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); MPI_Sendrecv(sendID_Xz,Dm->sendCount_Xz,MPI_CHAR,Dm->rank_Xz(),sendtag, - recvID_xZ,Dm->recvCount_xZ,MPI_CHAR,Dm->rank_xZ(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); + recvID_xZ,Dm->recvCount_xZ,MPI_CHAR,Dm->rank_xZ(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); MPI_Sendrecv(sendID_xZ,Dm->sendCount_xZ,MPI_CHAR,Dm->rank_xZ(),sendtag, - 
recvID_Xz,Dm->recvCount_Xz,MPI_CHAR,Dm->rank_Xz(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); + recvID_Xz,Dm->recvCount_Xz,MPI_CHAR,Dm->rank_Xz(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); MPI_Sendrecv(sendID_yz,Dm->sendCount_yz,MPI_CHAR,Dm->rank_yz(),sendtag, - recvID_YZ,Dm->recvCount_YZ,MPI_CHAR,Dm->rank_YZ(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); + recvID_YZ,Dm->recvCount_YZ,MPI_CHAR,Dm->rank_YZ(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); MPI_Sendrecv(sendID_YZ,Dm->sendCount_YZ,MPI_CHAR,Dm->rank_YZ(),sendtag, - recvID_yz,Dm->recvCount_yz,MPI_CHAR,Dm->rank_yz(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); + recvID_yz,Dm->recvCount_yz,MPI_CHAR,Dm->rank_yz(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); MPI_Sendrecv(sendID_Yz,Dm->sendCount_Yz,MPI_CHAR,Dm->rank_Yz(),sendtag, - recvID_yZ,Dm->recvCount_yZ,MPI_CHAR,Dm->rank_yZ(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); + recvID_yZ,Dm->recvCount_yZ,MPI_CHAR,Dm->rank_yZ(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); MPI_Sendrecv(sendID_yZ,Dm->sendCount_yZ,MPI_CHAR,Dm->rank_yZ(),sendtag, - recvID_Yz,Dm->recvCount_Yz,MPI_CHAR,Dm->rank_Yz(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); + recvID_Yz,Dm->recvCount_Yz,MPI_CHAR,Dm->rank_Yz(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); //...................................................................................... UnpackID(Dm->recvList_x, Dm->recvCount_x ,recvID_x, id); UnpackID(Dm->recvList_X, Dm->recvCount_X ,recvID_X, id); @@ -258,7 +259,7 @@ double MorphOpen(DoubleArray &SignDist, signed char *id, std::shared_ptr UnpackID(Dm->recvList_YZ, Dm->recvCount_YZ ,recvID_YZ, id); //...................................................................................... - //double GlobalNumber = Dm->Comm.sumReduce( LocalNumber ); + MPI_Allreduce(&LocalNumber,&GlobalNumber,1,MPI_DOUBLE,MPI_SUM,Dm->Comm); count = 0.f; for (int k=1; k } } } - countGlobal = Dm->Comm.sumReduce( count ); + MPI_Allreduce(&count,&countGlobal,1,MPI_DOUBLE,MPI_SUM,Dm->Comm); void_fraction_new = countGlobal/totalGlobal; void_fraction_diff_new = abs(void_fraction_new-VoidFraction); /* if (rank==0){ @@ -359,11 +360,11 @@ double MorphDrain(DoubleArray &SignDist, signed char *id, std::shared_ptrComm.barrier(); + MPI_Barrier(Dm->Comm); // total Global is the number of nodes in the pore-space - totalGlobal = Dm->Comm.sumReduce( count ); - maxdistGlobal = Dm->Comm.sumReduce( maxdist ); + MPI_Allreduce(&count,&totalGlobal,1,MPI_DOUBLE,MPI_SUM,Dm->Comm); + MPI_Allreduce(&maxdist,&maxdistGlobal,1,MPI_DOUBLE,MPI_MAX,Dm->Comm); double volume=double(nprocx*nprocy*nprocz)*double(nx-2)*double(ny-2)*double(nz-2); double volume_fraction=totalGlobal/volume; if (rank==0) printf("Volume fraction for morphological opening: %f \n",volume_fraction); @@ -433,6 +434,7 @@ double MorphDrain(DoubleArray &SignDist, signed char *id, std::shared_ptrComm.barrier(); + MPI_Barrier(Dm->Comm); FILE *DRAIN = fopen("morphdrain.csv","w"); @@ -507,41 +509,41 @@ double MorphDrain(DoubleArray &SignDist, signed char *id, std::shared_ptrsendList_YZ, Dm->sendCount_YZ ,sendID_YZ, id); //...................................................................................... 
MPI_Sendrecv(sendID_x,Dm->sendCount_x,MPI_CHAR,Dm->rank_x(),sendtag, - recvID_X,Dm->recvCount_X,MPI_CHAR,Dm->rank_X(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); + recvID_X,Dm->recvCount_X,MPI_CHAR,Dm->rank_X(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); MPI_Sendrecv(sendID_X,Dm->sendCount_X,MPI_CHAR,Dm->rank_X(),sendtag, - recvID_x,Dm->recvCount_x,MPI_CHAR,Dm->rank_x(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); + recvID_x,Dm->recvCount_x,MPI_CHAR,Dm->rank_x(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); MPI_Sendrecv(sendID_y,Dm->sendCount_y,MPI_CHAR,Dm->rank_y(),sendtag, - recvID_Y,Dm->recvCount_Y,MPI_CHAR,Dm->rank_Y(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); + recvID_Y,Dm->recvCount_Y,MPI_CHAR,Dm->rank_Y(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); MPI_Sendrecv(sendID_Y,Dm->sendCount_Y,MPI_CHAR,Dm->rank_Y(),sendtag, - recvID_y,Dm->recvCount_y,MPI_CHAR,Dm->rank_y(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); + recvID_y,Dm->recvCount_y,MPI_CHAR,Dm->rank_y(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); MPI_Sendrecv(sendID_z,Dm->sendCount_z,MPI_CHAR,Dm->rank_z(),sendtag, - recvID_Z,Dm->recvCount_Z,MPI_CHAR,Dm->rank_Z(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); + recvID_Z,Dm->recvCount_Z,MPI_CHAR,Dm->rank_Z(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); MPI_Sendrecv(sendID_Z,Dm->sendCount_Z,MPI_CHAR,Dm->rank_Z(),sendtag, - recvID_z,Dm->recvCount_z,MPI_CHAR,Dm->rank_z(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); + recvID_z,Dm->recvCount_z,MPI_CHAR,Dm->rank_z(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); MPI_Sendrecv(sendID_xy,Dm->sendCount_xy,MPI_CHAR,Dm->rank_xy(),sendtag, - recvID_XY,Dm->recvCount_XY,MPI_CHAR,Dm->rank_XY(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); + recvID_XY,Dm->recvCount_XY,MPI_CHAR,Dm->rank_XY(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); MPI_Sendrecv(sendID_XY,Dm->sendCount_XY,MPI_CHAR,Dm->rank_XY(),sendtag, - recvID_xy,Dm->recvCount_xy,MPI_CHAR,Dm->rank_xy(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); + recvID_xy,Dm->recvCount_xy,MPI_CHAR,Dm->rank_xy(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); MPI_Sendrecv(sendID_Xy,Dm->sendCount_Xy,MPI_CHAR,Dm->rank_Xy(),sendtag, - recvID_xY,Dm->recvCount_xY,MPI_CHAR,Dm->rank_xY(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); + recvID_xY,Dm->recvCount_xY,MPI_CHAR,Dm->rank_xY(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); MPI_Sendrecv(sendID_xY,Dm->sendCount_xY,MPI_CHAR,Dm->rank_xY(),sendtag, - recvID_Xy,Dm->recvCount_Xy,MPI_CHAR,Dm->rank_Xy(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); + recvID_Xy,Dm->recvCount_Xy,MPI_CHAR,Dm->rank_Xy(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); MPI_Sendrecv(sendID_xz,Dm->sendCount_xz,MPI_CHAR,Dm->rank_xz(),sendtag, - recvID_XZ,Dm->recvCount_XZ,MPI_CHAR,Dm->rank_XZ(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); + recvID_XZ,Dm->recvCount_XZ,MPI_CHAR,Dm->rank_XZ(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); MPI_Sendrecv(sendID_XZ,Dm->sendCount_XZ,MPI_CHAR,Dm->rank_XZ(),sendtag, - recvID_xz,Dm->recvCount_xz,MPI_CHAR,Dm->rank_xz(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); + recvID_xz,Dm->recvCount_xz,MPI_CHAR,Dm->rank_xz(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); MPI_Sendrecv(sendID_Xz,Dm->sendCount_Xz,MPI_CHAR,Dm->rank_Xz(),sendtag, - recvID_xZ,Dm->recvCount_xZ,MPI_CHAR,Dm->rank_xZ(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); + recvID_xZ,Dm->recvCount_xZ,MPI_CHAR,Dm->rank_xZ(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); MPI_Sendrecv(sendID_xZ,Dm->sendCount_xZ,MPI_CHAR,Dm->rank_xZ(),sendtag, - 
recvID_Xz,Dm->recvCount_Xz,MPI_CHAR,Dm->rank_Xz(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); + recvID_Xz,Dm->recvCount_Xz,MPI_CHAR,Dm->rank_Xz(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); MPI_Sendrecv(sendID_yz,Dm->sendCount_yz,MPI_CHAR,Dm->rank_yz(),sendtag, - recvID_YZ,Dm->recvCount_YZ,MPI_CHAR,Dm->rank_YZ(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); + recvID_YZ,Dm->recvCount_YZ,MPI_CHAR,Dm->rank_YZ(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); MPI_Sendrecv(sendID_YZ,Dm->sendCount_YZ,MPI_CHAR,Dm->rank_YZ(),sendtag, - recvID_yz,Dm->recvCount_yz,MPI_CHAR,Dm->rank_yz(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); + recvID_yz,Dm->recvCount_yz,MPI_CHAR,Dm->rank_yz(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); MPI_Sendrecv(sendID_Yz,Dm->sendCount_Yz,MPI_CHAR,Dm->rank_Yz(),sendtag, - recvID_yZ,Dm->recvCount_yZ,MPI_CHAR,Dm->rank_yZ(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); + recvID_yZ,Dm->recvCount_yZ,MPI_CHAR,Dm->rank_yZ(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); MPI_Sendrecv(sendID_yZ,Dm->sendCount_yZ,MPI_CHAR,Dm->rank_yZ(),sendtag, - recvID_Yz,Dm->recvCount_Yz,MPI_CHAR,Dm->rank_Yz(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE); + recvID_Yz,Dm->recvCount_Yz,MPI_CHAR,Dm->rank_Yz(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); //...................................................................................... UnpackID(Dm->recvList_x, Dm->recvCount_x ,recvID_x, id); UnpackID(Dm->recvList_X, Dm->recvCount_X ,recvID_X, id); @@ -562,7 +564,7 @@ double MorphDrain(DoubleArray &SignDist, signed char *id, std::shared_ptrrecvList_yZ, Dm->recvCount_yZ ,recvID_yZ, id); UnpackID(Dm->recvList_YZ, Dm->recvCount_YZ ,recvID_YZ, id); //...................................................................................... - // double GlobalNumber = Dm->Comm.sumReduce( LocalNumber ); + MPI_Allreduce(&LocalNumber,&GlobalNumber,1,MPI_DOUBLE,MPI_SUM,Dm->Comm); for (int k=0; krank_info,phase,SignDist,vF,vS,phase_label,Dm->Comm); - Dm->Comm.barrier(); + MPI_Barrier(Dm->Comm); for (int k=0; kComm.sumReduce( count ); + MPI_Allreduce(&count,&countGlobal,1,MPI_DOUBLE,MPI_SUM,Dm->Comm); void_fraction_new = countGlobal/totalGlobal; void_fraction_diff_new = abs(void_fraction_new-VoidFraction); if (rank==0){ @@ -700,7 +702,7 @@ double MorphGrow(DoubleArray &BoundaryDist, DoubleArray &Dist, Array &id, } } } - double count_original = Dm->Comm.sumReduce( count); + double count_original=sumReduce( Dm->Comm, count); // Estimate morph_delta double morph_delta = 0.0; @@ -730,8 +732,8 @@ double MorphGrow(DoubleArray &BoundaryDist, DoubleArray &Dist, Array &id, } } } - count = Dm->Comm.sumReduce( count ); - MAX_DISPLACEMENT = Dm->Comm.maxReduce( MAX_DISPLACEMENT ); + count=sumReduce( Dm->Comm, count); + MAX_DISPLACEMENT = maxReduce( Dm->Comm, MAX_DISPLACEMENT); GrowthEstimate = count - count_original; ERROR = fabs((GrowthEstimate-TargetGrowth) /TargetGrowth); @@ -774,7 +776,7 @@ double MorphGrow(DoubleArray &BoundaryDist, DoubleArray &Dist, Array &id, } } } - count = Dm->Comm.sumReduce( count ); + count=sumReduce( Dm->Comm, count); return count; } diff --git a/analysis/runAnalysis.cpp b/analysis/runAnalysis.cpp index 89451c7b..6c76f58b 100644 --- a/analysis/runAnalysis.cpp +++ b/analysis/runAnalysis.cpp @@ -3,7 +3,7 @@ #include "analysis/analysis.h" #include "common/Array.h" #include "common/Communication.h" -#include "common/MPI.h" +#include "common/MPI_Helpers.h" #include "common/ScaLBL.h" #include "models/ColorModel.h" @@ -462,7 +462,7 @@ private: 
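Each MPI_Sendrecv pair above follows one pattern: pack the IDs on a face or edge send list, swap buffers with the neighbor in that direction, and unpack into the halo through the matching receive list. A sketch of a single direction pair, with simple gather/scatter helpers standing in for the real PackID/UnpackID:

    // One face of the halo exchange shown above (the x / X pair), sketched with
    // stand-in gather/scatter helpers; not the actual PackID/UnpackID signatures.
    #include <mpi.h>
    #include <vector>

    static void packIDs(const std::vector<int> &list, const signed char *id, std::vector<char> &buf) {
        buf.resize(list.size());
        for (size_t i = 0; i < list.size(); i++) buf[i] = id[list[i]];
    }
    static void unpackIDs(const std::vector<int> &list, const std::vector<char> &buf, signed char *id) {
        for (size_t i = 0; i < list.size(); i++) id[list[i]] = buf[i];
    }

    void exchangeX(signed char *id, const std::vector<int> &send_x, const std::vector<int> &recv_X,
                   int rank_x, int rank_X, MPI_Comm comm) {
        std::vector<char> send_buf, recv_buf(recv_X.size());
        packIDs(send_x, id, send_buf);
        MPI_Sendrecv(send_buf.data(), (int)send_buf.size(), MPI_CHAR, rank_x, 0,
                     recv_buf.data(), (int)recv_buf.size(), MPI_CHAR, rank_X, 0,
                     comm, MPI_STATUS_IGNORE);
        unpackIDs(recv_X, recv_buf, id);
    }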
/****************************************************************** * MPI comm wrapper for use with analysis * ******************************************************************/ -runAnalysis::commWrapper::commWrapper( int tag_, const Utilities::MPI& comm_, runAnalysis* analysis_ ): +runAnalysis::commWrapper::commWrapper( int tag_, MPI_Comm comm_, runAnalysis* analysis_ ): comm(comm_), tag(tag_), analysis(analysis_) @@ -479,7 +479,7 @@ runAnalysis::commWrapper::~commWrapper() { if ( tag == -1 ) return; - comm.barrier(); + MPI_Barrier( comm ); analysis->d_comm_used[tag] = false; } runAnalysis::commWrapper runAnalysis::getComm( ) @@ -496,10 +496,10 @@ runAnalysis::commWrapper runAnalysis::getComm( ) if ( tag == -1 ) ERROR("Unable to get comm"); } - tag = d_comm.bcast( tag, 0 ); + MPI_Bcast( &tag, 1, MPI_INT, 0, d_comm ); d_comm_used[tag] = true; - if ( d_comms[tag].isNull() ) - d_comms[tag] = d_comm.dup(); + if ( d_comms[tag] == MPI_COMM_NULL ) + MPI_Comm_dup( MPI_COMM_WORLD, &d_comms[tag] ); return commWrapper(tag,d_comms[tag],this); } @@ -507,20 +507,14 @@ runAnalysis::commWrapper runAnalysis::getComm( ) /****************************************************************** * Constructor/Destructors * ******************************************************************/ -runAnalysis::runAnalysis( std::shared_ptr input_db, - const RankInfoStruct& rank_info, - std::shared_ptr ScaLBL_Comm, - std::shared_ptr Dm, - int Np, - bool Regular, - IntArray Map ): - d_Np( Np ), - d_regular ( Regular), - d_rank_info( rank_info ), - d_Map( Map ), - d_fillData(Dm->Comm,Dm->rank_info,{Dm->Nx-2,Dm->Ny-2,Dm->Nz-2},{1,1,1},0,1), - d_comm( Utilities::MPI( MPI_COMM_WORLD ).dup() ), - d_ScaLBL_Comm( ScaLBL_Comm) +runAnalysis::runAnalysis(std::shared_ptr input_db, const RankInfoStruct& rank_info, std::shared_ptr ScaLBL_Comm, std::shared_ptr Dm, + int Np, bool Regular, IntArray Map ): + d_Np( Np ), + d_regular ( Regular), + d_rank_info( rank_info ), + d_Map( Map ), + d_fillData(Dm->Comm,Dm->rank_info,{Dm->Nx-2,Dm->Ny-2,Dm->Nz-2},{1,1,1},0,1), + d_ScaLBL_Comm( ScaLBL_Comm) { auto db = input_db->getDatabase( "Analysis" ); @@ -558,7 +552,7 @@ runAnalysis::runAnalysis( std::shared_ptr input_db, d_restartFile = restart_file + "." 
+ rankString; - d_rank = d_comm.getRank(); + d_rank = MPI_WORLD_RANK(); writeIDMap(ID_map_struct(),0,id_map_filename); // Initialize IO for silo IO::initialize("","silo","false"); @@ -627,8 +621,11 @@ runAnalysis::runAnalysis( std::shared_ptr input_db, // Initialize the comms - for (int i=0; i<1024; i++) + MPI_Comm_dup(MPI_COMM_WORLD,&d_comm); + for (int i=0; i<1024; i++) { + d_comms[i] = MPI_COMM_NULL; d_comm_used[i] = false; + } // Initialize the threads int N_threads = db->getWithDefault( "N_threads", 4 ); auto method = db->getWithDefault( "load_balance", "default" ); @@ -638,6 +635,12 @@ runAnalysis::~runAnalysis( ) { // Finish processing analysis finish(); + // Clear internal data + MPI_Comm_free( &d_comm ); + for (int i=0; i<1024; i++) { + if ( d_comms[i] != MPI_COMM_NULL ) + MPI_Comm_free(&d_comms[i]); + } } void runAnalysis::finish( ) { @@ -651,7 +654,7 @@ void runAnalysis::finish( ) d_wait_subphase.reset(); d_wait_restart.reset(); // Syncronize - d_comm.barrier(); + MPI_Barrier( d_comm ); PROFILE_STOP("finish"); } diff --git a/analysis/runAnalysis.h b/analysis/runAnalysis.h index 3c5bc7f0..0bf2f676 100644 --- a/analysis/runAnalysis.h +++ b/analysis/runAnalysis.h @@ -68,10 +68,10 @@ public: class commWrapper { public: - Utilities::MPI comm; + MPI_Comm comm; int tag; runAnalysis *analysis; - commWrapper( int tag, const Utilities::MPI& comm, runAnalysis *analysis ); + commWrapper( int tag, MPI_Comm comm, runAnalysis *analysis ); commWrapper( ) = delete; commWrapper( const commWrapper &rhs ) = delete; commWrapper& operator=( const commWrapper &rhs ) = delete; @@ -100,8 +100,8 @@ private: std::vector d_meshData; fillHalo d_fillData; std::string d_restartFile; - Utilities::MPI d_comm; - Utilities::MPI d_comms[1024]; + MPI_Comm d_comm; + MPI_Comm d_comms[1024]; volatile bool d_comm_used[1024]; std::shared_ptr d_ScaLBL_Comm; diff --git a/analysis/uCT.cpp b/analysis/uCT.cpp index 28d677c1..912f8e85 100644 --- a/analysis/uCT.cpp +++ b/analysis/uCT.cpp @@ -228,7 +228,8 @@ void filter_final( Array& ID, Array& Dist, Array& Mean, Array& Dist1, Array& Dist2 ) { PROFILE_SCOPED(timer,"filter_final"); - int rank = Dm.Comm.getRank(); + int rank; + MPI_Comm_rank(Dm.Comm,&rank); int Nx = Dm.Nx-2; int Ny = Dm.Ny-2; int Nz = Dm.Nz-2; @@ -241,7 +242,7 @@ void filter_final( Array& ID, Array& Dist, float tmp = 0; for (size_t i=0; i(Dist0.length()) ); + tmp = sqrt( sumReduce(Dm.Comm,tmp) / sumReduce(Dm.Comm,(float)Dist0.length()) ); const float dx1 = 0.3*tmp; const float dx2 = 1.05*dx1; if (rank==0) @@ -284,7 +285,7 @@ void filter_final( Array& ID, Array& Dist, Phase.fill(1); ComputeGlobalBlobIDs( Nx, Ny, Nz, Dm.rank_info, Phase, SignDist, 0, 0, GlobalBlobID, Dm.Comm ); fillInt.fill(GlobalBlobID); - int N_blobs = Dm.Comm.maxReduce(GlobalBlobID.max()+1); + int N_blobs = maxReduce(Dm.Comm,GlobalBlobID.max()+1); std::vector mean(N_blobs,0); std::vector count(N_blobs,0); for (int k=1; k<=Nz; k++) { @@ -320,8 +321,8 @@ void filter_final( Array& ID, Array& Dist, } } } - mean = Dm.Comm.sumReduce(mean); - count = Dm.Comm.sumReduce(count); + mean = sumReduce(Dm.Comm,mean); + count = sumReduce(Dm.Comm,count); for (size_t i=0; i -o ") -set(CMAKE_HIP_CREATE_SHARED_MODULE "${HIP_HIPCC_CMAKE_LINKER_HELPER} ${HCC_PATH} -o -shared" ) -set(CMAKE_HIP_LINK_EXECUTABLE "${HIP_HIPCC_CMAKE_LINKER_HELPER} ${HCC_PATH} -o ") - -############################################################################### -# FIND: HIP and associated helper binaries -############################################################################### -# 
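The runAnalysis changes above manage a fixed pool of duplicated communicators: d_comms[] entries are duplicated lazily in getComm(), marked busy through d_comm_used, handed out via commWrapper, and freed in the destructor. A condensed sketch of that lifecycle (pool size reduced for brevity; the patch uses 1024 slots):

    // Lazy communicator pool: duplicate on first use, mark busy, free at teardown.
    #include <mpi.h>

    struct CommPool {
        static constexpr int N = 8;
        MPI_Comm comms[N];
        bool used[N];

        CommPool() {
            for (int i = 0; i < N; i++) { comms[i] = MPI_COMM_NULL; used[i] = false; }
        }
        ~CommPool() {
            for (int i = 0; i < N; i++)
                if (comms[i] != MPI_COMM_NULL) MPI_Comm_free(&comms[i]);
        }
        // Returns a free duplicated communicator, or MPI_COMM_NULL if exhausted.
        MPI_Comm acquire(int &tag) {
            for (tag = 0; tag < N; tag++) {
                if (!used[tag]) {
                    if (comms[tag] == MPI_COMM_NULL)
                        MPI_Comm_dup(MPI_COMM_WORLD, &comms[tag]);
                    used[tag] = true;
                    return comms[tag];
                }
            }
            tag = -1;
            return MPI_COMM_NULL;
        }
        void release(int tag) { if (tag >= 0) used[tag] = false; }
    };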
HIP is supported on Linux only -if(UNIX AND NOT APPLE AND NOT CYGWIN) - # Search for HIP installation - if(NOT HIP_ROOT_DIR) - # Search in user specified path first - find_path( - HIP_ROOT_DIR - NAMES hipconfig - PATHS - ENV ROCM_PATH - ENV HIP_PATH - PATH_SUFFIXES bin - DOC "HIP installed location" - NO_DEFAULT_PATH - ) - # Now search in default path - find_path( - HIP_ROOT_DIR - NAMES hipconfig - PATHS - /opt/rocm - /opt/rocm/hip - PATH_SUFFIXES bin - DOC "HIP installed location" - ) - - # Check if we found HIP installation - if(HIP_ROOT_DIR) - # If so, fix the path - string(REGEX REPLACE "[/\\\\]?bin[64]*[/\\\\]?$" "" HIP_ROOT_DIR ${HIP_ROOT_DIR}) - # And push it back to the cache - set(HIP_ROOT_DIR ${HIP_ROOT_DIR} CACHE PATH "HIP installed location" FORCE) - endif() - if(NOT EXISTS ${HIP_ROOT_DIR}) - if(HIP_FIND_REQUIRED) - message(FATAL_ERROR "Specify HIP_ROOT_DIR") - elseif(NOT HIP_FIND_QUIETLY) - message("HIP_ROOT_DIR not found or specified") - endif() - endif() - endif() - - # Find HIPCC executable - find_program( - HIP_HIPCC_EXECUTABLE - NAMES hipcc - PATHS - "${HIP_ROOT_DIR}" - ENV ROCM_PATH - ENV HIP_PATH - /opt/rocm - /opt/rocm/hip - PATH_SUFFIXES bin - NO_DEFAULT_PATH - ) - if(NOT HIP_HIPCC_EXECUTABLE) - # Now search in default paths - find_program(HIP_HIPCC_EXECUTABLE hipcc) - endif() - mark_as_advanced(HIP_HIPCC_EXECUTABLE) - - # Find HIPCONFIG executable - find_program( - HIP_HIPCONFIG_EXECUTABLE - NAMES hipconfig - PATHS - "${HIP_ROOT_DIR}" - ENV ROCM_PATH - ENV HIP_PATH - /opt/rocm - /opt/rocm/hip - PATH_SUFFIXES bin - NO_DEFAULT_PATH - ) - if(NOT HIP_HIPCONFIG_EXECUTABLE) - # Now search in default paths - find_program(HIP_HIPCONFIG_EXECUTABLE hipconfig) - endif() - mark_as_advanced(HIP_HIPCONFIG_EXECUTABLE) - - # Find HIPCC_CMAKE_LINKER_HELPER executable - find_program( - HIP_HIPCC_CMAKE_LINKER_HELPER - NAMES hipcc_cmake_linker_helper - PATHS - "${HIP_ROOT_DIR}" - ENV ROCM_PATH - ENV HIP_PATH - /opt/rocm - /opt/rocm/hip - PATH_SUFFIXES bin - NO_DEFAULT_PATH - ) - if(NOT HIP_HIPCC_CMAKE_LINKER_HELPER) - # Now search in default paths - find_program(HIP_HIPCC_CMAKE_LINKER_HELPER hipcc_cmake_linker_helper) - endif() - mark_as_advanced(HIP_HIPCC_CMAKE_LINKER_HELPER) - - if(HIP_HIPCONFIG_EXECUTABLE AND NOT HIP_VERSION) - # Compute the version - execute_process( - COMMAND ${HIP_HIPCONFIG_EXECUTABLE} --version - OUTPUT_VARIABLE _hip_version - ERROR_VARIABLE _hip_error - OUTPUT_STRIP_TRAILING_WHITESPACE - ERROR_STRIP_TRAILING_WHITESPACE - ) - if(NOT _hip_error) - set(HIP_VERSION ${_hip_version} CACHE STRING "Version of HIP as computed from hipcc") - else() - set(HIP_VERSION "0.0.0" CACHE STRING "Version of HIP as computed by FindHIP()") - endif() - mark_as_advanced(HIP_VERSION) - endif() - if(HIP_VERSION) - string(REPLACE "." 
";" _hip_version_list "${HIP_VERSION}") - list(GET _hip_version_list 0 HIP_VERSION_MAJOR) - list(GET _hip_version_list 1 HIP_VERSION_MINOR) - list(GET _hip_version_list 2 HIP_VERSION_PATCH) - set(HIP_VERSION_STRING "${HIP_VERSION}") - endif() - - if(HIP_HIPCONFIG_EXECUTABLE AND NOT HIP_PLATFORM) - # Compute the platform - execute_process( - COMMAND ${HIP_HIPCONFIG_EXECUTABLE} --platform - OUTPUT_VARIABLE _hip_platform - OUTPUT_STRIP_TRAILING_WHITESPACE - ) - set(HIP_PLATFORM ${_hip_platform} CACHE STRING "HIP platform as computed by hipconfig") - mark_as_advanced(HIP_PLATFORM) - endif() -endif() - -include(FindPackageHandleStandardArgs) -find_package_handle_standard_args( - HIP - REQUIRED_VARS - HIP_ROOT_DIR - HIP_HIPCC_EXECUTABLE - HIP_HIPCONFIG_EXECUTABLE - HIP_PLATFORM - VERSION_VAR HIP_VERSION - ) - -############################################################################### -# MACRO: Locate helper files -############################################################################### -macro(HIP_FIND_HELPER_FILE _name _extension) - set(_hip_full_name "${_name}.${_extension}") - get_filename_component(CMAKE_CURRENT_LIST_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH) - set(HIP_${_name} "${CMAKE_CURRENT_LIST_DIR}/FindHIP/${_hip_full_name}") - if(NOT EXISTS "${HIP_${_name}}") - set(error_message "${_hip_full_name} not found in ${CMAKE_CURRENT_LIST_DIR}/FindHIP") - if(HIP_FIND_REQUIRED) - message(FATAL_ERROR "${error_message}") - else() - if(NOT HIP_FIND_QUIETLY) - message(STATUS "${error_message}") - endif() - endif() - endif() - # Set this variable as internal, so the user isn't bugged with it. - set(HIP_${_name} ${HIP_${_name}} CACHE INTERNAL "Location of ${_full_name}" FORCE) -endmacro() - -############################################################################### -hip_find_helper_file(run_make2cmake cmake) -hip_find_helper_file(run_hipcc cmake) -############################################################################### - -############################################################################### -# MACRO: Reset compiler flags -############################################################################### -macro(HIP_RESET_FLAGS) - unset(HIP_HIPCC_FLAGS) - unset(HIP_HCC_FLAGS) - unset(HIP_NVCC_FLAGS) - foreach(config ${_hip_configuration_types}) - string(TOUPPER ${config} config_upper) - unset(HIP_HIPCC_FLAGS_${config_upper}) - unset(HIP_HCC_FLAGS_${config_upper}) - unset(HIP_NVCC_FLAGS_${config_upper}) - endforeach() -endmacro() - -############################################################################### -# MACRO: Separate the options from the sources -############################################################################### -macro(HIP_GET_SOURCES_AND_OPTIONS _sources _cmake_options _hipcc_options _hcc_options _nvcc_options) - set(${_sources}) - set(${_cmake_options}) - set(${_hipcc_options}) - set(${_hcc_options}) - set(${_nvcc_options}) - set(_hipcc_found_options FALSE) - set(_hcc_found_options FALSE) - set(_nvcc_found_options FALSE) - foreach(arg ${ARGN}) - if("x${arg}" STREQUAL "xHIPCC_OPTIONS") - set(_hipcc_found_options TRUE) - set(_hcc_found_options FALSE) - set(_nvcc_found_options FALSE) - elseif("x${arg}" STREQUAL "xHCC_OPTIONS") - set(_hipcc_found_options FALSE) - set(_hcc_found_options TRUE) - set(_nvcc_found_options FALSE) - elseif("x${arg}" STREQUAL "xNVCC_OPTIONS") - set(_hipcc_found_options FALSE) - set(_hcc_found_options FALSE) - set(_nvcc_found_options TRUE) - elseif( - "x${arg}" STREQUAL "xEXCLUDE_FROM_ALL" OR - "x${arg}" STREQUAL "xSTATIC" OR - 
"x${arg}" STREQUAL "xSHARED" OR - "x${arg}" STREQUAL "xMODULE" - ) - list(APPEND ${_cmake_options} ${arg}) - else() - if(_hipcc_found_options) - list(APPEND ${_hipcc_options} ${arg}) - elseif(_hcc_found_options) - list(APPEND ${_hcc_options} ${arg}) - elseif(_nvcc_found_options) - list(APPEND ${_nvcc_options} ${arg}) - else() - # Assume this is a file - list(APPEND ${_sources} ${arg}) - endif() - endif() - endforeach() -endmacro() - -############################################################################### -# MACRO: Add include directories to pass to the hipcc command -############################################################################### -set(HIP_HIPCC_INCLUDE_ARGS_USER "") -macro(HIP_INCLUDE_DIRECTORIES) - foreach(dir ${ARGN}) - list(APPEND HIP_HIPCC_INCLUDE_ARGS_USER $<$:-I${dir}>) - endforeach() -endmacro() - -############################################################################### -# FUNCTION: Helper to avoid clashes of files with the same basename but different paths -############################################################################### -function(HIP_COMPUTE_BUILD_PATH path build_path) - # Convert to cmake style paths - file(TO_CMAKE_PATH "${path}" bpath) - if(IS_ABSOLUTE "${bpath}") - string(FIND "${bpath}" "${CMAKE_CURRENT_BINARY_DIR}" _binary_dir_pos) - if(_binary_dir_pos EQUAL 0) - file(RELATIVE_PATH bpath "${CMAKE_CURRENT_BINARY_DIR}" "${bpath}") - else() - file(RELATIVE_PATH bpath "${CMAKE_CURRENT_SOURCE_DIR}" "${bpath}") - endif() - endif() - - # Remove leading / - string(REGEX REPLACE "^[/]+" "" bpath "${bpath}") - # Avoid absolute paths by removing ':' - string(REPLACE ":" "_" bpath "${bpath}") - # Avoid relative paths that go up the tree - string(REPLACE "../" "__/" bpath "${bpath}") - # Avoid spaces - string(REPLACE " " "_" bpath "${bpath}") - # Strip off the filename - get_filename_component(bpath "${bpath}" PATH) - - set(${build_path} "${bpath}" PARENT_SCOPE) -endfunction() - -############################################################################### -# MACRO: Parse OPTIONS from ARGN & set variables prefixed by _option_prefix -############################################################################### -macro(HIP_PARSE_HIPCC_OPTIONS _option_prefix) - set(_hip_found_config) - foreach(arg ${ARGN}) - # Determine if we are dealing with a per-configuration flag - foreach(config ${_hip_configuration_types}) - string(TOUPPER ${config} config_upper) - if(arg STREQUAL "${config_upper}") - set(_hip_found_config _${arg}) - # Clear arg to prevent it from being processed anymore - set(arg) - endif() - endforeach() - if(arg) - list(APPEND ${_option_prefix}${_hip_found_config} "${arg}") - endif() - endforeach() -endmacro() - -############################################################################### -# MACRO: Try and include dependency file if it exists -############################################################################### -macro(HIP_INCLUDE_HIPCC_DEPENDENCIES dependency_file) - set(HIP_HIPCC_DEPEND) - set(HIP_HIPCC_DEPEND_REGENERATE FALSE) - - # Create the dependency file if it doesn't exist - if(NOT EXISTS ${dependency_file}) - file(WRITE ${dependency_file} "# Generated by: FindHIP.cmake. 
Do not edit.\n") - endif() - # Include the dependency file - include(${dependency_file}) - - # Verify the existence of all the included files - if(HIP_HIPCC_DEPEND) - foreach(f ${HIP_HIPCC_DEPEND}) - if(NOT EXISTS ${f}) - # If they aren't there, regenerate the file again - set(HIP_HIPCC_DEPEND_REGENERATE TRUE) - endif() - endforeach() - else() - # No dependencies, so regenerate the file - set(HIP_HIPCC_DEPEND_REGENERATE TRUE) - endif() - - # Regenerate the dependency file if needed - if(HIP_HIPCC_DEPEND_REGENERATE) - set(HIP_HIPCC_DEPEND ${dependency_file}) - file(WRITE ${dependency_file} "# Generated by: FindHIP.cmake. Do not edit.\n") - endif() -endmacro() - -############################################################################### -# MACRO: Prepare cmake commands for the target -############################################################################### -macro(HIP_PREPARE_TARGET_COMMANDS _target _format _generated_files _source_files) - set(_hip_flags "") - string(TOUPPER "${CMAKE_BUILD_TYPE}" _hip_build_configuration) - if(HIP_HOST_COMPILATION_CPP) - set(HIP_C_OR_CXX CXX) - else() - set(HIP_C_OR_CXX C) - endif() - set(generated_extension ${CMAKE_${HIP_C_OR_CXX}_OUTPUT_EXTENSION}) - - # Initialize list of includes with those specified by the user. Append with - # ones specified to cmake directly. - set(HIP_HIPCC_INCLUDE_ARGS ${HIP_HIPCC_INCLUDE_ARGS_USER}) - - # Add the include directories - set(include_directories_generator "$") - list(APPEND HIP_HIPCC_INCLUDE_ARGS "$<$:-I$>") - - get_directory_property(_hip_include_directories INCLUDE_DIRECTORIES) - list(REMOVE_DUPLICATES _hip_include_directories) - if(_hip_include_directories) - foreach(dir ${_hip_include_directories}) - list(APPEND HIP_HIPCC_INCLUDE_ARGS $<$:-I${dir}>) - endforeach() - endif() - - HIP_GET_SOURCES_AND_OPTIONS(_hip_sources _hip_cmake_options _hipcc_options _hcc_options _nvcc_options ${ARGN}) - HIP_PARSE_HIPCC_OPTIONS(HIP_HIPCC_FLAGS ${_hipcc_options}) - HIP_PARSE_HIPCC_OPTIONS(HIP_HCC_FLAGS ${_hcc_options}) - HIP_PARSE_HIPCC_OPTIONS(HIP_NVCC_FLAGS ${_nvcc_options}) - - # Add the compile definitions - set(compile_definition_generator "$") - list(APPEND HIP_HIPCC_FLAGS "$<$:-D$>") - - # Check if we are building shared library. 
- set(_hip_build_shared_libs FALSE) - list(FIND _hip_cmake_options SHARED _hip_found_SHARED) - list(FIND _hip_cmake_options MODULE _hip_found_MODULE) - if(_hip_found_SHARED GREATER -1 OR _hip_found_MODULE GREATER -1) - set(_hip_build_shared_libs TRUE) - endif() - list(FIND _hip_cmake_options STATIC _hip_found_STATIC) - if(_hip_found_STATIC GREATER -1) - set(_hip_build_shared_libs FALSE) - endif() - - # If we are building a shared library, add extra flags to HIP_HIPCC_FLAGS - if(_hip_build_shared_libs) - list(APPEND HIP_HCC_FLAGS "-fPIC") - list(APPEND HIP_NVCC_FLAGS "--shared -Xcompiler '-fPIC'") - endif() - - # Set host compiler - set(HIP_HOST_COMPILER "${CMAKE_${HIP_C_OR_CXX}_COMPILER}") - - # Set compiler flags - set(_HIP_HOST_FLAGS "set(CMAKE_HOST_FLAGS ${CMAKE_${HIP_C_OR_CXX}_FLAGS})") - set(_HIP_HIPCC_FLAGS "set(HIP_HIPCC_FLAGS ${HIP_HIPCC_FLAGS})") - set(_HIP_HCC_FLAGS "set(HIP_HCC_FLAGS ${HIP_HCC_FLAGS})") - set(_HIP_NVCC_FLAGS "set(HIP_NVCC_FLAGS ${HIP_NVCC_FLAGS})") - foreach(config ${_hip_configuration_types}) - string(TOUPPER ${config} config_upper) - set(_HIP_HOST_FLAGS "${_HIP_HOST_FLAGS}\nset(CMAKE_HOST_FLAGS_${config_upper} ${CMAKE_${HIP_C_OR_CXX}_FLAGS_${config_upper}})") - set(_HIP_HIPCC_FLAGS "${_HIP_HIPCC_FLAGS}\nset(HIP_HIPCC_FLAGS_${config_upper} ${HIP_HIPCC_FLAGS_${config_upper}})") - set(_HIP_HCC_FLAGS "${_HIP_HCC_FLAGS}\nset(HIP_HCC_FLAGS_${config_upper} ${HIP_HCC_FLAGS_${config_upper}})") - set(_HIP_NVCC_FLAGS "${_HIP_NVCC_FLAGS}\nset(HIP_NVCC_FLAGS_${config_upper} ${HIP_NVCC_FLAGS_${config_upper}})") - endforeach() - - # Reset the output variable - set(_hip_generated_files "") - set(_hip_source_files "") - - # Iterate over all arguments and create custom commands for all source files - foreach(file ${ARGN}) - # Ignore any file marked as a HEADER_FILE_ONLY - get_source_file_property(_is_header ${file} HEADER_FILE_ONLY) - # Allow per source file overrides of the format. Also allows compiling non .cu files. 
- get_source_file_property(_hip_source_format ${file} HIP_SOURCE_PROPERTY_FORMAT) - if((${file} MATCHES "\\.cu$" OR _hip_source_format) AND NOT _is_header) - set(host_flag FALSE) - else() - set(host_flag TRUE) - endif() - - if(NOT host_flag) - # Determine output directory - HIP_COMPUTE_BUILD_PATH("${file}" hip_build_path) - set(hip_compile_output_dir "${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/${_target}.dir/${hip_build_path}") - - get_filename_component(basename ${file} NAME) - set(generated_file_path "${hip_compile_output_dir}/${CMAKE_CFG_INTDIR}") - set(generated_file_basename "${_target}_generated_${basename}${generated_extension}") - - # Set file names - set(generated_file "${generated_file_path}/${generated_file_basename}") - set(cmake_dependency_file "${hip_compile_output_dir}/${generated_file_basename}.depend") - set(custom_target_script_pregen "${hip_compile_output_dir}/${generated_file_basename}.cmake.pre-gen") - set(custom_target_script "${hip_compile_output_dir}/${generated_file_basename}.cmake") - - # Set properties for object files - set_source_files_properties("${generated_file}" - PROPERTIES - EXTERNAL_OBJECT true # This is an object file not to be compiled, but only be linked - ) - - # Don't add CMAKE_CURRENT_SOURCE_DIR if the path is already an absolute path - get_filename_component(file_path "${file}" PATH) - if(IS_ABSOLUTE "${file_path}") - set(source_file "${file}") - else() - set(source_file "${CMAKE_CURRENT_SOURCE_DIR}/${file}") - endif() - - # Bring in the dependencies - HIP_INCLUDE_HIPCC_DEPENDENCIES(${cmake_dependency_file}) - - # Configure the build script - configure_file("${HIP_run_hipcc}" "${custom_target_script_pregen}" @ONLY) - file(GENERATE - OUTPUT "${custom_target_script}" - INPUT "${custom_target_script_pregen}" - ) - set(main_dep DEPENDS ${source_file}) - if(CMAKE_GENERATOR MATCHES "Makefiles") - set(verbose_output "$(VERBOSE)") - elseif(HIP_VERBOSE_BUILD) - set(verbose_output ON) - else() - set(verbose_output OFF) - endif() - - # Create up the comment string - file(RELATIVE_PATH generated_file_relative_path "${CMAKE_BINARY_DIR}" "${generated_file}") - set(hip_build_comment_string "Building HIPCC object ${generated_file_relative_path}") - - # Build the generated file and dependency file - add_custom_command( - OUTPUT ${generated_file} - # These output files depend on the source_file and the contents of cmake_dependency_file - ${main_dep} - DEPENDS ${HIP_HIPCC_DEPEND} - DEPENDS ${custom_target_script} - # Make sure the output directory exists before trying to write to it. 
- COMMAND ${CMAKE_COMMAND} -E make_directory "${generated_file_path}" - COMMAND ${CMAKE_COMMAND} ARGS - -D verbose:BOOL=${verbose_output} - -D build_configuration:STRING=${_hip_build_configuration} - -D "generated_file:STRING=${generated_file}" - -P "${custom_target_script}" - WORKING_DIRECTORY "${hip_compile_output_dir}" - COMMENT "${hip_build_comment_string}" - ) - - # Make sure the build system knows the file is generated - set_source_files_properties(${generated_file} PROPERTIES GENERATED TRUE) - list(APPEND _hip_generated_files ${generated_file}) - list(APPEND _hip_source_files ${file}) - endif() - endforeach() - - # Set the return parameter - set(${_generated_files} ${_hip_generated_files}) - set(${_source_files} ${_hip_source_files}) -endmacro() - -############################################################################### -# HIP_ADD_EXECUTABLE -############################################################################### -macro(HIP_ADD_EXECUTABLE hip_target) - # Separate the sources from the options - HIP_GET_SOURCES_AND_OPTIONS(_sources _cmake_options _hipcc_options _hcc_options _nvcc_options ${ARGN}) - HIP_PREPARE_TARGET_COMMANDS(${hip_target} OBJ _generated_files _source_files ${_sources} HIPCC_OPTIONS ${_hipcc_options} HCC_OPTIONS ${_hcc_options} NVCC_OPTIONS ${_nvcc_options}) - if(_source_files) - list(REMOVE_ITEM _sources ${_source_files}) - endif() - if("x${HCC_HOME}" STREQUAL "x") - set(HCC_HOME "/opt/rocm/hcc") - endif() - set(CMAKE_HIP_LINK_EXECUTABLE "${HIP_HIPCC_CMAKE_LINKER_HELPER} ${HCC_HOME} -o ") - add_executable(${hip_target} ${_cmake_options} ${_generated_files} ${_sources}) - set_target_properties(${hip_target} PROPERTIES LINKER_LANGUAGE HIP) -endmacro() - -############################################################################### -# HIP_ADD_LIBRARY -############################################################################### -macro(HIP_ADD_LIBRARY hip_target) - # Separate the sources from the options - HIP_GET_SOURCES_AND_OPTIONS(_sources _cmake_options _hipcc_options _hcc_options _nvcc_options ${ARGN}) - HIP_PREPARE_TARGET_COMMANDS(${hip_target} OBJ _generated_files _source_files ${_sources} ${_cmake_options} HIPCC_OPTIONS ${_hipcc_options} HCC_OPTIONS ${_hcc_options} NVCC_OPTIONS ${_nvcc_options}) - if(_source_files) - list(REMOVE_ITEM _sources ${_source_files}) - endif() - add_library(${hip_target} ${_cmake_options} ${_generated_files} ${_sources}) - set_target_properties(${hip_target} PROPERTIES LINKER_LANGUAGE ${HIP_C_OR_CXX}) -endmacro() - -# vim: ts=4:sw=4:expandtab:smartindent diff --git a/common/Communication.h b/common/Communication.h index 7c2f8d08..7819a0bb 100644 --- a/common/Communication.h +++ b/common/Communication.h @@ -1,7 +1,7 @@ #ifndef COMMUNICATION_H_INC #define COMMUNICATION_H_INC -#include "common/MPI.h" +#include "common/MPI_Helpers.h" #include "common/Utilities.h" #include "common/Array.h" @@ -38,7 +38,7 @@ struct RankInfoStruct { //! Redistribute domain data (dst may be smaller than the src) template Array redistribute( const RankInfoStruct& src_rank, const Array& src_data, - const RankInfoStruct& dst_rank, std::array dst_size, const Utilities::MPI& comm ); + const RankInfoStruct& dst_rank, std::array dst_size, MPI_Comm comm ); /*! 
@@ -59,7 +59,7 @@ public: * @param[in] fill Fill {faces,edges,corners} * @param[in] periodic Periodic dimensions */ - fillHalo( const Utilities::MPI& comm, const RankInfoStruct& info, + fillHalo( MPI_Comm comm, const RankInfoStruct& info, std::array n, std::array ng, int tag, int depth, std::array fill = {true,true,true}, std::array periodic = {true,true,true} ); @@ -83,7 +83,7 @@ public: private: - Utilities::MPI comm; + MPI_Comm comm; RankInfoStruct info; std::array n, ng; int depth; @@ -93,6 +93,8 @@ private: TYPE *mem; TYPE *send[3][3][3], *recv[3][3][3]; MPI_Request send_req[3][3][3], recv_req[3][3][3]; + size_t N_type; + MPI_Datatype datatype; fillHalo(); // Private empty constructor fillHalo(const fillHalo&); // Private copy constructor fillHalo& operator=(const fillHalo&); // Private assignment operator @@ -134,7 +136,7 @@ void InitializeRanks( const int rank, const int nprocx, const int nprocy, const //*************************************************************************************** -inline void CommunicateSendRecvCounts( const Utilities::MPI& Communicator, int sendtag, int recvtag, +inline void CommunicateSendRecvCounts( MPI_Comm Communicator, int sendtag, int recvtag, int rank_x, int rank_y, int rank_z, int rank_X, int rank_Y, int rank_Z, int rank_xy, int rank_XY, int rank_xY, int rank_Xy, @@ -153,53 +155,53 @@ inline void CommunicateSendRecvCounts( const Utilities::MPI& Communicator, int s { MPI_Request req1[18], req2[18]; MPI_Status stat1[18],stat2[18]; - MPI_Isend(&sendCount_x, 1,MPI_INT,rank_x,sendtag+0,Communicator.getCommunicator(),&req1[0]); - MPI_Irecv(&recvCount_X, 1,MPI_INT,rank_X,recvtag+0,Communicator.getCommunicator(),&req2[0]); - MPI_Isend(&sendCount_X, 1,MPI_INT,rank_X,sendtag+1,Communicator.getCommunicator(),&req1[1]); - MPI_Irecv(&recvCount_x, 1,MPI_INT,rank_x,recvtag+1,Communicator.getCommunicator(),&req2[1]); - MPI_Isend(&sendCount_y, 1,MPI_INT,rank_y,sendtag+2,Communicator.getCommunicator(),&req1[2]); - MPI_Irecv(&recvCount_Y, 1,MPI_INT,rank_Y,recvtag+2,Communicator.getCommunicator(),&req2[2]); - MPI_Isend(&sendCount_Y, 1,MPI_INT,rank_Y,sendtag+3,Communicator.getCommunicator(),&req1[3]); - MPI_Irecv(&recvCount_y, 1,MPI_INT,rank_y,recvtag+3,Communicator.getCommunicator(),&req2[3]); - MPI_Isend(&sendCount_z, 1,MPI_INT,rank_z,sendtag+4,Communicator.getCommunicator(),&req1[4]); - MPI_Irecv(&recvCount_Z, 1,MPI_INT,rank_Z,recvtag+4,Communicator.getCommunicator(),&req2[4]); - MPI_Isend(&sendCount_Z, 1,MPI_INT,rank_Z,sendtag+5,Communicator.getCommunicator(),&req1[5]); - MPI_Irecv(&recvCount_z, 1,MPI_INT,rank_z,recvtag+5,Communicator.getCommunicator(),&req2[5]); + MPI_Isend(&sendCount_x, 1,MPI_INT,rank_x,sendtag+0,Communicator,&req1[0]); + MPI_Irecv(&recvCount_X, 1,MPI_INT,rank_X,recvtag+0,Communicator,&req2[0]); + MPI_Isend(&sendCount_X, 1,MPI_INT,rank_X,sendtag+1,Communicator,&req1[1]); + MPI_Irecv(&recvCount_x, 1,MPI_INT,rank_x,recvtag+1,Communicator,&req2[1]); + MPI_Isend(&sendCount_y, 1,MPI_INT,rank_y,sendtag+2,Communicator,&req1[2]); + MPI_Irecv(&recvCount_Y, 1,MPI_INT,rank_Y,recvtag+2,Communicator,&req2[2]); + MPI_Isend(&sendCount_Y, 1,MPI_INT,rank_Y,sendtag+3,Communicator,&req1[3]); + MPI_Irecv(&recvCount_y, 1,MPI_INT,rank_y,recvtag+3,Communicator,&req2[3]); + MPI_Isend(&sendCount_z, 1,MPI_INT,rank_z,sendtag+4,Communicator,&req1[4]); + MPI_Irecv(&recvCount_Z, 1,MPI_INT,rank_Z,recvtag+4,Communicator,&req2[4]); + MPI_Isend(&sendCount_Z, 1,MPI_INT,rank_Z,sendtag+5,Communicator,&req1[5]); + MPI_Irecv(&recvCount_z, 
1,MPI_INT,rank_z,recvtag+5,Communicator,&req2[5]); - MPI_Isend(&sendCount_xy, 1,MPI_INT,rank_xy,sendtag+6,Communicator.getCommunicator(),&req1[6]); - MPI_Irecv(&recvCount_XY, 1,MPI_INT,rank_XY,recvtag+6,Communicator.getCommunicator(),&req2[6]); - MPI_Isend(&sendCount_XY, 1,MPI_INT,rank_XY,sendtag+7,Communicator.getCommunicator(),&req1[7]); - MPI_Irecv(&recvCount_xy, 1,MPI_INT,rank_xy,recvtag+7,Communicator.getCommunicator(),&req2[7]); - MPI_Isend(&sendCount_Xy, 1,MPI_INT,rank_Xy,sendtag+8,Communicator.getCommunicator(),&req1[8]); - MPI_Irecv(&recvCount_xY, 1,MPI_INT,rank_xY,recvtag+8,Communicator.getCommunicator(),&req2[8]); - MPI_Isend(&sendCount_xY, 1,MPI_INT,rank_xY,sendtag+9,Communicator.getCommunicator(),&req1[9]); - MPI_Irecv(&recvCount_Xy, 1,MPI_INT,rank_Xy,recvtag+9,Communicator.getCommunicator(),&req2[9]); + MPI_Isend(&sendCount_xy, 1,MPI_INT,rank_xy,sendtag+6,Communicator,&req1[6]); + MPI_Irecv(&recvCount_XY, 1,MPI_INT,rank_XY,recvtag+6,Communicator,&req2[6]); + MPI_Isend(&sendCount_XY, 1,MPI_INT,rank_XY,sendtag+7,Communicator,&req1[7]); + MPI_Irecv(&recvCount_xy, 1,MPI_INT,rank_xy,recvtag+7,Communicator,&req2[7]); + MPI_Isend(&sendCount_Xy, 1,MPI_INT,rank_Xy,sendtag+8,Communicator,&req1[8]); + MPI_Irecv(&recvCount_xY, 1,MPI_INT,rank_xY,recvtag+8,Communicator,&req2[8]); + MPI_Isend(&sendCount_xY, 1,MPI_INT,rank_xY,sendtag+9,Communicator,&req1[9]); + MPI_Irecv(&recvCount_Xy, 1,MPI_INT,rank_Xy,recvtag+9,Communicator,&req2[9]); - MPI_Isend(&sendCount_xz, 1,MPI_INT,rank_xz,sendtag+10,Communicator.getCommunicator(),&req1[10]); - MPI_Irecv(&recvCount_XZ, 1,MPI_INT,rank_XZ,recvtag+10,Communicator.getCommunicator(),&req2[10]); - MPI_Isend(&sendCount_XZ, 1,MPI_INT,rank_XZ,sendtag+11,Communicator.getCommunicator(),&req1[11]); - MPI_Irecv(&recvCount_xz, 1,MPI_INT,rank_xz,recvtag+11,Communicator.getCommunicator(),&req2[11]); - MPI_Isend(&sendCount_Xz, 1,MPI_INT,rank_Xz,sendtag+12,Communicator.getCommunicator(),&req1[12]); - MPI_Irecv(&recvCount_xZ, 1,MPI_INT,rank_xZ,recvtag+12,Communicator.getCommunicator(),&req2[12]); - MPI_Isend(&sendCount_xZ, 1,MPI_INT,rank_xZ,sendtag+13,Communicator.getCommunicator(),&req1[13]); - MPI_Irecv(&recvCount_Xz, 1,MPI_INT,rank_Xz,recvtag+13,Communicator.getCommunicator(),&req2[13]); + MPI_Isend(&sendCount_xz, 1,MPI_INT,rank_xz,sendtag+10,Communicator,&req1[10]); + MPI_Irecv(&recvCount_XZ, 1,MPI_INT,rank_XZ,recvtag+10,Communicator,&req2[10]); + MPI_Isend(&sendCount_XZ, 1,MPI_INT,rank_XZ,sendtag+11,Communicator,&req1[11]); + MPI_Irecv(&recvCount_xz, 1,MPI_INT,rank_xz,recvtag+11,Communicator,&req2[11]); + MPI_Isend(&sendCount_Xz, 1,MPI_INT,rank_Xz,sendtag+12,Communicator,&req1[12]); + MPI_Irecv(&recvCount_xZ, 1,MPI_INT,rank_xZ,recvtag+12,Communicator,&req2[12]); + MPI_Isend(&sendCount_xZ, 1,MPI_INT,rank_xZ,sendtag+13,Communicator,&req1[13]); + MPI_Irecv(&recvCount_Xz, 1,MPI_INT,rank_Xz,recvtag+13,Communicator,&req2[13]); - MPI_Isend(&sendCount_yz, 1,MPI_INT,rank_yz,sendtag+14,Communicator.getCommunicator(),&req1[14]); - MPI_Irecv(&recvCount_YZ, 1,MPI_INT,rank_YZ,recvtag+14,Communicator.getCommunicator(),&req2[14]); - MPI_Isend(&sendCount_YZ, 1,MPI_INT,rank_YZ,sendtag+15,Communicator.getCommunicator(),&req1[15]); - MPI_Irecv(&recvCount_yz, 1,MPI_INT,rank_yz,recvtag+15,Communicator.getCommunicator(),&req2[15]); - MPI_Isend(&sendCount_Yz, 1,MPI_INT,rank_Yz,sendtag+16,Communicator.getCommunicator(),&req1[16]); - MPI_Irecv(&recvCount_yZ, 1,MPI_INT,rank_yZ,recvtag+16,Communicator.getCommunicator(),&req2[16]); - MPI_Isend(&sendCount_yZ, 
1,MPI_INT,rank_yZ,sendtag+17,Communicator.getCommunicator(),&req1[17]); - MPI_Irecv(&recvCount_Yz, 1,MPI_INT,rank_Yz,recvtag+17,Communicator.getCommunicator(),&req2[17]); + MPI_Isend(&sendCount_yz, 1,MPI_INT,rank_yz,sendtag+14,Communicator,&req1[14]); + MPI_Irecv(&recvCount_YZ, 1,MPI_INT,rank_YZ,recvtag+14,Communicator,&req2[14]); + MPI_Isend(&sendCount_YZ, 1,MPI_INT,rank_YZ,sendtag+15,Communicator,&req1[15]); + MPI_Irecv(&recvCount_yz, 1,MPI_INT,rank_yz,recvtag+15,Communicator,&req2[15]); + MPI_Isend(&sendCount_Yz, 1,MPI_INT,rank_Yz,sendtag+16,Communicator,&req1[16]); + MPI_Irecv(&recvCount_yZ, 1,MPI_INT,rank_yZ,recvtag+16,Communicator,&req2[16]); + MPI_Isend(&sendCount_yZ, 1,MPI_INT,rank_yZ,sendtag+17,Communicator,&req1[17]); + MPI_Irecv(&recvCount_Yz, 1,MPI_INT,rank_Yz,recvtag+17,Communicator,&req2[17]); MPI_Waitall(18,req1,stat1); MPI_Waitall(18,req2,stat2); - Communicator.barrier(); + MPI_Barrier(Communicator); } //*************************************************************************************** -inline void CommunicateRecvLists( const Utilities::MPI& Communicator, int sendtag, int recvtag, +inline void CommunicateRecvLists( MPI_Comm Communicator, int sendtag, int recvtag, int *sendList_x, int *sendList_y, int *sendList_z, int *sendList_X, int *sendList_Y, int *sendList_Z, int *sendList_xy, int *sendList_XY, int *sendList_xY, int *sendList_Xy, int *sendList_xz, int *sendList_XZ, int *sendList_xZ, int *sendList_Xz, @@ -221,52 +223,52 @@ inline void CommunicateRecvLists( const Utilities::MPI& Communicator, int sendta { MPI_Request req1[18], req2[18]; MPI_Status stat1[18],stat2[18]; - MPI_Isend(sendList_x, sendCount_x,MPI_INT,rank_x,sendtag,Communicator.getCommunicator(),&req1[0]); - MPI_Irecv(recvList_X, recvCount_X,MPI_INT,rank_X,recvtag,Communicator.getCommunicator(),&req2[0]); - MPI_Isend(sendList_X, sendCount_X,MPI_INT,rank_X,sendtag,Communicator.getCommunicator(),&req1[1]); - MPI_Irecv(recvList_x, recvCount_x,MPI_INT,rank_x,recvtag,Communicator.getCommunicator(),&req2[1]); - MPI_Isend(sendList_y, sendCount_y,MPI_INT,rank_y,sendtag,Communicator.getCommunicator(),&req1[2]); - MPI_Irecv(recvList_Y, recvCount_Y,MPI_INT,rank_Y,recvtag,Communicator.getCommunicator(),&req2[2]); - MPI_Isend(sendList_Y, sendCount_Y,MPI_INT,rank_Y,sendtag,Communicator.getCommunicator(),&req1[3]); - MPI_Irecv(recvList_y, recvCount_y,MPI_INT,rank_y,recvtag,Communicator.getCommunicator(),&req2[3]); - MPI_Isend(sendList_z, sendCount_z,MPI_INT,rank_z,sendtag,Communicator.getCommunicator(),&req1[4]); - MPI_Irecv(recvList_Z, recvCount_Z,MPI_INT,rank_Z,recvtag,Communicator.getCommunicator(),&req2[4]); - MPI_Isend(sendList_Z, sendCount_Z,MPI_INT,rank_Z,sendtag,Communicator.getCommunicator(),&req1[5]); - MPI_Irecv(recvList_z, recvCount_z,MPI_INT,rank_z,recvtag,Communicator.getCommunicator(),&req2[5]); + MPI_Isend(sendList_x, sendCount_x,MPI_INT,rank_x,sendtag,Communicator,&req1[0]); + MPI_Irecv(recvList_X, recvCount_X,MPI_INT,rank_X,recvtag,Communicator,&req2[0]); + MPI_Isend(sendList_X, sendCount_X,MPI_INT,rank_X,sendtag,Communicator,&req1[1]); + MPI_Irecv(recvList_x, recvCount_x,MPI_INT,rank_x,recvtag,Communicator,&req2[1]); + MPI_Isend(sendList_y, sendCount_y,MPI_INT,rank_y,sendtag,Communicator,&req1[2]); + MPI_Irecv(recvList_Y, recvCount_Y,MPI_INT,rank_Y,recvtag,Communicator,&req2[2]); + MPI_Isend(sendList_Y, sendCount_Y,MPI_INT,rank_Y,sendtag,Communicator,&req1[3]); + MPI_Irecv(recvList_y, recvCount_y,MPI_INT,rank_y,recvtag,Communicator,&req2[3]); + MPI_Isend(sendList_z, 
sendCount_z,MPI_INT,rank_z,sendtag,Communicator,&req1[4]); + MPI_Irecv(recvList_Z, recvCount_Z,MPI_INT,rank_Z,recvtag,Communicator,&req2[4]); + MPI_Isend(sendList_Z, sendCount_Z,MPI_INT,rank_Z,sendtag,Communicator,&req1[5]); + MPI_Irecv(recvList_z, recvCount_z,MPI_INT,rank_z,recvtag,Communicator,&req2[5]); - MPI_Isend(sendList_xy, sendCount_xy,MPI_INT,rank_xy,sendtag,Communicator.getCommunicator(),&req1[6]); - MPI_Irecv(recvList_XY, recvCount_XY,MPI_INT,rank_XY,recvtag,Communicator.getCommunicator(),&req2[6]); - MPI_Isend(sendList_XY, sendCount_XY,MPI_INT,rank_XY,sendtag,Communicator.getCommunicator(),&req1[7]); - MPI_Irecv(recvList_xy, recvCount_xy,MPI_INT,rank_xy,recvtag,Communicator.getCommunicator(),&req2[7]); - MPI_Isend(sendList_Xy, sendCount_Xy,MPI_INT,rank_Xy,sendtag,Communicator.getCommunicator(),&req1[8]); - MPI_Irecv(recvList_xY, recvCount_xY,MPI_INT,rank_xY,recvtag,Communicator.getCommunicator(),&req2[8]); - MPI_Isend(sendList_xY, sendCount_xY,MPI_INT,rank_xY,sendtag,Communicator.getCommunicator(),&req1[9]); - MPI_Irecv(recvList_Xy, recvCount_Xy,MPI_INT,rank_Xy,recvtag,Communicator.getCommunicator(),&req2[9]); + MPI_Isend(sendList_xy, sendCount_xy,MPI_INT,rank_xy,sendtag,Communicator,&req1[6]); + MPI_Irecv(recvList_XY, recvCount_XY,MPI_INT,rank_XY,recvtag,Communicator,&req2[6]); + MPI_Isend(sendList_XY, sendCount_XY,MPI_INT,rank_XY,sendtag,Communicator,&req1[7]); + MPI_Irecv(recvList_xy, recvCount_xy,MPI_INT,rank_xy,recvtag,Communicator,&req2[7]); + MPI_Isend(sendList_Xy, sendCount_Xy,MPI_INT,rank_Xy,sendtag,Communicator,&req1[8]); + MPI_Irecv(recvList_xY, recvCount_xY,MPI_INT,rank_xY,recvtag,Communicator,&req2[8]); + MPI_Isend(sendList_xY, sendCount_xY,MPI_INT,rank_xY,sendtag,Communicator,&req1[9]); + MPI_Irecv(recvList_Xy, recvCount_Xy,MPI_INT,rank_Xy,recvtag,Communicator,&req2[9]); - MPI_Isend(sendList_xz, sendCount_xz,MPI_INT,rank_xz,sendtag,Communicator.getCommunicator(),&req1[10]); - MPI_Irecv(recvList_XZ, recvCount_XZ,MPI_INT,rank_XZ,recvtag,Communicator.getCommunicator(),&req2[10]); - MPI_Isend(sendList_XZ, sendCount_XZ,MPI_INT,rank_XZ,sendtag,Communicator.getCommunicator(),&req1[11]); - MPI_Irecv(recvList_xz, recvCount_xz,MPI_INT,rank_xz,recvtag,Communicator.getCommunicator(),&req2[11]); - MPI_Isend(sendList_Xz, sendCount_Xz,MPI_INT,rank_Xz,sendtag,Communicator.getCommunicator(),&req1[12]); - MPI_Irecv(recvList_xZ, recvCount_xZ,MPI_INT,rank_xZ,recvtag,Communicator.getCommunicator(),&req2[12]); - MPI_Isend(sendList_xZ, sendCount_xZ,MPI_INT,rank_xZ,sendtag,Communicator.getCommunicator(),&req1[13]); - MPI_Irecv(recvList_Xz, recvCount_Xz,MPI_INT,rank_Xz,recvtag,Communicator.getCommunicator(),&req2[13]); + MPI_Isend(sendList_xz, sendCount_xz,MPI_INT,rank_xz,sendtag,Communicator,&req1[10]); + MPI_Irecv(recvList_XZ, recvCount_XZ,MPI_INT,rank_XZ,recvtag,Communicator,&req2[10]); + MPI_Isend(sendList_XZ, sendCount_XZ,MPI_INT,rank_XZ,sendtag,Communicator,&req1[11]); + MPI_Irecv(recvList_xz, recvCount_xz,MPI_INT,rank_xz,recvtag,Communicator,&req2[11]); + MPI_Isend(sendList_Xz, sendCount_Xz,MPI_INT,rank_Xz,sendtag,Communicator,&req1[12]); + MPI_Irecv(recvList_xZ, recvCount_xZ,MPI_INT,rank_xZ,recvtag,Communicator,&req2[12]); + MPI_Isend(sendList_xZ, sendCount_xZ,MPI_INT,rank_xZ,sendtag,Communicator,&req1[13]); + MPI_Irecv(recvList_Xz, recvCount_Xz,MPI_INT,rank_Xz,recvtag,Communicator,&req2[13]); - MPI_Isend(sendList_yz, sendCount_yz,MPI_INT,rank_yz,sendtag,Communicator.getCommunicator(),&req1[14]); - MPI_Irecv(recvList_YZ, 
recvCount_YZ,MPI_INT,rank_YZ,recvtag,Communicator.getCommunicator(),&req2[14]); - MPI_Isend(sendList_YZ, sendCount_YZ,MPI_INT,rank_YZ,sendtag,Communicator.getCommunicator(),&req1[15]); - MPI_Irecv(recvList_yz, recvCount_yz,MPI_INT,rank_yz,recvtag,Communicator.getCommunicator(),&req2[15]); - MPI_Isend(sendList_Yz, sendCount_Yz,MPI_INT,rank_Yz,sendtag,Communicator.getCommunicator(),&req1[16]); - MPI_Irecv(recvList_yZ, recvCount_yZ,MPI_INT,rank_yZ,recvtag,Communicator.getCommunicator(),&req2[16]); - MPI_Isend(sendList_yZ, sendCount_yZ,MPI_INT,rank_yZ,sendtag,Communicator.getCommunicator(),&req1[17]); - MPI_Irecv(recvList_Yz, recvCount_Yz,MPI_INT,rank_Yz,recvtag,Communicator.getCommunicator(),&req2[17]); + MPI_Isend(sendList_yz, sendCount_yz,MPI_INT,rank_yz,sendtag,Communicator,&req1[14]); + MPI_Irecv(recvList_YZ, recvCount_YZ,MPI_INT,rank_YZ,recvtag,Communicator,&req2[14]); + MPI_Isend(sendList_YZ, sendCount_YZ,MPI_INT,rank_YZ,sendtag,Communicator,&req1[15]); + MPI_Irecv(recvList_yz, recvCount_yz,MPI_INT,rank_yz,recvtag,Communicator,&req2[15]); + MPI_Isend(sendList_Yz, sendCount_Yz,MPI_INT,rank_Yz,sendtag,Communicator,&req1[16]); + MPI_Irecv(recvList_yZ, recvCount_yZ,MPI_INT,rank_yZ,recvtag,Communicator,&req2[16]); + MPI_Isend(sendList_yZ, sendCount_yZ,MPI_INT,rank_yZ,sendtag,Communicator,&req1[17]); + MPI_Irecv(recvList_Yz, recvCount_Yz,MPI_INT,rank_Yz,recvtag,Communicator,&req2[17]); MPI_Waitall(18,req1,stat1); MPI_Waitall(18,req2,stat2); } //*************************************************************************************** -inline void CommunicateMeshHalo(DoubleArray &Mesh, const Utilities::MPI& Communicator, +inline void CommunicateMeshHalo(DoubleArray &Mesh, MPI_Comm Communicator, double *sendbuf_x,double *sendbuf_y,double *sendbuf_z,double *sendbuf_X,double *sendbuf_Y,double *sendbuf_Z, double *sendbuf_xy,double *sendbuf_XY,double *sendbuf_xY,double *sendbuf_Xy, double *sendbuf_xz,double *sendbuf_XZ,double *sendbuf_xZ,double *sendbuf_Xz, @@ -317,41 +319,41 @@ inline void CommunicateMeshHalo(DoubleArray &Mesh, const Utilities::MPI& Communi PackMeshData(sendList_YZ, sendCount_YZ ,sendbuf_YZ, MeshData); //...................................................................................... 
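// Illustrative aside, not part of the original patch: the exchanges that follow convert
// CommunicateMeshHalo from the Utilities::MPI wrapper to the raw MPI C API on an MPI_Comm.
// The per-direction pattern they preserve, sketched here for the x/X face pair using the
// buffer and count names of the surrounding code, is:
//
//   PackMeshData(sendList_x, sendCount_x, sendbuf_x, MeshData);          // gather boundary values
//   MPI_Sendrecv(sendbuf_x, sendCount_x, MPI_DOUBLE, rank_x, sendtag,    // blocking pairwise exchange
//                recvbuf_X, recvCount_X, MPI_DOUBLE, rank_X, recvtag,
//                Communicator, MPI_STATUS_IGNORE);
//   UnpackMeshData(recvList_X, recvCount_X, recvbuf_X, MeshData);        // scatter into the halo region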
MPI_Sendrecv(sendbuf_x,sendCount_x,MPI_DOUBLE,rank_x,sendtag, - recvbuf_X,recvCount_X,MPI_DOUBLE,rank_X,recvtag,Communicator.getCommunicator(),MPI_STATUS_IGNORE); + recvbuf_X,recvCount_X,MPI_DOUBLE,rank_X,recvtag,Communicator,MPI_STATUS_IGNORE); MPI_Sendrecv(sendbuf_X,sendCount_X,MPI_DOUBLE,rank_X,sendtag, - recvbuf_x,recvCount_x,MPI_DOUBLE,rank_x,recvtag,Communicator.getCommunicator(),MPI_STATUS_IGNORE); + recvbuf_x,recvCount_x,MPI_DOUBLE,rank_x,recvtag,Communicator,MPI_STATUS_IGNORE); MPI_Sendrecv(sendbuf_y,sendCount_y,MPI_DOUBLE,rank_y,sendtag, - recvbuf_Y,recvCount_Y,MPI_DOUBLE,rank_Y,recvtag,Communicator.getCommunicator(),MPI_STATUS_IGNORE); + recvbuf_Y,recvCount_Y,MPI_DOUBLE,rank_Y,recvtag,Communicator,MPI_STATUS_IGNORE); MPI_Sendrecv(sendbuf_Y,sendCount_Y,MPI_DOUBLE,rank_Y,sendtag, - recvbuf_y,recvCount_y,MPI_DOUBLE,rank_y,recvtag,Communicator.getCommunicator(),MPI_STATUS_IGNORE); + recvbuf_y,recvCount_y,MPI_DOUBLE,rank_y,recvtag,Communicator,MPI_STATUS_IGNORE); MPI_Sendrecv(sendbuf_z,sendCount_z,MPI_DOUBLE,rank_z,sendtag, - recvbuf_Z,recvCount_Z,MPI_DOUBLE,rank_Z,recvtag,Communicator.getCommunicator(),MPI_STATUS_IGNORE); + recvbuf_Z,recvCount_Z,MPI_DOUBLE,rank_Z,recvtag,Communicator,MPI_STATUS_IGNORE); MPI_Sendrecv(sendbuf_Z,sendCount_Z,MPI_DOUBLE,rank_Z,sendtag, - recvbuf_z,recvCount_z,MPI_DOUBLE,rank_z,recvtag,Communicator.getCommunicator(),MPI_STATUS_IGNORE); + recvbuf_z,recvCount_z,MPI_DOUBLE,rank_z,recvtag,Communicator,MPI_STATUS_IGNORE); MPI_Sendrecv(sendbuf_xy,sendCount_xy,MPI_DOUBLE,rank_xy,sendtag, - recvbuf_XY,recvCount_XY,MPI_DOUBLE,rank_XY,recvtag,Communicator.getCommunicator(),MPI_STATUS_IGNORE); + recvbuf_XY,recvCount_XY,MPI_DOUBLE,rank_XY,recvtag,Communicator,MPI_STATUS_IGNORE); MPI_Sendrecv(sendbuf_XY,sendCount_XY,MPI_DOUBLE,rank_XY,sendtag, - recvbuf_xy,recvCount_xy,MPI_DOUBLE,rank_xy,recvtag,Communicator.getCommunicator(),MPI_STATUS_IGNORE); + recvbuf_xy,recvCount_xy,MPI_DOUBLE,rank_xy,recvtag,Communicator,MPI_STATUS_IGNORE); MPI_Sendrecv(sendbuf_Xy,sendCount_Xy,MPI_DOUBLE,rank_Xy,sendtag, - recvbuf_xY,recvCount_xY,MPI_DOUBLE,rank_xY,recvtag,Communicator.getCommunicator(),MPI_STATUS_IGNORE); + recvbuf_xY,recvCount_xY,MPI_DOUBLE,rank_xY,recvtag,Communicator,MPI_STATUS_IGNORE); MPI_Sendrecv(sendbuf_xY,sendCount_xY,MPI_DOUBLE,rank_xY,sendtag, - recvbuf_Xy,recvCount_Xy,MPI_DOUBLE,rank_Xy,recvtag,Communicator.getCommunicator(),MPI_STATUS_IGNORE); + recvbuf_Xy,recvCount_Xy,MPI_DOUBLE,rank_Xy,recvtag,Communicator,MPI_STATUS_IGNORE); MPI_Sendrecv(sendbuf_xz,sendCount_xz,MPI_DOUBLE,rank_xz,sendtag, - recvbuf_XZ,recvCount_XZ,MPI_DOUBLE,rank_XZ,recvtag,Communicator.getCommunicator(),MPI_STATUS_IGNORE); + recvbuf_XZ,recvCount_XZ,MPI_DOUBLE,rank_XZ,recvtag,Communicator,MPI_STATUS_IGNORE); MPI_Sendrecv(sendbuf_XZ,sendCount_XZ,MPI_DOUBLE,rank_XZ,sendtag, - recvbuf_xz,recvCount_xz,MPI_DOUBLE,rank_xz,recvtag,Communicator.getCommunicator(),MPI_STATUS_IGNORE); + recvbuf_xz,recvCount_xz,MPI_DOUBLE,rank_xz,recvtag,Communicator,MPI_STATUS_IGNORE); MPI_Sendrecv(sendbuf_Xz,sendCount_Xz,MPI_DOUBLE,rank_Xz,sendtag, - recvbuf_xZ,recvCount_xZ,MPI_DOUBLE,rank_xZ,recvtag,Communicator.getCommunicator(),MPI_STATUS_IGNORE); + recvbuf_xZ,recvCount_xZ,MPI_DOUBLE,rank_xZ,recvtag,Communicator,MPI_STATUS_IGNORE); MPI_Sendrecv(sendbuf_xZ,sendCount_xZ,MPI_DOUBLE,rank_xZ,sendtag, - recvbuf_Xz,recvCount_Xz,MPI_DOUBLE,rank_Xz,recvtag,Communicator.getCommunicator(),MPI_STATUS_IGNORE); + recvbuf_Xz,recvCount_Xz,MPI_DOUBLE,rank_Xz,recvtag,Communicator,MPI_STATUS_IGNORE); 
MPI_Sendrecv(sendbuf_yz,sendCount_yz,MPI_DOUBLE,rank_yz,sendtag, - recvbuf_YZ,recvCount_YZ,MPI_DOUBLE,rank_YZ,recvtag,Communicator.getCommunicator(),MPI_STATUS_IGNORE); + recvbuf_YZ,recvCount_YZ,MPI_DOUBLE,rank_YZ,recvtag,Communicator,MPI_STATUS_IGNORE); MPI_Sendrecv(sendbuf_YZ,sendCount_YZ,MPI_DOUBLE,rank_YZ,sendtag, - recvbuf_yz,recvCount_yz,MPI_DOUBLE,rank_yz,recvtag,Communicator.getCommunicator(),MPI_STATUS_IGNORE); + recvbuf_yz,recvCount_yz,MPI_DOUBLE,rank_yz,recvtag,Communicator,MPI_STATUS_IGNORE); MPI_Sendrecv(sendbuf_Yz,sendCount_Yz,MPI_DOUBLE,rank_Yz,sendtag, - recvbuf_yZ,recvCount_yZ,MPI_DOUBLE,rank_yZ,recvtag,Communicator.getCommunicator(),MPI_STATUS_IGNORE); + recvbuf_yZ,recvCount_yZ,MPI_DOUBLE,rank_yZ,recvtag,Communicator,MPI_STATUS_IGNORE); MPI_Sendrecv(sendbuf_yZ,sendCount_yZ,MPI_DOUBLE,rank_yZ,sendtag, - recvbuf_Yz,recvCount_Yz,MPI_DOUBLE,rank_Yz,recvtag,Communicator.getCommunicator(),MPI_STATUS_IGNORE); + recvbuf_Yz,recvCount_Yz,MPI_DOUBLE,rank_Yz,recvtag,Communicator,MPI_STATUS_IGNORE); //........................................................................................ UnpackMeshData(recvList_x, recvCount_x ,recvbuf_x, MeshData); UnpackMeshData(recvList_X, recvCount_X ,recvbuf_X, MeshData); diff --git a/common/Communication.hpp b/common/Communication.hpp index ca310ea5..33fed3a7 100644 --- a/common/Communication.hpp +++ b/common/Communication.hpp @@ -2,8 +2,9 @@ #define COMMUNICATION_HPP_INC #include "common/Communication.h" -#include "common/MPI.h" +#include "common/MPI_Helpers.h" #include "common/Utilities.h" +//#include "ProfilerApp.h" /******************************************************** @@ -11,19 +12,17 @@ ********************************************************/ template Array redistribute( const RankInfoStruct& src_rank, const Array& src_data, - const RankInfoStruct& dst_rank, std::array dst_size, const Utilities::MPI& comm ) + const RankInfoStruct& dst_rank, std::array dst_size, MPI_Comm comm ) { - if ( comm.getSize() == 1 ) { - return src_data.subset( { 0, (size_t) dst_size[0]-1, 0, (size_t) dst_size[1]-1, 0, (size_t) dst_size[2]-1 } ); - } +#ifdef USE_MPI // Get the src size std::array src_size; int size0[3] = { (int) src_data.size(0), (int) src_data.size(1), (int) src_data.size(2) }; - comm.maxReduce( size0, src_size.data(), 3 ); + MPI_Allreduce( size0, src_size.data(), 3, MPI_INT, MPI_MAX, comm ); if ( !src_data.empty() ) ASSERT( src_size[0] == size0[0] && src_size[1] == size0[1] && src_size[2] == size0[2] ); // Check that dst_size matches on all ranks - comm.maxReduce( dst_size.data(), size0, 3 ); + MPI_Allreduce( dst_size.data(), size0, 3, MPI_INT, MPI_MAX, comm ); ASSERT( dst_size[0] == size0[0] && dst_size[1] == size0[1] && dst_size[2] == size0[2] ); // Function to get overlap range auto calcOverlap = []( int i1[3], int i2[3], int j1[3], int j2[3] ) { @@ -61,7 +60,7 @@ Array redistribute( const RankInfoStruct& src_rank, const Array& src } std::vector send_request( send_rank.size() ); for (size_t i=0; i dst_data( dst_size[0], dst_size[1], dst_size[2] ); int i1[3] = { dst_size[0] * dst_rank.ix, dst_size[1] * dst_rank.jy, dst_size[2] * dst_rank.kz }; @@ -76,14 +75,17 @@ Array redistribute( const RankInfoStruct& src_rank, const Array& src continue; int rank = src_rank.getRankForBlock(i,j,k); Array data( index[1] - index[0] + 1, index[3] - index[2] + 1, index[5] - index[4] + 1 ); - comm.recv( data.data(), data.length(), rank, 5462 ); + MPI_Recv( data.data(), sizeof(TYPE)*data.length(), MPI_BYTE, rank, 5462, comm, MPI_STATUS_IGNORE ); 
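// Illustrative sketch, not part of the original patch: the MPI_Recv above receives the
// redistributed block as raw bytes with the fixed tag 5462; the sending side (not shown
// in this hunk) is assumed to post a matching byte-count MPI_Isend. A minimal
// self-contained version of that pairing, assuming a hypothetical trivially copyable
// element type and a pre-sized receive buffer:
#include <mpi.h>
#include <vector>

template<class TYPE>
void exchange_bytes(std::vector<TYPE> &out, std::vector<TYPE> &in,
                    int dst, int src, MPI_Comm comm)
{
    const int tag = 5462;   // same fixed tag on both sides, as in redistribute()
    MPI_Request req;
    // Post the non-blocking send first so the pairwise exchange cannot deadlock
    MPI_Isend(out.data(), (int) (sizeof(TYPE) * out.size()), MPI_BYTE, dst, tag, comm, &req);
    // Receive the incoming block directly into its typed buffer, counted in bytes
    MPI_Recv(in.data(), (int) (sizeof(TYPE) * in.size()), MPI_BYTE, src, tag, comm,
             MPI_STATUS_IGNORE);
    // Complete the send before the output buffer can be reused
    MPI_Wait(&req, MPI_STATUS_IGNORE);
}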
dst_data.copySubset( index, data ); } } } // Free data - comm.waitAll( send_request.size(), send_request.data() ); + MPI_Waitall( send_request.size(), send_request.data(), MPI_STATUSES_IGNORE ); return dst_data; +#else + return src_data.subset( { 0, dst_size[0]-1, 0, dst_size[1]-1, 0, dst_size[2]-1 ); +#endif } @@ -92,11 +94,27 @@ Array redistribute( const RankInfoStruct& src_rank, const Array& src * Structure to fill halo cells * ********************************************************/ template -fillHalo::fillHalo( const Utilities::MPI& comm_, const RankInfoStruct& info_, +fillHalo::fillHalo( MPI_Comm comm_, const RankInfoStruct& info_, std::array n_, std::array ng_, int tag0, int depth_, std::array fill, std::array periodic ): comm(comm_), info(info_), n(n_), ng(ng_), depth(depth_) { + if ( std::is_same() ) { + N_type = 1; + datatype = MPI_DOUBLE; + } else if ( std::is_same() ) { + N_type = 1; + datatype = MPI_FLOAT; + } else if ( sizeof(TYPE)%sizeof(double)==0 ) { + N_type = sizeof(TYPE) / sizeof(double); + datatype = MPI_DOUBLE; + } else if ( sizeof(TYPE)%sizeof(float)==0 ) { + N_type = sizeof(TYPE) / sizeof(float); + datatype = MPI_FLOAT; + } else { + N_type = sizeof(TYPE); + datatype = MPI_BYTE; + } // Set the fill pattern memset(fill_pattern,0,sizeof(fill_pattern)); if ( fill[0] ) { @@ -233,8 +251,8 @@ void fillHalo::fill( Array& data ) for (int k=0; k<3; k++) { if ( !fill_pattern[i][j][k] ) continue; - recv_req[i][j][k] = comm.Irecv( recv[i][j][k], depth2*N_send_recv[i][j][k], - info.rank[i][j][k], tag[2-i][2-j][2-k] ); + MPI_Irecv( recv[i][j][k], N_type*depth2*N_send_recv[i][j][k], datatype, + info.rank[i][j][k], tag[2-i][2-j][2-k], comm, &recv_req[i][j][k] ); } } } @@ -245,18 +263,19 @@ void fillHalo::fill( Array& data ) if ( !fill_pattern[i][j][k] ) continue; pack( data, i-1, j-1, k-1, send[i][j][k] ); - send_req[i][j][k] = comm.Isend( send[i][j][k], depth2*N_send_recv[i][j][k], - info.rank[i][j][k], tag[i][j][k] ); + MPI_Isend( send[i][j][k], N_type*depth2*N_send_recv[i][j][k], datatype, + info.rank[i][j][k], tag[i][j][k], comm, &send_req[i][j][k] ); } } } // Recv the dst data and unpack (we recive in reverse order to match the sends) + MPI_Status status; for (int i=2; i>=0; i--) { for (int j=2; j>=0; j--) { for (int k=2; k>=0; k--) { if ( !fill_pattern[i][j][k] ) continue; - comm.wait( recv_req[i][j][k] ); + MPI_Wait(&recv_req[i][j][k],&status); unpack( data, i-1, j-1, k-1, recv[i][j][k] ); } } @@ -267,7 +286,7 @@ void fillHalo::fill( Array& data ) for (int k=0; k<3; k++) { if ( !fill_pattern[i][j][k] ) continue; - comm.wait( send_req[i][j][k] ); + MPI_Wait(&send_req[i][j][k],&status); } } } diff --git a/common/Domain.cpp b/common/Domain.cpp index ff6c6b68..7da902c6 100644 --- a/common/Domain.cpp +++ b/common/Domain.cpp @@ -12,7 +12,7 @@ #include "common/Domain.h" #include "common/Array.h" #include "common/Utilities.h" -#include "common/MPI.h" +#include "common/MPI_Helpers.h" #include "common/Communication.h" // Inline function to read line without a return argument @@ -62,10 +62,11 @@ Domain::Domain( int nx, int ny, int nz, int rnk, int npx, int npy, int npz, NULL_USE( npy ); NULL_USE( npz ); // set up the neighbor ranks - int myrank = Comm.getRank(); + int myrank; + MPI_Comm_rank( Comm, &myrank ); rank_info = RankInfoStruct( myrank, rank_info.nx, rank_info.ny, rank_info.nz ); - Comm.barrier(); + MPI_Barrier(Comm); auto db = std::make_shared( ); db->putScalar( "BC", BC ); @@ -75,9 +76,10 @@ Domain::Domain( int nx, int ny, int nz, int rnk, int npx, int npy, int npz, 
db->putVector( "L", { lx, ly, lz } ); initialize( db ); } -Domain::Domain( std::shared_ptr db, const Utilities::MPI& Communicator): +Domain::Domain( std::shared_ptr db, MPI_Comm Communicator): database(db), Nx(0), Ny(0), Nz(0), Lx(0), Ly(0), Lz(0), Volume(0), BoundaryCondition(0), + Comm(MPI_COMM_NULL), inlet_layers_x(0), inlet_layers_y(0), inlet_layers_z(0), outlet_layers_x(0), outlet_layers_y(0), outlet_layers_z(0), inlet_layers_phase(1),outlet_layers_phase(2), @@ -107,13 +109,14 @@ Domain::Domain( std::shared_ptr db, const Utilities::MPI& Communicator recvData_xY(NULL), recvData_yZ(NULL), recvData_Xz(NULL), recvData_XY(NULL), recvData_YZ(NULL), recvData_XZ(NULL), id(NULL) { - Comm = Communicator.dup(); + MPI_Comm_dup(Communicator,&Comm); // set up the neighbor ranks - int myrank = Comm.getRank(); + int myrank; + MPI_Comm_rank( Comm, &myrank ); initialize( db ); rank_info = RankInfoStruct( myrank, rank_info.nx, rank_info.ny, rank_info.nz ); - Comm.barrier(); + MPI_Barrier(Comm); } Domain::~Domain() @@ -162,6 +165,10 @@ Domain::~Domain() delete [] recvData_yZ; delete [] recvData_Yz; delete [] recvData_YZ; // Free id delete [] id; + // Free the communicator + if ( Comm != MPI_COMM_WORLD && Comm != MPI_COMM_NULL ) { + MPI_Comm_free(&Comm); + } } void Domain::initialize( std::shared_ptr db ) @@ -212,7 +219,8 @@ void Domain::initialize( std::shared_ptr db ) Ny = ny+2; Nz = nz+2; // Initialize ranks - int myrank = Comm.getRank(); + int myrank; + MPI_Comm_rank( Comm, &myrank ); rank_info = RankInfoStruct(myrank,nproc[0],nproc[1],nproc[2]); // inlet layers only apply to lower part of domain if (rank_info.ix > 0) inlet_layers_x = 0; @@ -231,7 +239,8 @@ void Domain::initialize( std::shared_ptr db ) id = new signed char[N]; memset(id,0,N); BoundaryCondition = d_db->getScalar("BC"); - int nprocs = Comm.getSize(); + int nprocs; + MPI_Comm_size( Comm, &nprocs ); INSIST(nprocs == nproc[0]*nproc[1]*nproc[2],"Fatal error in processor count!"); } @@ -560,7 +569,7 @@ void Domain::Decomp( const std::string& Filename ) } else{ //printf("Sending data to process %i \n", rnk); - Comm.send(loc_id,N,rnk,15); + MPI_Send(loc_id,N,MPI_CHAR,rnk,15,Comm); } // Write the data for this rank data sprintf(LocalRankFilename,"ID.%05i",rnk+rank_offset); @@ -575,10 +584,10 @@ void Domain::Decomp( const std::string& Filename ) else{ // Recieve the subdomain from rank = 0 //printf("Ready to recieve data %i at process %i \n", N,rank); - Comm.recv(id,N,0,15); + MPI_Recv(id,N,MPI_CHAR,0,15,Comm,MPI_STATUS_IGNORE); } - Comm.barrier(); - + //Comm.barrier(); + MPI_Barrier(Comm); // Compute the porosity double sum; double sum_local=0.0; @@ -618,7 +627,8 @@ void Domain::Decomp( const std::string& Filename ) } } } - sum = Comm.sumReduce(sum_local); + MPI_Allreduce(&sum_local,&sum,1,MPI_DOUBLE,MPI_SUM,Comm); + //sum = Comm.sumReduce(sum_local); porosity = sum*iVol_global; if (rank()==0) printf("Media porosity = %f \n",porosity); //......................................................... 
@@ -661,7 +671,7 @@ void Domain::AggregateLabels( const std::string& filename ){ } } } - Comm.barrier(); + MPI_Barrier(Comm); // populate the FullID if (rank() == 0){ @@ -687,7 +697,7 @@ void Domain::AggregateLabels( const std::string& filename ){ ipx = (rnk - ipz*npx*npy - ipy*npx); //printf("ipx=%i ipy=%i ipz=%i\n", ipx, ipy, ipz); int tag = 15+rnk; - Comm.recv(LocalID,local_size,rnk,tag); + MPI_Recv(LocalID,local_size,MPI_CHAR,rnk,tag,Comm,MPI_STATUS_IGNORE); for (int k=1; k db, const Utilities::MPI& Communicator); + Domain( std::shared_ptr db, MPI_Comm Communicator); //! Obsolete constructor Domain( int nx, int ny, int nz, int rnk, int npx, int npy, int npz, @@ -116,7 +116,7 @@ public: // Public variables (need to create accessors instead) double porosity; RankInfoStruct rank_info; - Utilities::MPI Comm; // MPI Communicator for this domain + MPI_Comm Comm; // MPI Communicator for this domain int BoundaryCondition; diff --git a/common/MPI.I b/common/MPI.I deleted file mode 100644 index 8cbc9c09..00000000 --- a/common/MPI.I +++ /dev/null @@ -1,1143 +0,0 @@ -// This file contains the default instantiations for templated operations -// Note: Intel compilers need definitions before all default instantions to compile correctly -#ifndef included_MPI_I -#define included_MPI_I - -#include "common/Utilities.h" - -#include - - -#define MPI_CLASS MPI -#define MPI_CLASS_ERROR ERROR -#define MPI_CLASS_ASSERT ASSERT - -#undef NULL_USE -#define NULL_USE( variable ) \ - do { \ - if ( 0 ) { \ - auto static t = (char *) &variable; \ - t++; \ - } \ - } while ( 0 ) - - -namespace Utilities { - - -// Function to test if a type is a std::pair -template -struct is_pair : std::false_type { -}; -template -struct is_pair> : std::true_type { -}; - - -// Function to test if a type can be passed by MPI -template -constexpr typename std::enable_if::value,bool>::type - is_mpi_copyable() -{ - return true; -} -template -constexpr typename std::enable_if::value&&is_pair::value,bool>::type - is_mpi_copyable() -{ - return is_mpi_copyable() && is_mpi_copyable(); -} -template -constexpr typename std::enable_if::value&&!is_pair::value,bool>::type - is_mpi_copyable() -{ - return false; -} - - -/************************************************************************ - * sumReduce * - ************************************************************************/ -template -inline TYPE MPI_CLASS::sumReduce( const TYPE value ) const -{ - if ( comm_size > 1 ) { - TYPE tmp = value; - call_sumReduce( &tmp, 1 ); - return tmp; - } else { - return value; - } -} -template -inline void MPI_CLASS::sumReduce( TYPE *x, const int n ) const -{ - if ( comm_size > 1 ) - call_sumReduce( x, n ); -} -template -inline void MPI_CLASS::sumReduce( const TYPE *x, TYPE *y, const int n ) const -{ - if ( comm_size > 1 ) { - call_sumReduce( x, y, n ); - } else { - for ( int i = 0; i < n; i++ ) - y[i] = x[i]; - } -} -// Define specializations of call_sumReduce(TYPE*, const int) -#if defined( USE_MPI ) || defined( USE_EXT_MPI ) -template<> -void MPI_CLASS::call_sumReduce( unsigned char *, const int ) const; -template<> -void MPI_CLASS::call_sumReduce( char *, const int ) const; -template<> -void MPI_CLASS::call_sumReduce( unsigned int *, const int ) const; -template<> -void MPI_CLASS::call_sumReduce( int *, const int ) const; -template<> -void MPI_CLASS::call_sumReduce( unsigned long int *, const int ) const; -template<> -void MPI_CLASS::call_sumReduce( long int *, const int ) const; -template<> -void MPI_CLASS::call_sumReduce( size_t *, const int ) const; 
-template<> -void MPI_CLASS::call_sumReduce( float *, const int ) const; -template<> -void MPI_CLASS::call_sumReduce( double *, const int ) const; -template<> -void MPI_CLASS::call_sumReduce>( std::complex *, const int ) const; -#endif -// Default instantiations of call_sumReduce(TYPE*, const int) -template -void MPI_CLASS::call_sumReduce( TYPE *, const int ) const -{ - char message[200]; - sprintf( message, "Default instantion of sumReduce in parallel is not supported (%s)", - typeid( TYPE ).name() ); - MPI_CLASS_ERROR( message ); -} -// Define specializations of call_sumReduce(const TYPE*, TYPE*, const int) -#if defined( USE_MPI ) || defined( USE_EXT_MPI ) -template<> -void MPI_CLASS::call_sumReduce( - const unsigned char *, unsigned char *, const int ) const; -template<> -void MPI_CLASS::call_sumReduce( const char *, char *, const int ) const; -template<> -void MPI_CLASS::call_sumReduce( - const unsigned int *, unsigned int *, const int ) const; -template<> -void MPI_CLASS::call_sumReduce( const int *, int *, const int ) const; -template<> -void MPI_CLASS::call_sumReduce( - const unsigned long int *, unsigned long int *, const int ) const; -template<> -void MPI_CLASS::call_sumReduce( const long int *, long int *, const int ) const; -template<> -void MPI_CLASS::call_sumReduce( const size_t *, size_t *, const int ) const; -template<> -void MPI_CLASS::call_sumReduce( const float *, float *, const int ) const; -template<> -void MPI_CLASS::call_sumReduce( const double *, double *, const int ) const; -template<> -void MPI_CLASS::call_sumReduce>( - const std::complex *, std::complex *, const int ) const; -#endif -// Default instantiations of call_sumReduce(const TYPE*, TYPE*, const int) -template -void MPI_CLASS::call_sumReduce( const TYPE *x, TYPE *y, const int n ) const -{ - NULL_USE( x ); - NULL_USE( y ); - NULL_USE( n ); - char message[200]; - sprintf( message, "Default instantion of sumReduce in parallel is not supported (%s)", - typeid( TYPE ).name() ); - MPI_CLASS_ERROR( message ); -} - - -/************************************************************************ - * minReduce * - ************************************************************************/ -template -inline TYPE MPI_CLASS::minReduce( const TYPE value ) const -{ - if ( comm_size > 1 ) { - TYPE tmp = value; - call_minReduce( &tmp, 1, nullptr ); - return tmp; - } else { - return value; - } -} -template -inline void MPI_CLASS::minReduce( TYPE *x, const int n, int *rank_of_min ) const -{ - if ( comm_size > 1 ) { - call_minReduce( x, n, rank_of_min ); - } else { - if ( rank_of_min != nullptr ) { - for ( int i = 0; i < n; i++ ) - rank_of_min[i] = 0; - } - } -} -template -inline void MPI_CLASS::minReduce( const TYPE *x, TYPE *y, const int n, int *rank_of_min ) const -{ - if ( comm_size > 1 ) { - call_minReduce( x, y, n, rank_of_min ); - } else { - for ( int i = 0; i < n; i++ ) { - y[i] = x[i]; - if ( rank_of_min != nullptr ) - rank_of_min[i] = 0; - } - } -} -// Define specializations of call_minReduce(TYPE*, const int, int*) -#if defined( USE_MPI ) || defined( USE_EXT_MPI ) -template<> -void MPI_CLASS::call_minReduce( unsigned char *, const int, int * ) const; -template<> -void MPI_CLASS::call_minReduce( char *, const int, int * ) const; -template<> -void MPI_CLASS::call_minReduce( unsigned int *, const int, int * ) const; -template<> -void MPI_CLASS::call_minReduce( int *, const int, int * ) const; -template<> -void MPI_CLASS::call_minReduce( unsigned long int *, const int, int * ) const; -template<> -void 
MPI_CLASS::call_minReduce( long int *, const int, int * ) const; -template<> -void MPI_CLASS::call_minReduce( - unsigned long long int *, const int, int * ) const; -template<> -void MPI_CLASS::call_minReduce( long long int *, const int, int * ) const; -template<> -void MPI_CLASS::call_minReduce( size_t *, const int, int * ) const; -template<> -void MPI_CLASS::call_minReduce( float *, const int, int * ) const; -template<> -void MPI_CLASS::call_minReduce( double *, const int, int * ) const; -#endif -// Default instantiations of call_minReduce(TYPE*, const int, int*) -template -void MPI_CLASS::call_minReduce( TYPE *, const int, int * ) const -{ - char message[200]; - sprintf( message, "Default instantion of minReduce in parallel is not supported (%s)", - typeid( TYPE ).name() ); - MPI_CLASS_ERROR( message ); -} -// Define specializations of call_minReduce(const TYPE*, TYPE*, const int, int*) -#if defined( USE_MPI ) || defined( USE_EXT_MPI ) -template<> -void MPI_CLASS::call_minReduce( - const unsigned char *, unsigned char *, const int, int * ) const; -template<> -void MPI_CLASS::call_minReduce( const char *, char *, const int, int * ) const; -template<> -void MPI_CLASS::call_minReduce( - const unsigned int *, unsigned int *, const int, int * ) const; -template<> -void MPI_CLASS::call_minReduce( const int *, int *, const int, int * ) const; -template<> -void MPI_CLASS::call_minReduce( - const unsigned long int *, unsigned long int *, const int, int * ) const; -template<> -void MPI_CLASS::call_minReduce( const long int *, long int *, const int, int * ) const; -template<> -void MPI_CLASS::call_minReduce( - const unsigned long long int *, unsigned long long int *, const int, int * ) const; -template<> -void MPI_CLASS::call_minReduce( - const long long int *, long long int *, const int, int * ) const; -template<> -void MPI_CLASS::call_minReduce( const size_t *, size_t *, const int, int * ) const; -template<> -void MPI_CLASS::call_minReduce( const float *, float *, const int, int * ) const; -template<> -void MPI_CLASS::call_minReduce( const double *, double *, const int, int * ) const; -#endif -// Default instantiations of call_minReduce(const TYPE*, TYPE*, const int, int*) -template -void MPI_CLASS::call_minReduce( const TYPE *, TYPE *, const int, int * ) const -{ - char message[200]; - sprintf( message, "Default instantion of minReduce in parallel is not supported (%s)", - typeid( TYPE ).name() ); - MPI_CLASS_ERROR( message ); -} - - -/************************************************************************ - * maxReduce * - ************************************************************************/ -template -inline TYPE MPI_CLASS::maxReduce( const TYPE value ) const -{ - if ( comm_size > 1 ) { - TYPE tmp = value; - call_maxReduce( &tmp, 1, nullptr ); - return tmp; - } else { - return value; - } -} -template -inline void MPI_CLASS::maxReduce( TYPE *x, const int n, int *rank_of_max ) const -{ - if ( comm_size > 1 ) { - call_maxReduce( x, n, rank_of_max ); - } else { - if ( rank_of_max != nullptr ) { - for ( int i = 0; i < n; i++ ) - rank_of_max[i] = 0; - } - } -} -template -inline void MPI_CLASS::maxReduce( const TYPE *x, TYPE *y, const int n, int *rank_of_max ) const -{ - if ( comm_size > 1 ) { - call_maxReduce( x, y, n, rank_of_max ); - } else { - for ( int i = 0; i < n; i++ ) { - y[i] = x[i]; - if ( rank_of_max != nullptr ) - rank_of_max[i] = 0; - } - } -} -// Define specializations of call_maxReduce(TYPE*, const int, int*) -#if defined( USE_MPI ) || defined( USE_EXT_MPI ) -template<> -void 
MPI_CLASS::call_maxReduce( unsigned char *, const int, int * ) const; -template<> -void MPI_CLASS::call_maxReduce( char *, const int, int * ) const; -template<> -void MPI_CLASS::call_maxReduce( unsigned int *, const int, int * ) const; -template<> -void MPI_CLASS::call_maxReduce( int *, const int, int * ) const; -template<> -void MPI_CLASS::call_maxReduce( unsigned long int *, const int, int * ) const; -template<> -void MPI_CLASS::call_maxReduce( long int *, const int, int * ) const; -template<> -void MPI_CLASS::call_maxReduce( - unsigned long long int *, const int, int * ) const; -template<> -void MPI_CLASS::call_maxReduce( long long int *, const int, int * ) const; -template<> -void MPI_CLASS::call_maxReduce( size_t *, const int, int * ) const; -template<> -void MPI_CLASS::call_maxReduce( float *, const int, int * ) const; -template<> -void MPI_CLASS::call_maxReduce( double *, const int, int * ) const; -#endif -// Default instantiations of call_maxReduce(TYPE*, const int, int*) -template -void MPI_CLASS::call_maxReduce( TYPE *, const int, int * ) const -{ - char message[200]; - sprintf( message, "Default instantion of maxReduce in parallel is not supported (%s)", - typeid( TYPE ).name() ); - MPI_CLASS_ERROR( message ); -} -// Define specializations of call_maxReduce(const TYPE*, TYPE*, const int, int*) -#if defined( USE_MPI ) || defined( USE_EXT_MPI ) -template<> -void MPI_CLASS::call_maxReduce( - const unsigned char *, unsigned char *, const int, int * ) const; -template<> -void MPI_CLASS::call_maxReduce( const char *, char *, const int, int * ) const; -template<> -void MPI_CLASS::call_maxReduce( - const unsigned int *, unsigned int *, const int, int * ) const; -template<> -void MPI_CLASS::call_maxReduce( const int *, int *, const int, int * ) const; -template<> -void MPI_CLASS::call_maxReduce( - const unsigned long int *, unsigned long int *, const int, int * ) const; -template<> -void MPI_CLASS::call_maxReduce( const long int *, long int *, const int, int * ) const; -template<> -void MPI_CLASS::call_maxReduce( - const unsigned long long int *, unsigned long long int *, const int, int * ) const; -template<> -void MPI_CLASS::call_maxReduce( - const long long int *, long long int *, const int, int * ) const; -template<> -void MPI_CLASS::call_maxReduce( const size_t *, size_t *, const int, int * ) const; -template<> -void MPI_CLASS::call_maxReduce( const float *, float *, const int, int * ) const; -template<> -void MPI_CLASS::call_maxReduce( const double *, double *, const int, int * ) const; -#endif -// Default instantiations of call_maxReduce(const TYPE*, TYPE*, const int, int*) -template -void MPI_CLASS::call_maxReduce( const TYPE *, TYPE *, const int, int * ) const -{ - char message[200]; - sprintf( message, "Default instantion of maxReduce in parallel is not supported (%s)", - typeid( TYPE ).name() ); - MPI_CLASS_ERROR( message ); -} - - -/************************************************************************ - * bcast * - ************************************************************************/ -// Define specializations of bcast(TYPE*, const int, const int) -#if defined( USE_MPI ) || defined( USE_EXT_MPI ) -template<> -void MPI_CLASS::call_bcast( unsigned char *, const int, const int ) const; -template<> -void MPI_CLASS::call_bcast( char *, const int, const int ) const; -template<> -void MPI_CLASS::call_bcast( unsigned int *, const int, const int ) const; -template<> -void MPI_CLASS::call_bcast( int *, const int, const int ) const; -template<> -void MPI_CLASS::call_bcast( float 
*, const int, const int ) const; -template<> -void MPI_CLASS::call_bcast( double *, const int, const int ) const; -#else -template<> -void MPI_CLASS::call_bcast( char *, const int, const int ) const; -#endif -// Default instantiations of bcast(TYPE*, const int, const int) -template -void MPI_CLASS::call_bcast( TYPE *x, const int n, const int root ) const -{ - static_assert( is_mpi_copyable(), "Object is not trivially copyable" ); - call_bcast( (char *) x, (int) n * sizeof( TYPE ), root ); -} -// Specialization of bcast for std::string -template<> -inline std::string MPI_CLASS::bcast( const std::string &value, const int root ) const -{ - if ( comm_size == 1 ) - return value; - int length = static_cast( value.size() ); - call_bcast( &length, 1, root ); - if ( length == 0 ) - return std::string(); - char *str = new char[length + 1]; - if ( root == comm_rank ) { - for ( int i = 0; i < length; i++ ) - str[i] = value[i]; - } - call_bcast( str, length, root ); - str[length] = 0; - std::string result( str ); - delete[] str; - return result; -} -template<> -inline void MPI_CLASS::bcast( std::string *, const int, const int ) const -{ - MPI_CLASS_ERROR( "Cannot bcast an array of strings" ); -} -// Default implimentation of bcast -template -inline TYPE MPI_CLASS::bcast( const TYPE &value, const int root ) const -{ - if ( root >= comm_size ) - MPI_CLASS_ERROR( "root cannot be >= size in bcast" ); - if ( comm_size > 1 ) { - TYPE tmp = value; - call_bcast( &tmp, 1, root ); - return tmp; - } else { - return value; - } -} -template -inline void MPI_CLASS::bcast( TYPE *x, const int n, const int root ) const -{ - if ( root >= comm_size ) - MPI_CLASS_ERROR( "root cannot be >= size in bcast" ); - if ( comm_size > 1 ) - call_bcast( x, n, root ); -} - - -/************************************************************************ - * send * - ************************************************************************/ -// Define specializations of send(const TYPE*, const int, const int, int) -#if defined( USE_MPI ) || defined( USE_EXT_MPI ) -template<> -void MPI_CLASS::send( const char *, const int, const int, int ) const; -template<> -void MPI_CLASS::send( const int *, int, const int, int ) const; -template<> -void MPI_CLASS::send( const float *, const int, const int, int ) const; -template<> -void MPI_CLASS::send( const double *, const int, const int, int ) const; -#else -template<> -void MPI_CLASS::send( const char *, const int, const int, int ) const; -#endif -// Default instantiations of send(const TYPE*, const int, const int, int) -template -inline void MPI_CLASS::send( - const TYPE *buf, const int length, const int recv_proc_number, int tag ) const -{ - static_assert( is_mpi_copyable(), "Object is not trivially copyable" ); - send( (const char *) buf, length * sizeof( TYPE ), recv_proc_number, tag ); -} - - -/************************************************************************ - * Isend * - ************************************************************************/ -// Define specializations of Isend(const TYPE*, const int, const int, const int) -#if defined( USE_MPI ) || defined( USE_EXT_MPI ) -template<> -MPI_Request MPI_CLASS::Isend( const char *, const int, const int, const int ) const; -template<> -MPI_Request MPI_CLASS::Isend( const int *, int, const int, const int ) const; -template<> -MPI_Request MPI_CLASS::Isend( const float *, const int, const int, const int ) const; -template<> -MPI_Request MPI_CLASS::Isend( const double *, const int, const int, const int ) const; -#else -template<> -MPI_Request 
MPI_CLASS::Isend( const char *, const int, const int, const int ) const; -#endif -// Default instantiations of Isend(const TYPE*, const int, const int, const int) -template -inline MPI_Request MPI_CLASS::Isend( - const TYPE *buf, const int length, const int recv_proc_number, const int tag ) const -{ - static_assert( is_mpi_copyable(), "Object is not trivially copyable" ); - return Isend( (const char *) buf, length * sizeof( TYPE ), recv_proc_number, tag ); -} - - -/************************************************************************ - * recv * - ************************************************************************/ -// Define specializations of recv(TYPE*, int&, const int, const bool, int) -#if defined( USE_MPI ) || defined( USE_EXT_MPI ) -template<> -void MPI_CLASS::recv( char *, int &, const int, const bool, int ) const; -template<> -void MPI_CLASS::recv( int *, int &, const int, const bool, int ) const; -template<> -void MPI_CLASS::recv( float *, int &, const int, const bool, int ) const; -template<> -void MPI_CLASS::recv( double *, int &, const int, const bool, int ) const; -#else -template<> -void MPI_CLASS::recv( char *, int &, const int, const bool, int ) const; -#endif -// Default instantiations of recv(TYPE*, int&, const int, const bool, int) -template -inline void MPI_CLASS::recv( - TYPE *buf, int &length, const int send_proc_number, const bool get_length, int tag ) const -{ - static_assert( is_mpi_copyable(), "Object is not trivially copyable" ); - int size = length * sizeof( TYPE ); - recv( (char *) buf, size, send_proc_number, get_length, tag ); - if ( get_length ) { - MPI_CLASS_ASSERT( size % sizeof( TYPE ) == 0 ); - length = size / sizeof( TYPE ); - } -} - - -/************************************************************************ - * Irecv * - ************************************************************************/ -// Define specializations of recv(TYPE*, int&, const int, const bool, int) -#if defined( USE_MPI ) || defined( USE_EXT_MPI ) -template<> -MPI_Request MPI_CLASS::Irecv( char *, const int, const int, const int ) const; -template<> -MPI_Request MPI_CLASS::Irecv( int *, const int, const int, const int ) const; -template<> -MPI_Request MPI_CLASS::Irecv( float *, const int, const int, const int ) const; -template<> -MPI_Request MPI_CLASS::Irecv( double *, const int, const int, const int ) const; -#else -template<> -MPI_Request MPI_CLASS::Irecv( char *, const int, const int, const int ) const; -#endif -// Default instantiations of recv(TYPE*, int&, const int, const bool, int) -template -inline MPI_Request MPI_CLASS::Irecv( - TYPE *buf, const int length, const int send_proc, const int tag ) const -{ - static_assert( is_mpi_copyable(), "Object is not trivially copyable" ); - return Irecv( (char *) buf, length * sizeof( TYPE ), send_proc, tag ); -} - - -/************************************************************************ - * allGather * - ************************************************************************/ -template -std::vector MPI_CLASS::allGather( const TYPE &x ) const -{ - static_assert( is_mpi_copyable(), "Object is not trivially copyable" ); - if ( getSize() <= 1 ) - return std::vector( 1, x ); - std::vector data( getSize() ); - allGather( x, data.data() ); - return data; -} -template -std::vector MPI_CLASS::allGather( const std::vector &x ) const -{ - static_assert( is_mpi_copyable(), "Object is not trivially copyable" ); - if ( getSize() <= 1 ) - return x; - std::vector count = allGather( x.size() ); - std::vector disp( getSize(), 0 ); - size_t 
N = count[0]; - for ( size_t i = 1; i < count.size(); i++ ) { - disp[i] = disp[i - 1] + count[i - 1]; - N += count[i]; - } - std::vector data( N ); - allGather( x.data(), x.size(), data.data(), count.data(), disp.data(), true ); - return data; -} -// Specialization of MPI_CLASS::allGather for std::string -template<> -inline void MPI_CLASS::allGather( const std::string &x_in, std::string *x_out ) const -{ - // Get the bytes recvied per processor - std::vector recv_cnt( comm_size, 0 ); - allGather( (int) x_in.size() + 1, &recv_cnt[0] ); - std::vector recv_disp( comm_size, 0 ); - for ( int i = 1; i < comm_size; i++ ) - recv_disp[i] = recv_disp[i - 1] + recv_cnt[i - 1]; - // Call the vector form of allGather for the char arrays - char *recv_data = new char[recv_disp[comm_size - 1] + recv_cnt[comm_size - 1]]; - allGather( - x_in.c_str(), (int) x_in.size() + 1, recv_data, &recv_cnt[0], &recv_disp[0], true ); - for ( int i = 0; i < comm_size; i++ ) - x_out[i] = std::string( &recv_data[recv_disp[i]] ); - delete[] recv_data; -} -// Default instantiation of MPI_CLASS::allGather -template -inline void MPI_CLASS::allGather( const TYPE &x_in, TYPE *x_out ) const -{ - static_assert( is_mpi_copyable(), "Object is not trivially copyable" ); - if ( comm_size > 1 ) { - // We can use the vector form of allGather with a char array to ge the data we want - call_allGather( x_in, x_out ); - } else { - // Single processor case - x_out[0] = x_in; - } -} -// Specialization of MPI_CLASS::allGather for std::string -template<> -inline int MPI_CLASS::allGather( - const std::string *, const int, std::string *, int *, int *, bool ) const -{ - MPI_CLASS_ERROR( "Cannot allGather an array of strings" ); - return 0; -} -// Define specializations of call_allGather(const TYPE, TYPE*) -#if defined( USE_MPI ) || defined( USE_EXT_MPI ) -template<> -void MPI_CLASS::call_allGather( const unsigned char &, unsigned char * ) const; -template<> -void MPI_CLASS::call_allGather( const char &, char * ) const; -template<> -void MPI_CLASS::call_allGather( const unsigned int &, unsigned int * ) const; -template<> -void MPI_CLASS::call_allGather( const int &, int * ) const; -template<> -void MPI_CLASS::call_allGather( - const unsigned long int &, unsigned long int * ) const; -template<> -void MPI_CLASS::call_allGather( const long int &, long int * ) const; -template<> -void MPI_CLASS::call_allGather( const float &, float * ) const; -template<> -void MPI_CLASS::call_allGather( const double &, double * ) const; -#endif -// Default instantiation of MPI_CLASS::allGather -template -int MPI_CLASS::allGather( const TYPE *send_data, const int send_cnt, TYPE *recv_data, int *recv_cnt, - int *recv_disp, bool known_recv ) const -{ - // Check the inputs - if ( known_recv && ( recv_cnt == nullptr || recv_disp == nullptr ) ) - MPI_CLASS_ERROR( "Error calling allGather" ); - // Check if we are dealing with a single processor - if ( comm_size == 1 ) { - if ( send_data == nullptr && send_cnt > 0 ) { - MPI_CLASS_ERROR( "send_data is null" ); - } else if ( !known_recv ) { - // We do not know the recieved sizes - for ( int i = 0; i < send_cnt; i++ ) - recv_data[i] = send_data[i]; - if ( recv_cnt != nullptr ) - recv_cnt[0] = send_cnt; - if ( recv_disp != nullptr ) - recv_disp[0] = 0; - } else { - // We know the recieved sizes - for ( int i = 0; i < send_cnt; i++ ) - recv_data[i + recv_disp[0]] = send_data[i]; - } - return send_cnt; - } - // Get the sizes of the recieved data (if necessary) - int *recv_cnt2 = recv_cnt; - int *recv_disp2 = recv_disp; - if ( 
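The variable-length allGather above follows the usual count/displacement recipe: exchange the per-rank counts, build displacements as an exclusive prefix sum, then gather the payload. A minimal standalone sketch of that recipe with plain MPI calls (allGatherInts is an illustrative name, not part of the patch):

#include <mpi.h>
#include <vector>

std::vector<int> allGatherInts( const std::vector<int> &local, MPI_Comm comm )
{
    int size = 1;
    MPI_Comm_size( comm, &size );
    // 1) exchange how many elements each rank will contribute
    int n_local = static_cast<int>( local.size() );
    std::vector<int> counts( size );
    MPI_Allgather( &n_local, 1, MPI_INT, counts.data(), 1, MPI_INT, comm );
    // 2) turn the counts into displacements (exclusive prefix sum)
    std::vector<int> disp( size, 0 );
    for ( int i = 1; i < size; i++ )
        disp[i] = disp[i - 1] + counts[i - 1];
    // 3) gather the actual payload
    std::vector<int> global( disp.back() + counts.back() );
    MPI_Allgatherv( local.data(), n_local, MPI_INT, global.data(),
        counts.data(), disp.data(), MPI_INT, comm );
    return global;
}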
!known_recv ) { - if ( recv_cnt == nullptr ) - recv_cnt2 = new int[comm_size]; - if ( recv_disp == nullptr ) - recv_disp2 = new int[comm_size]; - call_allGather( send_cnt, recv_cnt2 ); - recv_disp2[0] = 0; - for ( int i = 1; i < comm_size; i++ ) - recv_disp2[i] = recv_disp2[i - 1] + recv_cnt2[i - 1]; - } - int N_recv = 0; - for ( int i = 0; i < comm_size; i++ ) - N_recv += recv_cnt2[i]; - // Send/recv the data - call_allGather( send_data, send_cnt, recv_data, recv_cnt2, recv_disp2 ); - // Delete any temporary memory - if ( recv_cnt == nullptr ) - delete[] recv_cnt2; - if ( recv_disp == nullptr ) - delete[] recv_disp2; - return N_recv; -} -// Default instantiations of call_allGather(const TYPE, TYPE*) -template -void MPI_CLASS::call_allGather( const TYPE &x_in, TYPE *x_out ) const -{ - static_assert( is_mpi_copyable(), "Object is not trivially copyable" ); - allGather( (const char *) &x_in, (int) sizeof( TYPE ), (char *) x_out ); -} -// Define specializations of call_allGather(const TYPE*, int, TYPE*, int*, int*) -#if defined( USE_MPI ) || defined( USE_EXT_MPI ) -template<> -void MPI_CLASS::call_allGather( - const unsigned char *, int, unsigned char *, int *, int * ) const; -template<> -void MPI_CLASS::call_allGather( const char *, int, char *, int *, int * ) const; -template<> -void MPI_CLASS::call_allGather( - const unsigned int *, int, unsigned int *, int *, int * ) const; -template<> -void MPI_CLASS::call_allGather( const int *, int, int *, int *, int * ) const; -template<> -void MPI_CLASS::call_allGather( - const unsigned long int *, int, unsigned long int *, int *, int * ) const; -template<> -void MPI_CLASS::call_allGather( const long int *, int, long int *, int *, int * ) const; -template<> -void MPI_CLASS::call_allGather( const float *, int, float *, int *, int * ) const; -template<> -void MPI_CLASS::call_allGather( const double *, int, double *, int *, int * ) const; -#else -template<> -void MPI_CLASS::call_allGather( const char *, int, char *, int *, int * ) const; -#endif -// Default instantiations of int call_allGather(const TYPE*, int, TYPE*, int*) -template -void MPI_CLASS::call_allGather( - const TYPE *x_in, int size_in, TYPE *x_out, int *size_out, int *disp_out ) const -{ - int *size2 = new int[comm_size]; - int *disp2 = new int[comm_size]; - for ( int i = 0; i < comm_size; i++ ) { - size2[i] = size_out[i] * sizeof( TYPE ); - disp2[i] = disp_out[i] * sizeof( TYPE ); - } - static_assert( is_mpi_copyable(), "Object is not trivially copyable" ); - call_allGather( - (const char *) x_in, (int) size_in * sizeof( TYPE ), (char *) x_out, size2, disp2 ); - delete[] size2; - delete[] disp2; -} - - -/************************************************************************ - * setGather * - ************************************************************************/ -template -inline void MPI_CLASS::setGather( std::set &set ) const -{ - std::vector send_buf( set.begin(), set.end() ); - std::vector recv_cnt( this->comm_size, 0 ); - this->allGather( (int) send_buf.size(), &recv_cnt[0] ); - std::vector recv_disp( this->comm_size, 0 ); - for ( int i = 1; i < this->comm_size; i++ ) - recv_disp[i] = recv_disp[i - 1] + recv_cnt[i - 1]; - size_t N_recv_tot = 0; - for ( int i = 0; i < this->comm_size; i++ ) - N_recv_tot += recv_cnt[i]; - if ( N_recv_tot == 0 ) - return; - std::vector recv_buf( N_recv_tot ); - TYPE *send_data = nullptr; - if ( send_buf.size() > 0 ) { - send_data = &send_buf[0]; - } - TYPE *recv_data = &recv_buf[0]; - static_assert( is_mpi_copyable(), "Object is not trivially 
copyable" ); - this->allGather( - send_data, (int) send_buf.size(), recv_data, &recv_cnt[0], &recv_disp[0], true ); - for ( size_t i = 0; i < recv_buf.size(); i++ ) - set.insert( recv_buf[i] ); -} - - -/************************************************************************ - * mapGather * - ************************************************************************/ -template -inline void MPI_CLASS::mapGather( std::map &map ) const -{ - std::vector send_id; - std::vector send_data; - send_id.reserve( map.size() ); - send_data.reserve( map.size() ); - for ( auto it = map.begin(); it != map.end(); ++it ) { - send_id.push_back( it->first ); - send_data.push_back( it->second ); - } - int send_size = (int) send_id.size(); - std::vector recv_cnt( this->comm_size, 0 ); - this->allGather( send_size, &recv_cnt[0] ); - std::vector recv_disp( this->comm_size, 0 ); - for ( int i = 1; i < this->comm_size; i++ ) - recv_disp[i] = recv_disp[i - 1] + recv_cnt[i - 1]; - size_t N_recv_tot = 0; - for ( int i = 0; i < this->comm_size; i++ ) - N_recv_tot += recv_cnt[i]; - if ( N_recv_tot == 0 ) - return; - std::vector recv_id( N_recv_tot ); - std::vector recv_data( N_recv_tot ); - KEY *send_data1 = nullptr; - DATA *send_data2 = nullptr; - if ( send_id.size() > 0 ) { - send_data1 = &send_id[0]; - send_data2 = &send_data[0]; - } - static_assert( is_mpi_copyable(), "Object is not trivially copyable" ); - this->allGather( send_data1, send_size, &recv_id[0], &recv_cnt[0], &recv_disp[0], true ); - this->allGather( - send_data2, send_size, &recv_data[0], &recv_cnt[0], &recv_disp[0], true ); - map = std::map(); - for ( size_t i = 0; i < N_recv_tot; i++ ) - map.insert( std::pair( recv_id[i], recv_data[i] ) ); -} - - -/************************************************************************ - * sumScan * - ************************************************************************/ -template -inline void MPI_CLASS::sumScan( const TYPE *x, TYPE *y, const int n ) const -{ - if ( comm_size > 1 ) { - call_sumScan( x, y, n ); - } else { - for ( int i = 0; i < n; i++ ) - y[i] = x[i]; - } -} -// Define specializations of call_sumScan(const TYPE*, TYPE*, int) -#if defined( USE_MPI ) || defined( USE_EXT_MPI ) -template<> -void MPI_CLASS::call_sumScan( const unsigned char *, unsigned char *, int ) const; -template<> -void MPI_CLASS::call_sumScan( const char *, char *, int ) const; -template<> -void MPI_CLASS::call_sumScan( const unsigned int *, unsigned int *, int ) const; -template<> -void MPI_CLASS::call_sumScan( const int *, int *, int ) const; -template<> -void MPI_CLASS::call_sumScan( - const unsigned long int *, unsigned long int *, int ) const; -template<> -void MPI_CLASS::call_sumScan( const long int *, long int *, int ) const; -template<> -void MPI_CLASS::call_sumScan( const size_t *, size_t *, int ) const; -template<> -void MPI_CLASS::call_sumScan( const float *, float *, int ) const; -template<> -void MPI_CLASS::call_sumScan( const double *, double *, int ) const; -template<> -void MPI_CLASS::call_sumScan>( - const std::complex *, std::complex *, int ) const; -#endif -// Default instantiations of call_sumScan(const TYPE*, TYPE*, int) -template -void MPI_CLASS::call_sumScan( const TYPE *, TYPE *, int ) const -{ - char message[200]; - sprintf( message, "Default instantion of sumScan in parallel is not supported (%s)", - typeid( TYPE ).name() ); - MPI_CLASS_ERROR( message ); -} - - -/************************************************************************ - * minScan * - 
************************************************************************/ -template -inline void MPI_CLASS::minScan( const TYPE *x, TYPE *y, const int n ) const -{ - if ( comm_size > 1 ) { - call_minScan( x, y, n ); - } else { - for ( int i = 0; i < n; i++ ) - y[i] = x[i]; - } -} -// Define specializations of call_minScan(const TYPE*, TYPE*, int) -#if defined( USE_MPI ) || defined( USE_EXT_MPI ) -template<> -void MPI_CLASS::call_minScan( const unsigned char *, unsigned char *, int ) const; -template<> -void MPI_CLASS::call_minScan( const char *, char *, int ) const; -template<> -void MPI_CLASS::call_minScan( const unsigned int *, unsigned int *, int ) const; -template<> -void MPI_CLASS::call_minScan( const int *, int *, int ) const; -template<> -void MPI_CLASS::call_minScan( - const unsigned long int *, unsigned long int *, int ) const; -template<> -void MPI_CLASS::call_minScan( const long int *, long int *, int ) const; -template<> -void MPI_CLASS::call_minScan( const size_t *, size_t *, int ) const; -template<> -void MPI_CLASS::call_minScan( const float *, float *, int ) const; -template<> -void MPI_CLASS::call_minScan( const double *, double *, int ) const; -#endif -// Default instantiations of call_minScan(const TYPE*, TYPE*, int) -template -void MPI_CLASS::call_minScan( const TYPE *, TYPE *, int ) const -{ - char message[200]; - sprintf( message, "Default instantion of minScan in parallel is not supported (%s)", - typeid( TYPE ).name() ); - MPI_CLASS_ERROR( message ); -} - - -/************************************************************************ - * maxScan * - ************************************************************************/ -template -inline void MPI_CLASS::maxScan( const TYPE *x, TYPE *y, const int n ) const -{ - if ( comm_size > 1 ) { - call_maxScan( x, y, n ); - } else { - for ( int i = 0; i < n; i++ ) - y[i] = x[i]; - } -} -// Define specializations of call_maxScan(const TYPE*, TYPE*, int) -#if defined( USE_MPI ) || defined( USE_EXT_MPI ) -template<> -void MPI_CLASS::call_maxScan( const unsigned char *, unsigned char *, int ) const; -template<> -void MPI_CLASS::call_maxScan( const char *, char *, int ) const; -template<> -void MPI_CLASS::call_maxScan( const unsigned int *, unsigned int *, int ) const; -template<> -void MPI_CLASS::call_maxScan( const int *, int *, int ) const; -template<> -void MPI_CLASS::call_maxScan( - const unsigned long int *, unsigned long int *, int ) const; -template<> -void MPI_CLASS::call_maxScan( const long int *, long int *, int ) const; -template<> -void MPI_CLASS::call_maxScan( const size_t *, size_t *, int ) const; -template<> -void MPI_CLASS::call_maxScan( const float *, float *, int ) const; -template<> -void MPI_CLASS::call_maxScan( const double *, double *, int ) const; -#endif -// Default instantiations of call_maxScan(const TYPE*, TYPE*, int) -template -void MPI_CLASS::call_maxScan( const TYPE *, TYPE *, int ) const -{ - char message[200]; - sprintf( message, "Default instantion of maxReduce in parallel is not supported (%s)", - typeid( TYPE ).name() ); - MPI_CLASS_ERROR( message ); -} - - -/************************************************************************ - * allToAll * - ************************************************************************/ -// Define specializations of allToAll(const int n, const char*, char* ) -#if defined( USE_MPI ) || defined( USE_EXT_MPI ) -template<> -void MPI_CLASS::allToAll( - const int n, const unsigned char *, unsigned char * ) const; -template<> -void MPI_CLASS::allToAll( const int n, const 
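The scan helpers above (sumScan, minScan, maxScan) forward in parallel to MPI_Scan with the matching operation. A minimal sketch of the sum case, assuming double data (sumScanDouble is an illustrative name only):

#include <mpi.h>

// On rank r, y[i] becomes the sum of x[i] over ranks 0..r (inclusive scan).
// MPI_MIN or MPI_MAX in place of MPI_SUM gives the min/max scan variants.
void sumScanDouble( const double *x, double *y, int n, MPI_Comm comm )
{
    MPI_Scan( x, y, n, MPI_DOUBLE, MPI_SUM, comm );
}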
char *, char * ) const; -template<> -void MPI_CLASS::allToAll( const int n, const unsigned int *, unsigned int * ) const; -template<> -void MPI_CLASS::allToAll( const int n, const int *, int * ) const; -template<> -void MPI_CLASS::allToAll( - const int n, const unsigned long int *, unsigned long int * ) const; -template<> -void MPI_CLASS::allToAll( const int n, const long int *, long int * ) const; -template<> -void MPI_CLASS::allToAll( const int n, const float *, float * ) const; -template<> -void MPI_CLASS::allToAll( const int n, const double *, double * ) const; -#endif -// Default instantiations of allToAll(const int n, const char*, char* ) -#if defined( USE_MPI ) || defined( USE_EXT_MPI ) -template -void MPI_CLASS::allToAll( const int n, const TYPE *send_data, TYPE *recv_data ) const -{ - static_assert( is_mpi_copyable(), "Object is not trivially copyable" ); - allToAll( n * sizeof( TYPE ), (char *) send_data, (char *) recv_data ); -} -#else -template -void MPI_CLASS::allToAll( const int n, const TYPE *send_data, TYPE *recv_data ) const -{ - if ( comm_size != 1 ) - MPI_CLASS_ERROR( "Invalid size for allToAll" ); - for ( int i = 0; i < n; i++ ) - recv_data[i] = send_data[i]; -} -#endif - - -/************************************************************************ - * allToAll * - ************************************************************************/ -template -int MPI_CLASS::allToAll( const TYPE *send_data, const int send_cnt[], const int send_disp[], - TYPE *recv_data, int *recv_cnt, int *recv_disp, bool known_recv ) const -{ - int N_recieved = 0; - if ( comm_size == 1 ) { - // Special case for single-processor communicators - if ( known_recv ) { - if ( recv_cnt[0] != send_cnt[0] && send_cnt[0] > 0 ) - MPI_CLASS_ERROR( "Single processor send/recv are different sizes" ); - } else { - if ( recv_cnt != nullptr ) - recv_cnt[0] = send_cnt[0]; - if ( recv_disp != nullptr ) - recv_disp[0] = send_disp[0]; - } - for ( int i = 0; i < send_cnt[0]; i++ ) - recv_data[i + recv_disp[0]] = send_data[i + send_disp[0]]; - N_recieved = send_cnt[0]; - } else if ( known_recv ) { - // The recieve sizes are known - MPI_CLASS_ASSERT( recv_cnt != nullptr && recv_disp != nullptr ); - call_allToAll( send_data, send_cnt, send_disp, recv_data, recv_cnt, recv_disp ); - for ( int i = 0; i < comm_size; i++ ) - N_recieved += recv_cnt[i]; - } else { - // The recieve sizes are not known, we need to communicate that information first - int *recv_cnt2 = recv_cnt; - int *recv_disp2 = recv_disp; - if ( recv_cnt == nullptr ) - recv_cnt2 = new int[comm_size]; - if ( recv_disp == nullptr ) - recv_disp2 = new int[comm_size]; - // Communicate the size we will be recieving from each processor - allToAll( 1, send_cnt, recv_cnt2 ); - recv_disp2[0] = 0; - for ( int i = 1; i < comm_size; i++ ) - recv_disp2[i] = recv_disp2[i - 1] + recv_cnt2[i - 1]; - // Send the data - call_allToAll( send_data, send_cnt, send_disp, recv_data, recv_cnt2, recv_disp2 ); - for ( int i = 0; i < comm_size; i++ ) - N_recieved += recv_cnt2[i]; - if ( recv_cnt == nullptr ) - delete[] recv_cnt2; - if ( recv_disp == nullptr ) - delete[] recv_disp2; - } - return N_recieved; -} -// Define specializations of call_allToAll -#if defined( USE_MPI ) || defined( USE_EXT_MPI ) -template<> -void MPI_CLASS::call_allToAll( const unsigned char *, const int *, const int *, - unsigned char *, const int *, const int * ) const; -template<> -void MPI_CLASS::call_allToAll( - const char *, const int *, const int *, char *, const int *, const int * ) const; -template<> -void 
MPI_CLASS::call_allToAll( const unsigned int *, const int *, const int *, - unsigned int *, const int *, const int * ) const; -template<> -void MPI_CLASS::call_allToAll( - const int *, const int *, const int *, int *, const int *, const int * ) const; -template<> -void MPI_CLASS::call_allToAll( const unsigned long int *, const int *, - const int *, unsigned long int *, const int *, const int * ) const; -template<> -void MPI_CLASS::call_allToAll( - const long int *, const int *, const int *, long int *, const int *, const int * ) const; -template<> -void MPI_CLASS::call_allToAll( - const float *, const int *, const int *, float *, const int *, const int * ) const; -template<> -void MPI_CLASS::call_allToAll( - const double *, const int *, const int *, double *, const int *, const int * ) const; -#else -template<> -void MPI_CLASS::call_allToAll( - const char *, const int *, const int *, char *, const int *, const int * ) const; -#endif -// Default instantiations of call_allToAll -template -void MPI_CLASS::call_allToAll( const TYPE *send_data, const int send_cnt[], const int send_disp[], - TYPE *recv_data, const int *recv_cnt, const int *recv_disp ) const -{ - int *send_cnt2 = new int[comm_size]; - int *recv_cnt2 = new int[comm_size]; - int *send_disp2 = new int[comm_size]; - int *recv_disp2 = new int[comm_size]; - for ( int i = 0; i < comm_size; i++ ) { - send_cnt2[i] = send_cnt[i] * sizeof( TYPE ); - send_disp2[i] = send_disp[i] * sizeof( TYPE ); - recv_cnt2[i] = recv_cnt[i] * sizeof( TYPE ); - recv_disp2[i] = recv_disp[i] * sizeof( TYPE ); - } - static_assert( is_mpi_copyable(), "Object is not trivially copyable" ); - call_allToAll( - (char *) send_data, send_cnt2, send_disp2, (char *) recv_data, recv_cnt2, recv_disp2 ); - delete[] send_cnt2; - delete[] recv_cnt2; - delete[] send_disp2; - delete[] recv_disp2; -} - - -} // namespace Utilities - -#endif diff --git a/common/MPI.cpp b/common/MPI.cpp deleted file mode 100644 index 73932d03..00000000 --- a/common/MPI.cpp +++ /dev/null @@ -1,3758 +0,0 @@ -// This file impliments a wrapper class for MPI functions - -#include "common/MPI.h" -#include "common/Utilities.h" - -#include "ProfilerApp.h" -#include "StackTrace/ErrorHandlers.h" -#include "StackTrace/StackTrace.h" - -// Include all other headers -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - - -// Include OS specific headers -#undef USE_WINDOWS -#undef USE_LINUX -#undef USE_MAC -#if defined( WIN32 ) || defined( _WIN32 ) || defined( WIN64 ) || defined( _WIN64 ) -// We are using windows -#define USE_WINDOWS -#include -#include -#define sched_yield() Sleep( 0 ) -#elif defined( __APPLE__ ) -// Using MAC -#define USE_MAC -#include -#elif defined( __linux ) || defined( __linux__ ) || defined( __unix ) || defined( __posix ) -// We are using linux -#define USE_LINUX -#include -#include -#else -#error Unknown OS -#endif - - -// Convience defines -#define MPI_ERROR ERROR -#define MPI_ASSERT ASSERT -#define MPI_INSIST INSIST -#define MPI_WARNING WARNING -#define MPI_CLASS_COMM_NULL MPI_COMM_NULL -#define MPI_CLASS_COMM_SELF MPI_COMM_SELF -#define MPI_CLASS_COMM_WORLD MPI_COMM_WORLD - - -// Global variable to track create new unique comms (dup and split) -#ifndef USE_MPI -MPI_Comm uniqueGlobalComm = 11; -#endif - - -#if defined( USE_SAMRAI ) && defined( USE_PETSC ) && !defined( USE_MPI ) -int MPI_REQUEST_NULL = 3; -int MPI_ERR_IN_STATUS = 4; -#endif - - -namespace Utilities { - - -// Some special structs to work with MPI -#ifdef USE_MPI 
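The default template instantiations in this header all fall back to the same idiom: a trivially copyable type is shipped through the char path by scaling counts and displacements by sizeof(TYPE). A minimal standalone sketch of that idiom for a broadcast (byteBcast is a hypothetical helper, not part of the patch):

#include <mpi.h>
#include <type_traits>

template<class TYPE>
void byteBcast( TYPE *x, int n, int root, MPI_Comm comm )
{
    // Only bit-copyable types may be sent as raw bytes
    static_assert( std::is_trivially_copyable<TYPE>::value,
        "Object is not trivially copyable" );
    MPI_Bcast( x, n * static_cast<int>( sizeof( TYPE ) ), MPI_BYTE, root, comm );
}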
-struct IntIntStruct { - int j; - int i; -}; -struct LongIntStruct { - long int j; - int i; -}; -struct FloatIntStruct { - float f; - int i; -}; -struct DoubleIntStruct { - double d; - int i; -}; -#endif - - -// Initialized the static member variables -volatile unsigned int MPI_CLASS::N_MPI_Comm_created = 0; -volatile unsigned int MPI_CLASS::N_MPI_Comm_destroyed = 0; -short MPI_CLASS::profile_level = 127; - - -// Define a type for use with size_t -#ifdef USE_MPI -static MPI_Datatype MPI_SIZE_T = 0x0; -static MPI_Datatype getSizeTDataType() -{ - int size_int, size_long, size_longlong, size_longlong2; - MPI_Type_size( MPI_UNSIGNED, &size_int ); - MPI_Type_size( MPI_UNSIGNED_LONG, &size_long ); - MPI_Type_size( MPI_UNSIGNED_LONG_LONG, &size_longlong ); - MPI_Type_size( MPI_LONG_LONG_INT, &size_longlong2 ); - if ( sizeof( size_t ) == size_int ) { - return MPI_UNSIGNED; - } else if ( sizeof( size_t ) == size_long ) { - return MPI_UNSIGNED_LONG; - } else if ( sizeof( size_t ) == size_longlong ) { - return MPI_UNSIGNED_LONG_LONG; - } else if ( sizeof( size_t ) == size_longlong2 ) { - MPI_WARNING( "Using signed long long datatype for size_t in MPI" ); - return MPI_LONG_LONG_INT; // Note: this is not unsigned - } else { - MPI_ERROR( "No suitable datatype found" ); - } - return 0; -} -#endif - - -// Static data for asyncronous communication without MPI -// Note: these routines may not be thread-safe yet -#ifndef USE_MPI -static const int mpi_max_tag = 0x003FFFFF; -struct Isendrecv_struct { - const char *data; // Pointer to data - int status; // Status: 1-sending, 2-recieving -}; -std::map global_isendrecv_list; -static MPI_Request getRequest( MPI_Comm comm, int tag ) -{ - MPI_ASSERT( tag >= 0 && tag <= mpi_max_tag ); - // Use hashing function: 2^64*0.5*(sqrt(5)-1) - uint64_t a = static_cast( comm ) * 0x9E3779B97F4A7C15; - uint64_t b = static_cast( tag ) * 0x9E3779B97F4A7C15; - uint64_t hash = a ^ b; - MPI_Request request; - memcpy( &request, &hash, sizeof( MPI_Request ) ); - return request; -} -#endif - - -// Check the mpi error code -#ifdef USE_MPI -inline void check_MPI( int error ) -{ - if ( error != MPI_SUCCESS ) - MPI_ERROR( "Error calling MPI routine" ); -} -#endif - - -/****************************************************************** - * Some helper functions to convert between signed/unsigned types * - ******************************************************************/ -DISABLE_WARNINGS -static inline constexpr unsigned int offset_int() -{ - return ~static_cast( std::numeric_limits::min() ) + 1; -} -static inline constexpr unsigned long int offset_long() -{ - return ~static_cast( std::numeric_limits::min() ) + 1; -} -static inline constexpr unsigned long long int offset_long_long() -{ - return ~static_cast( std::numeric_limits::min() ) + 1; -} -ENABLE_WARNINGS -static inline unsigned int signed_to_unsigned( int x ) -{ - const auto offset = offset_int(); - return ( x >= 0 ) ? static_cast( x ) + offset : - offset - static_cast( -x ); -} -static inline unsigned long int signed_to_unsigned( long int x ) -{ - const auto offset = offset_long(); - return ( x >= 0 ) ? static_cast( x ) + offset : - offset - static_cast( -x ); -} -static inline unsigned long long int signed_to_unsigned( long long int x ) -{ - const auto offset = offset_long_long(); - return ( x >= 0 ) ? static_cast( x ) + offset : - offset - static_cast( -x ); -} -static inline int unsigned_to_signed( unsigned int x ) -{ - const auto offset = offset_int(); - return ( x >= offset ) ? 
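getSizeTDataType() above selects an MPI datatype whose size matches size_t at runtime. A minimal standalone sketch of that selection (pickSizeTType is an illustrative name; the fallback to a signed type used by the original is omitted here):

#include <mpi.h>
#include <cstddef>

MPI_Datatype pickSizeTType()
{
    int s_uint = 0, s_ulong = 0, s_ull = 0;
    MPI_Type_size( MPI_UNSIGNED, &s_uint );
    MPI_Type_size( MPI_UNSIGNED_LONG, &s_ulong );
    MPI_Type_size( MPI_UNSIGNED_LONG_LONG, &s_ull );
    if ( sizeof( size_t ) == (size_t) s_uint )
        return MPI_UNSIGNED;
    if ( sizeof( size_t ) == (size_t) s_ulong )
        return MPI_UNSIGNED_LONG;
    if ( sizeof( size_t ) == (size_t) s_ull )
        return MPI_UNSIGNED_LONG_LONG;
    return MPI_DATATYPE_NULL; // no exact match found
}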
static_cast( x - offset ) : -static_cast( offset - x ); -} -static inline long int unsigned_to_signed( unsigned long int x ) -{ - const auto offset = offset_long(); - return ( x >= offset ) ? static_cast( x - offset ) : - -static_cast( offset - x ); -} -static inline long long int unsigned_to_signed( unsigned long long int x ) -{ - const auto offset = offset_long_long(); - return ( x >= offset ) ? static_cast( x - offset ) : - -static_cast( offset - x ); -} - - -/************************************************************************ - * Get the MPI version * - ************************************************************************/ -std::array MPI_CLASS::version() -{ -#ifdef USE_MPI - int MPI_version; - int MPI_subversion; - MPI_Get_version( &MPI_version, &MPI_subversion ); - return { MPI_version, MPI_subversion }; -#else - return { 0, 0 }; -#endif -} -std::string MPI_CLASS::info() -{ -#ifdef USE_MPI -#if MPI_VERSION >= 3 - int MPI_version_length = 0; - char MPI_version_string[MPI_MAX_LIBRARY_VERSION_STRING]; - MPI_Get_library_version( MPI_version_string, &MPI_version_length ); - if ( MPI_version_length > 0 ) { - std::string MPI_info( MPI_version_string, MPI_version_length ); - size_t pos = MPI_info.find( '\n' ); - while ( pos != std::string::npos ) { - MPI_info.insert( pos + 1, " " ); - pos = MPI_info.find( '\n', pos + 1 ); - } - return MPI_info; - } -#endif - auto tmp = version(); - return std::to_string( tmp[0] ) + "." + std::to_string( tmp[0] ); -#else - return std::string(); -#endif -} - - -/************************************************************************ - * Functions to get/set the process affinities * - ************************************************************************/ -int MPI_CLASS::getNumberOfProcessors() { return std::thread::hardware_concurrency(); } -std::vector MPI_CLASS::getProcessAffinity() -{ - std::vector procs; -#ifdef USE_LINUX - cpu_set_t mask; - int error = sched_getaffinity( getpid(), sizeof( cpu_set_t ), &mask ); - if ( error != 0 ) - MPI_ERROR( "Error getting process affinity" ); - for ( int i = 0; i < (int) sizeof( cpu_set_t ) * CHAR_BIT; i++ ) { - if ( CPU_ISSET( i, &mask ) ) - procs.push_back( i ); - } -#elif defined( USE_MAC ) - // MAC does not support getting or setting the affinity - printf( "Warning: MAC does not support getting the process affinity\n" ); - procs.clear(); -#elif defined( USE_WINDOWS ) - HANDLE hProc = GetCurrentProcess(); - size_t procMask; - size_t sysMask; - PDWORD_PTR procMaskPtr = reinterpret_cast( &procMask ); - PDWORD_PTR sysMaskPtr = reinterpret_cast( &sysMask ); - GetProcessAffinityMask( hProc, procMaskPtr, sysMaskPtr ); - for ( int i = 0; i < (int) sizeof( size_t ) * CHAR_BIT; i++ ) { - if ( ( procMask & 0x1 ) != 0 ) - procs.push_back( i ); - procMask >>= 1; - } -#else -#error Unknown OS -#endif - return procs; -} -void MPI_CLASS::setProcessAffinity( const std::vector &procs ) -{ -#ifdef USE_LINUX - cpu_set_t mask; - CPU_ZERO( &mask ); - for ( auto cpu : procs ) - CPU_SET( cpu, &mask ); - int error = sched_setaffinity( getpid(), sizeof( cpu_set_t ), &mask ); - if ( error != 0 ) - MPI_ERROR( "Error setting process affinity" ); -#elif defined( USE_MAC ) - // MAC does not support getting or setting the affinity - NULL_USE( procs ); -#elif defined( USE_WINDOWS ) - DWORD mask = 0; - for ( size_t i = 0; i < procs.size(); i++ ) - mask |= ( (DWORD) 1 ) << procs[i]; - HANDLE hProc = GetCurrentProcess(); - SetProcessAffinityMask( hProc, mask ); -#else -#error Unknown OS -#endif -} - - 
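getProcessAffinity() above reads the CPU mask with sched_getaffinity on Linux. A minimal Linux-only sketch of that query (readAffinity is an illustrative name; error handling is reduced to returning an empty list):

#include <sched.h>
#include <unistd.h>
#include <vector>

std::vector<int> readAffinity()
{
    std::vector<int> cpus;
    cpu_set_t mask;
    CPU_ZERO( &mask );
    if ( sched_getaffinity( getpid(), sizeof( cpu_set_t ), &mask ) != 0 )
        return cpus; // query failed; return empty
    for ( int i = 0; i < CPU_SETSIZE; i++ )
        if ( CPU_ISSET( i, &mask ) )
            cpus.push_back( i );
    return cpus;
}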
-/************************************************************************ - * Function to check if MPI is active * - ************************************************************************/ -bool MPI_CLASS::MPI_active() -{ -#ifdef USE_MPI - int initialized = 0, finalized = 0; - MPI_Initialized( &initialized ); - MPI_Finalized( &finalized ); - return initialized != 0 && finalized == 0; -#else - return true; -#endif -} -MPI_CLASS::ThreadSupport MPI_CLASS::queryThreadSupport() -{ -#ifdef USE_MPI - int provided = 0; - MPI_Query_thread( &provided ); - if ( provided == MPI_THREAD_SINGLE ) - return ThreadSupport::SINGLE; - if ( provided == MPI_THREAD_FUNNELED ) - return ThreadSupport::FUNNELED; - if ( provided == MPI_THREAD_SERIALIZED ) - return ThreadSupport::SERIALIZED; - if ( provided == MPI_THREAD_MULTIPLE ) - return ThreadSupport::MULTIPLE; - return ThreadSupport::SINGLE; -#else - return ThreadSupport::MULTIPLE; -#endif -} - - -/************************************************************************ - * Function to perform a load balance of the given processes * - ************************************************************************/ -void MPI_CLASS::balanceProcesses( const MPI_CLASS &globalComm, const int method, - const std::vector &procs, const int N_min_in, const int N_max_in ) -{ - // Build the list of processors to use - std::vector cpus = procs; - if ( cpus.empty() ) { - for ( int i = 0; i < getNumberOfProcessors(); i++ ) - cpus.push_back( i ); - } - // Handle the "easy cases" - if ( method == 1 ) { - // Trivial case where we do not need any communication - setProcessAffinity( cpus ); - return; - } - // Get the sub-communicator for the current node - MPI_CLASS nodeComm = globalComm.splitByNode(); - int N_min = std::min( std::max( N_min_in, 1 ), cpus.size() ); - int N_max = N_max_in; - if ( N_max == -1 ) - N_max = cpus.size(); - N_max = std::min( N_max, cpus.size() ); - MPI_ASSERT( N_max >= N_min ); - // Perform the load balance within the node - if ( method == 2 ) { - int N_proc = cpus.size() / nodeComm.getSize(); - N_proc = std::max( N_proc, N_min ); - N_proc = std::min( N_proc, N_max ); - std::vector cpus2( N_proc, -1 ); - for ( int i = 0; i < N_proc; i++ ) - cpus2[i] = cpus[( nodeComm.getRank() * N_proc + i ) % cpus.size()]; - setProcessAffinity( cpus2 ); - } else { - MPI_ERROR( "Unknown method for load balance" ); - } -} - - -/************************************************************************ - * Empty constructor * - ************************************************************************/ -MPI_CLASS::MPI_CLASS() -{ -// Initialize the data members to a defaul communicator of self -#ifdef USE_MPI - communicator = MPI_COMM_NULL; - d_maxTag = 0x7FFFFFFF; -#else - communicator = MPI_CLASS_COMM_NULL; - d_maxTag = mpi_max_tag; -#endif - d_ranks = nullptr; - d_count = nullptr; - d_manage = false; - comm_rank = 0; - comm_size = 1; - d_isNull = true; - d_currentTag = nullptr; - d_call_abort = true; - tmp_alignment = -1; -} - - -/************************************************************************ - * Empty deconstructor * - ************************************************************************/ -MPI_CLASS::~MPI_CLASS() { reset(); } -void MPI_CLASS::reset() -{ - // Decrement the count if used - int count = -1; - if ( d_count != nullptr ) - count = --( *d_count ); - if ( count == 0 ) { - // We are holding that last reference to the MPI_Comm object, we need to free it - if ( d_manage ) { -#ifdef USE_MPI - MPI_Comm_set_errhandler( communicator, MPI_ERRORS_ARE_FATAL ); - int 
err = MPI_Comm_free( &communicator ); - if ( err != MPI_SUCCESS ) - MPI_ERROR( "Problem free'ing MPI_Comm object" ); - communicator = MPI_CLASS_COMM_NULL; - ++N_MPI_Comm_destroyed; -#endif - } - if ( d_ranks != nullptr ) - delete[] d_ranks; - delete d_count; - } - if ( d_currentTag == nullptr ) { - // No tag index - } else if ( d_currentTag[1] > 1 ) { - --( d_currentTag[1] ); - } else { - delete[] d_currentTag; - } - d_manage = false; - d_count = nullptr; - d_ranks = nullptr; - comm_rank = 0; - comm_size = 1; - d_maxTag = 0; - d_isNull = true; - d_currentTag = nullptr; - d_call_abort = true; -} - - -/************************************************************************ - * Copy constructors * - ************************************************************************/ -MPI_CLASS::MPI_CLASS( const MPI_CLASS &comm ) - : communicator( comm.communicator ), - d_isNull( comm.d_isNull ), - d_manage( comm.d_manage ), - comm_rank( comm.comm_rank ), - comm_size( comm.comm_size ), - d_ranks( comm.d_ranks ), - d_maxTag( comm.d_maxTag ), - d_currentTag( comm.d_currentTag ) -{ - // Initialize the data members to the existing comm object - if ( d_currentTag != nullptr ) - ++d_currentTag[1]; - d_call_abort = comm.d_call_abort; - // Set and increment the count - d_count = comm.d_count; - if ( d_count != nullptr ) - ++( *d_count ); - tmp_alignment = -1; -} -MPI_CLASS::MPI_CLASS( MPI_CLASS &&rhs ) : MPI_CLASS() -{ - std::swap( communicator, rhs.communicator ); - std::swap( d_isNull, rhs.d_isNull ); - std::swap( d_manage, rhs.d_manage ); - std::swap( d_call_abort, rhs.d_call_abort ); - std::swap( profile_level, rhs.profile_level ); - std::swap( comm_rank, rhs.comm_rank ); - std::swap( comm_size, rhs.comm_size ); - std::swap( d_ranks, rhs.d_ranks ); - std::swap( d_maxTag, rhs.d_maxTag ); - std::swap( d_currentTag, rhs.d_currentTag ); - std::swap( d_count, rhs.d_count ); - std::swap( tmp_alignment, rhs.tmp_alignment ); -} - - -/************************************************************************ - * Assignment operators * - ************************************************************************/ -MPI_CLASS &MPI_CLASS::operator=( const MPI_CLASS &comm ) -{ - if ( this == &comm ) // protect against invalid self-assignment - return *this; - // Destroy the previous object - this->reset(); - // Initialize the data members to the existing object - this->communicator = comm.communicator; - this->comm_rank = comm.comm_rank; - this->comm_size = comm.comm_size; - this->d_ranks = comm.d_ranks; - this->d_isNull = comm.d_isNull; - this->d_manage = comm.d_manage; - this->d_maxTag = comm.d_maxTag; - this->d_call_abort = comm.d_call_abort; - this->d_currentTag = comm.d_currentTag; - if ( this->d_currentTag != nullptr ) - ++( this->d_currentTag[1] ); - // Set and increment the count - this->d_count = comm.d_count; - if ( this->d_count != nullptr ) - ++( *d_count ); - this->tmp_alignment = -1; - return *this; -} -MPI_CLASS &MPI_CLASS::operator=( MPI_CLASS &&rhs ) -{ - if ( this == &rhs ) // protect against invalid self-assignment - return *this; - std::swap( communicator, rhs.communicator ); - std::swap( d_isNull, rhs.d_isNull ); - std::swap( d_manage, rhs.d_manage ); - std::swap( d_call_abort, rhs.d_call_abort ); - std::swap( profile_level, rhs.profile_level ); - std::swap( comm_rank, rhs.comm_rank ); - std::swap( comm_size, rhs.comm_size ); - std::swap( d_ranks, rhs.d_ranks ); - std::swap( d_maxTag, rhs.d_maxTag ); - std::swap( d_currentTag, rhs.d_currentTag ); - std::swap( d_count, rhs.d_count ); - std::swap( 
tmp_alignment, rhs.tmp_alignment ); - return *this; -} - - -/************************************************************************ - * Constructor from existing MPI communicator * - ************************************************************************/ -int d_global_currentTag_world1[2] = { 1, 1 }; -int d_global_currentTag_world2[2] = { 1, 1 }; -int d_global_currentTag_self[2] = { 1, 1 }; -#ifdef USE_MPI -std::atomic_int d_global_count_world1 = { 1 }; -std::atomic_int d_global_count_world2 = { 1 }; -std::atomic_int d_global_count_self = { 1 }; -#endif -MPI_CLASS::MPI_CLASS( MPI_Comm comm, bool manage ) -{ - d_count = nullptr; - d_ranks = nullptr; - d_manage = false; - tmp_alignment = -1; - // Check if we are using our version of comm_world - if ( comm == MPI_CLASS_COMM_WORLD ) { - communicator = MPI_COMM_WORLD; - } else if ( comm == MPI_CLASS_COMM_SELF ) { - communicator = MPI_COMM_SELF; - } else if ( comm == MPI_CLASS_COMM_NULL ) { - communicator = MPI_COMM_NULL; - } else { - communicator = comm; - } -#ifdef USE_MPI - // We are using MPI, use the MPI communicator to initialize the data - if ( communicator != MPI_COMM_NULL ) { - // Set the MPI_SIZE_T datatype if it has not been set - if ( MPI_SIZE_T == 0x0 ) - MPI_SIZE_T = getSizeTDataType(); - // Attach the error handler - StackTrace::setMPIErrorHandler( communicator ); - // Get the communicator properties - MPI_Comm_rank( communicator, &comm_rank ); - MPI_Comm_size( communicator, &comm_size ); - int flag, *val; - int ierr = MPI_Comm_get_attr( communicator, MPI_TAG_UB, &val, &flag ); - MPI_ASSERT( ierr == MPI_SUCCESS ); - if ( flag == 0 ) { - d_maxTag = 0x7FFFFFFF; // The tag is not a valid attribute (set to 2^31-1) - } else { - d_maxTag = *val; - if ( d_maxTag < 0 ) { - d_maxTag = 0x7FFFFFFF; - } // The maximum tag is > a signed int (set to 2^31-1) - MPI_INSIST( d_maxTag >= 0x7FFF, "maximum tag size is < MPI standard" ); - } - } else { - comm_rank = 1; - comm_size = 0; - d_maxTag = 0x7FFFFFFF; - } - d_isNull = communicator == MPI_COMM_NULL; - if ( manage && communicator != MPI_COMM_NULL && communicator != MPI_COMM_SELF && - communicator != MPI_COMM_WORLD ) - d_manage = true; - // Create the count (Note: we do not need to worry about thread safety) - if ( communicator == MPI_CLASS_COMM_WORLD ) { - d_count = &d_global_count_world1; - ++( *d_count ); - } else if ( communicator == MPI_COMM_WORLD ) { - d_count = &d_global_count_world2; - ++( *d_count ); - } else if ( communicator == MPI_COMM_SELF ) { - d_count = &d_global_count_self; - ++( *d_count ); - } else if ( communicator == MPI_COMM_NULL ) { - d_count = nullptr; - } else { - d_count = new std::atomic_int; - *d_count = 1; - } - if ( d_manage ) - ++N_MPI_Comm_created; - // Create d_ranks - if ( comm_size > 1 ) { - d_ranks = new int[comm_size]; - d_ranks[0] = -1; - } -#else - // We are not using MPI, intialize based on the communicator - NULL_USE( manage ); - comm_rank = 0; - comm_size = 1; - d_maxTag = mpi_max_tag; - d_isNull = communicator == MPI_COMM_NULL; - if ( d_isNull ) - comm_size = 0; -#endif - if ( communicator == MPI_CLASS_COMM_WORLD ) { - d_currentTag = d_global_currentTag_world1; - ++( this->d_currentTag[1] ); - } else if ( communicator == MPI_COMM_WORLD ) { - d_currentTag = d_global_currentTag_world2; - ++( this->d_currentTag[1] ); - } else if ( communicator == MPI_COMM_SELF ) { - d_currentTag = d_global_currentTag_self; - ++( this->d_currentTag[1] ); - } else if ( communicator == MPI_COMM_NULL ) { - d_currentTag = nullptr; - } else { - d_currentTag = new int[2]; - 
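// Standalone sketch (not part of the patch): the constructor above reads the
// communicator's maximum tag through the MPI_TAG_UB attribute; shown here in
// isolation with an illustrative helper name.
//
// #include <mpi.h>
// int queryMaxTag( MPI_Comm comm )
// {
//     int flag = 0;
//     int *val = nullptr;
//     MPI_Comm_get_attr( comm, MPI_TAG_UB, &val, &flag );
//     if ( flag == 0 || val == nullptr )
//         return 0x7FFFFFFF; // attribute not set: fall back to 2^31-1
//     return *val;
// }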
d_currentTag[0] = ( d_maxTag <= 0x10000 ) ? 1 : 0x1FFF; - d_currentTag[1] = 1; - } - d_call_abort = true; -} - - -/************************************************************************ - * Return the ranks of the communicator in the global comm * - ************************************************************************/ -std::vector MPI_CLASS::globalRanks() const -{ - // Get my global rank if it has not been set - static int myGlobalRank = -1; - if ( myGlobalRank == -1 ) { -#ifdef USE_MPI - if ( MPI_active() ) - MPI_Comm_rank( MPI_CLASS_COMM_WORLD, &myGlobalRank ); -#else - myGlobalRank = 0; -#endif - } - // Check if we are dealing with a serial or null communicator - if ( comm_size == 1 ) - return std::vector( 1, myGlobalRank ); - if ( d_ranks == nullptr || communicator == MPI_COMM_NULL ) - return std::vector(); - // Fill d_ranks if necessary - if ( d_ranks[0] == -1 ) { - if ( communicator == MPI_CLASS_COMM_WORLD ) { - for ( int i = 0; i < comm_size; i++ ) - d_ranks[i] = i; - } else { - - MPI_ASSERT( myGlobalRank != -1 ); - this->allGather( myGlobalRank, d_ranks ); - } - } - // Return d_ranks - return std::vector( d_ranks, d_ranks + comm_size ); -} - - -/************************************************************************ - * Generate a random number * - ************************************************************************/ -size_t MPI_CLASS::rand() const -{ - size_t val = 0; - if ( getRank() == 0 ) { - static std::random_device rd; - static std::mt19937 gen( rd() ); - static std::uniform_int_distribution dist; - val = dist( gen ); - } - val = bcast( val, 0 ); - return val; -} - - -/************************************************************************ - * Intersect two communicators * - ************************************************************************/ -#ifdef USE_MPI -static inline void MPI_Group_free2( MPI_Group *group ) -{ - if ( *group != MPI_GROUP_EMPTY ) { - // MPICH is fine with free'ing an empty group, OpenMPI crashes - MPI_Group_free( group ); - } -} -MPI_CLASS MPI_CLASS::intersect( const MPI_CLASS &comm1, const MPI_CLASS &comm2 ) -{ - MPI_Group group1 = MPI_GROUP_EMPTY, group2 = MPI_GROUP_EMPTY; - if ( !comm1.isNull() ) { - MPI_Group_free2( &group1 ); - MPI_Comm_group( comm1.communicator, &group1 ); - } - if ( !comm2.isNull() ) { - MPI_Group_free2( &group2 ); - MPI_Comm_group( comm2.communicator, &group2 ); - } - MPI_Group group12; - MPI_Group_intersection( group1, group2, &group12 ); - int compare1, compare2; - MPI_Group_compare( group1, group12, &compare1 ); - MPI_Group_compare( group2, group12, &compare2 ); - MPI_CLASS new_comm( MPI_CLASS_COMM_NULL ); - int size; - MPI_Group_size( group12, &size ); - if ( compare1 != MPI_UNEQUAL && size != 0 ) { - // The intersection matches comm1 - new_comm = comm1; - } else if ( compare2 != MPI_UNEQUAL && size != 0 ) { - // The intersection matches comm2 - new_comm = comm2; - } else if ( comm1.isNull() ) { - // comm1 is null, we can return safely (comm1 is needed for communication) - } else { - // The intersection is smaller than comm1 or comm2 - // Check if the new comm is nullptr for all processors - int max_size = 0; - MPI_Allreduce( &size, &max_size, 1, MPI_INT, MPI_MAX, comm1.communicator ); - if ( max_size == 0 ) { - // We are dealing with completely disjoint sets - new_comm = MPI_CLASS( MPI_CLASS_COMM_NULL, false ); - } else { - // Create the new comm - // Note: OpenMPI crashes if the intersection group is EMPTY for any processors - // We will set it to SELF for the EMPTY processors, then create a nullptr comm 
later - if ( group12 == MPI_GROUP_EMPTY ) { - MPI_Group_free2( &group12 ); - MPI_Comm_group( MPI_COMM_SELF, &group12 ); - } - MPI_Comm new_MPI_comm; - MPI_Comm_create( comm1.communicator, group12, &new_MPI_comm ); - if ( size > 0 ) { - // This is the valid case where we create a new intersection comm - new_comm = MPI_CLASS( new_MPI_comm, true ); - } else { - // We actually want a null comm for this communicator - new_comm = MPI_CLASS( MPI_CLASS_COMM_NULL, false ); - MPI_Comm_free( &new_MPI_comm ); - } - } - } - MPI_Group_free2( &group1 ); - MPI_Group_free2( &group2 ); - MPI_Group_free2( &group12 ); - return new_comm; -} -#else -MPI_CLASS MPI_CLASS::intersect( const MPI_CLASS &comm1, const MPI_CLASS &comm2 ) -{ - if ( comm1.isNull() || comm2.isNull() ) - return MPI_CLASS( MPI_CLASS_COMM_NULL, false ); - MPI_ASSERT( comm1.comm_size == 1 && comm2.comm_size == 1 ); - return comm1; -} -#endif - - -/************************************************************************ - * Split a comm * - ************************************************************************/ -MPI_CLASS MPI_CLASS::split( int color, int key ) const -{ - if ( d_isNull ) { - return MPI_CLASS( MPI_CLASS_COMM_NULL ); - } else if ( comm_size == 1 ) { - if ( color == -1 ) - return MPI_CLASS( MPI_CLASS_COMM_NULL ); - return dup(); - } - MPI_Comm new_MPI_comm = MPI_CLASS_COMM_NULL; -#ifdef USE_MPI - // USE MPI to split the communicator - if ( color == -1 ) { - check_MPI( MPI_Comm_split( communicator, MPI_UNDEFINED, key, &new_MPI_comm ) ); - } else { - check_MPI( MPI_Comm_split( communicator, color, key, &new_MPI_comm ) ); - } -#endif - // Create the new object - NULL_USE( key ); - MPI_CLASS new_comm( new_MPI_comm, true ); - new_comm.d_call_abort = d_call_abort; - return new_comm; -} -MPI_CLASS MPI_CLASS::splitByNode( int key ) const -{ - // Check if we are dealing with a single processor (trivial case) - if ( comm_size == 1 ) - return this->split( 0, 0 ); - // Get the node name - std::string name = MPI_CLASS::getNodeName(); - // Gather the names from all ranks - std::vector list( comm_size ); - allGather( name, &list[0] ); - // Create the colors - std::vector color( comm_size, -1 ); - color[0] = 0; - for ( int i = 1; i < comm_size; i++ ) { - const std::string tmp1 = list[i]; - for ( int j = 0; j < i; j++ ) { - const std::string tmp2 = list[j]; - if ( tmp1 == tmp2 ) { - color[i] = color[j]; - break; - } - color[i] = color[i - 1] + 1; - } - } - MPI_CLASS new_comm = this->split( color[comm_rank], key ); - return new_comm; -} - - -/************************************************************************ - * Duplicate an exisiting comm object * - ************************************************************************/ -MPI_CLASS MPI_CLASS::dup() const -{ - if ( d_isNull ) - return MPI_CLASS( MPI_CLASS_COMM_NULL ); - MPI_Comm new_MPI_comm = communicator; -#if defined( USE_MPI ) || defined( USE_PETSC ) - // USE MPI to duplicate the communicator - MPI_Comm_dup( communicator, &new_MPI_comm ); -#else - new_MPI_comm = uniqueGlobalComm; - uniqueGlobalComm++; -#endif - // Create the new comm object - MPI_CLASS new_comm( new_MPI_comm, true ); - new_comm.d_isNull = d_isNull; - new_comm.d_call_abort = d_call_abort; - return new_comm; -} - - -/************************************************************************ - * Get the node name * - ************************************************************************/ -std::string MPI_CLASS::getNodeName() -{ -#ifdef USE_MPI - int length; - char name[MPI_MAX_PROCESSOR_NAME + 1]; - memset( name, 0, 
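splitByNode() above assigns colors by gathering and comparing node names. On MPI-3 and later the same grouping of ranks that share a node can also be obtained with MPI_Comm_split_type; the sketch below shows that alternative and is not the patch's implementation (splitSharedNode is an illustrative name):

#include <mpi.h>

MPI_Comm splitSharedNode( MPI_Comm comm )
{
    MPI_Comm node_comm = MPI_COMM_NULL;
    // Group ranks that can share memory, i.e. ranks on the same node
    MPI_Comm_split_type( comm, MPI_COMM_TYPE_SHARED, 0 /*key*/,
        MPI_INFO_NULL, &node_comm );
    return node_comm; // caller owns the result and should MPI_Comm_free it
}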
MPI_MAX_PROCESSOR_NAME + 1 ); - MPI_Get_processor_name( name, &length ); - return std::string( name ); -#else - return "Node0"; -#endif -} - - -/************************************************************************ - * Overload operator == * - ************************************************************************/ -bool MPI_CLASS::operator==( const MPI_CLASS &comm ) const -{ - return communicator == comm.communicator; -} - - -/************************************************************************ - * Overload operator != * - ************************************************************************/ -bool MPI_CLASS::operator!=( const MPI_CLASS &comm ) const -{ - return communicator != comm.communicator; -} - - -/************************************************************************ - * Overload operator < * - ************************************************************************/ -bool MPI_CLASS::operator<( const MPI_CLASS &comm ) const -{ - MPI_ASSERT( !this->d_isNull && !comm.d_isNull ); - bool flag = true; - // First check if either communicator is NULL - if ( this->d_isNull ) - return false; - if ( comm.d_isNull ) - flag = false; - // Use compare to check if the comms are equal - if ( compare( comm ) != 0 ) - return false; - // Check that the size of the other communicator is > the current communicator size - if ( comm_size >= comm.comm_size ) - flag = false; -// Check the union of the communicator groups -// this is < comm iff this group is a subgroup of comm's group -#ifdef USE_MPI - MPI_Group group1 = MPI_GROUP_EMPTY, group2 = MPI_GROUP_EMPTY, group12 = MPI_GROUP_EMPTY; - if ( !d_isNull ) - MPI_Comm_group( communicator, &group1 ); - if ( !comm.d_isNull ) - MPI_Comm_group( comm.communicator, &group2 ); - MPI_Group_union( group1, group2, &group12 ); - int compare; - MPI_Group_compare( group2, group12, &compare ); - if ( compare == MPI_UNEQUAL ) - flag = false; - MPI_Group_free( &group1 ); - MPI_Group_free( &group2 ); - MPI_Group_free( &group12 ); -#endif - // Perform a global reduce of the flag (equivalent to all operation) - return allReduce( flag ); -} - - -/************************************************************************ - * Overload operator <= * - ************************************************************************/ -bool MPI_CLASS::operator<=( const MPI_CLASS &comm ) const -{ - MPI_ASSERT( !this->d_isNull && !comm.d_isNull ); - bool flag = true; - // First check if either communicator is NULL - if ( this->d_isNull ) - return false; - if ( comm.d_isNull ) - flag = false; -#ifdef USE_MPI - int world_size = 0; - MPI_Comm_size( MPI_COMM_WORLD, &world_size ); - if ( comm.getSize() == world_size ) - return true; - if ( getSize() == 1 && !comm.d_isNull ) - return true; -#endif - // Use compare to check if the comms are equal - if ( compare( comm ) != 0 ) - return true; - // Check that the size of the other communicator is > the current communicator size - // this is <= comm iff this group is a subgroup of comm's group - if ( comm_size > comm.comm_size ) - flag = false; -// Check the unnion of the communicator groups -#ifdef USE_MPI - MPI_Group group1, group2, group12; - MPI_Comm_group( communicator, &group1 ); - MPI_Comm_group( comm.communicator, &group2 ); - MPI_Group_union( group1, group2, &group12 ); - int compare; - MPI_Group_compare( group2, group12, &compare ); - if ( compare == MPI_UNEQUAL ) - flag = false; - MPI_Group_free( &group1 ); - MPI_Group_free( &group2 ); - MPI_Group_free( &group12 ); -#endif - // Perform a global reduce of the flag (equivalent to 
all operation) - return allReduce( flag ); -} - - -/************************************************************************ - * Overload operator > * - ************************************************************************/ -bool MPI_CLASS::operator>( const MPI_CLASS &comm ) const -{ - bool flag = true; - // First check if either communicator is NULL - if ( this->d_isNull ) - return false; - if ( comm.d_isNull ) - flag = false; - // Use compare to check if the comms are equal - if ( compare( comm ) != 0 ) - return false; - // Check that the size of the other communicator is > the current communicator size - if ( comm_size <= comm.comm_size ) - flag = false; -// Check the unnion of the communicator groups -// this is > comm iff comm's group is a subgroup of this group -#ifdef USE_MPI - MPI_Group group1 = MPI_GROUP_EMPTY, group2 = MPI_GROUP_EMPTY, group12 = MPI_GROUP_EMPTY; - if ( !d_isNull ) - MPI_Comm_group( communicator, &group1 ); - if ( !comm.d_isNull ) - MPI_Comm_group( comm.communicator, &group2 ); - MPI_Group_union( group1, group2, &group12 ); - int compare; - MPI_Group_compare( group1, group12, &compare ); - if ( compare == MPI_UNEQUAL ) - flag = false; - MPI_Group_free( &group1 ); - MPI_Group_free( &group2 ); - MPI_Group_free( &group12 ); -#endif - // Perform a global reduce of the flag (equivalent to all operation) - return allReduce( flag ); -} - - -/************************************************************************ - * Overload operator >= * - ************************************************************************/ -bool MPI_CLASS::operator>=( const MPI_CLASS &comm ) const -{ - bool flag = true; - // First check if either communicator is NULL - if ( this->d_isNull ) - return false; - if ( comm.d_isNull ) - flag = false; -#ifdef USE_MPI - int world_size = 0; - MPI_Comm_size( MPI_COMM_WORLD, &world_size ); - if ( getSize() == world_size ) - return true; - if ( comm.getSize() == 1 && !comm.d_isNull ) - return true; -#endif - // Use compare to check if the comms are equal - if ( compare( comm ) != 0 ) - return true; - // Check that the size of the other communicator is > the current communicator size - if ( comm_size < comm.comm_size ) - flag = false; -// Check the unnion of the communicator groups -// this is >= comm iff comm's group is a subgroup of this group -#ifdef USE_MPI - MPI_Group group1 = MPI_GROUP_EMPTY, group2 = MPI_GROUP_EMPTY, group12 = MPI_GROUP_EMPTY; - if ( !d_isNull ) - MPI_Comm_group( communicator, &group1 ); - if ( !comm.d_isNull ) - MPI_Comm_group( comm.communicator, &group2 ); - MPI_Group_union( group1, group2, &group12 ); - int compare; - MPI_Group_compare( group1, group12, &compare ); - if ( compare == MPI_UNEQUAL ) - flag = false; - MPI_Group_free( &group1 ); - MPI_Group_free( &group2 ); - MPI_Group_free( &group12 ); -#endif - // Perform a global reduce of the flag (equivalent to all operation) - return allReduce( flag ); -} - - -/************************************************************************ - * Compare two comm objects * - ************************************************************************/ -int MPI_CLASS::compare( const MPI_CLASS &comm ) const -{ - if ( communicator == comm.communicator ) - return 1; -#ifdef USE_MPI - if ( d_isNull || comm.d_isNull ) - return 0; - int result; - check_MPI( MPI_Comm_compare( communicator, comm.communicator, &result ) ); - if ( result == MPI_IDENT ) - return 2; - else if ( result == MPI_CONGRUENT ) - return 3; - else if ( result == MPI_SIMILAR ) - return 4; - else if ( result == MPI_UNEQUAL ) - 
return 0; - MPI_ERROR( "Unknown results from comm compare" ); -#else - if ( comm.communicator == MPI_COMM_NULL || communicator == MPI_COMM_NULL ) - return 0; - else - return 3; -#endif - return 0; -} - - -/************************************************************************ - * Abort the program. * - ************************************************************************/ -void MPI_CLASS::setCallAbortInSerialInsteadOfExit( bool flag ) { d_call_abort = flag; } -void MPI_CLASS::abort() const -{ -#ifdef USE_MPI - MPI_Comm comm = communicator; - if ( comm == MPI_COMM_NULL ) - comm = MPI_COMM_WORLD; - if ( !MPI_active() ) { - // MPI is not availible - exit( -1 ); - } else if ( comm_size > 1 ) { - MPI_Abort( comm, -1 ); - } else if ( d_call_abort ) { - MPI_Abort( comm, -1 ); - } else { - exit( -1 ); - } -#else - exit( -1 ); -#endif -} - - -/************************************************************************ - * newTag * - ************************************************************************/ -int MPI_CLASS::newTag() -{ -#ifdef USE_MPI - // Syncronize the processes to ensure all ranks enter this call - // Needed so the count will match - barrier(); - // Return and increment the tag - int tag = ( *d_currentTag )++; - MPI_INSIST( tag <= d_maxTag, "Maximum number of tags exceeded\n" ); - return tag; -#else - static int globalCurrentTag = 1; - return globalCurrentTag++; -#endif -} - - -/************************************************************************ - * allReduce * - ************************************************************************/ -bool MPI_CLASS::allReduce( const bool value ) const -{ - bool ret = value; - if ( comm_size > 1 ) { -#ifdef USE_MPI - MPI_Allreduce( - (void *) &value, (void *) &ret, 1, MPI_UNSIGNED_CHAR, MPI_MIN, communicator ); -#else - MPI_ERROR( "This shouldn't be possible" ); -#endif - } - return ret; -} - - -/************************************************************************ - * anyReduce * - ************************************************************************/ -bool MPI_CLASS::anyReduce( const bool value ) const -{ - bool ret = value; - if ( comm_size > 1 ) { -#ifdef USE_MPI - MPI_Allreduce( - (void *) &value, (void *) &ret, 1, MPI_UNSIGNED_CHAR, MPI_MAX, communicator ); -#else - MPI_ERROR( "This shouldn't be possible" ); -#endif - } - return ret; -} - - -/************************************************************************ - * call_sumReduce * - * Note: these specializations are only called when using MPI. 
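allReduce(bool) above implements a logical AND by taking MPI_MIN over a single unsigned char, and anyReduce(bool) a logical OR via MPI_MAX. A minimal standalone sketch of both (allTrue/anyTrue are illustrative names; MPI_LAND/MPI_LOR on MPI_C_BOOL would be an equivalent choice):

#include <mpi.h>

bool allTrue( bool value, MPI_Comm comm )
{
    unsigned char in = value ? 1 : 0, out = 0;
    MPI_Allreduce( &in, &out, 1, MPI_UNSIGNED_CHAR, MPI_MIN, comm );
    return out != 0; // 1 only if every rank contributed true
}

bool anyTrue( bool value, MPI_Comm comm )
{
    unsigned char in = value ? 1 : 0, out = 0;
    MPI_Allreduce( &in, &out, 1, MPI_UNSIGNED_CHAR, MPI_MAX, comm );
    return out != 0; // 1 if at least one rank contributed true
}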
* - ************************************************************************/ -#ifdef USE_MPI -// unsigned char -template<> -void MPI_CLASS::call_sumReduce( - const unsigned char *send, unsigned char *recv, const int n ) const -{ - PROFILE_START( "sumReduce1", profile_level ); - MPI_Allreduce( (void *) send, (void *) recv, n, MPI_UNSIGNED_CHAR, MPI_SUM, communicator ); - PROFILE_STOP( "sumReduce1", profile_level ); -} -template<> -void MPI_CLASS::call_sumReduce( unsigned char *x, const int n ) const -{ - PROFILE_START( "sumReduce2", profile_level ); - auto send = x; - auto recv = new unsigned char[n]; - MPI_Allreduce( send, recv, n, MPI_UNSIGNED_CHAR, MPI_SUM, communicator ); - for ( int i = 0; i < n; i++ ) - x[i] = recv[i]; - delete[] recv; - PROFILE_STOP( "sumReduce2", profile_level ); -} -// char -template<> -void MPI_CLASS::call_sumReduce( const char *send, char *recv, const int n ) const -{ - PROFILE_START( "sumReduce1", profile_level ); - MPI_Allreduce( (void *) send, (void *) recv, n, MPI_SIGNED_CHAR, MPI_SUM, communicator ); - PROFILE_STOP( "sumReduce1", profile_level ); -} -template<> -void MPI_CLASS::call_sumReduce( char *x, const int n ) const -{ - PROFILE_START( "sumReduce2", profile_level ); - auto send = x; - auto recv = new char[n]; - MPI_Allreduce( send, recv, n, MPI_SIGNED_CHAR, MPI_SUM, communicator ); - for ( int i = 0; i < n; i++ ) - x[i] = recv[i]; - delete[] recv; - PROFILE_STOP( "sumReduce2", profile_level ); -} -// unsigned int -template<> -void MPI_CLASS::call_sumReduce( - const unsigned int *send, unsigned int *recv, const int n ) const -{ - PROFILE_START( "sumReduce1", profile_level ); - MPI_Allreduce( (void *) send, (void *) recv, n, MPI_UNSIGNED, MPI_SUM, communicator ); - PROFILE_STOP( "sumReduce1", profile_level ); -} -template<> -void MPI_CLASS::call_sumReduce( unsigned int *x, const int n ) const -{ - PROFILE_START( "sumReduce2", profile_level ); - auto send = x; - auto recv = new unsigned int[n]; - MPI_Allreduce( send, recv, n, MPI_UNSIGNED, MPI_SUM, communicator ); - for ( int i = 0; i < n; i++ ) - x[i] = recv[i]; - delete[] recv; - PROFILE_STOP( "sumReduce2", profile_level ); -} -// int -template<> -void MPI_CLASS::call_sumReduce( const int *send, int *recv, const int n ) const -{ - PROFILE_START( "sumReduce1", profile_level ); - MPI_Allreduce( (void *) send, (void *) recv, n, MPI_INT, MPI_SUM, communicator ); - PROFILE_STOP( "sumReduce1", profile_level ); -} -template<> -void MPI_CLASS::call_sumReduce( int *x, const int n ) const -{ - PROFILE_START( "sumReduce2", profile_level ); - auto send = x; - auto recv = new int[n]; - MPI_Allreduce( send, recv, n, MPI_INT, MPI_SUM, communicator ); - for ( int i = 0; i < n; i++ ) - x[i] = recv[i]; - delete[] recv; - PROFILE_STOP( "sumReduce2", profile_level ); -} -// long int -template<> -void MPI_CLASS::call_sumReduce( const long int *send, long int *recv, const int n ) const -{ - PROFILE_START( "sumReduce1", profile_level ); - MPI_Allreduce( (void *) send, (void *) recv, n, MPI_LONG, MPI_SUM, communicator ); - PROFILE_STOP( "sumReduce1", profile_level ); -} -template<> -void MPI_CLASS::call_sumReduce( long int *x, const int n ) const -{ - PROFILE_START( "sumReduce2", profile_level ); - auto send = x; - auto recv = new long int[n]; - MPI_Allreduce( send, recv, n, MPI_LONG, MPI_SUM, communicator ); - for ( int i = 0; i < n; i++ ) - x[i] = recv[i]; - delete[] recv; - PROFILE_STOP( "sumReduce2", profile_level ); -} -// unsigned long int -template<> -void MPI_CLASS::call_sumReduce( - const unsigned long *send, 
unsigned long *recv, const int n ) const -{ - PROFILE_START( "sumReduce1", profile_level ); - MPI_Allreduce( (void *) send, (void *) recv, n, MPI_UNSIGNED_LONG, MPI_SUM, communicator ); - PROFILE_STOP( "sumReduce1", profile_level ); -} -template<> -void MPI_CLASS::call_sumReduce( unsigned long *x, const int n ) const -{ - PROFILE_START( "sumReduce2", profile_level ); - auto send = x; - auto recv = new unsigned long int[n]; - MPI_Allreduce( send, recv, n, MPI_UNSIGNED_LONG, MPI_SUM, communicator ); - for ( int i = 0; i < n; i++ ) - x[i] = recv[i]; - delete[] recv; - PROFILE_STOP( "sumReduce2", profile_level ); -} -// size_t -#ifdef USE_WINDOWS -template<> -void MPI_CLASS::call_sumReduce( const size_t *send, size_t *recv, const int n ) const -{ - MPI_ASSERT( MPI_SIZE_T != 0 ); - PROFILE_START( "sumReduce1", profile_level ); - MPI_Allreduce( (void *) send, (void *) recv, n, MPI_SIZE_T, MPI_SUM, communicator ); - PROFILE_STOP( "sumReduce1", profile_level ); -} -template<> -void MPI_CLASS::call_sumReduce( size_t *x, const int n ) const -{ - MPI_ASSERT( MPI_SIZE_T != 0 ); - PROFILE_START( "sumReduce2", profile_level ); - auto send = x; - auto recv = new size_t[n]; - MPI_Allreduce( (void *) send, (void *) recv, n, MPI_SIZE_T, MPI_SUM, communicator ); - for ( int i = 0; i < n; i++ ) - x[i] = recv[i]; - delete[] recv; - PROFILE_STOP( "sumReduce2", profile_level ); -} -#endif -// float -template<> -void MPI_CLASS::call_sumReduce( const float *send, float *recv, const int n ) const -{ - PROFILE_START( "sumReduce1", profile_level ); - MPI_Allreduce( (void *) send, (void *) recv, n, MPI_FLOAT, MPI_SUM, communicator ); - PROFILE_STOP( "sumReduce1", profile_level ); -} -template<> -void MPI_CLASS::call_sumReduce( float *x, const int n ) const -{ - PROFILE_START( "sumReduce2", profile_level ); - auto send = x; - auto recv = new float[n]; - MPI_Allreduce( send, recv, n, MPI_FLOAT, MPI_SUM, communicator ); - for ( int i = 0; i < n; i++ ) - x[i] = recv[i]; - delete[] recv; - PROFILE_STOP( "sumReduce2", profile_level ); -} -// double -template<> -void MPI_CLASS::call_sumReduce( const double *send, double *recv, const int n ) const -{ - PROFILE_START( "sumReduce1", profile_level ); - MPI_Allreduce( (void *) send, (void *) recv, n, MPI_DOUBLE, MPI_SUM, communicator ); - PROFILE_STOP( "sumReduce1", profile_level ); -} -template<> -void MPI_CLASS::call_sumReduce( double *x, const int n ) const -{ - PROFILE_START( "sumReduce2", profile_level ); - auto send = x; - auto recv = new double[n]; - MPI_Allreduce( send, recv, n, MPI_DOUBLE, MPI_SUM, communicator ); - for ( int i = 0; i < n; i++ ) - x[i] = recv[i]; - delete[] recv; - PROFILE_STOP( "sumReduce2", profile_level ); -} -// std::complex -template<> -void MPI_CLASS::call_sumReduce>( - const std::complex *x, std::complex *y, const int n ) const -{ - PROFILE_START( "sumReduce1", profile_level ); - auto send = new double[2 * n]; - auto recv = new double[2 * n]; - for ( int i = 0; i < n; i++ ) { - send[2 * i + 0] = real( x[i] ); - send[2 * i + 1] = imag( x[i] ); - } - MPI_Allreduce( (void *) send, (void *) recv, 2 * n, MPI_DOUBLE, MPI_SUM, communicator ); - for ( int i = 0; i < n; i++ ) - y[i] = std::complex( recv[2 * i + 0], recv[2 * i + 1] ); - delete[] send; - delete[] recv; - PROFILE_STOP( "sumReduce1", profile_level ); -} -template<> -void MPI_CLASS::call_sumReduce>( std::complex *x, const int n ) const -{ - PROFILE_START( "sumReduce2", profile_level ); - auto send = new double[2 * n]; - auto recv = new double[2 * n]; - for ( int i = 0; i < n; i++ ) { - send[2 * 
i + 0] = real( x[i] ); - send[2 * i + 1] = imag( x[i] ); - } - MPI_Allreduce( send, recv, 2 * n, MPI_DOUBLE, MPI_SUM, communicator ); - for ( int i = 0; i < n; i++ ) - x[i] = std::complex( recv[2 * i + 0], recv[2 * i + 1] ); - delete[] send; - delete[] recv; - PROFILE_STOP( "sumReduce2", profile_level ); -} -#endif - - -/************************************************************************ - * call_minReduce * - * Note: these specializations are only called when using MPI. * - ************************************************************************/ -#ifdef USE_MPI -// unsigned char -template<> -void MPI_CLASS::call_minReduce( - const unsigned char *send, unsigned char *recv, const int n, int *comm_rank_of_min ) const -{ - if ( comm_rank_of_min == nullptr ) { - PROFILE_START( "minReduce1", profile_level ); - MPI_Allreduce( (void *) send, (void *) recv, n, MPI_UNSIGNED_CHAR, MPI_MIN, communicator ); - PROFILE_STOP( "minReduce1", profile_level ); - } else { - auto tmp = new int[n]; - for ( int i = 0; i < n; i++ ) - tmp[i] = send[i]; - call_minReduce( tmp, n, comm_rank_of_min ); - for ( int i = 0; i < n; i++ ) - recv[i] = static_cast( tmp[i] ); - delete[] tmp; - } -} -template<> -void MPI_CLASS::call_minReduce( - unsigned char *x, const int n, int *comm_rank_of_min ) const -{ - if ( comm_rank_of_min == nullptr ) { - PROFILE_START( "minReduce2", profile_level ); - auto send = x; - auto recv = new unsigned char[n]; - MPI_Allreduce( send, recv, n, MPI_UNSIGNED_CHAR, MPI_MIN, communicator ); - for ( int i = 0; i < n; i++ ) - x[i] = recv[i]; - delete[] recv; - PROFILE_STOP( "minReduce2", profile_level ); - } else { - auto tmp = new int[n]; - for ( int i = 0; i < n; i++ ) - tmp[i] = x[i]; - call_minReduce( tmp, n, comm_rank_of_min ); - for ( int i = 0; i < n; i++ ) - x[i] = static_cast( tmp[i] ); - delete[] tmp; - } -} -// char -template<> -void MPI_CLASS::call_minReduce( - const char *send, char *recv, const int n, int *comm_rank_of_min ) const -{ - if ( comm_rank_of_min == nullptr ) { - PROFILE_START( "minReduce1", profile_level ); - MPI_Allreduce( (void *) send, (void *) recv, n, MPI_SIGNED_CHAR, MPI_MIN, communicator ); - PROFILE_STOP( "minReduce1", profile_level ); - } else { - auto tmp = new int[n]; - for ( int i = 0; i < n; i++ ) - tmp[i] = send[i]; - call_minReduce( tmp, n, comm_rank_of_min ); - for ( int i = 0; i < n; i++ ) - recv[i] = static_cast( tmp[i] ); - delete[] tmp; - } -} -template<> -void MPI_CLASS::call_minReduce( char *x, const int n, int *comm_rank_of_min ) const -{ - if ( comm_rank_of_min == nullptr ) { - PROFILE_START( "minReduce2", profile_level ); - auto send = x; - auto recv = new char[n]; - MPI_Allreduce( send, recv, n, MPI_SIGNED_CHAR, MPI_MIN, communicator ); - for ( int i = 0; i < n; i++ ) - x[i] = recv[i]; - delete[] recv; - PROFILE_STOP( "minReduce2", profile_level ); - } else { - auto tmp = new int[n]; - for ( int i = 0; i < n; i++ ) - tmp[i] = x[i]; - call_minReduce( tmp, n, comm_rank_of_min ); - for ( int i = 0; i < n; i++ ) - x[i] = static_cast( tmp[i] ); - delete[] tmp; - } -} -// unsigned int -template<> -void MPI_CLASS::call_minReduce( - const unsigned int *send, unsigned int *recv, const int n, int *comm_rank_of_min ) const -{ - if ( comm_rank_of_min == nullptr ) { - PROFILE_START( "minReduce1", profile_level ); - MPI_Allreduce( (void *) send, (void *) recv, n, MPI_UNSIGNED, MPI_MIN, communicator ); - PROFILE_STOP( "minReduce1", profile_level ); - } else { - auto tmp = new int[n]; - for ( int i = 0; i < n; i++ ) - tmp[i] = unsigned_to_signed( send[i] ); - 
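// Note on the call_sumReduce specializations above: the in-place variants allocate a
// scratch receive buffer and copy the result back, and the std::complex<double>
// variant packs each value into two doubles before the reduction.  A minimal raw-MPI
// equivalent of the in-place double sum, using MPI_IN_PLACE to avoid the temporary
// buffer, would be (assumes <mpi.h> is included):
static void example_inplace_sum( MPI_Comm comm, double *x, int n )
{
    // The global sums overwrite x on every rank; no extra buffer is required.
    MPI_Allreduce( MPI_IN_PLACE, x, n, MPI_DOUBLE, MPI_SUM, comm );
}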
call_minReduce( tmp, n, comm_rank_of_min ); - for ( int i = 0; i < n; i++ ) - recv[i] = signed_to_unsigned( tmp[i] ); - delete[] tmp; - } -} -template<> -void MPI_CLASS::call_minReduce( - unsigned int *x, const int n, int *comm_rank_of_min ) const -{ - if ( comm_rank_of_min == nullptr ) { - PROFILE_START( "minReduce2", profile_level ); - auto send = x; - auto recv = new unsigned int[n]; - MPI_Allreduce( send, recv, n, MPI_UNSIGNED, MPI_MIN, communicator ); - for ( int i = 0; i < n; i++ ) - x[i] = recv[i]; - delete[] recv; - PROFILE_STOP( "minReduce2", profile_level ); - } else { - auto tmp = new int[n]; - for ( int i = 0; i < n; i++ ) - tmp[i] = unsigned_to_signed( x[i] ); - call_minReduce( tmp, n, comm_rank_of_min ); - for ( int i = 0; i < n; i++ ) - x[i] = signed_to_unsigned( tmp[i] ); - delete[] tmp; - } -} -// int -template<> -void MPI_CLASS::call_minReduce( - const int *x, int *y, const int n, int *comm_rank_of_min ) const -{ - PROFILE_START( "minReduce1", profile_level ); - if ( comm_rank_of_min == nullptr ) { - MPI_Allreduce( (void *) x, (void *) y, n, MPI_INT, MPI_MIN, communicator ); - } else { - auto recv = new IntIntStruct[n]; - auto send = new IntIntStruct[n]; - for ( int i = 0; i < n; ++i ) { - send[i].j = x[i]; - send[i].i = comm_rank; - } - MPI_Allreduce( send, recv, n, MPI_2INT, MPI_MINLOC, communicator ); - for ( int i = 0; i < n; ++i ) { - y[i] = recv[i].j; - comm_rank_of_min[i] = recv[i].i; - } - delete[] recv; - delete[] send; - } - PROFILE_STOP( "minReduce1", profile_level ); -} -template<> -void MPI_CLASS::call_minReduce( int *x, const int n, int *comm_rank_of_min ) const -{ - PROFILE_START( "minReduce2", profile_level ); - if ( comm_rank_of_min == nullptr ) { - auto send = x; - auto recv = new int[n]; - MPI_Allreduce( send, recv, n, MPI_INT, MPI_MIN, communicator ); - for ( int i = 0; i < n; i++ ) - x[i] = recv[i]; - delete[] recv; - } else { - auto recv = new IntIntStruct[n]; - auto send = new IntIntStruct[n]; - for ( int i = 0; i < n; ++i ) { - send[i].j = x[i]; - send[i].i = comm_rank; - } - MPI_Allreduce( send, recv, n, MPI_2INT, MPI_MINLOC, communicator ); - for ( int i = 0; i < n; ++i ) { - x[i] = recv[i].j; - comm_rank_of_min[i] = recv[i].i; - } - delete[] recv; - delete[] send; - } - PROFILE_STOP( "minReduce2", profile_level ); -} -// unsigned long int -template<> -void MPI_CLASS::call_minReduce( const unsigned long int *send, - unsigned long int *recv, const int n, int *comm_rank_of_min ) const -{ - if ( comm_rank_of_min == nullptr ) { - PROFILE_START( "minReduce1", profile_level ); - MPI_Allreduce( (void *) send, (void *) recv, n, MPI_UNSIGNED_LONG, MPI_MIN, communicator ); - PROFILE_STOP( "minReduce1", profile_level ); - } else { - auto tmp = new long int[n]; - for ( int i = 0; i < n; i++ ) - tmp[i] = unsigned_to_signed( send[i] ); - call_minReduce( tmp, n, comm_rank_of_min ); - for ( int i = 0; i < n; i++ ) - recv[i] = signed_to_unsigned( tmp[i] ); - delete[] tmp; - } -} -template<> -void MPI_CLASS::call_minReduce( - unsigned long int *x, const int n, int *comm_rank_of_min ) const -{ - if ( comm_rank_of_min == nullptr ) { - PROFILE_START( "minReduce2", profile_level ); - auto send = x; - auto recv = new unsigned long int[n]; - MPI_Allreduce( send, recv, n, MPI_UNSIGNED_LONG, MPI_MIN, communicator ); - for ( int i = 0; i < n; i++ ) - x[i] = recv[i]; - delete[] recv; - PROFILE_STOP( "minReduce2", profile_level ); - } else { - auto tmp = new long int[n]; - for ( int i = 0; i < n; i++ ) - tmp[i] = unsigned_to_signed( x[i] ); - call_minReduce( tmp, n, 
comm_rank_of_min ); - for ( int i = 0; i < n; i++ ) - x[i] = signed_to_unsigned( tmp[i] ); - delete[] tmp; - } -} -// long int -template<> -void MPI_CLASS::call_minReduce( - const long int *x, long int *y, const int n, int *comm_rank_of_min ) const -{ - PROFILE_START( "minReduce1", profile_level ); - if ( comm_rank_of_min == nullptr ) { - MPI_Allreduce( (void *) x, (void *) y, n, MPI_LONG, MPI_MIN, communicator ); - } else { - auto recv = new LongIntStruct[n]; - auto send = new LongIntStruct[n]; - for ( int i = 0; i < n; ++i ) { - send[i].j = x[i]; - send[i].i = comm_rank; - } - MPI_Allreduce( send, recv, n, MPI_LONG_INT, MPI_MINLOC, communicator ); - for ( int i = 0; i < n; ++i ) { - y[i] = recv[i].j; - comm_rank_of_min[i] = recv[i].i; - } - delete[] recv; - delete[] send; - } - PROFILE_STOP( "minReduce1", profile_level ); -} -template<> -void MPI_CLASS::call_minReduce( long int *x, const int n, int *comm_rank_of_min ) const -{ - PROFILE_START( "minReduce2", profile_level ); - if ( comm_rank_of_min == nullptr ) { - auto send = x; - auto recv = new long int[n]; - MPI_Allreduce( send, recv, n, MPI_LONG, MPI_MIN, communicator ); - for ( long int i = 0; i < n; i++ ) - x[i] = recv[i]; - delete[] recv; - } else { - auto recv = new LongIntStruct[n]; - auto send = new LongIntStruct[n]; - for ( int i = 0; i < n; ++i ) { - send[i].j = x[i]; - send[i].i = comm_rank; - } - MPI_Allreduce( send, recv, n, MPI_LONG_INT, MPI_MINLOC, communicator ); - for ( int i = 0; i < n; ++i ) { - x[i] = recv[i].j; - comm_rank_of_min[i] = recv[i].i; - } - delete[] recv; - delete[] send; - } - PROFILE_STOP( "minReduce2", profile_level ); -} -// unsigned long long int -template<> -void MPI_CLASS::call_minReduce( const unsigned long long int *send, - unsigned long long int *recv, const int n, int *comm_rank_of_min ) const -{ - PROFILE_START( "minReduce1", profile_level ); - if ( comm_rank_of_min == nullptr ) { - auto x = new long long int[n]; - auto y = new long long int[n]; - for ( int i = 0; i < n; i++ ) - x[i] = unsigned_to_signed( send[i] ); - MPI_Allreduce( (void *) x, (void *) y, n, MPI_LONG_LONG_INT, MPI_MIN, communicator ); - for ( int i = 0; i < n; i++ ) - recv[i] = signed_to_unsigned( y[i] ); - delete[] x; - delete[] y; - } else { - printf( "minReduce will use double\n" ); - auto tmp = new double[n]; - for ( int i = 0; i < n; i++ ) - tmp[i] = static_cast( send[i] ); - call_minReduce( tmp, n, comm_rank_of_min ); - for ( int i = 0; i < n; i++ ) - recv[i] = static_cast( tmp[i] ); - delete[] tmp; - } - PROFILE_STOP( "minReduce1", profile_level ); -} -template<> -void MPI_CLASS::call_minReduce( - unsigned long long int *x, const int n, int *comm_rank_of_min ) const -{ - auto recv = new unsigned long long int[n]; - call_minReduce( x, recv, n, comm_rank_of_min ); - for ( int i = 0; i < n; i++ ) - x[i] = recv[i]; - delete[] recv; -} -// long long int -template<> -void MPI_CLASS::call_minReduce( - const long long int *x, long long int *y, const int n, int *comm_rank_of_min ) const -{ - PROFILE_START( "minReduce1", profile_level ); - if ( comm_rank_of_min == nullptr ) { - MPI_Allreduce( (void *) x, (void *) y, n, MPI_LONG_LONG_INT, MPI_MIN, communicator ); - } else { - printf( "minReduce will use double\n" ); - auto tmp = new double[n]; - for ( int i = 0; i < n; i++ ) - tmp[i] = static_cast( x[i] ); - call_minReduce( tmp, n, comm_rank_of_min ); - for ( int i = 0; i < n; i++ ) - y[i] = static_cast( tmp[i] ); - delete[] tmp; - } - PROFILE_STOP( "minReduce1", profile_level ); -} -template<> -void MPI_CLASS::call_minReduce( - 
long long int *x, const int n, int *comm_rank_of_min ) const -{ - auto recv = new long long int[n]; - call_minReduce( x, recv, n, comm_rank_of_min ); - for ( int i = 0; i < n; i++ ) - x[i] = signed_to_unsigned( recv[i] ); - delete[] recv; -} -// float -template<> -void MPI_CLASS::call_minReduce( - const float *x, float *y, const int n, int *comm_rank_of_min ) const -{ - PROFILE_START( "minReduce1", profile_level ); - if ( comm_rank_of_min == nullptr ) { - MPI_Allreduce( (void *) x, (void *) y, n, MPI_INT, MPI_MIN, communicator ); - } else { - auto recv = new FloatIntStruct[n]; - auto send = new FloatIntStruct[n]; - for ( int i = 0; i < n; ++i ) { - send[i].f = x[i]; - send[i].i = comm_rank; - } - MPI_Allreduce( send, recv, n, MPI_FLOAT_INT, MPI_MINLOC, communicator ); - for ( int i = 0; i < n; ++i ) { - y[i] = recv[i].f; - comm_rank_of_min[i] = recv[i].i; - } - delete[] recv; - delete[] send; - } - PROFILE_STOP( "minReduce1", profile_level ); -} -template<> -void MPI_CLASS::call_minReduce( float *x, const int n, int *comm_rank_of_min ) const -{ - PROFILE_START( "minReduce2", profile_level ); - if ( comm_rank_of_min == nullptr ) { - auto send = x; - auto recv = new float[n]; - MPI_Allreduce( send, recv, n, MPI_FLOAT, MPI_MIN, communicator ); - for ( int i = 0; i < n; i++ ) - x[i] = recv[i]; - delete[] recv; - } else { - auto recv = new FloatIntStruct[n]; - auto send = new FloatIntStruct[n]; - for ( int i = 0; i < n; ++i ) { - send[i].f = x[i]; - send[i].i = comm_rank; - } - MPI_Allreduce( send, recv, n, MPI_FLOAT_INT, MPI_MINLOC, communicator ); - for ( int i = 0; i < n; ++i ) { - x[i] = recv[i].f; - comm_rank_of_min[i] = recv[i].i; - } - delete[] recv; - delete[] send; - } - PROFILE_STOP( "minReduce2", profile_level ); -} -// double -template<> -void MPI_CLASS::call_minReduce( - const double *x, double *y, const int n, int *comm_rank_of_min ) const -{ - PROFILE_START( "minReduce1", profile_level ); - if ( comm_rank_of_min == nullptr ) { - MPI_Allreduce( (void *) x, (void *) y, n, MPI_DOUBLE, MPI_MIN, communicator ); - } else { - auto recv = new DoubleIntStruct[n]; - auto send = new DoubleIntStruct[n]; - for ( int i = 0; i < n; ++i ) { - send[i].d = x[i]; - send[i].i = comm_rank; - } - MPI_Allreduce( send, recv, n, MPI_DOUBLE_INT, MPI_MINLOC, communicator ); - for ( int i = 0; i < n; ++i ) { - y[i] = recv[i].d; - comm_rank_of_min[i] = recv[i].i; - } - delete[] recv; - delete[] send; - } - PROFILE_STOP( "minReduce1", profile_level ); -} -template<> -void MPI_CLASS::call_minReduce( double *x, const int n, int *comm_rank_of_min ) const -{ - PROFILE_START( "minReduce2", profile_level ); - if ( comm_rank_of_min == nullptr ) { - auto send = x; - auto recv = new double[n]; - MPI_Allreduce( send, recv, n, MPI_DOUBLE, MPI_MIN, communicator ); - for ( int i = 0; i < n; i++ ) - x[i] = recv[i]; - delete[] recv; - } else { - auto recv = new DoubleIntStruct[n]; - auto send = new DoubleIntStruct[n]; - for ( int i = 0; i < n; ++i ) { - send[i].d = x[i]; - send[i].i = comm_rank; - } - MPI_Allreduce( send, recv, n, MPI_DOUBLE_INT, MPI_MINLOC, communicator ); - for ( int i = 0; i < n; ++i ) { - x[i] = recv[i].d; - comm_rank_of_min[i] = recv[i].i; - } - delete[] recv; - delete[] send; - } - PROFILE_STOP( "minReduce2", profile_level ); -} -#endif - - -/************************************************************************ - * call_maxReduce * - * Note: these specializations are only called when using MPI. 
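// Usage sketch for the call_minReduce specializations above when the rank holding
// the minimum is also requested: the int/long/float/double paths use MPI_MINLOC with
// the matching value/index pair type.  minReduce(double*,int,int*) is assumed to be
// the public wrapper in the class header that forwards to call_minReduce.
static void example_min_with_rank( const MPI_CLASS &comm, double localValue )
{
    int rankOfMin = -1;
    comm.minReduce( &localValue, 1, &rankOfMin ); // localValue becomes the global minimum
    (void) rankOfMin;                             // rank that owned the smallest value
}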
* - ************************************************************************/ -#ifdef USE_MPI -// unsigned char -template<> -void MPI_CLASS::call_maxReduce( - const unsigned char *send, unsigned char *recv, const int n, int *comm_rank_of_max ) const -{ - if ( comm_rank_of_max == nullptr ) { - PROFILE_START( "maxReduce1", profile_level ); - MPI_Allreduce( (void *) send, (void *) recv, n, MPI_UNSIGNED_CHAR, MPI_MAX, communicator ); - PROFILE_STOP( "maxReduce1", profile_level ); - } else { - auto tmp = new int[n]; - for ( int i = 0; i < n; i++ ) - tmp[i] = send[i]; - call_maxReduce( tmp, n, comm_rank_of_max ); - for ( int i = 0; i < n; i++ ) - recv[i] = static_cast( tmp[i] ); - delete[] tmp; - } -} -template<> -void MPI_CLASS::call_maxReduce( - unsigned char *x, const int n, int *comm_rank_of_max ) const -{ - if ( comm_rank_of_max == nullptr ) { - PROFILE_START( "maxReduce2", profile_level ); - auto send = x; - auto recv = new unsigned char[n]; - MPI_Allreduce( send, recv, n, MPI_UNSIGNED_CHAR, MPI_MAX, communicator ); - for ( int i = 0; i < n; i++ ) - x[i] = recv[i]; - delete[] recv; - PROFILE_STOP( "maxReduce2", profile_level ); - } else { - auto tmp = new int[n]; - for ( int i = 0; i < n; i++ ) - tmp[i] = x[i]; - call_maxReduce( tmp, n, comm_rank_of_max ); - for ( int i = 0; i < n; i++ ) - x[i] = static_cast( tmp[i] ); - delete[] tmp; - } -} -// char -template<> -void MPI_CLASS::call_maxReduce( - const char *send, char *recv, const int n, int *comm_rank_of_max ) const -{ - if ( comm_rank_of_max == nullptr ) { - PROFILE_START( "maxReduce1", profile_level ); - MPI_Allreduce( (void *) send, (void *) recv, n, MPI_SIGNED_CHAR, MPI_MAX, communicator ); - PROFILE_STOP( "maxReduce1", profile_level ); - } else { - auto tmp = new int[n]; - for ( int i = 0; i < n; i++ ) - tmp[i] = send[i]; - call_maxReduce( tmp, n, comm_rank_of_max ); - for ( int i = 0; i < n; i++ ) - recv[i] = static_cast( tmp[i] ); - delete[] tmp; - } -} -template<> -void MPI_CLASS::call_maxReduce( char *x, const int n, int *comm_rank_of_max ) const -{ - if ( comm_rank_of_max == nullptr ) { - PROFILE_START( "maxReduce2", profile_level ); - auto send = x; - auto recv = new char[n]; - MPI_Allreduce( send, recv, n, MPI_SIGNED_CHAR, MPI_MAX, communicator ); - for ( int i = 0; i < n; i++ ) - x[i] = recv[i]; - delete[] recv; - PROFILE_STOP( "maxReduce2", profile_level ); - } else { - auto tmp = new int[n]; - for ( int i = 0; i < n; i++ ) - tmp[i] = x[i]; - call_maxReduce( tmp, n, comm_rank_of_max ); - for ( int i = 0; i < n; i++ ) - x[i] = static_cast( tmp[i] ); - delete[] tmp; - } -} -// unsigned int -template<> -void MPI_CLASS::call_maxReduce( - const unsigned int *send, unsigned int *recv, const int n, int *comm_rank_of_max ) const -{ - if ( comm_rank_of_max == nullptr ) { - PROFILE_START( "maxReduce1", profile_level ); - MPI_Allreduce( (void *) send, (void *) recv, n, MPI_UNSIGNED, MPI_MAX, communicator ); - PROFILE_STOP( "maxReduce1", profile_level ); - } else { - auto tmp = new int[n]; - for ( int i = 0; i < n; i++ ) - tmp[i] = unsigned_to_signed( send[i] ); - call_maxReduce( tmp, n, comm_rank_of_max ); - for ( int i = 0; i < n; i++ ) - recv[i] = signed_to_unsigned( tmp[i] ); - delete[] tmp; - } -} -template<> -void MPI_CLASS::call_maxReduce( - unsigned int *x, const int n, int *comm_rank_of_max ) const -{ - if ( comm_rank_of_max == nullptr ) { - PROFILE_START( "maxReduce2", profile_level ); - auto send = x; - auto recv = new unsigned int[n]; - MPI_Allreduce( send, recv, n, MPI_UNSIGNED, MPI_MAX, communicator ); - for ( int i = 0; 
i < n; i++ ) - x[i] = recv[i]; - delete[] recv; - PROFILE_STOP( "maxReduce2", profile_level ); - } else { - auto tmp = new int[n]; - for ( int i = 0; i < n; i++ ) - tmp[i] = unsigned_to_signed( x[i] ); - call_maxReduce( tmp, n, comm_rank_of_max ); - for ( int i = 0; i < n; i++ ) - x[i] = signed_to_unsigned( tmp[i] ); - delete[] tmp; - } -} -// int -template<> -void MPI_CLASS::call_maxReduce( - const int *x, int *y, const int n, int *comm_rank_of_max ) const -{ - PROFILE_START( "maxReduce1", profile_level ); - if ( comm_rank_of_max == nullptr ) { - MPI_Allreduce( (void *) x, (void *) y, n, MPI_INT, MPI_MAX, communicator ); - } else { - auto recv = new IntIntStruct[n]; - auto send = new IntIntStruct[n]; - for ( int i = 0; i < n; ++i ) { - send[i].j = x[i]; - send[i].i = comm_rank; - } - MPI_Allreduce( send, recv, n, MPI_2INT, MPI_MAXLOC, communicator ); - for ( int i = 0; i < n; ++i ) { - y[i] = recv[i].j; - comm_rank_of_max[i] = recv[i].i; - } - delete[] recv; - delete[] send; - } - PROFILE_STOP( "maxReduce1", profile_level ); -} -template<> -void MPI_CLASS::call_maxReduce( int *x, const int n, int *comm_rank_of_max ) const -{ - PROFILE_START( "maxReduce2", profile_level ); - if ( comm_rank_of_max == nullptr ) { - int *send = x; - auto recv = new int[n]; - MPI_Allreduce( send, recv, n, MPI_INT, MPI_MAX, communicator ); - for ( int i = 0; i < n; i++ ) - x[i] = recv[i]; - delete[] recv; - } else { - auto recv = new IntIntStruct[n]; - auto send = new IntIntStruct[n]; - for ( int i = 0; i < n; ++i ) { - send[i].j = x[i]; - send[i].i = comm_rank; - } - MPI_Allreduce( send, recv, n, MPI_2INT, MPI_MAXLOC, communicator ); - for ( int i = 0; i < n; ++i ) { - x[i] = recv[i].j; - comm_rank_of_max[i] = recv[i].i; - } - delete[] recv; - delete[] send; - } - PROFILE_STOP( "maxReduce2", profile_level ); -} -// long int -template<> -void MPI_CLASS::call_maxReduce( - const long int *x, long int *y, const int n, int *comm_rank_of_max ) const -{ - PROFILE_START( "maxReduce1", profile_level ); - if ( comm_rank_of_max == nullptr ) { - MPI_Allreduce( (void *) x, (void *) y, n, MPI_LONG, MPI_MAX, communicator ); - } else { - auto recv = new LongIntStruct[n]; - auto send = new LongIntStruct[n]; - for ( int i = 0; i < n; ++i ) { - send[i].j = x[i]; - send[i].i = comm_rank; - } - MPI_Allreduce( send, recv, n, MPI_LONG_INT, MPI_MAXLOC, communicator ); - for ( int i = 0; i < n; ++i ) { - y[i] = recv[i].j; - comm_rank_of_max[i] = recv[i].i; - } - delete[] recv; - delete[] send; - } - PROFILE_STOP( "maxReduce1", profile_level ); -} -template<> -void MPI_CLASS::call_maxReduce( long int *x, const int n, int *comm_rank_of_max ) const -{ - PROFILE_START( "maxReduce2", profile_level ); - if ( comm_rank_of_max == nullptr ) { - auto send = x; - auto recv = new long int[n]; - MPI_Allreduce( send, recv, n, MPI_LONG, MPI_MAX, communicator ); - for ( int i = 0; i < n; i++ ) - x[i] = recv[i]; - delete[] recv; - } else { - auto recv = new LongIntStruct[n]; - auto send = new LongIntStruct[n]; - for ( int i = 0; i < n; ++i ) { - send[i].j = x[i]; - send[i].i = comm_rank; - } - MPI_Allreduce( send, recv, n, MPI_LONG_INT, MPI_MAXLOC, communicator ); - for ( int i = 0; i < n; ++i ) { - x[i] = recv[i].j; - comm_rank_of_max[i] = recv[i].i; - } - delete[] recv; - delete[] send; - } - PROFILE_STOP( "maxReduce2", profile_level ); -} -// unsigned long int -template<> -void MPI_CLASS::call_maxReduce( const unsigned long int *send, - unsigned long int *recv, const int n, int *comm_rank_of_max ) const -{ - if ( comm_rank_of_max == nullptr ) { - 
PROFILE_START( "maxReduce1", profile_level ); - MPI_Allreduce( (void *) send, (void *) recv, n, MPI_UNSIGNED_LONG, MPI_MAX, communicator ); - PROFILE_STOP( "maxReduce1", profile_level ); - } else { - auto tmp = new long int[n]; - for ( int i = 0; i < n; i++ ) - tmp[i] = unsigned_to_signed( send[i] ); - call_maxReduce( tmp, n, comm_rank_of_max ); - for ( int i = 0; i < n; i++ ) - recv[i] = signed_to_unsigned( tmp[i] ); - delete[] tmp; - } -} -template<> -void MPI_CLASS::call_maxReduce( - unsigned long int *x, const int n, int *comm_rank_of_max ) const -{ - if ( comm_rank_of_max == nullptr ) { - PROFILE_START( "maxReduce2", profile_level ); - auto send = x; - auto recv = new unsigned long int[n]; - MPI_Allreduce( send, recv, n, MPI_UNSIGNED_LONG, MPI_MAX, communicator ); - for ( int i = 0; i < n; i++ ) - x[i] = recv[i]; - delete[] recv; - PROFILE_STOP( "maxReduce2", profile_level ); - } else { - auto tmp = new long int[n]; - for ( int i = 0; i < n; i++ ) - tmp[i] = unsigned_to_signed( x[i] ); - call_maxReduce( tmp, n, comm_rank_of_max ); - for ( int i = 0; i < n; i++ ) - x[i] = signed_to_unsigned( tmp[i] ); - delete[] tmp; - } -} -// unsigned long long int -template<> -void MPI_CLASS::call_maxReduce( const unsigned long long int *send, - unsigned long long int *recv, const int n, int *comm_rank_of_max ) const -{ - PROFILE_START( "maxReduce1", profile_level ); - if ( comm_rank_of_max == nullptr ) { - auto x = new long long int[n]; - auto y = new long long int[n]; - for ( int i = 0; i < n; i++ ) - x[i] = unsigned_to_signed( send[i] ); - MPI_Allreduce( (void *) x, (void *) y, n, MPI_LONG_LONG_INT, MPI_MAX, communicator ); - for ( int i = 0; i < n; i++ ) - recv[i] = signed_to_unsigned( y[i] ); - delete[] x; - delete[] y; - } else { - printf( "maxReduce will use double\n" ); - auto tmp = new double[n]; - for ( int i = 0; i < n; i++ ) - tmp[i] = static_cast( send[i] ); - call_maxReduce( tmp, n, comm_rank_of_max ); - for ( int i = 0; i < n; i++ ) - recv[i] = static_cast( tmp[i] ); - delete[] tmp; - } - PROFILE_STOP( "maxReduce1", profile_level ); -} -template<> -void MPI_CLASS::call_maxReduce( - unsigned long long int *x, const int n, int *comm_rank_of_max ) const -{ - auto recv = new unsigned long long int[n]; - call_maxReduce( x, recv, n, comm_rank_of_max ); - for ( int i = 0; i < n; i++ ) - x[i] = recv[i]; - delete[] recv; -} -// long long int -template<> -void MPI_CLASS::call_maxReduce( - const long long int *x, long long int *y, const int n, int *comm_rank_of_max ) const -{ - PROFILE_START( "maxReduce1", profile_level ); - if ( comm_rank_of_max == nullptr ) { - MPI_Allreduce( (void *) x, (void *) y, n, MPI_LONG_LONG_INT, MPI_MAX, communicator ); - } else { - printf( "maxReduce will use double\n" ); - auto tmp = new double[n]; - for ( int i = 0; i < n; i++ ) - tmp[i] = static_cast( x[i] ); - call_maxReduce( tmp, n, comm_rank_of_max ); - for ( int i = 0; i < n; i++ ) - y[i] = static_cast( tmp[i] ); - delete[] tmp; - } - PROFILE_STOP( "maxReduce1", profile_level ); -} -template<> -void MPI_CLASS::call_maxReduce( - long long int *x, const int n, int *comm_rank_of_max ) const -{ - auto recv = new long long int[n]; - call_maxReduce( x, recv, n, comm_rank_of_max ); - for ( int i = 0; i < n; i++ ) - x[i] = signed_to_unsigned( recv[i] ); - delete[] recv; -} -// float -template<> -void MPI_CLASS::call_maxReduce( - const float *x, float *y, const int n, int *comm_rank_of_max ) const -{ - PROFILE_START( "maxReduce1", profile_level ); - if ( comm_rank_of_max == nullptr ) { - MPI_Allreduce( (void *) x, 
(void *) y, n, MPI_FLOAT, MPI_MAX, communicator ); - } else { - auto recv = new FloatIntStruct[n]; - auto send = new FloatIntStruct[n]; - for ( int i = 0; i < n; ++i ) { - send[i].f = x[i]; - send[i].i = comm_rank; - } - MPI_Allreduce( send, recv, n, MPI_FLOAT_INT, MPI_MAXLOC, communicator ); - for ( int i = 0; i < n; ++i ) { - y[i] = recv[i].f; - comm_rank_of_max[i] = recv[i].i; - } - delete[] recv; - delete[] send; - } - PROFILE_STOP( "maxReduce1", profile_level ); -} -template<> -void MPI_CLASS::call_maxReduce( float *x, const int n, int *comm_rank_of_max ) const -{ - PROFILE_START( "maxReduce2", profile_level ); - if ( comm_rank_of_max == nullptr ) { - auto send = x; - auto recv = new float[n]; - MPI_Allreduce( send, recv, n, MPI_FLOAT, MPI_MAX, communicator ); - for ( int i = 0; i < n; i++ ) - x[i] = recv[i]; - delete[] recv; - } else { - auto recv = new FloatIntStruct[n]; - auto send = new FloatIntStruct[n]; - for ( int i = 0; i < n; ++i ) { - send[i].f = x[i]; - send[i].i = comm_rank; - } - MPI_Allreduce( send, recv, n, MPI_FLOAT_INT, MPI_MAXLOC, communicator ); - for ( int i = 0; i < n; ++i ) { - x[i] = recv[i].f; - comm_rank_of_max[i] = recv[i].i; - } - delete[] recv; - delete[] send; - } - PROFILE_STOP( "maxReduce2", profile_level ); -} -// double -template<> -void MPI_CLASS::call_maxReduce( - const double *x, double *y, const int n, int *comm_rank_of_max ) const -{ - PROFILE_START( "maxReduce1", profile_level ); - if ( comm_rank_of_max == nullptr ) { - MPI_Allreduce( (void *) x, (void *) y, n, MPI_DOUBLE, MPI_MAX, communicator ); - } else { - auto recv = new DoubleIntStruct[n]; - auto send = new DoubleIntStruct[n]; - for ( int i = 0; i < n; ++i ) { - send[i].d = x[i]; - send[i].i = comm_rank; - } - MPI_Allreduce( send, recv, n, MPI_DOUBLE_INT, MPI_MAXLOC, communicator ); - for ( int i = 0; i < n; ++i ) { - y[i] = recv[i].d; - comm_rank_of_max[i] = recv[i].i; - } - delete[] recv; - delete[] send; - } - PROFILE_STOP( "maxReduce1", profile_level ); -} -template<> -void MPI_CLASS::call_maxReduce( double *x, const int n, int *comm_rank_of_max ) const -{ - PROFILE_START( "maxReduce2", profile_level ); - if ( comm_rank_of_max == nullptr ) { - auto send = x; - auto recv = new double[n]; - MPI_Allreduce( send, recv, n, MPI_DOUBLE, MPI_MAX, communicator ); - for ( int i = 0; i < n; i++ ) - x[i] = recv[i]; - delete[] recv; - } else { - auto recv = new DoubleIntStruct[n]; - auto send = new DoubleIntStruct[n]; - for ( int i = 0; i < n; ++i ) { - send[i].d = x[i]; - send[i].i = comm_rank; - } - MPI_Allreduce( send, recv, n, MPI_DOUBLE_INT, MPI_MAXLOC, communicator ); - for ( int i = 0; i < n; ++i ) { - x[i] = recv[i].d; - comm_rank_of_max[i] = recv[i].i; - } - delete[] recv; - delete[] send; - } - PROFILE_STOP( "maxReduce2", profile_level ); -} -#endif - - -/************************************************************************ - * bcast * - * Note: these specializations are only called when using MPI. 
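// Usage sketch for the call_maxReduce specializations above.  Note that the
// (unsigned) long long paths fall back to a double reduction when the rank of the
// maximum is requested, so exact 64-bit values may lose precision on that path.
// maxReduce(int*,int,int*) is assumed to be the public wrapper in the class header.
static void example_max_with_rank( const MPI_CLASS &comm, int localValue )
{
    int rankOfMax = -1;
    comm.maxReduce( &localValue, 1, &rankOfMax ); // localValue becomes the global maximum
    (void) rankOfMax;                             // rank that owned the largest value
}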
* - ************************************************************************/ -#ifdef USE_MPI -// char -template<> -void MPI_CLASS::call_bcast( unsigned char *x, const int n, const int root ) const -{ - PROFILE_START( "bcast", profile_level ); - MPI_Bcast( x, n, MPI_UNSIGNED_CHAR, root, communicator ); - PROFILE_STOP( "bcast", profile_level ); -} -template<> -void MPI_CLASS::call_bcast( char *x, const int n, const int root ) const -{ - PROFILE_START( "bcast", profile_level ); - MPI_Bcast( x, n, MPI_CHAR, root, communicator ); - PROFILE_STOP( "bcast", profile_level ); -} -// int -template<> -void MPI_CLASS::call_bcast( unsigned int *x, const int n, const int root ) const -{ - PROFILE_START( "bcast", profile_level ); - MPI_Bcast( x, n, MPI_UNSIGNED, root, communicator ); - PROFILE_STOP( "bcast", profile_level ); -} -template<> -void MPI_CLASS::call_bcast( int *x, const int n, const int root ) const -{ - PROFILE_START( "bcast", profile_level ); - MPI_Bcast( x, n, MPI_INT, root, communicator ); - PROFILE_STOP( "bcast", profile_level ); -} -// float -template<> -void MPI_CLASS::call_bcast( float *x, const int n, const int root ) const -{ - PROFILE_START( "bcast", profile_level ); - MPI_Bcast( x, n, MPI_FLOAT, root, communicator ); - PROFILE_STOP( "bcast", profile_level ); -} -// double -template<> -void MPI_CLASS::call_bcast( double *x, const int n, const int root ) const -{ - PROFILE_START( "bcast", profile_level ); - MPI_Bcast( x, n, MPI_DOUBLE, root, communicator ); - PROFILE_STOP( "bcast", profile_level ); -} -#else -// We need a concrete instantiation of bcast(x,n,root); -template<> -void MPI_CLASS::call_bcast( char *, const int, const int ) const -{ -} -#endif - - -/************************************************************************ - * Perform a global barrier across all processors. * - ************************************************************************/ -void MPI_CLASS::barrier() const -{ -#ifdef USE_MPI - MPI_Barrier( communicator ); -#endif -} - - -/************************************************************************ - * Send data array to another processor. * - * Note: these specializations are only called when using MPI. * - ************************************************************************/ -#ifdef USE_MPI -// char -template<> -void MPI_CLASS::send( - const char *buf, const int length, const int recv_proc_number, int tag ) const -{ - // Set the tag to 0 if it is < 0 - tag = ( tag >= 0 ) ? tag : 0; - MPI_INSIST( tag <= d_maxTag, "Maximum tag value exceeded" ); - // Send the data - PROFILE_START( "send", profile_level ); - MPI_Send( (void *) buf, length, MPI_CHAR, recv_proc_number, tag, communicator ); - PROFILE_STOP( "send", profile_level ); -} -// int -template<> -void MPI_CLASS::send( - const int *buf, const int length, const int recv_proc_number, int tag ) const -{ - // Set the tag to 0 if it is < 0 - tag = ( tag >= 0 ) ? tag : 0; - MPI_INSIST( tag <= d_maxTag, "Maximum tag value exceeded" ); - // Send the data - PROFILE_START( "send", profile_level ); - MPI_Send( (void *) buf, length, MPI_INT, recv_proc_number, tag, communicator ); - PROFILE_STOP( "send", profile_level ); -} -// float -template<> -void MPI_CLASS::send( - const float *buf, const int length, const int recv_proc_number, int tag ) const -{ - // Set the tag to 0 if it is < 0 - tag = ( tag >= 0 ) ? 
tag : 0; - MPI_INSIST( tag <= d_maxTag, "Maximum tag value exceeded" ); - // Send the data - PROFILE_START( "send", profile_level ); - MPI_Send( (void *) buf, length, MPI_FLOAT, recv_proc_number, tag, communicator ); - PROFILE_STOP( "send", profile_level ); -} -// double -template<> -void MPI_CLASS::send( - const double *buf, const int length, const int recv_proc_number, int tag ) const -{ - // Set the tag to 0 if it is < 0 - tag = ( tag >= 0 ) ? tag : 0; - MPI_INSIST( tag <= d_maxTag, "Maximum tag value exceeded" ); - // Send the data - PROFILE_START( "send", profile_level ); - MPI_Send( (void *) buf, length, MPI_DOUBLE, recv_proc_number, tag, communicator ); - PROFILE_STOP( "send", profile_level ); -} -#else -// We need a concrete instantiation of send for use without MPI -template<> -void MPI_CLASS::send( const char *buf, const int length, const int, int tag ) const -{ - MPI_INSIST( tag <= d_maxTag, "Maximum tag value exceeded" ); - MPI_INSIST( tag >= 0, "tag must be >= 0" ); - PROFILE_START( "send", profile_level ); - auto id = getRequest( communicator, tag ); - auto it = global_isendrecv_list.find( id ); - MPI_INSIST( it == global_isendrecv_list.end(), - "send must be paired with a previous call to irecv in serial" ); - MPI_ASSERT( it->second.status == 2 ); - memcpy( (char *) it->second.data, buf, length ); - global_isendrecv_list.erase( it ); - PROFILE_START( "send", profile_level ); -} -#endif - - -/************************************************************************ - * Non-blocking send data array to another processor. * - * Note: these specializations are only called when using MPI. * - ************************************************************************/ -#ifdef USE_MPI -// char -template<> -MPI_Request MPI_CLASS::Isend( - const char *buf, const int length, const int recv_proc, const int tag ) const -{ - MPI_INSIST( tag <= d_maxTag, "Maximum tag value exceeded" ); - MPI_INSIST( tag >= 0, "tag must be >= 0" ); - MPI_Request request; - PROFILE_START( "Isend", profile_level ); - MPI_Isend( (void *) buf, length, MPI_CHAR, recv_proc, tag, communicator, &request ); - PROFILE_STOP( "Isend", profile_level ); - return request; -} -// int -template<> -MPI_Request MPI_CLASS::Isend( - const int *buf, const int length, const int recv_proc, const int tag ) const -{ - MPI_INSIST( tag <= d_maxTag, "Maximum tag value exceeded" ); - MPI_INSIST( tag >= 0, "tag must be >= 0" ); - MPI_Request request; - PROFILE_START( "Isend", profile_level ); - MPI_Isend( (void *) buf, length, MPI_INT, recv_proc, tag, communicator, &request ); - PROFILE_STOP( "Isend", profile_level ); - return request; -} -// float -template<> -MPI_Request MPI_CLASS::Isend( - const float *buf, const int length, const int recv_proc, const int tag ) const -{ - MPI_INSIST( tag <= d_maxTag, "Maximum tag value exceeded" ); - MPI_INSIST( tag >= 0, "tag must be >= 0" ); - MPI_Request request; - PROFILE_START( "Isend", profile_level ); - MPI_Isend( (void *) buf, length, MPI_FLOAT, recv_proc, tag, communicator, &request ); - PROFILE_STOP( "Isend", profile_level ); - return request; -} -// double -template<> -MPI_Request MPI_CLASS::Isend( - const double *buf, const int length, const int recv_proc, const int tag ) const -{ - MPI_INSIST( tag <= d_maxTag, "Maximum tag value exceeded" ); - MPI_INSIST( tag >= 0, "tag must be >= 0" ); - MPI_Request request; - PROFILE_START( "Isend", profile_level ); - MPI_Isend( (void *) buf, length, MPI_DOUBLE, recv_proc, tag, communicator, &request ); - PROFILE_STOP( "Isend", profile_level ); - return 
request; -} -#else -// We need a concrete instantiation of send for use without mpi -template<> -MPI_Request MPI_CLASS::Isend( - const char *buf, const int length, const int, const int tag ) const -{ - MPI_INSIST( tag <= d_maxTag, "Maximum tag value exceeded" ); - MPI_INSIST( tag >= 0, "tag must be >= 0" ); - PROFILE_START( "Isend", profile_level ); - auto id = getRequest( communicator, tag ); - auto it = global_isendrecv_list.find( id ); - if ( it == global_isendrecv_list.end() ) { - // We are calling isend first - Isendrecv_struct data; - data.data = buf; - data.status = 1; - global_isendrecv_list.insert( std::pair( id, data ) ); - } else { - // We called irecv first - MPI_ASSERT( it->second.status == 2 ); - memcpy( (char *) it->second.data, buf, length ); - global_isendrecv_list.erase( it ); - } - PROFILE_STOP( "Isend", profile_level ); - return id; -} -#endif - - -/************************************************************************ - * Send byte array to another processor. * - ************************************************************************/ -void MPI_CLASS::sendBytes( - const void *buf, const int number_bytes, const int recv_proc_number, int tag ) const -{ - MPI_INSIST( tag <= d_maxTag, "Maximum tag value exceeded" ); - MPI_INSIST( tag >= 0, "tag must be >= 0" ); - send( (const char *) buf, number_bytes, recv_proc_number, tag ); -} - - -/************************************************************************ - * Non-blocking send byte array to another processor. * - ************************************************************************/ -MPI_Request MPI_CLASS::IsendBytes( - const void *buf, const int number_bytes, const int recv_proc, const int tag ) const -{ - MPI_INSIST( tag <= d_maxTag, "Maximum tag value exceeded" ); - MPI_INSIST( tag >= 0, "tag must be >= 0" ); - return Isend( (const char *) buf, number_bytes, recv_proc, tag ); -} - - -/************************************************************************ - * Recieve data array to another processor. * - * Note: these specializations are only called when using MPI. * - ************************************************************************/ -#ifdef USE_MPI -// char -template<> -void MPI_CLASS::recv( - char *buf, int &length, const int send_proc_number, const bool get_length, int tag ) const -{ - // Set the tag to 0 if it is < 0 - tag = ( tag >= 0 ) ? tag : 0; - MPI_INSIST( tag <= d_maxTag, "Maximum tag value exceeded" ); - PROFILE_START( "recv", profile_level ); - // Get the recieve length if necessary - if ( get_length ) { - int bytes = this->probe( send_proc_number, tag ); - int recv_length = bytes / sizeof( char ); - MPI_INSIST( length >= recv_length, "Recived length is larger than allocated array" ); - length = recv_length; - } - // Send the data - MPI_Status status; - MPI_Recv( (void *) buf, length, MPI_CHAR, send_proc_number, tag, communicator, &status ); - PROFILE_STOP( "recv", profile_level ); -} -// int -template<> -void MPI_CLASS::recv( - int *buf, int &length, const int send_proc_number, const bool get_length, int tag ) const -{ - // Set the tag to 0 if it is < 0 - tag = ( tag >= 0 ) ? 
tag : 0; - MPI_INSIST( tag <= d_maxTag, "Maximum tag value exceeded" ); - PROFILE_START( "recv", profile_level ); - // Get the recieve length if necessary - if ( get_length ) { - int bytes = this->probe( send_proc_number, tag ); - int recv_length = bytes / sizeof( int ); - MPI_INSIST( length >= recv_length, "Recived length is larger than allocated array" ); - length = recv_length; - } - // Send the data - MPI_Status status; - MPI_Recv( (void *) buf, length, MPI_INT, send_proc_number, tag, communicator, &status ); - PROFILE_STOP( "recv", profile_level ); -} -// float -template<> -void MPI_CLASS::recv( - float *buf, int &length, const int send_proc_number, const bool get_length, int tag ) const -{ - // Set the tag to 0 if it is < 0 - tag = ( tag >= 0 ) ? tag : 0; - MPI_INSIST( tag <= d_maxTag, "Maximum tag value exceeded" ); - PROFILE_START( "recv", profile_level ); - // Get the recieve length if necessary - if ( get_length ) { - int bytes = this->probe( send_proc_number, tag ); - int recv_length = bytes / sizeof( float ); - MPI_INSIST( length >= recv_length, "Recived length is larger than allocated array" ); - length = recv_length; - } - // Send the data - MPI_Status status; - MPI_Recv( (void *) buf, length, MPI_FLOAT, send_proc_number, tag, communicator, &status ); - PROFILE_STOP( "recv", profile_level ); -} -// double -template<> -void MPI_CLASS::recv( - double *buf, int &length, const int send_proc_number, const bool get_length, int tag ) const -{ - // Set the tag to 0 if it is < 0 - tag = ( tag >= 0 ) ? tag : 0; - MPI_INSIST( tag <= d_maxTag, "Maximum tag value exceeded" ); - PROFILE_START( "recv", profile_level ); - // Get the recieve length if necessary - if ( get_length ) { - int bytes = this->probe( send_proc_number, tag ); - int recv_length = bytes / sizeof( double ); - MPI_INSIST( length >= recv_length, "Recived length is larger than allocated array" ); - length = recv_length; - } - // Send the data - MPI_Status status; - MPI_Recv( (void *) buf, length, MPI_DOUBLE, send_proc_number, tag, communicator, &status ); - PROFILE_STOP( "recv", profile_level ); -} -#else -// We need a concrete instantiation of recv for use without mpi -template<> -void MPI_CLASS::recv( char *buf, int &length, const int, const bool, int tag ) const -{ - MPI_INSIST( tag <= d_maxTag, "Maximum tag value exceeded" ); - MPI_INSIST( tag >= 0, "tag must be >= 0" ); - PROFILE_START( "recv", profile_level ); - auto id = getRequest( communicator, tag ); - auto it = global_isendrecv_list.find( id ); - MPI_INSIST( it != global_isendrecv_list.end(), - "recv must be paired with a previous call to isend in serial" ); - MPI_ASSERT( it->second.status == 1 ); - memcpy( buf, it->second.data, length ); - global_isendrecv_list.erase( it ); - PROFILE_STOP( "recv", profile_level ); -} -#endif - - -/************************************************************************ - * Non-blocking recieve data array to another processor. * - * Note: these specializations are only called when using MPI. 
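// Usage sketch for the blocking send/recv specializations above: rank 0 sends ten
// doubles to rank 1.  The receive length is passed by reference and is only updated
// from probe() when get_length is true.  getRank() is assumed from the class header.
static void example_send_recv( const MPI_CLASS &comm )
{
    const int tag = 0;
    if ( comm.getRank() == 0 ) {
        double buf[10] = { 0 };
        comm.send( buf, 10, 1, tag );            // blocking MPI_Send to rank 1
    } else if ( comm.getRank() == 1 ) {
        double buf[10];
        int length = 10;
        comm.recv( buf, length, 0, false, tag ); // blocking MPI_Recv from rank 0
    }
}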
* - ************************************************************************/ -#ifdef USE_MPI -// char -template<> -MPI_Request MPI_CLASS::Irecv( - char *buf, const int length, const int send_proc, const int tag ) const -{ - MPI_INSIST( tag <= d_maxTag, "Maximum tag value exceeded" ); - MPI_INSIST( tag >= 0, "tag must be >= 0" ); - MPI_Request request; - PROFILE_START( "Irecv", profile_level ); - MPI_Irecv( (void *) buf, length, MPI_CHAR, send_proc, tag, communicator, &request ); - PROFILE_STOP( "Irecv", profile_level ); - return request; -} -// int -template<> -MPI_Request MPI_CLASS::Irecv( - int *buf, const int length, const int send_proc, const int tag ) const -{ - MPI_INSIST( tag <= d_maxTag, "Maximum tag value exceeded" ); - MPI_INSIST( tag >= 0, "tag must be >= 0" ); - MPI_Request request; - PROFILE_START( "Irecv", profile_level ); - MPI_Irecv( (void *) buf, length, MPI_INT, send_proc, tag, communicator, &request ); - PROFILE_STOP( "Irecv", profile_level ); - return request; -} -// float -template<> -MPI_Request MPI_CLASS::Irecv( - float *buf, const int length, const int send_proc, const int tag ) const -{ - MPI_INSIST( tag <= d_maxTag, "Maximum tag value exceeded" ); - MPI_INSIST( tag >= 0, "tag must be >= 0" ); - MPI_Request request; - PROFILE_START( "Irecv", profile_level ); - MPI_Irecv( (void *) buf, length, MPI_FLOAT, send_proc, tag, communicator, &request ); - PROFILE_STOP( "Irecv", profile_level ); - return request; -} -// double -template<> -MPI_Request MPI_CLASS::Irecv( - double *buf, const int length, const int send_proc, const int tag ) const -{ - MPI_INSIST( tag <= d_maxTag, "Maximum tag value exceeded" ); - MPI_INSIST( tag >= 0, "tag must be >= 0" ); - MPI_Request request; - PROFILE_START( "Irecv", profile_level ); - MPI_Irecv( (void *) buf, length, MPI_DOUBLE, send_proc, tag, communicator, &request ); - PROFILE_STOP( "Irecv", profile_level ); - return request; -} -#else -// We need a concrete instantiation of irecv for use without mpi -template<> -MPI_Request MPI_CLASS::Irecv( char *buf, const int length, const int, const int tag ) const -{ - MPI_INSIST( tag <= d_maxTag, "Maximum tag value exceeded" ); - MPI_INSIST( tag >= 0, "tag must be >= 0" ); - PROFILE_START( "Irecv", profile_level ); - auto id = getRequest( communicator, tag ); - auto it = global_isendrecv_list.find( id ); - if ( it == global_isendrecv_list.end() ) { - // We are calling Irecv first - Isendrecv_struct data; - data.data = buf; - data.status = 2; - global_isendrecv_list.insert( std::pair( id, data ) ); - } else { - // We called Isend first - MPI_ASSERT( it->second.status == 1 ); - memcpy( buf, it->second.data, length ); - global_isendrecv_list.erase( it ); - } - PROFILE_STOP( "Irecv", profile_level ); - return id; -} -#endif - - -/************************************************************************ - * Recieve byte array to another processor. * - ************************************************************************/ -void MPI_CLASS::recvBytes( void *buf, int &number_bytes, const int send_proc, int tag ) const -{ - recv( (char *) buf, number_bytes, send_proc, false, tag ); -} - - -/************************************************************************ - * Recieve byte array to another processor. 
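// Usage sketch for the non-blocking Isend/Irecv specializations above.  Each call
// returns an MPI_Request; when USE_MPI is defined the request can be completed with
// MPI_Wait (or with the wrapper's own wait helpers, if present in the class header).
// getRank() is assumed from the class header.
static void example_isend_irecv( const MPI_CLASS &comm, int partner )
{
    const int tag = 1;
    int sendBuf = comm.getRank();
    int recvBuf = -1;
    MPI_Request sendReq = comm.Isend( &sendBuf, 1, partner, tag );
    MPI_Request recvReq = comm.Irecv( &recvBuf, 1, partner, tag );
    MPI_Wait( &sendReq, MPI_STATUS_IGNORE );
    MPI_Wait( &recvReq, MPI_STATUS_IGNORE );
}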
* - ************************************************************************/ -MPI_Request MPI_CLASS::IrecvBytes( - void *buf, const int number_bytes, const int send_proc, const int tag ) const -{ - MPI_INSIST( tag <= d_maxTag, "Maximum tag value exceeded" ); - MPI_INSIST( tag >= 0, "tag must be >= 0" ); - return Irecv( (char *) buf, number_bytes, send_proc, tag ); -} - - -/************************************************************************ - * allGather * - * Note: these specializations are only called when using MPI. * - ************************************************************************/ -#ifdef USE_MPI -// unsigned char -template<> -void MPI_CLASS::call_allGather( - const unsigned char &x_in, unsigned char *x_out ) const -{ - PROFILE_START( "allGather", profile_level ); - MPI_Allgather( - (void *) &x_in, 1, MPI_UNSIGNED_CHAR, (void *) x_out, 1, MPI_UNSIGNED_CHAR, communicator ); - PROFILE_STOP( "allGather", profile_level ); -} -template<> -void MPI_CLASS::call_allGather( const unsigned char *x_in, int size_in, - unsigned char *x_out, int *size_out, int *disp_out ) const -{ - PROFILE_START( "allGatherv", profile_level ); - MPI_Allgatherv( (void *) x_in, size_in, MPI_CHAR, (void *) x_out, size_out, disp_out, MPI_CHAR, - communicator ); - PROFILE_STOP( "allGatherv", profile_level ); -} -// char -template<> -void MPI_CLASS::call_allGather( const char &x_in, char *x_out ) const -{ - PROFILE_START( "allGather", profile_level ); - MPI_Allgather( (void *) &x_in, 1, MPI_CHAR, (void *) x_out, 1, MPI_CHAR, communicator ); - PROFILE_STOP( "allGather", profile_level ); -} -template<> -void MPI_CLASS::call_allGather( - const char *x_in, int size_in, char *x_out, int *size_out, int *disp_out ) const -{ - PROFILE_START( "allGatherv", profile_level ); - MPI_Allgatherv( (void *) x_in, size_in, MPI_CHAR, (void *) x_out, size_out, disp_out, MPI_CHAR, - communicator ); - PROFILE_STOP( "allGatherv", profile_level ); -} -// unsigned int -template<> -void MPI_CLASS::call_allGather( const unsigned int &x_in, unsigned int *x_out ) const -{ - PROFILE_START( "allGather", profile_level ); - MPI_Allgather( (void *) &x_in, 1, MPI_UNSIGNED, (void *) x_out, 1, MPI_UNSIGNED, communicator ); - PROFILE_STOP( "allGather", profile_level ); -} -template<> -void MPI_CLASS::call_allGather( - const unsigned int *x_in, int size_in, unsigned int *x_out, int *size_out, int *disp_out ) const -{ - PROFILE_START( "allGatherv", profile_level ); - MPI_Allgatherv( (void *) x_in, size_in, MPI_UNSIGNED, (void *) x_out, size_out, disp_out, - MPI_UNSIGNED, communicator ); - PROFILE_STOP( "allGatherv", profile_level ); -} -// int -template<> -void MPI_CLASS::call_allGather( const int &x_in, int *x_out ) const -{ - PROFILE_START( "allGather", profile_level ); - MPI_Allgather( (void *) &x_in, 1, MPI_INT, (void *) x_out, 1, MPI_INT, communicator ); - PROFILE_STOP( "allGather", profile_level ); -} -template<> -void MPI_CLASS::call_allGather( - const int *x_in, int size_in, int *x_out, int *size_out, int *disp_out ) const -{ - PROFILE_START( "allGatherv", profile_level ); - MPI_Allgatherv( (void *) x_in, size_in, MPI_INT, (void *) x_out, size_out, disp_out, MPI_INT, - communicator ); - PROFILE_STOP( "allGatherv", profile_level ); -} -// unsigned long int -template<> -void MPI_CLASS::call_allGather( - const unsigned long int &x_in, unsigned long int *x_out ) const -{ - PROFILE_START( "allGather", profile_level ); - MPI_Allgather( - (void *) &x_in, 1, MPI_UNSIGNED_LONG, (void *) x_out, 1, MPI_UNSIGNED_LONG, communicator ); - 
PROFILE_STOP( "allGather", profile_level ); -} -template<> -void MPI_CLASS::call_allGather( const unsigned long int *x_in, int size_in, - unsigned long int *x_out, int *size_out, int *disp_out ) const -{ - PROFILE_START( "allGatherv", profile_level ); - MPI_Allgatherv( (void *) x_in, size_in, MPI_UNSIGNED_LONG, (void *) x_out, size_out, disp_out, - MPI_UNSIGNED_LONG, communicator ); - PROFILE_STOP( "allGatherv", profile_level ); -} -// long int -template<> -void MPI_CLASS::call_allGather( const long int &x_in, long int *x_out ) const -{ - PROFILE_START( "allGather", profile_level ); - MPI_Allgather( (void *) &x_in, 1, MPI_LONG, (void *) x_out, 1, MPI_LONG, communicator ); - PROFILE_STOP( "allGather", profile_level ); -} -template<> -void MPI_CLASS::call_allGather( - const long int *x_in, int size_in, long int *x_out, int *size_out, int *disp_out ) const -{ - PROFILE_START( "allGatherv", profile_level ); - MPI_Allgatherv( (void *) x_in, size_in, MPI_LONG, (void *) x_out, size_out, disp_out, MPI_LONG, - communicator ); - PROFILE_STOP( "allGatherv", profile_level ); -} -// float -template<> -void MPI_CLASS::call_allGather( const float &x_in, float *x_out ) const -{ - PROFILE_START( "allGather", profile_level ); - MPI_Allgather( (void *) &x_in, 1, MPI_FLOAT, (void *) x_out, 1, MPI_FLOAT, communicator ); - PROFILE_STOP( "allGather", profile_level ); -} -template<> -void MPI_CLASS::call_allGather( - const float *x_in, int size_in, float *x_out, int *size_out, int *disp_out ) const -{ - PROFILE_START( "allGatherv", profile_level ); - MPI_Allgatherv( (void *) x_in, size_in, MPI_FLOAT, (void *) x_out, size_out, disp_out, - MPI_FLOAT, communicator ); - PROFILE_STOP( "allGatherv", profile_level ); -} -// double -template<> -void MPI_CLASS::call_allGather( const double &x_in, double *x_out ) const -{ - PROFILE_START( "allGather", profile_level ); - MPI_Allgather( (void *) &x_in, 1, MPI_DOUBLE, (void *) x_out, 1, MPI_DOUBLE, communicator ); - PROFILE_STOP( "allGather", profile_level ); -} -template<> -void MPI_CLASS::call_allGather( - const double *x_in, int size_in, double *x_out, int *size_out, int *disp_out ) const -{ - PROFILE_START( "allGatherv", profile_level ); - MPI_Allgatherv( (void *) x_in, size_in, MPI_DOUBLE, (void *) x_out, size_out, disp_out, - MPI_DOUBLE, communicator ); - PROFILE_STOP( "allGatherv", profile_level ); -} -#else -// We need a concrete instantiation of call_allGather(x_in,size_in,x_out,size_out) -template<> -void MPI_CLASS::call_allGather( const char *, int, char *, int *, int * ) const -{ - MPI_ERROR( "Internal error in communicator (allGather) " ); -} -#endif - - -/************************************************************************ - * allToAll * - * Note: these specializations are only called when using MPI. 
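// Usage sketch for the call_allGather specializations above: every rank contributes
// one value and receives the value from all ranks.  allGather, getRank() and
// getSize() are assumed to be the public wrappers in the class header, and <vector>
// is assumed to be included.
static void example_all_gather( const MPI_CLASS &comm )
{
    std::vector<int> all( comm.getSize(), -1 );
    comm.allGather( comm.getRank(), &all[0] ); // afterwards all[i] == i on every rank
}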
* - ************************************************************************/ -#ifdef USE_MPI -template<> -void MPI_CLASS::allToAll( - const int n, const unsigned char *send, unsigned char *recv ) const -{ - PROFILE_START( "allToAll", profile_level ); - MPI_Alltoall( - (void *) send, n, MPI_UNSIGNED_CHAR, (void *) recv, n, MPI_UNSIGNED_CHAR, communicator ); - PROFILE_STOP( "allToAll", profile_level ); -} -template<> -void MPI_CLASS::allToAll( const int n, const char *send, char *recv ) const -{ - PROFILE_START( "allToAll", profile_level ); - MPI_Alltoall( (void *) send, n, MPI_CHAR, (void *) recv, n, MPI_CHAR, communicator ); - PROFILE_STOP( "allToAll", profile_level ); -} -template<> -void MPI_CLASS::allToAll( - const int n, const unsigned int *send, unsigned int *recv ) const -{ - PROFILE_START( "allToAll", profile_level ); - MPI_Alltoall( (void *) send, n, MPI_UNSIGNED, (void *) recv, n, MPI_UNSIGNED, communicator ); - PROFILE_STOP( "allToAll", profile_level ); -} -template<> -void MPI_CLASS::allToAll( const int n, const int *send, int *recv ) const -{ - PROFILE_START( "allToAll", profile_level ); - MPI_Alltoall( (void *) send, n, MPI_INT, (void *) recv, n, MPI_INT, communicator ); - PROFILE_STOP( "allToAll", profile_level ); -} -template<> -void MPI_CLASS::allToAll( - const int n, const unsigned long int *send, unsigned long int *recv ) const -{ - PROFILE_START( "allToAll", profile_level ); - MPI_Alltoall( - (void *) send, n, MPI_UNSIGNED_LONG, (void *) recv, n, MPI_UNSIGNED_LONG, communicator ); - PROFILE_STOP( "allToAll", profile_level ); -} -template<> -void MPI_CLASS::allToAll( const int n, const long int *send, long int *recv ) const -{ - PROFILE_START( "allToAll", profile_level ); - MPI_Alltoall( (void *) send, n, MPI_LONG, (void *) recv, n, MPI_LONG, communicator ); - PROFILE_STOP( "allToAll", profile_level ); -} -template<> -void MPI_CLASS::allToAll( const int n, const float *send, float *recv ) const -{ - PROFILE_START( "allToAll", profile_level ); - MPI_Alltoall( (void *) send, n, MPI_FLOAT, (void *) recv, n, MPI_FLOAT, communicator ); - PROFILE_STOP( "allToAll", profile_level ); -} -template<> -void MPI_CLASS::allToAll( const int n, const double *send, double *recv ) const -{ - PROFILE_START( "allToAll", profile_level ); - MPI_Alltoall( (void *) send, n, MPI_DOUBLE, (void *) recv, n, MPI_DOUBLE, communicator ); - PROFILE_STOP( "allToAll", profile_level ); -} -#endif - - -/************************************************************************ - * call_allToAll * - * Note: these specializations are only called when using MPI. 
* - ************************************************************************/ -#ifdef USE_MPI -// unsigned char -template<> -void MPI_CLASS::call_allToAll( const unsigned char *send_data, const int send_cnt[], - const int send_disp[], unsigned char *recv_data, const int *recv_cnt, - const int *recv_disp ) const -{ - PROFILE_START( "allToAllv", profile_level ); - MPI_Alltoallv( (void *) send_data, (int *) send_cnt, (int *) send_disp, MPI_UNSIGNED_CHAR, - (void *) recv_data, (int *) recv_cnt, (int *) recv_disp, MPI_UNSIGNED_CHAR, communicator ); - PROFILE_STOP( "allToAllv", profile_level ); -} -// char -template<> -void MPI_CLASS::call_allToAll( const char *send_data, const int send_cnt[], - const int send_disp[], char *recv_data, const int *recv_cnt, const int *recv_disp ) const -{ - PROFILE_START( "allToAllv", profile_level ); - MPI_Alltoallv( (void *) send_data, (int *) send_cnt, (int *) send_disp, MPI_CHAR, - (void *) recv_data, (int *) recv_cnt, (int *) recv_disp, MPI_CHAR, communicator ); - PROFILE_STOP( "allToAllv", profile_level ); -} -// unsigned int -template<> -void MPI_CLASS::call_allToAll( const unsigned int *send_data, const int send_cnt[], - const int send_disp[], unsigned int *recv_data, const int *recv_cnt, - const int *recv_disp ) const -{ - PROFILE_START( "allToAllv", profile_level ); - MPI_Alltoallv( (void *) send_data, (int *) send_cnt, (int *) send_disp, MPI_UNSIGNED, - (void *) recv_data, (int *) recv_cnt, (int *) recv_disp, MPI_UNSIGNED, communicator ); - PROFILE_STOP( "allToAllv", profile_level ); -} -// int -template<> -void MPI_CLASS::call_allToAll( const int *send_data, const int send_cnt[], - const int send_disp[], int *recv_data, const int *recv_cnt, const int *recv_disp ) const -{ - PROFILE_START( "allToAllv", profile_level ); - MPI_Alltoallv( (void *) send_data, (int *) send_cnt, (int *) send_disp, MPI_INT, - (void *) recv_data, (int *) recv_cnt, (int *) recv_disp, MPI_INT, communicator ); - PROFILE_STOP( "allToAllv", profile_level ); -} -// unsigned long int -template<> -void MPI_CLASS::call_allToAll( const unsigned long int *send_data, - const int send_cnt[], const int send_disp[], unsigned long int *recv_data, const int *recv_cnt, - const int *recv_disp ) const -{ - PROFILE_START( "allToAllv", profile_level ); - MPI_Alltoallv( (void *) send_data, (int *) send_cnt, (int *) send_disp, MPI_UNSIGNED_LONG, - (void *) recv_data, (int *) recv_cnt, (int *) recv_disp, MPI_UNSIGNED_LONG, communicator ); - PROFILE_STOP( "allToAllv", profile_level ); -} -// long int -template<> -void MPI_CLASS::call_allToAll( const long int *send_data, const int send_cnt[], - const int send_disp[], long int *recv_data, const int *recv_cnt, const int *recv_disp ) const -{ - PROFILE_START( "allToAllv", profile_level ); - MPI_Alltoallv( (void *) send_data, (int *) send_cnt, (int *) send_disp, MPI_LONG, - (void *) recv_data, (int *) recv_cnt, (int *) recv_disp, MPI_LONG, communicator ); - PROFILE_STOP( "allToAllv", profile_level ); -} -// float -template<> -void MPI_CLASS::call_allToAll( const float *send_data, const int send_cnt[], - const int send_disp[], float *recv_data, const int *recv_cnt, const int *recv_disp ) const -{ - PROFILE_START( "allToAllv", profile_level ); - MPI_Alltoallv( (void *) send_data, (int *) send_cnt, (int *) send_disp, MPI_FLOAT, - (void *) recv_data, (int *) recv_cnt, (int *) recv_disp, MPI_FLOAT, communicator ); - PROFILE_STOP( "allToAllv", profile_level ); -} -// double -template<> -void MPI_CLASS::call_allToAll( const double *send_data, const int send_cnt[], 
- const int send_disp[], double *recv_data, const int *recv_cnt, const int *recv_disp ) const -{ - PROFILE_START( "allToAllv", profile_level ); - MPI_Alltoallv( (void *) send_data, (int *) send_cnt, (int *) send_disp, MPI_DOUBLE, - (void *) recv_data, (int *) recv_cnt, (int *) recv_disp, MPI_DOUBLE, communicator ); - PROFILE_STOP( "allToAllv", profile_level ); -} -#else -// Default instatiation of unsigned char -template<> -void MPI_CLASS::call_allToAll( - const char *, const int[], const int[], char *, const int *, const int * ) const -{ - MPI_ERROR( "Should not reach this point" ); -} -#endif - - -/************************************************************************ - * call_sumScan * - * Note: these specializations are only called when using MPI. * - ************************************************************************/ -#ifdef USE_MPI -// unsigned char -template<> -void MPI_CLASS::call_sumScan( - const unsigned char *send, unsigned char *recv, int n ) const -{ - PROFILE_START( "sumScan", profile_level ); - MPI_Scan( (void *) send, (void *) recv, n, MPI_UNSIGNED_CHAR, MPI_SUM, communicator ); - PROFILE_STOP( "sumScan", profile_level ); -} -// char -template<> -void MPI_CLASS::call_sumScan( const char *send, char *recv, int n ) const -{ - PROFILE_START( "sumScan", profile_level ); - MPI_Scan( (void *) send, (void *) recv, n, MPI_SIGNED_CHAR, MPI_SUM, communicator ); - PROFILE_STOP( "sumScan", profile_level ); -} -// unsigned int -template<> -void MPI_CLASS::call_sumScan( - const unsigned int *send, unsigned int *recv, int n ) const -{ - PROFILE_START( "sumScan", profile_level ); - MPI_Scan( (void *) send, (void *) recv, n, MPI_UNSIGNED, MPI_SUM, communicator ); - PROFILE_STOP( "sumScan", profile_level ); -} -// int -template<> -void MPI_CLASS::call_sumScan( const int *send, int *recv, int n ) const -{ - PROFILE_START( "sumScan", profile_level ); - MPI_Scan( (void *) send, (void *) recv, n, MPI_INT, MPI_SUM, communicator ); - PROFILE_STOP( "sumScan", profile_level ); -} -// long int -template<> -void MPI_CLASS::call_sumScan( const long int *send, long int *recv, int n ) const -{ - PROFILE_START( "sumScan", profile_level ); - MPI_Scan( (void *) send, (void *) recv, n, MPI_LONG, MPI_SUM, communicator ); - PROFILE_STOP( "sumScan", profile_level ); -} -// unsigned long int -template<> -void MPI_CLASS::call_sumScan( - const unsigned long *send, unsigned long *recv, int n ) const -{ - PROFILE_START( "sumScan", profile_level ); - MPI_Scan( (void *) send, (void *) recv, n, MPI_UNSIGNED_LONG, MPI_SUM, communicator ); - PROFILE_STOP( "sumScan", profile_level ); -} -// size_t -#ifdef USE_WINDOWS -template<> -void MPI_CLASS::call_sumScan( const size_t *send, size_t *recv, int n ) const -{ - MPI_ASSERT( MPI_SIZE_T != 0 ); - PROFILE_START( "sumScan", profile_level ); - MPI_Scan( (void *) send, (void *) recv, n, MPI_SIZE_T, MPI_SUM, communicator ); - PROFILE_STOP( "sumScan", profile_level ); -} -#endif -// float -template<> -void MPI_CLASS::call_sumScan( const float *send, float *recv, int n ) const -{ - PROFILE_START( "sumScan", profile_level ); - MPI_Scan( (void *) send, (void *) recv, n, MPI_FLOAT, MPI_SUM, communicator ); - PROFILE_STOP( "sumScan", profile_level ); -} -// double -template<> -void MPI_CLASS::call_sumScan( const double *send, double *recv, int n ) const -{ - PROFILE_START( "sumScan", profile_level ); - MPI_Scan( (void *) send, (void *) recv, n, MPI_DOUBLE, MPI_SUM, communicator ); - PROFILE_STOP( "sumScan", profile_level ); -} -// std::complex -template<> -void 
MPI_CLASS::call_sumScan>( - const std::complex *x, std::complex *y, int n ) const -{ - auto send = new double[2 * n]; - auto recv = new double[2 * n]; - for ( int i = 0; i < n; i++ ) { - send[2 * i + 0] = real( x[i] ); - send[2 * i + 1] = imag( x[i] ); - } - MPI_Scan( (void *) send, (void *) recv, 2 * n, MPI_DOUBLE, MPI_SUM, communicator ); - for ( int i = 0; i < n; i++ ) - y[i] = std::complex( recv[2 * i + 0], recv[2 * i + 1] ); - delete[] send; - delete[] recv; -} -#endif - - -/************************************************************************ - * call_minScan * - * Note: these specializations are only called when using MPI. * - ************************************************************************/ -#ifdef USE_MPI -// unsigned char -template<> -void MPI_CLASS::call_minScan( - const unsigned char *send, unsigned char *recv, int n ) const -{ - PROFILE_START( "minScan", profile_level ); - MPI_Scan( (void *) send, (void *) recv, n, MPI_UNSIGNED_CHAR, MPI_MIN, communicator ); - PROFILE_STOP( "minScan", profile_level ); -} -// char -template<> -void MPI_CLASS::call_minScan( const char *send, char *recv, int n ) const -{ - PROFILE_START( "minScan", profile_level ); - MPI_Scan( (void *) send, (void *) recv, n, MPI_SIGNED_CHAR, MPI_MIN, communicator ); - PROFILE_STOP( "minScan", profile_level ); -} -// unsigned int -template<> -void MPI_CLASS::call_minScan( - const unsigned int *send, unsigned int *recv, int n ) const -{ - PROFILE_START( "minScan", profile_level ); - MPI_Scan( (void *) send, (void *) recv, n, MPI_UNSIGNED, MPI_MIN, communicator ); - PROFILE_STOP( "minScan", profile_level ); -} -// int -template<> -void MPI_CLASS::call_minScan( const int *send, int *recv, int n ) const -{ - PROFILE_START( "minScan", profile_level ); - MPI_Scan( (void *) send, (void *) recv, n, MPI_INT, MPI_MIN, communicator ); - PROFILE_STOP( "minScan", profile_level ); -} -// unsigned long int -template<> -void MPI_CLASS::call_minScan( - const unsigned long int *send, unsigned long int *recv, int n ) const -{ - PROFILE_START( "minScan", profile_level ); - MPI_Scan( (void *) send, (void *) recv, n, MPI_UNSIGNED_LONG, MPI_MIN, communicator ); - PROFILE_STOP( "minScan", profile_level ); -} -// long int -template<> -void MPI_CLASS::call_minScan( const long int *send, long int *recv, int n ) const -{ - PROFILE_START( "minScan", profile_level ); - MPI_Scan( (void *) send, (void *) recv, n, MPI_LONG, MPI_MIN, communicator ); - PROFILE_STOP( "minScan", profile_level ); -} -// size_t -#ifdef USE_WINDOWS -template<> -void MPI_CLASS::call_minScan( const size_t *send, size_t *recv, int n ) const -{ - MPI_ASSERT( MPI_SIZE_T != 0 ); - PROFILE_START( "minScan", profile_level ); - MPI_Scan( (void *) send, (void *) recv, n, MPI_SIZE_T, MPI_MIN, communicator ); - PROFILE_STOP( "minScan", profile_level ); -} -#endif -// float -template<> -void MPI_CLASS::call_minScan( const float *send, float *recv, int n ) const -{ - PROFILE_START( "minScan", profile_level ); - MPI_Scan( (void *) send, (void *) recv, n, MPI_FLOAT, MPI_MIN, communicator ); - PROFILE_STOP( "minScan", profile_level ); -} -// double -template<> -void MPI_CLASS::call_minScan( const double *send, double *recv, int n ) const -{ - PROFILE_START( "minScan", profile_level ); - MPI_Scan( (void *) send, (void *) recv, n, MPI_DOUBLE, MPI_MIN, communicator ); - PROFILE_STOP( "minScan", profile_level ); -} -#endif - - -/************************************************************************ - * call_maxScan * - * Note: these specializations are only called when using 
MPI. * - ************************************************************************/ -#ifdef USE_MPI -// unsigned char -template<> -void MPI_CLASS::call_maxScan( - const unsigned char *send, unsigned char *recv, int n ) const -{ - PROFILE_START( "maxScan", profile_level ); - MPI_Scan( (void *) send, (void *) recv, n, MPI_UNSIGNED_CHAR, MPI_MAX, communicator ); - PROFILE_STOP( "maxScan", profile_level ); -} -// char -template<> -void MPI_CLASS::call_maxScan( const char *send, char *recv, int n ) const -{ - PROFILE_START( "maxScan", profile_level ); - MPI_Scan( (void *) send, (void *) recv, n, MPI_SIGNED_CHAR, MPI_MAX, communicator ); - PROFILE_STOP( "maxScan", profile_level ); -} -// unsigned int -template<> -void MPI_CLASS::call_maxScan( - const unsigned int *send, unsigned int *recv, int n ) const -{ - PROFILE_START( "maxScan", profile_level ); - MPI_Scan( (void *) send, (void *) recv, n, MPI_UNSIGNED, MPI_MAX, communicator ); - PROFILE_STOP( "maxScan", profile_level ); -} -// int -template<> -void MPI_CLASS::call_maxScan( const int *send, int *recv, int n ) const -{ - PROFILE_START( "maxScan", profile_level ); - MPI_Scan( (void *) send, (void *) recv, n, MPI_INT, MPI_MAX, communicator ); - PROFILE_STOP( "maxScan", profile_level ); -} -// long int -template<> -void MPI_CLASS::call_maxScan( const long int *send, long int *recv, int n ) const -{ - PROFILE_START( "maxScan", profile_level ); - MPI_Scan( (void *) send, (void *) recv, n, MPI_LONG, MPI_MAX, communicator ); - PROFILE_STOP( "maxScan", profile_level ); -} -// unsigned long int -template<> -void MPI_CLASS::call_maxScan( - const unsigned long int *send, unsigned long int *recv, int n ) const -{ - PROFILE_START( "maxScan", profile_level ); - MPI_Scan( (void *) send, (void *) recv, n, MPI_UNSIGNED_LONG, MPI_MAX, communicator ); - PROFILE_STOP( "maxScan", profile_level ); -} -// size_t -#ifdef USE_WINDOWS -template<> -void MPI_CLASS::call_maxScan( const size_t *send, size_t *recv, int n ) const -{ - MPI_ASSERT( MPI_SIZE_T != 0 ); - PROFILE_START( "maxScan", profile_level ); - MPI_Scan( (void *) send, (void *) recv, n, MPI_SIZE_T, MPI_MAX, communicator ); - PROFILE_STOP( "maxScan", profile_level ); -} -#endif -// float -template<> -void MPI_CLASS::call_maxScan( const float *send, float *recv, int n ) const -{ - PROFILE_START( "maxScan", profile_level ); - MPI_Scan( (void *) send, (void *) recv, n, MPI_INT, MPI_MAX, communicator ); - PROFILE_STOP( "maxScan", profile_level ); -} -// double -template<> -void MPI_CLASS::call_maxScan( const double *send, double *recv, int n ) const -{ - PROFILE_START( "maxScan", profile_level ); - MPI_Scan( (void *) send, (void *) recv, n, MPI_DOUBLE, MPI_MAX, communicator ); - PROFILE_STOP( "maxScan", profile_level ); -} -#endif - - -/************************************************************************ - * Communicate ranks for communication * - ************************************************************************/ -std::vector MPI_CLASS::commRanks( const std::vector &ranks ) const -{ -#ifdef USE_MPI - // Get a byte array with the ranks to communicate - auto data1 = new char[comm_size]; - auto data2 = new char[comm_size]; - memset( data1, 0, comm_size ); - memset( data2, 0, comm_size ); - for ( auto &rank : ranks ) - data1[rank] = 1; - MPI_Alltoall( data1, 1, MPI_CHAR, data2, 1, MPI_CHAR, communicator ); - int N = 0; - for ( int i = 0; i < comm_size; i++ ) - N += data2[i]; - std::vector ranks_out; - ranks_out.reserve( N ); - for ( int i = 0; i < comm_size; i++ ) { - if ( data2[i] ) - 
ranks_out.push_back( i ); - } - delete[] data1; - delete[] data2; - return ranks_out; -#else - return ranks; -#endif -} - - -/************************************************************************ - * Wait functions * - ************************************************************************/ -#ifdef USE_MPI -void MPI_CLASS::wait( MPI_Request request ) -{ - PROFILE_START( "wait", profile_level ); - MPI_Status status; - int flag = 0; - int err = MPI_Test( &request, &flag, &status ); - MPI_ASSERT( err == MPI_SUCCESS ); // Check that the first call is valid - while ( !flag ) { - // Put the current thread to sleep to allow other threads to run - sched_yield(); - // Check if the request has finished - MPI_Test( &request, &flag, &status ); - } - PROFILE_STOP( "wait", profile_level ); -} -int MPI_CLASS::waitAny( int count, MPI_Request *request ) -{ - if ( count == 0 ) - return -1; - PROFILE_START( "waitAny", profile_level ); - int index = -1; - int flag = 0; - auto status = new MPI_Status[count]; - int err = MPI_Testany( count, request, &index, &flag, status ); - MPI_ASSERT( err == MPI_SUCCESS ); // Check that the first call is valid - while ( !flag ) { - // Put the current thread to sleep to allow other threads to run - sched_yield(); - // Check if the request has finished - MPI_Testany( count, request, &index, &flag, status ); - } - MPI_ASSERT( index >= 0 ); // Check that the index is valid - delete[] status; - PROFILE_STOP( "waitAny", profile_level ); - return index; -} -void MPI_CLASS::waitAll( int count, MPI_Request *request ) -{ - if ( count == 0 ) - return; - PROFILE_START( "waitAll", profile_level ); - int flag = 0; - auto status = new MPI_Status[count]; - int err = MPI_Testall( count, request, &flag, status ); - MPI_ASSERT( err == MPI_SUCCESS ); // Check that the first call is valid - while ( !flag ) { - // Put the current thread to sleep to allow other threads to run - sched_yield(); - // Check if the request has finished - MPI_Testall( count, request, &flag, status ); - } - PROFILE_STOP( "waitAll", profile_level ); - delete[] status; -} -std::vector MPI_CLASS::waitSome( int count, MPI_Request *request ) -{ - if ( count == 0 ) - return std::vector(); - PROFILE_START( "waitSome", profile_level ); - std::vector indicies( count, -1 ); - auto *status = new MPI_Status[count]; - int outcount = 0; - int err = MPI_Testsome( count, request, &outcount, &indicies[0], status ); - MPI_ASSERT( err == MPI_SUCCESS ); // Check that the first call is valid - MPI_ASSERT( outcount != MPI_UNDEFINED ); // Check that the first call is valid - while ( outcount == 0 ) { - // Put the current thread to sleep to allow other threads to run - sched_yield(); - // Check if the request has finished - MPI_Testsome( count, request, &outcount, &indicies[0], status ); - } - indicies.resize( outcount ); - delete[] status; - PROFILE_STOP( "waitSome", profile_level ); - return indicies; -} -#else -void MPI_CLASS::wait( MPI_Request request ) -{ - PROFILE_START( "wait", profile_level ); - while ( 1 ) { - // Check if the request is in our list - if ( global_isendrecv_list.find( request ) == global_isendrecv_list.end() ) - break; - // Put the current thread to sleep to allow other threads to run - sched_yield(); - } - PROFILE_STOP( "wait", profile_level ); -} -int MPI_CLASS::waitAny( int count, MPI_Request *request ) -{ - if ( count == 0 ) - return -1; - PROFILE_START( "waitAny", profile_level ); - int index = 0; - while ( 1 ) { - // Check if the request is in our list - bool found_any = false; - for ( int i = 0; i < count; i++ 
) { - if ( global_isendrecv_list.find( request[i] ) == global_isendrecv_list.end() ) { - found_any = true; - index = i; - } - } - if ( found_any ) - break; - // Put the current thread to sleep to allow other threads to run - sched_yield(); - } - PROFILE_STOP( "waitAny", profile_level ); - return index; -} -void MPI_CLASS::waitAll( int count, MPI_Request *request ) -{ - if ( count == 0 ) - return; - PROFILE_START( "waitAll", profile_level ); - while ( 1 ) { - // Check if the request is in our list - bool found_all = true; - for ( int i = 0; i < count; i++ ) { - if ( global_isendrecv_list.find( request[i] ) != global_isendrecv_list.end() ) - found_all = false; - } - if ( found_all ) - break; - // Put the current thread to sleep to allow other threads to run - sched_yield(); - } - PROFILE_STOP( "waitAll", profile_level ); -} -std::vector MPI_CLASS::waitSome( int count, MPI_Request *request ) -{ - if ( count == 0 ) - return std::vector(); - PROFILE_START( "waitSome", profile_level ); - std::vector indicies; - while ( 1 ) { - // Check if the request is in our list - for ( int i = 0; i < count; i++ ) { - if ( global_isendrecv_list.find( request[i] ) == global_isendrecv_list.end() ) - indicies.push_back( i ); - } - if ( !indicies.empty() ) - break; - // Put the current thread to sleep to allow other threads to run - sched_yield(); - } - PROFILE_STOP( "waitSome", profile_level ); - return indicies; -} -#endif - - -/************************************************************************ - * Probe functions * - ************************************************************************/ -#ifdef USE_MPI -int MPI_CLASS::Iprobe( int source, int tag ) const -{ - MPI_INSIST( tag <= d_maxTag, "Maximum tag value exceeded" ); - MPI_INSIST( tag >= 0, "tag must be >= 0" ); - MPI_Status status; - int flag = 0; - MPI_Iprobe( source, tag, communicator, &flag, &status ); - if ( flag == 0 ) - return -1; - int count; - MPI_Get_count( &status, MPI_BYTE, &count ); - MPI_ASSERT( count >= 0 ); - return count; -} -int MPI_CLASS::probe( int source, int tag ) const -{ - MPI_INSIST( tag <= d_maxTag, "Maximum tag value exceeded" ); - MPI_INSIST( tag >= 0, "tag must be >= 0" ); - MPI_Status status; - MPI_Probe( source, tag, communicator, &status ); - int count; - MPI_Get_count( &status, MPI_BYTE, &count ); - MPI_ASSERT( count >= 0 ); - return count; -} -#else -int MPI_CLASS::Iprobe( int, int ) const -{ - MPI_ERROR( "Not implimented for serial codes (Iprobe)" ); - return 0; -} -int MPI_CLASS::probe( int, int ) const -{ - MPI_ERROR( "Not implimented for serial codes (probe)" ); - return 0; -} -#endif - - -/************************************************************************ - * Timer functions * - ************************************************************************/ -#ifdef USE_MPI -double MPI_CLASS::time() { return MPI_Wtime(); } -double MPI_CLASS::tick() { return MPI_Wtick(); } -#else -double MPI_CLASS::time() -{ - auto t = std::chrono::system_clock::now(); - auto ns = std::chrono::duration_cast( t.time_since_epoch() ); - return 1e-9 * ns.count(); -} -double MPI_CLASS::tick() -{ - auto period = std::chrono::system_clock::period(); - return static_cast( period.num ) / static_cast( period.den ); -} -#endif - - -/************************************************************************ - * Serialize a block of code across MPI processes * - ************************************************************************/ -void MPI_CLASS::serializeStart() -{ -#ifdef USE_MPI - using namespace std::chrono_literals; - if ( comm_rank == 
0 ) { - // Start rank 0 immediately - } else { - // Wait for a message from the previous rank - MPI_Request request; - MPI_Status status; - int flag = false, buf = 0; - MPI_Irecv( &buf, 1, MPI_INT, comm_rank - 1, 5627, MPI_COMM_WORLD, &request ); - while ( !flag ) { - MPI_Test( &request, &flag, &status ); - std::this_thread::sleep_for( 50ms ); - } - } -#endif -} -void MPI_CLASS::serializeStop() -{ -#ifdef USE_MPI - using namespace std::chrono_literals; - if ( comm_rank < comm_size - 1 ) { - // Send flag to next rank - MPI_Send( &comm_rank, 1, MPI_INT, comm_rank + 1, 5627, MPI_COMM_WORLD ); - // Wait for final finished flag - int flag = false, buf = 0; - MPI_Request request; - MPI_Status status; - MPI_Irecv( &buf, 1, MPI_INT, comm_size - 1, 5627, MPI_COMM_WORLD, &request ); - while ( !flag ) { - MPI_Test( &request, &flag, &status ); - std::this_thread::sleep_for( 50ms ); - } - } else { - // Send final flag to all ranks - for ( int i = 0; i < comm_size - 1; i++ ) - MPI_Send( &comm_rank, 1, MPI_INT, i, 5627, MPI_COMM_WORLD ); - } -#endif -} - - -/**************************************************************************** - * Function to start/stop MPI * - ****************************************************************************/ -#ifdef USE_EXT_MPI -static bool called_MPI_Init = false; -#endif -bool MPI_CLASS::MPI_Active() -{ -#ifdef USE_EXT_MPI - int MPI_initialized, MPI_finialized; - MPI_Initialized( &MPI_initialized ); - MPI_Finalized( &MPI_finialized ); - return MPI_initialized != 0 && MPI_finialized == 0; -#else - return false; -#endif -} -void MPI_CLASS::start_MPI( int argc, char *argv[], int profile_level ) -{ - changeProfileLevel( profile_level ); - NULL_USE( argc ); - NULL_USE( argv ); -#ifdef USE_EXT_MPI - if ( MPI_Active() ) { - called_MPI_Init = false; - } else { - int provided; - int result = MPI_Init_thread( &argc, &argv, MPI_THREAD_MULTIPLE, &provided ); - if ( result != MPI_SUCCESS ) - MPI_ERROR( "Unable to initialize MPI" ); - if ( provided < MPI_THREAD_MULTIPLE ) - std::cerr << "Warning: Failed to start MPI with MPI_THREAD_MULTIPLE\n"; - called_MPI_Init = true; - } -#endif -} -void MPI_CLASS::stop_MPI() -{ -#ifdef USE_EXT_MPI - int finalized; - MPI_Finalized( &finalized ); - if ( called_MPI_Init && !finalized ) { - MPI_Barrier( MPI_COMM_WORLD ); - MPI_Finalize(); - called_MPI_Init = true; - } -#endif -} - - -} // namespace Utilities - diff --git a/common/MPI.h b/common/MPI.h deleted file mode 100644 index e3fd3e13..00000000 --- a/common/MPI.h +++ /dev/null @@ -1,1152 +0,0 @@ -// This file includes a wrapper class for MPI functions -// Note this is a modified version of the MPI class for the Advanced Multi-Physics Package -// Used with permission - -/* - -Copyright (c) 2012 UT-Battelle, LLC - -All rights reserved. - -Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: -Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. -Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. -Collection of administrative costs for redistribution of the source code or binary form is allowed. However, collection of a royalty or other fee in excess of good faith amount for cost recovery for such redistribution is prohibited. 
- -*/ - -#ifndef included_LBPM_MPI -#define included_LBPM_MPI - - -#include -#include -#include -#include -#include -#include -#include - - -// Include mpi.h (or define MPI objects) -// clang-format off -#ifdef USE_MPI - #include "mpi.h" -#else - typedef int MPI_Comm; - typedef int MPI_Request; - typedef int MPI_Status; - typedef void *MPI_Errhandler; - enum MPI_TYPES { MPI_INT, MPI_FLOAT, MPI_DOUBLE }; - #define MPI_COMM_WORLD ( (MPI_Comm) 0xF4000010 ) - #define MPI_COMM_SELF ( (MPI_Comm) 0xF4000001 ) - #define MPI_COMM_NULL ( (MPI_Comm) 0xF4000000 ) -#endif -// clang-format on - - -namespace Utilities { - - -/** - * \class MPI - * - * @brief Provides C++ wrapper around MPI routines. - * - * Class MPI groups common MPI routines into one globally-accessible - * location. It provides small, simple routines that are common in MPI code. - * In some cases, the calling syntax has been simplified for convenience. - * Moreover, there is no reason to include the preprocessor ifdef/endif - * guards around these calls, since the MPI libraries are not called in - * these routines if the MPI libraries are not being used (e.g., when - * writing serial code). - * Note: Many of the communication routines are templated on type. When using - * unknown types the reduce calls will fail, the send and gather calls should - * succeed provided that the size of the data type object is a fixed size on - * all processors. sizeof(type) must be the same for all elements and processors. - */ -class MPI final -{ -public: - enum class ThreadSupport : int { SINGLE, FUNNELED, SERIALIZED, MULTIPLE }; - -public: // Constructors - /** - *\brief Is MPI active - *\details This returns true if MPI is initailized and not finalized - */ - static bool MPI_active(); - - /** - *\brief Empty constructor - *\details This creates an empty constructor that does not contain an MPI communicator. - */ - MPI(); - - - //! Empty destructor - ~MPI(); - - - /** - * \brief Constructor from existing MPI communicator - * \details This constructor creates a new communicator from an existing MPI communicator. - * This does not create a new internal MPI_Comm, but uses the existing comm. - * Note that by default, this will not free the MPI_Comm object and the user is - * responsible - * for free'ing the MPI_Comm when it is no longer used. This behavior is controlled by the - * optional manage argument. - * \param comm Existing MPI communicator - * \param manage Do we want to manage the comm (free the MPI_Comm when this object leaves - * scope) - */ - MPI( MPI_Comm comm, bool manage = false ); - - - /** - * \brief Constructor from existing communicator - * \details This constructor creates a new communicator from an existing communicator. - * This does not create a new internal MPI_Comm, but uses the existing comm. - * \param comm Existing communicator - */ - MPI( const MPI &comm ); - - - /*! - * Move constructor - * @param rhs Communicator to copy - */ - MPI( MPI &&rhs ); - - - /** - * \brief Assignment operator - * \details This operator overloads the assignment to correctly copy an communicator - * \param comm Existing MPI object - */ - MPI &operator=( const MPI &comm ); - - - /*! - * Move assignment operator - * @param rhs Communicator to copy - */ - MPI &operator=( MPI &&rhs ); - - - /** - * \brief Reset the object - * \details This resets the object to the empty state without an MPI_Comm - */ - void reset(); - - -public: // Member functions - /** - * \brief Get the node name - * \details This function returns a unique name for each node. 
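For orientation while reviewing the removal of this header, a minimal sketch of how the wrapper is typically brought up and queried. It assumes the header is included as "common/MPI.h" and that MPI startup goes through start_MPI; the function name example_hello and the explicit profile level 0 are illustrative, everything else (start_MPI, stop_MPI, getRank, getSize, getNodeName) comes from the declarations in this file.

    #include "common/MPI.h"
    #include <cstdio>

    void example_hello( int argc, char *argv[] )
    {
        Utilities::MPI::start_MPI( argc, argv, 0 );    // initializes MPI only if it is not already active
        Utilities::MPI comm( MPI_COMM_WORLD );         // wrap an existing communicator (not managed)
        std::printf( "rank %d of %d on node %s\n", comm.getRank(), comm.getSize(),
            Utilities::MPI::getNodeName().c_str() );
        Utilities::MPI::stop_MPI();                    // finalizes only if start_MPI called MPI_Init
    }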
- * It is a wrapper for MPI_Get_processor_name. - */ - static std::string getNodeName(); - - - //! Function to return the number of processors available - static int getNumberOfProcessors(); - - - //! Function to return the affinity of the current process - static std::vector getProcessAffinity(); - - - //! Function to set the affinity of the current process - static void setProcessAffinity( const std::vector &procs ); - - - /** - * \brief Load balance the processes within a node - * \details This function will redistribute the processes within a node using the - * process affinities to achieve the desired load balance. - * Note: this is a global operation on the given comm, and it is STRONGLY - * recommended to use COMM_WORLD. - * \param comm The communicator to use (Default is COMM_WORLD) - * \param method The desired load balance method to use: - * 1: Adjust the affinities so all processes share the given processors. - * This effectively allows the OS to handle the load balancing - * by migrating the processes as necessary. This is recommended - * for most users and use cases. (default) - * 2: Adjust the affinities so that the fewest number of processes overlap. - * This will try to give each process a unique set of processors while - * ensuring that each process has at least N_min processes. - * \param procs An optional list of processors to use. By default, setting this to an - * empty vector will use all available processors on the given node. - * \param N_min The minimum number of processors for any process (-1 indicates all available - * processors). - * \param N_max The maximum number of processors for any process (-1 indicates all available - * processors). - * - */ - static void balanceProcesses( const MPI &comm = MPI( MPI_COMM_WORLD ), const int method = 1, - const std::vector &procs = std::vector(), const int N_min = 1, - const int N_max = -1 ); - - - //! Query the level of thread support - static ThreadSupport queryThreadSupport(); - - - /** - * \brief Generate a random number - * \details This generates a random number that is consistent across the comm - */ - size_t rand() const; - - - /** - * \brief Split an existing communicator - * \details This creates a new communicator by splitting an existing communicator. - * See MPI_Comm_split for information on how the underlying split will occur. - * Note: the underlying MPI_Comm object will be free'd automatically when it is no longer - * used by any MPI objects. - * \param color Control of subset assignment (nonnegative integer). - * Processes with the same color are in the same new communicator . - * -1: processor will not be a member of any object (NULL object will be returned) - * \param key Control of rank assignment (integer). - * Note that, for a fixed color, the keys need not be unique. The processes will - * be sorted - * in ascending order according to this key, then all the processes in a given - * color will - * have the relative rank order as they did in their parent group. (See - * MPI_Comm_split) - */ - MPI split( int color, int key = -1 ) const; - - - /** - * \brief Split an existing communicator by node - * \details This creates a new communicator by splitting an existing communicator - * by the node. This will result in a separate MPI_Comm for each physical node. - * Internally this will use MPI_Get_processor_name to identify the nodes. - * Note: the underlying MPI_Comm object will be free'd automatically when it is no longer - * used by any MPI objects) - * \param key Control of rank assignment (integer). 
- * Note that, for a fixed color, the keys need not be unique. The processes will - * be sorted - * in ascending order according to this key, then all the processes in a given - * color will - * have the relative rank order as they did in their parent group. (See - * MPI_Comm_split) - */ - MPI splitByNode( int key = -1 ) const; - - - /** - * \brief Duplicate an existing communicator - * \details This creates a new communicator by duplicating an existing communicator. - * The resulting communicator will exist over the same processes, but have a different - * context. - * Note: the underlying MPI_Comm object will be free'd automatically when it is no longer - * used by any MPI objects. - */ - MPI dup() const; - - - /** - * \brief Create a communicator from the intersection of two communicators - * \details This creates a new communicator by intersecting two existing communicators. - * Any processors that do not contain the both communicators will receive a NULL communicator. - * There are 3 possible cases: - * The communicators are disjoint (a null communicator will be returned on all processors). - * One communicator is a sub communicator of another. This will require communication on - * the smaller communicator only. - * The communicators partially overlap. This will require communication on the first - * communicator. - */ - static MPI intersect( const MPI &comm1, const MPI &comm2 ); - - - /** - * Check if the current communicator is NULL - */ - bool isNull() const { return d_isNull; } - - - /** - * \brief Return the global ranks for the comm - * \details This returns a vector which contains the global ranks for each - * member of the communicator. The global ranks are defined according to WORLD comm. - */ - std::vector globalRanks() const; - - - /** - * Get the current MPI communicator. - * Note: The underlying MPI_Comm object may be free'd by the object when it is no - * longer used by any communicators. If the user has made a copy using the - * getCommunicator routine, then it may be free'd without user knowledge. The - * user is responsible for checking if the communicator is valid, or keeping a - * copy of the communicator that provided the MPI_Communicator. - */ - const MPI_Comm &getCommunicator() const { return communicator; } - - - /** - * \brief Overload operator == - * \details Overload operator comm1 == comm2. Two MPI objects are == if they share the same - * communicator. - * Note: this is a local operation. - */ - bool operator==( const MPI & ) const; - - - /** - * \brief Overload operator != - * \details Overload operator comm1 != comm2. Two MPI objects are != if they - * do not share the same communicator. - * Note: this is a local operation. - */ - bool operator!=( const MPI & ) const; - - - /** - * \brief Overload operator < - * \details Overload operator comm1 < comm2. One MPI object is < another iff all the - * processors in the first object are also in the second. Additionally, the second - * object must contain at least one processor that is not in the first object. - * This is a collective operation, based on the first communicator. - * As a result all processors on the first communicator will return the same value, - * while any processors that are not on the first communicator will return an unknown value. - * Additionally, all processors on the first object MUST call this routine and will be - * synchronized through this call (there is an internalallReduce). 
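A short sketch of the communicator-management calls documented above (split, splitByNode, dup); the color choice and variable names are illustrative only.

    #include "common/MPI.h"

    void example_split( const Utilities::MPI &world )
    {
        // Processes with the same color end up in the same sub-communicator
        int color = ( world.getRank() < world.getSize() / 2 ) ? 0 : 1;
        Utilities::MPI half = world.split( color );
        // One communicator per physical node (identified via MPI_Get_processor_name)
        Utilities::MPI node = world.splitByNode();
        // Same ranks, new context; keeps separate traffic from colliding on tags
        Utilities::MPI copy = world.dup();
    }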
- */ - bool operator<( const MPI & ) const; - - - /** - * \brief Overload operator <= - * \details Overload operator comm1 <= comm2. One MPI object is <= another iff all the - * processors in the first object are also in the second. This is a collective operation, - * based on the first communicator. As a result all processors on the first communicator - * will return the same value, while any processors that are not on the first communicator - * will return an unknown value. Additionally, all processors on the first object MUST - * call this routine and will be synchronized through this call (there is an internal - * allReduce). - */ - bool operator<=( const MPI & ) const; - - - /** - * \brief Overload operator > - * \details Overload operator comm1 > comm2. One MPI object is > another iff all the - * processors in the second object are also in the first. Additionally, the first object - * must contain at least one processor that is not in the second object. - * This is a collective operation, based on the first communicator. - * As a result all processors on the first communicator will return the same value, - * while any processors that are not on the first communicator will return an unknown value. - * Additionally, all processors on the first object MUST call this routine and will be - * synchronized through this call (there is an internal allReduce). - */ - bool operator>( const MPI & ) const; - - - /** - * \brief Overload operator >= - * \details Overload operator comm1 >= comm2. One MPI object is > another iff all the - * processors in the second object are also in the first. Additionally, the first object - * must contain at least one processor that is not in the second object. - * This is a collective operation, based on the first communicator. - * As a result all processors on the first communicator will return the same value, while any - * processors that are not on the first communicator will return an unknown value. - * Additionally, all processors on the first object MUST call this routine and will be - * synchronized through this call (there is an internal allReduce). - */ - bool operator>=( const MPI & ) const; - - - /** - * \brief Compare to another communicator - * \details This compares the current communicator to another communicator. - * This returns 1 if the two communicators are equal (they share the same MPI communicator), - * 2 if the contexts and groups are the same, 3 if different contexts but identical groups, - * 4 if different contexts but similar groups, and 0 otherwise. - * Note: this is a local operation. - */ - int compare( const MPI & ) const; - - - /** - * Return the processor rank (identifier) from 0 through the number of - * processors minus one. - */ - int getRank() const { return comm_rank; } - - - /** - * Return the number of processors. - */ - int getSize() const { return comm_size; } - - - /** - * Return the maximum tag - */ - int maxTag() const { return d_maxTag; } - - - /** - * \brief Return a new tag - * \details This routine will return an unused tag for communication. - * Note that this tag may match a user tag, but this function will - * not return two duplicate tags. This is a global operation. - */ - int newTag(); - - - /** - * Call MPI_Abort or exit depending on whether running with one or more - * processes and value set by function above, if called. The default is - * to call exit(-1) if running with one processor and to call MPI_Abort() - * otherwise. This function avoids having to guard abort calls in - * application code. 
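A brief sketch of the query interface above (getRank, getSize, maxTag, newTag, operator==). The exact relationship between a reserved tag and maxTag, and the comparison result for a duplicated communicator, are stated loosely here on purpose.

    void example_tags( Utilities::MPI &comm )
    {
        // Reserve an unused tag (global operation); expected to stay within [0, maxTag()]
        int tag = comm.newTag();
        Utilities::MPI other = comm.dup();
        // operator== compares the underlying MPI_Comm, so a duplicate is not equal
        bool same = ( comm == other );
        (void) tag; (void) same;
    }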
- */ - void abort() const; - - - /** - * Set boolean flag indicating whether exit or abort is called when running - * with one processor. Calling this function influences the behavior of - * calls to abort(). By default, the flag is true meaning that - * abort() will be called. Passing false means exit(-1) will be called. - */ - void setCallAbortInSerialInsteadOfExit( bool flag = true ); - - - /** - * \brief Boolean all reduce - * \details This function performs a boolean all reduce across all processors. - * It returns true iff all processor are true; - * \param value The input value for the all reduce - */ - bool allReduce( const bool value ) const; - - - /** - * \brief Boolean any reduce - * \details This function performs a boolean any reduce across all processors. - * It returns true if any processor is true; - * \param value The input value for the all reduce - */ - bool anyReduce( const bool value ) const; - - - /** - * \brief Sum Reduce - * \details This function performs a sum all reduce across all processor. - * It returns the sum across all processors; - * \param value The input value for the all reduce - */ - template - type sumReduce( const type value ) const; - - - /** - * \brief Sum Reduce - * \details Perform an array sum Reduce across all nodes. Each - * processor contributes an array of values, and the - * element-wise sum is returned in the same array. - * \param x The input/output array for the reduce - * \param n The number of values in the array (must match on all nodes) - */ - template - void sumReduce( type *x, const int n = 1 ) const; - - - /** - * \brief Sum Reduce - * \details Perform an array sum Reduce across all nodes. Each - * processor contributes an array of values, and the - * element-wise sum is returned in the same array. - * \param x The input array for the reduce - * \param y The output array for the reduce - * \param n The number of values in the array (must match on all nodes) - */ - template - void sumReduce( const type *x, type *y, const int n = 1 ) const; - - - /** - * \brief Min Reduce - * \details This function performs a min all reduce across all processor. - * It returns the minimum value across all processors; - * \param value The input value for the all reduce - */ - template - type minReduce( const type value ) const; - - - /** - * \brief Sum Reduce - * \details Perform an array min Reduce across all nodes. Each - * processor contributes an array of values, and the - * element-wise minimum is returned in the same array. - * - * If a 'rank_of_min' argument is provided, it will set the array to the - * rank of process holding the minimum value. Like the double argument, - * the size of the supplied 'rank_of_min' array should be n. - * \param x The input/output array for the reduce - * \param n The number of values in the array (must match on all nodes) - * \param rank_of_min Optional array indicating the rank of the processor containing the - * minimum value - */ - template - void minReduce( type *x, const int n = 1, int *rank_of_min = nullptr ) const; - - - /** - * \brief Sum Reduce - * \details Perform an array min Reduce across all nodes. Each - * processor contributes an array of values, and the - * element-wise minimum is returned in the same array. - * - * If a 'rank_of_min' argument is provided, it will set the array to the - * rank of process holding the minimum value. Like the double argument, - * the size of the supplied 'rank_of_min' array should be n. 
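For reference, a minimal sketch of the reduction interface documented above; local_err, tol, and the three-component array are hypothetical stand-ins.

    void example_reduce( const Utilities::MPI &comm, double local_err, double tol )
    {
        double total = comm.sumReduce( local_err );        // scalar form returns the global sum
        double v[3]  = { 1.0, 2.0, 3.0 };
        int    rank_of_min[3];
        comm.minReduce( v, 3, rank_of_min );               // in-place element-wise min, plus owning rank
        bool all_ok = comm.allReduce( local_err < tol );   // true only if every rank is below tol
        (void) total; (void) all_ok;
    }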
- * \param x The input array for the reduce - * \param y The output array for the reduce - * \param n The number of values in the array (must match on all nodes) - * \param rank_of_min Optional array indicating the rank of the processor containing the - * minimum value - */ - template - void minReduce( const type *x, type *y, const int n = 1, int *rank_of_min = nullptr ) const; - - - /** - * \brief Max Reduce - * \details This function performs a max all reduce across all processor. - * It returns the maximum value across all processors; - * \param value The input value for the all reduce - */ - template - type maxReduce( const type value ) const; - - - /** - * \brief Sum Reduce - * \details Perform an array max Reduce across all nodes. Each - * processor contributes an array of values, and the - * element-wise maximum is returned in the same array. - * - * If a 'rank_of_min' argument is provided, it will set the array to the - * rank of process holding the minimum value. Like the double argument, - * the size of the supplied 'rank_of_min' array should be n. - * \param x The input/output array for the reduce - * \param n The number of values in the array (must match on all nodes) - * \param rank_of_max Optional array indicating the rank of the processor containing the - * minimum value - */ - template - void maxReduce( type *x, const int n = 1, int *rank_of_max = nullptr ) const; - - - /** - * \brief Sum Reduce - * \details Perform an array max Reduce across all nodes. Each - * processor contributes an array of values, and the - * element-wise maximum is returned in the same array. - * - * If a 'rank_of_min' argument is provided, it will set the array to the - * rank of process holding the minimum value. Like the double argument, - * the size of the supplied 'rank_of_min' array should be n. - * \param x The input array for the reduce - * \param y The output array for the reduce - * \param n The number of values in the array (must match on all nodes) - * \param rank_of_max Optional array indicating the rank of the processor containing the - * minimum value - */ - template - void maxReduce( const type *x, type *y, const int n = 1, int *rank_of_max = nullptr ) const; - - - /** - * \brief Scan Sum Reduce - * \details Computes the sum scan (partial reductions) of data on a collection of processes. - * See MPI_Scan for more information. - * \param x The input array for the scan - * \param y The output array for the scan - * \param n The number of values in the array (must match on all nodes) - */ - template - void sumScan( const type *x, type *y, const int n = 1 ) const; - - - /** - * \brief Scan Min Reduce - * \details Computes the min scan (partial reductions) of data on a collection of processes. - * See MPI_Scan for more information. - * \param x The input array for the scan - * \param y The output array for the scan - * \param n The number of values in the array (must match on all nodes) - */ - template - void minScan( const type *x, type *y, const int n = 1 ) const; - - - /** - * \brief Scan Max Reduce - * \details Computes the max scan (partial reductions) of data on a collection of processes. - * See MPI_Scan for more information. 
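A small sketch of the scan interface documented above, used here for the common task of turning per-rank counts into global offsets; n_local is a hypothetical per-rank count.

    void example_scan( const Utilities::MPI &comm, long int n_local )
    {
        long int n_inclusive = 0;
        comm.sumScan( &n_local, &n_inclusive, 1 );   // inclusive prefix sum (MPI_Scan semantics)
        long int offset = n_inclusive - n_local;     // exclusive offset of this rank's block
        (void) offset;
    }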
- * \param x The input array for the scan - * \param y The output array for the scan - * \param n The number of values in the array (must match on all nodes) - */ - template - void maxScan( const type *x, type *y, const int n = 1 ) const; - - - /** - * \brief Broadcast - * \details This function broadcasts a value from root to all processors - * \param value The input value for the broadcast. - * \param root The processor performing the broadcast - */ - template - type bcast( const type &value, const int root ) const; - - - /** - * \brief Broadcast - * \details This function broadcasts an array from root to all processors - * \param value The input/output array for the broadcast - * \param n The number of values in the array (must match on all nodes) - * \param root The processor performing the broadcast - */ - template - void bcast( type *value, const int n, const int root ) const; - - - /** - * Perform a global barrier across all processors. - */ - void barrier() const; - - - /*! - * @brief This function sends an MPI message with an array to another processor. - * - * If the receiving processor knows in advance the length - * of the array, use "send_length = false;" otherwise, - * this processor will first send the length of the array, - * then send the data. This call must be paired with a - * matching call to recv. - * - * @param buf Pointer to array buffer with length integers. - * @param length Number of integers in buf that we want to send. - * @param recv Receiving processor number. - * @param tag Optional integer argument specifying an integer tag - * to be sent with this message. Default tag is 0. - * The matching recv must share this tag. - */ - template - void send( const type *buf, const int length, const int recv, int tag = 0 ) const; - - - /*! - * @brief This function sends an MPI message with an array of bytes - * (MPI_BYTES) to receiving_proc_number. - * - * This call must be paired with a matching call to recvBytes. - * - * @param buf Void pointer to an array of number_bytes bytes to send. - * @param N_bytes Integer number of bytes to send. - * @param recv Receiving processor number. - * @param tag Optional integer argument specifying an integer tag - * to be sent with this message. Default tag is 0. - * The matching recv must share this tag. - */ - void sendBytes( const void *buf, const int N_bytes, const int recv, int tag = 0 ) const; - - - /*! - * @brief This function sends an MPI message with an array - * to another processor using a non-blocking call. - * The receiving processor must know the length of the array. - * This call must be paired with a matching call to Irecv. - * - * @param buf Pointer to array buffer with length integers. - * @param length Number of integers in buf that we want to send. - * @param recv_proc Receiving processor number. - * @param tag Integer argument specifying an integer tag - * to be sent with this message. - */ - template - MPI_Request Isend( - const type *buf, const int length, const int recv_proc, const int tag ) const; - - - /*! - * @brief This function sends an MPI message with an array of bytes - * (MPI_BYTES) to receiving_proc_number using a non-blocking call. - * The receiving processor must know the number of bytes to receive. - * This call must be paired with a matching call to IrecvBytes. - * - * @param buf Void pointer to an array of number_bytes bytes to send. - * @param N_bytes Integer number of bytes to send. - * @param recv_proc Receiving processor number. 
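A minimal sketch of the broadcast and barrier calls documented above; the value 42 and root rank 0 are arbitrary.

    void example_bcast( const Utilities::MPI &comm )
    {
        int n = ( comm.getRank() == 0 ) ? 42 : 0;
        n = comm.bcast( n, 0 );     // scalar broadcast from root rank 0
        comm.barrier();             // global synchronization across the communicator
        (void) n;
    }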
- * @param tag Integer argument specifying an integer tag - * to be sent with this message. - */ - MPI_Request IsendBytes( - const void *buf, const int N_bytes, const int recv_proc, const int tag ) const; - - - /*! - * @brief This function receives an MPI message with a data - * array from another processor. - * - * If this processor knows in advance the length of the array, - * use "get_length = false;" otherwise we will get the return size. - * This call must be paired with a matching call to send. - * - * @param buf Pointer to integer array buffer with capacity of length integers. - * @param length If get_length==true: The number of elements to be received, otherwise - * the maximum number of values that can be stored in buf. - * On output the number of received elements. - * @param send Processor number of sender. - * @param tag Optional integer argument specifying a tag which must be matched - * by the tag of the incoming message. Default tag is 0. - */ - template - inline void recv( type *buf, int length, const int send, int tag ) const - { - int length2 = length; - recv( buf, length2, send, false, tag ); - } - - - /*! - * @brief This function receives an MPI message with a data - * array from another processor. - * - * If this processor knows in advance the length of the array, - * use "get_length = false;" otherwise we will get the return size. - * This call must be paired with a matching call to send. - * - * @param buf Pointer to integer array buffer with capacity of length integers. - * @param length If get_length==true: The number of elements to be received, otherwise - * the maximum number of values that can be stored in buf. - * On output the number of received elements. - * @param send Processor number of sender. - * @param get_length Optional boolean argument specifying if we first - * need to check the message size to get the size of the array. - * Default value is true. - * @param tag Optional integer argument specifying a tag which must be matched - * by the tag of the incoming message. Default tag is 0. - */ - template - void recv( type *buf, int &length, const int send, const bool get_length, int tag ) const; - - - /*! - * @brief This function receives an MPI message with an array of - * max size number_bytes (MPI_BYTES) from any processor. - * - * This call must be paired with a matching call to sendBytes. - * - * @param buf Void pointer to a buffer of size number_bytes bytes. - * @param N_bytes Integer number specifying size of buf in bytes. - * @param send Integer number specifying size of buf in bytes. - * @param tag Optional integer argument specifying a tag which - * must be matched by the tag of the incoming message. Default - * tag is 0. - */ - void recvBytes( void *buf, int &N_bytes, const int send, int tag = 0 ) const; - - - /*! - * @brief This function receives an MPI message with a data - * array from another processor using a non-blocking call. - * - * @param buf Pointer to integer array buffer with capacity of length integers. - * @param length Maximum number of values that can be stored in buf. - * @param send_proc Processor number of sender. - * @param tag Optional integer argument specifying a tag which must - * be matched by the tag of the incoming message. - */ - template - MPI_Request Irecv( type *buf, const int length, const int send_proc, const int tag ) const; - - - /*! - * @brief This function receives an MPI message with an array of - * max size number_bytes (MPI_BYTES) from any processor. 
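To illustrate the point-to-point interface above, a hedged sketch pairing a blocking send with a non-blocking receive; the buffer size and tag are arbitrary.

    #include <vector>

    void example_p2p( const Utilities::MPI &comm )
    {
        const int tag = 7;
        std::vector<double> buf( 100 );
        if ( comm.getRank() == 0 ) {
            comm.send( buf.data(), (int) buf.size(), 1, tag );         // blocking send to rank 1
        } else if ( comm.getRank() == 1 ) {
            MPI_Request req = comm.Irecv( buf.data(), 100, 0, tag );   // non-blocking receive from rank 0
            Utilities::MPI::wait( req );                               // spins on MPI_Test with sched_yield
        }
    }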
- * - * This call must be paired with a matching call to sendBytes. - * - * @param buf Void pointer to a buffer of size number_bytes bytes. - * @param N_bytes Integer number specifying size of buf in bytes. - * @param send_proc Processor number of sender. - * @param tag Integer argument specifying a tag which must - * be matched by the tag of the incoming message. - */ - MPI_Request IrecvBytes( - void *buf, const int N_bytes, const int send_proc, const int tag ) const; - - - /*! - * Each processor sends every other processor a single value. - * @param[in] x Input value for allGather - * @return Output array for allGather - */ - template - std::vector allGather( const type &x ) const; - - - /*! - * Each processor sends every other processor an array - * @param[in] x Input array for allGather - * @return Output array for allGather - */ - template - std::vector allGather( const std::vector &x_in ) const; - - - /*! - * Each processor sends every other processor a single value. - * The x_out array should be preallocated to a length equal - * to the number of processors. - * @param x_in Input value for allGather - * @param x_out Output array for allGather (must be preallocated to the size of the - * communicator) - */ - template - void allGather( const type &x_in, type *x_out ) const; - - - /*! - * Each processor sends an array of data to all other processors. - * Each processor receives the values from all processors and gathers them - * to a single array. If successful, the total number of received - * elements will be returned. - * @param send_data Input array - * @param send_cnt The number of values to send - * @param recv_data Output array of received values - * @param recv_cnt The number of values to receive from each processor (N). - * If known, this should be provided as an input. Otherwise - * it is an optional output that will return the number of - * received values from each processor. - * @param recv_disp The displacement (relative to the start of the array) - * from which to store the data received from processor i. - * If known, this should be provided as an input. Otherwise - * it is an optional output that will return the starting location - * (relative to the start of the array) for the received data from - * processor i. - * @param known_recv Are the received counts and displacements known. - * If the received sizes are known, then they must be provided, - * and an extra communication step is not necessary. If the received - * sizes are not known, then an extra communication step will occur - * internally - * and the sizes and displacements will be returned (if desired). - */ - template - int allGather( const type *send_data, const int send_cnt, type *recv_data, - int *recv_cnt = nullptr, int *recv_disp = nullptr, bool known_recv = false ) const; - - - /*! - * This function combines sets from different processors to create a single master set - * @param set Input/Output std::set for the gather. - */ - template - void setGather( std::set &set ) const; - - - /*! - * This function combines std::maps from different processors to create a single master std::map - * If two or more ranks share the same key, the lowest rank will be used - * @param map Input/Output std::map for the gather. - */ - template - void mapGather( std::map &map ) const; - - - /*! - * Each processor sends an array of n values to each processor. - * Each processor sends an array of n values to each processor. 
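For reference, a sketch of the gather helpers documented above; n_local and the set contents are hypothetical.

    #include <set>
    #include <vector>

    void example_gather( const Utilities::MPI &comm, int n_local )
    {
        // Scalar form: one value per rank, returned as a vector of length getSize()
        std::vector<int> counts = comm.allGather( n_local );
        // Set form: union of the per-rank sets; duplicates collapse to a single entry
        std::set<int> owned = { comm.getRank(), comm.getRank() + 1 };
        comm.setGather( owned );
        (void) counts;
    }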
- * The jth block of data is sent from processor i to processor j and placed - * in the ith block on the receiving processor. In the variable - * description, N is the size of the communicator. Note that this is a - * blocking global communication. - * @param n The number of elements in each data block to send. - * @param send_data Input array (nxN) - * @param recv_data Output array of received values (nxN) - */ - template - void allToAll( const int n, const type *send_data, type *recv_data ) const; - - - /*! - * Each processor sends an array of data to the different processors. - * Each processor may send any size array to any processor. In the variable - * description, N is the size of the communicator. Note that this is a - * blocking global communication. If successful, the total number of received - * elements will be returned. - * @param send_data Input array - * @param send_cnt The number of values to send to each processor (N) - * @param send_disp The displacement (relative to the start of the array) - * from which to send to processor i - * @param recv_data Output array of received values - * @param recv_cnt The number of values to receive from each processor (N). - * If known, this should be provided as an input. Otherwise - * it is an optional output that will return the number of - * received values from each processor. - * @param recv_disp The displacement (relative to the start of the array) - * from which to send to processor i. - * If known, this should be provided as an input. Otherwise - * it is an optional output that will return the starting location - * (relative to the start of the array) for the received data from - * processor i. - * @param known_recv Are the received counts and displacements known. - * If the received sizes are known, then they must be provided, - * and an extra communication step is not necessary. If the received - * sizes are not know, then an extra communication step will occur - * internally - * and the sizes and displacements will be returned (if desired). - */ - template - int allToAll( const type *send_data, const int send_cnt[], const int send_disp[], - type *recv_data, int *recv_cnt = nullptr, int *recv_disp = nullptr, - bool known_recv = false ) const; - - - /*! - * \brief Send a list of proccesor ids to communicate - * \details This function communicates a list of proccesors to communicate. - * Given a list of ranks that we want to send/receieve data to/from, this routine - * will communicate that set to the other ranks returning the list of processors - * that want to communication with the current rank. - * Note: this routine will involved global communication - * \param ranks List of ranks that the current rank wants to communicate with - * \return List of ranks that want to communicate with the current processor - */ - std::vector commRanks( const std::vector &ranks ) const; - - - /*! - * \brief Wait for a communication to finish - * \details Wait for a communication to finish. - * Note: this does not require a communicator. - * \param request Communication request to wait for (returned for Isend or Irecv) - */ - static void wait( MPI_Request request ); - - - /*! - * \brief Wait for any communication to finish. - * \details This function waits for any of the given communication requests to finish. - * It returns the index of the communication request that finished. - * Note: this does not require a communicator. 
- * \param count Number of communications to check - * \param request Array of communication requests to wait for (returned for Isend or Irecv) - */ - static int waitAny( int count, MPI_Request *request ); - - - /*! - * \brief Wait for all communications to finish. - * \details This function waits for all of the given communication requests to finish. - * Note: this does not require a communicator. - * \param count Number of communications to check - * \param request Array of communication requests to wait for (returned for Isend or Irecv) - */ - static void waitAll( int count, MPI_Request *request ); - - - /*! - * \brief Wait for some communications to finish. - * \details This function waits for one (or more) communications to finish. - * It returns an array of the indicies that have finished. - * Note: this does not require a communicator. - * \param count Number of communications to check - * \param request Array of communication requests to wait for (returned for Isend or Irecv) - */ - static std::vector waitSome( int count, MPI_Request *request ); - - - /*! - * \brief Nonblocking test for a message - * \details This function performs a non-blocking test for a message. - * It will return the number of bytes in the message if a message with - * the specified source and tag (on the current communicator) is available. - * Otherwise it will return -1. - * \param source source rank (-1: any source) - * \param tag tag (-1: any tag) - */ - int Iprobe( int source = -1, int tag = -1 ) const; - - - /*! - * \brief Blocking test for a message - * \details This function performs a blocking test for a message. - * It will return the number of bytes in the message when a message with - * the specified source and tag (on the current communicator) is available - * \param source source rank (-1: any source) - * \param tag tag (-1: any tag) - */ - int probe( int source = -1, int tag = -1 ) const; - - - /*! - * \brief Start a serial region - * \details This function will serialize MPI processes so that they run - * one at a time. A call to serializeStart must be followed by a call - * to serializeStop after the commands to be executed. - * Note: the ranks will be run in order. - */ - void serializeStart(); - - - /*! - * \brief Stop a serial region - * \details Stop a serial region. See serializeStart for more information. - */ - void serializeStop(); - - - /*! - * \brief Elapsed time - * \details This function returns the elapsed time on the calling processor - * since an arbitrary point in the past (seconds). It is a wrapper to MPI_Wtime. - * See "tick" for the timer resolution in seconds. - * The time may or may not be synchronized across processors depending on the MPI - * implementation. Refer to MPI documentation for the desired platform for more information. - */ - static double time(); - - - /*! - * \brief Timer resolution - * \details This function returns the timer resolution used by "time" - */ - static double tick(); - - - /*! - * \brief Change the level of the internal timers - * \details This function changes the level of the timers used to profile MPI - * \param level New level of the timers - */ - static void changeProfileLevel( int level ) { profile_level = level; } - - - //! Return the total number of MPI_Comm objects that have been created - static size_t MPI_Comm_created() { return N_MPI_Comm_created; } - - //! Return the total number of MPI_Comm objects that have been destroyed - static size_t MPI_Comm_destroyed() { return N_MPI_Comm_destroyed; } - - //! 
Return details about MPI - static std::string info(); - - //! Return the MPI version number { major, minor } - static std::array version(); - - //! Check if MPI is active - static bool MPI_Active(); - - //! Start MPI - static void start_MPI( int argc_in, char *argv_in[], int profile_level = 0 ); - - //! Stop MPI - static void stop_MPI(); - - -private: // Private helper functions for templated MPI operations; - template - void call_sumReduce( type *x, const int n = 1 ) const; - template - void call_sumReduce( const type *x, type *y, const int n = 1 ) const; - template - void call_minReduce( type *x, const int n = 1, int *rank_of_min = nullptr ) const; - template - void call_minReduce( - const type *x, type *y, const int n = 1, int *rank_of_min = nullptr ) const; - template - void call_maxReduce( type *x, const int n = 1, int *rank_of_max = nullptr ) const; - template - void call_maxReduce( - const type *x, type *y, const int n = 1, int *rank_of_max = nullptr ) const; - template - void call_bcast( type *x, const int n, const int root ) const; - template - void call_allGather( const type &x_in, type *x_out ) const; - template - void call_allGather( - const type *x_in, int size_in, type *x_out, int *size_out, int *disp_out ) const; - template - void call_sumScan( const type *x, type *y, int n = 1 ) const; - template - void call_minScan( const type *x, type *y, int n = 1 ) const; - template - void call_maxScan( const type *x, type *y, int n = 1 ) const; - template - void call_allToAll( const type *send_data, const int send_cnt[], const int send_disp[], - type *recv_data, const int *recv_cnt, const int *recv_disp ) const; - - -private: // data members - // The internal MPI communicator - MPI_Comm communicator; - - // Is the communicator NULL - bool d_isNull; - - // Do we want to manage this communicator - bool d_manage; - - // Do we want to call MPI_abort instead of exit - bool d_call_abort; - - // The level for the profiles of MPI - static short profile_level; - - // The rank and size of the communicator - int comm_rank, comm_size; - - // The ranks of the comm in the global comm - mutable int *volatile d_ranks; - - // Some attributes - int d_maxTag; - int *volatile d_currentTag; - - /* How many objects share the same underlying MPI communicator. - * When the count goes to 0, the MPI comm will be free'd (assuming it was created - * by an communicator). This may not be perfect, but is likely to be good enough. - * Note that for thread safety, any access to this variable should be blocked for thread safety. - * The value of count MUST be volatile to ensure the correct value is always used. - */ - std::atomic_int *volatile d_count; - - // Add a variable for data alignment (necessary for some Intel builds) - double tmp_alignment; - - /* We want to keep track of how many MPI_Comm objects we have created over time. - * Like the count, for thread safety this should be blocked, however the most likely error - * caused by not blocking is a slight error in the MPI count. Since this is just for reference - * we do not need to block (recognizing that the value may not be 100% accurate). 
- */
-    static volatile unsigned int N_MPI_Comm_created;
-    static volatile unsigned int N_MPI_Comm_destroyed;
-};
-
-
-} // namespace Utilities
-
-
-// Include the default instantiations
-// \cond HIDDEN_SYMBOLS
-#include "common/MPI.I"
-// \endcond
-
-
-#endif
diff --git a/common/MPI_Helpers.cpp b/common/MPI_Helpers.cpp
new file mode 100644
index 00000000..736a2f02
--- /dev/null
+++ b/common/MPI_Helpers.cpp
@@ -0,0 +1,266 @@
+#include "common/MPI_Helpers.h"
+#include "common/Utilities.h"
+
+
+/********************************************************
+* Return the MPI data type                              *
+********************************************************/
+template<> MPI_Datatype getMPItype<char>() {
+    return MPI_CHAR;
+}
+template<> MPI_Datatype getMPItype<unsigned char>() {
+    return MPI_UNSIGNED_CHAR;
+}
+template<> MPI_Datatype getMPItype<int>() {
+    return MPI_INT;
+}
+template<> MPI_Datatype getMPItype<long int>() {
+    return MPI_LONG;
+}
+template<> MPI_Datatype getMPItype<unsigned long>() {
+    return MPI_UNSIGNED_LONG;
+}
+template<> MPI_Datatype getMPItype<long long>() {
+    return MPI_LONG_LONG;
+}
+template<> MPI_Datatype getMPItype<float>() {
+    return MPI_FLOAT;
+}
+template<> MPI_Datatype getMPItype<double>() {
+    return MPI_DOUBLE;
+}
+
+
+/********************************************************
+* Concrete implementations for packing/unpacking        *
+********************************************************/
+// unsigned char
+template<>
+size_t packsize( const unsigned char& )
+{
+    return sizeof(unsigned char);
+}
+template<>
+void pack( const unsigned char& rhs, char *buffer )
+{
+    memcpy(buffer,&rhs,sizeof(unsigned char));
+}
+template<>
+void unpack( unsigned char& data, const char *buffer )
+{
+    memcpy(&data,buffer,sizeof(unsigned char));
+}
+// char
+template<>
+size_t packsize( const char& )
+{
+    return sizeof(char);
+}
+template<>
+void pack( const char& rhs, char *buffer )
+{
+    memcpy(buffer,&rhs,sizeof(char));
+}
+template<>
+void unpack( char& data, const char *buffer )
+{
+    memcpy(&data,buffer,sizeof(char));
+}
+// int
+template<>
+size_t packsize( const int& )
+{
+    return sizeof(int);
+}
+template<>
+void pack( const int& rhs, char *buffer )
+{
+    memcpy(buffer,&rhs,sizeof(int));
+}
+template<>
+void unpack( int& data, const char *buffer )
+{
+    memcpy(&data,buffer,sizeof(int));
+}
+// unsigned int
+template<>
+size_t packsize( const unsigned int& )
+{
+    return sizeof(unsigned int);
+}
+template<>
+void pack( const unsigned int& rhs, char *buffer )
+{
+    memcpy(buffer,&rhs,sizeof(int));
+}
+template<>
+void unpack( unsigned int& data, const char *buffer )
+{
+    memcpy(&data,buffer,sizeof(int));
+}
+// size_t
+template<>
+size_t packsize( const size_t& )
+{
+    return sizeof(size_t);
+}
+template<>
+void pack( const size_t& rhs, char *buffer )
+{
+    memcpy(buffer,&rhs,sizeof(size_t));
+}
+template<>
+void unpack( size_t& data, const char *buffer )
+{
+    memcpy(&data,buffer,sizeof(size_t));
+}
+// std::string
+template<>
+size_t packsize( const std::string& rhs )
+{
+    return rhs.size()+1;
+}
+template<>
+void pack( const std::string& rhs, char *buffer )
+{
+    memcpy(buffer,rhs.c_str(),rhs.size()+1);
+}
+template<>
+void unpack( std::string& data, const char *buffer )
+{
+    data = std::string(buffer);
+}
+
+
+/********************************************************
+* Fake MPI routines                                     *
+********************************************************/
+#ifndef USE_MPI
+int MPI_Init(int*,char***)
+{
+    return 0;
+}
+int MPI_Init_thread(int*,char***, int required, int *provided )
+{
+    *provided = required;
+    return 0;
+}
+int MPI_Finalize()
+{
+    return 0;
+}
+int MPI_Comm_size( MPI_Comm, int *size )
+{
+    *size = 1;
+    return 0;
+}
+int MPI_Comm_rank( MPI_Comm, int *rank )
+{
+    *rank = 0;
+    return 0;
+}
+int MPI_Barrier( MPI_Comm )
+{
+    return 0;
+}
+int MPI_Waitall( int, MPI_Request[], MPI_Status[] )
+{
+    return 0;
+}
+int MPI_Wait( MPI_Request*, MPI_Status* )
+{
+    return 0;
+}
+int MPI_Bcast( void *buffer, int count, MPI_Datatype datatype, int root, MPI_Comm comm )
+{
+    return 0;
+}
+int MPI_Send(const void *buf, int count, MPI_Datatype datatype, int dest, int tag,
+             MPI_Comm comm)
+{
+    ERROR("Not implemented yet");
+    return 0;
+}
+int MPI_Recv(void *buf, int count, MPI_Datatype datatype, int source, int tag,
+             MPI_Comm comm, MPI_Status *status)
+{
+    ERROR("Not implemented yet");
+    return 0;
+}
+int MPI_Isend(const void *buf, int count, MPI_Datatype datatype, int dest, int tag,
+              MPI_Comm comm, MPI_Request *request)
+{
+    ERROR("Not implemented yet");
+    return 0;
+}
+int MPI_Irecv(void *buf, int count, MPI_Datatype datatype, int source,
+              int tag, MPI_Comm comm, MPI_Request *request)
+{
+    ERROR("Not implemented yet");
+    return 0;
+}
+int MPI_Allreduce(const void *sendbuf, void *recvbuf, int count,
+                  MPI_Datatype datatype, MPI_Op op, MPI_Comm comm)
+{
+    ERROR("Not implemented yet");
+    return 0;
+}
+int MPI_Allgather(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
+                  void *recvbuf, int recvcount, MPI_Datatype recvtype,
+                  MPI_Comm comm)
+{
+    ERROR("Not implemented yet");
+    return 0;
+}
+int MPI_Allgatherv(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
+                   void *recvbuf, const int *recvcounts, const int *displs,
+                   MPI_Datatype recvtype, MPI_Comm comm)
+{
+    ERROR("Not implemented yet");
+    return 0;
+}
+int MPI_Sendrecv(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
+                 int dest, int sendtag,
+                 void *recvbuf, int recvcount, MPI_Datatype recvtype,
+                 int source, int recvtag,
+                 MPI_Comm comm, MPI_Status *status)
+{
+    ERROR("Not implemented yet");
+    return 0;
+}
+int MPI_Reduce(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype,
+               MPI_Op op, int root, MPI_Comm comm)
+{
+    ERROR("Not implemented yet");
+    return 0;
+}
+int MPI_Comm_group(MPI_Comm comm, MPI_Group *group)
+{
+    ERROR("Not implemented yet");
+    return 0;
+}
+int MPI_Comm_create(MPI_Comm comm, MPI_Group group, MPI_Comm *newcomm)
+{
+    ERROR("Not implemented yet");
+    return 0;
+}
+int MPI_Comm_dup(MPI_Comm comm, MPI_Comm *newcomm)
+{
+    *newcomm = comm;
+    return 0;
+}
+double MPI_Wtime( void )
+{
+    return 0.0;
+}
+int MPI_Comm_free(MPI_Comm *group)
+{
+    return 0;
+}
+int MPI_Group_free(MPI_Group *group)
+{
+    return 0;
+}
+#endif
+
+
diff --git a/common/MPI_Helpers.h b/common/MPI_Helpers.h
new file mode 100644
index 00000000..1d20318e
--- /dev/null
+++ b/common/MPI_Helpers.h
@@ -0,0 +1,239 @@
+// This file contains wrappers for MPI routines and functions to pack/unpack data structures
+#ifndef MPI_WRAPPERS_INC
+#define MPI_WRAPPERS_INC
+
+#include <string.h>
+#include <vector>
+#include <set>
+#include <map>
+
+#ifdef USE_MPI
+    // Include MPI
+    #include "mpi.h"
+#else
+    // Create fake MPI types
+    typedef int MPI_Comm;
+    typedef int MPI_Request;
+    typedef int MPI_Status;
+    #define MPI_COMM_WORLD 0
+    #define MPI_COMM_SELF 0
+    #define MPI_COMM_NULL -1
+    #define MPI_GROUP_NULL -2
+    #define MPI_STATUS_IGNORE NULL
+    enum MPI_Datatype { MPI_LOGICAL, MPI_CHAR, MPI_UNSIGNED_CHAR, MPI_INT,
+        MPI_UNSIGNED, MPI_LONG, MPI_UNSIGNED_LONG, MPI_LONG_LONG, MPI_FLOAT, MPI_DOUBLE };
+    enum MPI_Op { MPI_MIN, MPI_MAX, MPI_SUM };
+    typedef int MPI_Group;
+    #define MPI_THREAD_SINGLE 0
+    #define MPI_THREAD_FUNNELED 1
+    #define MPI_THREAD_SERIALIZED 2
+    #define MPI_THREAD_MULTIPLE 3
+    // Fake MPI functions
+    int MPI_Init(int*,char***);
+    int MPI_Init_thread( int *argc, char ***argv, int required, int *provided );
+    int MPI_Finalize();
+    int MPI_Comm_size( MPI_Comm, int *size );
+    int MPI_Comm_rank( MPI_Comm, int *rank );
+    int MPI_Barrier(MPI_Comm);
+    int MPI_Wait(MPI_Request*,MPI_Status*);
+    int MPI_Waitall(int,MPI_Request[],MPI_Status[]);
+    int MPI_Bcast(void*,int,MPI_Datatype,int,MPI_Comm);
+    int MPI_Send(const void *buf, int count, MPI_Datatype datatype, int dest, int tag,
+        MPI_Comm comm);
+    int MPI_Recv(void *buf, int count, MPI_Datatype datatype, int source, int tag,
+        MPI_Comm comm, MPI_Status *status);
+    int MPI_Isend(const void *buf, int count, MPI_Datatype datatype, int dest, int tag,
+        MPI_Comm comm, MPI_Request *request);
+    int MPI_Irecv(void *buf, int count, MPI_Datatype datatype, int source,
+        int tag, MPI_Comm comm, MPI_Request *request);
+    int MPI_Allreduce(const void *sendbuf, void *recvbuf, int count,
+        MPI_Datatype datatype, MPI_Op op, MPI_Comm comm);
+    int MPI_Allgather(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
+        void *recvbuf, int recvcount, MPI_Datatype recvtype,
+        MPI_Comm comm);
+    int MPI_Allgatherv(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
+        void *recvbuf, const int *recvcounts, const int *displs,
+        MPI_Datatype recvtype, MPI_Comm comm);
+    int MPI_Sendrecv(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
+        int dest, int sendtag,
+        void *recvbuf, int recvcount, MPI_Datatype recvtype,
+        int source, int recvtag,
+        MPI_Comm comm, MPI_Status *status);
+    int MPI_Reduce(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype,
+        MPI_Op op, int root, MPI_Comm comm);
+    double MPI_Wtime( void );
+    int MPI_Comm_group(MPI_Comm comm, MPI_Group *group);
+    int MPI_Comm_create(MPI_Comm comm, MPI_Group group, MPI_Comm *newcomm);
+    int MPI_Comm_free(MPI_Comm *group);
+    int MPI_Group_free(MPI_Group *group);
+    int MPI_Comm_dup(MPI_Comm comm, MPI_Comm *newcomm);
+#endif
+
+
+//! Get the size of the MPI_Comm
+// Note: this is a thread and interrupt safe function
+inline int comm_size( MPI_Comm comm ) {
+    int size = 1;
+    MPI_Comm_size( comm, &size );
+    return size;
+}
+
+
+//! Get the rank of the MPI_Comm
+// Note: this is a thread and interrupt safe function
+inline int comm_rank( MPI_Comm comm ) {
+    int rank = 1;
+    MPI_Comm_rank( comm, &rank );
+    return rank;
+}
+
+
+//! Get the size of MPI_COMM_WORLD
+inline int MPI_WORLD_SIZE( ) {
+    return comm_size( MPI_COMM_WORLD );
+}
+
+//! Get the rank of MPI_COMM_WORLD
+inline int MPI_WORLD_RANK( ) {
+    return comm_rank( MPI_COMM_WORLD );
+}
+
+//! Return the appropriate MPI datatype for a class
+template<class TYPE>
+MPI_Datatype getMPItype();
+
+
+//! Template function to return the buffer size required to pack a class
+template<class TYPE>
+size_t packsize( const TYPE& rhs );
+
+//! Template function to pack a class to a buffer
+template<class TYPE>
+void pack( const TYPE& rhs, char *buffer );
+
+//! Template function to unpack a class from a buffer
+template<class TYPE>
+void unpack( TYPE& data, const char *buffer );
+
+
+//! Template function to return the buffer size required to pack a std::vector
+template<class TYPE>
+size_t packsize( const std::vector<TYPE>& rhs );
+
+//! Template function to pack a class to a buffer
+template<class TYPE>
+void pack( const std::vector<TYPE>& rhs, char *buffer );
+
+//! Template function to unpack a class from a buffer
+template<class TYPE>
+void unpack( std::vector<TYPE>& data, const char *buffer );
+
+
+//!
Template function to return the buffer size required to pack a std::pair +template +size_t packsize( const std::pair& rhs ); + +//! Template function to pack a class to a buffer +template +void pack( const std::pair& rhs, char *buffer ); + +//! Template function to pack a class to a buffer +template +void unpack( std::pair& data, const char *buffer ); + + +//! Template function to return the buffer size required to pack a std::map +template +size_t packsize( const std::map& rhs ); + +//! Template function to pack a class to a buffer +template +void pack( const std::map& rhs, char *buffer ); + +//! Template function to pack a class to a buffer +template +void unpack( std::map& data, const char *buffer ); + + +//! Template function to return the buffer size required to pack a std::set +template +size_t packsize( const std::set& rhs ); + +//! Template function to pack a class to a buffer +template +void pack( const std::set& rhs, char *buffer ); + +//! Template function to pack a class to a buffer +template +void unpack( std::set& data, const char *buffer ); + + + +// Helper functions +inline double sumReduce( MPI_Comm comm, double x ) +{ + double y = 0; + MPI_Allreduce(&x,&y,1,MPI_DOUBLE,MPI_SUM,comm); + return y; +} +inline float sumReduce( MPI_Comm comm, float x ) +{ + float y = 0; + MPI_Allreduce(&x,&y,1,MPI_FLOAT,MPI_SUM,comm); + return y; +} +inline int sumReduce( MPI_Comm comm, int x ) +{ + int y = 0; + MPI_Allreduce(&x,&y,1,MPI_INT,MPI_SUM,comm); + return y; +} +inline long long sumReduce( MPI_Comm comm, long long x ) +{ + long long y = 0; + MPI_Allreduce(&x,&y,1,MPI_LONG_LONG,MPI_SUM,comm); + return y; +} +inline bool sumReduce( MPI_Comm comm, bool x ) +{ + int y = sumReduce( comm, x?1:0 ); + return y>0; +} +inline std::vector sumReduce( MPI_Comm comm, const std::vector& x ) +{ + auto y = x; + MPI_Allreduce(x.data(),y.data(),x.size(),MPI_FLOAT,MPI_SUM,comm); + return y; +} +inline std::vector sumReduce( MPI_Comm comm, const std::vector& x ) +{ + auto y = x; + MPI_Allreduce(x.data(),y.data(),x.size(),MPI_INT,MPI_SUM,comm); + return y; +} +inline double maxReduce( MPI_Comm comm, double x ) +{ + double y = 0; + MPI_Allreduce(&x,&y,1,MPI_DOUBLE,MPI_MAX,comm); + return y; +} +inline float maxReduce( MPI_Comm comm, float x ) +{ + float y = 0; + MPI_Allreduce(&x,&y,1,MPI_FLOAT,MPI_MAX,comm); + return y; +} +inline int maxReduce( MPI_Comm comm, int x ) +{ + int y = 0; + MPI_Allreduce(&x,&y,1,MPI_INT,MPI_MAX,comm); + return y; +} + + +#endif + + +#include "common/MPI_Helpers.hpp" + + diff --git a/IO/PackData.hpp b/common/MPI_Helpers.hpp similarity index 95% rename from IO/PackData.hpp rename to common/MPI_Helpers.hpp index 006cdf73..85261cf1 100644 --- a/IO/PackData.hpp +++ b/common/MPI_Helpers.hpp @@ -1,9 +1,8 @@ -// This file functions to pack/unpack data structures -#ifndef included_PackData_hpp -#define included_PackData_hpp - -#include "IO/PackData.h" +// This file contains wrappers for MPI routines and functions to pack/unpack data structures +#ifndef MPI_WRAPPERS_HPP +#define MPI_WRAPPERS_HPP +#include "common/MPI_Helpers.h" #include #include #include diff --git a/common/ReadMicroCT.cpp b/common/ReadMicroCT.cpp index 2209e712..79ef241e 100644 --- a/common/ReadMicroCT.cpp +++ b/common/ReadMicroCT.cpp @@ -64,11 +64,11 @@ Array readMicroCT( const std::string& filename ) // Read the compressed micro CT data and distribute -Array readMicroCT( const Database& domain, const Utilities::MPI& comm ) +Array readMicroCT( const Database& domain, MPI_Comm comm ) { // Get the local problem info auto 
n = domain.getVector( "n" ); - int rank = comm.getRank(); + int rank = comm_rank(MPI_COMM_WORLD); auto nproc = domain.getVector( "nproc" ); RankInfoStruct rankInfo( rank, nproc[0], nproc[1], nproc[2] ); diff --git a/common/ReadMicroCT.h b/common/ReadMicroCT.h index c8acc379..f232740e 100644 --- a/common/ReadMicroCT.h +++ b/common/ReadMicroCT.h @@ -5,12 +5,11 @@ #include "common/Array.h" #include "common/Communication.h" #include "common/Database.h" -#include "common/MPI.h" Array readMicroCT( const std::string& filename ); -Array readMicroCT( const Database& domain, const Utilities::MPI& comm ); +Array readMicroCT( const Database& domain, MPI_Comm comm ); #endif diff --git a/common/ScaLBL.cpp b/common/ScaLBL.cpp index 6f2966e7..e8a75994 100644 --- a/common/ScaLBL.cpp +++ b/common/ScaLBL.cpp @@ -5,7 +5,9 @@ ScaLBL_Communicator::ScaLBL_Communicator(std::shared_ptr Dm){ Lock=false; // unlock the communicator //...................................................................................... // Create a separate copy of the communicator for the device - MPI_COMM_SCALBL = Dm->Comm.dup(); + //MPI_Comm_group(Dm->Comm,&Group); + //MPI_Comm_create(Dm->Comm,Group,&MPI_COMM_SCALBL); + MPI_Comm_dup(Dm->Comm,&MPI_COMM_SCALBL); //...................................................................................... // Copy the domain size and communication information directly from Dm Nx = Dm->Nx; @@ -213,7 +215,7 @@ ScaLBL_Communicator::ScaLBL_Communicator(std::shared_ptr Dm){ ScaLBL_CopyToZeroCopy(dvcRecvList_Yz,Dm->recvList_Yz,recvCount_Yz*sizeof(int)); //...................................................................................... - MPI_COMM_SCALBL.barrier(); + MPI_Barrier(MPI_COMM_SCALBL); //................................................................................... // Set up the recieve distribution lists @@ -286,7 +288,7 @@ ScaLBL_Communicator::ScaLBL_Communicator(std::shared_ptr Dm){ //................................................................................... //...................................................................................... - MPI_COMM_SCALBL.barrier(); + MPI_Barrier(MPI_COMM_SCALBL); ScaLBL_DeviceBarrier(); //...................................................................................... SendCount = sendCount_x+sendCount_X+sendCount_y+sendCount_Y+sendCount_z+sendCount_Z+ @@ -867,8 +869,8 @@ void ScaLBL_Communicator::SendD3Q19AA(double *dist){ ScaLBL_D3Q19_Pack(12,dvcSendList_x,3*sendCount_x,sendCount_x,sendbuf_x,dist,N); ScaLBL_D3Q19_Pack(14,dvcSendList_x,4*sendCount_x,sendCount_x,sendbuf_x,dist,N); - req1[0] = MPI_COMM_SCALBL.Isend(sendbuf_x, 5*sendCount_x,rank_x,sendtag); - req2[0] = MPI_COMM_SCALBL.Irecv(recvbuf_X, 5*recvCount_X,rank_X,recvtag); + MPI_Isend(sendbuf_x, 5*sendCount_x,MPI_DOUBLE,rank_x,sendtag,MPI_COMM_SCALBL,&req1[0]); + MPI_Irecv(recvbuf_X, 5*recvCount_X,MPI_DOUBLE,rank_X,recvtag,MPI_COMM_SCALBL,&req2[0]); //...Packing for X face(1,7,9,11,13)................................ 
ScaLBL_D3Q19_Pack(1,dvcSendList_X,0,sendCount_X,sendbuf_X,dist,N); ScaLBL_D3Q19_Pack(7,dvcSendList_X,sendCount_X,sendCount_X,sendbuf_X,dist,N); @@ -876,8 +878,8 @@ void ScaLBL_Communicator::SendD3Q19AA(double *dist){ ScaLBL_D3Q19_Pack(11,dvcSendList_X,3*sendCount_X,sendCount_X,sendbuf_X,dist,N); ScaLBL_D3Q19_Pack(13,dvcSendList_X,4*sendCount_X,sendCount_X,sendbuf_X,dist,N); - req1[1] = MPI_COMM_SCALBL.Isend(sendbuf_X, 5*sendCount_X,rank_X,sendtag); - req2[1] = MPI_COMM_SCALBL.Irecv(recvbuf_x, 5*recvCount_x,rank_x,recvtag); + MPI_Isend(sendbuf_X, 5*sendCount_X,MPI_DOUBLE,rank_X,sendtag,MPI_COMM_SCALBL,&req1[1]); + MPI_Irecv(recvbuf_x, 5*recvCount_x,MPI_DOUBLE,rank_x,recvtag,MPI_COMM_SCALBL,&req2[1]); //...Packing for y face(4,8,9,16,18)................................. ScaLBL_D3Q19_Pack(4,dvcSendList_y,0,sendCount_y,sendbuf_y,dist,N); ScaLBL_D3Q19_Pack(8,dvcSendList_y,sendCount_y,sendCount_y,sendbuf_y,dist,N); @@ -885,8 +887,8 @@ void ScaLBL_Communicator::SendD3Q19AA(double *dist){ ScaLBL_D3Q19_Pack(16,dvcSendList_y,3*sendCount_y,sendCount_y,sendbuf_y,dist,N); ScaLBL_D3Q19_Pack(18,dvcSendList_y,4*sendCount_y,sendCount_y,sendbuf_y,dist,N); - req1[2] = MPI_COMM_SCALBL.Isend(sendbuf_y, 5*sendCount_y,rank_y,sendtag); - req2[2] = MPI_COMM_SCALBL.Irecv(recvbuf_Y, 5*recvCount_Y,rank_Y,recvtag); + MPI_Isend(sendbuf_y, 5*sendCount_y,MPI_DOUBLE,rank_y,sendtag,MPI_COMM_SCALBL,&req1[2]); + MPI_Irecv(recvbuf_Y, 5*recvCount_Y,MPI_DOUBLE,rank_Y,recvtag,MPI_COMM_SCALBL,&req2[2]); //...Packing for Y face(3,7,10,15,17)................................. ScaLBL_D3Q19_Pack(3,dvcSendList_Y,0,sendCount_Y,sendbuf_Y,dist,N); ScaLBL_D3Q19_Pack(7,dvcSendList_Y,sendCount_Y,sendCount_Y,sendbuf_Y,dist,N); @@ -894,8 +896,8 @@ void ScaLBL_Communicator::SendD3Q19AA(double *dist){ ScaLBL_D3Q19_Pack(15,dvcSendList_Y,3*sendCount_Y,sendCount_Y,sendbuf_Y,dist,N); ScaLBL_D3Q19_Pack(17,dvcSendList_Y,4*sendCount_Y,sendCount_Y,sendbuf_Y,dist,N); - req1[3] = MPI_COMM_SCALBL.Isend(sendbuf_Y, 5*sendCount_Y,rank_Y,sendtag); - req2[3] = MPI_COMM_SCALBL.Irecv(recvbuf_y, 5*recvCount_y,rank_y,recvtag); + MPI_Isend(sendbuf_Y, 5*sendCount_Y,MPI_DOUBLE,rank_Y,sendtag,MPI_COMM_SCALBL,&req1[3]); + MPI_Irecv(recvbuf_y, 5*recvCount_y,MPI_DOUBLE,rank_y,recvtag,MPI_COMM_SCALBL,&req2[3]); //...Packing for z face(6,12,13,16,17)................................ ScaLBL_D3Q19_Pack(6,dvcSendList_z,0,sendCount_z,sendbuf_z,dist,N); ScaLBL_D3Q19_Pack(12,dvcSendList_z,sendCount_z,sendCount_z,sendbuf_z,dist,N); @@ -903,8 +905,8 @@ void ScaLBL_Communicator::SendD3Q19AA(double *dist){ ScaLBL_D3Q19_Pack(16,dvcSendList_z,3*sendCount_z,sendCount_z,sendbuf_z,dist,N); ScaLBL_D3Q19_Pack(17,dvcSendList_z,4*sendCount_z,sendCount_z,sendbuf_z,dist,N); - req1[4] = MPI_COMM_SCALBL.Isend(sendbuf_z, 5*sendCount_z,rank_z,sendtag); - req2[4] = MPI_COMM_SCALBL.Irecv(recvbuf_Z, 5*recvCount_Z,rank_Z,recvtag); + MPI_Isend(sendbuf_z, 5*sendCount_z,MPI_DOUBLE,rank_z,sendtag,MPI_COMM_SCALBL,&req1[4]); + MPI_Irecv(recvbuf_Z, 5*recvCount_Z,MPI_DOUBLE,rank_Z,recvtag,MPI_COMM_SCALBL,&req2[4]); //...Packing for Z face(5,11,14,15,18)................................ 
ScaLBL_D3Q19_Pack(5,dvcSendList_Z,0,sendCount_Z,sendbuf_Z,dist,N); @@ -913,57 +915,57 @@ void ScaLBL_Communicator::SendD3Q19AA(double *dist){ ScaLBL_D3Q19_Pack(15,dvcSendList_Z,3*sendCount_Z,sendCount_Z,sendbuf_Z,dist,N); ScaLBL_D3Q19_Pack(18,dvcSendList_Z,4*sendCount_Z,sendCount_Z,sendbuf_Z,dist,N); - req1[5] = MPI_COMM_SCALBL.Isend(sendbuf_Z, 5*sendCount_Z,rank_Z,sendtag); - req2[5] = MPI_COMM_SCALBL.Irecv(recvbuf_z, 5*recvCount_z,rank_z,recvtag); + MPI_Isend(sendbuf_Z, 5*sendCount_Z,MPI_DOUBLE,rank_Z,sendtag,MPI_COMM_SCALBL,&req1[5]); + MPI_Irecv(recvbuf_z, 5*recvCount_z,MPI_DOUBLE,rank_z,recvtag,MPI_COMM_SCALBL,&req2[5]); //...Pack the xy edge (8)................................ ScaLBL_D3Q19_Pack(8,dvcSendList_xy,0,sendCount_xy,sendbuf_xy,dist,N); - req1[6] = MPI_COMM_SCALBL.Isend(sendbuf_xy, sendCount_xy,rank_xy,sendtag); - req2[6] = MPI_COMM_SCALBL.Irecv(recvbuf_XY, recvCount_XY,rank_XY,recvtag); + MPI_Isend(sendbuf_xy, sendCount_xy,MPI_DOUBLE,rank_xy,sendtag,MPI_COMM_SCALBL,&req1[6]); + MPI_Irecv(recvbuf_XY, recvCount_XY,MPI_DOUBLE,rank_XY,recvtag,MPI_COMM_SCALBL,&req2[6]); //...Pack the Xy edge (9)................................ ScaLBL_D3Q19_Pack(9,dvcSendList_Xy,0,sendCount_Xy,sendbuf_Xy,dist,N); - req1[8] = MPI_COMM_SCALBL.Isend(sendbuf_Xy, sendCount_Xy,rank_Xy,sendtag); - req2[8] = MPI_COMM_SCALBL.Irecv(recvbuf_xY, recvCount_xY,rank_xY,recvtag); + MPI_Isend(sendbuf_Xy, sendCount_Xy,MPI_DOUBLE,rank_Xy,sendtag,MPI_COMM_SCALBL,&req1[8]); + MPI_Irecv(recvbuf_xY, recvCount_xY,MPI_DOUBLE,rank_xY,recvtag,MPI_COMM_SCALBL,&req2[8]); //...Pack the xY edge (10)................................ ScaLBL_D3Q19_Pack(10,dvcSendList_xY,0,sendCount_xY,sendbuf_xY,dist,N); - req1[9] = MPI_COMM_SCALBL.Isend(sendbuf_xY, sendCount_xY,rank_xY,sendtag); - req2[9] = MPI_COMM_SCALBL.Irecv(recvbuf_Xy, recvCount_Xy,rank_Xy,recvtag); + MPI_Isend(sendbuf_xY, sendCount_xY,MPI_DOUBLE,rank_xY,sendtag,MPI_COMM_SCALBL,&req1[9]); + MPI_Irecv(recvbuf_Xy, recvCount_Xy,MPI_DOUBLE,rank_Xy,recvtag,MPI_COMM_SCALBL,&req2[9]); //...Pack the XY edge (7)................................ ScaLBL_D3Q19_Pack(7,dvcSendList_XY,0,sendCount_XY,sendbuf_XY,dist,N); - req1[7] = MPI_COMM_SCALBL.Isend(sendbuf_XY, sendCount_XY,rank_XY,sendtag); - req2[7] = MPI_COMM_SCALBL.Irecv(recvbuf_xy, recvCount_xy,rank_xy,recvtag); + MPI_Isend(sendbuf_XY, sendCount_XY,MPI_DOUBLE,rank_XY,sendtag,MPI_COMM_SCALBL,&req1[7]); + MPI_Irecv(recvbuf_xy, recvCount_xy,MPI_DOUBLE,rank_xy,recvtag,MPI_COMM_SCALBL,&req2[7]); //...Pack the xz edge (12)................................ ScaLBL_D3Q19_Pack(12,dvcSendList_xz,0,sendCount_xz,sendbuf_xz,dist,N); - req1[10] = MPI_COMM_SCALBL.Isend(sendbuf_xz, sendCount_xz,rank_xz,sendtag); - req2[10] = MPI_COMM_SCALBL.Irecv(recvbuf_XZ, recvCount_XZ,rank_XZ,recvtag); + MPI_Isend(sendbuf_xz, sendCount_xz,MPI_DOUBLE,rank_xz,sendtag,MPI_COMM_SCALBL,&req1[10]); + MPI_Irecv(recvbuf_XZ, recvCount_XZ,MPI_DOUBLE,rank_XZ,recvtag,MPI_COMM_SCALBL,&req2[10]); //...Pack the xZ edge (14)................................ ScaLBL_D3Q19_Pack(14,dvcSendList_xZ,0,sendCount_xZ,sendbuf_xZ,dist,N); - req1[13] = MPI_COMM_SCALBL.Isend(sendbuf_xZ, sendCount_xZ,rank_xZ,sendtag); - req2[13] = MPI_COMM_SCALBL.Irecv(recvbuf_Xz, recvCount_Xz,rank_Xz,recvtag); + MPI_Isend(sendbuf_xZ, sendCount_xZ,MPI_DOUBLE,rank_xZ,sendtag,MPI_COMM_SCALBL,&req1[13]); + MPI_Irecv(recvbuf_Xz, recvCount_Xz,MPI_DOUBLE,rank_Xz,recvtag,MPI_COMM_SCALBL,&req2[13]); //...Pack the Xz edge (13)................................ 
ScaLBL_D3Q19_Pack(13,dvcSendList_Xz,0,sendCount_Xz,sendbuf_Xz,dist,N); - req1[12] = MPI_COMM_SCALBL.Isend(sendbuf_Xz, sendCount_Xz,rank_Xz,sendtag); - req2[12] = MPI_COMM_SCALBL.Irecv(recvbuf_xZ, recvCount_xZ,rank_xZ,recvtag); + MPI_Isend(sendbuf_Xz, sendCount_Xz,MPI_DOUBLE,rank_Xz,sendtag,MPI_COMM_SCALBL,&req1[12]); + MPI_Irecv(recvbuf_xZ, recvCount_xZ,MPI_DOUBLE,rank_xZ,recvtag,MPI_COMM_SCALBL,&req2[12]); //...Pack the XZ edge (11)................................ ScaLBL_D3Q19_Pack(11,dvcSendList_XZ,0,sendCount_XZ,sendbuf_XZ,dist,N); - req1[11] = MPI_COMM_SCALBL.Isend(sendbuf_XZ, sendCount_XZ,rank_XZ,sendtag); - req2[11] = MPI_COMM_SCALBL.Irecv(recvbuf_xz, recvCount_xz,rank_xz,recvtag); + MPI_Isend(sendbuf_XZ, sendCount_XZ,MPI_DOUBLE,rank_XZ,sendtag,MPI_COMM_SCALBL,&req1[11]); + MPI_Irecv(recvbuf_xz, recvCount_xz,MPI_DOUBLE,rank_xz,recvtag,MPI_COMM_SCALBL,&req2[11]); //...Pack the yz edge (16)................................ ScaLBL_D3Q19_Pack(16,dvcSendList_yz,0,sendCount_yz,sendbuf_yz,dist,N); - req1[14] = MPI_COMM_SCALBL.Isend(sendbuf_yz, sendCount_yz,rank_yz,sendtag); - req2[14] = MPI_COMM_SCALBL.Irecv(recvbuf_YZ, recvCount_YZ,rank_YZ,recvtag); + MPI_Isend(sendbuf_yz, sendCount_yz,MPI_DOUBLE,rank_yz,sendtag,MPI_COMM_SCALBL,&req1[14]); + MPI_Irecv(recvbuf_YZ, recvCount_YZ,MPI_DOUBLE,rank_YZ,recvtag,MPI_COMM_SCALBL,&req2[14]); //...Pack the yZ edge (18)................................ ScaLBL_D3Q19_Pack(18,dvcSendList_yZ,0,sendCount_yZ,sendbuf_yZ,dist,N); - req1[17] = MPI_COMM_SCALBL.Isend(sendbuf_yZ, sendCount_yZ,rank_yZ,sendtag); - req2[17] = MPI_COMM_SCALBL.Irecv(recvbuf_Yz, recvCount_Yz,rank_Yz,recvtag); + MPI_Isend(sendbuf_yZ, sendCount_yZ,MPI_DOUBLE,rank_yZ,sendtag,MPI_COMM_SCALBL,&req1[17]); + MPI_Irecv(recvbuf_Yz, recvCount_Yz,MPI_DOUBLE,rank_Yz,recvtag,MPI_COMM_SCALBL,&req2[17]); //...Pack the Yz edge (17)................................ ScaLBL_D3Q19_Pack(17,dvcSendList_Yz,0,sendCount_Yz,sendbuf_Yz,dist,N); - req1[16] = MPI_COMM_SCALBL.Isend(sendbuf_Yz, sendCount_Yz,rank_Yz,sendtag); - req2[16] = MPI_COMM_SCALBL.Irecv(recvbuf_yZ, recvCount_yZ,rank_yZ,recvtag); + MPI_Isend(sendbuf_Yz, sendCount_Yz,MPI_DOUBLE,rank_Yz,sendtag,MPI_COMM_SCALBL,&req1[16]); + MPI_Irecv(recvbuf_yZ, recvCount_yZ,MPI_DOUBLE,rank_yZ,recvtag,MPI_COMM_SCALBL,&req2[16]); //...Pack the YZ edge (15)................................ ScaLBL_D3Q19_Pack(15,dvcSendList_YZ,0,sendCount_YZ,sendbuf_YZ,dist,N); - req1[15] = MPI_COMM_SCALBL.Isend(sendbuf_YZ, sendCount_YZ,rank_YZ,sendtag); - req2[15] = MPI_COMM_SCALBL.Irecv(recvbuf_yz, recvCount_yz,rank_yz,recvtag); + MPI_Isend(sendbuf_YZ, sendCount_YZ,MPI_DOUBLE,rank_YZ,sendtag,MPI_COMM_SCALBL,&req1[15]); + MPI_Irecv(recvbuf_yz, recvCount_yz,MPI_DOUBLE,rank_yz,recvtag,MPI_COMM_SCALBL,&req2[15]); //................................................................................... } @@ -973,8 +975,8 @@ void ScaLBL_Communicator::RecvD3Q19AA(double *dist){ // NOTE: the center distribution f0 must NOT be at the start of feven, provide offset to start of f2 //................................................................................... // Wait for completion of D3Q19 communication - MPI_COMM_SCALBL.waitAll(18,req1); - MPI_COMM_SCALBL.waitAll(18,req2); + MPI_Waitall(18,req1,stat1); + MPI_Waitall(18,req2,stat2); ScaLBL_DeviceBarrier(); //................................................................................... 
@@ -1057,8 +1059,8 @@ void ScaLBL_Communicator::RecvGrad(double *phi, double *grad){ // Recieves halo and incorporates into D3Q19 based stencil gradient computation //................................................................................... // Wait for completion of D3Q19 communication - MPI_COMM_SCALBL.waitAll(18,req1); - MPI_COMM_SCALBL.waitAll(18,req2); + MPI_Waitall(18,req1,stat1); + MPI_Waitall(18,req2,stat2); ScaLBL_DeviceBarrier(); //................................................................................... @@ -1151,36 +1153,36 @@ void ScaLBL_Communicator::BiSendD3Q7AA(double *Aq, double *Bq){ ScaLBL_D3Q19_Pack(2,dvcSendList_x,0,sendCount_x,sendbuf_x,Aq,N); ScaLBL_D3Q19_Pack(2,dvcSendList_x,sendCount_x,sendCount_x,sendbuf_x,Bq,N); - req1[0] = MPI_COMM_SCALBL.Isend(sendbuf_x, 2*sendCount_x,rank_x,sendtag); - req2[0] = MPI_COMM_SCALBL.Irecv(recvbuf_X, 2*recvCount_X,rank_X,recvtag); + MPI_Isend(sendbuf_x, 2*sendCount_x,MPI_DOUBLE,rank_x,sendtag,MPI_COMM_SCALBL,&req1[0]); + MPI_Irecv(recvbuf_X, 2*recvCount_X,MPI_DOUBLE,rank_X,recvtag,MPI_COMM_SCALBL,&req2[0]); //...Packing for X face(1,7,9,11,13)................................ ScaLBL_D3Q19_Pack(1,dvcSendList_X,0,sendCount_X,sendbuf_X,Aq,N); ScaLBL_D3Q19_Pack(1,dvcSendList_X,sendCount_X,sendCount_X,sendbuf_X,Bq,N); - req1[1] = MPI_COMM_SCALBL.Isend(sendbuf_X, 2*sendCount_X,rank_X,sendtag); - req2[1] = MPI_COMM_SCALBL.Irecv(recvbuf_x, 2*recvCount_x,rank_x,recvtag); + MPI_Isend(sendbuf_X, 2*sendCount_X,MPI_DOUBLE,rank_X,sendtag,MPI_COMM_SCALBL,&req1[1]); + MPI_Irecv(recvbuf_x, 2*recvCount_x,MPI_DOUBLE,rank_x,recvtag,MPI_COMM_SCALBL,&req2[1]); //...Packing for y face(4,8,9,16,18)................................. ScaLBL_D3Q19_Pack(4,dvcSendList_y,0,sendCount_y,sendbuf_y,Aq,N); ScaLBL_D3Q19_Pack(4,dvcSendList_y,sendCount_y,sendCount_y,sendbuf_y,Bq,N); - req1[2] = MPI_COMM_SCALBL.Isend(sendbuf_y, 2*sendCount_y,rank_y,sendtag); - req2[2] = MPI_COMM_SCALBL.Irecv(recvbuf_Y, 2*recvCount_Y,rank_Y,recvtag); + MPI_Isend(sendbuf_y, 2*sendCount_y,MPI_DOUBLE,rank_y,sendtag,MPI_COMM_SCALBL,&req1[2]); + MPI_Irecv(recvbuf_Y, 2*recvCount_Y,MPI_DOUBLE,rank_Y,recvtag,MPI_COMM_SCALBL,&req2[2]); //...Packing for Y face(3,7,10,15,17)................................. ScaLBL_D3Q19_Pack(3,dvcSendList_Y,0,sendCount_Y,sendbuf_Y,Aq,N); ScaLBL_D3Q19_Pack(3,dvcSendList_Y,sendCount_Y,sendCount_Y,sendbuf_Y,Bq,N); - req1[3] = MPI_COMM_SCALBL.Isend(sendbuf_Y, 2*sendCount_Y,rank_Y,sendtag); - req2[3] = MPI_COMM_SCALBL.Irecv(recvbuf_y, 2*recvCount_y,rank_y,recvtag); + MPI_Isend(sendbuf_Y, 2*sendCount_Y,MPI_DOUBLE,rank_Y,sendtag,MPI_COMM_SCALBL,&req1[3]); + MPI_Irecv(recvbuf_y, 2*recvCount_y,MPI_DOUBLE,rank_y,recvtag,MPI_COMM_SCALBL,&req2[3]); //...Packing for z face(6,12,13,16,17)................................ ScaLBL_D3Q19_Pack(6,dvcSendList_z,0,sendCount_z,sendbuf_z,Aq,N); ScaLBL_D3Q19_Pack(6,dvcSendList_z,sendCount_z,sendCount_z,sendbuf_z,Bq,N); - req1[4] = MPI_COMM_SCALBL.Isend(sendbuf_z, 2*sendCount_z,rank_z,sendtag); - req2[4] = MPI_COMM_SCALBL.Irecv(recvbuf_Z, 2*recvCount_Z,rank_Z,recvtag); + MPI_Isend(sendbuf_z, 2*sendCount_z,MPI_DOUBLE,rank_z,sendtag,MPI_COMM_SCALBL,&req1[4]); + MPI_Irecv(recvbuf_Z, 2*recvCount_Z,MPI_DOUBLE,rank_Z,recvtag,MPI_COMM_SCALBL,&req2[4]); //...Packing for Z face(5,11,14,15,18)................................ 
ScaLBL_D3Q19_Pack(5,dvcSendList_Z,0,sendCount_Z,sendbuf_Z,Aq,N); @@ -1188,8 +1190,8 @@ void ScaLBL_Communicator::BiSendD3Q7AA(double *Aq, double *Bq){ //................................................................................... // Send all the distributions - req1[5] = MPI_COMM_SCALBL.Isend(sendbuf_Z, 2*sendCount_Z,rank_Z,sendtag); - req2[5] = MPI_COMM_SCALBL.Irecv(recvbuf_z, 2*recvCount_z,rank_z,recvtag); + MPI_Isend(sendbuf_Z, 2*sendCount_Z,MPI_DOUBLE,rank_Z,sendtag,MPI_COMM_SCALBL,&req1[5]); + MPI_Irecv(recvbuf_z, 2*recvCount_z,MPI_DOUBLE,rank_z,recvtag,MPI_COMM_SCALBL,&req2[5]); } @@ -1199,8 +1201,8 @@ void ScaLBL_Communicator::BiRecvD3Q7AA(double *Aq, double *Bq){ // NOTE: the center distribution f0 must NOT be at the start of feven, provide offset to start of f2 //................................................................................... // Wait for completion of D3Q19 communication - MPI_COMM_SCALBL.waitAll(6,req1); - MPI_COMM_SCALBL.waitAll(6,req2); + MPI_Waitall(6,req1,stat1); + MPI_Waitall(6,req2,stat2); ScaLBL_DeviceBarrier(); //................................................................................... @@ -1291,18 +1293,18 @@ void ScaLBL_Communicator::TriSendD3Q7AA(double *Aq, double *Bq, double *Cq){ //................................................................................... // Send all the distributions - req1[0] = MPI_COMM_SCALBL.Isend(sendbuf_x, 3*sendCount_x,rank_x,sendtag); - req2[0] = MPI_COMM_SCALBL.Irecv(recvbuf_X, 3*recvCount_X,rank_X,recvtag); - req1[1] = MPI_COMM_SCALBL.Isend(sendbuf_X, 3*sendCount_X,rank_X,sendtag); - req2[1] = MPI_COMM_SCALBL.Irecv(recvbuf_x, 3*recvCount_x,rank_x,recvtag); - req1[2] = MPI_COMM_SCALBL.Isend(sendbuf_y, 3*sendCount_y,rank_y,sendtag); - req2[2] = MPI_COMM_SCALBL.Irecv(recvbuf_Y, 3*recvCount_Y,rank_Y,recvtag); - req1[3] = MPI_COMM_SCALBL.Isend(sendbuf_Y, 3*sendCount_Y,rank_Y,sendtag); - req2[3] = MPI_COMM_SCALBL.Irecv(recvbuf_y, 3*recvCount_y,rank_y,recvtag); - req1[4] = MPI_COMM_SCALBL.Isend(sendbuf_z, 3*sendCount_z,rank_z,sendtag); - req2[4] = MPI_COMM_SCALBL.Irecv(recvbuf_Z, 3*recvCount_Z,rank_Z,recvtag); - req1[5] = MPI_COMM_SCALBL.Isend(sendbuf_Z, 3*sendCount_Z,rank_Z,sendtag); - req2[5] = MPI_COMM_SCALBL.Irecv(recvbuf_z, 3*recvCount_z,rank_z,recvtag); + MPI_Isend(sendbuf_x, 3*sendCount_x,MPI_DOUBLE,rank_x,sendtag,MPI_COMM_SCALBL,&req1[0]); + MPI_Irecv(recvbuf_X, 3*recvCount_X,MPI_DOUBLE,rank_X,recvtag,MPI_COMM_SCALBL,&req2[0]); + MPI_Isend(sendbuf_X, 3*sendCount_X,MPI_DOUBLE,rank_X,sendtag,MPI_COMM_SCALBL,&req1[1]); + MPI_Irecv(recvbuf_x, 3*recvCount_x,MPI_DOUBLE,rank_x,recvtag,MPI_COMM_SCALBL,&req2[1]); + MPI_Isend(sendbuf_y, 3*sendCount_y,MPI_DOUBLE,rank_y,sendtag,MPI_COMM_SCALBL,&req1[2]); + MPI_Irecv(recvbuf_Y, 3*recvCount_Y,MPI_DOUBLE,rank_Y,recvtag,MPI_COMM_SCALBL,&req2[2]); + MPI_Isend(sendbuf_Y, 3*sendCount_Y,MPI_DOUBLE,rank_Y,sendtag,MPI_COMM_SCALBL,&req1[3]); + MPI_Irecv(recvbuf_y, 3*recvCount_y,MPI_DOUBLE,rank_y,recvtag,MPI_COMM_SCALBL,&req2[3]); + MPI_Isend(sendbuf_z, 3*sendCount_z,MPI_DOUBLE,rank_z,sendtag,MPI_COMM_SCALBL,&req1[4]); + MPI_Irecv(recvbuf_Z, 3*recvCount_Z,MPI_DOUBLE,rank_Z,recvtag,MPI_COMM_SCALBL,&req2[4]); + MPI_Isend(sendbuf_Z, 3*sendCount_Z,MPI_DOUBLE,rank_Z,sendtag,MPI_COMM_SCALBL,&req1[5]); + MPI_Irecv(recvbuf_z, 3*recvCount_z,MPI_DOUBLE,rank_z,recvtag,MPI_COMM_SCALBL,&req2[5]); } @@ -1312,8 +1314,8 @@ void ScaLBL_Communicator::TriRecvD3Q7AA(double *Aq, double *Bq, double *Cq){ // NOTE: the center distribution f0 must NOT be at the start of feven, provide offset 
to start of f2 //................................................................................... // Wait for completion of D3Q19 communication - MPI_COMM_SCALBL.waitAll(6,req1); - MPI_COMM_SCALBL.waitAll(6,req2); + MPI_Waitall(6,req1,stat1); + MPI_Waitall(6,req2,stat2); ScaLBL_DeviceBarrier(); //................................................................................... @@ -1407,49 +1409,49 @@ void ScaLBL_Communicator::SendHalo(double *data){ // Send / Recv all the phase indcator field values //................................................................................... - req1[0] = MPI_COMM_SCALBL.Isend(sendbuf_x, sendCount_x,rank_x,sendtag); - req2[0] = MPI_COMM_SCALBL.Irecv(recvbuf_X, recvCount_X,rank_X,recvtag); - req1[1] = MPI_COMM_SCALBL.Isend(sendbuf_X, sendCount_X,rank_X,sendtag); - req2[1] = MPI_COMM_SCALBL.Irecv(recvbuf_x, recvCount_x,rank_x,recvtag); - req1[2] = MPI_COMM_SCALBL.Isend(sendbuf_y, sendCount_y,rank_y,sendtag); - req2[2] = MPI_COMM_SCALBL.Irecv(recvbuf_Y, recvCount_Y,rank_Y,recvtag); - req1[3] = MPI_COMM_SCALBL.Isend(sendbuf_Y, sendCount_Y,rank_Y,sendtag); - req2[3] = MPI_COMM_SCALBL.Irecv(recvbuf_y, recvCount_y,rank_y,recvtag); - req1[4] = MPI_COMM_SCALBL.Isend(sendbuf_z, sendCount_z,rank_z,sendtag); - req2[4] = MPI_COMM_SCALBL.Irecv(recvbuf_Z, recvCount_Z,rank_Z,recvtag); - req1[5] = MPI_COMM_SCALBL.Isend(sendbuf_Z, sendCount_Z,rank_Z,sendtag); - req2[5] = MPI_COMM_SCALBL.Irecv(recvbuf_z, recvCount_z,rank_z,recvtag); - req1[6] = MPI_COMM_SCALBL.Isend(sendbuf_xy, sendCount_xy,rank_xy,sendtag); - req2[6] = MPI_COMM_SCALBL.Irecv(recvbuf_XY, recvCount_XY,rank_XY,recvtag); - req1[7] = MPI_COMM_SCALBL.Isend(sendbuf_XY, sendCount_XY,rank_XY,sendtag); - req2[7] = MPI_COMM_SCALBL.Irecv(recvbuf_xy, recvCount_xy,rank_xy,recvtag); - req1[8] = MPI_COMM_SCALBL.Isend(sendbuf_Xy, sendCount_Xy,rank_Xy,sendtag); - req2[8] = MPI_COMM_SCALBL.Irecv(recvbuf_xY, recvCount_xY,rank_xY,recvtag); - req1[9] = MPI_COMM_SCALBL.Isend(sendbuf_xY, sendCount_xY,rank_xY,sendtag); - req2[9] = MPI_COMM_SCALBL.Irecv(recvbuf_Xy, recvCount_Xy,rank_Xy,recvtag); - req1[10] = MPI_COMM_SCALBL.Isend(sendbuf_xz, sendCount_xz,rank_xz,sendtag); - req2[10] = MPI_COMM_SCALBL.Irecv(recvbuf_XZ, recvCount_XZ,rank_XZ,recvtag); - req1[11] = MPI_COMM_SCALBL.Isend(sendbuf_XZ, sendCount_XZ,rank_XZ,sendtag); - req2[11] = MPI_COMM_SCALBL.Irecv(recvbuf_xz, recvCount_xz,rank_xz,recvtag); - req1[12] = MPI_COMM_SCALBL.Isend(sendbuf_Xz, sendCount_Xz,rank_Xz,sendtag); - req2[12] = MPI_COMM_SCALBL.Irecv(recvbuf_xZ, recvCount_xZ,rank_xZ,recvtag); - req1[13] = MPI_COMM_SCALBL.Isend(sendbuf_xZ, sendCount_xZ,rank_xZ,sendtag); - req2[13] = MPI_COMM_SCALBL.Irecv(recvbuf_Xz, recvCount_Xz,rank_Xz,recvtag); - req1[14] = MPI_COMM_SCALBL.Isend(sendbuf_yz, sendCount_yz,rank_yz,sendtag); - req2[14] = MPI_COMM_SCALBL.Irecv(recvbuf_YZ, recvCount_YZ,rank_YZ,recvtag); - req1[15] = MPI_COMM_SCALBL.Isend(sendbuf_YZ, sendCount_YZ,rank_YZ,sendtag); - req2[15] = MPI_COMM_SCALBL.Irecv(recvbuf_yz, recvCount_yz,rank_yz,recvtag); - req1[16] = MPI_COMM_SCALBL.Isend(sendbuf_Yz, sendCount_Yz,rank_Yz,sendtag); - req2[16] = MPI_COMM_SCALBL.Irecv(recvbuf_yZ, recvCount_yZ,rank_yZ,recvtag); - req1[17] = MPI_COMM_SCALBL.Isend(sendbuf_yZ, sendCount_yZ,rank_yZ,sendtag); - req2[17] = MPI_COMM_SCALBL.Irecv(recvbuf_Yz, recvCount_Yz,rank_Yz,recvtag); + MPI_Isend(sendbuf_x, sendCount_x,MPI_DOUBLE,rank_x,sendtag,MPI_COMM_SCALBL,&req1[0]); + MPI_Irecv(recvbuf_X, recvCount_X,MPI_DOUBLE,rank_X,recvtag,MPI_COMM_SCALBL,&req2[0]); + MPI_Isend(sendbuf_X, 
sendCount_X,MPI_DOUBLE,rank_X,sendtag,MPI_COMM_SCALBL,&req1[1]); + MPI_Irecv(recvbuf_x, recvCount_x,MPI_DOUBLE,rank_x,recvtag,MPI_COMM_SCALBL,&req2[1]); + MPI_Isend(sendbuf_y, sendCount_y,MPI_DOUBLE,rank_y,sendtag,MPI_COMM_SCALBL,&req1[2]); + MPI_Irecv(recvbuf_Y, recvCount_Y,MPI_DOUBLE,rank_Y,recvtag,MPI_COMM_SCALBL,&req2[2]); + MPI_Isend(sendbuf_Y, sendCount_Y,MPI_DOUBLE,rank_Y,sendtag,MPI_COMM_SCALBL,&req1[3]); + MPI_Irecv(recvbuf_y, recvCount_y,MPI_DOUBLE,rank_y,recvtag,MPI_COMM_SCALBL,&req2[3]); + MPI_Isend(sendbuf_z, sendCount_z,MPI_DOUBLE,rank_z,sendtag,MPI_COMM_SCALBL,&req1[4]); + MPI_Irecv(recvbuf_Z, recvCount_Z,MPI_DOUBLE,rank_Z,recvtag,MPI_COMM_SCALBL,&req2[4]); + MPI_Isend(sendbuf_Z, sendCount_Z,MPI_DOUBLE,rank_Z,sendtag,MPI_COMM_SCALBL,&req1[5]); + MPI_Irecv(recvbuf_z, recvCount_z,MPI_DOUBLE,rank_z,recvtag,MPI_COMM_SCALBL,&req2[5]); + MPI_Isend(sendbuf_xy, sendCount_xy,MPI_DOUBLE,rank_xy,sendtag,MPI_COMM_SCALBL,&req1[6]); + MPI_Irecv(recvbuf_XY, recvCount_XY,MPI_DOUBLE,rank_XY,recvtag,MPI_COMM_SCALBL,&req2[6]); + MPI_Isend(sendbuf_XY, sendCount_XY,MPI_DOUBLE,rank_XY,sendtag,MPI_COMM_SCALBL,&req1[7]); + MPI_Irecv(recvbuf_xy, recvCount_xy,MPI_DOUBLE,rank_xy,recvtag,MPI_COMM_SCALBL,&req2[7]); + MPI_Isend(sendbuf_Xy, sendCount_Xy,MPI_DOUBLE,rank_Xy,sendtag,MPI_COMM_SCALBL,&req1[8]); + MPI_Irecv(recvbuf_xY, recvCount_xY,MPI_DOUBLE,rank_xY,recvtag,MPI_COMM_SCALBL,&req2[8]); + MPI_Isend(sendbuf_xY, sendCount_xY,MPI_DOUBLE,rank_xY,sendtag,MPI_COMM_SCALBL,&req1[9]); + MPI_Irecv(recvbuf_Xy, recvCount_Xy,MPI_DOUBLE,rank_Xy,recvtag,MPI_COMM_SCALBL,&req2[9]); + MPI_Isend(sendbuf_xz, sendCount_xz,MPI_DOUBLE,rank_xz,sendtag,MPI_COMM_SCALBL,&req1[10]); + MPI_Irecv(recvbuf_XZ, recvCount_XZ,MPI_DOUBLE,rank_XZ,recvtag,MPI_COMM_SCALBL,&req2[10]); + MPI_Isend(sendbuf_XZ, sendCount_XZ,MPI_DOUBLE,rank_XZ,sendtag,MPI_COMM_SCALBL,&req1[11]); + MPI_Irecv(recvbuf_xz, recvCount_xz,MPI_DOUBLE,rank_xz,recvtag,MPI_COMM_SCALBL,&req2[11]); + MPI_Isend(sendbuf_Xz, sendCount_Xz,MPI_DOUBLE,rank_Xz,sendtag,MPI_COMM_SCALBL,&req1[12]); + MPI_Irecv(recvbuf_xZ, recvCount_xZ,MPI_DOUBLE,rank_xZ,recvtag,MPI_COMM_SCALBL,&req2[12]); + MPI_Isend(sendbuf_xZ, sendCount_xZ,MPI_DOUBLE,rank_xZ,sendtag,MPI_COMM_SCALBL,&req1[13]); + MPI_Irecv(recvbuf_Xz, recvCount_Xz,MPI_DOUBLE,rank_Xz,recvtag,MPI_COMM_SCALBL,&req2[13]); + MPI_Isend(sendbuf_yz, sendCount_yz,MPI_DOUBLE,rank_yz,sendtag,MPI_COMM_SCALBL,&req1[14]); + MPI_Irecv(recvbuf_YZ, recvCount_YZ,MPI_DOUBLE,rank_YZ,recvtag,MPI_COMM_SCALBL,&req2[14]); + MPI_Isend(sendbuf_YZ, sendCount_YZ,MPI_DOUBLE,rank_YZ,sendtag,MPI_COMM_SCALBL,&req1[15]); + MPI_Irecv(recvbuf_yz, recvCount_yz,MPI_DOUBLE,rank_yz,recvtag,MPI_COMM_SCALBL,&req2[15]); + MPI_Isend(sendbuf_Yz, sendCount_Yz,MPI_DOUBLE,rank_Yz,sendtag,MPI_COMM_SCALBL,&req1[16]); + MPI_Irecv(recvbuf_yZ, recvCount_yZ,MPI_DOUBLE,rank_yZ,recvtag,MPI_COMM_SCALBL,&req2[16]); + MPI_Isend(sendbuf_yZ, sendCount_yZ,MPI_DOUBLE,rank_yZ,sendtag,MPI_COMM_SCALBL,&req1[17]); + MPI_Irecv(recvbuf_Yz, recvCount_Yz,MPI_DOUBLE,rank_Yz,recvtag,MPI_COMM_SCALBL,&req2[17]); //................................................................................... } void ScaLBL_Communicator::RecvHalo(double *data){ //................................................................................... - MPI_COMM_SCALBL.waitAll(18,req1); - MPI_COMM_SCALBL.waitAll(18,req2); + MPI_Waitall(18,req1,stat1); + MPI_Waitall(18,req2,stat2); ScaLBL_DeviceBarrier(); //................................................................................... 
//................................................................................... @@ -1562,7 +1564,7 @@ double ScaLBL_Communicator::D3Q19_Flux_BC_z(int *neighborList, double *fq, doubl LocInletArea = double(sendCount_z); else LocInletArea = 0.f; - InletArea = MPI_COMM_SCALBL.sumReduce( LocInletArea ); + MPI_Allreduce(&LocInletArea,&InletArea,1,MPI_DOUBLE,MPI_SUM,MPI_COMM_SCALBL); //printf("Inlet area = %f \n", InletArea); // Set the flux BC @@ -1571,7 +1573,7 @@ double ScaLBL_Communicator::D3Q19_Flux_BC_z(int *neighborList, double *fq, doubl if (kproc == 0) locsum = ScaLBL_D3Q19_AAeven_Flux_BC_z(dvcSendList_z, fq, flux, InletArea, sendCount_z, N); - sum = MPI_COMM_SCALBL.sumReduce( locsum ); + MPI_Allreduce(&locsum,&sum,1,MPI_DOUBLE,MPI_SUM,MPI_COMM_SCALBL); din = flux/InletArea + sum; //if (rank==0) printf("computed din (even) =%f \n",din); if (kproc == 0) @@ -1581,7 +1583,7 @@ double ScaLBL_Communicator::D3Q19_Flux_BC_z(int *neighborList, double *fq, doubl if (kproc == 0) locsum = ScaLBL_D3Q19_AAodd_Flux_BC_z(neighborList, dvcSendList_z, fq, flux, InletArea, sendCount_z, N); - sum = MPI_COMM_SCALBL.sumReduce( locsum ); + MPI_Allreduce(&locsum,&sum,1,MPI_DOUBLE,MPI_SUM,MPI_COMM_SCALBL); din = flux/InletArea + sum; //if (rank==0) printf("computed din (odd)=%f \n",din); diff --git a/common/ScaLBL.h b/common/ScaLBL.h index 51195f5a..c737659c 100644 --- a/common/ScaLBL.h +++ b/common/ScaLBL.h @@ -202,8 +202,9 @@ private: // Give the object it's own MPI communicator RankInfoStruct rank_info; MPI_Group Group; // Group of processors associated with this domain - Utilities::MPI MPI_COMM_SCALBL; // MPI Communicator for this domain + MPI_Comm MPI_COMM_SCALBL; // MPI Communicator for this domain MPI_Request req1[18],req2[18]; + MPI_Status stat1[18],stat2[18]; //...................................................................................... // MPI ranks for all 18 neighbors //...................................................................................... 
diff --git a/common/SpherePack.cpp b/common/SpherePack.cpp index 18057653..a7246b72 100644 --- a/common/SpherePack.cpp +++ b/common/SpherePack.cpp @@ -9,6 +9,7 @@ #include "common/Array.h" #include "common/Utilities.h" +#include "common/MPI_Helpers.h" #include "common/Communication.h" #include "common/Database.h" #include "common/SpherePack.h" diff --git a/common/SpherePack.h b/common/SpherePack.h index 56284a40..5075b289 100644 --- a/common/SpherePack.h +++ b/common/SpherePack.h @@ -12,6 +12,7 @@ #include "common/Array.h" #include "common/Utilities.h" +#include "common/MPI_Helpers.h" #include "common/Communication.h" #include "common/Database.h" diff --git a/common/UnitTest.cpp b/common/UnitTest.cpp index aeb9026e..b995fa68 100755 --- a/common/UnitTest.cpp +++ b/common/UnitTest.cpp @@ -14,49 +14,44 @@ /******************************************************************** * Constructor/Destructor * ********************************************************************/ -UnitTest::UnitTest() : d_verbose( false ), d_comm( MPI_COMM_SELF ) +UnitTest::UnitTest() { - if ( Utilities::MPI::MPI_active() ) - d_comm = MPI_COMM_WORLD; +#ifdef USE_MPI + comm = MPI_COMM_WORLD; +#endif } UnitTest::~UnitTest() { reset(); } void UnitTest::reset() { - d_mutex.lock(); + mutex.lock(); // Clear the data forcing a reallocation - std::vector().swap( d_pass ); - std::vector().swap( d_fail ); - std::vector().swap( d_expected ); - d_mutex.unlock(); + std::vector().swap( pass_messages ); + std::vector().swap( fail_messages ); + std::vector().swap( expected_fail_messages ); + mutex.unlock(); } /******************************************************************** * Add a pass, fail, expected failure message in a thread-safe way * ********************************************************************/ -void UnitTest::passes( std::string in ) +void UnitTest::passes( const std::string &in ) { - d_mutex.lock(); - if ( d_verbose ) - printf( "UnitTest: %i passes: %s\n", d_comm.getRank(), in.data() ); - d_pass.emplace_back( std::move( in ) ); - d_mutex.unlock(); + mutex.lock(); + pass_messages.push_back( in ); + mutex.unlock(); } -void UnitTest::failure( std::string in ) +void UnitTest::failure( const std::string &in ) { - d_mutex.lock(); - if ( d_verbose ) - printf( "UnitTest: %i failed: %s\n", d_comm.getRank(), in.data() ); - d_fail.emplace_back( std::move( in ) ); - d_mutex.unlock(); + mutex.lock(); + fail_messages.push_back( in ); + mutex.unlock(); } -void UnitTest::expected_failure( std::string in ) +void UnitTest::expected_failure( const std::string &in ) { - d_mutex.lock(); - if ( d_verbose ) - printf( "UnitTest: %i expected_failure: %s\n", d_comm.getRank(), in.data() ); - d_expected.emplace_back( std::move( in ) ); - d_mutex.unlock(); + mutex.lock(); + expected_fail_messages.push_back( in ); + mutex.unlock(); } @@ -64,6 +59,23 @@ void UnitTest::expected_failure( std::string in ) * Print a global report * * Note: only rank 0 will print, all messages will be aggregated * ********************************************************************/ +inline std::vector UnitTest::allGather( int value ) const +{ + int size = getSize(); + std::vector data( size, value ); +#ifdef USE_MPI + if ( size > 1 ) + MPI_Allgather( &value, 1, MPI_INT, data.data(), 1, MPI_INT, comm ); +#endif + return data; +} +inline void UnitTest::barrier() const +{ +#ifdef USE_MPI + if ( getSize() > 1 ) + MPI_Barrier( comm ); +#endif +} static inline void print_messages( const std::vector> &messages ) { if ( messages.size() > 1 ) { @@ -81,27 +93,28 @@ static 
inline void print_messages( const std::vector> & } void UnitTest::report( const int level0 ) const { - d_mutex.lock(); - int size = d_comm.getSize(); - int rank = d_comm.getRank(); - // Give all processors a chance to print any remaining messages - d_comm.barrier(); - Utilities::sleep_ms( 10 ); + mutex.lock(); + int size = getSize(); + int rank = getRank(); // Broadcast the print level from rank 0 - int level = d_comm.bcast( level0, 0 ); + int level = level0; +#ifdef USE_MPI + if ( getSize() > 1 ) + MPI_Bcast( &level, 1, MPI_INT, 0, comm ); +#endif if ( level < 0 || level > 2 ) ERROR( "Invalid print level" ); // Perform a global all gather to get the number of failures per processor - auto N_pass = d_comm.allGather( d_pass.size() ); - auto N_fail = d_comm.allGather( d_fail.size() ); - auto N_expected = d_comm.allGather( d_expected.size() ); - int N_pass_tot = 0; - int N_fail_tot = 0; - int N_expected_tot = 0; + auto N_pass = allGather( pass_messages.size() ); + auto N_fail = allGather( fail_messages.size() ); + auto N_expected_fail = allGather( expected_fail_messages.size() ); + int N_pass_tot = 0; + int N_fail_tot = 0; + int N_expected_fail_tot = 0; for ( int i = 0; i < size; i++ ) { N_pass_tot += N_pass[i]; N_fail_tot += N_fail[i]; - N_expected_tot += N_expected[i]; + N_expected_fail_tot += N_expected_fail[i]; } // Send all messages to rank 0 (if needed) std::vector> pass_messages_rank( size ); @@ -109,13 +122,13 @@ void UnitTest::report( const int level0 ) const std::vector> expected_fail_rank( size ); // Get the pass messages if ( ( level == 1 && N_pass_tot <= 20 ) || level == 2 ) - pass_messages_rank = UnitTest::gatherMessages( d_pass, 1 ); + pass_messages_rank = UnitTest::gatherMessages( pass_messages, 1 ); // Get the fail messages if ( level == 1 || level == 2 ) - fail_messages_rank = UnitTest::gatherMessages( d_fail, 2 ); + fail_messages_rank = UnitTest::gatherMessages( fail_messages, 2 ); // Get the expected_fail messages - if ( ( level == 1 && N_expected_tot <= 50 ) || level == 2 ) - expected_fail_rank = UnitTest::gatherMessages( d_expected, 2 ); + if ( ( level == 1 && N_expected_fail_tot <= 50 ) || level == 2 ) + expected_fail_rank = UnitTest::gatherMessages( expected_fail_messages, 2 ); // Print the results of all messages (only rank 0 will print) if ( rank == 0 ) { pout << std::endl; @@ -161,31 +174,31 @@ void UnitTest::report( const int level0 ) const pout << std::endl; // Print the tests that expected failed pout << "Tests expected failed" << std::endl; - if ( level == 0 || ( level == 1 && N_expected_tot > 50 ) ) { + if ( level == 0 || ( level == 1 && N_expected_fail_tot > 50 ) ) { // We want to print a summary if ( size > 8 ) { // Print 1 summary for all processors printp( " %i tests expected failed (use report level 2 for more detail)\n", - N_expected_tot ); + N_expected_fail_tot ); } else { // Print a summary for each processor for ( int i = 0; i < size; i++ ) printp( " %i tests expected failed (proc %i) (use report level 2 for more " "detail)\n", - N_expected[i], i ); + N_expected_fail[i], i ); } } else { // We want to print all messages for ( int i = 0; i < size; i++ ) - ASSERT( (int) expected_fail_rank[i].size() == N_expected[i] ); + ASSERT( (int) expected_fail_rank[i].size() == N_expected_fail[i] ); print_messages( expected_fail_rank ); } pout << std::endl; } // Add a barrier to synchronize all processors (rank 0 is much slower) - d_comm.barrier(); + barrier(); Utilities::sleep_ms( 10 ); // Need a brief pause to allow any printing to finish - d_mutex.unlock(); + 
mutex.unlock(); } @@ -195,8 +208,8 @@ void UnitTest::report( const int level0 ) const std::vector> UnitTest::gatherMessages( const std::vector &local_messages, int tag ) const { - const int rank = d_comm.getRank(); - const int size = d_comm.getSize(); + const int rank = getRank(); + const int size = getSize(); std::vector> messages( size ); if ( rank == 0 ) { // Rank 0 should receive all messages @@ -220,6 +233,7 @@ std::vector> UnitTest::gatherMessages( void UnitTest::pack_message_stream( const std::vector &messages, const int rank, const int tag ) const { +#ifdef USE_MPI // Get the size of the messages auto N_messages = (int) messages.size(); auto *msg_size = new int[N_messages]; @@ -240,11 +254,18 @@ void UnitTest::pack_message_stream( k += msg_size[i]; } // Send the message stream (using a non-blocking send) - auto request = d_comm.Isend( data, size_data, rank, tag ); + MPI_Request request; + MPI_Isend( data, size_data, MPI_CHAR, rank, tag, comm, &request ); // Wait for the communication to send and free the temporary memory - d_comm.wait( request ); + MPI_Status status; + MPI_Wait( &request, &status ); delete[] data; delete[] msg_size; +#else + NULL_USE( messages ); + NULL_USE( rank ); + NULL_USE( tag ); +#endif } @@ -253,15 +274,20 @@ void UnitTest::pack_message_stream( ********************************************************************/ std::vector UnitTest::unpack_message_stream( const int rank, const int tag ) const { +#ifdef USE_MPI // Probe the message to get the message size - int size_data = d_comm.probe( rank, tag ); + MPI_Status status; + MPI_Probe( rank, tag, comm, &status ); + int size_data = -1; + MPI_Get_count( &status, MPI_BYTE, &size_data ); ASSERT( size_data >= 0 ); // Allocate memory to receive the data auto *data = new char[size_data]; // receive the data (using a non-blocking receive) - auto request = d_comm.Irecv( data, size_data, rank, tag ); + MPI_Request request; + MPI_Irecv( data, size_data, MPI_CHAR, rank, tag, comm, &request ); // Wait for the communication to be received - d_comm.wait( request ); + MPI_Wait( &request, &status ); // Unpack the message stream int N_messages = 0; memcpy( &N_messages, data, sizeof( int ) ); @@ -277,16 +303,77 @@ std::vector UnitTest::unpack_message_stream( const int rank, const messages[i] = std::string( &data[k], msg_size[i] ); k += msg_size[i]; } - // Delete the temporary memory delete[] data; return messages; +#else + NULL_USE( rank ); + NULL_USE( tag ); + return std::vector(); +#endif } /******************************************************************** * Other functions * ********************************************************************/ -size_t UnitTest::NumPassGlobal() const { return d_comm.sumReduce( d_pass.size() ); } -size_t UnitTest::NumFailGlobal() const { return d_comm.sumReduce( d_fail.size() ); } -size_t UnitTest::NumExpectedFailGlobal() const { return d_comm.sumReduce( d_expected.size() ); } - +int UnitTest::getRank() const +{ + int rank = 0; +#ifdef USE_MPI + int flag = 0; + MPI_Initialized( &flag ); + if ( flag ) + MPI_Comm_rank( comm, &rank ); +#endif + return rank; +} +int UnitTest::getSize() const +{ + int size = 1; +#ifdef USE_MPI + int flag = 0; + MPI_Initialized( &flag ); + if ( flag ) + MPI_Comm_size( comm, &size ); +#endif + return size; +} +size_t UnitTest::NumPassGlobal() const +{ + size_t num = pass_messages.size(); +#ifdef USE_MPI + if ( getSize() > 1 ) { + auto send = static_cast( num ); + int sum = 0; + MPI_Allreduce( &send, &sum, 1, MPI_INT, MPI_SUM, comm ); + num = static_cast( sum ); + } 
+#endif + return num; +} +size_t UnitTest::NumFailGlobal() const +{ + size_t num = fail_messages.size(); +#ifdef USE_MPI + if ( getSize() > 1 ) { + auto send = static_cast( num ); + int sum = 0; + MPI_Allreduce( &send, &sum, 1, MPI_INT, MPI_SUM, comm ); + num = static_cast( sum ); + } +#endif + return num; +} +size_t UnitTest::NumExpectedFailGlobal() const +{ + size_t num = expected_fail_messages.size(); +#ifdef USE_MPI + if ( getSize() > 1 ) { + auto send = static_cast( num ); + int sum = 0; + MPI_Allreduce( &send, &sum, 1, MPI_INT, MPI_SUM, comm ); + num = static_cast( sum ); + } +#endif + return num; +} diff --git a/common/UnitTest.h b/common/UnitTest.h index 9d452747..80503d19 100755 --- a/common/UnitTest.h +++ b/common/UnitTest.h @@ -1,11 +1,13 @@ #ifndef included_UnitTest #define included_UnitTest -#include "common/MPI.h" - #include +#include #include #include +#ifdef USE_MPI +#include "mpi.h" +#endif /*! @@ -26,47 +28,47 @@ * \endcode */ -class UnitTest final +class UnitTest { public: //! Constructor UnitTest(); //! Destructor - ~UnitTest(); - - // Copy constructor - UnitTest( const UnitTest & ) = delete; - - // Assignment operator - UnitTest &operator=( const UnitTest & ) = delete; + virtual ~UnitTest(); //! Indicate a passed test (thread-safe) - void passes( std::string in ); + virtual void passes( const std::string &in ); //! Indicate a failed test (thread-safe) - void failure( std::string in ); + virtual void failure( const std::string &in ); //! Indicate an expected failed test (thread-safe) - void expected_failure( std::string in ); + virtual void expected_failure( const std::string &in ); //! Return the number of passed tests locally - inline size_t NumPassLocal() const { return d_pass.size(); } + virtual size_t NumPassLocal() const { return pass_messages.size(); } //! Return the number of failed tests locally - inline size_t NumFailLocal() const { return d_fail.size(); } + virtual size_t NumFailLocal() const { return fail_messages.size(); } //! Return the number of expected failed tests locally - inline size_t NumExpectedFailLocal() const { return d_expected.size(); } + virtual size_t NumExpectedFailLocal() const { return expected_fail_messages.size(); } //! Return the number of passed tests locally - size_t NumPassGlobal() const; + virtual size_t NumPassGlobal() const; //! Return the number of failed tests locally - size_t NumFailGlobal() const; + virtual size_t NumFailGlobal() const; //! Return the number of expected failed tests locally - size_t NumExpectedFailGlobal() const; + virtual size_t NumExpectedFailGlobal() const; + + //! Return the rank of the current processor + int getRank() const; + + //! Return the number of processors + int getSize() const; /*! * Print a report of the passed and failed tests. @@ -75,28 +77,29 @@ public: * to print correctly). * @param level Optional integer specifying the level of reporting (default: 1) * 0: Report the number of tests passed, failed, and expected failures. - * 1: Report the passed tests (if <=20) or number passed, - * Report all failures, - * Report the expected failed tests (if <=50) or the number passed. + * 1: Report the number of passed tests (if <=20) or the number passed + * otherwise, report all failures, report the number of expected + * failed tests (if <=50) or the number passed otherwise. * 2: Report all passed, failed, and expected failed tests. */ - void report( const int level = 1 ) const; + virtual void report( const int level = 1 ) const; //! Clear the messages void reset(); - //! 
Make the unit test operator verbose? - void verbose( bool verbose = true ) { d_verbose = verbose; } +protected: + std::vector pass_messages; + std::vector fail_messages; + std::vector expected_fail_messages; + mutable std::mutex mutex; +#ifdef USE_MPI + MPI_Comm comm; +#endif private: - std::vector d_pass; - std::vector d_fail; - std::vector d_expected; - bool d_verbose; - mutable std::mutex d_mutex; - Utilities::MPI d_comm; + // Make the copy constructor private + UnitTest( const UnitTest & ) {} -private: // Function to pack the messages into a single data stream and send to the given processor // Note: This function does not return until the message stream has been sent void pack_message_stream( @@ -106,7 +109,9 @@ private: // Note: This function does not return until the message stream has been received std::vector unpack_message_stream( const int rank, const int tag ) const; - // Gather the messages + // Helper functions + inline void barrier() const; + inline std::vector allGather( int value ) const; inline std::vector> gatherMessages( const std::vector &local_messages, int tag ) const; }; diff --git a/common/UtilityMacros.h b/common/UtilityMacros.h index 2c374ef1..bfac172f 100644 --- a/common/UtilityMacros.h +++ b/common/UtilityMacros.h @@ -143,43 +143,35 @@ * Be sure to follow with ENABLE_WARNINGS */ // clang-format off -#ifndef DISABLE_WARNINGS -#if defined( USING_MSVC ) +#ifdef DISABLE_WARNINGS + // Macros previously defined +#elif defined( USING_MSVC ) #define DISABLE_WARNINGS __pragma( warning( push, 0 ) ) #define ENABLE_WARNINGS __pragma( warning( pop ) ) #elif defined( USING_CLANG ) #define DISABLE_WARNINGS \ - _Pragma( "clang diagnostic push" ) \ - _Pragma( "clang diagnostic ignored \"-Wall\"" ) \ + _Pragma( "clang diagnostic push" ) _Pragma( "clang diagnostic ignored \"-Wall\"" ) \ _Pragma( "clang diagnostic ignored \"-Wextra\"" ) \ _Pragma( "clang diagnostic ignored \"-Wunused-private-field\"" ) \ - _Pragma( "clang diagnostic ignored \"-Wdeprecated-declarations\"" ) \ - _Pragma( "clang diagnostic ignored \"-Winteger-overflow\"" ) + _Pragma( "clang diagnostic ignored \"-Wmismatched-new-delete\"" ) #define ENABLE_WARNINGS _Pragma( "clang diagnostic pop" ) #elif defined( USING_GCC ) + // Note: We cannot disable the -Wliteral-suffix message with this macro because the + // pragma command cannot suppress warnings from the C++ preprocessor. See gcc bug #53431. 
#define DISABLE_WARNINGS \ - _Pragma( "GCC diagnostic push" ) \ - _Pragma( "GCC diagnostic ignored \"-Wpragmas\"" ) \ - _Pragma( "GCC diagnostic ignored \"-Wall\"" ) \ + _Pragma( "GCC diagnostic push" ) _Pragma( "GCC diagnostic ignored \"-Wall\"" ) \ _Pragma( "GCC diagnostic ignored \"-Wextra\"" ) \ - _Pragma( "GCC diagnostic ignored \"-Wpedantic\"" ) \ + _Pragma( "GCC diagnostic ignored \"-Wpragmas\"" ) \ _Pragma( "GCC diagnostic ignored \"-Wunused-local-typedefs\"" ) \ _Pragma( "GCC diagnostic ignored \"-Woverloaded-virtual\"" ) \ _Pragma( "GCC diagnostic ignored \"-Wunused-parameter\"" ) \ - _Pragma( "GCC diagnostic ignored \"-Wdeprecated-declarations\"" ) \ - _Pragma( "GCC diagnostic ignored \"-Wvirtual-move-assign\"" ) \ - _Pragma( "GCC diagnostic ignored \"-Wunused-function\"" ) \ - _Pragma( "GCC diagnostic ignored \"-Woverflow\"" ) \ - _Pragma( "GCC diagnostic ignored \"-Wunused-variable\"" ) \ - _Pragma( "GCC diagnostic ignored \"-Wignored-qualifiers\"" ) \ - _Pragma( "GCC diagnostic ignored \"-Wenum-compare\"" ) \ + _Pragma( "GCC diagnostic ignored \"-Warray-bounds\"" ) \ _Pragma( "GCC diagnostic ignored \"-Wterminate\"" ) #define ENABLE_WARNINGS _Pragma( "GCC diagnostic pop" ) #else #define DISABLE_WARNINGS #define ENABLE_WARNINGS #endif -#endif // clang-format on diff --git a/cpu/BGK.cpp b/cpu/BGK.cpp index bccc5b77..436ab381 100644 --- a/cpu/BGK.cpp +++ b/cpu/BGK.cpp @@ -1,4 +1,5 @@ extern "C" void ScaLBL_D3Q19_AAeven_BGK(double *dist, int start, int finish, int Np, double rlx, double Fx, double Fy, double Fz){ + int n; // conserved momemnts double rho,ux,uy,uz,uu; // non-conserved moments @@ -110,12 +111,14 @@ extern "C" void ScaLBL_D3Q19_AAeven_BGK(double *dist, int start, int finish, int } extern "C" void ScaLBL_D3Q19_AAodd_BGK(int *neighborList, double *dist, int start, int finish, int Np, double rlx, double Fx, double Fy, double Fz){ + int n; // conserved momemnts double rho,ux,uy,uz,uu; // non-conserved moments double f0,f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,f11,f12,f13,f14,f15,f16,f17,f18; int nr1,nr2,nr3,nr4,nr5,nr6,nr7,nr8,nr9,nr10,nr11,nr12,nr13,nr14,nr15,nr16,nr17,nr18; + int nread; for (int n=start; n 0 ){ // Get the density value (Streaming already performed) - double Na = Den[n]; - double Nb = Den[N+n]; + Na = Den[n]; + Nb = Den[N+n]; Phi[n] = (Na-Nb)/(Na+Nb); } } + //................................................................... 
} extern "C" void ScaLBL_SetSlice_z(double *Phi, double value, int Nx, int Ny, int Nz, int Slice){ - for (int n=Slice*Nx*Ny; n<(Slice+1)*Nx*Ny; n++){ + int n; + for (n=Slice*Nx*Ny; n<(Slice+1)*Nx*Ny; n++){ Phi[n] = value; } } @@ -1246,7 +1255,7 @@ extern "C" void ScaLBL_D3Q19_AAeven_Color(int *Map, double *dist, double *Aq, do double *Vel, double rhoA, double rhoB, double tauA, double tauB, double alpha, double beta, double Fx, double Fy, double Fz, int strideY, int strideZ, int start, int finish, int Np){ - int ijk,nn; + int ijk,nn,n; double fq; // conserved momemnts double rho,jx,jy,jz; @@ -1829,7 +1838,7 @@ extern "C" void ScaLBL_D3Q19_AAodd_Color(int *neighborList, int *Map, double *di double *Phi, double *Vel, double rhoA, double rhoB, double tauA, double tauB, double alpha, double beta, double Fx, double Fy, double Fz, int strideY, int strideZ, int start, int finish, int Np){ - int nn,ijk,nread; + int n,nn,ijk,nread; int nr1,nr2,nr3,nr4,nr5,nr6; int nr7,nr8,nr9,nr10; int nr11,nr12,nr13,nr14; @@ -2483,7 +2492,7 @@ extern "C" void ScaLBL_D3Q19_AAodd_Color(int *neighborList, int *Map, double *di extern "C" void ScaLBL_D3Q7_AAodd_PhaseField(int *neighborList, int *Map, double *Aq, double *Bq, double *Den, double *Phi, int start, int finish, int Np){ - int idx, nread; + int idx,n,nread; double fq,nA,nB; for (int n=start; n #include -ScaLBL_ColorModel::ScaLBL_ColorModel(int RANK, int NP, const Utilities::MPI& COMM): +ScaLBL_ColorModel::ScaLBL_ColorModel(int RANK, int NP, MPI_Comm COMM): rank(RANK), nprocs(NP), Restart(0),timestep(0),timestepMax(0),tauA(0),tauB(0),rhoA(0),rhoB(0),alpha(0),beta(0), Fx(0),Fy(0),Fz(0),flux(0),din(0),dout(0),inletA(0),inletB(0),outletA(0),outletB(0), Nx(0),Ny(0),Nz(0),N(0),Np(0),nprocx(0),nprocy(0),nprocz(0),BoundaryCondition(0),Lx(0),Ly(0),Lz(0),comm(COMM) @@ -167,9 +167,9 @@ void ScaLBL_ColorModel::SetDomain(){ for (int i=0; iid[i] = 1; // initialize this way //Averages = std::shared_ptr ( new TwoPhase(Dm) ); // TwoPhase analysis object Averages = std::shared_ptr ( new SubPhase(Dm) ); // TwoPhase analysis object - comm.barrier(); + MPI_Barrier(comm); Dm->CommInit(); - comm.barrier(); + MPI_Barrier(comm); // Read domain parameters rank = Dm->rank(); nprocx = Dm->nprocx(); @@ -292,7 +292,7 @@ void ScaLBL_ColorModel::AssignComponentLabels(double *phase) for (int i=0; iid[i] = Mask->id[i]; for (size_t idx=0; idxComm.sumReduce( label_count[idx] ); + label_count_global[idx]=sumReduce( Dm->Comm, label_count[idx]); if (rank==0){ printf("Component labels: %lu \n",NLABELS); @@ -333,7 +333,7 @@ void ScaLBL_ColorModel::Create(){ Map.resize(Nx,Ny,Nz); Map.fill(-2); auto neighborList= new int[18*Npad]; Np = ScaLBL_Comm->MemoryOptimizedLayoutAA(Map,neighborList,Mask->id,Np); - comm.barrier(); + MPI_Barrier(comm); //........................................................................... // MAIN VARIABLES ALLOCATED HERE @@ -465,7 +465,7 @@ void ScaLBL_ColorModel::Initialize(){ ScaLBL_CopyToDevice(Phi,cPhi,N*sizeof(double)); ScaLBL_DeviceBarrier(); - comm.barrier(); + MPI_Barrier(comm); } if (rank==0) printf ("Initializing phase field \n"); @@ -651,7 +651,7 @@ void ScaLBL_ColorModel::Run(){ //.......create and start timer............ double starttime,stoptime,cputime; ScaLBL_DeviceBarrier(); - comm.barrier(); + MPI_Barrier(comm); starttime = MPI_Wtime(); //......................................... 
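The sumReduce( Dm->Comm, ... ) calls introduced in the ColorModel.cpp hunks above are thin free-function wrappers declared in common/MPI_Helpers.h, replacing the old Utilities::MPI member calls. The following is only a minimal sketch of that pattern, assuming a double overload; the actual declarations in MPI_Helpers.h may use different overloads or signatures.

#include <mpi.h>

// Hypothetical sketch of the sumReduce helper used in place of the old
// Utilities::MPI wrapper: sum a local value over all ranks of comm.
// The real helper lives in common/MPI_Helpers.h.
inline double sumReduce( MPI_Comm comm, double x )
{
    double sum = 0.0;
    MPI_Allreduce( &x, &sum, 1, MPI_DOUBLE, MPI_SUM, comm );
    return sum;
}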
@@ -700,8 +700,7 @@ void ScaLBL_ColorModel::Run(){ } ScaLBL_D3Q19_AAodd_Color(NeighborList, dvcMap, fq, Aq, Bq, Den, Phi, Velocity, rhoA, rhoB, tauA, tauB, alpha, beta, Fx, Fy, Fz, Nx, Nx*Ny, 0, ScaLBL_Comm->LastExterior(), Np); - ScaLBL_DeviceBarrier(); - comm.barrier(); + ScaLBL_DeviceBarrier(); MPI_Barrier(comm); // *************EVEN TIMESTEP************* timestep++; @@ -736,10 +735,10 @@ void ScaLBL_ColorModel::Run(){ } ScaLBL_D3Q19_AAeven_Color(dvcMap, fq, Aq, Bq, Den, Phi, Velocity, rhoA, rhoB, tauA, tauB, alpha, beta, Fx, Fy, Fz, Nx, Nx*Ny, 0, ScaLBL_Comm->LastExterior(), Np); - ScaLBL_DeviceBarrier(); - comm.barrier(); + ScaLBL_DeviceBarrier(); MPI_Barrier(comm); //************************************************************************ + MPI_Barrier(comm); PROFILE_STOP("Update"); if (rank==0 && timestep%analysis_interval == 0 && BoundaryCondition > 0){ @@ -980,7 +979,7 @@ void ScaLBL_ColorModel::Run(){ //morph_delta *= (-1.0); REVERSE_FLOW_DIRECTION = false; } - comm.barrier(); + MPI_Barrier(comm); } morph_timesteps += analysis_interval; } @@ -990,7 +989,7 @@ void ScaLBL_ColorModel::Run(){ PROFILE_SAVE("lbpm_color_simulator",1); //************************************************************************ ScaLBL_DeviceBarrier(); - comm.barrier(); + MPI_Barrier(comm); stoptime = MPI_Wtime(); if (rank==0) printf("-------------------------------------------------------------------\n"); // Compute the walltime per timestep @@ -1035,17 +1034,17 @@ double ScaLBL_ColorModel::ImageInit(std::string Filename){ } } - Count = Dm->Comm.sumReduce( Count ); - PoreCount = Dm->Comm.sumReduce( PoreCount ); + Count=sumReduce( Dm->Comm, Count); + PoreCount=sumReduce( Dm->Comm, PoreCount); if (rank==0) printf(" new saturation: %f (%f / %f) \n", Count / PoreCount, Count, PoreCount); ScaLBL_CopyToDevice(Phi, PhaseLabel, Nx*Ny*Nz*sizeof(double)); - comm.barrier(); + MPI_Barrier(comm); ScaLBL_D3Q19_Init(fq, Np); ScaLBL_PhaseField_Init(dvcMap, Phi, Den, Aq, Bq, 0, ScaLBL_Comm->LastExterior(), Np); ScaLBL_PhaseField_Init(dvcMap, Phi, Den, Aq, Bq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); - comm.barrier(); + MPI_Barrier(comm); ScaLBL_CopyToHost(Averages->Phi.data(),Phi,Nx*Ny*Nz*sizeof(double)); @@ -1077,7 +1076,7 @@ double ScaLBL_ColorModel::MorphOpenConnected(double target_volume_change){ BlobIDstruct new_index; double vF=0.0; double vS=0.0; ComputeGlobalBlobIDs(nx-2,ny-2,nz-2,Dm->rank_info,phase,Averages->SDs,vF,vS,phase_label,Dm->Comm); - Dm->Comm.barrier(); + MPI_Barrier(Dm->Comm); long long count_connected=0; long long count_porespace=0; @@ -1099,9 +1098,9 @@ double ScaLBL_ColorModel::MorphOpenConnected(double target_volume_change){ } } } - count_connected = Dm->Comm.sumReduce( count_connected); - count_porespace = Dm->Comm.sumReduce( count_porespace); - count_water = Dm->Comm.sumReduce( count_water); + count_connected=sumReduce( Dm->Comm, count_connected); + count_porespace=sumReduce( Dm->Comm, count_porespace); + count_water=sumReduce( Dm->Comm, count_water); for (int k=0; kComm.sumReduce( count_morphopen); + count_morphopen=sumReduce( Dm->Comm, count_morphopen); volume_change = double(count_morphopen - count_connected); if (rank==0) printf(" opening of connected oil %f \n",volume_change/count_connected); @@ -1279,8 +1278,8 @@ double ScaLBL_ColorModel::SeedPhaseField(const double seed_water_in_oil){ mass_loss += random_value*seed_water_in_oil; } - count = Dm->Comm.sumReduce( count ); - mass_loss = Dm->Comm.sumReduce( mass_loss ); + count= sumReduce( Dm->Comm, count); + mass_loss= 
sumReduce( Dm->Comm, mass_loss); if (rank == 0) printf("Remove mass %f from %f voxels \n",mass_loss,count); // Need to initialize Aq, Bq, Den, Phi directly @@ -1317,7 +1316,7 @@ double ScaLBL_ColorModel::MorphInit(const double beta, const double target_delta } } } - double volume_initial = Dm->Comm.sumReduce( count); + double volume_initial = sumReduce( Dm->Comm, count); /* sprintf(LocalRankFilename,"phi_initial.%05i.raw",rank); FILE *INPUT = fopen(LocalRankFilename,"wb"); @@ -1327,7 +1326,7 @@ double ScaLBL_ColorModel::MorphInit(const double beta, const double target_delta // 2. Identify connected components of phase field -> phase_label BlobIDstruct new_index; ComputeGlobalBlobIDs(Nx-2,Ny-2,Nz-2,rank_info,phase,Averages->SDs,vF,vS,phase_label,comm); - comm.barrier(); + MPI_Barrier(comm); // only operate on component "0" count = 0.0; @@ -1349,8 +1348,8 @@ double ScaLBL_ColorModel::MorphInit(const double beta, const double target_delta } } } - double volume_connected = Dm->Comm.sumReduce( count ); - second_biggest = Dm->Comm.sumReduce( second_biggest ); + double volume_connected = sumReduce( Dm->Comm, count); + second_biggest = sumReduce( Dm->Comm, second_biggest); /*int reach_x, reach_y, reach_z; for (int k=0; kComm.sumReduce( count ); + double volume_final= sumReduce( Dm->Comm, count); delta_volume = (volume_final-volume_initial); if (rank == 0) printf("MorphInit: change fluid volume fraction by %f \n", delta_volume/volume_initial); diff --git a/models/ColorModel.h b/models/ColorModel.h index c52f04c3..a3b3a124 100644 --- a/models/ColorModel.h +++ b/models/ColorModel.h @@ -12,13 +12,13 @@ Implementation of color lattice boltzmann model #include "common/Communication.h" #include "analysis/TwoPhase.h" #include "analysis/runAnalysis.h" -#include "common/MPI.h" +#include "common/MPI_Helpers.h" #include "ProfilerApp.h" #include "threadpool/thread_pool.h" class ScaLBL_ColorModel{ public: - ScaLBL_ColorModel(int RANK, int NP, const Utilities::MPI& COMM); + ScaLBL_ColorModel(int RANK, int NP, MPI_Comm COMM); ~ScaLBL_ColorModel(); // functions in they should be run @@ -68,7 +68,7 @@ public: double *Pressure; private: - Utilities::MPI comm; + MPI_Comm comm; int dist_mem_size; int neighborSize; diff --git a/models/DFHModel.cpp b/models/DFHModel.cpp index ced5853f..4eb03bea 100644 --- a/models/DFHModel.cpp +++ b/models/DFHModel.cpp @@ -3,7 +3,7 @@ color lattice boltzmann model */ #include "models/DFHModel.h" -ScaLBL_DFHModel::ScaLBL_DFHModel(int RANK, int NP, const Utilities::MPI& COMM): +ScaLBL_DFHModel::ScaLBL_DFHModel(int RANK, int NP, MPI_Comm COMM): rank(RANK), nprocs(NP), Restart(0),timestep(0),timestepMax(0),tauA(0),tauB(0),rhoA(0),rhoB(0),alpha(0),beta(0), Fx(0),Fy(0),Fz(0),flux(0),din(0),dout(0),inletA(0),inletB(0),outletA(0),outletB(0), Nx(0),Ny(0),Nz(0),N(0),Np(0),nprocx(0),nprocy(0),nprocz(0),BoundaryCondition(0),Lx(0),Ly(0),Lz(0),comm(COMM) @@ -100,16 +100,16 @@ void ScaLBL_DFHModel::ReadParams(string filename){ } void ScaLBL_DFHModel::SetDomain(){ - Dm = std::make_shared(domain_db,comm); // full domain for analysis - Mask = std::make_shared(domain_db,comm); // mask domain removes immobile phases + Dm = std::shared_ptr(new Domain(domain_db,comm)); // full domain for analysis + Mask = std::shared_ptr(new Domain(domain_db,comm)); // mask domain removes immobile phases Nx+=2; Ny+=2; Nz += 2; N = Nx*Ny*Nz; id = new char [N]; - for (int i=0; iid[i] = 1; // initialize this way - Averages = std::make_shared( Dm ); // TwoPhase analysis object - comm.barrier(); + for (int i=0; iid[i] = 1; // 
initialize this way + Averages = std::shared_ptr ( new TwoPhase(Dm) ); // TwoPhase analysis object + MPI_Barrier(comm); Dm->CommInit(); - comm.barrier(); + MPI_Barrier(comm); rank = Dm->rank(); } @@ -131,7 +131,7 @@ void ScaLBL_DFHModel::ReadInput(){ sprintf(LocalRankString,"%05d",rank); sprintf(LocalRankFilename,"%s%s","SignDist.",LocalRankString); ReadBinaryFile(LocalRankFilename, Averages->SDs.data(), N); - comm.barrier(); + MPI_Barrier(comm); if (rank == 0) cout << "Domain set." << endl; } @@ -206,7 +206,7 @@ void ScaLBL_DFHModel::Create(){ Map.resize(Nx,Ny,Nz); Map.fill(-2); auto neighborList= new int[18*Npad]; Np = ScaLBL_Comm->MemoryOptimizedLayoutAA(Map,neighborList,Mask->id,Np); - comm.barrier(); + MPI_Barrier(comm); //........................................................................... // MAIN VARIABLES ALLOCATED HERE @@ -424,7 +424,7 @@ void ScaLBL_DFHModel::Initialize(){ } } } - count_wet_global = comm.sumReduce( count_wet ); + MPI_Allreduce(&count_wet,&count_wet_global,1,MPI_DOUBLE,MPI_SUM,comm); if (rank==0) printf("Wetting phase volume fraction =%f \n",count_wet_global/double(Nx*Ny*Nz*nprocs)); // initialize phi based on PhaseLabel (include solid component labels) ScaLBL_CopyToDevice(Phi, PhaseLabel, Np*sizeof(double)); @@ -446,7 +446,7 @@ void ScaLBL_DFHModel::Initialize(){ timestep=0; } } - comm.bcast(×tep,1,0); + MPI_Bcast(×tep,1,MPI_INT,0,comm); // Read in the restart file to CPU buffers double *cPhi = new double[Np]; double *cDist = new double[19*Np]; @@ -468,7 +468,7 @@ void ScaLBL_DFHModel::Initialize(){ ScaLBL_DeviceBarrier(); delete [] cPhi; delete [] cDist; - comm.barrier(); + MPI_Barrier(comm); } if (rank==0) printf ("Initializing phase field \n"); @@ -486,7 +486,7 @@ void ScaLBL_DFHModel::Run(){ //.......create and start timer............ double starttime,stoptime,cputime; ScaLBL_DeviceBarrier(); - comm.barrier(); + MPI_Barrier(comm); starttime = MPI_Wtime(); //......................................... 
//************ MAIN ITERATION LOOP ***************************************/ @@ -532,8 +532,7 @@ void ScaLBL_DFHModel::Run(){ } ScaLBL_D3Q19_AAodd_DFH(NeighborList, fq, Aq, Bq, Den, Phi, Gradient, SolidPotential, rhoA, rhoB, tauA, tauB, alpha, beta, Fx, Fy, Fz, 0, ScaLBL_Comm->LastExterior(), Np); - ScaLBL_DeviceBarrier(); - comm.barrier(); + ScaLBL_DeviceBarrier(); MPI_Barrier(comm); // *************EVEN TIMESTEP************* timestep++; @@ -569,9 +568,9 @@ void ScaLBL_DFHModel::Run(){ } ScaLBL_D3Q19_AAeven_DFH(NeighborList, fq, Aq, Bq, Den, Phi, Gradient, SolidPotential, rhoA, rhoB, tauA, tauB, alpha, beta, Fx, Fy, Fz, 0, ScaLBL_Comm->LastExterior(), Np); - ScaLBL_DeviceBarrier(); - comm.barrier(); + ScaLBL_DeviceBarrier(); MPI_Barrier(comm); //************************************************************************ + MPI_Barrier(comm); PROFILE_STOP("Update"); // Run the analysis @@ -582,7 +581,7 @@ void ScaLBL_DFHModel::Run(){ PROFILE_SAVE("lbpm_color_simulator",1); //************************************************************************ ScaLBL_DeviceBarrier(); - comm.barrier(); + MPI_Barrier(comm); stoptime = MPI_Wtime(); if (rank==0) printf("-------------------------------------------------------------------\n"); // Compute the walltime per timestep diff --git a/models/DFHModel.h b/models/DFHModel.h index 00e6e6b3..883ec6f8 100644 --- a/models/DFHModel.h +++ b/models/DFHModel.h @@ -12,13 +12,13 @@ Implementation of color lattice boltzmann model #include "common/Communication.h" #include "analysis/TwoPhase.h" #include "analysis/runAnalysis.h" -#include "common/MPI.h" +#include "common/MPI_Helpers.h" #include "ProfilerApp.h" #include "threadpool/thread_pool.h" class ScaLBL_DFHModel{ public: - ScaLBL_DFHModel(int RANK, int NP, const Utilities::MPI& COMM); + ScaLBL_DFHModel(int RANK, int NP, MPI_Comm COMM); ~ScaLBL_DFHModel(); // functions in they should be run @@ -66,7 +66,7 @@ public: double *Pressure; private: - Utilities::MPI comm; + MPI_Comm comm; int dist_mem_size; int neighborSize; diff --git a/models/MRTModel.cpp b/models/MRTModel.cpp index 23925930..c1db7c1c 100644 --- a/models/MRTModel.cpp +++ b/models/MRTModel.cpp @@ -5,7 +5,7 @@ #include "analysis/distance.h" #include "common/ReadMicroCT.h" -ScaLBL_MRTModel::ScaLBL_MRTModel(int RANK, int NP, const Utilities::MPI& COMM): +ScaLBL_MRTModel::ScaLBL_MRTModel(int RANK, int NP, MPI_Comm COMM): rank(RANK), nprocs(NP), Restart(0),timestep(0),timestepMax(0),tau(0), Fx(0),Fy(0),Fz(0),flux(0),din(0),dout(0),mu(0), Nx(0),Ny(0),Nz(0),N(0),Np(0),nprocx(0),nprocy(0),nprocz(0),BoundaryCondition(0),Lx(0),Ly(0),Lz(0),comm(COMM) @@ -83,9 +83,9 @@ void ScaLBL_MRTModel::SetDomain(){ for (int i=0; iid[i] = 1; // initialize this way //Averages = std::shared_ptr ( new TwoPhase(Dm) ); // TwoPhase analysis object - comm.barrier(); + MPI_Barrier(comm); Dm->CommInit(); - comm.barrier(); + MPI_Barrier(comm); rank = Dm->rank(); nprocx = Dm->nprocx(); @@ -171,7 +171,7 @@ void ScaLBL_MRTModel::Create(){ Map.resize(Nx,Ny,Nz); Map.fill(-2); auto neighborList= new int[18*Npad]; Np = ScaLBL_Comm->MemoryOptimizedLayoutAA(Map,neighborList,Mask->id,Np); - comm.barrier(); + MPI_Barrier(comm); //........................................................................... // MAIN VARIABLES ALLOCATED HERE //........................................................................... 
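The ScaLBL_MRTModel::Run() hunks that follow swap the wrapper reductions for direct MPI_Allreduce calls when averaging the velocity field. A minimal sketch of that reduction pattern, with the accumulation over pore voxels elided and variable names taken from the diff:

#include <mpi.h>

// Sketch: reduce per-rank partial velocity sums and the pore-voxel count,
// then form the global mean velocity components.
static void averageVelocity( MPI_Comm comm,
                             double vax_loc, double vay_loc, double vaz_loc,
                             double count_loc,
                             double &vax, double &vay, double &vaz )
{
    double count = 0.0;
    MPI_Allreduce( &vax_loc,   &vax,   1, MPI_DOUBLE, MPI_SUM, comm );
    MPI_Allreduce( &vay_loc,   &vay,   1, MPI_DOUBLE, MPI_SUM, comm );
    MPI_Allreduce( &vaz_loc,   &vaz,   1, MPI_DOUBLE, MPI_SUM, comm );
    MPI_Allreduce( &count_loc, &count, 1, MPI_DOUBLE, MPI_SUM, comm );
    vax /= count;
    vay /= count;
    vaz /= count;
}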
@@ -190,7 +190,7 @@ void ScaLBL_MRTModel::Create(){ if (rank==0) printf ("Setting up device map and neighbor list \n"); // copy the neighbor list ScaLBL_CopyToDevice(NeighborList, neighborList, neighborSize); - comm.barrier(); + MPI_Barrier(comm); } @@ -225,8 +225,7 @@ void ScaLBL_MRTModel::Run(){ //.......create and start timer............ double starttime,stoptime,cputime; - ScaLBL_DeviceBarrier(); - comm.barrier(); + ScaLBL_DeviceBarrier(); MPI_Barrier(comm); starttime = MPI_Wtime(); if (rank==0) printf("Beginning AA timesteps, timestepMax = %i \n", timestepMax); if (rank==0) printf("********************************************************\n"); @@ -240,21 +239,18 @@ void ScaLBL_MRTModel::Run(){ ScaLBL_D3Q19_AAodd_MRT(NeighborList, fq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np, rlx_setA, rlx_setB, Fx, Fy, Fz); ScaLBL_Comm->RecvD3Q19AA(fq); //WRITE INTO OPPOSITE ScaLBL_D3Q19_AAodd_MRT(NeighborList, fq, 0, ScaLBL_Comm->LastExterior(), Np, rlx_setA, rlx_setB, Fx, Fy, Fz); - ScaLBL_DeviceBarrier(); - comm.barrier(); + ScaLBL_DeviceBarrier(); MPI_Barrier(comm); timestep++; ScaLBL_Comm->SendD3Q19AA(fq); //READ FORM NORMAL ScaLBL_D3Q19_AAeven_MRT(fq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np, rlx_setA, rlx_setB, Fx, Fy, Fz); ScaLBL_Comm->RecvD3Q19AA(fq); //WRITE INTO OPPOSITE ScaLBL_D3Q19_AAeven_MRT(fq, 0, ScaLBL_Comm->LastExterior(), Np, rlx_setA, rlx_setB, Fx, Fy, Fz); - ScaLBL_DeviceBarrier(); - comm.barrier(); + ScaLBL_DeviceBarrier(); MPI_Barrier(comm); //************************************************************************/ if (timestep%1000==0){ ScaLBL_D3Q19_Momentum(fq,Velocity, Np); - ScaLBL_DeviceBarrier(); - comm.barrier(); + ScaLBL_DeviceBarrier(); MPI_Barrier(comm); ScaLBL_Comm->RegularLayout(Map,&Velocity[0],Velocity_x); ScaLBL_Comm->RegularLayout(Map,&Velocity[Np],Velocity_y); ScaLBL_Comm->RegularLayout(Map,&Velocity[2*Np],Velocity_z); @@ -276,10 +272,10 @@ void ScaLBL_MRTModel::Run(){ } } } - vax = Mask->Comm.sumReduce( vax_loc ); - vay = Mask->Comm.sumReduce( vay_loc ); - vaz = Mask->Comm.sumReduce( vaz_loc ); - count = Mask->Comm.sumReduce( count_loc ); + MPI_Allreduce(&vax_loc,&vax,1,MPI_DOUBLE,MPI_SUM,Mask->Comm); + MPI_Allreduce(&vay_loc,&vay,1,MPI_DOUBLE,MPI_SUM,Mask->Comm); + MPI_Allreduce(&vaz_loc,&vaz,1,MPI_DOUBLE,MPI_SUM,Mask->Comm); + MPI_Allreduce(&count_loc,&count,1,MPI_DOUBLE,MPI_SUM,Mask->Comm); vax /= count; vay /= count; @@ -309,10 +305,10 @@ void ScaLBL_MRTModel::Run(){ double As = Morphology.A(); double Hs = Morphology.H(); double Xs = Morphology.X(); - Vs = Dm->Comm.sumReduce( Vs); - As = Dm->Comm.sumReduce( As); - Hs = Dm->Comm.sumReduce( Hs); - Xs = Dm->Comm.sumReduce( Xs); + Vs=sumReduce( Dm->Comm, Vs); + As=sumReduce( Dm->Comm, As); + Hs=sumReduce( Dm->Comm, Hs); + Xs=sumReduce( Dm->Comm, Xs); double h = Dm->voxel_length; double absperm = h*h*mu*Mask->Porosity()*flow_rate / force_mag; if (rank==0) { @@ -346,8 +342,7 @@ void ScaLBL_MRTModel::VelocityField(){ /* Minkowski Morphology(Mask); int SIZE=Np*sizeof(double); ScaLBL_D3Q19_Momentum(fq,Velocity, Np); - ScaLBL_DeviceBarrier();. 
- comm.barrier(); + ScaLBL_DeviceBarrier(); MPI_Barrier(comm); ScaLBL_CopyToHost(&VELOCITY[0],&Velocity[0],3*SIZE); memcpy(Morphology.SDn.data(), Distance.data(), Nx*Ny*Nz*sizeof(double)); @@ -374,10 +369,10 @@ void ScaLBL_MRTModel::VelocityField(){ vaz_loc += VELOCITY[2*Np+n]; count_loc+=1.0; } - vax = Mask->Comm.sumReduce( vax_loc ); - vay = Mask->Comm.sumReduce( vay_loc ); - vaz = Mask->Comm.sumReduce( vaz_loc ); - count = Mask->Comm.sumReduce( count_loc ); + MPI_Allreduce(&vax_loc,&vax,1,MPI_DOUBLE,MPI_SUM,Mask->Comm); + MPI_Allreduce(&vay_loc,&vay,1,MPI_DOUBLE,MPI_SUM,Mask->Comm); + MPI_Allreduce(&vaz_loc,&vaz,1,MPI_DOUBLE,MPI_SUM,Mask->Comm); + MPI_Allreduce(&count_loc,&count,1,MPI_DOUBLE,MPI_SUM,Mask->Comm); vax /= count; vay /= count; diff --git a/models/MRTModel.h b/models/MRTModel.h index 7e23cc44..aa4ee1f0 100644 --- a/models/MRTModel.h +++ b/models/MRTModel.h @@ -11,13 +11,13 @@ #include "common/ScaLBL.h" #include "common/Communication.h" -#include "common/MPI.h" +#include "common/MPI_Helpers.h" #include "analysis/Minkowski.h" #include "ProfilerApp.h" class ScaLBL_MRTModel{ public: - ScaLBL_MRTModel(int RANK, int NP, const Utilities::MPI& COMM); + ScaLBL_MRTModel(int RANK, int NP, MPI_Comm COMM); ~ScaLBL_MRTModel(); // functions in they should be run @@ -63,7 +63,7 @@ public: DoubleArray Velocity_y; DoubleArray Velocity_z; private: - Utilities::MPI comm; + MPI_Comm comm; // filenames char LocalRankString[8]; diff --git a/tests/BlobAnalyzeParallel.cpp b/tests/BlobAnalyzeParallel.cpp index 48e9e230..c9e3f8fc 100644 --- a/tests/BlobAnalyzeParallel.cpp +++ b/tests/BlobAnalyzeParallel.cpp @@ -100,10 +100,11 @@ inline void WriteBlobStates(TwoPhase TCAT, double D, double porosity){ int main(int argc, char **argv) { // Initialize MPI + int rank, nprocs; MPI_Init(&argc,&argv); - Utilities::MPI comm( MPI_COMM_WORLD ); - int rank = comm.getRank(); - int nprocs = comm.getSize(); + MPI_Comm comm = MPI_COMM_WORLD; + MPI_Comm_rank(comm,&rank); + MPI_Comm_size(comm,&nprocs); Utilities::setAbortBehavior( true, true, true ); Utilities::setErrorHandlers(); PROFILE_ENABLE(0); @@ -136,7 +137,7 @@ int main(int argc, char **argv) domain >> Ly; domain >> Lz; } - comm.barrier(); + MPI_Barrier(comm); // Computational domain MPI_Bcast(&nx,1,MPI_INT,0,comm); MPI_Bcast(&ny,1,MPI_INT,0,comm); @@ -149,7 +150,7 @@ int main(int argc, char **argv) MPI_Bcast(&Ly,1,MPI_DOUBLE,0,comm); MPI_Bcast(&Lz,1,MPI_DOUBLE,0,comm); //................................................. - comm.barrier(); + MPI_Barrier(comm); // Check that the number of processors >= the number of ranks if ( rank==0 ) { @@ -208,7 +209,7 @@ int main(int argc, char **argv) // WriteLocalSolidID(LocalRankFilename, id, N); sprintf(LocalRankFilename,"%s%s","SignDist.",LocalRankString); ReadBinaryFile(LocalRankFilename, Averages.SDs.get(), N); - comm.barrier(); + MPI_Barrier(comm); if (rank == 0) cout << "Domain set." << endl; //....................................................................... //copies of data needed to perform checkpointing from cpu @@ -220,7 +221,7 @@ int main(int argc, char **argv) if (rank==0) printf("Reading restart file! \n"); // Read in the restart file to CPU buffers ReadCheckpoint(LocalRestartFile, Den, DistEven, DistOdd, N); - comm.barrier(); + MPI_Barrier(comm); //......................................................................... 
// Populate the arrays needed to perform averaging if (rank==0) printf("Populate arrays \n"); @@ -328,14 +329,14 @@ int main(int argc, char **argv) // BlobContainer Blobs; DoubleArray RecvBuffer(dimx); // MPI_Allreduce(&Averages.BlobAverages.get(),&Blobs.get(),1,MPI_DOUBLE,MPI_SUM,Dm.Comm); - comm.barrier(); + MPI_Barrier(comm); if (rank==0) printf("Number of components is %i \n",dimy); for (int b=0; b 0.0){ double Vn,pn,awn,ans,Jwn,Kwn,lwns,cwns,trawn,trJwn; @@ -481,7 +482,7 @@ int main(int argc, char **argv) fclose(BLOBS);*/ PROFILE_STOP("main"); PROFILE_SAVE("BlobIdentifyParallel",false); - comm.barrier(); + MPI_Barrier(comm); MPI_Finalize(); return 0; } diff --git a/tests/BlobIdentifyParallel.cpp b/tests/BlobIdentifyParallel.cpp index b8929a11..f93371cb 100644 --- a/tests/BlobIdentifyParallel.cpp +++ b/tests/BlobIdentifyParallel.cpp @@ -47,10 +47,11 @@ void readRankData( int proc, int nx, int ny, int nz, DoubleArray& Phase, DoubleA int main(int argc, char **argv) { // Initialize MPI + int rank, nprocs; MPI_Init(&argc,&argv); - Utilities::MPI comm( MPI_COMM_WORLD ); - int rank = comm.getRank(); - int nprocs = comm.getSize(); + MPI_Comm comm = MPI_COMM_WORLD; + MPI_Comm_rank(comm,&rank); + MPI_Comm_size(comm,&nprocs); #ifdef PROFILE PROFILE_ENABLE(0); PROFILE_DISABLE_TRACE(); @@ -128,7 +129,7 @@ int main(int argc, char **argv) PROFILE_STOP("main"); PROFILE_SAVE("BlobIdentifyParallel",false); #endif - comm.barrier(); + MPI_Barrier(comm); MPI_Finalize(); return 0; } diff --git a/tests/ColorToBinary.cpp b/tests/ColorToBinary.cpp index fae156d1..7ac740bc 100644 --- a/tests/ColorToBinary.cpp +++ b/tests/ColorToBinary.cpp @@ -114,10 +114,11 @@ inline void ReadFromRank(char *FILENAME, DoubleArray &Phase, int nx, int ny, int int main(int argc, char **argv) { // Initialize MPI + int rank,nprocs; MPI_Init(&argc,&argv); - Utilities::MPI comm( MPI_COMM_WORLD ); - int rank = comm.getRank(); - int nprocs = comm.getSize(); + MPI_Comm comm = MPI_COMM_WORLD; + MPI_Comm_rank(comm,&rank); + MPI_Comm_size(comm,&nprocs); printf("----------------------------------------------------------\n"); printf("Creating single Binary file from restart (8-bit integer)\n"); @@ -275,7 +276,7 @@ int main(int argc, char **argv) */ // **************************************************** - comm.barrier(); + MPI_Barrier(comm); MPI_Finalize(); // **************************************************** } diff --git a/tests/ComponentLabel.cpp b/tests/ComponentLabel.cpp index 624ce8f4..07ef6555 100644 --- a/tests/ComponentLabel.cpp +++ b/tests/ComponentLabel.cpp @@ -119,10 +119,11 @@ inline void ReadFromRank(char *FILENAME, DoubleArray &Phase, DoubleArray &Pressu int main(int argc, char **argv) { // Initialize MPI + int rank,nprocs; MPI_Init(&argc,&argv); - Utilities::MPI comm( MPI_COMM_WORLD ); - int rank = comm.getRank(); - int nprocs = comm.getSize(); + MPI_Comm comm = MPI_COMM_WORLD; + MPI_Comm_rank(comm,&rank); + MPI_Comm_size(comm,&nprocs); printf("----------------------------------------------------------\n"); printf("COMPUTING TCAT ANALYSIS FOR NON-WETTING PHASE FEATURES \n"); @@ -432,7 +433,7 @@ int main(int argc, char **argv) fclose(DISTANCE); */ // **************************************************** - comm.barrier(); + MPI_Barrier(comm); MPI_Finalize(); // **************************************************** } diff --git a/tests/GenerateSphereTest.cpp b/tests/GenerateSphereTest.cpp index 43434092..d1917619 100644 --- a/tests/GenerateSphereTest.cpp +++ b/tests/GenerateSphereTest.cpp @@ -9,7 +9,7 @@ //#include 
"common/pmmc.h" #include "common/Domain.h" #include "common/SpherePack.h" -#include "common/MPI.h" +#include "common/MPI_Helpers.h" #include "common/Communication.h" /* @@ -70,8 +70,8 @@ inline void MorphOpen(DoubleArray SignDist, char *id, Domain &Dm, int nx, int ny } } // total Global is the number of nodes in the pore-space - totalGlobal = Dm.Comm.sumReduce( count ); - maxdistGlobal = Dm.Comm.sumReduce( maxdist ); + MPI_Allreduce(&count,&totalGlobal,1,MPI_DOUBLE,MPI_SUM,Dm.Comm); + MPI_Allreduce(&maxdist,&maxdistGlobal,1,MPI_DOUBLE,MPI_MAX,Dm.Comm); double volume=double(nprocx*nprocy*nprocz)*double(nx-2)*double(ny-2)*double(nz-2); double porosity=totalGlobal/volume; if (rank==0) printf("Media Porosity: %f \n",porosity); @@ -148,6 +148,7 @@ inline void MorphOpen(DoubleArray SignDist, char *id, Domain &Dm, int nx, int ny double Rcrit_old=0.0; double Rcrit_new=0.0; + double GlobalNumber = 1.f; int imin,jmin,kmin,imax,jmax,kmax; Rcrit_new = maxdistGlobal; @@ -214,41 +215,41 @@ inline void MorphOpen(DoubleArray SignDist, char *id, Domain &Dm, int nx, int ny PackID(Dm.sendList_YZ, Dm.sendCount_YZ ,sendID_YZ, id); //...................................................................................... MPI_Sendrecv(sendID_x,Dm.sendCount_x,MPI_CHAR,Dm.rank_x(),sendtag, - recvID_X,Dm.recvCount_X,MPI_CHAR,Dm.rank_X(),recvtag,Dm.Comm.getCommunicator(),MPI_STATUS_IGNORE); + recvID_X,Dm.recvCount_X,MPI_CHAR,Dm.rank_X(),recvtag,Dm.Comm,MPI_STATUS_IGNORE); MPI_Sendrecv(sendID_X,Dm.sendCount_X,MPI_CHAR,Dm.rank_X(),sendtag, - recvID_x,Dm.recvCount_x,MPI_CHAR,Dm.rank_x(),recvtag,Dm.Comm.getCommunicator(),MPI_STATUS_IGNORE); + recvID_x,Dm.recvCount_x,MPI_CHAR,Dm.rank_x(),recvtag,Dm.Comm,MPI_STATUS_IGNORE); MPI_Sendrecv(sendID_y,Dm.sendCount_y,MPI_CHAR,Dm.rank_y(),sendtag, - recvID_Y,Dm.recvCount_Y,MPI_CHAR,Dm.rank_Y(),recvtag,Dm.Comm.getCommunicator(),MPI_STATUS_IGNORE); + recvID_Y,Dm.recvCount_Y,MPI_CHAR,Dm.rank_Y(),recvtag,Dm.Comm,MPI_STATUS_IGNORE); MPI_Sendrecv(sendID_Y,Dm.sendCount_Y,MPI_CHAR,Dm.rank_Y(),sendtag, - recvID_y,Dm.recvCount_y,MPI_CHAR,Dm.rank_y(),recvtag,Dm.Comm.getCommunicator(),MPI_STATUS_IGNORE); + recvID_y,Dm.recvCount_y,MPI_CHAR,Dm.rank_y(),recvtag,Dm.Comm,MPI_STATUS_IGNORE); MPI_Sendrecv(sendID_z,Dm.sendCount_z,MPI_CHAR,Dm.rank_z(),sendtag, - recvID_Z,Dm.recvCount_Z,MPI_CHAR,Dm.rank_Z(),recvtag,Dm.Comm.getCommunicator(),MPI_STATUS_IGNORE); + recvID_Z,Dm.recvCount_Z,MPI_CHAR,Dm.rank_Z(),recvtag,Dm.Comm,MPI_STATUS_IGNORE); MPI_Sendrecv(sendID_Z,Dm.sendCount_Z,MPI_CHAR,Dm.rank_Z(),sendtag, - recvID_z,Dm.recvCount_z,MPI_CHAR,Dm.rank_z(),recvtag,Dm.Comm.getCommunicator(),MPI_STATUS_IGNORE); + recvID_z,Dm.recvCount_z,MPI_CHAR,Dm.rank_z(),recvtag,Dm.Comm,MPI_STATUS_IGNORE); MPI_Sendrecv(sendID_xy,Dm.sendCount_xy,MPI_CHAR,Dm.rank_xy(),sendtag, - recvID_XY,Dm.recvCount_XY,MPI_CHAR,Dm.rank_XY(),recvtag,Dm.Comm.getCommunicator(),MPI_STATUS_IGNORE); + recvID_XY,Dm.recvCount_XY,MPI_CHAR,Dm.rank_XY(),recvtag,Dm.Comm,MPI_STATUS_IGNORE); MPI_Sendrecv(sendID_XY,Dm.sendCount_XY,MPI_CHAR,Dm.rank_XY(),sendtag, - recvID_xy,Dm.recvCount_xy,MPI_CHAR,Dm.rank_xy(),recvtag,Dm.Comm.getCommunicator(),MPI_STATUS_IGNORE); + recvID_xy,Dm.recvCount_xy,MPI_CHAR,Dm.rank_xy(),recvtag,Dm.Comm,MPI_STATUS_IGNORE); MPI_Sendrecv(sendID_Xy,Dm.sendCount_Xy,MPI_CHAR,Dm.rank_Xy(),sendtag, - recvID_xY,Dm.recvCount_xY,MPI_CHAR,Dm.rank_xY(),recvtag,Dm.Comm.getCommunicator(),MPI_STATUS_IGNORE); + recvID_xY,Dm.recvCount_xY,MPI_CHAR,Dm.rank_xY(),recvtag,Dm.Comm,MPI_STATUS_IGNORE); 
MPI_Sendrecv(sendID_xY,Dm.sendCount_xY,MPI_CHAR,Dm.rank_xY(),sendtag, - recvID_Xy,Dm.recvCount_Xy,MPI_CHAR,Dm.rank_Xy(),recvtag,Dm.Comm.getCommunicator(),MPI_STATUS_IGNORE); + recvID_Xy,Dm.recvCount_Xy,MPI_CHAR,Dm.rank_Xy(),recvtag,Dm.Comm,MPI_STATUS_IGNORE); MPI_Sendrecv(sendID_xz,Dm.sendCount_xz,MPI_CHAR,Dm.rank_xz(),sendtag, - recvID_XZ,Dm.recvCount_XZ,MPI_CHAR,Dm.rank_XZ(),recvtag,Dm.Comm.getCommunicator(),MPI_STATUS_IGNORE); + recvID_XZ,Dm.recvCount_XZ,MPI_CHAR,Dm.rank_XZ(),recvtag,Dm.Comm,MPI_STATUS_IGNORE); MPI_Sendrecv(sendID_XZ,Dm.sendCount_XZ,MPI_CHAR,Dm.rank_XZ(),sendtag, - recvID_xz,Dm.recvCount_xz,MPI_CHAR,Dm.rank_xz(),recvtag,Dm.Comm.getCommunicator(),MPI_STATUS_IGNORE); + recvID_xz,Dm.recvCount_xz,MPI_CHAR,Dm.rank_xz(),recvtag,Dm.Comm,MPI_STATUS_IGNORE); MPI_Sendrecv(sendID_Xz,Dm.sendCount_Xz,MPI_CHAR,Dm.rank_Xz(),sendtag, - recvID_xZ,Dm.recvCount_xZ,MPI_CHAR,Dm.rank_xZ(),recvtag,Dm.Comm.getCommunicator(),MPI_STATUS_IGNORE); + recvID_xZ,Dm.recvCount_xZ,MPI_CHAR,Dm.rank_xZ(),recvtag,Dm.Comm,MPI_STATUS_IGNORE); MPI_Sendrecv(sendID_xZ,Dm.sendCount_xZ,MPI_CHAR,Dm.rank_xZ(),sendtag, - recvID_Xz,Dm.recvCount_Xz,MPI_CHAR,Dm.rank_Xz(),recvtag,Dm.Comm.getCommunicator(),MPI_STATUS_IGNORE); + recvID_Xz,Dm.recvCount_Xz,MPI_CHAR,Dm.rank_Xz(),recvtag,Dm.Comm,MPI_STATUS_IGNORE); MPI_Sendrecv(sendID_yz,Dm.sendCount_yz,MPI_CHAR,Dm.rank_yz(),sendtag, - recvID_YZ,Dm.recvCount_YZ,MPI_CHAR,Dm.rank_YZ(),recvtag,Dm.Comm.getCommunicator(),MPI_STATUS_IGNORE); + recvID_YZ,Dm.recvCount_YZ,MPI_CHAR,Dm.rank_YZ(),recvtag,Dm.Comm,MPI_STATUS_IGNORE); MPI_Sendrecv(sendID_YZ,Dm.sendCount_YZ,MPI_CHAR,Dm.rank_YZ(),sendtag, - recvID_yz,Dm.recvCount_yz,MPI_CHAR,Dm.rank_yz(),recvtag,Dm.Comm.getCommunicator(),MPI_STATUS_IGNORE); + recvID_yz,Dm.recvCount_yz,MPI_CHAR,Dm.rank_yz(),recvtag,Dm.Comm,MPI_STATUS_IGNORE); MPI_Sendrecv(sendID_Yz,Dm.sendCount_Yz,MPI_CHAR,Dm.rank_Yz(),sendtag, - recvID_yZ,Dm.recvCount_yZ,MPI_CHAR,Dm.rank_yZ(),recvtag,Dm.Comm.getCommunicator(),MPI_STATUS_IGNORE); + recvID_yZ,Dm.recvCount_yZ,MPI_CHAR,Dm.rank_yZ(),recvtag,Dm.Comm,MPI_STATUS_IGNORE); MPI_Sendrecv(sendID_yZ,Dm.sendCount_yZ,MPI_CHAR,Dm.rank_yZ(),sendtag, - recvID_Yz,Dm.recvCount_Yz,MPI_CHAR,Dm.rank_Yz(),recvtag,Dm.Comm.getCommunicator(),MPI_STATUS_IGNORE); + recvID_Yz,Dm.recvCount_Yz,MPI_CHAR,Dm.rank_Yz(),recvtag,Dm.Comm,MPI_STATUS_IGNORE); //...................................................................................... UnpackID(Dm.recvList_x, Dm.recvCount_x ,recvID_x, id); UnpackID(Dm.recvList_X, Dm.recvCount_X ,recvID_X, id); @@ -270,7 +271,7 @@ inline void MorphOpen(DoubleArray SignDist, char *id, Domain &Dm, int nx, int ny UnpackID(Dm.recvList_YZ, Dm.recvCount_YZ ,recvID_YZ, id); //...................................................................................... - //double GlobalNumber = Dm.Comm.sumReduce( LocalNumber ); + MPI_Allreduce(&LocalNumber,&GlobalNumber,1,MPI_DOUBLE,MPI_SUM,Dm.Comm); count = 0.f; for (int k=1; k= the number of ranks if ( rank==0 ) { @@ -253,14 +254,14 @@ int main(int argc, char **argv) cz[0]=0.25*Lz; cx[1]=0.75*Lz; cx[2]=0.25*Lz; cx[3]=0.25*Lz; rad[0]=rad[1]=rad[2]=rad[3]=0.1*Lx; - comm.barrier(); + MPI_Barrier(comm); // Broadcast the sphere packing to all processes MPI_Bcast(cx,nspheres,MPI_DOUBLE,0,comm); MPI_Bcast(cy,nspheres,MPI_DOUBLE,0,comm); MPI_Bcast(cz,nspheres,MPI_DOUBLE,0,comm); MPI_Bcast(rad,nspheres,MPI_DOUBLE,0,comm); //........................................................................... 
- comm.barrier(); + MPI_Barrier(comm); //....................................................................... SignedDistance(Averages.Phase.data(),nspheres,cx,cy,cz,rad,Lx,Ly,Lz,Nx,Ny,Nz, Dm->iproc(),Dm->jproc(),Dm->kproc(),Dm->nprocx(),Dm->nprocy(),Dm->nprocz()); @@ -316,7 +317,7 @@ int main(int argc, char **argv) delete [] rad; } // Limit scope so variables that contain communicators will free before MPI_Finialize - comm.barrier(); + MPI_Barrier(comm); MPI_Finalize(); return 0; } diff --git a/tests/TestBlobIdentify.cpp b/tests/TestBlobIdentify.cpp index 7eb5c270..ccfc6afc 100644 --- a/tests/TestBlobIdentify.cpp +++ b/tests/TestBlobIdentify.cpp @@ -23,19 +23,21 @@ inline double rand2() // Test if all ranks agree on a value -bool allAgree( int x, const Utilities::MPI& comm ) { +bool allAgree( int x, MPI_Comm comm ) { int x2 = x; - comm.bcast(&x2,1,0); + MPI_Bcast(&x2,1,MPI_INT,0,comm); int diff = x==x2 ? 0:1; - int diff2 = comm.sumReduce( diff ); + int diff2 = 0; + MPI_Allreduce(&diff,&diff2,1,MPI_INT,MPI_SUM,comm); return diff2==0; } template -bool allAgree( const std::vector& x, const Utilities::MPI& comm ) { +bool allAgree( const std::vector& x, MPI_Comm comm ) { std::vector x2 = x; - comm.bcast(&x2[0],x.size()*sizeof(T)/sizeof(int),0); + MPI_Bcast(&x2[0],x.size()*sizeof(T)/sizeof(int),MPI_INT,0,comm); int diff = x==x2 ? 0:1; - int diff2 = comm.sumReduce( diff ); + int diff2 = 0; + MPI_Allreduce(&diff,&diff2,1,MPI_INT,MPI_SUM,comm); return diff2==0; } @@ -72,9 +74,9 @@ struct bubble_struct { // Create a random set of bubles -std::vector create_bubbles( int N_bubbles, double Lx, double Ly, double Lz, const Utilities::MPI& comm ) +std::vector create_bubbles( int N_bubbles, double Lx, double Ly, double Lz, MPI_Comm comm ) { - int rank = comm.getRank(); + int rank = comm_rank(comm); std::vector bubbles(N_bubbles); if ( rank == 0 ) { double R0 = 0.2*Lx*Ly*Lz/pow((double)N_bubbles,0.333); @@ -89,7 +91,7 @@ std::vector create_bubbles( int N_bubbles, double Lx, double Ly, } } size_t N_bytes = N_bubbles*sizeof(bubble_struct); - comm.bcast((char*)&bubbles[0],N_bytes,0); + MPI_Bcast((char*)&bubbles[0],N_bytes,MPI_CHAR,0,comm); return bubbles; } @@ -122,7 +124,7 @@ void fillBubbleData( const std::vector& bubbles, DoubleArray& Pha // Shift all of the data by the given number of cells -void shift_data( DoubleArray& data, int sx, int sy, int sz, const RankInfoStruct& rank_info, const Utilities::MPI& comm ) +void shift_data( DoubleArray& data, int sx, int sy, int sz, const RankInfoStruct& rank_info, MPI_Comm comm ) { int nx = data.size(0)-2; int ny = data.size(1)-2; @@ -152,10 +154,11 @@ void shift_data( DoubleArray& data, int sx, int sy, int sz, const RankInfoStruct int main(int argc, char **argv) { // Initialize MPI + int rank, nprocs; MPI_Init(&argc,&argv); - Utilities::MPI comm( MPI_COMM_WORLD ); - int rank = comm.getRank(); - int nprocs = comm.getSize(); + MPI_Comm comm = MPI_COMM_WORLD; + MPI_Comm_rank(comm,&rank); + MPI_Comm_size(comm,&nprocs); PROFILE_ENABLE(1); PROFILE_DISABLE_TRACE(); PROFILE_SYNCHRONIZE(); @@ -294,7 +297,7 @@ int main(int argc, char **argv) velocity[i].z = bubbles[i].radius*(2*rand2()-1); } } - comm.bcast((char*)&velocity[0],bubbles.size()*sizeof(Point),0); + MPI_Bcast((char*)&velocity[0],bubbles.size()*sizeof(Point),MPI_CHAR,0,comm); fillBubbleData( bubbles, Phase, SignDist, Lx, Ly, Lz, rank_info ); fillData.fill(Phase); fillData.fill(SignDist); @@ -388,8 +391,8 @@ int main(int argc, char **argv) printf("\n"); } } - comm.bcast(&N1,1,0); - comm.bcast(&N2,1,0); + 
MPI_Bcast(&N1,1,MPI_INT,0,comm); + MPI_Bcast(&N2,1,MPI_INT,0,comm); if ( N1!=nblobs || N2!=nblobs2 ) { if ( rank==0 ) printf("Error, blob ids do not map in moving bubble test (%i,%i,%i,%i)\n", @@ -409,7 +412,7 @@ int main(int argc, char **argv) // Finished PROFILE_STOP("main"); PROFILE_SAVE("TestBlobIdentify",false); - comm.barrier(); + MPI_Barrier(comm); MPI_Finalize(); return N_errors; } diff --git a/tests/TestBlobIdentifyCorners.cpp b/tests/TestBlobIdentifyCorners.cpp index 904e52e0..4795f610 100644 --- a/tests/TestBlobIdentifyCorners.cpp +++ b/tests/TestBlobIdentifyCorners.cpp @@ -18,9 +18,10 @@ int main(int argc, char **argv) { // Initialize MPI + int rank, nprocs; MPI_Init(&argc,&argv); - int rank = comm.getRank(); - int nprocs = comm.getSize(); + MPI_Comm_rank(MPI_COMM_WORLD,&rank); + MPI_Comm_size(MPI_COMM_WORLD,&nprocs); /*if ( nprocs != 8 ) { printf("This tests requires 8 processors\n"); return -1; diff --git a/tests/TestBubble.cpp b/tests/TestBubble.cpp index e7e0ced8..c03e5dea 100644 --- a/tests/TestBubble.cpp +++ b/tests/TestBubble.cpp @@ -7,7 +7,7 @@ #include "analysis/pmmc.h" #include "common/ScaLBL.h" -#include "common/MPI.h" +#include "common/MPI_Helpers.h" #include "common/Communication.h" #include "IO/Mesh.h" #include "IO/Writer.h" @@ -32,15 +32,14 @@ int main(int argc, char **argv) // Initialize MPI int provided_thread_support = -1; MPI_Init_thread(&argc,&argv,MPI_THREAD_MULTIPLE,&provided_thread_support); - + MPI_Comm comm; + MPI_Comm_dup(MPI_COMM_WORLD,&comm); + int rank = comm_rank(comm); + int nprocs = comm_size(comm); + if ( rank==0 && provided_thread_support(domain_db,comm); - comm.barrier(); + MPI_Barrier(comm); Nx+=2; Ny+=2; Nz += 2; int N = Nx*Ny*Nz; @@ -249,7 +250,7 @@ int main(int argc, char **argv) IntArray Map(Nx,Ny,Nz); auto neighborList= new int[18*Npad]; Np = ScaLBL_Comm->MemoryOptimizedLayoutAA(Map,neighborList,Mask->id,Np); - comm.barrier(); + MPI_Barrier(comm); //........................................................................... // MAIN VARIABLES ALLOCATED HERE @@ -386,7 +387,7 @@ int main(int argc, char **argv) //.......create and start timer............ double starttime,stoptime,cputime; ScaLBL_DeviceBarrier(); - comm.barrier(); + MPI_Barrier(comm); starttime = MPI_Wtime(); //......................................... 
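The updated TestBubble above duplicates MPI_COMM_WORLD rather than using it directly, and frees the copy before MPI_Finalize so that no communicator outlives the MPI runtime. A minimal sketch of that lifecycle, assuming comm_rank and comm_size are the small helpers provided by common/MPI_Helpers.h:

#include <mpi.h>
#include "common/MPI_Helpers.h"   // assumed to declare comm_rank / comm_size

// Sketch of the communicator lifecycle adopted by the updated tests:
// duplicate the world communicator, use the copy inside a scope, and
// free it before MPI_Finalize.
int main( int argc, char **argv )
{
    int provided = -1;
    MPI_Init_thread( &argc, &argv, MPI_THREAD_MULTIPLE, &provided );
    {
        MPI_Comm comm;
        MPI_Comm_dup( MPI_COMM_WORLD, &comm );
        int rank   = comm_rank( comm );   // helper from MPI_Helpers.h
        int nprocs = comm_size( comm );   // helper from MPI_Helpers.h
        // ... run the test body using comm, rank, nprocs ...
        MPI_Barrier( comm );
        MPI_Comm_free( &comm );           // free before MPI_Finalize
    }
    MPI_Finalize();
    return 0;
}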
@@ -436,7 +437,7 @@ int main(int argc, char **argv) } ScaLBL_D3Q19_AAodd_DFH(NeighborList, fq, Aq, Bq, Den, Phi, Gradient, SolidPotential, rhoA, rhoB, tauA, tauB, alpha, beta, Fx, Fy, Fz, 0, ScaLBL_Comm->next, Np); - ScaLBL_DeviceBarrier(); comm.barrier(); + ScaLBL_DeviceBarrier(); MPI_Barrier(comm); // *************EVEN TIMESTEP************* timestep++; @@ -472,9 +473,9 @@ int main(int argc, char **argv) } ScaLBL_D3Q19_AAeven_DFH(NeighborList, fq, Aq, Bq, Den, Phi, Gradient, SolidPotential, rhoA, rhoB, tauA, tauB, alpha, beta, Fx, Fy, Fz, 0, ScaLBL_Comm->next, Np); - ScaLBL_DeviceBarrier(); comm.barrier(); + ScaLBL_DeviceBarrier(); MPI_Barrier(comm); //************************************************************************ - comm.barrier(); + MPI_Barrier(comm); PROFILE_STOP("Update"); // Run the analysis @@ -486,7 +487,7 @@ int main(int argc, char **argv) PROFILE_SAVE("lbpm_color_simulator",1); //************************************************************************ ScaLBL_DeviceBarrier(); - comm.barrier(); + MPI_Barrier(comm); stoptime = MPI_Wtime(); if (rank==0) printf("-------------------------------------------------------------------\n"); // Compute the walltime per timestep @@ -546,8 +547,9 @@ int main(int argc, char **argv) PROFILE_STOP("Main"); PROFILE_SAVE("lbpm_color_simulator",1); // **************************************************** - comm.barrier(); + MPI_Barrier(comm); } // Limit scope so variables that contain communicators will free before MPI_Finialize + MPI_Comm_free(&comm); MPI_Finalize(); return check; } diff --git a/tests/TestColorBubble.cpp b/tests/TestColorBubble.cpp index 1f42a71e..0e6ea25a 100644 --- a/tests/TestColorBubble.cpp +++ b/tests/TestColorBubble.cpp @@ -7,7 +7,7 @@ #include #include #include "common/ScaLBL.h" -#include "common/MPI.h" +#include "common/MPI_Helpers.h" #include "models/ColorModel.h" using namespace std; @@ -64,11 +64,15 @@ inline void InitializeBubble(ScaLBL_ColorModel &ColorModel, double BubbleRadius) //*************************************************************************************** int main(int argc, char **argv) { + //***************************************** + // ***** MPI STUFF **************** + //***************************************** // Initialize MPI + int rank,nprocs; MPI_Init(&argc,&argv); - Utilities::MPI comm( MPI_COMM_WORLD ); - int rank = comm.getRank(); - int nprocs = comm.getSize(); + MPI_Comm comm = MPI_COMM_WORLD; + MPI_Comm_rank(comm,&rank); + MPI_Comm_size(comm,&nprocs); int check=0; { if (rank == 0){ @@ -93,7 +97,7 @@ int main(int argc, char **argv) ColorModel.WriteDebug(); } // **************************************************** - comm.barrier(); + MPI_Barrier(comm); MPI_Finalize(); // **************************************************** diff --git a/tests/TestColorGrad.cpp b/tests/TestColorGrad.cpp index df1c1daf..5cd6d924 100644 --- a/tests/TestColorGrad.cpp +++ b/tests/TestColorGrad.cpp @@ -7,7 +7,7 @@ #include #include #include "common/ScaLBL.h" -#include "common/MPI.h" +#include "common/MPI_Helpers.h" using namespace std; @@ -15,11 +15,15 @@ using namespace std; //*************************************************************************************** int main(int argc, char **argv) { + //***************************************** + // ***** MPI STUFF **************** + //***************************************** // Initialize MPI + int rank,nprocs; MPI_Init(&argc,&argv); - Utilities::MPI comm( MPI_COMM_WORLD ); - int rank = comm.getRank(); - int nprocs = comm.getSize(); + MPI_Comm comm = 
MPI_COMM_WORLD; + MPI_Comm_rank(comm,&rank); + MPI_Comm_size(comm,&nprocs); int check; { // parallel domain size (# of sub-domains) @@ -112,7 +116,7 @@ int main(int argc, char **argv) } // ************************************************************** // Broadcast simulation parameters from rank 0 to all other procs - comm.barrier(); + MPI_Barrier(comm); //................................................. MPI_Bcast(&Nx,1,MPI_INT,0,comm); MPI_Bcast(&Ny,1,MPI_INT,0,comm); @@ -125,7 +129,7 @@ int main(int argc, char **argv) MPI_Bcast(&Ly,1,MPI_DOUBLE,0,comm); MPI_Bcast(&Lz,1,MPI_DOUBLE,0,comm); //................................................. - comm.barrier(); + MPI_Barrier(comm); // ************************************************************** // ************************************************************** @@ -142,7 +146,7 @@ int main(int argc, char **argv) printf("********************************************************\n"); } - comm.barrier(); + MPI_Barrier(comm); double iVol_global = 1.0/Nx/Ny/Nz/nprocx/nprocy/nprocz; int BoundaryCondition=0; @@ -171,7 +175,7 @@ int main(int argc, char **argv) } } Dm.CommInit(); - comm.barrier(); + MPI_Barrier(comm); if (rank == 0) cout << "Domain set." << endl; if (rank==0) printf ("Create ScaLBL_Communicator \n"); @@ -188,7 +192,7 @@ int main(int argc, char **argv) neighborList= new int[18*Np]; ScaLBL_Comm.MemoryOptimizedLayoutAA(Map,neighborList,Dm.id,Np); - comm.barrier(); + MPI_Barrier(comm); //......................device distributions................................. int dist_mem_size = Np*sizeof(double); @@ -256,7 +260,7 @@ int main(int argc, char **argv) } // **************************************************** - comm.barrier(); + MPI_Barrier(comm); MPI_Finalize(); // **************************************************** diff --git a/tests/TestColorGradDFH.cpp b/tests/TestColorGradDFH.cpp index b04aebce..d6376d82 100644 --- a/tests/TestColorGradDFH.cpp +++ b/tests/TestColorGradDFH.cpp @@ -7,7 +7,7 @@ #include #include #include "common/ScaLBL.h" -#include "common/MPI.h" +#include "common/MPI_Helpers.h" using namespace std; @@ -25,11 +25,15 @@ std::shared_ptr loadInputs( int nprocs ) //*************************************************************************************** int main(int argc, char **argv) { + //***************************************** + // ***** MPI STUFF **************** + //***************************************** // Initialize MPI + int rank,nprocs; MPI_Init(&argc,&argv); - Utilities::MPI comm( MPI_COMM_WORLD ); - int rank = comm.getRank(); - int nprocs = comm.getSize(); + MPI_Comm comm = MPI_COMM_WORLD; + MPI_Comm_rank(comm,&rank); + MPI_Comm_size(comm,&nprocs); int check=0; { // parallel domain size (# of sub-domains) @@ -78,7 +82,7 @@ int main(int argc, char **argv) } } Dm->CommInit(); - comm.barrier(); + MPI_Barrier(comm); if (rank == 0) cout << "Domain set." << endl; if (rank==0) printf ("Create ScaLBL_Communicator \n"); @@ -101,7 +105,7 @@ int main(int argc, char **argv) IntArray Map(Nx,Ny,Nz); neighborList= new int[18*Npad]; Np = ScaLBL_Comm->MemoryOptimizedLayoutAA(Map,neighborList,Dm->id,Np); - comm.barrier(); + MPI_Barrier(comm); //......................device distributions................................. 
int neighborSize=18*Np*sizeof(int); @@ -207,7 +211,7 @@ int main(int argc, char **argv) } // **************************************************** - comm.barrier(); + MPI_Barrier(comm); MPI_Finalize(); // **************************************************** diff --git a/tests/TestColorMassBounceback.cpp b/tests/TestColorMassBounceback.cpp index 78508f9b..c05c245e 100644 --- a/tests/TestColorMassBounceback.cpp +++ b/tests/TestColorMassBounceback.cpp @@ -7,7 +7,7 @@ #include #include #include "common/ScaLBL.h" -#include "common/MPI.h" +#include "common/MPI_Helpers.h" using namespace std; @@ -15,11 +15,15 @@ using namespace std; //*************************************************************************************** int main(int argc, char **argv) { + //***************************************** + // ***** MPI STUFF **************** + //***************************************** // Initialize MPI + int rank,nprocs; MPI_Init(&argc,&argv); - Utilities::MPI comm( MPI_COMM_WORLD ); - int rank = comm.getRank(); - int nprocs = comm.getSize(); + MPI_Comm comm = MPI_COMM_WORLD; + MPI_Comm_rank(comm,&rank); + MPI_Comm_size(comm,&nprocs); int check=0; { // parallel domain size (# of sub-domains) @@ -38,7 +42,7 @@ int main(int argc, char **argv) // Initialize compute device // int device=ScaLBL_SetDevice(rank); ScaLBL_DeviceBarrier(); - comm.barrier(); + MPI_Barrier(comm); Utilities::setErrorHandlers(); // Variables that specify the computational domain @@ -73,7 +77,7 @@ int main(int argc, char **argv) // Get the rank info const RankInfoStruct rank_info(rank,nprocx,nprocy,nprocz); - comm.barrier(); + MPI_Barrier(comm); if (nprocs != nprocx*nprocy*nprocz){ printf("nprocx = %i \n",nprocx); @@ -117,7 +121,7 @@ int main(int argc, char **argv) std::shared_ptr Dm(new Domain(domain_db,comm)); for (int i=0; iNx*Dm->Ny*Dm->Nz; i++) Dm->id[i] = 1; Dm->CommInit(); - comm.barrier(); + MPI_Barrier(comm); Nx+=2; Ny+=2; Nz += 2; int N = Nx*Ny*Nz; @@ -149,7 +153,7 @@ int main(int argc, char **argv) } } Dm->CommInit(); - comm.barrier(); + MPI_Barrier(comm); if (rank == 0) cout << "Domain set." << endl; if (rank==0) printf ("Create ScaLBL_Communicator \n"); @@ -166,7 +170,7 @@ int main(int argc, char **argv) Npad=Np+32; neighborList= new int[18*Npad]; Np=ScaLBL_Comm->MemoryOptimizedLayoutAA(Map,neighborList,Dm->id,Np); - comm.barrier(); + MPI_Barrier(comm); //......................device distributions................................. 
int dist_mem_size = Np*sizeof(double); @@ -268,7 +272,7 @@ int main(int argc, char **argv) ScaLBL_D3Q19_AAodd_DFH(NeighborList, fq, Aq, Bq, Den, Phi, Gradient, rhoA, rhoB, tauA, tauB, alpha, beta, Fx, Fy, Fz, 0, ScaLBL_Comm->LastExterior(), Np); - ScaLBL_DeviceBarrier(); comm.barrier(); + ScaLBL_DeviceBarrier(); MPI_Barrier(comm); timestep++; @@ -328,7 +332,7 @@ int main(int argc, char **argv) ScaLBL_Comm->RecvD3Q19AA(fq); //WRITE INTO OPPOSITE ScaLBL_D3Q19_AAeven_DFH(NeighborList, fq, Aq, Bq, Den, Phi, Gradient, rhoA, rhoB, tauA, tauB, alpha, beta, Fx, Fy, Fz, 0, ScaLBL_Comm->LastExterior(), Np); - ScaLBL_DeviceBarrier(); comm.barrier(); + ScaLBL_DeviceBarrier(); MPI_Barrier(comm); timestep++; //************************************************************************ printf("Check after even time \n"); @@ -411,7 +415,7 @@ int main(int argc, char **argv) ScaLBL_D3Q19_AAodd_DFH(NeighborList, fq, Aq, Bq, Den, Phi, Gradient, rhoA, rhoB, tauA, tauB, alpha, beta, Fx, Fy, Fz, 0, ScaLBL_Comm->LastExterior(), Np); - ScaLBL_DeviceBarrier(); comm.barrier(); + ScaLBL_DeviceBarrier(); MPI_Barrier(comm); timestep++; @@ -472,7 +476,7 @@ int main(int argc, char **argv) ScaLBL_Comm->RecvD3Q19AA(fq); //WRITE INTO OPPOSITE ScaLBL_D3Q19_AAeven_DFH(NeighborList, fq, Aq, Bq, Den, Phi, Gradient, rhoA, rhoB, tauA, tauB, alpha, beta, Fx, Fy, Fz, 0, ScaLBL_Comm->LastExterior(), Np); - ScaLBL_DeviceBarrier(); comm.barrier(); + ScaLBL_DeviceBarrier(); MPI_Barrier(comm); timestep++; //************************************************************************ printf("Check after even time \n"); @@ -519,7 +523,7 @@ int main(int argc, char **argv) } // **************************************************** - comm.barrier(); + MPI_Barrier(comm); MPI_Finalize(); // **************************************************** return check; diff --git a/tests/TestColorSquareTube.cpp b/tests/TestColorSquareTube.cpp index cf8a9566..9807f0e8 100644 --- a/tests/TestColorSquareTube.cpp +++ b/tests/TestColorSquareTube.cpp @@ -7,7 +7,7 @@ #include #include #include "common/ScaLBL.h" -#include "common/MPI.h" +#include "common/MPI_Helpers.h" #include "models/ColorModel.h" std::shared_ptr loadInputs( int nprocs ) @@ -84,11 +84,15 @@ void InitializeSquareTube(ScaLBL_ColorModel &ColorModel){ //*************************************************************************************** int main(int argc, char **argv) { + //***************************************** + // ***** MPI STUFF **************** + //***************************************** // Initialize MPI + int rank,nprocs; MPI_Init(&argc,&argv); - Utilities::MPI comm( MPI_COMM_WORLD ); - int rank = comm.getRank(); - int nprocs = comm.getSize(); + MPI_Comm comm = MPI_COMM_WORLD; + MPI_Comm_rank(comm,&rank); + MPI_Comm_size(comm,&nprocs); int check=0; { if (rank == 0){ @@ -109,7 +113,7 @@ int main(int argc, char **argv) } // **************************************************** - comm.barrier(); + MPI_Barrier(comm); MPI_Finalize(); // **************************************************** diff --git a/tests/TestCommD3Q19.cpp b/tests/TestCommD3Q19.cpp index d2799355..e1fa821f 100644 --- a/tests/TestCommD3Q19.cpp +++ b/tests/TestCommD3Q19.cpp @@ -6,7 +6,7 @@ #include #include #include "common/ScaLBL.h" -#include "common/MPI.h" +#include "common/MPI_Helpers.h" using namespace std; @@ -164,10 +164,11 @@ inline void UnpackID(int *list, int count, char *recvbuf, char *ID){ int main(int argc, char **argv) { // Initialize MPI + int rank,nprocs; MPI_Init(&argc,&argv); - Utilities::MPI comm( MPI_COMM_WORLD 
); - int rank = comm.getRank(); - int nprocs = comm.getSize(); + MPI_Comm comm = MPI_COMM_WORLD; + MPI_Comm_rank(comm,&rank); + MPI_Comm_size(comm,&nprocs); int check; { @@ -262,14 +263,14 @@ int main(int argc, char **argv) } } } - sum = comm.sumReduce( sum_local ); + MPI_Allreduce(&sum_local,&sum,1,MPI_DOUBLE,MPI_SUM,comm); double iVol_global=1.f/double((Nx-2)*(Ny-2)*(Nz-2)*nprocx*nprocy*nprocz); porosity = 1.0-sum*iVol_global; if (rank==0) printf("Media porosity = %f \n",porosity); //....................................................................... //........................................................................... - comm.barrier(); + MPI_Barrier(comm); if (rank == 0) cout << "Domain set." << endl; //........................................................................... @@ -284,7 +285,7 @@ int main(int argc, char **argv) IntArray Map(Nx,Ny,Nz); Map.fill(-2); Np = ScaLBL_Comm.MemoryOptimizedLayoutAA(Map,neighborList,Dm->id,Np); - comm.barrier(); + MPI_Barrier(comm); int neighborSize=18*Np*sizeof(int); //......................device distributions................................. dist_mem_size = Np*sizeof(double); @@ -354,7 +355,7 @@ int main(int argc, char **argv) GlobalFlipScaLBL_D3Q19_Init(fq_host, Map, Np, Nx-2, Ny-2, Nz-2, iproc,jproc,kproc,nprocx,nprocy,nprocz); ScaLBL_CopyToDevice(fq, fq_host, 19*dist_mem_size); ScaLBL_DeviceBarrier(); - comm.barrier(); + MPI_Barrier(comm); //************************************************************************* // First timestep ScaLBL_Comm.SendD3Q19AA(fq); //READ FROM NORMAL @@ -377,7 +378,7 @@ int main(int argc, char **argv) //.......create and start timer............ double starttime,stoptime,cputime; - comm.barrier(); + MPI_Barrier(comm); starttime = MPI_Wtime(); //......................................... @@ -397,7 +398,7 @@ int main(int argc, char **argv) //********************************************* ScaLBL_DeviceBarrier(); - comm.barrier(); + MPI_Barrier(comm); // Iteration completed! timestep++; //................................................................... 
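The hunks above and below all make the same substitution in the test drivers: the Utilities::MPI wrapper calls (getRank, getSize, barrier, sumReduce) become plain MPI calls. A minimal, self-contained sketch of that plain-MPI boilerplate is shown here for reference; it is illustrative only (the variable names sum_local/sum are placeholders) and is not part of the patch.

// Illustrative sketch only, not part of the patch: the plain-MPI pattern
// these hunks converge on in place of the Utilities::MPI wrapper.
#include <mpi.h>
#include <cstdio>

int main(int argc, char **argv)
{
    int rank, nprocs;
    MPI_Init(&argc, &argv);
    MPI_Comm comm = MPI_COMM_WORLD;
    MPI_Comm_rank(comm, &rank);   // replaces comm.getRank()
    MPI_Comm_size(comm, &nprocs); // replaces comm.getSize()

    // reductions use MPI_Allreduce instead of comm.sumReduce(...)
    double sum_local = 1.0, sum = 0.0;
    MPI_Allreduce(&sum_local, &sum, 1, MPI_DOUBLE, MPI_SUM, comm);
    if (rank == 0) printf("global sum = %f over %i ranks\n", sum, nprocs);

    MPI_Barrier(comm);            // replaces comm.barrier()
    MPI_Finalize();
    return 0;
}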
@@ -426,7 +427,7 @@ int main(int argc, char **argv) if (rank==0) printf("Aggregated communication bandwidth = %f Gbit/sec \n",nprocs*ScaLBL_Comm.CommunicationCount*64*timestep/1e9); } // **************************************************** - comm.barrier(); + MPI_Barrier(comm); MPI_Finalize(); // **************************************************** diff --git a/tests/TestDatabase.cpp b/tests/TestDatabase.cpp index ced704e2..00bf87e2 100644 --- a/tests/TestDatabase.cpp +++ b/tests/TestDatabase.cpp @@ -9,7 +9,7 @@ #include "common/UnitTest.h" #include "common/Utilities.h" -#include "common/MPI.h" +#include "common/MPI_Helpers.h" #include "common/Database.h" #include "ProfilerApp.h" @@ -17,8 +17,11 @@ // Main int main(int argc, char **argv) { + int rank,nprocs; MPI_Init(&argc,&argv); - Utilities::MPI comm( MPI_COMM_WORLD ); + MPI_Comm comm = MPI_COMM_WORLD; + MPI_Comm_rank(comm,&rank); + MPI_Comm_size(comm,&nprocs); Utilities::setAbortBehavior(true,2); Utilities::setErrorHandlers(); UnitTest ut; @@ -66,7 +69,7 @@ int main(int argc, char **argv) // Finished PROFILE_SAVE("TestDatabase",true); - comm.barrier(); + MPI_Barrier(comm); MPI_Finalize(); return err; } diff --git a/tests/TestFluxBC.cpp b/tests/TestFluxBC.cpp index 3e999715..020bbd89 100644 --- a/tests/TestFluxBC.cpp +++ b/tests/TestFluxBC.cpp @@ -1,5 +1,5 @@ #include -#include "common/MPI.h" +#include "common/MPI_Helpers.h" #include "common/Utilities.h" #include "common/ScaLBL.h" @@ -18,9 +18,9 @@ std::shared_ptr loadInputs( int nprocs ) int main (int argc, char **argv) { MPI_Init(&argc,&argv); - Utilities::MPI comm( MPI_COMM_WORLD ); - int rank = comm.getRank(); - int nprocs = comm.getSize(); + MPI_Comm comm = MPI_COMM_WORLD; + int rank = MPI_WORLD_RANK(); + int nprocs = MPI_WORLD_SIZE(); // set the error code // Note: the error code should be consistent across all processors @@ -89,7 +89,7 @@ int main (int argc, char **argv) neighborList= new int[18*Npad]; Np = ScaLBL_Comm->MemoryOptimizedLayoutAA(Map,neighborList,Dm->id,Np); - comm.barrier(); + MPI_Barrier(comm); //......................device distributions................................. 
int dist_mem_size = Np*sizeof(double); @@ -149,7 +149,7 @@ int main (int argc, char **argv) double *VEL; VEL= new double [3*Np]; int SIZE=3*Np*sizeof(double); - ScaLBL_DeviceBarrier(); comm.barrier(); + ScaLBL_DeviceBarrier(); MPI_Barrier(comm); ScaLBL_CopyToHost(&VEL[0],&dvc_vel[0],SIZE); double Q = 0.f; @@ -192,7 +192,7 @@ int main (int argc, char **argv) din = ScaLBL_Comm->D3Q19_Flux_BC_z(NeighborList, fq, flux, timestep); ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, fq, dout, timestep); ScaLBL_D3Q19_AAodd_MRT(NeighborList, fq, 0, ScaLBL_Comm->next, Np, rlx_setA, rlx_setB, Fx, Fy, Fz); - ScaLBL_DeviceBarrier(); comm.barrier(); + ScaLBL_DeviceBarrier(); MPI_Barrier(comm); timestep++; ScaLBL_Comm->SendD3Q19AA(fq); //READ FORM NORMAL @@ -201,7 +201,7 @@ int main (int argc, char **argv) din = ScaLBL_Comm->D3Q19_Flux_BC_z(NeighborList, fq, flux, timestep); ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, fq, dout, timestep); ScaLBL_D3Q19_AAeven_MRT(fq, 0, ScaLBL_Comm->next, Np, rlx_setA, rlx_setB, Fx, Fy, Fz); - ScaLBL_DeviceBarrier(); comm.barrier(); + ScaLBL_DeviceBarrier(); MPI_Barrier(comm); timestep++; //************************************************************************/ @@ -265,7 +265,7 @@ int main (int argc, char **argv) } // Finished - comm.barrier(); + MPI_Barrier(comm); MPI_Finalize(); return error; } diff --git a/tests/TestForceD3Q19.cpp b/tests/TestForceD3Q19.cpp index f8569624..b8f88aae 100644 --- a/tests/TestForceD3Q19.cpp +++ b/tests/TestForceD3Q19.cpp @@ -1,5 +1,5 @@ #include -#include "common/MPI.h" +#include "common/MPI_Helpers.h" #include "common/Utilities.h" #include @@ -443,9 +443,8 @@ inline void MRT_Transform(double *dist, int Np, double Fx, double Fy, double Fz) int main (int argc, char **argv) { MPI_Init(&argc,&argv); - Utilities::MPI comm( MPI_COMM_WORLD ); - int rank = comm.getRank(); - int nprocs = comm.getSize(); + int rank = MPI_WORLD_RANK(); + int nprocs = MPI_WORLD_SIZE(); for (int i=0; i #include #include "common/ScaLBL.h" -#include "common/MPI.h" +#include "common/MPI_Helpers.h" using namespace std; @@ -46,11 +46,15 @@ std::shared_ptr loadInputs( int nprocs ) //*************************************************************************************** int main(int argc, char **argv) { + //***************************************** + // ***** MPI STUFF **************** + //***************************************** // Initialize MPI + int rank,nprocs; MPI_Init(&argc,&argv); - Utilities::MPI comm( MPI_COMM_WORLD ); - int rank = comm.getRank(); - int nprocs = comm.getSize(); + MPI_Comm comm = MPI_COMM_WORLD; + MPI_Comm_rank(comm,&rank); + MPI_Comm_size(comm,&nprocs); int check=0; { // parallel domain size (# of sub-domains) @@ -94,7 +98,7 @@ int main(int argc, char **argv) printf("********************************************************\n"); } - comm.barrier(); + MPI_Barrier(comm); kproc = rank/(nprocx*nprocy); jproc = (rank-nprocx*nprocy*kproc)/nprocx; iproc = rank-nprocx*nprocy*kproc-nprocz*jproc; @@ -102,7 +106,7 @@ int main(int argc, char **argv) if (rank == 0) { printf("i,j,k proc=%d %d %d \n",iproc,jproc,kproc); } - comm.barrier(); + MPI_Barrier(comm); if (rank == 1){ printf("i,j,k proc=%d %d %d \n",iproc,jproc,kproc); printf("\n\n"); @@ -139,7 +143,7 @@ int main(int argc, char **argv) } } Dm->CommInit(); - comm.barrier(); + MPI_Barrier(comm); if (rank == 0) cout << "Domain set." 
<< endl; int Np=0; // number of local pore nodes @@ -184,7 +188,7 @@ int main(int argc, char **argv) if (rank == 0) PrintNeighborList(neighborList,Np, rank); - comm.barrier(); + MPI_Barrier(comm); //......................device distributions................................. int dist_mem_size = Np*sizeof(double); @@ -209,13 +213,13 @@ int main(int argc, char **argv) //.......create and start timer............ double starttime,stoptime,cputime; - ScaLBL_DeviceBarrier(); comm.barrier(); + ScaLBL_DeviceBarrier(); MPI_Barrier(comm); starttime = MPI_Wtime(); /************ MAIN ITERATION LOOP (timing communications)***************************************/ //ScaLBL_Comm->SendD3Q19(dist, &dist[10*Np]); //ScaLBL_Comm->RecvD3Q19(dist, &dist[10*Np]); - ScaLBL_DeviceBarrier(); comm.barrier(); + ScaLBL_DeviceBarrier(); MPI_Barrier(comm); if (rank==0) printf("Beginning AA timesteps...\n"); if (rank==0) printf("********************************************************\n"); @@ -227,14 +231,14 @@ int main(int argc, char **argv) ScaLBL_D3Q19_AAodd_MRT(NeighborList, dist, ScaLBL_Comm->first_interior, ScaLBL_Comm->last_interior, Np, rlx_setA, rlx_setB, Fx, Fy, Fz); ScaLBL_Comm->RecvD3Q19AA(dist); //WRITE INTO OPPOSITE ScaLBL_D3Q19_AAodd_MRT(NeighborList, dist, 0, ScaLBL_Comm->next, Np, rlx_setA, rlx_setB, Fx, Fy, Fz); - ScaLBL_DeviceBarrier(); comm.barrier(); + ScaLBL_DeviceBarrier(); MPI_Barrier(comm); timestep++; ScaLBL_Comm->SendD3Q19AA(dist); //READ FORM NORMAL ScaLBL_D3Q19_AAeven_MRT(dist, ScaLBL_Comm->first_interior, ScaLBL_Comm->last_interior, Np, rlx_setA, rlx_setB, Fx, Fy, Fz); ScaLBL_Comm->RecvD3Q19AA(dist); //WRITE INTO OPPOSITE ScaLBL_D3Q19_AAeven_MRT(dist, 0, ScaLBL_Comm->next, Np, rlx_setA, rlx_setB, Fx, Fy, Fz); - ScaLBL_DeviceBarrier(); comm.barrier(); + ScaLBL_DeviceBarrier(); MPI_Barrier(comm); timestep++; //************************************************************************/ @@ -327,7 +331,7 @@ int main(int argc, char **argv) } // **************************************************** - comm.barrier(); + MPI_Barrier(comm); MPI_Finalize(); // **************************************************** diff --git a/tests/TestInterfaceSpeed.cpp b/tests/TestInterfaceSpeed.cpp index d2c901df..40d53b47 100644 --- a/tests/TestInterfaceSpeed.cpp +++ b/tests/TestInterfaceSpeed.cpp @@ -2,7 +2,7 @@ #include #include "analysis/TwoPhase.h" -#include "common/MPI.h" +#include "common/MPI_Helpers.h" #include "common/Communication.h" #include "IO/Mesh.h" #include "IO/Writer.h" @@ -18,9 +18,13 @@ int main (int argc, char *argv[]) { // Initialize MPI + int rank,nprocs; MPI_Init(&argc,&argv); - Utilities::MPI comm( MPI_COMM_WORLD ); - int rank = comm.getRank(); + MPI_Comm comm = MPI_COMM_WORLD; + MPI_Comm_rank(comm,&rank); + MPI_Comm_size(comm,&nprocs); + + int i,j,k; // Load inputs string FILENAME = argv[1]; @@ -36,7 +40,7 @@ int main (int argc, char *argv[]) Nx+=2; Ny+=2; Nz+=2; - for (int i=0; iid[i] = 1; + for (i=0; iid[i] = 1; Dm->CommInit(); @@ -47,9 +51,9 @@ int main (int argc, char *argv[]) double dist1,dist2; Cx = Cy = Cz = N*0.5; - for (int k=0; k #include #include "common/ScaLBL.h" -#include "common/MPI.h" +#include "common/MPI_Helpers.h" using namespace std; @@ -488,11 +488,15 @@ inline void UnpackID(int *list, int count, char *recvbuf, char *ID){ //*************************************************************************************** int main(int argc, char **argv) { + //***************************************** + // ***** MPI STUFF **************** + //***************************************** // 
Initialize MPI + int rank,nprocs; MPI_Init(&argc,&argv); - Utilities::MPI comm( MPI_COMM_WORLD ); - int rank = comm.getRank(); - int nprocs = comm.getSize(); + MPI_Comm comm = MPI_COMM_WORLD; + MPI_Comm_rank(comm,&rank); + MPI_Comm_size(comm,&nprocs); int check; { // parallel domain size (# of sub-domains) @@ -578,7 +582,7 @@ int main(int argc, char **argv) } // ************************************************************** // Broadcast simulation parameters from rank 0 to all other procs - comm.barrier(); + MPI_Barrier(comm); //................................................. MPI_Bcast(&Nx,1,MPI_INT,0,comm); MPI_Bcast(&Ny,1,MPI_INT,0,comm); @@ -591,7 +595,7 @@ int main(int argc, char **argv) MPI_Bcast(&Ly,1,MPI_DOUBLE,0,comm); MPI_Bcast(&Lz,1,MPI_DOUBLE,0,comm); //................................................. - comm.barrier(); + MPI_Barrier(comm); // ************************************************************** // ************************************************************** @@ -609,7 +613,7 @@ int main(int argc, char **argv) printf("********************************************************\n"); } - comm.barrier(); + MPI_Barrier(comm); kproc = rank/(nprocx*nprocy); jproc = (rank-nprocx*nprocy*kproc)/nprocx; iproc = rank-nprocx*nprocy*kproc-nprocz*jproc; @@ -617,7 +621,7 @@ int main(int argc, char **argv) if (rank == 0) { printf("i,j,k proc=%d %d %d \n",iproc,jproc,kproc); } - comm.barrier(); + MPI_Barrier(comm); if (rank == 1){ printf("i,j,k proc=%d %d %d \n",iproc,jproc,kproc); printf("\n\n"); @@ -646,7 +650,7 @@ int main(int argc, char **argv) fread(Dm.id,1,N,IDFILE); fclose(IDFILE); - comm.barrier(); + MPI_Barrier(comm); Dm.CommInit(); //....................................................................... @@ -667,12 +671,12 @@ int main(int argc, char **argv) } } } - comm.barrier(); + MPI_Barrier(comm); MPI_Allreduce(&sum_local,&sum,1,MPI_DOUBLE,MPI_SUM,comm); porosity = sum*iVol_global; if (rank==0) printf("Media porosity = %f \n",porosity); - comm.barrier(); + MPI_Barrier(comm); if (rank == 0) cout << "Domain set." << endl; if (rank==0) printf ("Create ScaLBL_Communicator \n"); @@ -702,7 +706,7 @@ int main(int argc, char **argv) neighborList= new int[18*Np]; ScaLBL_Comm.MemoryOptimizedLayoutAA(Map,neighborList,Dm.id,Np); - comm.barrier(); + MPI_Barrier(comm); //......................device distributions................................. int dist_mem_size = Np*sizeof(double); @@ -730,7 +734,7 @@ int main(int argc, char **argv) //.......create and start timer............ 
double starttime,stoptime,cputime; - ScaLBL_DeviceBarrier(); comm.barrier(); + ScaLBL_DeviceBarrier(); MPI_Barrier(comm); starttime = MPI_Wtime(); while (timestep < timesteps) { @@ -739,14 +743,14 @@ int main(int argc, char **argv) ScaLBL_D3Q19_AAodd_MRT(NeighborList, dist, ScaLBL_Comm.next, Np, Np, rlx_setA, rlx_setB, Fx, Fy, Fz); ScaLBL_Comm.RecvD3Q19AA(dist); //WRITE INTO OPPOSITE ScaLBL_D3Q19_AAodd_MRT(NeighborList, dist, 0, ScaLBL_Comm.next, Np, rlx_setA, rlx_setB, Fx, Fy, Fz); - ScaLBL_DeviceBarrier(); comm.barrier(); + ScaLBL_DeviceBarrier(); MPI_Barrier(comm); timestep++; ScaLBL_Comm.SendD3Q19AA(dist); //READ FORM NORMAL ScaLBL_D3Q19_AAeven_MRT(dist, ScaLBL_Comm.next, Np, Np, rlx_setA, rlx_setB, Fx, Fy, Fz); ScaLBL_Comm.RecvD3Q19AA(dist); //WRITE INTO OPPOSITE ScaLBL_D3Q19_AAeven_MRT(dist, 0, ScaLBL_Comm.next, Np, rlx_setA, rlx_setB, Fx, Fy, Fz); - ScaLBL_DeviceBarrier(); comm.barrier(); + ScaLBL_DeviceBarrier(); MPI_Barrier(comm); timestep++; //************************************************************************/ @@ -779,7 +783,7 @@ int main(int argc, char **argv) VEL= new double [3*Np]; int SIZE=3*Np*sizeof(double); ScaLBL_D3Q19_Momentum(dist,Velocity, Np); - ScaLBL_DeviceBarrier(); comm.barrier(); + ScaLBL_DeviceBarrier(); MPI_Barrier(comm); ScaLBL_CopyToHost(&VEL[0],&Velocity[0],SIZE); sum_local=0.f; @@ -801,7 +805,7 @@ int main(int argc, char **argv) } // **************************************************** - comm.barrier(); + MPI_Barrier(comm); MPI_Finalize(); // **************************************************** diff --git a/tests/TestMap.cpp b/tests/TestMap.cpp index f3010081..a47c0d9e 100644 --- a/tests/TestMap.cpp +++ b/tests/TestMap.cpp @@ -7,7 +7,7 @@ #include #include #include "common/ScaLBL.h" -#include "common/MPI.h" +#include "common/MPI_Helpers.h" using namespace std; @@ -26,9 +26,15 @@ std::shared_ptr loadInputs( int nprocs ) //*************************************************************************************** int main(int argc, char **argv) { + //***************************************** + // ***** MPI STUFF **************** + //***************************************** // Initialize MPI + int rank,nprocs; MPI_Init(&argc,&argv); - Utilities::MPI comm( MPI_COMM_WORLD ); + MPI_Comm comm = MPI_COMM_WORLD; + MPI_Comm_rank(comm,&rank); + MPI_Comm_size(comm,&nprocs); int check=0; { @@ -39,7 +45,6 @@ int main(int argc, char **argv) {1,0,1},{-1,0,-1},{1,0,-1},{-1,0,1}, {0,1,1},{0,-1,-1},{0,1,-1},{0,-1,1}}; - int rank = comm.getRank(); if (rank == 0){ printf("********************************************************\n"); printf("Running unit test: TestMap \n"); @@ -47,7 +52,7 @@ int main(int argc, char **argv) } // Load inputs - auto db = loadInputs( comm.getSize() ); + auto db = loadInputs( nprocs ); int Nx = db->getVector( "n" )[0]; int Ny = db->getVector( "n" )[1]; int Nz = db->getVector( "n" )[2]; @@ -89,7 +94,7 @@ int main(int argc, char **argv) neighborList= new int[18*Npad]; Np = ScaLBL_Comm->MemoryOptimizedLayoutAA(Map,neighborList,Dm->id,Np); - comm.barrier(); + MPI_Barrier(comm); // Check the neighborlist printf("Check neighborlist: exterior %i, first interior %i last interior %i \n",ScaLBL_Comm->LastExterior(),ScaLBL_Comm->FirstInterior(),ScaLBL_Comm->LastInterior()); @@ -192,7 +197,7 @@ int main(int argc, char **argv) } // **************************************************** - comm.barrier(); + MPI_Barrier(comm); MPI_Finalize(); // **************************************************** diff --git a/tests/TestMassConservationD3Q7.cpp 
b/tests/TestMassConservationD3Q7.cpp index 68183cd2..bbfe8cae 100644 --- a/tests/TestMassConservationD3Q7.cpp +++ b/tests/TestMassConservationD3Q7.cpp @@ -8,7 +8,7 @@ #include #include "common/ScaLBL.h" -#include "common/MPI.h" +#include "common/MPI_Helpers.h" #include "models/ColorModel.h" inline void InitializeBubble(ScaLBL_ColorModel &ColorModel, double BubbleRadius){ @@ -67,10 +67,11 @@ inline void InitializeBubble(ScaLBL_ColorModel &ColorModel, double BubbleRadius) int main(int argc, char **argv) { // Initialize MPI + int rank,nprocs; MPI_Init(&argc,&argv); - Utilities::MPI comm( MPI_COMM_WORLD ); - int rank = comm.getRank(); - int nprocs = comm.getSize(); + MPI_Comm comm = MPI_COMM_WORLD; + MPI_Comm_rank(comm,&rank); + MPI_Comm_size(comm,&nprocs); // parallel domain size (# of sub-domains) if (rank == 0){ @@ -265,7 +266,7 @@ int main(int argc, char **argv) } } // **************************************************** - comm.barrier(); + MPI_Barrier(comm); MPI_Finalize(); // **************************************************** } diff --git a/tests/TestMicroCTReader.cpp b/tests/TestMicroCTReader.cpp index 9a54610c..4a4c6aac 100644 --- a/tests/TestMicroCTReader.cpp +++ b/tests/TestMicroCTReader.cpp @@ -1,6 +1,6 @@ // Test reading high-resolution files from the microct database -#include "common/MPI.h" +#include "common/MPI_Helpers.h" #include "common/UnitTest.h" #include "common/Database.h" #include "common/Domain.h" @@ -13,14 +13,12 @@ void testReadMicroCT( const std::string& filename, UnitTest& ut ) { - Utilities::MPI comm( MPI_COMM_WORLD ); - // Get the domain info auto db = std::make_shared( filename ); auto domain_db = db->getDatabase( "Domain" ); // Test reading microCT files - auto data = readMicroCT( *domain_db, comm ); + auto data = readMicroCT( *domain_db, MPI_COMM_WORLD ); // Check if we loaded the data correctly if ( data.size() == domain_db->getVector( "n" ) ) @@ -32,7 +30,7 @@ void testReadMicroCT( const std::string& filename, UnitTest& ut ) auto n = domain_db->getVector( "n" ); auto nproc = domain_db->getVector( "nproc" ); int N[3] = { n[0]*nproc[0], n[1]*nproc[1], n[2]*nproc[2] }; - int rank = comm.getRank(); + int rank = comm_rank(MPI_COMM_WORLD); RankInfoStruct rankInfo( rank, nproc[0], nproc[1], nproc[2] ); std::vector meshData( 1 ); auto Var = std::make_shared(); @@ -43,7 +41,7 @@ void testReadMicroCT( const std::string& filename, UnitTest& ut ) meshData[0].meshName = "grid"; meshData[0].mesh = std::make_shared(rankInfo,n[0],n[1],n[2],N[0],N[1],N[2]); meshData[0].vars.push_back(Var); - IO::writeData( 0, meshData, comm ); + IO::writeData( 0, meshData, MPI_COMM_WORLD ); } diff --git a/tests/TestMomentsD3Q19.cpp b/tests/TestMomentsD3Q19.cpp index 6bd3e8ff..b26d7bed 100644 --- a/tests/TestMomentsD3Q19.cpp +++ b/tests/TestMomentsD3Q19.cpp @@ -1,5 +1,5 @@ #include -#include "common/MPI.h" +#include "common/MPI_Helpers.h" #include "common/Utilities.h" #include @@ -463,14 +463,13 @@ inline void MRT_Transform(double *dist, int Np) { int main (int argc, char **argv) { MPI_Init(&argc,&argv); - Utilities::MPI comm( MPI_COMM_WORLD ); - int rank = comm.getRank(); - int nprocs = comm.getSize(); + int rank = MPI_WORLD_RANK(); + int nprocs = MPI_WORLD_SIZE(); for (int i=0; i tmp = netcdf::getVar( fid, "tmp" ); @@ -96,8 +95,7 @@ int main(int argc, char **argv) { // Initialize MPI MPI_Init(&argc,&argv); - Utilities::MPI comm( MPI_COMM_WORLD ); - const int rank = comm.getRank(); + int rank = comm_rank(MPI_COMM_WORLD); UnitTest ut; PROFILE_START("Main"); diff --git 
a/tests/TestPoiseuille.cpp b/tests/TestPoiseuille.cpp index 744d292d..e69507e1 100644 --- a/tests/TestPoiseuille.cpp +++ b/tests/TestPoiseuille.cpp @@ -7,7 +7,7 @@ #include #include #include "common/ScaLBL.h" -#include "common/MPI.h" +#include "common/MPI_Helpers.h" #include "models/MRTModel.h" void ParallelPlates(ScaLBL_MRTModel &MRT){ @@ -47,11 +47,15 @@ void ParallelPlates(ScaLBL_MRTModel &MRT){ //*************************************************************************************** int main(int argc, char **argv) { + //***************************************** + // ***** MPI STUFF **************** + //***************************************** // Initialize MPI + int rank,nprocs; MPI_Init(&argc,&argv); - Utilities::MPI comm( MPI_COMM_WORLD ); - int rank = comm.getRank(); - int nprocs = comm.getSize(); + MPI_Comm comm = MPI_COMM_WORLD; + MPI_Comm_rank(comm,&rank); + MPI_Comm_size(comm,&nprocs); int check=0; { if (rank == 0){ @@ -73,7 +77,7 @@ int main(int argc, char **argv) int SIZE=MRT.Np*sizeof(double); ScaLBL_D3Q19_Momentum(MRT.fq,MRT.Velocity, MRT.Np); - ScaLBL_DeviceBarrier(); comm.barrier(); + ScaLBL_DeviceBarrier(); MPI_Barrier(comm); ScaLBL_CopyToHost(&Vz[0],&MRT.Velocity[0],3*SIZE); if (rank == 0) printf("Force: %f,%f,%f \n",MRT.Fx,MRT.Fy,MRT.Fz); @@ -87,7 +91,7 @@ int main(int argc, char **argv) j=Ny/2; k=Nz/2; if (rank == 0) printf("Channel width=%f \n",W); if (rank == 0) printf("ID flag vz analytical\n"); - comm.barrier(); + MPI_Barrier(comm); if (rank == 0) { for (i=0;i #include #include "common/ScaLBL.h" -#include "common/MPI.h" +#include "common/MPI_Helpers.h" //*************************************************************************************** int main(int argc, char **argv) { + //***************************************** + // ***** MPI STUFF **************** + //***************************************** // Initialize MPI + int rank,nprocs; MPI_Init(&argc,&argv); - Utilities::MPI comm( MPI_COMM_WORLD ); - int rank = comm.getRank(); + MPI_Comm comm = MPI_COMM_WORLD; + MPI_Comm_rank(comm,&rank); + MPI_Comm_size(comm,&nprocs); int check=0; { if (rank == 0){ @@ -45,7 +50,7 @@ int main(int argc, char **argv) printf("********************************************************\n"); } - comm.barrier(); + MPI_Barrier(comm); int kproc = rank/(nprocx*nprocy); int jproc = (rank-nprocx*nprocy*kproc)/nprocx; int iproc = rank-nprocx*nprocy*kproc-nprocz*jproc; @@ -53,7 +58,7 @@ int main(int argc, char **argv) if (rank == 0) { printf("i,j,k proc=%d %d %d \n",iproc,jproc,kproc); } - comm.barrier(); + MPI_Barrier(comm); if (rank == 1){ printf("i,j,k proc=%d %d %d \n",iproc,jproc,kproc); printf("\n\n"); @@ -97,11 +102,11 @@ int main(int argc, char **argv) } } } - sum = comm.sumReduce( sum_local ); + MPI_Allreduce(&sum_local,&sum,1,MPI_DOUBLE,MPI_SUM,comm); porosity = sum*iVol_global; if (rank==0) printf("Media porosity = %f \n",porosity); - comm.barrier(); + MPI_Barrier(comm); if (rank == 0) cout << "Domain set." << endl; if (rank==0) printf ("Create ScaLBL_Communicator \n"); @@ -128,7 +133,7 @@ int main(int argc, char **argv) IntArray Map(Nx,Ny,Nz); neighborList= new int[18*Npad]; Np = ScaLBL_Comm->MemoryOptimizedLayoutAA(Map,neighborList,Dm->id,Np); - comm.barrier(); + MPI_Barrier(comm); //......................device distributions................................. 
if (rank==0) printf ("Allocating distributions \n"); @@ -189,7 +194,7 @@ int main(int argc, char **argv) } } // **************************************************** - comm.barrier(); + MPI_Barrier(comm); MPI_Finalize(); // **************************************************** return check; diff --git a/tests/TestSegDist.cpp b/tests/TestSegDist.cpp index b5e23ec8..ece3222d 100644 --- a/tests/TestSegDist.cpp +++ b/tests/TestSegDist.cpp @@ -39,10 +39,11 @@ std::shared_ptr loadInputs( int nprocs ) int main(int argc, char **argv) { // Initialize MPI + int rank, nprocs; MPI_Init(&argc,&argv); - Utilities::MPI comm( MPI_COMM_WORLD ); - int rank = comm.getRank(); - int nprocs = comm.getSize(); + MPI_Comm comm = MPI_COMM_WORLD; + MPI_Comm_rank(comm,&rank); + MPI_Comm_size(comm,&nprocs); { @@ -97,7 +98,7 @@ int main(int argc, char **argv) } } - comm.barrier(); + MPI_Barrier(comm); if (rank==0) printf("Initialized! Converting to Signed Distance function \n"); double t1 = MPI_Wtime(); @@ -115,7 +116,7 @@ int main(int argc, char **argv) } } } - err = Dm.Comm.sumReduce( err ); + err = sumReduce( Dm.Comm, err ); err = sqrt( err / (nx*ny*nz*nprocs) ); if (rank==0) printf("Mean error %0.4f \n", err); @@ -141,7 +142,7 @@ int main(int argc, char **argv) IO::writeData( "testSegDist", data, MPI_COMM_WORLD ); } - comm.barrier(); + MPI_Barrier(comm); MPI_Finalize(); return 0; diff --git a/tests/TestSubphase.cpp b/tests/TestSubphase.cpp index 9738812f..fd6383be 100644 --- a/tests/TestSubphase.cpp +++ b/tests/TestSubphase.cpp @@ -26,10 +26,11 @@ std::shared_ptr loadInputs( int nprocs ) int main(int argc, char **argv) { // Initialize MPI + int rank, nprocs; MPI_Init(&argc,&argv); - Utilities::MPI comm( MPI_COMM_WORLD ); - int rank = comm.getRank(); - int nprocs = comm.getSize(); + MPI_Comm comm = MPI_COMM_WORLD; + MPI_Comm_rank(comm,&rank); + MPI_Comm_size(comm,&nprocs); { // Limit scope so variables that contain communicators will free before MPI_Finialize if ( rank==0 ) { @@ -136,7 +137,7 @@ int main(int argc, char **argv) // Averages->Reduce(); } // Limit scope so variables that contain communicators will free before MPI_Finialize - comm.barrier(); + MPI_Barrier(comm); MPI_Finalize(); return 0; } diff --git a/tests/TestTopo3D.cpp b/tests/TestTopo3D.cpp index 948bb1d6..8d00ef5a 100644 --- a/tests/TestTopo3D.cpp +++ b/tests/TestTopo3D.cpp @@ -26,10 +26,11 @@ std::shared_ptr loadInputs( int nprocs ) int main(int argc, char **argv) { // Initialize MPI + int rank, nprocs; MPI_Init(&argc,&argv); - Utilities::MPI comm( MPI_COMM_WORLD ); - int rank = comm.getRank(); - int nprocs = comm.getSize(); + MPI_Comm comm = MPI_COMM_WORLD; + MPI_Comm_rank(comm,&rank); + MPI_Comm_size(comm,&nprocs); { // Limit scope so variables that contain communicators will free before MPI_Finialize if ( rank==0 ) { @@ -225,7 +226,7 @@ int main(int argc, char **argv) IO::writeData( timestep, visData, comm ); } // Limit scope so variables that contain communicators will free before MPI_Finialize - comm.barrier(); + MPI_Barrier(comm); MPI_Finalize(); return 0; } diff --git a/tests/TestTorus.cpp b/tests/TestTorus.cpp index 5125ce92..2d486774 100644 --- a/tests/TestTorus.cpp +++ b/tests/TestTorus.cpp @@ -26,10 +26,11 @@ std::shared_ptr loadInputs( int nprocs ) int main(int argc, char **argv) { // Initialize MPI + int rank, nprocs; MPI_Init(&argc,&argv); - Utilities::MPI comm( MPI_COMM_WORLD ); - int rank = comm.getRank(); - int nprocs = comm.getSize(); + MPI_Comm comm = MPI_COMM_WORLD; + MPI_Comm_rank(comm,&rank); + MPI_Comm_size(comm,&nprocs); { // 
Limit scope so variables that contain communicators will free before MPI_Finialize if ( rank==0 ) { @@ -164,7 +165,7 @@ int main(int argc, char **argv) // Averages->Reduce(); } // Limit scope so variables that contain communicators will free before MPI_Finialize - comm.barrier(); + MPI_Barrier(comm); MPI_Finalize(); return 0; } diff --git a/tests/TestTorusEvolve.cpp b/tests/TestTorusEvolve.cpp index 32cf7fd8..1a65d268 100644 --- a/tests/TestTorusEvolve.cpp +++ b/tests/TestTorusEvolve.cpp @@ -26,10 +26,11 @@ std::shared_ptr loadInputs( int nprocs ) int main(int argc, char **argv) { // Initialize MPI + int rank, nprocs; MPI_Init(&argc,&argv); - Utilities::MPI comm( MPI_COMM_WORLD ); - int rank = comm.getRank(); - int nprocs = comm.getSize(); + MPI_Comm comm = MPI_COMM_WORLD; + MPI_Comm_rank(comm,&rank); + MPI_Comm_size(comm,&nprocs); { // Limit scope so variables that contain communicators will free before MPI_Finialize if ( rank==0 ) { @@ -156,7 +157,7 @@ int main(int argc, char **argv) } } // Limit scope so variables that contain communicators will free before MPI_Finialize - comm.barrier(); + MPI_Barrier(comm); MPI_Finalize(); return 0; } diff --git a/tests/TestTwoPhase.cpp b/tests/TestTwoPhase.cpp index fa54d98d..a979314a 100644 --- a/tests/TestTwoPhase.cpp +++ b/tests/TestTwoPhase.cpp @@ -8,7 +8,7 @@ #include #include "analysis/TwoPhase.h" -#include "common/MPI.h" +#include "common/MPI_Helpers.h" #include "common/Communication.h" #include "IO/Mesh.h" #include "IO/Writer.h" @@ -17,10 +17,11 @@ int main(int argc, char **argv) { // Initialize MPI + int rank,nprocs; MPI_Init(&argc,&argv); - Utilities::MPI comm( MPI_COMM_WORLD ); - int rank = comm.getRank(); - int nprocs = comm.getSize(); + MPI_Comm comm = MPI_COMM_WORLD; + MPI_Comm_rank(comm,&rank); + MPI_Comm_size(comm,&nprocs); { // Limit scope so Domain can free it's communicator printf("Running two-phase averaging test on %i processors \n",nprocs); @@ -109,7 +110,7 @@ int main(int argc, char **argv) fclose(PHASE); } // **************************************************** - comm.barrier(); + MPI_Barrier(comm); } // Limit scope so Domain will free it's communicator MPI_Finalize(); return 0; diff --git a/tests/TestWriter.cpp b/tests/TestWriter.cpp index 37858202..8936aaff 100644 --- a/tests/TestWriter.cpp +++ b/tests/TestWriter.cpp @@ -8,7 +8,7 @@ #include "common/UnitTest.h" #include "common/Utilities.h" -#include "common/MPI.h" +#include "common/MPI_Helpers.h" #include "IO/MeshDatabase.h" #include "IO/Reader.h" #include "IO/Writer.h" @@ -34,9 +34,11 @@ inline double distance( const Point& p ) // Test writing and reading the given format void testWriter( const std::string& format, std::vector& meshData, UnitTest& ut ) { - Utilities::MPI comm( MPI_COMM_WORLD ); - int nprocs = comm.getSize(); - comm.barrier(); + int rank, nprocs; + MPI_Comm comm = MPI_COMM_WORLD; + MPI_Comm_rank(comm,&rank); + MPI_Comm_size(comm,&nprocs); + MPI_Barrier(comm); // Get the format std::string format2 = format; @@ -61,7 +63,7 @@ void testWriter( const std::string& format, std::vector& mes IO::initialize( "test_"+format, format2, false ); IO::writeData( 0, meshData, comm ); IO::writeData( 3, meshData, comm ); - comm.barrier(); + MPI_Barrier(comm); PROFILE_STOP(format+"-write"); // Get the summary name for reading @@ -226,10 +228,11 @@ void testWriter( const std::string& format, std::vector& mes // Main int main(int argc, char **argv) { + int rank,nprocs; MPI_Init(&argc,&argv); - Utilities::MPI comm( MPI_COMM_WORLD ); - int rank = comm.getRank(); - int nprocs = 
comm.getSize(); + MPI_Comm comm = MPI_COMM_WORLD; + MPI_Comm_rank(comm,&rank); + MPI_Comm_size(comm,&nprocs); Utilities::setAbortBehavior(true,2); Utilities::setErrorHandlers(); UnitTest ut; @@ -386,7 +389,7 @@ int main(int argc, char **argv) ut.report(); PROFILE_SAVE("TestWriter",true); int N_errors = ut.NumFailGlobal(); - comm.barrier(); + MPI_Barrier(comm); MPI_Finalize(); return N_errors; } diff --git a/tests/convertIO.cpp b/tests/convertIO.cpp index 27605237..0937729f 100644 --- a/tests/convertIO.cpp +++ b/tests/convertIO.cpp @@ -5,7 +5,7 @@ #include #include -#include "common/MPI.h" +#include "common/MPI_Helpers.h" #include "common/Communication.h" #include "common/Utilities.h" #include "IO/Mesh.h" @@ -17,10 +17,11 @@ int main(int argc, char **argv) { // Initialize MPI + int rank,nprocs; MPI_Init(&argc,&argv); - Utilities::MPI comm( MPI_COMM_WORLD ); - int rank = comm.getRank(); - int nprocs = comm.getSize(); + MPI_Comm comm = MPI_COMM_WORLD; + MPI_Comm_rank(comm,&rank); + MPI_Comm_size(comm,&nprocs); Utilities::setErrorHandlers(); PROFILE_ENABLE(2); PROFILE_ENABLE_TRACE(); @@ -69,20 +70,20 @@ int main(int argc, char **argv) i++; } - comm.barrier(); + MPI_Barrier(comm); PROFILE_STOP("Read"); // Save the mesh data to a new file PROFILE_START("Write"); IO::writeData( timestep, meshData, MPI_COMM_WORLD ); - comm.barrier(); + MPI_Barrier(comm); PROFILE_STOP("Write"); } } // Limit scope PROFILE_STOP("Main"); PROFILE_SAVE("convertData",true); - comm.barrier(); + MPI_Barrier(comm); MPI_Finalize(); return 0; } diff --git a/tests/hello_world.cpp b/tests/hello_world.cpp index 810d3a9c..d236bf0e 100644 --- a/tests/hello_world.cpp +++ b/tests/hello_world.cpp @@ -1,19 +1,18 @@ #include -#include "common/MPI.h" +#include "common/MPI_Helpers.h" #include "common/Utilities.h" int main (int argc, char **argv) { MPI_Init(&argc,&argv); - Utilities::MPI comm( MPI_COMM_WORLD ); - int rank = comm.getRank(); - int nprocs = comm.getSize(); + int rank = MPI_WORLD_RANK(); + int nprocs = MPI_WORLD_SIZE(); for (int i=0; i loadInputs( ) @@ -24,11 +24,15 @@ std::shared_ptr loadInputs( ) //*************************************************************************************** int main(int argc, char **argv) { + //***************************************** + // ***** MPI STUFF **************** + //***************************************** // Initialize MPI + int rank,nprocs; MPI_Init(&argc,&argv); - Utilities::MPI comm( MPI_COMM_WORLD ); - int rank = comm.getRank(); - int nprocs = comm.getSize(); + MPI_Comm comm = MPI_COMM_WORLD; + MPI_Comm_rank(comm,&rank); + MPI_Comm_size(comm,&nprocs); { //***************************************** // MPI ranks for all 18 neighbors @@ -92,7 +96,7 @@ int main(int argc, char **argv) rank_xy, rank_XY, rank_xY, rank_Xy, rank_xz, rank_XZ, rank_xZ, rank_Xz, rank_yz, rank_YZ, rank_yZ, rank_Yz ); - comm.barrier(); + MPI_Barrier(comm); Nz += 2; Nx = Ny = Nz; // Cubic domain @@ -181,7 +185,7 @@ int main(int argc, char **argv) } // **************************************************** - comm.barrier(); + MPI_Barrier(comm); MPI_Finalize(); // **************************************************** } diff --git a/tests/lbpm_color_macro_simulator.cpp b/tests/lbpm_color_macro_simulator.cpp index 97df6812..1c619c5a 100644 --- a/tests/lbpm_color_macro_simulator.cpp +++ b/tests/lbpm_color_macro_simulator.cpp @@ -9,7 +9,7 @@ #include "common/Communication.h" #include "analysis/TwoPhase.h" #include "analysis/runAnalysis.h" -#include "common/MPI.h" +#include "common/MPI_Helpers.h" #include "ProfilerApp.h" 
#include "threadpool/thread_pool.h" @@ -30,9 +30,10 @@ int main(int argc, char **argv) // Initialize MPI int provided_thread_support = -1; MPI_Init_thread(&argc,&argv,MPI_THREAD_MULTIPLE,&provided_thread_support); - Utilities::MPI comm( MPI_COMM_WORLD ); - int rank = comm.getRank(); - int nprocs = comm.getSize(); + MPI_Comm comm; + MPI_Comm_dup(MPI_COMM_WORLD,&comm); + int rank = comm_rank(comm); + int nprocs = comm_size(comm); { // Limit scope so variables that contain communicators will free before MPI_Finialize // parallel domain size (# of sub-domains) @@ -51,7 +52,7 @@ int main(int argc, char **argv) // int device=ScaLBL_SetDevice(rank); //printf("Using GPU ID %i for rank %i \n",device,rank); ScaLBL_DeviceBarrier(); - comm.barrier(); + MPI_Barrier(comm); PROFILE_ENABLE(1); //PROFILE_ENABLE_TRACE(); @@ -170,7 +171,7 @@ int main(int argc, char **argv) } // ************************************************************** // Broadcast simulation parameters from rank 0 to all other procs - comm.barrier(); + MPI_Barrier(comm); //................................................. MPI_Bcast(&tauA,1,MPI_DOUBLE,0,comm); MPI_Bcast(&tauB,1,MPI_DOUBLE,0,comm); @@ -206,7 +207,7 @@ int main(int argc, char **argv) // Get the rank info const RankInfoStruct rank_info(rank,nprocx,nprocy,nprocz); - comm.barrier(); + MPI_Barrier(comm); if (nprocs != nprocx*nprocy*nprocz){ printf("nprocx = %i \n",nprocx); @@ -261,7 +262,7 @@ int main(int argc, char **argv) // Mask that excludes the solid phase Domain Mask(Nx,Ny,Nz,rank,nprocx,nprocy,nprocz,Lx,Ly,Lz,BoundaryCondition); - comm.barrier(); + MPI_Barrier(comm); Nx+=2; Ny+=2; Nz += 2; int N = Nx*Ny*Nz; @@ -296,7 +297,7 @@ int main(int argc, char **argv) sprintf(LocalRankString,"%05d",rank); sprintf(LocalRankFilename,"%s%s","SignDist.",LocalRankString); ReadBinaryFile(LocalRankFilename, Averages->SDs.data(), N); - comm.barrier(); + MPI_Barrier(comm); if (rank == 0) cout << "Domain set." << endl; if (rank==0) printf("Initialize from segmented data: solid=0, NWP=1, WP=2 \n"); @@ -340,7 +341,7 @@ int main(int argc, char **argv) delete [] cDen; delete [] cfq; */ - comm.barrier(); + MPI_Barrier(comm); } fflush(stdout); @@ -415,7 +416,7 @@ int main(int argc, char **argv) neighborList= new int[18*Npad]; Np = ScaLBL_Comm.MemoryOptimizedLayoutAA(Map,neighborList,Mask.id,Np); if (rank==0) printf ("Set up memory efficient layout Npad=%i, Np=%i \n",Npad,Np); - comm.barrier(); + MPI_Barrier(comm); //........................................................................... // MAIN VARIABLES ALLOCATED HERE //........................................................................... @@ -536,7 +537,7 @@ int main(int argc, char **argv) //.......create and start timer............ double starttime,stoptime,cputime; ScaLBL_DeviceBarrier(); - comm.barrier(); + MPI_Barrier(comm); starttime = MPI_Wtime(); //......................................... 
@@ -588,7 +589,7 @@ int main(int argc, char **argv) } ScaLBL_D3Q19_AAodd_Color(NeighborList, dvcMap, fq, Aq, Bq, Den, Phi, Velocity, rhoA, rhoB, tauA, tauB, alpha, beta, Fx, Fy, Fz, Nx, Nx*Ny, 0, ScaLBL_Comm.next, Np); - ScaLBL_DeviceBarrier(); comm.barrier(); + ScaLBL_DeviceBarrier(); MPI_Barrier(comm); // *************EVEN TIMESTEP************* timestep++; @@ -621,10 +622,10 @@ int main(int argc, char **argv) } ScaLBL_D3Q19_AAeven_Color(dvcMap, fq, Aq, Bq, Den, Phi, Velocity, rhoA, rhoB, tauA, tauB, alpha, beta, Fx, Fy, Fz, Nx, Nx*Ny, 0, ScaLBL_Comm.next, Np); - ScaLBL_DeviceBarrier(); comm.barrier(); + ScaLBL_DeviceBarrier(); MPI_Barrier(comm); //************************************************************************ - comm.barrier(); + MPI_Barrier(comm); PROFILE_STOP("Update"); // Run the analysis @@ -636,7 +637,7 @@ int main(int argc, char **argv) PROFILE_SAVE("lbpm_color_simulator",1); //************************************************************************ ScaLBL_DeviceBarrier(); - comm.barrier(); + MPI_Barrier(comm); stoptime = MPI_Wtime(); if (rank==0) printf("-------------------------------------------------------------------\n"); // Compute the walltime per timestep @@ -656,8 +657,9 @@ int main(int argc, char **argv) PROFILE_STOP("Main"); PROFILE_SAVE("lbpm_color_simulator",1); // **************************************************** - comm.barrier(); + MPI_Barrier(comm); } // Limit scope so variables that contain communicators will free before MPI_Finialize + MPI_Comm_free(&comm); MPI_Finalize(); } diff --git a/tests/lbpm_color_simulator.cpp b/tests/lbpm_color_simulator.cpp index cef13189..1f63c653 100644 --- a/tests/lbpm_color_simulator.cpp +++ b/tests/lbpm_color_simulator.cpp @@ -28,9 +28,10 @@ int main(int argc, char **argv) { // Limit scope so variables that contain communicators will free before MPI_Finialize - Utilities::MPI comm( MPI_COMM_WORLD ); - int rank = comm.getRank(); - int nprocs = comm.getSize(); + MPI_Comm comm; + MPI_Comm_dup(MPI_COMM_WORLD,&comm); + int rank = comm_rank(comm); + int nprocs = comm_size(comm); if (rank == 0){ printf("********************************************************\n"); @@ -40,7 +41,7 @@ int main(int argc, char **argv) // Initialize compute device ScaLBL_SetDevice(rank); ScaLBL_DeviceBarrier(); - comm.barrier(); + MPI_Barrier(comm); PROFILE_ENABLE(1); //PROFILE_ENABLE_TRACE(); @@ -50,7 +51,7 @@ int main(int argc, char **argv) Utilities::setErrorHandlers(); auto filename = argv[1]; - ScaLBL_ColorModel ColorModel(rank,nprocs,comm.dup()); + ScaLBL_ColorModel ColorModel(rank,nprocs,comm); ColorModel.ReadParams(filename); ColorModel.SetDomain(); ColorModel.ReadInput(); @@ -63,7 +64,8 @@ int main(int argc, char **argv) PROFILE_SAVE("lbpm_color_simulator",1); // **************************************************** - comm.barrier(); + MPI_Barrier(comm); + MPI_Comm_free(&comm); } // Limit scope so variables that contain communicators will free before MPI_Finialize diff --git a/tests/lbpm_dfh_simulator.cpp b/tests/lbpm_dfh_simulator.cpp index 0d5902df..1e8dc0f9 100644 --- a/tests/lbpm_dfh_simulator.cpp +++ b/tests/lbpm_dfh_simulator.cpp @@ -26,9 +26,10 @@ int main(int argc, char **argv) // Initialize MPI int provided_thread_support = -1; MPI_Init_thread(&argc,&argv,MPI_THREAD_MULTIPLE,&provided_thread_support); - Utilities::MPI comm( MPI_COMM_WORLD ); - int rank = comm.getRank(); - int nprocs = comm.getSize(); + MPI_Comm comm; + MPI_Comm_dup(MPI_COMM_WORLD,&comm); + int rank = comm_rank(comm); + int nprocs = comm_size(comm); if ( rank==0 && 
provided_thread_support 1) depth=atoi(argv[1]); @@ -218,7 +222,7 @@ int main(int argc, char **argv) rank_xy, rank_XY, rank_xY, rank_Xy, rank_xz, rank_XZ, rank_xZ, rank_Xz, rank_yz, rank_YZ, rank_yZ, rank_Yz ); - comm.barrier(); + MPI_Barrier(comm); Nx += 2; Ny += 2; @@ -273,13 +277,13 @@ int main(int argc, char **argv) //....................................................................... if (rank == 0) printf("Reading the disc packing \n"); if (rank == 0) ReadDiscPacking(ndiscs,cx,cy,rad); - comm.barrier(); + MPI_Barrier(comm); // Broadcast the sphere packing to all processes MPI_Bcast(cx,ndiscs,MPI_DOUBLE,0,comm); MPI_Bcast(cy,ndiscs,MPI_DOUBLE,0,comm); MPI_Bcast(rad,ndiscs,MPI_DOUBLE,0,comm); //........................................................................... - comm.barrier(); + MPI_Barrier(comm); if (rank == 0){ cout << "Domain set." << endl; printf("************ \n"); @@ -384,7 +388,7 @@ int main(int argc, char **argv) //...................................................................... // **************************************************** - comm.barrier(); + MPI_Barrier(comm); MPI_Finalize(); // **************************************************** } diff --git a/tests/lbpm_inkbottle_pp.cpp b/tests/lbpm_inkbottle_pp.cpp index 669ab8c0..3c39219d 100644 --- a/tests/lbpm_inkbottle_pp.cpp +++ b/tests/lbpm_inkbottle_pp.cpp @@ -9,15 +9,19 @@ #include "common/ScaLBL.h" #include "common/Communication.h" #include "analysis/TwoPhase.h" -#include "common/MPI.h" +#include "common/MPI_Helpers.h" int main(int argc, char **argv) { + //***************************************** + // ***** MPI STUFF **************** + //***************************************** // Initialize MPI + int rank,nprocs; MPI_Init(&argc,&argv); - Utilities::MPI comm( MPI_COMM_WORLD ); - int rank = comm.getRank(); - int nprocs = comm.getSize(); + MPI_Comm comm = MPI_COMM_WORLD; + MPI_Comm_rank(comm,&rank); + MPI_Comm_size(comm,&nprocs); { // parallel domain size (# of sub-domains) int nprocx,nprocy,nprocz; @@ -79,7 +83,7 @@ int main(int argc, char **argv) } // ************************************************************** // Broadcast simulation parameters from rank 0 to all other procs - comm.barrier(); + MPI_Barrier(comm); // Computational domain MPI_Bcast(&Nx,1,MPI_INT,0,comm); MPI_Bcast(&Ny,1,MPI_INT,0,comm); @@ -92,7 +96,7 @@ int main(int argc, char **argv) MPI_Bcast(&Ly,1,MPI_DOUBLE,0,comm); MPI_Bcast(&Lz,1,MPI_DOUBLE,0,comm); //................................................. 
- comm.barrier(); + MPI_Barrier(comm); // ************************************************************** if (nprocs != nprocx*nprocy*nprocz){ @@ -119,7 +123,7 @@ int main(int argc, char **argv) rank_xy, rank_XY, rank_xY, rank_Xy, rank_xz, rank_XZ, rank_xZ, rank_Xz, rank_yz, rank_YZ, rank_yZ, rank_Yz ); - comm.barrier(); + MPI_Barrier(comm); Nz += 2; Nx = Ny = Nz; // Cubic domain @@ -217,7 +221,7 @@ int main(int argc, char **argv) } // **************************************************** - comm.barrier(); + MPI_Barrier(comm); MPI_Finalize(); // **************************************************** } diff --git a/tests/lbpm_juanes_bench_disc_pp.cpp b/tests/lbpm_juanes_bench_disc_pp.cpp index 47d8cb84..6f04cffa 100644 --- a/tests/lbpm_juanes_bench_disc_pp.cpp +++ b/tests/lbpm_juanes_bench_disc_pp.cpp @@ -9,7 +9,7 @@ #include "analysis/pmmc.h" #include "common/Domain.h" #include "common/Communication.h" -#include "common/MPI.h" // This includes mpi.h +#include "common/MPI_Helpers.h" // This includes mpi.h #include "common/SpherePack.h" /* @@ -130,11 +130,15 @@ inline void SignedDistanceDiscPack(double *Distance, int ndiscs, double *List_cx int main(int argc, char **argv) { + //***************************************** + // ***** MPI STUFF **************** + //***************************************** // Initialize MPI + int rank,nprocs; MPI_Init(&argc,&argv); - Utilities::MPI comm( MPI_COMM_WORLD ); - int rank = comm.getRank(); - int nprocs = comm.getSize(); + MPI_Comm comm = MPI_COMM_WORLD; + MPI_Comm_rank(comm,&rank); + MPI_Comm_size(comm,&nprocs); // parallel domain size (# of sub-domains) int nprocx,nprocy,nprocz; int iproc,jproc,kproc; @@ -190,7 +194,7 @@ int main(int argc, char **argv) } // ************************************************************** // Broadcast simulation parameters from rank 0 to all other procs - comm.barrier(); + MPI_Barrier(comm); //................................................. // Computational domain MPI_Bcast(&Nx,1,MPI_INT,0,comm); @@ -204,7 +208,7 @@ int main(int argc, char **argv) MPI_Bcast(&Ly,1,MPI_DOUBLE,0,comm); MPI_Bcast(&Lz,1,MPI_DOUBLE,0,comm); //................................................. - comm.barrier(); + MPI_Barrier(comm); // ************************************************************** double Rin,Rout; @@ -236,7 +240,7 @@ int main(int argc, char **argv) rank_xy, rank_XY, rank_xY, rank_Xy, rank_xz, rank_XZ, rank_xZ, rank_Xz, rank_yz, rank_YZ, rank_yZ, rank_Yz ); - comm.barrier(); + MPI_Barrier(comm); Nx += 2; Ny += 2; Nz += 2; int N = Nx*Ny*Nz; @@ -290,13 +294,13 @@ int main(int argc, char **argv) //....................................................................... if (rank == 0) printf("Reading the disc packing \n"); if (rank == 0) ReadDiscPacking(ndiscs,cx,cy,rad); - comm.barrier(); + MPI_Barrier(comm); // Broadcast the sphere packing to all processes MPI_Bcast(cx,ndiscs,MPI_DOUBLE,0,comm); MPI_Bcast(cy,ndiscs,MPI_DOUBLE,0,comm); MPI_Bcast(rad,ndiscs,MPI_DOUBLE,0,comm); //........................................................................... - comm.barrier(); + MPI_Barrier(comm); /* if (rank == 0){ cout << "Domain set." << endl; printf("************ \n"); @@ -308,7 +312,7 @@ int main(int argc, char **argv) } */ - comm.barrier(); + MPI_Barrier(comm); if (nprocz > 1 && rank==0) printf("Disc packs are 2D -- are you sure you want nprocz > 1? \n"); if (rank ==0) printf("Compute the signed distance part I \n"); //....................................................................... 
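Several of the preprocessor drivers above repeat the same "broadcast simulation parameters from rank 0" block. The sketch below isolates that pattern; the function name broadcast_domain and the literal values (64, 1.0) are placeholders standing in for the rank-0 input read, not code from the repository.

// Sketch only: rank 0 reads the domain parameters, then broadcasts them.
#include <mpi.h>
#include <cstdio>

static void broadcast_domain(MPI_Comm comm, int rank,
                             int &Nx, int &Ny, int &Nz,
                             double &Lx, double &Ly, double &Lz)
{
    if (rank == 0) {
        // rank 0 would parse the input database here; fixed values stand in
        Nx = Ny = Nz = 64;
        Lx = Ly = Lz = 1.0;
    }
    MPI_Barrier(comm);
    MPI_Bcast(&Nx, 1, MPI_INT, 0, comm);
    MPI_Bcast(&Ny, 1, MPI_INT, 0, comm);
    MPI_Bcast(&Nz, 1, MPI_INT, 0, comm);
    MPI_Bcast(&Lx, 1, MPI_DOUBLE, 0, comm);
    MPI_Bcast(&Ly, 1, MPI_DOUBLE, 0, comm);
    MPI_Bcast(&Lz, 1, MPI_DOUBLE, 0, comm);
    MPI_Barrier(comm);
}

int main(int argc, char **argv)
{
    MPI_Init(&argc, &argv);
    MPI_Comm comm = MPI_COMM_WORLD;
    int rank, nprocs;
    MPI_Comm_rank(comm, &rank);
    MPI_Comm_size(comm, &nprocs);

    int Nx = 0, Ny = 0, Nz = 0;
    double Lx = 0, Ly = 0, Lz = 0;
    broadcast_domain(comm, rank, Nx, Ny, Nz, Lx, Ly, Lz);
    if (rank == nprocs - 1)
        printf("rank %i sees Nx=%i Lx=%f\n", rank, Nx, Lx);

    MPI_Finalize();
    return 0;
}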
@@ -486,7 +490,7 @@ int main(int argc, char **argv) //...................................................................... // **************************************************** - comm.barrier(); + MPI_Barrier(comm); MPI_Finalize(); // **************************************************** } diff --git a/tests/lbpm_minkowski_scalar.cpp b/tests/lbpm_minkowski_scalar.cpp index 721207a1..3e3ede6d 100644 --- a/tests/lbpm_minkowski_scalar.cpp +++ b/tests/lbpm_minkowski_scalar.cpp @@ -14,7 +14,7 @@ #include "common/Array.h" #include "common/Domain.h" #include "common/Communication.h" -#include "common/MPI.h" +#include "common/MPI_Helpers.h" #include "IO/MeshDatabase.h" #include "IO/Mesh.h" #include "IO/Writer.h" @@ -28,11 +28,13 @@ int main(int argc, char **argv) { + // Initialize MPI + int rank, nprocs; MPI_Init(&argc,&argv); - Utilities::MPI comm( MPI_COMM_WORLD ); - int rank = comm.getRank(); - int nprocs = comm.getSize(); + MPI_Comm comm = MPI_COMM_WORLD; + MPI_Comm_rank(comm,&rank); + MPI_Comm_size(comm,&nprocs); { Utilities::setErrorHandlers(); PROFILE_START("Main"); @@ -85,7 +87,7 @@ int main(int argc, char **argv) fclose(SEGDAT); printf("Read segmented data from %s \n",Filename.c_str()); } - comm.barrier(); + MPI_Barrier(comm); // Get the rank info int N = (nx+2)*(ny+2)*(nz+2); @@ -150,7 +152,7 @@ int main(int argc, char **argv) } else{ printf("Sending data to process %i \n", rnk); - comm.send(tmp,N,rnk,15); + MPI_Send(tmp,N,MPI_CHAR,rnk,15,comm); } } } @@ -159,12 +161,13 @@ int main(int argc, char **argv) else{ // Recieve the subdomain from rank = 0 printf("Ready to recieve data %i at process %i \n", N,rank); - comm.recv(Dm->id,N,0,15); + MPI_Recv(Dm->id,N,MPI_CHAR,0,15,comm,MPI_STATUS_IGNORE); } - comm.barrier(); + MPI_Barrier(comm); // Compute the Minkowski functionals - auto Averages = std::make_shared(Dm); + MPI_Barrier(comm); + std::shared_ptr Averages(new Minkowski(Dm)); // Calculate the distance // Initialize the domain and communication @@ -209,7 +212,7 @@ int main(int argc, char **argv) } PROFILE_STOP("Main"); PROFILE_SAVE("Minkowski",true); - comm.barrier(); + MPI_Barrier(comm); MPI_Finalize(); return 0; } diff --git a/tests/lbpm_morph_pp.cpp b/tests/lbpm_morph_pp.cpp index 939fdc32..8fe8b228 100644 --- a/tests/lbpm_morph_pp.cpp +++ b/tests/lbpm_morph_pp.cpp @@ -23,9 +23,11 @@ int main(int argc, char **argv) { // Initialize MPI + int rank, nprocs; MPI_Init(&argc,&argv); - Utilities::MPI comm( MPI_COMM_WORLD ); - int rank = comm.getRank(); + MPI_Comm comm = MPI_COMM_WORLD; + MPI_Comm_rank(comm,&rank); + MPI_Comm_size(comm,&nprocs); { //....................................................................... 
// Reading the domain information file @@ -125,13 +127,13 @@ int main(int argc, char **argv) if (rank==0) printf("Initialized solid phase -- Converting to Signed Distance function \n"); CalcDist(SignDist,id_solid,*Dm); - comm.barrier(); + MPI_Barrier(comm); // Extract only the connected part of NWP BlobIDstruct new_index; double vF=0.0; double vS=0.0; ComputeGlobalBlobIDs(nx-2,ny-2,nz-2,Dm->rank_info,phase,SignDist,vF,vS,phase_label,Dm->Comm); - Dm->Comm.barrier(); + MPI_Barrier(Dm->Comm); int count_connected=0; int count_porespace=0; @@ -153,9 +155,9 @@ int main(int argc, char **argv) } } } - count_connected = Dm->Comm.sumReduce( count_connected ); - count_porespace = Dm->Comm.sumReduce( count_porespace ); - count_water = Dm->Comm.sumReduce( count_water ); + count_connected=sumReduce( Dm->Comm, count_connected); + count_porespace=sumReduce( Dm->Comm, count_porespace); + count_water=sumReduce( Dm->Comm, count_water); for (int k=0; kComm.sumReduce( count_water ); + count_water=sumReduce( Dm->Comm, count_water); SW = double(count_water) / count_porespace; if(rank==0) printf("Final saturation: %f \n", SW); @@ -234,13 +236,13 @@ int main(int argc, char **argv) } } } - comm.barrier(); + MPI_Barrier(comm); auto filename2 = READFILE + ".morph.raw"; if (rank==0) printf("Writing file to: %s \n", filename2.c_str()); Mask->AggregateLabels(filename2); } - comm.barrier(); + MPI_Barrier(comm); MPI_Finalize(); } diff --git a/tests/lbpm_morphdrain_pp.cpp b/tests/lbpm_morphdrain_pp.cpp index d3c5a428..8d73b1e4 100644 --- a/tests/lbpm_morphdrain_pp.cpp +++ b/tests/lbpm_morphdrain_pp.cpp @@ -23,9 +23,11 @@ int main(int argc, char **argv) { // Initialize MPI + int rank, nprocs; MPI_Init(&argc,&argv); - Utilities::MPI comm( MPI_COMM_WORLD ); - int rank = comm.getRank(); + MPI_Comm comm = MPI_COMM_WORLD; + MPI_Comm_rank(comm,&rank); + MPI_Comm_size(comm,&nprocs); { //....................................................................... // Reading the domain information file @@ -119,7 +121,7 @@ int main(int argc, char **argv) if (rank==0) printf("Initialized solid phase -- Converting to Signed Distance function \n"); CalcDist(SignDist,id_solid,*Dm); - comm.barrier(); + MPI_Barrier(comm); // Run the morphological opening MorphDrain(SignDist, id, Dm, SW); @@ -194,13 +196,13 @@ int main(int argc, char **argv) } } } - comm.barrier(); + MPI_Barrier(comm); auto filename2 = READFILE + ".morphdrain.raw"; if (rank==0) printf("Writing file to: %s \n", filename2.data() ); Mask->AggregateLabels( filename2 ); } - comm.barrier(); + MPI_Barrier(comm); MPI_Finalize(); } diff --git a/tests/lbpm_morphopen_pp.cpp b/tests/lbpm_morphopen_pp.cpp index a6209240..f8819348 100644 --- a/tests/lbpm_morphopen_pp.cpp +++ b/tests/lbpm_morphopen_pp.cpp @@ -23,9 +23,11 @@ int main(int argc, char **argv) { // Initialize MPI + int rank, nprocs; MPI_Init(&argc,&argv); - Utilities::MPI comm( MPI_COMM_WORLD ); - int rank = comm.getRank(); + MPI_Comm comm = MPI_COMM_WORLD; + MPI_Comm_rank(comm,&rank); + MPI_Comm_size(comm,&nprocs); { //....................................................................... 
// Reading the domain information file @@ -121,7 +123,7 @@ int main(int argc, char **argv) if (rank==0) printf("Initialized solid phase -- Converting to Signed Distance function \n"); CalcDist(SignDist,id_solid,*Dm); - comm.barrier(); + MPI_Barrier(comm); // Run the morphological opening MorphOpen(SignDist, id, Dm, SW, ErodeLabel, OpenLabel); @@ -196,13 +198,13 @@ int main(int argc, char **argv) } } } - comm.barrier(); + MPI_Barrier(comm); auto filename2 = READFILE + ".morphopen.raw"; if (rank==0) printf("Writing file to: %s \n", filename2.data()); Mask->AggregateLabels(filename2); } - comm.barrier(); + MPI_Barrier(comm); MPI_Finalize(); } diff --git a/tests/lbpm_nondarcy_simulator.cpp b/tests/lbpm_nondarcy_simulator.cpp index 096dc790..40672375 100644 --- a/tests/lbpm_nondarcy_simulator.cpp +++ b/tests/lbpm_nondarcy_simulator.cpp @@ -9,7 +9,7 @@ #include "common/ScaLBL.h" #include "common/Communication.h" #include "analysis/TwoPhase.h" -#include "common/MPI.h" +#include "common/MPI_Helpers.h" //#define WRITE_SURFACES @@ -77,11 +77,15 @@ int main(int argc, char **argv) } else { + //***************************************** + // ***** MPI STUFF **************** + //***************************************** // Initialize MPI + int rank,nprocs; MPI_Init(&argc,&argv); - Utilities::MPI comm( MPI_COMM_WORLD ); - int rank = comm.getRank(); - int nprocs = comm.getSize(); + MPI_Comm comm = MPI_COMM_WORLD; + MPI_Comm_rank(comm,&rank); + MPI_Comm_size(comm,&nprocs); { // parallel domain size (# of sub-domains) int nprocx,nprocy,nprocz; @@ -156,7 +160,7 @@ int main(int argc, char **argv) } // ************************************************************** // Broadcast simulation parameters from rank 0 to all other procs - comm.barrier(); + MPI_Barrier(comm); //................................................. MPI_Bcast(&tau,1,MPI_DOUBLE,0,comm); //MPI_Bcast(&pBC,1,MPI_LOGICAL,0,comm); @@ -181,7 +185,7 @@ int main(int argc, char **argv) MPI_Bcast(&Ly,1,MPI_DOUBLE,0,comm); MPI_Bcast(&Lz,1,MPI_DOUBLE,0,comm); //................................................. - comm.barrier(); + MPI_Barrier(comm); RESTART_INTERVAL=interval; // ************************************************************** @@ -218,7 +222,7 @@ int main(int argc, char **argv) rank_xy, rank_XY, rank_xY, rank_Xy, rank_xz, rank_XZ, rank_xZ, rank_Xz, rank_yz, rank_YZ, rank_yZ, rank_Yz ); - comm.barrier(); + MPI_Barrier(comm); Nx += 2; Ny += 2; Nz += 2; @@ -258,7 +262,7 @@ int main(int argc, char **argv) // WriteLocalSolidID(LocalRankFilename, id, N); sprintf(LocalRankFilename,"%s%s","SignDist.",LocalRankString); ReadBinaryFile(LocalRankFilename, Averages.SDs.data(), N); - comm.barrier(); + MPI_Barrier(comm); if (rank == 0) cout << "Domain set." << endl; //....................................................................... @@ -432,7 +436,7 @@ int main(int argc, char **argv) //.......create and start timer............ double starttime,stoptime,cputime; - comm.barrier(); + MPI_Barrier(comm); starttime = MPI_Wtime(); //......................................... @@ -481,7 +485,7 @@ int main(int argc, char **argv) } //................................................................................... ScaLBL_DeviceBarrier(); - comm.barrier(); + MPI_Barrier(comm); // Timestep completed! 
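The broadcast hunks in lbpm_nondarcy_simulator.cpp keep the usual read-on-rank-0-then-broadcast idiom for the simulation parameters; only the surrounding barrier calls change. A compact sketch of that idiom with two representative parameters (file and variable names here are illustrative):

    #include <mpi.h>
    #include <fstream>

    int main(int argc, char **argv)
    {
        MPI_Init(&argc, &argv);
        MPI_Comm comm = MPI_COMM_WORLD;
        int rank;
        MPI_Comm_rank(comm, &rank);

        int Nx = 0;
        double tau = 1.0;
        if (rank == 0) {
            // Only the root rank reads the input file.
            std::ifstream input("Domain.in");
            input >> Nx >> tau;
        }
        MPI_Barrier(comm);
        // Every other rank receives the values before the domain is built.
        MPI_Bcast(&Nx, 1, MPI_INT, 0, comm);
        MPI_Bcast(&tau, 1, MPI_DOUBLE, 0, comm);
        MPI_Barrier(comm);

        MPI_Finalize();
        return 0;
    }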
@@ -553,7 +557,7 @@ int main(int argc, char **argv) //************************************************************************/ fclose(NONDARCY); ScaLBL_DeviceBarrier(); - comm.barrier(); + MPI_Barrier(comm); stoptime = MPI_Wtime(); if (rank==0) printf("-------------------------------------------------------------------\n"); // Compute the walltime per timestep @@ -571,7 +575,7 @@ int main(int argc, char **argv) NULL_USE(RESTART_INTERVAL); } // **************************************************** - comm.barrier(); + MPI_Barrier(comm); MPI_Finalize(); // **************************************************** } diff --git a/tests/lbpm_nonnewtonian_simulator.cpp b/tests/lbpm_nonnewtonian_simulator.cpp index ff8792e7..5c33841f 100644 --- a/tests/lbpm_nonnewtonian_simulator.cpp +++ b/tests/lbpm_nonnewtonian_simulator.cpp @@ -9,7 +9,7 @@ #include "common/ScaLBL.h" #include "common/Communication.h" #include "common/TwoPhase.h" -#include "common/MPI.h" +#include "common/MPI_Helpers.h" #include "ProfilerApp.h" #include "threadpool/thread_pool.h" @@ -99,12 +99,21 @@ inline void ZeroHalo(double *Data, int Nx, int Ny, int Nz) int main(int argc, char **argv) { + //***************************************** + // ***** MPI STUFF **************** + //***************************************** // Initialize MPI + //MPI_Init(&argc,&argv); + + /* + * Definitely seems to be an issue - let's hope James gets back to me... + */ int provided_thread_support = -1; MPI_Init_thread(&argc,&argv,MPI_THREAD_MULTIPLE,&provided_thread_support); - Utilities::MPI comm( MPI_COMM_WORLD ); - int rank = comm.getRank(); - int nprocs = comm.getSize(); + MPI_Comm comm; + MPI_Comm_dup(MPI_COMM_WORLD,&comm); + int rank = comm_rank(comm); + int nprocs = comm_size(comm); if ( rank==0 && provided_thread_supportSDs.data(), N); - comm.barrier(); + MPI_Barrier(comm); if (rank == 0) cout << "Domain set." << endl; /* 3 */ //....................................................................... @@ -589,14 +598,14 @@ int main(int argc, char **argv) delete [] cDen; delete [] cDistEven; delete [] cDistOdd; - comm.barrier(); + MPI_Barrier(comm); } /* 14 */ // //...................................................................... // ScaLBL_D3Q7_Init(ID, A_even, A_odd, &Den[0], Nx, Ny, Nz); // ScaLBL_D3Q7_Init(ID, B_even, B_odd, &Den[N], Nx, Ny, Nz); // ScaLBL_DeviceBarrier(); -// comm.barrier(); /* 15 */ +// MPI_Barrier(comm); /* 15 */ //....................................................................... // Once phase has been initialized, map solid to account for 'smeared' interface @@ -622,7 +631,7 @@ int main(int argc, char **argv) // ScaLBL_Comm.SendHalo(Phi); // ScaLBL_Comm.RecvHalo(Phi); // ScaLBL_DeviceBarrier(); -// comm.barrier(); +// MPI_Barrier(comm); // //************************************************************************* /* 18 */ @@ -661,7 +670,7 @@ int main(int argc, char **argv) //.......create and start timer............ double starttime,stoptime,cputime; - comm.barrier(); + MPI_Barrier(comm); starttime = MPI_Wtime(); /* @@ -795,7 +804,7 @@ int main(int argc, char **argv) } //................................................................................... ScaLBL_DeviceBarrier(); - comm.barrier(); + MPI_Barrier(comm); // Timestep completed! 
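The timing hunks in both simulators keep the same measurement idiom: a barrier lines the ranks up, MPI_Wtime() brackets the time-stepping loop, and a device barrier plus a second MPI barrier guarantee every rank has finished before the clock is stopped. A reduced sketch with the loop body omitted:

    #include <mpi.h>
    #include <cstdio>

    int main(int argc, char **argv)
    {
        MPI_Init(&argc, &argv);
        MPI_Comm comm = MPI_COMM_WORLD;
        int rank;
        MPI_Comm_rank(comm, &rank);

        const int timestepMax = 1000;     // illustrative loop length
        MPI_Barrier(comm);                // start all ranks together
        double starttime = MPI_Wtime();

        for (int timestep = 1; timestep <= timestepMax; timestep++) {
            // collision, streaming and halo exchange would run here
        }

        MPI_Barrier(comm);                // wait for the slowest rank
        double stoptime = MPI_Wtime();
        double cputime = (stoptime - starttime) / timestepMax;
        if (rank == 0) printf("CPU time per timestep = %e s\n", cputime);

        MPI_Finalize();
        return 0;
    }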
timestep++; @@ -809,7 +818,7 @@ int main(int argc, char **argv) } //************************************************************************/ ScaLBL_DeviceBarrier(); - comm.barrier(); + MPI_Barrier(comm); stoptime = MPI_Wtime(); if (rank==0) printf("-------------------------------------------------------------------\n"); // Compute the walltime per timestep @@ -826,7 +835,7 @@ int main(int argc, char **argv) NULL_USE(RESTART_INTERVAL); } - comm.barrier(); + MPI_Barrier(comm); MPI_Finalize(); //**************************************************** } diff --git a/tests/lbpm_nonnewtonian_simulator.h b/tests/lbpm_nonnewtonian_simulator.h index 4df5e628..20da1ac3 100644 --- a/tests/lbpm_nonnewtonian_simulator.h +++ b/tests/lbpm_nonnewtonian_simulator.h @@ -1,7 +1,7 @@ // Run the analysis, blob identification, and write restart files #include "common/Array.h" #include "common/Communication.h" -#include "common/MPI.h" +#include "common/MPI_Helpers.h" #include "IO/MeshDatabase.h" //#define ANALYSIS_INTERVAL 6 @@ -9,9 +9,20 @@ #define BLOBID_INTERVAL 1000 + + + + enum AnalysisType{ AnalyzeNone=0, IdentifyBlobs=0x01, CopyPhaseIndicator=0x02, CopySimState=0x04, ComputeAverages=0x08, CreateRestart=0x10, WriteVis=0x20 }; + + + + + + + template void DeleteArray( const TYPE *p ) { @@ -19,6 +30,12 @@ void DeleteArray( const TYPE *p ) } + + + + + + // Structure used to store ids struct AnalysisWaitIdStruct { ThreadPool::thread_id_t blobID; @@ -28,6 +45,7 @@ struct AnalysisWaitIdStruct { }; + // Helper class to write the restart file from a seperate thread class WriteRestartWorkItem: public ThreadPool::WorkItem { @@ -66,9 +84,9 @@ typedef std::shared_ptr > BlobIDList; // timestep(timestep_), Nx(Nx_), Ny(Ny_), Nz(Nz_), rank_info(rank_info_), // phase(phase_), dist(dist_), last_id(last_id_), new_index(new_index_), new_id(new_id_), new_list(new_list_) // { -// newcomm = Utilities::MPI(MPI_COMM_WORLD).dup(); +// MPI_Comm_dup(MPI_COMM_WORLD,&newcomm); // } -// ~BlobIdentificationWorkItem1() {} +// ~BlobIdentificationWorkItem1() { MPI_Comm_free(&newcomm); } // virtual void run() { // // Compute the global blob id and compare to the previous version // PROFILE_START("Identify blobs",1); @@ -88,7 +106,7 @@ typedef std::shared_ptr > BlobIDList; // const DoubleArray& dist; // BlobIDstruct last_id, new_index, new_id; // BlobIDList new_list; -// Utilities::MPI newcomm; +// MPI_Comm newcomm; //}; // @@ -104,9 +122,9 @@ typedef std::shared_ptr > BlobIDList; // timestep(timestep_), Nx(Nx_), Ny(Ny_), Nz(Nz_), rank_info(rank_info_), // phase(phase_), dist(dist_), last_id(last_id_), new_index(new_index_), new_id(new_id_), new_list(new_list_) // { -// newcomm = Utilities::MPI(MPI_COMM_WORLD).dup(); +// MPI_Comm_dup(MPI_COMM_WORLD,&newcomm); // } -// ~BlobIdentificationWorkItem2() { } +// ~BlobIdentificationWorkItem2() { MPI_Comm_free(&newcomm); } // virtual void run() { // // Compute the global blob id and compare to the previous version // PROFILE_START("Identify blobs maps",1); @@ -140,7 +158,7 @@ typedef std::shared_ptr > BlobIDList; // const DoubleArray& dist; // BlobIDstruct last_id, new_index, new_id; // BlobIDList new_list; -// Utilities::MPI newcomm; +// MPI_Comm newcomm; //}; // @@ -153,9 +171,9 @@ public: TwoPhase& Avgerages_, fillHalo& fillData_ ): timestep(timestep_), visData(visData_), Averages(Avgerages_), fillData(fillData_) { - newcomm = Utilities::MPI(MPI_COMM_WORLD).dup(); + MPI_Comm_dup(MPI_COMM_WORLD,&newcomm); } - ~WriteVisWorkItem() {} + ~WriteVisWorkItem() { MPI_Comm_free(&newcomm); } virtual void run() 
{ PROFILE_START("Save Vis",1); ASSERT(visData[0].vars[0]->name=="phase"); @@ -180,7 +198,7 @@ private: std::vector& visData; TwoPhase& Averages; fillHalo& fillData; - Utilities::MPI newcomm; + MPI_Comm newcomm; }; @@ -400,7 +418,7 @@ void run_analysis( int timestep, int restart_interval, // Spawn a thread to write the restart file if ( (type&CreateRestart) != 0 ) { - int rank = comm.getRank(); + int rank = MPI_WORLD_RANK(); // Wait for previous restart files to finish writing (not necessary, but helps to ensure memory usage is limited) tpool.wait(wait.restart); diff --git a/tests/lbpm_permeability_simulator.cpp b/tests/lbpm_permeability_simulator.cpp index eb5e6d4b..dbcfb96b 100644 --- a/tests/lbpm_permeability_simulator.cpp +++ b/tests/lbpm_permeability_simulator.cpp @@ -9,7 +9,7 @@ #include "common/ScaLBL.h" #include "common/Communication.h" #include "analysis/TwoPhase.h" -#include "common/MPI.h" +#include "common/MPI_Helpers.h" #include "models/MRTModel.h" //#define WRITE_SURFACES @@ -24,10 +24,11 @@ using namespace std; int main(int argc, char **argv) { // Initialize MPI + int rank,nprocs; MPI_Init(&argc,&argv); - Utilities::MPI comm( MPI_COMM_WORLD ); - int rank = comm.getRank(); - int nprocs = comm.getSize(); + MPI_Comm comm = MPI_COMM_WORLD; + MPI_Comm_rank(comm,&rank); + MPI_Comm_size(comm,&nprocs); { if (rank == 0){ printf("********************************************************\n"); @@ -38,7 +39,7 @@ int main(int argc, char **argv) int device=ScaLBL_SetDevice(rank); NULL_USE( device ); ScaLBL_DeviceBarrier(); - comm.barrier(); + MPI_Barrier(comm); ScaLBL_MRTModel MRT(rank,nprocs,comm); auto filename = argv[1]; @@ -51,7 +52,7 @@ int main(int argc, char **argv) MRT.VelocityField(); } // **************************************************** - comm.barrier(); + MPI_Barrier(comm); MPI_Finalize(); // **************************************************** } diff --git a/tests/lbpm_plates_pp.cpp b/tests/lbpm_plates_pp.cpp index acd64f52..8344df47 100644 --- a/tests/lbpm_plates_pp.cpp +++ b/tests/lbpm_plates_pp.cpp @@ -9,15 +9,19 @@ #include "common/ScaLBL.h" #include "common/Communication.h" #include "analysis/TwoPhase.h" -#include "common/MPI.h" +#include "common/MPI_Helpers.h" int main(int argc, char **argv) { + //***************************************** + // ***** MPI STUFF **************** + //***************************************** // Initialize MPI + int rank,nprocs; MPI_Init(&argc,&argv); - Utilities::MPI comm( MPI_COMM_WORLD ); - int rank = comm.getRank(); - int nprocs = comm.getSize(); + MPI_Comm comm = MPI_COMM_WORLD; + MPI_Comm_rank(comm,&rank); + MPI_Comm_size(comm,&nprocs); { // parallel domain size (# of sub-domains) int nprocx,nprocy,nprocz; @@ -75,7 +79,7 @@ int main(int argc, char **argv) } // ************************************************************** // Broadcast simulation parameters from rank 0 to all other procs - comm.barrier(); + MPI_Barrier(comm); // Computational domain MPI_Bcast(&Nx,1,MPI_INT,0,comm); MPI_Bcast(&Ny,1,MPI_INT,0,comm); @@ -88,7 +92,7 @@ int main(int argc, char **argv) MPI_Bcast(&Ly,1,MPI_DOUBLE,0,comm); MPI_Bcast(&Lz,1,MPI_DOUBLE,0,comm); //................................................. 
- comm.barrier(); + MPI_Barrier(comm); // ************************************************************** if (nprocs != nprocx*nprocy*nprocz){ @@ -112,7 +116,7 @@ int main(int argc, char **argv) std::shared_ptr Averages( new TwoPhase(Dm) ); - comm.barrier(); + MPI_Barrier(comm); Nz += 2; Nx = Ny = Nz; // Cubic domain @@ -196,7 +200,7 @@ int main(int argc, char **argv) } // **************************************************** - comm.barrier(); + MPI_Barrier(comm); MPI_Finalize(); // **************************************************** } diff --git a/tests/lbpm_porenetwork_pp.cpp b/tests/lbpm_porenetwork_pp.cpp index 4a6ccda7..496f9d86 100644 --- a/tests/lbpm_porenetwork_pp.cpp +++ b/tests/lbpm_porenetwork_pp.cpp @@ -9,15 +9,19 @@ #include "common/ScaLBL.h" #include "common/Communication.h" #include "analysis/TwoPhase.h" -#include "common/MPI.h" +#include "common/MPI_Helpers.h" int main(int argc, char **argv) { + //***************************************** + // ***** MPI STUFF **************** + //***************************************** // Initialize MPI + int rank,nprocs; MPI_Init(&argc,&argv); - Utilities::MPI comm( MPI_COMM_WORLD ); - int rank = comm.getRank(); - int nprocs = comm.getSize(); + MPI_Comm comm = MPI_COMM_WORLD; + MPI_Comm_rank(comm,&rank); + MPI_Comm_size(comm,&nprocs); { // parallel domain size (# of sub-domains) int nprocx,nprocy,nprocz; @@ -65,7 +69,7 @@ int main(int argc, char **argv) } // ************************************************************** // Broadcast simulation parameters from rank 0 to all other procs - comm.barrier(); + MPI_Barrier(comm); // Computational domain MPI_Bcast(&Nx,1,MPI_INT,0,comm); MPI_Bcast(&Ny,1,MPI_INT,0,comm); @@ -78,7 +82,7 @@ int main(int argc, char **argv) MPI_Bcast(&Ly,1,MPI_DOUBLE,0,comm); MPI_Bcast(&Lz,1,MPI_DOUBLE,0,comm); //................................................. - comm.barrier(); + MPI_Barrier(comm); // ************************************************************** if (nprocs != nprocx*nprocy*nprocz){ @@ -104,7 +108,7 @@ int main(int argc, char **argv) Dm->CommInit(); std::shared_ptr Averages( new TwoPhase(Dm) ); - comm.barrier(); + MPI_Barrier(comm); Nx += 2; Ny += 2; Nz += 2; @@ -289,7 +293,7 @@ int main(int argc, char **argv) } // **************************************************** - comm.barrier(); + MPI_Barrier(comm); MPI_Finalize(); // **************************************************** } diff --git a/tests/lbpm_random_pp.cpp b/tests/lbpm_random_pp.cpp index ad4b83cc..07c56e6f 100644 --- a/tests/lbpm_random_pp.cpp +++ b/tests/lbpm_random_pp.cpp @@ -52,10 +52,11 @@ inline void UnpackID(int *list, int count, char *recvbuf, char *ID){ int main(int argc, char **argv) { // Initialize MPI + int rank, nprocs; MPI_Init(&argc,&argv); - Utilities::MPI comm( MPI_COMM_WORLD ); - int rank = comm.getRank(); - int nprocs = comm.getSize(); + MPI_Comm comm = MPI_COMM_WORLD; + MPI_Comm_rank(comm,&rank); + MPI_Comm_size(comm,&nprocs); int InitialWetting; double Saturation; @@ -96,7 +97,7 @@ int main(int argc, char **argv) domain >> Lz; } - comm.barrier(); + MPI_Barrier(comm); // Computational domain MPI_Bcast(&nx,1,MPI_INT,0,comm); MPI_Bcast(&ny,1,MPI_INT,0,comm); @@ -109,7 +110,7 @@ int main(int argc, char **argv) MPI_Bcast(&Ly,1,MPI_DOUBLE,0,comm); MPI_Bcast(&Lz,1,MPI_DOUBLE,0,comm); //................................................. 
- comm.barrier(); + MPI_Barrier(comm); // Check that the number of processors >= the number of ranks if ( rank==0 ) { @@ -421,7 +422,7 @@ int main(int argc, char **argv) fwrite(id,1,N,ID); fclose(ID); - comm.barrier(); + MPI_Barrier(comm); MPI_Finalize(); return 0; } diff --git a/tests/lbpm_refine_pp.cpp b/tests/lbpm_refine_pp.cpp index 149ae673..d90dbb04 100644 --- a/tests/lbpm_refine_pp.cpp +++ b/tests/lbpm_refine_pp.cpp @@ -16,10 +16,11 @@ int main(int argc, char **argv) { // Initialize MPI + int rank, nprocs; MPI_Init(&argc,&argv); - Utilities::MPI comm( MPI_COMM_WORLD ); - int rank = comm.getRank(); - int nprocs = comm.getSize(); + MPI_Comm comm = MPI_COMM_WORLD; + MPI_Comm_rank(comm,&rank); + MPI_Comm_size(comm,&nprocs); { //....................................................................... @@ -421,7 +422,7 @@ int main(int argc, char **argv) } - comm.barrier(); + MPI_Barrier(comm); MPI_Finalize(); return 0; } diff --git a/tests/lbpm_segmented_decomp.cpp b/tests/lbpm_segmented_decomp.cpp index 1bc89adb..3384e454 100644 --- a/tests/lbpm_segmented_decomp.cpp +++ b/tests/lbpm_segmented_decomp.cpp @@ -18,10 +18,12 @@ int main(int argc, char **argv) { // Initialize MPI + int rank, nprocs; MPI_Init(&argc,&argv); - Utilities::MPI comm( MPI_COMM_WORLD ); - int rank = comm.getRank(); - int nprocs = comm.getSize(); + + MPI_Comm comm = MPI_COMM_WORLD; + MPI_Comm_rank(comm,&rank); + MPI_Comm_size(comm,&nprocs); { @@ -82,7 +84,7 @@ int main(int argc, char **argv) image >> zStart; } - comm.barrier(); + MPI_Barrier(comm); // Computational domain //................................................. MPI_Bcast(&nx,1,MPI_INT,0,comm); @@ -103,7 +105,7 @@ int main(int argc, char **argv) MPI_Bcast(&yStart,1,MPI_INT,0,comm); MPI_Bcast(&zStart,1,MPI_INT,0,comm); //................................................. - comm.barrier(); + MPI_Barrier(comm); // Check that the number of processors >= the number of ranks if ( rank==0 ) { @@ -127,7 +129,7 @@ int main(int argc, char **argv) fclose(SEGDAT); printf("Read segmented data from %s \n",Filename); } - comm.barrier(); + MPI_Barrier(comm); // Get the rank info int N = (nx+2)*(ny+2)*(nz+2); @@ -202,7 +204,7 @@ int main(int argc, char **argv) printf("Ready to recieve data %i at process %i \n", N,rank); MPI_Recv(Dm.id,N,MPI_CHAR,0,15,comm,MPI_STATUS_IGNORE); } - comm.barrier(); + MPI_Barrier(comm); nx+=2; ny+=2; nz+=2; N=nx*ny*nz; @@ -338,7 +340,7 @@ int main(int argc, char **argv) if (!MULTINPUT){ if (rank==0) printf("Writing symmetric domain reflection\n"); - comm.barrier(); + MPI_Barrier(comm); int symrank,sympz; sympz = 2*nprocz - Dm.kproc() -1; symrank = sympz*nprocx*nprocy + Dm.jproc()*nprocx + Dm.iproc(); @@ -364,6 +366,6 @@ int main(int argc, char **argv) fclose(SYMID); } } - comm.barrier(); + MPI_Barrier(comm); MPI_Finalize(); } diff --git a/tests/lbpm_segmented_pp.cpp b/tests/lbpm_segmented_pp.cpp index 39cf0bd1..007ff9d1 100644 --- a/tests/lbpm_segmented_pp.cpp +++ b/tests/lbpm_segmented_pp.cpp @@ -115,10 +115,11 @@ double ReadFromBlock( char *ID, int iproc, int jproc, int kproc, int Nx, int Ny, int main(int argc, char **argv) { // Initialize MPI + int rank, nprocs; MPI_Init(&argc,&argv); - Utilities::MPI comm( MPI_COMM_WORLD ); - int rank = comm.getRank(); - int nprocs = comm.getSize(); + MPI_Comm comm = MPI_COMM_WORLD; + MPI_Comm_rank(comm,&rank); + MPI_Comm_size(comm,&nprocs); { //....................................................................... 
// Reading the domain information file @@ -230,7 +231,7 @@ int main(int argc, char **argv) fclose(DIST); } - comm.barrier(); + MPI_Barrier(comm); MPI_Finalize(); return 0; diff --git a/tests/lbpm_sphere_pp.cpp b/tests/lbpm_sphere_pp.cpp index 2e053eed..98778b8d 100644 --- a/tests/lbpm_sphere_pp.cpp +++ b/tests/lbpm_sphere_pp.cpp @@ -9,7 +9,7 @@ #include "analysis/pmmc.h" #include "common/Domain.h" #include "common/SpherePack.h" -#include "common/MPI.h" +#include "common/MPI_Helpers.h" #include "common/Communication.h" /* @@ -22,11 +22,15 @@ using namespace std; int main(int argc, char **argv) { + //***************************************** + // ***** MPI STUFF **************** + //***************************************** // Initialize MPI + int rank,nprocs; MPI_Init(&argc,&argv); - Utilities::MPI comm( MPI_COMM_WORLD ); - int rank = comm.getRank(); - int nprocs = comm.getSize(); + MPI_Comm comm = MPI_COMM_WORLD; + MPI_Comm_rank(comm,&rank); + MPI_Comm_size(comm,&nprocs); // parallel domain size (# of sub-domains) int iproc,jproc,kproc; int sendtag,recvtag; @@ -123,14 +127,14 @@ int main(int argc, char **argv) //....................................................................... if (rank == 0) printf("Reading the sphere packing \n"); if (rank == 0) ReadSpherePacking(nspheres,cx,cy,cz,rad); - comm.barrier(); + MPI_Barrier(comm); // Broadcast the sphere packing to all processes MPI_Bcast(cx,nspheres,MPI_DOUBLE,0,comm); MPI_Bcast(cy,nspheres,MPI_DOUBLE,0,comm); MPI_Bcast(cz,nspheres,MPI_DOUBLE,0,comm); MPI_Bcast(rad,nspheres,MPI_DOUBLE,0,comm); //........................................................................... - comm.barrier(); + MPI_Barrier(comm); if (rank == 0) cout << "Domain set." << endl; if (rank == 0){ // Compute the Sauter mean diameter @@ -213,7 +217,7 @@ int main(int argc, char **argv) fclose(ID); // **************************************************** - comm.barrier(); + MPI_Barrier(comm); MPI_Finalize(); // **************************************************** } diff --git a/tests/lbpm_squaretube_pp.cpp b/tests/lbpm_squaretube_pp.cpp index c1f05aee..42715773 100644 --- a/tests/lbpm_squaretube_pp.cpp +++ b/tests/lbpm_squaretube_pp.cpp @@ -9,15 +9,19 @@ #include "common/ScaLBL.h" #include "common/Communication.h" #include "analysis/TwoPhase.h" -#include "common/MPI.h" +#include "common/MPI_Helpers.h" int main(int argc, char **argv) { + //***************************************** + // ***** MPI STUFF **************** + //***************************************** // Initialize MPI + int rank,nprocs; MPI_Init(&argc,&argv); - Utilities::MPI comm( MPI_COMM_WORLD ); - int rank = comm.getRank(); - int nprocs = comm.getSize(); + MPI_Comm comm = MPI_COMM_WORLD; + MPI_Comm_rank(comm,&rank); + MPI_Comm_size(comm,&nprocs); { // parallel domain size (# of sub-domains) int nprocx,nprocy,nprocz; @@ -81,7 +85,7 @@ int main(int argc, char **argv) } // ************************************************************** // Broadcast simulation parameters from rank 0 to all other procs - comm.barrier(); + MPI_Barrier(comm); // Computational domain MPI_Bcast(&Nx,1,MPI_INT,0,comm); MPI_Bcast(&Ny,1,MPI_INT,0,comm); @@ -94,7 +98,7 @@ int main(int argc, char **argv) MPI_Bcast(&Ly,1,MPI_DOUBLE,0,comm); MPI_Bcast(&Lz,1,MPI_DOUBLE,0,comm); //................................................. 
- comm.barrier(); + MPI_Barrier(comm); // ************************************************************** if (nprocs != nprocx*nprocy*nprocz){ @@ -121,7 +125,7 @@ int main(int argc, char **argv) rank_xy, rank_XY, rank_xY, rank_Xy, rank_xz, rank_XZ, rank_xZ, rank_Xz, rank_yz, rank_YZ, rank_yZ, rank_Yz ); - comm.barrier(); + MPI_Barrier(comm); Nz += 2; Nx = Ny = Nz; // Cubic domain @@ -255,7 +259,7 @@ int main(int argc, char **argv) } // **************************************************** - comm.barrier(); + MPI_Barrier(comm); MPI_Finalize(); // **************************************************** } diff --git a/tests/lbpm_uCT_maskfilter.cpp b/tests/lbpm_uCT_maskfilter.cpp index 857bc4e0..cff41ad7 100644 --- a/tests/lbpm_uCT_maskfilter.cpp +++ b/tests/lbpm_uCT_maskfilter.cpp @@ -14,7 +14,7 @@ #include "common/Array.h" #include "common/Domain.h" #include "common/Communication.h" -#include "common/MPI.h" +#include "common/MPI_Helpers.h" #include "IO/MeshDatabase.h" #include "IO/Mesh.h" #include "IO/Writer.h" @@ -30,11 +30,13 @@ int main(int argc, char **argv) { + // Initialize MPI + int rank, nprocs; MPI_Init(&argc,&argv); - Utilities::MPI comm( MPI_COMM_WORLD ); - int rank = comm.getRank(); - int nprocs = comm.getSize(); + MPI_Comm comm = MPI_COMM_WORLD; + MPI_Comm_rank(comm,&rank); + MPI_Comm_size(comm,&nprocs); Utilities::setErrorHandlers(); PROFILE_START("Main"); @@ -149,7 +151,7 @@ int main(int argc, char **argv) } netcdf::close( distid ); - comm.barrier(); + MPI_Barrier(comm); PROFILE_STOP("ReadDistance"); if (rank==0) printf("Finished reading distance =\n"); @@ -182,7 +184,7 @@ int main(int argc, char **argv) fillFloat[0]->fill( LOCVOL[0] ); } netcdf::close( fid ); - comm.barrier(); + MPI_Barrier(comm); PROFILE_STOP("ReadVolume"); if (rank==0) printf("Read complete\n"); @@ -445,7 +447,7 @@ int main(int argc, char **argv) PROFILE_STOP("Main"); PROFILE_SAVE("lbpm_uCT_maskfilter",true); - comm.barrier(); + MPI_Barrier(comm); MPI_Finalize(); return 0; } diff --git a/tests/lbpm_uCT_pp.cpp b/tests/lbpm_uCT_pp.cpp index 6e8d1bde..0285b864 100644 --- a/tests/lbpm_uCT_pp.cpp +++ b/tests/lbpm_uCT_pp.cpp @@ -14,7 +14,7 @@ #include "common/Array.h" #include "common/Domain.h" #include "common/Communication.h" -#include "common/MPI.h" +#include "common/MPI_Helpers.h" #include "IO/MeshDatabase.h" #include "IO/Mesh.h" #include "IO/Writer.h" @@ -31,10 +31,11 @@ int main(int argc, char **argv) { // Initialize MPI + int rank, nprocs; MPI_Init(&argc,&argv); - Utilities::MPI comm( MPI_COMM_WORLD ); - int rank = comm.getRank(); - int nprocs = comm.getSize(); + MPI_Comm comm = MPI_COMM_WORLD; + MPI_Comm_rank(comm,&rank); + MPI_Comm_size(comm,&nprocs); { Utilities::setErrorHandlers(); PROFILE_START("Main"); @@ -187,7 +188,7 @@ int main(int argc, char **argv) fillFloat[0]->fill( LOCVOL[0] ); } netcdf::close( fid ); - comm.barrier(); + MPI_Barrier(comm); PROFILE_STOP("ReadVolume"); if (rank==0) printf("Read complete\n"); @@ -250,15 +251,15 @@ int main(int argc, char **argv) } } } - count_plus = Dm[0]->Comm.sumReduce( count_plus); - count_minus = Dm[0]->Comm.sumReduce( count_minus); + count_plus=sumReduce( Dm[0]->Comm, count_plus); + count_minus=sumReduce( Dm[0]->Comm, count_minus); if (rank==0) printf("minimum value=%f, max value=%f \n",min_value,max_value); if (rank==0) printf("plus=%i, minus=%i \n",count_plus,count_minus); ASSERT( count_plus > 0 && count_minus > 0 ); - comm.barrier(); - mean_plus = Dm[0]->Comm.sumReduce( mean_plus ) / count_plus; - mean_minus = Dm[0]->Comm.sumReduce( mean_minus ) / 
count_minus; - comm.barrier(); + MPI_Barrier(comm); + mean_plus = sumReduce( Dm[0]->Comm, mean_plus ) / count_plus; + mean_minus = sumReduce( Dm[0]->Comm, mean_minus ) / count_minus; + MPI_Barrier(comm); if (rank==0) printf(" Region 1 mean (+): %f, Region 2 mean (-): %f \n",mean_plus, mean_minus); //if (rank==0) printf("Scale the input data (size = %i) \n",LOCVOL[0].length()); @@ -279,7 +280,7 @@ int main(int argc, char **argv) // Fill the source data for the coarse meshes if (rank==0) printf("Coarsen the mesh for N_levels=%i \n",N_levels); - comm.barrier(); + MPI_Barrier(comm); PROFILE_START("CoarsenMesh"); for (int i=1; i filter(ratio[0],ratio[1],ratio[2]); @@ -295,7 +296,7 @@ int main(int argc, char **argv) printf(" filter_x=%i, filter_y=%i, filter_z=%i \n",int(filter.size(0)),int(filter.size(1)),int(filter.size(2)) ); printf(" ratio= %i,%i,%i \n",int(ratio[0]),int(ratio[1]),int(ratio[2]) ); } - comm.barrier(); + MPI_Barrier(comm); } PROFILE_STOP("CoarsenMesh"); @@ -307,7 +308,7 @@ int main(int argc, char **argv) NonLocalMean.back(), *fillFloat.back(), *Dm.back(), nprocx, rough_cutoff, lamda, nlm_sigsq, nlm_depth); PROFILE_STOP("Solve coarse mesh"); - comm.barrier(); + MPI_Barrier(comm); // Refine the solution PROFILE_START("Refine distance"); @@ -321,7 +322,7 @@ int main(int argc, char **argv) rough_cutoff, lamda, nlm_sigsq, nlm_depth); } PROFILE_STOP("Refine distance"); - comm.barrier(); + MPI_Barrier(comm); // Perform a final filter PROFILE_START("Filtering final domains"); @@ -417,14 +418,14 @@ int main(int argc, char **argv) meshData[0].vars.push_back(filter_Dist2_var); fillDouble[0]->copy( filter_Dist2, filter_Dist2_var->data ); #endif - comm.barrier(); + MPI_Barrier(comm); if (rank==0) printf("Writing output \n"); // Write visulization data IO::writeData( 0, meshData, comm ); if (rank==0) printf("Finished. 
\n"); // Compute the Minkowski functionals - comm.barrier(); + MPI_Barrier(comm); auto Averages = std::make_shared(Dm[0]); Array phase_label(Nx[0]+2,Ny[0]+2,Nz[0]+2); @@ -456,7 +457,7 @@ int main(int argc, char **argv) } PROFILE_STOP("Main"); PROFILE_SAVE("lbpm_uCT_pp",true); - comm.barrier(); + MPI_Barrier(comm); MPI_Finalize(); return 0; } diff --git a/tests/testCommunication.cpp b/tests/testCommunication.cpp index 911ef1c5..57ce0959 100644 --- a/tests/testCommunication.cpp +++ b/tests/testCommunication.cpp @@ -6,7 +6,7 @@ #include #include "common/Communication.h" -#include "common/MPI.h" +#include "common/MPI_Helpers.h" #include "common/Array.h" using namespace std; @@ -15,9 +15,11 @@ using namespace std; //*************************************************************************************** -int test_communication( const Utilities::MPI& comm, int nprocx, int nprocy, int nprocz ) +int test_communication( MPI_Comm comm, int nprocx, int nprocy, int nprocz ) { - int rank = comm.getRank(); + int rank,nprocs; + MPI_Comm_rank(comm,&rank); + MPI_Comm_size(comm,&nprocs); int iproc,jproc,kproc; int sendtag,recvtag; if (rank==0) printf("\nRunning test %i %i %i\n",nprocx,nprocy,nprocz); @@ -36,7 +38,7 @@ int test_communication( const Utilities::MPI& comm, int nprocx, int nprocy, int rank_xy, rank_XY, rank_xY, rank_Xy, rank_xz, rank_XZ, rank_xZ, rank_Xz, rank_yz, rank_YZ, rank_yZ, rank_Yz ); - comm.barrier(); + MPI_Barrier(comm); //********************************** @@ -83,7 +85,7 @@ int test_communication( const Utilities::MPI& comm, int nprocx, int nprocy, int sendCount_xy = sendCount_yz = sendCount_xz = sendCount_Xy = sendCount_Yz = sendCount_xZ = 0; sendCount_xY = sendCount_yZ = sendCount_Xz = sendCount_XY = sendCount_YZ = sendCount_XZ = 0; - comm.barrier(); + MPI_Barrier(comm); if (rank==0) printf ("SendLists are ready on host\n"); //...................................................................................... 
// Use MPI to fill in the recvCounts form the associated processes @@ -156,7 +158,7 @@ int test_communication( const Utilities::MPI& comm, int nprocx, int nprocy, int recvCount_yz, recvCount_YZ, recvCount_yZ, recvCount_Yz, rank_x, rank_y, rank_z, rank_X, rank_Y, rank_Z, rank_xy, rank_XY, rank_xY, rank_Xy, rank_xz, rank_XZ, rank_xZ, rank_Xz, rank_yz, rank_YZ, rank_yZ, rank_Yz ); - comm.barrier(); + MPI_Barrier(comm); if (rank==0) printf ("RecvLists finished\n"); // Free memory @@ -179,9 +181,11 @@ int test_communication( const Utilities::MPI& comm, int nprocx, int nprocy, int template -int testHalo( const Utilities::MPI& comm, int nprocx, int nprocy, int nprocz, int depth ) +int testHalo( MPI_Comm comm, int nprocx, int nprocy, int nprocz, int depth ) { - int rank = comm.getRank(); + int rank,nprocs; + MPI_Comm_rank(comm,&rank); + MPI_Comm_size(comm,&nprocs); if ( rank==0 ) printf("\nRunning Halo test %i %i %i %i\n",nprocx,nprocy,nprocz,depth); @@ -251,10 +255,11 @@ int testHalo( const Utilities::MPI& comm, int nprocx, int nprocy, int nprocz, in int main(int argc, char **argv) { // Initialize MPI + int rank,nprocs; MPI_Init(&argc,&argv); - Utilities::MPI comm( MPI_COMM_WORLD ); - int rank = comm.getRank(); - int nprocs = comm.getSize(); + MPI_Comm comm = MPI_COMM_WORLD; + MPI_Comm_rank(comm,&rank); + MPI_Comm_size(comm,&nprocs); // Run the test with different domains int N_errors = 0; @@ -284,9 +289,10 @@ int main(int argc, char **argv) } // Finished - comm.barrier(); - int N_errors_global = comm.sumReduce( N_errors ); - comm.barrier(); + MPI_Barrier(comm); + int N_errors_global=0; + MPI_Allreduce( &N_errors, &N_errors_global, 1, MPI_INT, MPI_SUM, comm ); + MPI_Barrier(comm); MPI_Finalize(); if ( rank==0 ) { if ( N_errors_global==0 ) diff --git a/tests/test_dcel_minkowski.cpp b/tests/test_dcel_minkowski.cpp index 2669b522..0d6cbca9 100644 --- a/tests/test_dcel_minkowski.cpp +++ b/tests/test_dcel_minkowski.cpp @@ -26,9 +26,9 @@ std::shared_ptr loadInputs( ) int main(int argc, char **argv) { MPI_Init(&argc,&argv); - Utilities::MPI comm( MPI_COMM_WORLD ); - //int rank = comm.getRank(); - //int nprocs = comm.getSize(); + MPI_Comm comm = MPI_COMM_WORLD; + //int rank = MPI_WORLD_RANK(); + //int nprocs = MPI_WORLD_SIZE(); int toReturn = 0; { int i,j,k; @@ -99,7 +99,7 @@ int main(int argc, char **argv) } PROFILE_SAVE("test_dcel_minkowski"); - comm.barrier(); + MPI_Barrier(comm); MPI_Finalize(); return toReturn; } diff --git a/tests/test_dcel_tri_normal.cpp b/tests/test_dcel_tri_normal.cpp index b6497140..1e85b1f3 100644 --- a/tests/test_dcel_tri_normal.cpp +++ b/tests/test_dcel_tri_normal.cpp @@ -26,7 +26,7 @@ std::shared_ptr loadInputs( ) int main(int argc, char **argv) { MPI_Init(&argc,&argv); - Utilities::MPI comm( MPI_COMM_WORLD ); + MPI_Comm comm = MPI_COMM_WORLD; int toReturn = 0; { int i,j,k; @@ -136,7 +136,7 @@ int main(int argc, char **argv) if (count_check > 0) toReturn=2; else printf("Succeeded. 
\n"); } - comm.barrier(); + MPI_Barrier(comm); MPI_Finalize(); return toReturn; } From 679c53a4690876755a64a44c79d4db856c58dd01 Mon Sep 17 00:00:00 2001 From: James E McClure Date: Thu, 19 Mar 2020 13:35:10 -0400 Subject: [PATCH 059/121] Add wall factor to morphgrow to change solid penalty term --- analysis/morphology.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/analysis/morphology.cpp b/analysis/morphology.cpp index 72a17892..8f658328 100644 --- a/analysis/morphology.cpp +++ b/analysis/morphology.cpp @@ -692,6 +692,8 @@ double MorphGrow(DoubleArray &BoundaryDist, DoubleArray &Dist, Array &id, int Nz = Dm->Nz; int rank = Dm->rank(); + double WALL_FACTOR = 0.0; // 1.0 if you want to penalize movements close to solid + double count=0.0; for (int k=1; k &id, for (int j=1; j MAX_DISPLACEMENT) MAX_DISPLACEMENT= fabs(wallweight*morph_delta); if (Dist(i,j,k) - wallweight*morph_delta < 0.0){ @@ -769,7 +770,7 @@ double MorphGrow(DoubleArray &BoundaryDist, DoubleArray &Dist, Array &id, for (int j=1; j Date: Thu, 19 Mar 2020 13:41:31 -0400 Subject: [PATCH 060/121] Add wall factor to morphgrow to change solid penalty term --- analysis/morphology.cpp | 10 ++++------ analysis/morphology.h | 2 +- models/ColorModel.cpp | 3 ++- 3 files changed, 7 insertions(+), 8 deletions(-) diff --git a/analysis/morphology.cpp b/analysis/morphology.cpp index 8f658328..f6bb3469 100644 --- a/analysis/morphology.cpp +++ b/analysis/morphology.cpp @@ -685,15 +685,13 @@ double MorphDrain(DoubleArray &SignDist, signed char *id, std::shared_ptr &id, std::shared_ptr Dm, double TargetGrowth) +double MorphGrow(DoubleArray &BoundaryDist, DoubleArray &Dist, Array &id, std::shared_ptr Dm, double TargetGrowth, double WallFactor) { int Nx = Dm->Nx; int Ny = Dm->Ny; int Nz = Dm->Nz; int rank = Dm->rank(); - - double WALL_FACTOR = 0.0; // 1.0 if you want to penalize movements close to solid - + double count=0.0; for (int k=1; k &id, for (int j=1; j MAX_DISPLACEMENT) MAX_DISPLACEMENT= fabs(wallweight*morph_delta); if (Dist(i,j,k) - wallweight*morph_delta < 0.0){ @@ -770,7 +768,7 @@ double MorphGrow(DoubleArray &BoundaryDist, DoubleArray &Dist, Array &id, for (int j=1; j Dm, double VoidFraction, signed char ErodeLabel, signed char ReplaceLabel); double MorphDrain(DoubleArray &SignDist, signed char *id, std::shared_ptr Dm, double VoidFraction); -double MorphGrow(DoubleArray &BoundaryDist, DoubleArray &Dist, Array &id, std::shared_ptr Dm, double TargetVol); +double MorphGrow(DoubleArray &BoundaryDist, DoubleArray &Dist, Array &id, std::shared_ptr Dm, double TargetVol, double WallFactor); diff --git a/models/ColorModel.cpp b/models/ColorModel.cpp index 4ef7573f..05004110 100644 --- a/models/ColorModel.cpp +++ b/models/ColorModel.cpp @@ -1296,6 +1296,7 @@ double ScaLBL_ColorModel::MorphInit(const double beta, const double target_delta double vF = 0.f; double vS = 0.f; double delta_volume; + double WallFactor = 0.0; DoubleArray phase(Nx,Ny,Nz); IntArray phase_label(Nx,Ny,Nz);; @@ -1395,7 +1396,7 @@ double ScaLBL_ColorModel::MorphInit(const double beta, const double target_delta double target_delta_volume_incremental = target_delta_volume; if (fabs(target_delta_volume) > 0.01*volume_initial) target_delta_volume_incremental = 0.01*volume_initial*target_delta_volume/fabs(target_delta_volume); - delta_volume = MorphGrow(Averages->SDs,phase_distance,phase_id,Averages->Dm, target_delta_volume_incremental); + delta_volume = MorphGrow(Averages->SDs,phase_distance,phase_id,Averages->Dm, target_delta_volume_incremental, 
WallFactor); for (int k=0; k Date: Sat, 21 Mar 2020 09:45:43 -0400 Subject: [PATCH 061/121] make sure input database is updated across all ranks --- analysis/runAnalysis.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/analysis/runAnalysis.cpp b/analysis/runAnalysis.cpp index 6c76f58b..384d4d69 100644 --- a/analysis/runAnalysis.cpp +++ b/analysis/runAnalysis.cpp @@ -907,9 +907,8 @@ void runAnalysis::run(int timestep, std::shared_ptr input_db, TwoPhase // Spawn a thread to write the restart file // if ( matches(type,AnalysisType::CreateRestart) ) { if (timestep%d_restart_interval==0){ - + input_db->putScalar( "Restart", true ); if (d_rank==0) { - input_db->putScalar( "Restart", true ); std::ofstream OutStream("Restart.db"); input_db->print(OutStream, ""); OutStream.close(); @@ -1010,10 +1009,11 @@ void runAnalysis::basic(int timestep, std::shared_ptr input_db, SubPha ScaLBL_CopyToHost(cfq.get(),fq,19*d_Np*sizeof(double)); ScaLBL_CopyToHost(cDen.get(),Den,2*d_Np*sizeof(double)); + color_db->putScalar("timestep",timestep); + color_db->putScalar( "Restart", true ); + input_db->putDatabase("Color", color_db); + if (d_rank==0) { - color_db->putScalar("timestep",timestep); - color_db->putScalar( "Restart", true ); - input_db->putDatabase("Color", color_db); std::ofstream OutStream("Restart.db"); input_db->print(OutStream, ""); OutStream.close(); From 8d9f35d1d384e26ba84fb2e3bcdc7318a43eac4f Mon Sep 17 00:00:00 2001 From: James E McClure Date: Sat, 21 Mar 2020 09:45:57 -0400 Subject: [PATCH 062/121] updating R helper functions --- example/Workflow/HelperFunctions.R | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/example/Workflow/HelperFunctions.R b/example/Workflow/HelperFunctions.R index 6c8bd903..669b28fe 100644 --- a/example/Workflow/HelperFunctions.R +++ b/example/Workflow/HelperFunctions.R @@ -7,19 +7,20 @@ ReadDatabase<-function(FILE){ INPUT<-gsub(';','',readLines(FILE)) S<-gsub('tauA = ','',gsub("\\s+"," ",(grep("tauA",INPUT,value=TRUE)))) - TAU_A = as.numeric(S) + TAU_A = as.numeric(gsub("/.*","",S)) S<-gsub('tauB = ','',gsub("\\s+"," ",(grep("tauB",INPUT,value=TRUE)))) - TAU_B = as.numeric(S) + TAU_B = as.numeric(gsub("/.*","",S)) S<-gsub('rhoA = ','',gsub("\\s+"," ",(grep("rhoA",INPUT,value=TRUE)))) - RHO_A = as.numeric(S) + RHO_A = as.numeric(gsub("/.*","",S)) S<-gsub('rhoB = ','',gsub("\\s+"," ",(grep("rhoB",INPUT,value=TRUE)))) - RHO_B = as.numeric(S) + RHO_B = as.numeric(gsub("/.*","",S)) S<-gsub('alpha = ','',gsub("\\s+"," ",(grep("alpha",INPUT,value=TRUE)))) - ALPHA = as.numeric(S) + ALPHA = as.numeric(gsub("/.*","",S)) # Read the affinity S<-gsub('ComponentAffinity = ','',gsub("\\s+"," ",(grep("ComponentAffinity",INPUT,value=TRUE)))) + S<-gsub("/.*","",S) AFFINITY<-as.numeric(unlist(strsplit(S,", "))) PARAMETERS<-c(TAU_A,TAU_B,RHO_A,RHO_B,ALPHA,AFFINITY) From ad20322f31c01d3604b321553a113e55bc972e49 Mon Sep 17 00:00:00 2001 From: James E McClure Date: Sat, 21 Mar 2020 09:53:47 -0400 Subject: [PATCH 063/121] refactor refine pp tool --- tests/lbpm_refine_pp.cpp | 41 ++++++++++++++++++++++++++++++++++++---- 1 file changed, 37 insertions(+), 4 deletions(-) diff --git a/tests/lbpm_refine_pp.cpp b/tests/lbpm_refine_pp.cpp index d90dbb04..ad729aa2 100644 --- a/tests/lbpm_refine_pp.cpp +++ b/tests/lbpm_refine_pp.cpp @@ -40,7 +40,6 @@ int main(int argc, char **argv) auto domain_db = db->getDatabase( "Domain" ); // Read domain parameters - auto L = domain_db->getVector( "L" ); auto size = domain_db->getVector( "n" ); auto nproc = 
domain_db->getVector( "nproc" ); auto ReadValues = domain_db->getVector( "ReadValues" ); @@ -92,8 +91,42 @@ int main(int argc, char **argv) } } Dm.CommInit(); - + + Domain Mask(rnx,rny,rnz,rank,nprocx,nprocy,nprocz,Lx,Ly,Lz,BoundaryCondition); + Mask->ReadIDs(); + Mask.CommInit(); + for (int i=0; iid[i]; // save what was read + + // Generate the signed distance map + // Initialize the domain and communication + Array Labels(nx,ny,nz); DoubleArray SignDist(nx,ny,nz); + + // Solve for the position of the solid phase + for (int k=0;kid[n]; + if (label > 0) Labels(i,j,k) = 1; + else Labels(i,j,k) = 0; + } + } + } + // Initialize the signed distance function + for (int k=0;kSDs(i,j,k) = 2.0*double(Labels(i,j,k))-1.0; + } + } + } + // MeanFilter(Averages->SDs); + if (rank==0) printf("Initialized solid phase -- Converting to Signed Distance function \n"); + CalcDist(SignDist,Labels,*Mask); + // Read the signed distance from file sprintf(LocalRankFilename,"SignDist.%05i",rank); FILE *DIST = fopen(LocalRankFilename,"rb"); @@ -102,7 +135,7 @@ int main(int argc, char **argv) if (ReadSignDist != size_t(N)) printf("lbpm_refine_pp: Error reading signed distance function (rank=%i)\n",rank); fclose(DIST); - char *Labels; + /* char *Labels; Labels = new char[N]; sprintf(LocalRankFilename,"ID.%05i",rank); FILE *LABELS = fopen(LocalRankFilename,"rb"); @@ -110,7 +143,7 @@ int main(int argc, char **argv) ReadLabels=fread(Labels,1,N,LABELS); if (ReadLabels != size_t(N)) printf("lbpm_refine_pp: Error reading ID (rank=%i)\n",rank); fclose(LABELS); - +*/ if ( rank==0 ) printf("Set up Domain, read input distance \n"); DoubleArray RefinedSignDist(rnx,rny,rnz); From afbef5075208d4fcf8308a0bc1f8b5034480a322 Mon Sep 17 00:00:00 2001 From: JamesEMcclure Date: Sat, 21 Mar 2020 10:03:20 -0400 Subject: [PATCH 064/121] update lbpm_refine_pp --- tests/lbpm_refine_pp.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/lbpm_refine_pp.cpp b/tests/lbpm_refine_pp.cpp index ad729aa2..5f1c5875 100644 --- a/tests/lbpm_refine_pp.cpp +++ b/tests/lbpm_refine_pp.cpp @@ -12,6 +12,7 @@ #include "common/Communication.h" #include "common/Domain.h" #include "analysis/pmmc.h" +#include "analysis/distance.h" int main(int argc, char **argv) { @@ -93,10 +94,9 @@ int main(int argc, char **argv) Dm.CommInit(); Domain Mask(rnx,rny,rnz,rank,nprocx,nprocy,nprocz,Lx,Ly,Lz,BoundaryCondition); - Mask->ReadIDs(); + Mask.ReadIDs(); Mask.CommInit(); - for (int i=0; iid[i]; // save what was read - + // Generate the signed distance map // Initialize the domain and communication Array Labels(nx,ny,nz); @@ -108,7 +108,7 @@ int main(int argc, char **argv) for (int i=0;iid[n]; + signed char label = Mask.id[n]; if (label > 0) Labels(i,j,k) = 1; else Labels(i,j,k) = 0; } @@ -119,13 +119,13 @@ int main(int argc, char **argv) for (int j=0;jSDs(i,j,k) = 2.0*double(Labels(i,j,k))-1.0; + SignDist(i,j,k) = 2.0*double(Labels(i,j,k))-1.0; } } } // MeanFilter(Averages->SDs); if (rank==0) printf("Initialized solid phase -- Converting to Signed Distance function \n"); - CalcDist(SignDist,Labels,*Mask); + CalcDist(SignDist,Labels,Mask); // Read the signed distance from file sprintf(LocalRankFilename,"SignDist.%05i",rank); @@ -178,7 +178,7 @@ int main(int argc, char **argv) pt.y=0.5*(rj-1)+1.f; pt.z=0.5*(rk-1)+1.f; RefinedSignDist(ri,rj,rk) = LocalApprox.eval(pt); - RefineLabel(ri,rj,rk) = Labels[k*nx*ny+j*nx+i]; + RefineLabel(ri,rj,rk) = Labels(i,j,k); } } } From b206ad80a22d5d82112bd3137f0af7cf9d12ca1c Mon Sep 17 00:00:00 2001 From: 
James E McClure Date: Sat, 21 Mar 2020 10:06:36 -0400 Subject: [PATCH 065/121] use Filename in refine pp --- tests/lbpm_refine_pp.cpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tests/lbpm_refine_pp.cpp b/tests/lbpm_refine_pp.cpp index 5f1c5875..be2ba346 100644 --- a/tests/lbpm_refine_pp.cpp +++ b/tests/lbpm_refine_pp.cpp @@ -94,7 +94,13 @@ int main(int argc, char **argv) Dm.CommInit(); Domain Mask(rnx,rny,rnz,rank,nprocx,nprocy,nprocz,Lx,Ly,Lz,BoundaryCondition); - Mask.ReadIDs(); + if (domain_db->keyExists( "Filename" )){ + auto Filename = domain_db->getScalar( "Filename" ); + Mask.Decomp(Filename); + } + else{ + Mask.ReadIDs(); + } Mask.CommInit(); // Generate the signed distance map From dbbd8e30b7e28951231f9a4c7445894d6e779750 Mon Sep 17 00:00:00 2001 From: JamesEMcclure Date: Sat, 21 Mar 2020 10:32:16 -0400 Subject: [PATCH 066/121] fix refine pp --- tests/lbpm_refine_pp.cpp | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/tests/lbpm_refine_pp.cpp b/tests/lbpm_refine_pp.cpp index be2ba346..0f0ffdda 100644 --- a/tests/lbpm_refine_pp.cpp +++ b/tests/lbpm_refine_pp.cpp @@ -52,6 +52,7 @@ int main(int argc, char **argv) int nprocx = nproc[0]; int nprocy = nproc[1]; int nprocz = nproc[2]; + int BoundaryCondition=0; // Check that the number of processors >= the number of ranks if ( rank==0 ) { @@ -63,15 +64,26 @@ int main(int argc, char **argv) ERROR("Insufficient number of processors"); } - char LocalRankFilename[40]; + //Domain Mask(nx,ny,nz,rank,nprocx,nprocy,nprocz,Lx,Ly,Lz,BoundaryCondition); + Domain Mask(domain_db,MPI_COMM_WORLD); + if (domain_db->keyExists( "Filename" )){ + auto Filename = domain_db->getScalar( "Filename" ); + if (rank==0) printf("Reading domain from %s \n",Filename.c_str()); + Mask.Decomp(Filename); + if (rank==0) printf("Complete. 
\n"); + } + else{ + Mask.ReadIDs(); + } + Mask.CommInit(); + char LocalRankFilename[40]; int rnx=2*nx; int rny=2*ny; int rnz=2*nz; if (rank==0) printf("Refining mesh to %i x %i x %i \n",rnx,rny,rnz); - int BoundaryCondition=0; Domain Dm(rnx,rny,rnz,rank,nprocx,nprocy,nprocz,Lx,Ly,Lz,BoundaryCondition); // Communication the halos @@ -83,6 +95,7 @@ int main(int argc, char **argv) int N = nx*ny*nz; // Define communication sub-domain -- everywhere + if (rank==0) printf("Initialize refined domain \n"); for (int k=0; kkeyExists( "Filename" )){ - auto Filename = domain_db->getScalar( "Filename" ); - Mask.Decomp(Filename); - } - else{ - Mask.ReadIDs(); - } - Mask.CommInit(); - // Generate the signed distance map // Initialize the domain and communication Array Labels(nx,ny,nz); @@ -133,7 +136,7 @@ int main(int argc, char **argv) if (rank==0) printf("Initialized solid phase -- Converting to Signed Distance function \n"); CalcDist(SignDist,Labels,Mask); - // Read the signed distance from file + /* // Read the signed distance from file sprintf(LocalRankFilename,"SignDist.%05i",rank); FILE *DIST = fopen(LocalRankFilename,"rb"); size_t ReadSignDist; @@ -141,7 +144,7 @@ int main(int argc, char **argv) if (ReadSignDist != size_t(N)) printf("lbpm_refine_pp: Error reading signed distance function (rank=%i)\n",rank); fclose(DIST); - /* char *Labels; + char *Labels; Labels = new char[N]; sprintf(LocalRankFilename,"ID.%05i",rank); FILE *LABELS = fopen(LocalRankFilename,"rb"); From 05ed256b30ab969000fc76750cedad22fe4ded05 Mon Sep 17 00:00:00 2001 From: James E McClure Date: Sat, 21 Mar 2020 10:37:32 -0400 Subject: [PATCH 067/121] adding refine options --- tests/lbpm_refine_pp.cpp | 433 ++++++++++++++++----------------------- 1 file changed, 173 insertions(+), 260 deletions(-) diff --git a/tests/lbpm_refine_pp.cpp b/tests/lbpm_refine_pp.cpp index 0f0ffdda..1a7ff05b 100644 --- a/tests/lbpm_refine_pp.cpp +++ b/tests/lbpm_refine_pp.cpp @@ -192,277 +192,190 @@ int main(int argc, char **argv) } } fillData.fill(RefinedSignDist); - // sprintf(LocalRankFilename,"ID.%05i",rank); - //FILE *ID = fopen(LocalRankFilename,"wb"); - //fwrite(id,1,N,ID); - //fclose(ID); -/* - sprintf(LocalRankFilename,"RefineDist.%05i",rank); - FILE *REFINEDIST = fopen(LocalRankFilename,"wb"); - fwrite(RefinedSignDist.data(),8,rnx*rny*rnz,REFINEDIST); - fclose(REFINEDIST); -*/ - if ( rank==0 ) printf("Write output \n"); - DoubleArray BlockDist(nx,ny,nz); - FILE *WRITEID, *REFINEDIST; - char * id; - id = new char [N]; - int writerank; - // Write output blocks with the same sub-domain size as origina - // refinement increases the size of the process grid - writerank = 8*Dm.kproc()*nprocx*nprocy + 4*Dm.jproc()*nprocx + 2*Dm.iproc(); - for (int k=0; k 0) id[k*nx*ny + j*nx + i]=2; - else id[k*nx*ny + j*nx + i] = RefineLabel(i,j,k); + if (domain_db->keyExists( "Filename" )){ + auto Filename = domain_db->getScalar( "Filename" ); + if ( rank==0 ) printf("Write output \n"); + sprintf(LocalRankFilename,Filename.c_str(),".refine"); + WRITEID = fopen(LocalRankFilename,"wb"); + fwrite(RefineLabel.data(),1,rnx*rny*rnz,WRITEID); + fclose(WRITEID); + } + else{ + DoubleArray BlockDist(nx,ny,nz); + FILE *WRITEID, *REFINEDIST; + char * id; + id = new char [N]; + int writerank; + + // Write output blocks with the same sub-domain size as origina + // refinement increases the size of the process grid + writerank = 8*Dm.kproc()*nprocx*nprocy + 4*Dm.jproc()*nprocx + 2*Dm.iproc(); + for (int k=0; k 0) id[k*nx*ny + j*nx + i]=2; + else id[k*nx*ny + j*nx + i] = 
RefineLabel(i,j,k); + } } } - } - sprintf(LocalRankFilename,"RefineDist.%05i",writerank); - REFINEDIST = fopen(LocalRankFilename,"wb"); - fwrite(BlockDist.data(),8,nx*ny*nz,REFINEDIST); - fclose(REFINEDIST); - -/* for (int k=0; k 0.f) - id[k*nx*ny + j*nx + i]=2; - else - id[k*nx*ny + j*nx + i]= 0; - } - } - } - */ - sprintf(LocalRankFilename,"RefineID.%05i",writerank); - WRITEID = fopen(LocalRankFilename,"wb"); - fwrite(id,1,nx*ny*nz,WRITEID); - fclose(WRITEID); - - writerank = 8*Dm.kproc()*nprocx*nprocy + 4*Dm.jproc()*nprocx + 2*Dm.iproc()+1; - for (int k=0; k 0) id[k*nx*ny + j*nx + i]=2; - else id[k*nx*ny + j*nx + i] = RefineLabel(i+nx-2,j,k); - } - } - } - sprintf(LocalRankFilename,"RefineDist.%05i",writerank); - REFINEDIST = fopen(LocalRankFilename,"wb"); - fwrite(BlockDist.data(),8,nx*ny*nz,REFINEDIST); - fclose(REFINEDIST); - -/* for (int k=0; k 0.f) - id[k*nx*ny + j*nx + i]=2; - else - id[k*nx*ny + j*nx + i]=0; - } - } - } - */ - sprintf(LocalRankFilename,"RefineID.%05i",writerank); - WRITEID = fopen(LocalRankFilename,"wb"); - fwrite(id,1,nx*ny*nz,WRITEID); - fclose(WRITEID); + sprintf(LocalRankFilename,"RefineDist.%05i",writerank); + REFINEDIST = fopen(LocalRankFilename,"wb"); + fwrite(BlockDist.data(),8,nx*ny*nz,REFINEDIST); + fclose(REFINEDIST); - writerank = (2*Dm.kproc())*4*nprocx*nprocy + (2*Dm.jproc()+1)*2*nprocx + 2*Dm.iproc()+1; - for (int k=0; k 0) id[k*nx*ny + j*nx + i]=2; - else id[k*nx*ny + j*nx + i] = RefineLabel(i+nx-2,j+ny-2,k); - } - } - } - sprintf(LocalRankFilename,"RefineDist.%05i",writerank); - REFINEDIST = fopen(LocalRankFilename,"wb"); - fwrite(BlockDist.data(),8,nx*ny*nz,REFINEDIST); - fclose(REFINEDIST); + sprintf(LocalRankFilename,"RefineID.%05i",writerank); + WRITEID = fopen(LocalRankFilename,"wb"); + fwrite(id,1,nx*ny*nz,WRITEID); + fclose(WRITEID); -/* for (int k=0; k 0.f) - id[k*nx*ny + j*nx + i]=2; - else - id[k*nx*ny + j*nx + i]=0; + writerank = 8*Dm.kproc()*nprocx*nprocy + 4*Dm.jproc()*nprocx + 2*Dm.iproc()+1; + for (int k=0; k 0) id[k*nx*ny + j*nx + i]=2; + else id[k*nx*ny + j*nx + i] = RefineLabel(i+nx-2,j,k); + } } } - } - */ - sprintf(LocalRankFilename,"RefineID.%05i",writerank); - WRITEID = fopen(LocalRankFilename,"wb"); - fwrite(id,1,nx*ny*nz,WRITEID); - fclose(WRITEID); - - writerank = (2*Dm.kproc())*4*nprocx*nprocy + (2*Dm.jproc()+1)*2*nprocx + 2*Dm.iproc(); - for (int k=0; k 0) id[k*nx*ny + j*nx + i]=2; - else id[k*nx*ny + j*nx + i] = RefineLabel(i,j+ny-2,k); - } - } - } - sprintf(LocalRankFilename,"RefineDist.%05i",writerank); - REFINEDIST = fopen(LocalRankFilename,"wb"); - fwrite(BlockDist.data(),8,nx*ny*nz,REFINEDIST); - fclose(REFINEDIST); -/* - for (int k=0; k 0.f) - id[k*nx*ny + j*nx + i]=2; - else - id[k*nx*ny + j*nx + i]=0; - } - } - } - */ - sprintf(LocalRankFilename,"RefineID.%05i",writerank); - WRITEID = fopen(LocalRankFilename,"wb"); - fwrite(id,1,nx*ny*nz,WRITEID); - fclose(WRITEID); - - writerank = (2*Dm.kproc()+1)*4*nprocx*nprocy + (2*Dm.jproc())*2*nprocx + 2*Dm.iproc(); - for (int k=0; k 0) id[k*nx*ny + j*nx + i]=2; - else id[k*nx*ny + j*nx + i] = RefineLabel(i,j,k+nz-2); - } - } - } - sprintf(LocalRankFilename,"RefineDist.%05i",writerank); - REFINEDIST = fopen(LocalRankFilename,"wb"); - fwrite(BlockDist.data(),8,nx*ny*nz,REFINEDIST); - fclose(REFINEDIST); -/* - for (int k=0; k 0.f) - id[k*nx*ny + j*nx + i]=2; - else - id[k*nx*ny + j*nx + i]=0; - } - } - } - */ - sprintf(LocalRankFilename,"RefineID.%05i",writerank); - WRITEID = fopen(LocalRankFilename,"wb"); - fwrite(id,1,nx*ny*nz,WRITEID); - fclose(WRITEID); - - writerank = 
(2*Dm.kproc()+1)*4*nprocx*nprocy + (2*Dm.jproc())*2*nprocx + 2*Dm.iproc()+1; - for (int k=0; k 0) id[k*nx*ny + j*nx + i]=2; - else id[k*nx*ny + j*nx + i] = RefineLabel(i+nx-2,j,k+nz-2); - } - } - } - sprintf(LocalRankFilename,"RefineDist.%05i",writerank); - REFINEDIST = fopen(LocalRankFilename,"wb"); - fwrite(BlockDist.data(),8,nx*ny*nz,REFINEDIST); - fclose(REFINEDIST); - -/* for (int k=0; k 0.f) - id[k*nx*ny + j*nx + i]=2; - else - id[k*nx*ny + j*nx + i]=0; - } - } - } - */ - sprintf(LocalRankFilename,"RefineID.%05i",writerank); - WRITEID = fopen(LocalRankFilename,"wb"); - fwrite(id,1,nx*ny*nz,WRITEID); - fclose(WRITEID); - - writerank = (2*Dm.kproc()+1)*4*nprocx*nprocy + (2*Dm.jproc()+1)*2*nprocx + 2*Dm.iproc(); - for (int k=0; k 0) id[k*nx*ny + j*nx + i]=2; - else id[k*nx*ny + j*nx + i] = RefineLabel(i,j+ny-2,k+nz-2); - } - } - } - sprintf(LocalRankFilename,"RefineDist.%05i",writerank); - REFINEDIST = fopen(LocalRankFilename,"wb"); - fwrite(BlockDist.data(),8,nx*ny*nz,REFINEDIST); - fclose(REFINEDIST); -/* - for (int k=0; k 0.f) - id[k*nx*ny + j*nx + i]=2; - else - id[k*nx*ny + j*nx + i]=0; - } - } - } - */ - sprintf(LocalRankFilename,"RefineID.%05i",writerank); - WRITEID = fopen(LocalRankFilename,"wb"); - fwrite(id,1,nx*ny*nz,WRITEID); - fclose(WRITEID); - - writerank = (2*Dm.kproc()+1)*4*nprocx*nprocy + (2*Dm.jproc()+1)*2*nprocx + 2*Dm.iproc()+1; - for (int k=0; k 0) id[k*nx*ny + j*nx + i]=2; - else id[k*nx*ny + j*nx + i] = RefineLabel(i+nx-2,j+ny-2,k+nz-2); - } - } - } - - sprintf(LocalRankFilename,"RefineDist.%05i",writerank); - REFINEDIST = fopen(LocalRankFilename,"wb"); - fwrite(BlockDist.data(),8,nx*ny*nz,REFINEDIST); - fclose(REFINEDIST); - -/* for (int k=0; k 0.f) - id[k*nx*ny + j*nx + i]=2; - else - id[k*nx*ny + j*nx + i]=0; - } - } - } - */ - sprintf(LocalRankFilename,"RefineID.%05i",writerank); - WRITEID = fopen(LocalRankFilename,"wb"); - fwrite(id,1,nx*ny*nz,WRITEID); - fclose(WRITEID); + sprintf(LocalRankFilename,"RefineDist.%05i",writerank); + REFINEDIST = fopen(LocalRankFilename,"wb"); + fwrite(BlockDist.data(),8,nx*ny*nz,REFINEDIST); + fclose(REFINEDIST); + sprintf(LocalRankFilename,"RefineID.%05i",writerank); + WRITEID = fopen(LocalRankFilename,"wb"); + fwrite(id,1,nx*ny*nz,WRITEID); + fclose(WRITEID); + + + writerank = (2*Dm.kproc())*4*nprocx*nprocy + (2*Dm.jproc()+1)*2*nprocx + 2*Dm.iproc()+1; + for (int k=0; k 0) id[k*nx*ny + j*nx + i]=2; + else id[k*nx*ny + j*nx + i] = RefineLabel(i+nx-2,j+ny-2,k); + } + } + } + sprintf(LocalRankFilename,"RefineDist.%05i",writerank); + REFINEDIST = fopen(LocalRankFilename,"wb"); + fwrite(BlockDist.data(),8,nx*ny*nz,REFINEDIST); + fclose(REFINEDIST); + + + sprintf(LocalRankFilename,"RefineID.%05i",writerank); + WRITEID = fopen(LocalRankFilename,"wb"); + fwrite(id,1,nx*ny*nz,WRITEID); + fclose(WRITEID); + + writerank = (2*Dm.kproc())*4*nprocx*nprocy + (2*Dm.jproc()+1)*2*nprocx + 2*Dm.iproc(); + for (int k=0; k 0) id[k*nx*ny + j*nx + i]=2; + else id[k*nx*ny + j*nx + i] = RefineLabel(i,j+ny-2,k); + } + } + } + sprintf(LocalRankFilename,"RefineDist.%05i",writerank); + REFINEDIST = fopen(LocalRankFilename,"wb"); + fwrite(BlockDist.data(),8,nx*ny*nz,REFINEDIST); + fclose(REFINEDIST); + + sprintf(LocalRankFilename,"RefineID.%05i",writerank); + WRITEID = fopen(LocalRankFilename,"wb"); + fwrite(id,1,nx*ny*nz,WRITEID); + fclose(WRITEID); + + writerank = (2*Dm.kproc()+1)*4*nprocx*nprocy + (2*Dm.jproc())*2*nprocx + 2*Dm.iproc(); + for (int k=0; k 0) id[k*nx*ny + j*nx + i]=2; + else id[k*nx*ny + j*nx + i] = RefineLabel(i,j,k+nz-2); + } + } + } 
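The block-writing code in lbpm_refine_pp.cpp doubles the mesh in every direction: each refined node (ri,rj,rk) samples the local approximation at the coarse coordinate 0.5*(r-1)+1 in each direction, labels are copied from the coarse cell, and each coarse rank emits eight refined sub-blocks so the effective process grid becomes (2*nprocx, 2*nprocy, 2*nprocz). The writerank expressions encode which refined rank owns each octant. A small sketch of that index arithmetic, equivalent to the formulas above (helper name illustrative):

    #include <cstdio>

    // Rank that owns octant (di,dj,dk), with di/dj/dk in {0,1}, of a coarse
    // rank's sub-domain once the process grid is doubled in every direction.
    static int refinedWriterRank(int iproc, int jproc, int kproc,
                                 int nprocx, int nprocy,
                                 int di, int dj, int dk)
    {
        const int rnx = 2 * nprocx;   // refined process grid in x
        const int rny = 2 * nprocy;   // refined process grid in y
        return (2 * kproc + dk) * rnx * rny
             + (2 * jproc + dj) * rnx
             + (2 * iproc + di);
    }

    int main()
    {
        // Coarse rank (1,0,0) of a 2x2x2 grid: its +x octant is written as
        // rank 3 of the refined 4x4x4 grid, matching
        // 8*kproc*nprocx*nprocy + 4*jproc*nprocx + 2*iproc + 1.
        printf("%d\n", refinedWriterRank(1, 0, 0, 2, 2, 1, 0, 0));
        return 0;
    }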
+ sprintf(LocalRankFilename,"RefineDist.%05i",writerank); + REFINEDIST = fopen(LocalRankFilename,"wb"); + fwrite(BlockDist.data(),8,nx*ny*nz,REFINEDIST); + fclose(REFINEDIST); + + sprintf(LocalRankFilename,"RefineID.%05i",writerank); + WRITEID = fopen(LocalRankFilename,"wb"); + fwrite(id,1,nx*ny*nz,WRITEID); + fclose(WRITEID); + + writerank = (2*Dm.kproc()+1)*4*nprocx*nprocy + (2*Dm.jproc())*2*nprocx + 2*Dm.iproc()+1; + for (int k=0; k 0) id[k*nx*ny + j*nx + i]=2; + else id[k*nx*ny + j*nx + i] = RefineLabel(i+nx-2,j,k+nz-2); + } + } + } + sprintf(LocalRankFilename,"RefineDist.%05i",writerank); + REFINEDIST = fopen(LocalRankFilename,"wb"); + fwrite(BlockDist.data(),8,nx*ny*nz,REFINEDIST); + fclose(REFINEDIST); + + sprintf(LocalRankFilename,"RefineID.%05i",writerank); + WRITEID = fopen(LocalRankFilename,"wb"); + fwrite(id,1,nx*ny*nz,WRITEID); + fclose(WRITEID); + + writerank = (2*Dm.kproc()+1)*4*nprocx*nprocy + (2*Dm.jproc()+1)*2*nprocx + 2*Dm.iproc(); + for (int k=0; k 0) id[k*nx*ny + j*nx + i]=2; + else id[k*nx*ny + j*nx + i] = RefineLabel(i,j+ny-2,k+nz-2); + } + } + } + sprintf(LocalRankFilename,"RefineDist.%05i",writerank); + REFINEDIST = fopen(LocalRankFilename,"wb"); + fwrite(BlockDist.data(),8,nx*ny*nz,REFINEDIST); + fclose(REFINEDIST); + + sprintf(LocalRankFilename,"RefineID.%05i",writerank); + WRITEID = fopen(LocalRankFilename,"wb"); + fwrite(id,1,nx*ny*nz,WRITEID); + fclose(WRITEID); + + writerank = (2*Dm.kproc()+1)*4*nprocx*nprocy + (2*Dm.jproc()+1)*2*nprocx + 2*Dm.iproc()+1; + for (int k=0; k 0) id[k*nx*ny + j*nx + i]=2; + else id[k*nx*ny + j*nx + i] = RefineLabel(i+nx-2,j+ny-2,k+nz-2); + } + } + } + + sprintf(LocalRankFilename,"RefineDist.%05i",writerank); + REFINEDIST = fopen(LocalRankFilename,"wb"); + fwrite(BlockDist.data(),8,nx*ny*nz,REFINEDIST); + fclose(REFINEDIST); + + sprintf(LocalRankFilename,"RefineID.%05i",writerank); + WRITEID = fopen(LocalRankFilename,"wb"); + fwrite(id,1,nx*ny*nz,WRITEID); + fclose(WRITEID); + } } MPI_Barrier(comm); MPI_Finalize(); From 8645d0b2a778a47e4c8d9c73d092375b1f19034f Mon Sep 17 00:00:00 2001 From: JamesEMcclure Date: Sat, 21 Mar 2020 10:42:45 -0400 Subject: [PATCH 068/121] write full refined ID --- tests/lbpm_refine_pp.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/lbpm_refine_pp.cpp b/tests/lbpm_refine_pp.cpp index 1a7ff05b..df6edbf1 100644 --- a/tests/lbpm_refine_pp.cpp +++ b/tests/lbpm_refine_pp.cpp @@ -198,7 +198,7 @@ int main(int argc, char **argv) auto Filename = domain_db->getScalar( "Filename" ); if ( rank==0 ) printf("Write output \n"); sprintf(LocalRankFilename,Filename.c_str(),".refine"); - WRITEID = fopen(LocalRankFilename,"wb"); + FILE *WRITEID = fopen("refine.raw","wb"); fwrite(RefineLabel.data(),1,rnx*rny*rnz,WRITEID); fclose(WRITEID); } From 28f3f9dcf8e7d09c2949d1d8cc727dce27fc5165 Mon Sep 17 00:00:00 2001 From: JamesEMcclure Date: Sat, 21 Mar 2020 13:02:17 -0400 Subject: [PATCH 069/121] using aggregator to write 1x 2x data --- tests/lbpm_refine_pp.cpp | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/tests/lbpm_refine_pp.cpp b/tests/lbpm_refine_pp.cpp index df6edbf1..4734c19b 100644 --- a/tests/lbpm_refine_pp.cpp +++ b/tests/lbpm_refine_pp.cpp @@ -187,7 +187,8 @@ int main(int argc, char **argv) pt.y=0.5*(rj-1)+1.f; pt.z=0.5*(rk-1)+1.f; RefinedSignDist(ri,rj,rk) = LocalApprox.eval(pt); - RefineLabel(ri,rj,rk) = Labels(i,j,k); + RefineLabel(ri,rj,rk) = Labels(i,j,k); + Dm.id[n] = Labels(i,j,k); } } } @@ -197,10 +198,11 @@ int main(int argc, char **argv) if 
(domain_db->keyExists( "Filename" )){ auto Filename = domain_db->getScalar( "Filename" ); if ( rank==0 ) printf("Write output \n"); - sprintf(LocalRankFilename,Filename.c_str(),".refine"); - FILE *WRITEID = fopen("refine.raw","wb"); - fwrite(RefineLabel.data(),1,rnx*rny*rnz,WRITEID); - fclose(WRITEID); + Dm.AggregateLabels("id_2x.raw"); + Mask.AggregateLabels("id.raw"); + //FILE *WRITEID = fopen("refine.raw","wb"); + //fwrite(RefineLabel.data(),1,rnx*rny*rnz,WRITEID); + //fclose(WRITEID); } else{ DoubleArray BlockDist(nx,ny,nz); From 7258867983a421957435ae2934c81bc61607c8d6 Mon Sep 17 00:00:00 2001 From: James McClure Date: Tue, 31 Mar 2020 15:43:36 -0400 Subject: [PATCH 070/121] update shell aggregation protocol --- models/ColorModel.cpp | 64 +++++++++++++++++++++++++++++-------------- 1 file changed, 44 insertions(+), 20 deletions(-) diff --git a/models/ColorModel.cpp b/models/ColorModel.cpp index 05004110..a7d06409 100644 --- a/models/ColorModel.cpp +++ b/models/ColorModel.cpp @@ -1297,6 +1297,7 @@ double ScaLBL_ColorModel::MorphInit(const double beta, const double target_delta double vS = 0.f; double delta_volume; double WallFactor = 0.0; + bool USE_CONNECTED_NWP = false; DoubleArray phase(Nx,Ny,Nz); IntArray phase_label(Nx,Ny,Nz);; @@ -1325,32 +1326,55 @@ double ScaLBL_ColorModel::MorphInit(const double beta, const double target_delta fclose(INPUT); */ // 2. Identify connected components of phase field -> phase_label - BlobIDstruct new_index; - ComputeGlobalBlobIDs(Nx-2,Ny-2,Nz-2,rank_info,phase,Averages->SDs,vF,vS,phase_label,comm); - MPI_Barrier(comm); - // only operate on component "0" - count = 0.0; - double second_biggest = 0.0; + if (USE_CONNECTED_NWP){ + BlobIDstruct new_index; + ComputeGlobalBlobIDs(Nx-2,Ny-2,Nz-2,rank_info,phase,Averages->SDs,vF,vS,phase_label,comm); + MPI_Barrier(comm); - for (int k=0; kComm, count); + second_biggest = sumReduce( Dm->Comm, second_biggest); + } + else { + // use the whole NWP + for (int k=0; kSDs(i,j,k) > 0.f){ + if (phase(i,j,k) > 0.f ){ + phase_id(i,j,k) = 0; + } + else { + phase_id(i,j,k) = 1; + } + } + else { + phase_id(i,j,k) = 1; + } } } } - } - double volume_connected = sumReduce( Dm->Comm, count); - second_biggest = sumReduce( Dm->Comm, second_biggest); + } /*int reach_x, reach_y, reach_z; for (int k=0; k Date: Tue, 31 Mar 2020 15:55:05 -0400 Subject: [PATCH 071/121] cloning databse for restart --- analysis/runAnalysis.cpp | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/analysis/runAnalysis.cpp b/analysis/runAnalysis.cpp index 384d4d69..51b0214c 100644 --- a/analysis/runAnalysis.cpp +++ b/analysis/runAnalysis.cpp @@ -907,10 +907,11 @@ void runAnalysis::run(int timestep, std::shared_ptr input_db, TwoPhase // Spawn a thread to write the restart file // if ( matches(type,AnalysisType::CreateRestart) ) { if (timestep%d_restart_interval==0){ + auto Restart_db = input_db->clone(); input_db->putScalar( "Restart", true ); if (d_rank==0) { std::ofstream OutStream("Restart.db"); - input_db->print(OutStream, ""); + Restart_db->print(OutStream, ""); OutStream.close(); } // Write the restart file (using a seperate thread) @@ -1008,22 +1009,21 @@ void runAnalysis::basic(int timestep, std::shared_ptr input_db, SubPha cfq = std::shared_ptr(new double[19*d_Np],DeleteArray); ScaLBL_CopyToHost(cfq.get(),fq,19*d_Np*sizeof(double)); ScaLBL_CopyToHost(cDen.get(),Den,2*d_Np*sizeof(double)); - - color_db->putScalar("timestep",timestep); - color_db->putScalar( "Restart", true ); - input_db->putDatabase("Color", color_db); 
- + // clone the input database to avoid modifying shared data + auto Restart_db = input_db->clone(); + auto tmp_color_db = Restart_db.getDatabase( "Color" ); + tmp_color_db.putScalar("timestep",timestep); + tmp_color_db.putScalar( "Restart", true ); + Restart_db.putDatabase("Color", tmp_color_db); if (d_rank==0) { std::ofstream OutStream("Restart.db"); - input_db->print(OutStream, ""); + Restart_db.print(OutStream, ""); OutStream.close(); - } // Write the restart file (using a seperate thread) auto work1 = new WriteRestartWorkItem(d_restartFile.c_str(),cDen,cfq,d_Np); work1->add_dependency(d_wait_restart); d_wait_restart = d_tpool.add_work(work1); - } if (timestep%d_visualization_interval==0){ From c4f15d8727516207d48ca63031c55ddcfc06945e Mon Sep 17 00:00:00 2001 From: JamesEMcclure Date: Tue, 31 Mar 2020 16:12:24 -0400 Subject: [PATCH 072/121] fixed issue cloning db --- analysis/runAnalysis.cpp | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/analysis/runAnalysis.cpp b/analysis/runAnalysis.cpp index 51b0214c..7150ab31 100644 --- a/analysis/runAnalysis.cpp +++ b/analysis/runAnalysis.cpp @@ -907,12 +907,12 @@ void runAnalysis::run(int timestep, std::shared_ptr input_db, TwoPhase // Spawn a thread to write the restart file // if ( matches(type,AnalysisType::CreateRestart) ) { if (timestep%d_restart_interval==0){ - auto Restart_db = input_db->clone(); - input_db->putScalar( "Restart", true ); + auto Restart_db = input_db->cloneDatabase(); + // Restart_db->putScalar( "Restart", true ); if (d_rank==0) { - std::ofstream OutStream("Restart.db"); - Restart_db->print(OutStream, ""); - OutStream.close(); + // std::ofstream OutStream("Restart.db"); + // Restart_db->print(OutStream, ""); + // OutStream.close(); } // Write the restart file (using a seperate thread) auto work = new WriteRestartWorkItem(d_restartFile.c_str(),cDen,cfq,d_Np); @@ -1010,14 +1010,14 @@ void runAnalysis::basic(int timestep, std::shared_ptr input_db, SubPha ScaLBL_CopyToHost(cfq.get(),fq,19*d_Np*sizeof(double)); ScaLBL_CopyToHost(cDen.get(),Den,2*d_Np*sizeof(double)); // clone the input database to avoid modifying shared data - auto Restart_db = input_db->clone(); - auto tmp_color_db = Restart_db.getDatabase( "Color" ); - tmp_color_db.putScalar("timestep",timestep); - tmp_color_db.putScalar( "Restart", true ); - Restart_db.putDatabase("Color", tmp_color_db); + auto Restart_db = input_db->cloneDatabase(); + auto tmp_color_db = Restart_db->getDatabase( "Color" ); + tmp_color_db->putScalar("timestep",timestep); + tmp_color_db->putScalar( "Restart", true ); + Restart_db->putDatabase("Color", tmp_color_db); if (d_rank==0) { std::ofstream OutStream("Restart.db"); - Restart_db.print(OutStream, ""); + Restart_db->print(OutStream, ""); OutStream.close(); } // Write the restart file (using a seperate thread) From 7b67f2acfc7c0dfcd570d5b74fca52d81726eea1 Mon Sep 17 00:00:00 2001 From: JamesEMcclure Date: Tue, 31 Mar 2020 18:05:32 -0400 Subject: [PATCH 073/121] refactor shell aggregation --- models/ColorModel.cpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/models/ColorModel.cpp b/models/ColorModel.cpp index a7d06409..25716a1e 100644 --- a/models/ColorModel.cpp +++ b/models/ColorModel.cpp @@ -1327,6 +1327,8 @@ double ScaLBL_ColorModel::MorphInit(const double beta, const double target_delta */ // 2. 
Identify connected components of phase field -> phase_label + double volume_connected = 0.0; + double second_biggest = 0.0; if (USE_CONNECTED_NWP){ BlobIDstruct new_index; ComputeGlobalBlobIDs(Nx-2,Ny-2,Nz-2,rank_info,phase,Averages->SDs,vF,vS,phase_label,comm); @@ -1334,7 +1336,6 @@ double ScaLBL_ColorModel::MorphInit(const double beta, const double target_delta // only operate on component "0" count = 0.0; - double second_biggest = 0.0; for (int k=0; kComm, count); + volume_connected = sumReduce( Dm->Comm, count); second_biggest = sumReduce( Dm->Comm, second_biggest); } else { @@ -1409,13 +1410,16 @@ double ScaLBL_ColorModel::MorphInit(const double beta, const double target_delta } } + if (USE_CONNECTED_NWP){ if (volume_connected - second_biggest < 2.0*fabs(target_delta_volume) && target_delta_volume < 0.0){ // if connected volume is less than 2% just delete the whole thing if (rank==0) printf("Connected region has shrunk! \n"); REVERSE_FLOW_DIRECTION = true; } + /* else{*/ if (rank==0) printf("Pathway volume / next largest ganglion %f \n",volume_connected/second_biggest ); + } if (rank==0) printf("MorphGrow with target volume fraction change %f \n", target_delta_volume/volume_initial); double target_delta_volume_incremental = target_delta_volume; if (fabs(target_delta_volume) > 0.01*volume_initial) From 0d493275b4eb54253c9cc300f012861224e97a8e Mon Sep 17 00:00:00 2001 From: James McClure Date: Wed, 1 Apr 2020 08:19:59 -0400 Subject: [PATCH 074/121] use kr as target for morph change --- models/ColorModel.cpp | 47 +++++++++++++++++++++++++------------------ 1 file changed, 27 insertions(+), 20 deletions(-) diff --git a/models/ColorModel.cpp b/models/ColorModel.cpp index 25716a1e..c6cb563c 100644 --- a/models/ColorModel.cpp +++ b/models/ColorModel.cpp @@ -782,6 +782,20 @@ void ScaLBL_ColorModel::Run(){ double flow_rate_B = volB*(vB_x*dir_x + vB_y*dir_y + vB_z*dir_z); double Ca = fabs(muA*flow_rate_A + muB*flow_rate_B)/(5.796*alpha); + if (SET_CAPILLARY_NUMBER && CURRENT_STEADY_TIMESTEPS%MIN_STEADY_TIMESTEPS < analysis_interval ){ + Fx *= capillary_number / Ca; + Fy *= capillary_number / Ca; + Fz *= capillary_number / Ca; + if (force_mag > 1e-3){ + Fx *= 1e-3/force_mag; // impose ceiling for stability + Fy *= 1e-3/force_mag; + Fz *= 1e-3/force_mag; + } + if (rank == 0) printf(" -- adjust force by factor %f \n ",capillary_number / Ca); + Averages->SetParams(rhoA,rhoB,tauA,tauB,Fx,Fy,Fz,alpha,beta); + color_db->putVector("F",{Fx,Fy,Fz}); + } + if ( morph_timesteps > morph_interval ){ bool isSteady = false; @@ -789,28 +803,21 @@ void ScaLBL_ColorModel::Run(){ isSteady = true; if (CURRENT_STEADY_TIMESTEPS > MAX_STEADY_TIMESTEPS) isSteady = true; - - if (SET_CAPILLARY_NUMBER && RESCALE_FORCE_COUNT < RESCALE_FORCE_MAX){ - RESCALE_FORCE_COUNT++; - Fx *= capillary_number / Ca; - Fy *= capillary_number / Ca; - Fz *= capillary_number / Ca; - - if (force_mag > 1e-3){ - Fx *= 1e-3/force_mag; // impose ceiling for stability - Fy *= 1e-3/force_mag; - Fz *= 1e-3/force_mag; - } - - if (rank == 0) printf(" -- adjust force by factor %f \n ",capillary_number / Ca); - Averages->SetParams(rhoA,rhoB,tauA,tauB,Fx,Fy,Fz,alpha,beta); - color_db->putVector("F",{Fx,Fy,Fz}); - } if ( isSteady ){ MORPH_ADAPT = true; CURRENT_MORPH_TIMESTEPS=0; - delta_volume_target = Dm->Volume*volA *morph_delta; // set target volume change + //delta_volume_target = Dm->Volume*volA *morph_delta; // set target volume change + /** morphological target based on relative permeability for A **/ + double krA_TMP= fabs(muA*flow_rate_A / 
force_mag); + log_krA = log(krA_TMP); + log_krA_target = log(KRA_MORPH_FACTOR*(krA_TMP)); + slope_krA_volume = (log_krA - log_krA_prev)/(Dm->Volume*(volA - volA_prev)); + delta_volume_target=Dm->Volume*(volA+(log_krA_target - log_krA)/slope_krA_volume); + log_krA_prev = log_krA; + volA_prev = volA; + printf(" log(kr)=%f, volume=%f, TARGET log(kr)=%f, volume change=%f \n",log_krA, volA, log_krA_target, delta_volume_target/(volA*Dm->Volume)); + /** compute averages & write data **/ Averages->Full(); Averages->Write(timestep); analysis.WriteVisData(timestep, current_db, *Averages, Phi, Pressure, Velocity, fq, Den ); @@ -884,7 +891,6 @@ void ScaLBL_ColorModel::Run(){ Fx *= capillary_number / Ca; Fy *= capillary_number / Ca; Fz *= capillary_number / Ca; - RESCALE_FORCE_COUNT = 1; if (force_mag > 1e-3){ Fx *= 1e-3/force_mag; // impose ceiling for stability Fy *= 1e-3/force_mag; @@ -904,6 +910,7 @@ void ScaLBL_ColorModel::Run(){ Averages->SetParams(rhoA,rhoB,tauA,tauB,Fx,Fy,Fz,alpha,beta); color_db->putVector("F",{Fx,Fy,Fz}); } + CURRENT_STEADY_TIMESTEPS = 0; } else{ @@ -979,7 +986,7 @@ void ScaLBL_ColorModel::Run(){ //morph_delta *= (-1.0); REVERSE_FLOW_DIRECTION = false; } - MPI_Barrier(comm); + comm.barrier(); } morph_timesteps += analysis_interval; } From abfe86152f23b2334800a170415a76e6cf3a8ed0 Mon Sep 17 00:00:00 2001 From: JamesEMcclure Date: Wed, 1 Apr 2020 12:13:04 -0400 Subject: [PATCH 075/121] fix bug --- models/ColorModel.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/models/ColorModel.cpp b/models/ColorModel.cpp index 09c8b946..f6d15b43 100644 --- a/models/ColorModel.cpp +++ b/models/ColorModel.cpp @@ -1002,7 +1002,7 @@ void ScaLBL_ColorModel::Run(){ //morph_delta *= (-1.0); REVERSE_FLOW_DIRECTION = false; } - comm.barrier(); + MPI_Barrier(comm); } morph_timesteps += analysis_interval; } From e50a099c13eab93a6a302c82c0824c7f4fb89bcc Mon Sep 17 00:00:00 2001 From: James McClure Date: Wed, 1 Apr 2020 12:20:21 -0400 Subject: [PATCH 076/121] cleaning up barriers in color model --- models/ColorModel.cpp | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/models/ColorModel.cpp b/models/ColorModel.cpp index f6d15b43..a62ec927 100644 --- a/models/ColorModel.cpp +++ b/models/ColorModel.cpp @@ -716,7 +716,8 @@ void ScaLBL_ColorModel::Run(){ } ScaLBL_D3Q19_AAodd_Color(NeighborList, dvcMap, fq, Aq, Bq, Den, Phi, Velocity, rhoA, rhoB, tauA, tauB, alpha, beta, Fx, Fy, Fz, Nx, Nx*Ny, 0, ScaLBL_Comm->LastExterior(), Np); - ScaLBL_DeviceBarrier(); MPI_Barrier(comm); + ScaLBL_DeviceBarrier(); + MPI_Barrier(ScaLBL_Comm->MPI_COMM_SCALBL); // *************EVEN TIMESTEP************* timestep++; @@ -751,10 +752,9 @@ void ScaLBL_ColorModel::Run(){ } ScaLBL_D3Q19_AAeven_Color(dvcMap, fq, Aq, Bq, Den, Phi, Velocity, rhoA, rhoB, tauA, tauB, alpha, beta, Fx, Fy, Fz, Nx, Nx*Ny, 0, ScaLBL_Comm->LastExterior(), Np); - ScaLBL_DeviceBarrier(); MPI_Barrier(comm); + ScaLBL_DeviceBarrier(); + MPI_Barrier(ScaLBL_Comm->MPI_COMM_SCALBL); //************************************************************************ - - MPI_Barrier(comm); PROFILE_STOP("Update"); if (rank==0 && timestep%analysis_interval == 0 && BoundaryCondition > 0){ @@ -763,7 +763,6 @@ void ScaLBL_ColorModel::Run(){ // Run the analysis analysis.basic(timestep, current_db, *Averages, Phi, Pressure, Velocity, fq, Den ); - // allow initial ramp-up to get closer to steady state if (timestep > RAMP_TIMESTEPS && timestep%analysis_interval == 0 && USE_MORPH){ analysis.finish(); @@ -1002,17 +1001,17 @@ void 
ScaLBL_ColorModel::Run(){ //morph_delta *= (-1.0); REVERSE_FLOW_DIRECTION = false; } - MPI_Barrier(comm); } morph_timesteps += analysis_interval; } + MPI_Barrier(ScaLBL_Comm->MPI_COMM_SCALBL); } analysis.finish(); PROFILE_STOP("Loop"); PROFILE_SAVE("lbpm_color_simulator",1); //************************************************************************ ScaLBL_DeviceBarrier(); - MPI_Barrier(comm); + MPI_Barrier(ScaLBL_Comm->MPI_COMM_SCALBL); stoptime = MPI_Wtime(); if (rank==0) printf("-------------------------------------------------------------------\n"); // Compute the walltime per timestep @@ -1062,12 +1061,12 @@ double ScaLBL_ColorModel::ImageInit(std::string Filename){ if (rank==0) printf(" new saturation: %f (%f / %f) \n", Count / PoreCount, Count, PoreCount); ScaLBL_CopyToDevice(Phi, PhaseLabel, Nx*Ny*Nz*sizeof(double)); - MPI_Barrier(comm); + MPI_Barrier(ScaLBL_Comm->MPI_COMM_SCALBL); ScaLBL_D3Q19_Init(fq, Np); ScaLBL_PhaseField_Init(dvcMap, Phi, Den, Aq, Bq, 0, ScaLBL_Comm->LastExterior(), Np); ScaLBL_PhaseField_Init(dvcMap, Phi, Den, Aq, Bq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); - MPI_Barrier(comm); + MPI_Barrier(ScaLBL_Comm->MPI_COMM_SCALBL); ScaLBL_CopyToHost(Averages->Phi.data(),Phi,Nx*Ny*Nz*sizeof(double)); @@ -1442,7 +1441,7 @@ double ScaLBL_ColorModel::MorphInit(const double beta, const double target_delta if (USE_CONNECTED_NWP){ BlobIDstruct new_index; ComputeGlobalBlobIDs(Nx-2,Ny-2,Nz-2,rank_info,phase,Averages->SDs,vF,vS,phase_label,comm); - MPI_Barrier(comm); + MPI_Barrier(Dm->comm); // only operate on component "0" count = 0.0; From 7ef292e2fcc2a1864a626c1ef5475b6c708eda4e Mon Sep 17 00:00:00 2001 From: James McClure Date: Wed, 1 Apr 2020 12:23:00 -0400 Subject: [PATCH 077/121] cleaning up barriers in color model --- models/ColorModel.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/models/ColorModel.cpp b/models/ColorModel.cpp index a62ec927..b86c0918 100644 --- a/models/ColorModel.cpp +++ b/models/ColorModel.cpp @@ -1441,7 +1441,7 @@ double ScaLBL_ColorModel::MorphInit(const double beta, const double target_delta if (USE_CONNECTED_NWP){ BlobIDstruct new_index; ComputeGlobalBlobIDs(Nx-2,Ny-2,Nz-2,rank_info,phase,Averages->SDs,vF,vS,phase_label,comm); - MPI_Barrier(Dm->comm); + MPI_Barrier(Dm->Comm); // only operate on component "0" count = 0.0; From 64a19a718bc5a6725ef11bb249c5352745549646 Mon Sep 17 00:00:00 2001 From: James McClure Date: Wed, 1 Apr 2020 12:26:55 -0400 Subject: [PATCH 078/121] make ScaLBL communicator public for --- common/ScaLBL.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common/ScaLBL.h b/common/ScaLBL.h index a50ab7ed..0d2ee0cf 100644 --- a/common/ScaLBL.h +++ b/common/ScaLBL.h @@ -134,6 +134,7 @@ public: //ScaLBL_Communicator(Domain &Dm, IntArray &Map); ~ScaLBL_Communicator(); //...................................................................................... + MPI_Comm MPI_COMM_SCALBL; // MPI Communicator unsigned long int CommunicationCount,SendCount,RecvCount; int Nx,Ny,Nz,N; int BoundaryCondition; @@ -207,7 +208,6 @@ private: // Give the object it's own MPI communicator RankInfoStruct rank_info; MPI_Group Group; // Group of processors associated with this domain - MPI_Comm MPI_COMM_SCALBL; // MPI Communicator for this domain MPI_Request req1[18],req2[18]; MPI_Status stat1[18],stat2[18]; //...................................................................................... 
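The common thread in patches 076-078 is that barriers inside the time loop now synchronize on the communicator owned by the ScaLBL_Communicator object, MPI_Barrier(ScaLBL_Comm->MPI_COMM_SCALBL), rather than on the global comm, which is why MPI_COMM_SCALBL is promoted to a public member above. A minimal, self-contained illustration of the underlying MPI pattern follows; it assumes the member is created by duplicating the parent (domain) communicator, which is the usual reason a solver keeps its own handle, and it is a sketch rather than the LBPM constructor itself.

#include <mpi.h>

int main(int argc, char **argv) {
    MPI_Init(&argc, &argv);
    MPI_Comm comm_scalbl;
    /* duplicate the parent communicator so the solver's collectives cannot
       interleave with unrelated traffic on MPI_COMM_WORLD */
    MPI_Comm_dup(MPI_COMM_WORLD, &comm_scalbl);
    /* ... device kernels and halo exchange would run here ... */
    MPI_Barrier(comm_scalbl);      /* synchronize only this solver's ranks */
    MPI_Comm_free(&comm_scalbl);
    MPI_Finalize();
    return 0;
}

Duplicating the communicator keeps the solver's barriers and collectives ordered independently of any other traffic on the parent communicator.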
From 50b84071456fb6806c734735730c6982384420e9 Mon Sep 17 00:00:00 2001 From: James McClure Date: Wed, 1 Apr 2020 13:13:57 -0400 Subject: [PATCH 079/121] clean up ScaLBL barriers --- cpu/Extras.cpp | 2 +- gpu/Extras.cu | 1 + models/ColorModel.cpp | 4 +--- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/cpu/Extras.cpp b/cpu/Extras.cpp index 71f5c04a..efe820d3 100644 --- a/cpu/Extras.cpp +++ b/cpu/Extras.cpp @@ -49,5 +49,5 @@ extern "C" void ScaLBL_CopyToZeroCopy(void* dest, const void* source, size_t siz } extern "C" void ScaLBL_DeviceBarrier(){ -// cudaDeviceSynchronize(); + MPI_Barrier(ScaLBL_Comm->MPI_COMM_SCALBL) } diff --git a/gpu/Extras.cu b/gpu/Extras.cu index 8aeedc87..cd9c265c 100644 --- a/gpu/Extras.cu +++ b/gpu/Extras.cu @@ -59,4 +59,5 @@ extern "C" void ScaLBL_CopyToHost(void* dest, const void* source, size_t size){ extern "C" void ScaLBL_DeviceBarrier(){ cudaDeviceSynchronize(); + MPI_Barrier(ScaLBL_Comm->MPI_COMM_SCALBL) } diff --git a/models/ColorModel.cpp b/models/ColorModel.cpp index b86c0918..57e50411 100644 --- a/models/ColorModel.cpp +++ b/models/ColorModel.cpp @@ -717,7 +717,7 @@ void ScaLBL_ColorModel::Run(){ ScaLBL_D3Q19_AAodd_Color(NeighborList, dvcMap, fq, Aq, Bq, Den, Phi, Velocity, rhoA, rhoB, tauA, tauB, alpha, beta, Fx, Fy, Fz, Nx, Nx*Ny, 0, ScaLBL_Comm->LastExterior(), Np); ScaLBL_DeviceBarrier(); - MPI_Barrier(ScaLBL_Comm->MPI_COMM_SCALBL); + ; // *************EVEN TIMESTEP************* timestep++; @@ -753,7 +753,6 @@ void ScaLBL_ColorModel::Run(){ ScaLBL_D3Q19_AAeven_Color(dvcMap, fq, Aq, Bq, Den, Phi, Velocity, rhoA, rhoB, tauA, tauB, alpha, beta, Fx, Fy, Fz, Nx, Nx*Ny, 0, ScaLBL_Comm->LastExterior(), Np); ScaLBL_DeviceBarrier(); - MPI_Barrier(ScaLBL_Comm->MPI_COMM_SCALBL); //************************************************************************ PROFILE_STOP("Update"); @@ -1011,7 +1010,6 @@ void ScaLBL_ColorModel::Run(){ PROFILE_SAVE("lbpm_color_simulator",1); //************************************************************************ ScaLBL_DeviceBarrier(); - MPI_Barrier(ScaLBL_Comm->MPI_COMM_SCALBL); stoptime = MPI_Wtime(); if (rank==0) printf("-------------------------------------------------------------------\n"); // Compute the walltime per timestep From f12f8154b12b5aa4fc2a124a1a615ec3735bb176 Mon Sep 17 00:00:00 2001 From: James McClure Date: Wed, 1 Apr 2020 13:15:24 -0400 Subject: [PATCH 080/121] Revert "clean up ScaLBL barriers" This reverts commit 50b84071456fb6806c734735730c6982384420e9. 
--- cpu/Extras.cpp | 2 +- gpu/Extras.cu | 1 - models/ColorModel.cpp | 4 +++- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/cpu/Extras.cpp b/cpu/Extras.cpp index efe820d3..71f5c04a 100644 --- a/cpu/Extras.cpp +++ b/cpu/Extras.cpp @@ -49,5 +49,5 @@ extern "C" void ScaLBL_CopyToZeroCopy(void* dest, const void* source, size_t siz } extern "C" void ScaLBL_DeviceBarrier(){ - MPI_Barrier(ScaLBL_Comm->MPI_COMM_SCALBL) +// cudaDeviceSynchronize(); } diff --git a/gpu/Extras.cu b/gpu/Extras.cu index cd9c265c..8aeedc87 100644 --- a/gpu/Extras.cu +++ b/gpu/Extras.cu @@ -59,5 +59,4 @@ extern "C" void ScaLBL_CopyToHost(void* dest, const void* source, size_t size){ extern "C" void ScaLBL_DeviceBarrier(){ cudaDeviceSynchronize(); - MPI_Barrier(ScaLBL_Comm->MPI_COMM_SCALBL) } diff --git a/models/ColorModel.cpp b/models/ColorModel.cpp index 57e50411..b86c0918 100644 --- a/models/ColorModel.cpp +++ b/models/ColorModel.cpp @@ -717,7 +717,7 @@ void ScaLBL_ColorModel::Run(){ ScaLBL_D3Q19_AAodd_Color(NeighborList, dvcMap, fq, Aq, Bq, Den, Phi, Velocity, rhoA, rhoB, tauA, tauB, alpha, beta, Fx, Fy, Fz, Nx, Nx*Ny, 0, ScaLBL_Comm->LastExterior(), Np); ScaLBL_DeviceBarrier(); - ; + MPI_Barrier(ScaLBL_Comm->MPI_COMM_SCALBL); // *************EVEN TIMESTEP************* timestep++; @@ -753,6 +753,7 @@ void ScaLBL_ColorModel::Run(){ ScaLBL_D3Q19_AAeven_Color(dvcMap, fq, Aq, Bq, Den, Phi, Velocity, rhoA, rhoB, tauA, tauB, alpha, beta, Fx, Fy, Fz, Nx, Nx*Ny, 0, ScaLBL_Comm->LastExterior(), Np); ScaLBL_DeviceBarrier(); + MPI_Barrier(ScaLBL_Comm->MPI_COMM_SCALBL); //************************************************************************ PROFILE_STOP("Update"); @@ -1010,6 +1011,7 @@ void ScaLBL_ColorModel::Run(){ PROFILE_SAVE("lbpm_color_simulator",1); //************************************************************************ ScaLBL_DeviceBarrier(); + MPI_Barrier(ScaLBL_Comm->MPI_COMM_SCALBL); stoptime = MPI_Wtime(); if (rank==0) printf("-------------------------------------------------------------------\n"); // Compute the walltime per timestep From d1d626ac414eff6306669c1d70ba904a73238b45 Mon Sep 17 00:00:00 2001 From: JamesEMcclure Date: Thu, 2 Apr 2020 10:38:14 -0400 Subject: [PATCH 081/121] fix header in greyscale --- models/GreyscaleModel.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/models/GreyscaleModel.h b/models/GreyscaleModel.h index a99925b1..ea807048 100644 --- a/models/GreyscaleModel.h +++ b/models/GreyscaleModel.h @@ -10,7 +10,8 @@ Implementation of color lattice boltzmann model #include #include "common/Communication.h" -#include "common/MPI.h" +//#include "common/MPI.h" +#include "common/MPI_Helpers.h" #include "common/Database.h" #include "common/ScaLBL.h" #include "ProfilerApp.h" From ce7d348a206aa2fc70089202e522a54a27334863 Mon Sep 17 00:00:00 2001 From: James McClure Date: Thu, 2 Apr 2020 10:43:10 -0400 Subject: [PATCH 082/121] fix sumReduce --- models/GreyscaleModel.cpp | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/models/GreyscaleModel.cpp b/models/GreyscaleModel.cpp index 11d92c80..c28c88c5 100644 --- a/models/GreyscaleModel.cpp +++ b/models/GreyscaleModel.cpp @@ -261,8 +261,7 @@ void ScaLBL_GreyscaleModel::AssignComponentLabels(double *Porosity, double *Perm // Set Dm to match Mask for (int i=0; iid[i] = Mask->id[i]; - for (int idx=0; idxComm.sumReduce(label_count[idx]); - + for (int idx=0; idxComm, label_count[idx]); //Initialize a weighted porosity after considering grey voxels GreyPorosity=0.0; for (unsigned int idx=0; 
idxComm); //MPI_Allreduce(&count_loc,&count,1,MPI_DOUBLE,MPI_SUM,Mask->Comm); - vax = Mask->Comm.sumReduce( vax_loc ); - vay = Mask->Comm.sumReduce( vay_loc ); - vaz = Mask->Comm.sumReduce( vaz_loc ); - count = Mask->Comm.sumReduce( count_loc ); + vax = sumReduce( Mask->Comm, vax_loc); + vay = sumReduce( Mask->Comm, vay_loc); + vaz = sumReduce( Mask->Comm, vaz_loc); + count = sumReduce( Mask->Comm, count_loc); vax /= count; vay /= count; @@ -634,10 +633,10 @@ void ScaLBL_GreyscaleModel::Run(){ double As = Morphology.A(); double Hs = Morphology.H(); double Xs = Morphology.X(); - Vs = Dm->Comm.sumReduce( Vs); - As = Dm->Comm.sumReduce( As); - Hs = Dm->Comm.sumReduce( Hs); - Xs = Dm->Comm.sumReduce( Xs); + Vs = sumReduce( Dm->Comm, Vs); + As = sumReduce( Dm->Comm, As); + Hs = sumReduce( Dm->Comm, Hs); + Xs = sumReduce( Dm->Comm, Xs); double h = Dm->voxel_length; //double absperm = h*h*mu*Mask->Porosity()*flow_rate / force_mag; From b4a51e266b43f838ae96af26f702cea6486b810f Mon Sep 17 00:00:00 2001 From: JamesEMcclure Date: Thu, 2 Apr 2020 10:55:04 -0400 Subject: [PATCH 083/121] remove warnings for greyscale --- tests/lbpm_greyscale_simulator.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tests/lbpm_greyscale_simulator.cpp b/tests/lbpm_greyscale_simulator.cpp index a54b6fc4..b17778ce 100644 --- a/tests/lbpm_greyscale_simulator.cpp +++ b/tests/lbpm_greyscale_simulator.cpp @@ -8,7 +8,7 @@ #include "common/ScaLBL.h" #include "common/Communication.h" -#include "common/MPI.h" +#include "common/MPI_Helpers.h" #include "models/GreyscaleModel.h" //#define WRITE_SURFACES @@ -33,8 +33,6 @@ int main(int argc, char **argv) MPI_Comm_size(comm,&nprocs); { // parallel domain size (# of sub-domains) - int nprocx,nprocy,nprocz; - int iproc,jproc,kproc; if (rank == 0){ printf("********************************************************\n"); @@ -43,6 +41,7 @@ int main(int argc, char **argv) } // Initialize compute device int device=ScaLBL_SetDevice(rank); + NULL_USE(device); ScaLBL_DeviceBarrier(); MPI_Barrier(comm); From 4398b09cc08ae3191cc429e3cea331a8ee5919e2 Mon Sep 17 00:00:00 2001 From: James McClure Date: Thu, 2 Apr 2020 12:53:54 -0400 Subject: [PATCH 084/121] enabling endpoint adaptation for color model --- models/ColorModel.cpp | 50 ++++++++++++++++++++--------------------- models/GreyscaleModel.h | 1 - 2 files changed, 24 insertions(+), 27 deletions(-) diff --git a/models/ColorModel.cpp b/models/ColorModel.cpp index b86c0918..7af1cafe 100644 --- a/models/ColorModel.cpp +++ b/models/ColorModel.cpp @@ -500,6 +500,7 @@ void ScaLBL_ColorModel::Run(){ bool USE_SEED = false; bool USE_DIRECT = false; bool USE_MORPHOPEN_OIL = false; + bool USE_TARGET_VOLUME_CHANGE = false; int MAX_MORPH_TIMESTEPS = 50000; // maximum number of LBM timesteps to spend in morphological adaptation routine int MIN_STEADY_TIMESTEPS = 100000; int MAX_STEADY_TIMESTEPS = 200000; @@ -523,11 +524,11 @@ void ScaLBL_ColorModel::Run(){ bool USE_BUMP_RATE = false; /* history for morphological algoirthm */ - double KRA_MORPH_FACTOR=0.8; + double KRA_MORPH_FACTOR=0.5; double volA_prev = 0.0; double log_krA_prev = 1.0; double log_krA_target = 1.0; - double log_krA = 0.0; + double log_krA = 1.0; double slope_krA_volume = 0.0; if (color_db->keyExists( "vol_A_previous" )){ volA_prev = color_db->getScalar( "vol_A_previous" ); @@ -555,17 +556,19 @@ void ScaLBL_ColorModel::Run(){ seed_water = 0.01; USE_SEED = true; USE_MORPH = true; + USE_TARGET_VOLUME_CHANGE = true; } else if (protocol == "open connected oil"){ morph_delta = 
0.05; USE_MORPH = true; USE_MORPHOPEN_OIL = true; + USE_TARGET_VOLUME_CHANGE = true; } else if (protocol == "shell aggregation"){ morph_delta = 0.05; USE_MORPH = true; + USE_TARGET_VOLUME_CHANGE = true; } - if (color_db->keyExists( "residual_endpoint_threshold" )){ RESIDUAL_ENDPOINT_THRESHOLD = color_db->getScalar( "residual_endpoint_threshold" ); } @@ -822,16 +825,28 @@ void ScaLBL_ColorModel::Run(){ if ( isSteady ){ MORPH_ADAPT = true; CURRENT_MORPH_TIMESTEPS=0; - //delta_volume_target = Dm->Volume*volA *morph_delta; // set target volume change - /** morphological target based on relative permeability for A **/ + delta_volume_target = Dm->Volume*volA *morph_delta; // set target volume change + //****** ENDPOINT ADAPTATION ********/ double krA_TMP= fabs(muA*flow_rate_A / force_mag); + double krB_TMP= fabs(muB*flow_rate_B / force_mag); log_krA = log(krA_TMP); - log_krA_target = log(KRA_MORPH_FACTOR*(krA_TMP)); - slope_krA_volume = (log_krA - log_krA_prev)/(Dm->Volume*(volA - volA_prev)); - delta_volume_target=Dm->Volume*(volA+(log_krA_target - log_krA)/slope_krA_volume); + if (krA_TMP < 0.0){ + // cannot do endpoint adaptation if kr is negative + log_krA = log_krA_prev; + } + else if (krA_TMP < krB_TMP && morph_delta > 0.0){ + /** morphological target based on relative permeability for A **/ + log_krA_target = log(KRA_MORPH_FACTOR*(krA_TMP)); + slope_krA_volume = (log_krA - log_krA_prev)/(Dm->Volume*(volA - volA_prev)); + delta_volume_target=min(delta_volume_target,Dm->Volume*(volA+(log_krA_target - log_krA)/slope_krA_volume)); + if (rank==0){ + printf(" Enabling endpoint adaptation: krA = %f, krB = %f \n",krA_TMP,krB_TMP); + printf(" log(kr)=%f, volume=%f, TARGET log(kr)=%f, volume change=%f \n",log_krA, volA, log_krA_target, delta_volume_target/(volA*Dm->Volume)); + } + } log_krA_prev = log_krA; volA_prev = volA; - printf(" log(kr)=%f, volume=%f, TARGET log(kr)=%f, volume change=%f \n",log_krA, volA, log_krA_target, delta_volume_target/(volA*Dm->Volume)); + //******************************** **/ /** compute averages & write data **/ Averages->Full(); Averages->Write(timestep); @@ -977,14 +992,6 @@ void ScaLBL_ColorModel::Run(){ CURRENT_STEADY_TIMESTEPS=0; initial_volume = volA*Dm->Volume; delta_volume = 0.0; - if (USE_DIRECT){ - //BoundaryCondition = 0; - //ScaLBL_Comm->BoundaryCondition = 0; - //ScaLBL_Comm_Regular->BoundaryCondition = 0; - //Fx = capillary_number*dir_x*force_mag / Ca; - //Fy = capillary_number*dir_y*force_mag / Ca; - //Fz = capillary_number*dir_z*force_mag / Ca; - } } else if (!(USE_DIRECT) && CURRENT_MORPH_TIMESTEPS > MAX_MORPH_TIMESTEPS) { MORPH_ADAPT = false; @@ -992,15 +999,6 @@ void ScaLBL_ColorModel::Run(){ initial_volume = volA*Dm->Volume; delta_volume = 0.0; } - if ( REVERSE_FLOW_DIRECTION ){ - //if (rank==0) printf("*****REVERSE FLOW DIRECTION***** \n"); - delta_volume = 0.0; - // flow direction will reverse after next steady point - MORPH_ADAPT = false; - CURRENT_STEADY_TIMESTEPS=0; - //morph_delta *= (-1.0); - REVERSE_FLOW_DIRECTION = false; - } } morph_timesteps += analysis_interval; } diff --git a/models/GreyscaleModel.h b/models/GreyscaleModel.h index ea807048..c670239f 100644 --- a/models/GreyscaleModel.h +++ b/models/GreyscaleModel.h @@ -10,7 +10,6 @@ Implementation of color lattice boltzmann model #include #include "common/Communication.h" -//#include "common/MPI.h" #include "common/MPI_Helpers.h" #include "common/Database.h" #include "common/ScaLBL.h" From 7f83f55e1bc3e476ad2d8c85a0f1fa166083caf2 Mon Sep 17 00:00:00 2001 From: James McClure Date: Fri, 
3 Apr 2020 07:16:44 -0400 Subject: [PATCH 085/121] clean up target Ca --- models/ColorModel.cpp | 26 +------------------------- 1 file changed, 1 insertion(+), 25 deletions(-) diff --git a/models/ColorModel.cpp b/models/ColorModel.cpp index 7af1cafe..ddf669bb 100644 --- a/models/ColorModel.cpp +++ b/models/ColorModel.cpp @@ -800,20 +800,6 @@ void ScaLBL_ColorModel::Run(){ double flow_rate_B = volB*(vB_x*dir_x + vB_y*dir_y + vB_z*dir_z); double Ca = fabs(muA*flow_rate_A + muB*flow_rate_B)/(5.796*alpha); - if (SET_CAPILLARY_NUMBER && CURRENT_STEADY_TIMESTEPS%MIN_STEADY_TIMESTEPS < analysis_interval ){ - Fx *= capillary_number / Ca; - Fy *= capillary_number / Ca; - Fz *= capillary_number / Ca; - if (force_mag > 1e-3){ - Fx *= 1e-3/force_mag; // impose ceiling for stability - Fy *= 1e-3/force_mag; - Fz *= 1e-3/force_mag; - } - if (rank == 0) printf(" -- adjust force by factor %f \n ",capillary_number / Ca); - Averages->SetParams(rhoA,rhoB,tauA,tauB,Fx,Fy,Fz,alpha,beta); - color_db->putVector("F",{Fx,Fy,Fz}); - } - if ( morph_timesteps > morph_interval ){ bool isSteady = false; @@ -926,16 +912,6 @@ void ScaLBL_ColorModel::Run(){ Fy *= 1e-3/force_mag; Fz *= 1e-3/force_mag; } - if (flow_rate_A < NOISE_THRESHOLD && USE_BUMP_RATE){ - if (rank==0) printf("Hit noise threshold (%f): bumping capillary number by %f X \n",NOISE_THRESHOLD,BUMP_RATE); - Fx *= BUMP_RATE; // impose bump condition - Fy *= BUMP_RATE; - Fz *= BUMP_RATE; - capillary_number *= BUMP_RATE; - color_db->putScalar("capillary_number",capillary_number); - current_db->putDatabase("Color", color_db); - MORPH_ADAPT = false; // re-run current point if below noise threshold - } if (rank == 0) printf(" -- adjust force by factor %f \n ",capillary_number / Ca); Averages->SetParams(rhoA,rhoB,tauA,tauB,Fx,Fy,Fz,alpha,beta); color_db->putVector("F",{Fx,Fy,Fz}); @@ -1293,7 +1269,7 @@ double ScaLBL_ColorModel::SeedPhaseField(const double seed_water_in_oil){ count= sumReduce( Dm->Comm, count); mass_loss= sumReduce( Dm->Comm, mass_loss); - if (rank == 0) printf("Remove mass %f from %f voxels \n",mass_loss,count); + if (rank == 0) printf("Remove mass %f from %f voxels \n",mass_lojavascript:void(0)ss,count); // Need to initialize Aq, Bq, Den, Phi directly //ScaLBL_CopyToDevice(Phi,phase.data(),7*Np*sizeof(double)); From e7e14a9b642a1ce6b1812fdc04660cfbf0b93e6f Mon Sep 17 00:00:00 2001 From: James McClure Date: Fri, 3 Apr 2020 07:29:14 -0400 Subject: [PATCH 086/121] clean up flow adapt --- models/ColorModel.cpp | 19 +------------------ 1 file changed, 1 insertion(+), 18 deletions(-) diff --git a/models/ColorModel.cpp b/models/ColorModel.cpp index ddf669bb..a35c3c44 100644 --- a/models/ColorModel.cpp +++ b/models/ColorModel.cpp @@ -500,7 +500,6 @@ void ScaLBL_ColorModel::Run(){ bool USE_SEED = false; bool USE_DIRECT = false; bool USE_MORPHOPEN_OIL = false; - bool USE_TARGET_VOLUME_CHANGE = false; int MAX_MORPH_TIMESTEPS = 50000; // maximum number of LBM timesteps to spend in morphological adaptation routine int MIN_STEADY_TIMESTEPS = 100000; int MAX_STEADY_TIMESTEPS = 200000; @@ -518,10 +517,6 @@ void ScaLBL_ColorModel::Run(){ double initial_volume = 0.0; double delta_volume = 0.0; double delta_volume_target = 0.0; - double RESIDUAL_ENDPOINT_THRESHOLD = 0.04; - double NOISE_THRESHOLD = 0.0; - double BUMP_RATE = 2.0; - bool USE_BUMP_RATE = false; /* history for morphological algoirthm */ double KRA_MORPH_FACTOR=0.5; @@ -569,18 +564,6 @@ void ScaLBL_ColorModel::Run(){ USE_MORPH = true; USE_TARGET_VOLUME_CHANGE = true; } - if (color_db->keyExists( 
"residual_endpoint_threshold" )){ - RESIDUAL_ENDPOINT_THRESHOLD = color_db->getScalar( "residual_endpoint_threshold" ); - } - NULL_USE( RESIDUAL_ENDPOINT_THRESHOLD ); - if (color_db->keyExists( "noise_threshold" )){ - NOISE_THRESHOLD = color_db->getScalar( "noise_threshold" ); - USE_BUMP_RATE = true; - } - if (color_db->keyExists( "bump_rate" )){ - BUMP_RATE = color_db->getScalar( "bump_rate" ); - USE_BUMP_RATE = true; - } if (color_db->keyExists( "capillary_number" )){ capillary_number = color_db->getScalar( "capillary_number" ); SET_CAPILLARY_NUMBER=true; @@ -1269,7 +1252,7 @@ double ScaLBL_ColorModel::SeedPhaseField(const double seed_water_in_oil){ count= sumReduce( Dm->Comm, count); mass_loss= sumReduce( Dm->Comm, mass_loss); - if (rank == 0) printf("Remove mass %f from %f voxels \n",mass_lojavascript:void(0)ss,count); + if (rank == 0) printf("Remove mass %f from %f voxels \n",mass_loss,count); // Need to initialize Aq, Bq, Den, Phi directly //ScaLBL_CopyToDevice(Phi,phase.data(),7*Np*sizeof(double)); From 377d259884a61cc815f1c4b230328f3c8b834a71 Mon Sep 17 00:00:00 2001 From: James McClure Date: Fri, 3 Apr 2020 07:30:17 -0400 Subject: [PATCH 087/121] clean up flow adapt --- models/ColorModel.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/models/ColorModel.cpp b/models/ColorModel.cpp index a35c3c44..02ff6844 100644 --- a/models/ColorModel.cpp +++ b/models/ColorModel.cpp @@ -551,18 +551,15 @@ void ScaLBL_ColorModel::Run(){ seed_water = 0.01; USE_SEED = true; USE_MORPH = true; - USE_TARGET_VOLUME_CHANGE = true; } else if (protocol == "open connected oil"){ morph_delta = 0.05; USE_MORPH = true; USE_MORPHOPEN_OIL = true; - USE_TARGET_VOLUME_CHANGE = true; } else if (protocol == "shell aggregation"){ morph_delta = 0.05; USE_MORPH = true; - USE_TARGET_VOLUME_CHANGE = true; } if (color_db->keyExists( "capillary_number" )){ capillary_number = color_db->getScalar( "capillary_number" ); From 3d31d2672256af61c25c05a25a9743091f970494 Mon Sep 17 00:00:00 2001 From: James McClure Date: Fri, 3 Apr 2020 07:37:29 -0400 Subject: [PATCH 088/121] clean up flow adapt --- models/ColorModel.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/models/ColorModel.cpp b/models/ColorModel.cpp index 02ff6844..954bca7e 100644 --- a/models/ColorModel.cpp +++ b/models/ColorModel.cpp @@ -566,9 +566,6 @@ void ScaLBL_ColorModel::Run(){ SET_CAPILLARY_NUMBER=true; //RESCALE_FORCE_MAX = 1; } -// if (analysis_db->keyExists( "rescale_force_count" )){ -// RESCALE_FORCE_MAX = analysis_db->getScalar( "rescale_force_count" ); -// } if (color_db->keyExists( "timestep" )){ timestep = color_db->getScalar( "timestep" ); } From 32f1bae784af2fe8c7e3e380c26cce9e6059ae58 Mon Sep 17 00:00:00 2001 From: James McClure Date: Fri, 3 Apr 2020 08:24:28 -0400 Subject: [PATCH 089/121] don't unpack distributions when external BC are applied (D3Q7/D3Q19) --- common/ScaLBL.cpp | 148 +++++++++++++++++++++++++++++----------------- 1 file changed, 94 insertions(+), 54 deletions(-) diff --git a/common/ScaLBL.cpp b/common/ScaLBL.cpp index 21656757..3e2d0f07 100644 --- a/common/ScaLBL.cpp +++ b/common/ScaLBL.cpp @@ -1011,19 +1011,6 @@ void ScaLBL_Communicator::RecvD3Q19AA(double *dist){ ScaLBL_D3Q19_Unpack(15,dvcRecvDist_Y,3*recvCount_Y,recvCount_Y,recvbuf_Y,dist,N); ScaLBL_D3Q19_Unpack(17,dvcRecvDist_Y,4*recvCount_Y,recvCount_Y,recvbuf_Y,dist,N); //................................................................................... - //...Packing for z face(6,12,13,16,17)................................ 
- ScaLBL_D3Q19_Unpack(6,dvcRecvDist_z,0,recvCount_z,recvbuf_z,dist,N); - ScaLBL_D3Q19_Unpack(12,dvcRecvDist_z,recvCount_z,recvCount_z,recvbuf_z,dist,N); - ScaLBL_D3Q19_Unpack(13,dvcRecvDist_z,2*recvCount_z,recvCount_z,recvbuf_z,dist,N); - ScaLBL_D3Q19_Unpack(16,dvcRecvDist_z,3*recvCount_z,recvCount_z,recvbuf_z,dist,N); - ScaLBL_D3Q19_Unpack(17,dvcRecvDist_z,4*recvCount_z,recvCount_z,recvbuf_z,dist,N); - //...Packing for Z face(5,11,14,15,18)................................ - ScaLBL_D3Q19_Unpack(5,dvcRecvDist_Z,0,recvCount_Z,recvbuf_Z,dist,N); - ScaLBL_D3Q19_Unpack(11,dvcRecvDist_Z,recvCount_Z,recvCount_Z,recvbuf_Z,dist,N); - ScaLBL_D3Q19_Unpack(14,dvcRecvDist_Z,2*recvCount_Z,recvCount_Z,recvbuf_Z,dist,N); - ScaLBL_D3Q19_Unpack(15,dvcRecvDist_Z,3*recvCount_Z,recvCount_Z,recvbuf_Z,dist,N); - ScaLBL_D3Q19_Unpack(18,dvcRecvDist_Z,4*recvCount_Z,recvCount_Z,recvbuf_Z,dist,N); - //.................................................................................. //...Pack the xy edge (8)................................ ScaLBL_D3Q19_Unpack(8,dvcRecvDist_xy,0,recvCount_xy,recvbuf_xy,dist,N); //...Pack the Xy edge (9)................................ @@ -1032,22 +1019,75 @@ void ScaLBL_Communicator::RecvD3Q19AA(double *dist){ ScaLBL_D3Q19_Unpack(10,dvcRecvDist_xY,0,recvCount_xY,recvbuf_xY,dist,N); //...Pack the XY edge (7)................................ ScaLBL_D3Q19_Unpack(7,dvcRecvDist_XY,0,recvCount_XY,recvbuf_XY,dist,N); - //...Pack the xz edge (12)................................ - ScaLBL_D3Q19_Unpack(12,dvcRecvDist_xz,0,recvCount_xz,recvbuf_xz,dist,N); - //...Pack the xZ edge (14)................................ - ScaLBL_D3Q19_Unpack(14,dvcRecvDist_xZ,0,recvCount_xZ,recvbuf_xZ,dist,N); - //...Pack the Xz edge (13)................................ - ScaLBL_D3Q19_Unpack(13,dvcRecvDist_Xz,0,recvCount_Xz,recvbuf_Xz,dist,N); - //...Pack the XZ edge (11)................................ - ScaLBL_D3Q19_Unpack(11,dvcRecvDist_XZ,0,recvCount_XZ,recvbuf_XZ,dist,N); - //...Pack the yz edge (16)................................ - ScaLBL_D3Q19_Unpack(16,dvcRecvDist_yz,0,recvCount_yz,recvbuf_yz,dist,N); - //...Pack the yZ edge (18)................................ - ScaLBL_D3Q19_Unpack(18,dvcRecvDist_yZ,0,recvCount_yZ,recvbuf_yZ,dist,N); - //...Pack the Yz edge (17)................................ - ScaLBL_D3Q19_Unpack(17,dvcRecvDist_Yz,0,recvCount_Yz,recvbuf_Yz,dist,N); - //...Pack the YZ edge (15)................................ - ScaLBL_D3Q19_Unpack(15,dvcRecvDist_YZ,0,recvCount_YZ,recvbuf_YZ,dist,N); + + if (BoundaryCondition > 0){ + if (kproc != 0){ + //...Packing for z face(6,12,13,16,17)................................ + ScaLBL_D3Q19_Unpack(6,dvcRecvDist_z,0,recvCount_z,recvbuf_z,dist,N); + ScaLBL_D3Q19_Unpack(12,dvcRecvDist_z,recvCount_z,recvCount_z,recvbuf_z,dist,N); + ScaLBL_D3Q19_Unpack(13,dvcRecvDist_z,2*recvCount_z,recvCount_z,recvbuf_z,dist,N); + ScaLBL_D3Q19_Unpack(16,dvcRecvDist_z,3*recvCount_z,recvCount_z,recvbuf_z,dist,N); + ScaLBL_D3Q19_Unpack(17,dvcRecvDist_z,4*recvCount_z,recvCount_z,recvbuf_z,dist,N); + //...Pack the xz edge (12)................................ + ScaLBL_D3Q19_Unpack(12,dvcRecvDist_xz,0,recvCount_xz,recvbuf_xz,dist,N); + //...Pack the Xz edge (13)................................ + ScaLBL_D3Q19_Unpack(13,dvcRecvDist_Xz,0,recvCount_Xz,recvbuf_Xz,dist,N); + //...Pack the yz edge (16)................................ + ScaLBL_D3Q19_Unpack(16,dvcRecvDist_yz,0,recvCount_yz,recvbuf_yz,dist,N); + //...Pack the Yz edge (17)................................ 
+ ScaLBL_D3Q19_Unpack(17,dvcRecvDist_Yz,0,recvCount_Yz,recvbuf_Yz,dist,N); + //.................................................................................. + } + if (kproc != nprocz-1){ + //...Packing for Z face(5,11,14,15,18)................................ + ScaLBL_D3Q19_Unpack(5,dvcRecvDist_Z,0,recvCount_Z,recvbuf_Z,dist,N); + ScaLBL_D3Q19_Unpack(11,dvcRecvDist_Z,recvCount_Z,recvCount_Z,recvbuf_Z,dist,N); + ScaLBL_D3Q19_Unpack(14,dvcRecvDist_Z,2*recvCount_Z,recvCount_Z,recvbuf_Z,dist,N); + ScaLBL_D3Q19_Unpack(15,dvcRecvDist_Z,3*recvCount_Z,recvCount_Z,recvbuf_Z,dist,N); + ScaLBL_D3Q19_Unpack(18,dvcRecvDist_Z,4*recvCount_Z,recvCount_Z,recvbuf_Z,dist,N); + //...Pack the xZ edge (14)................................ + ScaLBL_D3Q19_Unpack(14,dvcRecvDist_xZ,0,recvCount_xZ,recvbuf_xZ,dist,N); + //...Pack the XZ edge (11)................................ + ScaLBL_D3Q19_Unpack(11,dvcRecvDist_XZ,0,recvCount_XZ,recvbuf_XZ,dist,N); + //...Pack the yZ edge (18)................................ + ScaLBL_D3Q19_Unpack(18,dvcRecvDist_yZ,0,recvCount_yZ,recvbuf_yZ,dist,N); + //...Pack the YZ edge (15)................................ + ScaLBL_D3Q19_Unpack(15,dvcRecvDist_YZ,0,recvCount_YZ,recvbuf_YZ,dist,N); + //.................................................................................. + } + } + else { + //...Packing for z face(6,12,13,16,17)................................ + ScaLBL_D3Q19_Unpack(6,dvcRecvDist_z,0,recvCount_z,recvbuf_z,dist,N); + ScaLBL_D3Q19_Unpack(12,dvcRecvDist_z,recvCount_z,recvCount_z,recvbuf_z,dist,N); + ScaLBL_D3Q19_Unpack(13,dvcRecvDist_z,2*recvCount_z,recvCount_z,recvbuf_z,dist,N); + ScaLBL_D3Q19_Unpack(16,dvcRecvDist_z,3*recvCount_z,recvCount_z,recvbuf_z,dist,N); + ScaLBL_D3Q19_Unpack(17,dvcRecvDist_z,4*recvCount_z,recvCount_z,recvbuf_z,dist,N); + //...Packing for Z face(5,11,14,15,18)................................ + ScaLBL_D3Q19_Unpack(5,dvcRecvDist_Z,0,recvCount_Z,recvbuf_Z,dist,N); + ScaLBL_D3Q19_Unpack(11,dvcRecvDist_Z,recvCount_Z,recvCount_Z,recvbuf_Z,dist,N); + ScaLBL_D3Q19_Unpack(14,dvcRecvDist_Z,2*recvCount_Z,recvCount_Z,recvbuf_Z,dist,N); + ScaLBL_D3Q19_Unpack(15,dvcRecvDist_Z,3*recvCount_Z,recvCount_Z,recvbuf_Z,dist,N); + ScaLBL_D3Q19_Unpack(18,dvcRecvDist_Z,4*recvCount_Z,recvCount_Z,recvbuf_Z,dist,N); + //.................................................................................. + //...Pack the xz edge (12)................................ + ScaLBL_D3Q19_Unpack(12,dvcRecvDist_xz,0,recvCount_xz,recvbuf_xz,dist,N); + //...Pack the xZ edge (14)................................ + ScaLBL_D3Q19_Unpack(14,dvcRecvDist_xZ,0,recvCount_xZ,recvbuf_xZ,dist,N); + //...Pack the Xz edge (13)................................ + ScaLBL_D3Q19_Unpack(13,dvcRecvDist_Xz,0,recvCount_Xz,recvbuf_Xz,dist,N); + //...Pack the XZ edge (11)................................ + ScaLBL_D3Q19_Unpack(11,dvcRecvDist_XZ,0,recvCount_XZ,recvbuf_XZ,dist,N); + //...Pack the yz edge (16)................................ + ScaLBL_D3Q19_Unpack(16,dvcRecvDist_yz,0,recvCount_yz,recvbuf_yz,dist,N); + //...Pack the yZ edge (18)................................ + ScaLBL_D3Q19_Unpack(18,dvcRecvDist_yZ,0,recvCount_yZ,recvbuf_yZ,dist,N); + //...Pack the Yz edge (17)................................ + ScaLBL_D3Q19_Unpack(17,dvcRecvDist_Yz,0,recvCount_Yz,recvbuf_Yz,dist,N); + //...Pack the YZ edge (15)................................ + ScaLBL_D3Q19_Unpack(15,dvcRecvDist_YZ,0,recvCount_YZ,recvbuf_YZ,dist,N); + } + //................................................................................... 
Lock=false; // unlock the communicator after communications complete //................................................................................... @@ -1225,18 +1265,18 @@ void ScaLBL_Communicator::BiRecvD3Q7AA(double *Aq, double *Bq){ ScaLBL_D3Q7_Unpack(3,dvcRecvDist_Y,0,recvCount_Y,recvbuf_Y,Aq,N); ScaLBL_D3Q7_Unpack(3,dvcRecvDist_Y,recvCount_Y,recvCount_Y,recvbuf_Y,Bq,N); //................................................................................... - - if (BoundaryCondition > 0 && kproc == 0){ - // don't unpack little z - //...Packing for Z face(5,11,14,15,18)................................ - ScaLBL_D3Q7_Unpack(5,dvcRecvDist_Z,0,recvCount_Z,recvbuf_Z,Aq,N); - ScaLBL_D3Q7_Unpack(5,dvcRecvDist_Z,recvCount_Z,recvCount_Z,recvbuf_Z,Bq,N); - } - else if (BoundaryCondition > 0 && kproc == nprocz-1){ - // don't unpack big z - //...Packing for z face(6,12,13,16,17)................................ - ScaLBL_D3Q7_Unpack(6,dvcRecvDist_z,0,recvCount_z,recvbuf_z,Aq,N); - ScaLBL_D3Q7_Unpack(6,dvcRecvDist_z,recvCount_z,recvCount_z,recvbuf_z,Bq,N); + + if (BoundaryCondition > 0){ + if (kproc != 0){ + //...Packing for z face(6,12,13,16,17)................................ + ScaLBL_D3Q7_Unpack(6,dvcRecvDist_z,0,recvCount_z,recvbuf_z,Aq,N); + ScaLBL_D3Q7_Unpack(6,dvcRecvDist_z,recvCount_z,recvCount_z,recvbuf_z,Bq,N); + } + if (kproc != nprocz-1){ + //...Packing for Z face(5,11,14,15,18)................................ + ScaLBL_D3Q7_Unpack(5,dvcRecvDist_Z,0,recvCount_Z,recvbuf_Z,Aq,N); + ScaLBL_D3Q7_Unpack(5,dvcRecvDist_Z,recvCount_Z,recvCount_Z,recvbuf_Z,Bq,N); + } } else { //...Packing for z face(6,12,13,16,17)................................ @@ -1343,19 +1383,19 @@ void ScaLBL_Communicator::TriRecvD3Q7AA(double *Aq, double *Bq, double *Cq){ ScaLBL_D3Q7_Unpack(3,dvcRecvDist_Y,2*recvCount_Y,recvCount_Y,recvbuf_Y,Cq,N); //................................................................................... - if (BoundaryCondition > 0 && kproc == 0){ - // don't unpack little z - //...Packing for Z face(5,11,14,15,18)................................ - ScaLBL_D3Q7_Unpack(5,dvcRecvDist_Z,0,recvCount_Z,recvbuf_Z,Aq,N); - ScaLBL_D3Q7_Unpack(5,dvcRecvDist_Z,recvCount_Z,recvCount_Z,recvbuf_Z,Bq,N); - ScaLBL_D3Q7_Unpack(5,dvcRecvDist_Z,2*recvCount_Z,recvCount_Z,recvbuf_Z,Cq,N); - } - else if (BoundaryCondition > 0 && kproc == nprocz-1){ - // don't unpack big z - //...Packing for z face(6,12,13,16,17)................................ - ScaLBL_D3Q7_Unpack(6,dvcRecvDist_z,0,recvCount_z,recvbuf_z,Aq,N); - ScaLBL_D3Q7_Unpack(6,dvcRecvDist_z,recvCount_z,recvCount_z,recvbuf_z,Bq,N); - ScaLBL_D3Q7_Unpack(6,dvcRecvDist_z,2*recvCount_z,recvCount_z,recvbuf_z,Cq,N); + if (BoundaryCondition > 0){ + if (kproc != 0){ + //...Packing for z face(6,12,13,16,17)................................ + ScaLBL_D3Q7_Unpack(6,dvcRecvDist_z,0,recvCount_z,recvbuf_z,Aq,N); + ScaLBL_D3Q7_Unpack(6,dvcRecvDist_z,recvCount_z,recvCount_z,recvbuf_z,Bq,N); + ScaLBL_D3Q7_Unpack(6,dvcRecvDist_z,2*recvCount_z,recvCount_z,recvbuf_z,Cq,N); + } + if (kproc != nprocz-1){ + //...Packing for Z face(5,11,14,15,18)................................ + ScaLBL_D3Q7_Unpack(5,dvcRecvDist_Z,0,recvCount_Z,recvbuf_Z,Aq,N); + ScaLBL_D3Q7_Unpack(5,dvcRecvDist_Z,recvCount_Z,recvCount_Z,recvbuf_Z,Bq,N); + ScaLBL_D3Q7_Unpack(5,dvcRecvDist_Z,2*recvCount_Z,recvCount_Z,recvbuf_Z,Cq,N); + } } else { //...Packing for z face(6,12,13,16,17)................................ 
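Patch 089 above changes the receive path so that a rank holding the physical inlet (kproc == 0) or outlet (kproc == nprocz-1) no longer overwrites its boundary distributions with data wrapped around from the periodic neighbor; those populations are instead set by the external boundary routines (pressure, flux, and later the reflection condition) applied after the receive. A schematic of the guard, written as a self-contained sketch in which unpack_z_face() and unpack_Z_face() are illustrative stand-ins for the ScaLBL_D3Q19_Unpack call sequences:

static void unpack_z_face(void) { /* q = 6,12,13,16,17 plus the xz, Xz, yz, Yz edges */ }
static void unpack_Z_face(void) { /* q = 5,11,14,15,18 plus the xZ, XZ, yZ, YZ edges */ }

static void recv_unpack(int BoundaryCondition, int kproc, int nprocz) {
    if (BoundaryCondition > 0) {
        if (kproc != 0)          unpack_z_face();  /* inlet rank keeps its z-face set */
        if (kproc != nprocz - 1) unpack_Z_face();  /* outlet rank keeps its Z-face set */
    } else {
        unpack_z_face();   /* fully periodic: unpack both faces on every rank */
        unpack_Z_face();
    }
}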
From e641e2e3ed01bdc9588a541efbd448804724e335 Mon Sep 17 00:00:00 2001 From: James McClure Date: Fri, 3 Apr 2020 08:26:48 -0400 Subject: [PATCH 090/121] remove old comments --- common/ScaLBL.h | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/common/ScaLBL.h b/common/ScaLBL.h index 901e0e3b..3bf50f6f 100644 --- a/common/ScaLBL.h +++ b/common/ScaLBL.h @@ -119,11 +119,6 @@ extern "C" void ScaLBL_D3Q19_Gradient_DFH(int *NeighborList, double *Phi, double // BOUNDARY CONDITION ROUTINES -//extern "C" void ScaLBL_D3Q19_Pressure_BC_z(double *disteven, double *distodd, double din, -// int Nx, int Ny, int Nz); -//extern "C" void ScaLBL_D3Q19_Pressure_BC_Z(double *disteven, double *distodd, double dout, -// int Nx, int Ny, int Nz, int outlet); - extern "C" void ScaLBL_D3Q19_AAodd_Pressure_BC_z(int *neighborList, int *list, double *dist, double din, int count, int Np); extern "C" void ScaLBL_D3Q19_AAodd_Pressure_BC_Z(int *neighborList, int *list, double *dist, double dout, int count, int Np); @@ -178,18 +173,8 @@ public: int LastInterior(); int MemoryOptimizedLayoutAA(IntArray &Map, int *neighborList, signed char *id, int Np); -// void MemoryOptimizedLayout(IntArray &Map, int *neighborList, char *id, int Np); -// void MemoryOptimizedLayoutFull(IntArray &Map, int *neighborList, char *id, int Np); -// void MemoryDenseLayout(IntArray &Map, int *neighborList, char *id, int Np); -// void MemoryDenseLayoutFull(IntArray &Map, int *neighborList, char *id, int Np); -// void SendD3Q19(double *f_even, double *f_odd); -// void RecvD3Q19(double *f_even, double *f_odd); -// void SendD3Q19AA(double *f_even, double *f_odd); -// void RecvD3Q19AA(double *f_even, double *f_odd); void SendD3Q19AA(double *dist); void RecvD3Q19AA(double *dist); -// void BiSendD3Q7(double *A_even, double *A_odd, double *B_even, double *B_odd); -// void BiRecvD3Q7(double *A_even, double *A_odd, double *B_even, double *B_odd); void BiSendD3Q7AA(double *Aq, double *Bq); void BiRecvD3Q7AA(double *Aq, double *Bq); void TriSendD3Q7AA(double *Aq, double *Bq, double *Cq); @@ -206,9 +191,6 @@ public: void D3Q19_Pressure_BC_Z(int *neighborList, double *fq, double dout, int time); double D3Q19_Flux_BC_z(int *neighborList, double *fq, double flux, int time); -// void TestSendD3Q19(double *f_even, double *f_odd); -// void TestRecvD3Q19(double *f_even, double *f_odd); - // Debugging and unit testing functions void PrintD3Q19(); From e64d44e43835ee268d367b82522c385e80fb1e72 Mon Sep 17 00:00:00 2001 From: James McClure Date: Fri, 3 Apr 2020 09:30:55 -0400 Subject: [PATCH 091/121] added D3Q19 reflection BVC --- common/ScaLBL.cpp | 11 +++++++++ common/ScaLBL.h | 6 +++++ cpu/D3Q19.cpp | 36 +++++++++++++++++++++++++++++ gpu/D3Q19.cu | 59 +++++++++++++++++++++++++++++++++++++++++++---- 4 files changed, 107 insertions(+), 5 deletions(-) diff --git a/common/ScaLBL.cpp b/common/ScaLBL.cpp index 3e2d0f07..8f2aacee 100644 --- a/common/ScaLBL.cpp +++ b/common/ScaLBL.cpp @@ -1633,6 +1633,17 @@ double ScaLBL_Communicator::D3Q19_Flux_BC_z(int *neighborList, double *fq, doubl return din; } +void ScaLBL_Communicator::D3Q19_Reflection_BC_z(int *neighborList, double *fq){ + if (kproc == 0) + ScaLBL_D3Q19_AAeven_Reflection_BC_z(dvcSendList_z, fq, sendCount_z, N); + +} + +void ScaLBL_Communicator::D3Q19_Reflection_BC_Z(int *neighborList, double *fq){ + if (kproc == nprocz-1) + ScaLBL_D3Q19_AAeven_Reflection_BC_Z(dvcSendList_Z, fq, sendCount_Z, N); +} + void ScaLBL_Communicator::PrintD3Q19(){ printf("Printing D3Q19 communication buffer contents \n"); diff 
--git a/common/ScaLBL.h b/common/ScaLBL.h index 3bf50f6f..51ee66f4 100644 --- a/common/ScaLBL.h +++ b/common/ScaLBL.h @@ -137,6 +137,10 @@ extern "C" void ScaLBL_Color_BC_z(int *list, int *Map, double *Phi, double *Den, extern "C" void ScaLBL_Color_BC_Z(int *list, int *Map, double *Phi, double *Den, double vA, double vB, int count, int Np); +extern "C" void ScaLBL_D3Q19_AAeven_Reflection_BC_z(int *list, double *dist, int count, int Np); + +extern "C" void ScaLBL_D3Q19_AAeven_Reflection_BC_Z(int *list, double *dist, int count, int Np); + extern "C" void ScaLBL_SetSlice_z(double *Phi, double value, int Nx, int Ny, int Nz, int Slice); class ScaLBL_Communicator{ @@ -189,6 +193,8 @@ public: void Color_BC_Z(int *Map, double *Phi, double *Den, double vA, double vB); void D3Q19_Pressure_BC_z(int *neighborList, double *fq, double din, int time); void D3Q19_Pressure_BC_Z(int *neighborList, double *fq, double dout, int time); + void D3Q19_Reflection_BC_z(int *neighborList, double *fq); + void D3Q19_Reflection_BC_Z(int *neighborList, double *fq); double D3Q19_Flux_BC_z(int *neighborList, double *fq, double flux, int time); // Debugging and unit testing functions diff --git a/cpu/D3Q19.cpp b/cpu/D3Q19.cpp index 2c0e686d..2c67501c 100644 --- a/cpu/D3Q19.cpp +++ b/cpu/D3Q19.cpp @@ -448,6 +448,42 @@ extern "C" double ScaLBL_D3Q19_Flux_BC_Z(double *disteven, double *distodd, doub return dout; } +extern "C" void ScaLBL_D3Q19_AAeven_Reflection_BC_z(int *list, double *dist, int count, int Np){ + for (int idx=0; idx>>(disteven, distodd, dout, Nx, Ny, Nz, outlet); -//} +extern "C" void ScaLBL_D3Q19_Reflection_BC_z(int *list, double *dist, int count, int Np){ + int GRID = count / 512 + 1; + dvc_ScaLBL_D3Q19_Reflection_BC_z<<>>(neighborList, list, dist, count, N); + cudaError_t err = cudaGetLastError(); + if (cudaSuccess != err){ + printf("CUDA error in ScaLBL_D3Q19_Reflection_BC_z (kernel): %s \n",cudaGetErrorString(err)); + } +} + +extern "C" void ScaLBL_D3Q19_Reflection_BC_Z(int *list, double *dist, int count, int Np){ + int GRID = count / 512 + 1; + dvc_ScaLBL_D3Q19_Reflection_BC_Z<<>>(neighborList, list, dist, count, N); + cudaError_t err = cudaGetLastError(); + if (cudaSuccess != err){ + printf("CUDA error in ScaLBL_D3Q19_Reflection_BC_Z (kernel): %s \n",cudaGetErrorString(err)); + } +} extern "C" void ScaLBL_D3Q19_AAeven_MRT(double *dist, int start, int finish, int Np, double rlx_setA, double rlx_setB, double Fx, double Fy, double Fz){ From 81f25486330fbe833bbaea5bb23d932a815f8e30 Mon Sep 17 00:00:00 2001 From: James McClure Date: Fri, 3 Apr 2020 09:34:35 -0400 Subject: [PATCH 092/121] fix a few warnings --- common/ScaLBL.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/common/ScaLBL.cpp b/common/ScaLBL.cpp index 8f2aacee..71beb152 100644 --- a/common/ScaLBL.cpp +++ b/common/ScaLBL.cpp @@ -1520,7 +1520,7 @@ void ScaLBL_Communicator::RecvHalo(double *data){ void ScaLBL_Communicator::RegularLayout(IntArray map, const double *data, DoubleArray ®data){ // Gets data from the device and stores in regular layout - int i,j,k,n,idx; + int i,j,k,idx; int Nx = map.size(0); int Ny = map.size(1); int Nz = map.size(2); @@ -1551,7 +1551,6 @@ void ScaLBL_Communicator::RegularLayout(IntArray map, const double *data, Double void ScaLBL_Communicator::Color_BC_z(int *Map, double *Phi, double *Den, double vA, double vB){ - double Value=(vA-vB)/(vA+vB); if (kproc == 0) { // Set the phase indicator field and density on the z inlet ScaLBL_Color_BC_z(dvcSendList_z, Map, Phi, Den, vA, vB, sendCount_z, N); @@ 
-1560,7 +1559,6 @@ void ScaLBL_Communicator::Color_BC_z(int *Map, double *Phi, double *Den, double } void ScaLBL_Communicator::Color_BC_Z(int *Map, double *Phi, double *Den, double vA, double vB){ - double Value=(vA-vB)/(vA+vB); if (kproc == nprocz-1){ // Set the phase indicator field and density on the Z outlet ScaLBL_Color_BC_Z(dvcSendList_Z, Map, Phi, Den, vA, vB, sendCount_Z, N); From e62208caaabec617c84e8b514e15b4b7f7da450e Mon Sep 17 00:00:00 2001 From: James McClure Date: Fri, 3 Apr 2020 09:52:23 -0400 Subject: [PATCH 093/121] add reflection BC to MRT / Color --- common/ScaLBL.cpp | 4 ++-- common/ScaLBL.h | 4 ++-- models/ColorModel.cpp | 12 ++++++++++-- models/MRTModel.cpp | 26 ++++++++++++++++++++++++++ 4 files changed, 40 insertions(+), 6 deletions(-) diff --git a/common/ScaLBL.cpp b/common/ScaLBL.cpp index 71beb152..a612bc73 100644 --- a/common/ScaLBL.cpp +++ b/common/ScaLBL.cpp @@ -1631,13 +1631,13 @@ double ScaLBL_Communicator::D3Q19_Flux_BC_z(int *neighborList, double *fq, doubl return din; } -void ScaLBL_Communicator::D3Q19_Reflection_BC_z(int *neighborList, double *fq){ +void ScaLBL_Communicator::D3Q19_Reflection_BC_z(double *fq){ if (kproc == 0) ScaLBL_D3Q19_AAeven_Reflection_BC_z(dvcSendList_z, fq, sendCount_z, N); } -void ScaLBL_Communicator::D3Q19_Reflection_BC_Z(int *neighborList, double *fq){ +void ScaLBL_Communicator::D3Q19_Reflection_BC_Z(double *fq){ if (kproc == nprocz-1) ScaLBL_D3Q19_AAeven_Reflection_BC_Z(dvcSendList_Z, fq, sendCount_Z, N); } diff --git a/common/ScaLBL.h b/common/ScaLBL.h index 51ee66f4..bac60b0d 100644 --- a/common/ScaLBL.h +++ b/common/ScaLBL.h @@ -193,8 +193,8 @@ public: void Color_BC_Z(int *Map, double *Phi, double *Den, double vA, double vB); void D3Q19_Pressure_BC_z(int *neighborList, double *fq, double din, int time); void D3Q19_Pressure_BC_Z(int *neighborList, double *fq, double dout, int time); - void D3Q19_Reflection_BC_z(int *neighborList, double *fq); - void D3Q19_Reflection_BC_Z(int *neighborList, double *fq); + void D3Q19_Reflection_BC_z(double *fq); + void D3Q19_Reflection_BC_Z(double *fq); double D3Q19_Flux_BC_z(int *neighborList, double *fq, double flux, int time); // Debugging and unit testing functions diff --git a/models/ColorModel.cpp b/models/ColorModel.cpp index 954bca7e..4e9720ed 100644 --- a/models/ColorModel.cpp +++ b/models/ColorModel.cpp @@ -673,7 +673,7 @@ void ScaLBL_ColorModel::Run(){ // Perform the collision operation ScaLBL_Comm->SendD3Q19AA(fq); //READ FROM NORMAL - if (BoundaryCondition > 0){ + if (BoundaryCondition > 0 && BoundaryCondition < 5){ ScaLBL_Comm->Color_BC_z(dvcMap, Phi, Den, inletA, inletB); ScaLBL_Comm->Color_BC_Z(dvcMap, Phi, Den, outletA, outletB); } @@ -694,6 +694,10 @@ void ScaLBL_ColorModel::Run(){ din = ScaLBL_Comm->D3Q19_Flux_BC_z(NeighborList, fq, flux, timestep); ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, fq, dout, timestep); } + else if (BoundaryCondition == 5){ + ScaLBL_Comm->D3Q19_Reflection_BC_z(fq); + ScaLBL_Comm->D3Q19_Reflection_BC_Z(fq); + } ScaLBL_D3Q19_AAodd_Color(NeighborList, dvcMap, fq, Aq, Bq, Den, Phi, Velocity, rhoA, rhoB, tauA, tauB, alpha, beta, Fx, Fy, Fz, Nx, Nx*Ny, 0, ScaLBL_Comm->LastExterior(), Np); ScaLBL_DeviceBarrier(); @@ -711,7 +715,7 @@ void ScaLBL_ColorModel::Run(){ // Perform the collision operation ScaLBL_Comm->SendD3Q19AA(fq); //READ FORM NORMAL // Halo exchange for phase field - if (BoundaryCondition > 0){ + if (BoundaryCondition > 0 && BoundaryCondition < 5){ ScaLBL_Comm->Color_BC_z(dvcMap, Phi, Den, inletA, inletB); ScaLBL_Comm->Color_BC_Z(dvcMap, 
Phi, Den, outletA, outletB); } @@ -730,6 +734,10 @@ void ScaLBL_ColorModel::Run(){ din = ScaLBL_Comm->D3Q19_Flux_BC_z(NeighborList, fq, flux, timestep); ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, fq, dout, timestep); } + else if (BoundaryCondition == 5){ + ScaLBL_Comm->D3Q19_Reflection_BC_z(fq); + ScaLBL_Comm->D3Q19_Reflection_BC_Z(fq); + } ScaLBL_D3Q19_AAeven_Color(dvcMap, fq, Aq, Bq, Den, Phi, Velocity, rhoA, rhoB, tauA, tauB, alpha, beta, Fx, Fy, Fz, Nx, Nx*Ny, 0, ScaLBL_Comm->LastExterior(), Np); ScaLBL_DeviceBarrier(); diff --git a/models/MRTModel.cpp b/models/MRTModel.cpp index c1db7c1c..acfb8821 100644 --- a/models/MRTModel.cpp +++ b/models/MRTModel.cpp @@ -238,12 +238,38 @@ void ScaLBL_MRTModel::Run(){ ScaLBL_Comm->SendD3Q19AA(fq); //READ FROM NORMAL ScaLBL_D3Q19_AAodd_MRT(NeighborList, fq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np, rlx_setA, rlx_setB, Fx, Fy, Fz); ScaLBL_Comm->RecvD3Q19AA(fq); //WRITE INTO OPPOSITE + // Set boundary conditions + if (BoundaryCondition == 3){ + ScaLBL_Comm->D3Q19_Pressure_BC_z(NeighborList, fq, din, timestep); + ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, fq, dout, timestep); + } + else if (BoundaryCondition == 4){ + din = ScaLBL_Comm->D3Q19_Flux_BC_z(NeighborList, fq, flux, timestep); + ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, fq, dout, timestep); + } + else if (BoundaryCondition == 5){ + ScaLBL_Comm->D3Q19_Reflection_BC_z(fq); + ScaLBL_Comm->D3Q19_Reflection_BC_Z(fq); + } ScaLBL_D3Q19_AAodd_MRT(NeighborList, fq, 0, ScaLBL_Comm->LastExterior(), Np, rlx_setA, rlx_setB, Fx, Fy, Fz); ScaLBL_DeviceBarrier(); MPI_Barrier(comm); timestep++; ScaLBL_Comm->SendD3Q19AA(fq); //READ FORM NORMAL ScaLBL_D3Q19_AAeven_MRT(fq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np, rlx_setA, rlx_setB, Fx, Fy, Fz); ScaLBL_Comm->RecvD3Q19AA(fq); //WRITE INTO OPPOSITE + // Set boundary conditions + if (BoundaryCondition == 3){ + ScaLBL_Comm->D3Q19_Pressure_BC_z(NeighborList, fq, din, timestep); + ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, fq, dout, timestep); + } + else if (BoundaryCondition == 4){ + din = ScaLBL_Comm->D3Q19_Flux_BC_z(NeighborList, fq, flux, timestep); + ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, fq, dout, timestep); + } + else if (BoundaryCondition == 5){ + ScaLBL_Comm->D3Q19_Reflection_BC_z(fq); + ScaLBL_Comm->D3Q19_Reflection_BC_Z(fq); + } ScaLBL_D3Q19_AAeven_MRT(fq, 0, ScaLBL_Comm->LastExterior(), Np, rlx_setA, rlx_setB, Fx, Fy, Fz); ScaLBL_DeviceBarrier(); MPI_Barrier(comm); //************************************************************************/ From f72d401be6c88ed60ed3c71609a9c163f2a687bb Mon Sep 17 00:00:00 2001 From: James McClure Date: Fri, 3 Apr 2020 09:56:56 -0400 Subject: [PATCH 094/121] fix bugs in cu --- gpu/D3Q19.cu | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/gpu/D3Q19.cu b/gpu/D3Q19.cu index d43c5b29..f6f396be 100644 --- a/gpu/D3Q19.cu +++ b/gpu/D3Q19.cu @@ -1758,7 +1758,7 @@ __global__ void dvc_ScaLBL_D3Q19_AAeven_Pressure_BC_Z(int *list, double *dist, //................................................... 
} } -__global__ void dvc_ScaLBL_D3Q19_Reflection_BC_z(int *d_neighborList, int *list, double *dist, int count, int Np){ +__global__ void dvc_ScaLBL_D3Q19_Reflection_BC_z(int *list, double *dist, int count, int Np){ int idx, n; idx = blockIdx.x*blockDim.x + threadIdx.x; if (idx < count){ @@ -1777,7 +1777,7 @@ __global__ void dvc_ScaLBL_D3Q19_Reflection_BC_z(int *d_neighborList, int *list } } -__global__ void dvc_ScaLBL_D3Q19_Reflection_BC_Z(int *d_neighborList, int *list, double *dist, int count, int Np){ +__global__ void dvc_ScaLBL_D3Q19_Reflection_BC_Z(int *list, double *dist, int count, int Np){ int idx, n; idx = blockIdx.x*blockDim.x + threadIdx.x; if (idx < count){ @@ -2691,7 +2691,7 @@ extern "C" double deviceReduce(double *in, double* out, int N) { extern "C" void ScaLBL_D3Q19_Reflection_BC_z(int *list, double *dist, int count, int Np){ int GRID = count / 512 + 1; - dvc_ScaLBL_D3Q19_Reflection_BC_z<<<GRID,512>>>(neighborList, list, dist, count, N); + dvc_ScaLBL_D3Q19_Reflection_BC_z<<<GRID,512>>>(list, dist, count, Np); cudaError_t err = cudaGetLastError(); if (cudaSuccess != err){ printf("CUDA error in ScaLBL_D3Q19_Reflection_BC_z (kernel): %s \n",cudaGetErrorString(err)); @@ -2700,7 +2700,7 @@ extern "C" void ScaLBL_D3Q19_Reflection_BC_Z(int *list, double *dist, int count, extern "C" void ScaLBL_D3Q19_Reflection_BC_Z(int *list, double *dist, int count, int Np){ int GRID = count / 512 + 1; - dvc_ScaLBL_D3Q19_Reflection_BC_Z<<<GRID,512>>>(neighborList, list, dist, count, N); + dvc_ScaLBL_D3Q19_Reflection_BC_Z<<<GRID,512>>>(list, dist, count, Np); cudaError_t err = cudaGetLastError(); if (cudaSuccess != err){ printf("CUDA error in ScaLBL_D3Q19_Reflection_BC_Z (kernel): %s \n",cudaGetErrorString(err)); From 10b630662a8f5016458ca71b1fc0150765aa9373 Mon Sep 17 00:00:00 2001 From: James McClure Date: Fri, 3 Apr 2020 10:00:17 -0400 Subject: [PATCH 095/121] fix reflection name --- common/ScaLBL.cpp | 4 ++-- common/ScaLBL.h | 4 ++-- cpu/D3Q19.cpp | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/common/ScaLBL.cpp b/common/ScaLBL.cpp index a612bc73..0fc3d6d2 100644 --- a/common/ScaLBL.cpp +++ b/common/ScaLBL.cpp @@ -1633,13 +1633,13 @@ double ScaLBL_Communicator::D3Q19_Flux_BC_z(int *neighborList, double *fq, doubl void ScaLBL_Communicator::D3Q19_Reflection_BC_z(double *fq){ if (kproc == 0) - ScaLBL_D3Q19_AAeven_Reflection_BC_z(dvcSendList_z, fq, sendCount_z, N); + ScaLBL_D3Q19_Reflection_BC_z(dvcSendList_z, fq, sendCount_z, N); } void ScaLBL_Communicator::D3Q19_Reflection_BC_Z(double *fq){ if (kproc == nprocz-1) - ScaLBL_D3Q19_AAeven_Reflection_BC_Z(dvcSendList_Z, fq, sendCount_Z, N); + ScaLBL_D3Q19_Reflection_BC_Z(dvcSendList_Z, fq, sendCount_Z, N); } void ScaLBL_Communicator::PrintD3Q19(){ diff --git a/common/ScaLBL.h b/common/ScaLBL.h index bac60b0d..11445d2a 100644 --- a/common/ScaLBL.h +++ b/common/ScaLBL.h @@ -137,9 +137,9 @@ extern "C" void ScaLBL_Color_BC_z(int *list, int *Map, double *Phi, double *Den, extern "C" void ScaLBL_Color_BC_Z(int *list, int *Map, double *Phi, double *Den, double vA, double vB, int count, int Np); -extern "C" void ScaLBL_D3Q19_AAeven_Reflection_BC_z(int *list, double *dist, int count, int Np); +extern "C" void ScaLBL_D3Q19_Reflection_BC_z(int *list, double *dist, int count, int Np); -extern "C" void ScaLBL_D3Q19_AAeven_Reflection_BC_Z(int *list, double *dist, int count, int Np); +extern "C" void ScaLBL_D3Q19_Reflection_BC_Z(int *list, double *dist, int count, int Np); extern "C" void ScaLBL_SetSlice_z(double *Phi, double value, int Nx, int Ny, int Nz, int Slice); diff --git
a/cpu/D3Q19.cpp b/cpu/D3Q19.cpp index 2c67501c..b4f7c005 100644 --- a/cpu/D3Q19.cpp +++ b/cpu/D3Q19.cpp @@ -448,7 +448,7 @@ extern "C" double ScaLBL_D3Q19_Flux_BC_Z(double *disteven, double *distodd, doub return dout; } -extern "C" void ScaLBL_D3Q19_AAeven_Reflection_BC_z(int *list, double *dist, int count, int Np){ +extern "C" void ScaLBL_D3Q19_Reflection_BC_z(int *list, double *dist, int count, int Np){ for (int idx=0; idx Date: Fri, 3 Apr 2020 16:30:54 -0400 Subject: [PATCH 096/121] fix cudamemcpy bug --- models/ColorModel.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/models/ColorModel.cpp b/models/ColorModel.cpp index 4e9720ed..9c46be83 100644 --- a/models/ColorModel.cpp +++ b/models/ColorModel.cpp @@ -1190,7 +1190,7 @@ double ScaLBL_ColorModel::SeedPhaseField(const double seed_water_in_oil){ ScaLBL_CopyToHost(Aq_tmp, Aq, 7*Np*sizeof(double)); ScaLBL_CopyToHost(Bq_tmp, Bq, 7*Np*sizeof(double)); - ScaLBL_CopyToHost(Vel_tmp, Velocity, 7*Np*sizeof(double)); + ScaLBL_CopyToHost(Vel_tmp, Velocity, 3*Np*sizeof(double)); //Extract averged velocity double vx_glb = (Averages->gnb.Px+Averages->gwb.Px)/(Averages->gnb.M+Averages->gwb.M); From e4d836e7fcf7b8e268d2ed2e26601d5cbbed23d8 Mon Sep 17 00:00:00 2001 From: James McClure Date: Fri, 3 Apr 2020 20:24:29 -0400 Subject: [PATCH 097/121] add reflection condition for color grad --- common/ScaLBL.cpp | 19 ++++++++++++++----- common/ScaLBL.h | 2 ++ cpu/Color.cpp | 10 +++++++++- gpu/Color.cu | 13 +++++++++++++ 4 files changed, 38 insertions(+), 6 deletions(-) diff --git a/common/ScaLBL.cpp b/common/ScaLBL.cpp index 0fc3d6d2..fe1ce24b 100644 --- a/common/ScaLBL.cpp +++ b/common/ScaLBL.cpp @@ -1549,21 +1549,30 @@ void ScaLBL_Communicator::RegularLayout(IntArray map, const double *data, Double delete [] TmpDat; } - void ScaLBL_Communicator::Color_BC_z(int *Map, double *Phi, double *Den, double vA, double vB){ if (kproc == 0) { - // Set the phase indicator field and density on the z inlet - ScaLBL_Color_BC_z(dvcSendList_z, Map, Phi, Den, vA, vB, sendCount_z, N); + if (BoundaryCondition == 5){ + ScaLBL_CopySlice_z(Phi,Value,Nx,Ny,Nz,1,0); + } + else { + // Set the phase indicator field and density on the z inlet + ScaLBL_Color_BC_z(dvcSendList_z, Map, Phi, Den, vA, vB, sendCount_z, N); + } //ScaLBL_SetSlice_z(Phi,Value,Nx,Ny,Nz,0); } } void ScaLBL_Communicator::Color_BC_Z(int *Map, double *Phi, double *Den, double vA, double vB){ if (kproc == nprocz-1){ + if (BoundaryCondition == 5){ + ScaLBL_CopySlice_z(Phi,Value,Nx,Ny,Nz,Nz-2,Nz-1); + } + else { // Set the phase indicator field and density on the Z outlet - ScaLBL_Color_BC_Z(dvcSendList_Z, Map, Phi, Den, vA, vB, sendCount_Z, N); - //ScaLBL_SetSlice_z(Phi,Value,Nx,Ny,Nz,Nz-1); + ScaLBL_Color_BC_Z(dvcSendList_Z, Map, Phi, Den, vA, vB, sendCount_Z, N); + } } + } void ScaLBL_Communicator::D3Q19_Pressure_BC_z(int *neighborList, double *fq, double din, int time){ diff --git a/common/ScaLBL.h b/common/ScaLBL.h index 11445d2a..90209679 100644 --- a/common/ScaLBL.h +++ b/common/ScaLBL.h @@ -143,6 +143,8 @@ extern "C" void ScaLBL_D3Q19_Reflection_BC_Z(int *list, double *dist, int count, extern "C" void ScaLBL_SetSlice_z(double *Phi, double value, int Nx, int Ny, int Nz, int Slice); +extern "C" void ScaLBL_CopySlice_z(double *Phi, double value, int Nx, int Ny, int Nz, int Source, int Destination); + class ScaLBL_Communicator{ public: //...................................................................................... 
diff --git a/cpu/Color.cpp b/cpu/Color.cpp index 7ae84341..1b1ce0c2 100644 --- a/cpu/Color.cpp +++ b/cpu/Color.cpp @@ -1869,7 +1869,7 @@ extern "C" void ScaLBL_D3Q19_AAodd_Color(int *neighborList, int *Map, double *di const double mrt_V12=0.04166666666666666; for (int n=start; n>>(Phi,value,Nx,Ny,Nz,Slice,Dest); +} From e1e603b25f1be9598bc1d7ca710433d23a6f19d4 Mon Sep 17 00:00:00 2001 From: James McClure Date: Fri, 3 Apr 2020 20:26:32 -0400 Subject: [PATCH 098/121] add reflection condition for color grad --- common/ScaLBL.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/common/ScaLBL.cpp b/common/ScaLBL.cpp index fe1ce24b..007a7290 100644 --- a/common/ScaLBL.cpp +++ b/common/ScaLBL.cpp @@ -1552,7 +1552,7 @@ void ScaLBL_Communicator::RegularLayout(IntArray map, const double *data, Double void ScaLBL_Communicator::Color_BC_z(int *Map, double *Phi, double *Den, double vA, double vB){ if (kproc == 0) { if (BoundaryCondition == 5){ - ScaLBL_CopySlice_z(Phi,Value,Nx,Ny,Nz,1,0); + ScaLBL_CopySlice_z(Phi,Nx,Ny,Nz,1,0); } else { // Set the phase indicator field and density on the z inlet @@ -1565,7 +1565,7 @@ void ScaLBL_Communicator::Color_BC_z(int *Map, double *Phi, double *Den, double void ScaLBL_Communicator::Color_BC_Z(int *Map, double *Phi, double *Den, double vA, double vB){ if (kproc == nprocz-1){ if (BoundaryCondition == 5){ - ScaLBL_CopySlice_z(Phi,Value,Nx,Ny,Nz,Nz-2,Nz-1); + ScaLBL_CopySlice_z(Phi,Nx,Ny,Nz,Nz-2,Nz-1); } else { // Set the phase indicator field and density on the Z outlet From 735b3f5d3ea838871aa8bc69b3dcdcc53046a52c Mon Sep 17 00:00:00 2001 From: James McClure Date: Fri, 3 Apr 2020 20:29:37 -0400 Subject: [PATCH 099/121] fix argfs --- common/ScaLBL.h | 2 +- cpu/Color.cpp | 2 +- gpu/Color.cu | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/common/ScaLBL.h b/common/ScaLBL.h index 90209679..92956f1f 100644 --- a/common/ScaLBL.h +++ b/common/ScaLBL.h @@ -143,7 +143,7 @@ extern "C" void ScaLBL_D3Q19_Reflection_BC_Z(int *list, double *dist, int count, extern "C" void ScaLBL_SetSlice_z(double *Phi, double value, int Nx, int Ny, int Nz, int Slice); -extern "C" void ScaLBL_CopySlice_z(double *Phi, double value, int Nx, int Ny, int Nz, int Source, int Destination); +extern "C" void ScaLBL_CopySlice_z(double *Phi, int Nx, int Ny, int Nz, int Source, int Destination); class ScaLBL_Communicator{ public: diff --git a/cpu/Color.cpp b/cpu/Color.cpp index 1b1ce0c2..35cbd5fd 100644 --- a/cpu/Color.cpp +++ b/cpu/Color.cpp @@ -2810,7 +2810,7 @@ extern "C" void ScaLBL_PhaseField_Init(int *Map, double *Phi, double *Den, doubl } } -extern "C" void ScaLBL_CopySlice_z(double *Phi, double value, int Nx, int Ny, int Nz, int Source, int Dest){ +extern "C" void ScaLBL_CopySlice_z(double *Phi, int Nx, int Ny, int Nz, int Source, int Dest){ int n; double value; for (n=0; n>>(Phi,value,Nx,Ny,Nz,Slice,Dest); + dvc_ScaLBL_CopySlice_z<<>>(Phi,Nx,Ny,Nz,Slice,Dest); } From 354067e2da4d8b21e7bec7902086810bbd3958d0 Mon Sep 17 00:00:00 2001 From: James McClure Date: Fri, 3 Apr 2020 20:31:13 -0400 Subject: [PATCH 100/121] fix argfs --- gpu/Color.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gpu/Color.cu b/gpu/Color.cu index 389a8dc7..7fd87e30 100644 --- a/gpu/Color.cu +++ b/gpu/Color.cu @@ -4145,7 +4145,7 @@ extern "C" void ScaLBL_Color_BC_Z(int *list, int *Map, double *Phi, double *Den, extern "C" void ScaLBL_CopySlice_z(double *Phi, int Nx, int Ny, int Nz, int Source, int Dest){ int GRID = Nx*Ny / 512 + 1; - 
dvc_ScaLBL_CopySlice_z<<>>(Phi,Nx,Ny,Nz,Slice,Dest); + dvc_ScaLBL_CopySlice_z<<>>(Phi,Nx,Ny,Nz,Source,Dest); } From bad52221a8758291dce46a9ef8f0a5d31d1905cb Mon Sep 17 00:00:00 2001 From: James McClure Date: Fri, 3 Apr 2020 20:33:03 -0400 Subject: [PATCH 101/121] fix argfs --- gpu/Color.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gpu/Color.cu b/gpu/Color.cu index 7fd87e30..aeeb3998 100644 --- a/gpu/Color.cu +++ b/gpu/Color.cu @@ -1241,7 +1241,7 @@ __global__ void dvc_ScaLBL_SetSlice_z(double *Phi, double value, int Nx, int Ny __global__ void dvc_ScaLBL_CopySlice_z(double *Phi, int Nx, int Ny, int Nz, int Source, int Dest){ - int n; double value; + double value; int n = blockIdx.x*blockDim.x + threadIdx.x; if (n < Nx*Ny){ value = Phi[Source*Nx*Ny+n]; From b81d4199a180c776120d5852f1f3cb63e8e594bd Mon Sep 17 00:00:00 2001 From: James McClure Date: Sat, 4 Apr 2020 09:00:46 -0400 Subject: [PATCH 102/121] fix volume bug --- models/ColorModel.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/models/ColorModel.cpp b/models/ColorModel.cpp index 9c46be83..d8af7355 100644 --- a/models/ColorModel.cpp +++ b/models/ColorModel.cpp @@ -760,7 +760,7 @@ void ScaLBL_ColorModel::Run(){ double volA = Averages->gnb.V; volA /= Dm->Volume; volB /= Dm->Volume;; - initial_volume = volA*Dm->Volume; + //initial_volume = volA*Dm->Volume; double vA_x = Averages->gnb.Px/Averages->gnb.M; double vA_y = Averages->gnb.Py/Averages->gnb.M; double vA_z = Averages->gnb.Pz/Averages->gnb.M; @@ -1221,6 +1221,7 @@ double ScaLBL_ColorModel::SeedPhaseField(const double seed_water_in_oil){ Bq_tmp[n+4*Np] += 0.1111111111111111*random_value; Bq_tmp[n+5*Np] += 0.1111111111111111*random_value; Bq_tmp[n+6*Np] += 0.1111111111111111*random_value; + count += 1.0; } mass_loss += random_value*seed_water_in_oil; } @@ -1248,6 +1249,7 @@ double ScaLBL_ColorModel::SeedPhaseField(const double seed_water_in_oil){ Bq_tmp[n+4*Np] += 0.1111111111111111*random_value; Bq_tmp[n+5*Np] += 0.1111111111111111*random_value; Bq_tmp[n+6*Np] += 0.1111111111111111*random_value; + count += 1.0; } mass_loss += random_value*seed_water_in_oil; } From 6d4eaebf4799ce94385dfb4a2186de0534403f4a Mon Sep 17 00:00:00 2001 From: James McClure Date: Mon, 6 Apr 2020 06:07:46 -0400 Subject: [PATCH 103/121] drop velocity seeding alg --- models/ColorModel.cpp | 245 ++++++++++++------------------------------ 1 file changed, 67 insertions(+), 178 deletions(-) diff --git a/models/ColorModel.cpp b/models/ColorModel.cpp index d8af7355..21a1f597 100644 --- a/models/ColorModel.cpp +++ b/models/ColorModel.cpp @@ -56,8 +56,6 @@ void ScaLBL_ColorModel::ReadCheckpoint(char *FILENAME, double *cPhi, double *cfq File.close(); } */ - - void ScaLBL_ColorModel::ReadParams(string filename){ // read the input database db = std::make_shared( filename ); @@ -1176,190 +1174,81 @@ double ScaLBL_ColorModel::MorphOpenConnected(double target_volume_change){ } return(volume_change); } - double ScaLBL_ColorModel::SeedPhaseField(const double seed_water_in_oil){ - srand(time(NULL)); - double mass_loss =0.f; - double count =0.f; - double *Aq_tmp, *Bq_tmp; - double *Vel_tmp; - - Aq_tmp = new double [7*Np]; - Bq_tmp = new double [7*Np]; - Vel_tmp = new double [3*Np]; + srand(time(NULL)); + double mass_loss =0.f; + double count =0.f; + double *Aq_tmp, *Bq_tmp; + + Aq_tmp = new double [7*Np]; + Bq_tmp = new double [7*Np]; - ScaLBL_CopyToHost(Aq_tmp, Aq, 7*Np*sizeof(double)); - ScaLBL_CopyToHost(Bq_tmp, Bq, 7*Np*sizeof(double)); - ScaLBL_CopyToHost(Vel_tmp, Velocity, 
3*Np*sizeof(double)); - - //Extract averged velocity - double vx_glb = (Averages->gnb.Px+Averages->gwb.Px)/(Averages->gnb.M+Averages->gwb.M); - double vy_glb = (Averages->gnb.Py+Averages->gwb.Py)/(Averages->gnb.M+Averages->gwb.M); - double vz_glb = (Averages->gnb.Pz+Averages->gwb.Pz)/(Averages->gnb.M+Averages->gwb.M); - double v_mag_glb = sqrt(vx_glb*vx_glb+vy_glb*vy_glb+vz_glb*vz_glb); + ScaLBL_CopyToHost(Aq_tmp, Aq, 7*Np*sizeof(double)); + ScaLBL_CopyToHost(Bq_tmp, Bq, 7*Np*sizeof(double)); + + + for (int n=0; n < ScaLBL_Comm->LastExterior(); n++){ + double random_value = seed_water_in_oil*double(rand())/ RAND_MAX; + double dA = Aq_tmp[n] + Aq_tmp[n+Np] + Aq_tmp[n+2*Np] + Aq_tmp[n+3*Np] + Aq_tmp[n+4*Np] + Aq_tmp[n+5*Np] + Aq_tmp[n+6*Np]; + double dB = Bq_tmp[n] + Bq_tmp[n+Np] + Bq_tmp[n+2*Np] + Bq_tmp[n+3*Np] + Bq_tmp[n+4*Np] + Bq_tmp[n+5*Np] + Bq_tmp[n+6*Np]; + double phase_id = (dA - dB) / (dA + dB); + if (phase_id > 0.0){ + Aq_tmp[n] -= 0.3333333333333333*random_value; + Aq_tmp[n+Np] -= 0.1111111111111111*random_value; + Aq_tmp[n+2*Np] -= 0.1111111111111111*random_value; + Aq_tmp[n+3*Np] -= 0.1111111111111111*random_value; + Aq_tmp[n+4*Np] -= 0.1111111111111111*random_value; + Aq_tmp[n+5*Np] -= 0.1111111111111111*random_value; + Aq_tmp[n+6*Np] -= 0.1111111111111111*random_value; + + Bq_tmp[n] += 0.3333333333333333*random_value; + Bq_tmp[n+Np] += 0.1111111111111111*random_value; + Bq_tmp[n+2*Np] += 0.1111111111111111*random_value; + Bq_tmp[n+3*Np] += 0.1111111111111111*random_value; + Bq_tmp[n+4*Np] += 0.1111111111111111*random_value; + Bq_tmp[n+5*Np] += 0.1111111111111111*random_value; + Bq_tmp[n+6*Np] += 0.1111111111111111*random_value; + } + mass_loss += random_value*seed_water_in_oil; + } - for (int n=0; n < ScaLBL_Comm->LastExterior(); n++){ - double v_mag_local = sqrt(Vel_tmp[n]*Vel_tmp[n]+Vel_tmp[n+1*Np]*Vel_tmp[n+1*Np]+Vel_tmp[n+2*Np]*Vel_tmp[n+2*Np]); - double weight = (v_mag_local 0.0){ - Aq_tmp[n] -= 0.3333333333333333*random_value; - Aq_tmp[n+Np] -= 0.1111111111111111*random_value; - Aq_tmp[n+2*Np] -= 0.1111111111111111*random_value; - Aq_tmp[n+3*Np] -= 0.1111111111111111*random_value; - Aq_tmp[n+4*Np] -= 0.1111111111111111*random_value; - Aq_tmp[n+5*Np] -= 0.1111111111111111*random_value; - Aq_tmp[n+6*Np] -= 0.1111111111111111*random_value; - - Bq_tmp[n] += 0.3333333333333333*random_value; - Bq_tmp[n+Np] += 0.1111111111111111*random_value; - Bq_tmp[n+2*Np] += 0.1111111111111111*random_value; - Bq_tmp[n+3*Np] += 0.1111111111111111*random_value; - Bq_tmp[n+4*Np] += 0.1111111111111111*random_value; - Bq_tmp[n+5*Np] += 0.1111111111111111*random_value; - Bq_tmp[n+6*Np] += 0.1111111111111111*random_value; - count += 1.0; - } - mass_loss += random_value*seed_water_in_oil; - } + for (int n=ScaLBL_Comm->FirstInterior(); n < ScaLBL_Comm->LastInterior(); n++){ + double random_value = seed_water_in_oil*double(rand())/ RAND_MAX; + double dA = Aq_tmp[n] + Aq_tmp[n+Np] + Aq_tmp[n+2*Np] + Aq_tmp[n+3*Np] + Aq_tmp[n+4*Np] + Aq_tmp[n+5*Np] + Aq_tmp[n+6*Np]; + double dB = Bq_tmp[n] + Bq_tmp[n+Np] + Bq_tmp[n+2*Np] + Bq_tmp[n+3*Np] + Bq_tmp[n+4*Np] + Bq_tmp[n+5*Np] + Bq_tmp[n+6*Np]; + double phase_id = (dA - dB) / (dA + dB); + if (phase_id > 0.0){ + Aq_tmp[n] -= 0.3333333333333333*random_value; + Aq_tmp[n+Np] -= 0.1111111111111111*random_value; + Aq_tmp[n+2*Np] -= 0.1111111111111111*random_value; + Aq_tmp[n+3*Np] -= 0.1111111111111111*random_value; + Aq_tmp[n+4*Np] -= 0.1111111111111111*random_value; + Aq_tmp[n+5*Np] -= 0.1111111111111111*random_value; + Aq_tmp[n+6*Np] -= 
0.1111111111111111*random_value; + + Bq_tmp[n] += 0.3333333333333333*random_value; + Bq_tmp[n+Np] += 0.1111111111111111*random_value; + Bq_tmp[n+2*Np] += 0.1111111111111111*random_value; + Bq_tmp[n+3*Np] += 0.1111111111111111*random_value; + Bq_tmp[n+4*Np] += 0.1111111111111111*random_value; + Bq_tmp[n+5*Np] += 0.1111111111111111*random_value; + Bq_tmp[n+6*Np] += 0.1111111111111111*random_value; + } + mass_loss += random_value*seed_water_in_oil; + } - for (int n=ScaLBL_Comm->FirstInterior(); n < ScaLBL_Comm->LastInterior(); n++){ - double v_mag_local = sqrt(Vel_tmp[n]*Vel_tmp[n]+Vel_tmp[n+1*Np]*Vel_tmp[n+1*Np]+Vel_tmp[n+2*Np]*Vel_tmp[n+2*Np]); - double weight = (v_mag_local 0.0){ - Aq_tmp[n] -= 0.3333333333333333*random_value; - Aq_tmp[n+Np] -= 0.1111111111111111*random_value; - Aq_tmp[n+2*Np] -= 0.1111111111111111*random_value; - Aq_tmp[n+3*Np] -= 0.1111111111111111*random_value; - Aq_tmp[n+4*Np] -= 0.1111111111111111*random_value; - Aq_tmp[n+5*Np] -= 0.1111111111111111*random_value; - Aq_tmp[n+6*Np] -= 0.1111111111111111*random_value; - - Bq_tmp[n] += 0.3333333333333333*random_value; - Bq_tmp[n+Np] += 0.1111111111111111*random_value; - Bq_tmp[n+2*Np] += 0.1111111111111111*random_value; - Bq_tmp[n+3*Np] += 0.1111111111111111*random_value; - Bq_tmp[n+4*Np] += 0.1111111111111111*random_value; - Bq_tmp[n+5*Np] += 0.1111111111111111*random_value; - Bq_tmp[n+6*Np] += 0.1111111111111111*random_value; - count += 1.0; - } - mass_loss += random_value*seed_water_in_oil; - } + count= sumReduce( Dm->Comm, count); + mass_loss= sumReduce( Dm->Comm, mass_loss); + if (rank == 0) printf("Remove mass %f from %f voxels \n",mass_loss,count); - count= sumReduce( Dm->Comm, count); - mass_loss= sumReduce( Dm->Comm, mass_loss); - if (rank == 0) printf("Remove mass %f from %f voxels \n",mass_loss,count); + // Need to initialize Aq, Bq, Den, Phi directly + //ScaLBL_CopyToDevice(Phi,phase.data(),7*Np*sizeof(double)); + ScaLBL_CopyToDevice(Aq, Aq_tmp, 7*Np*sizeof(double)); + ScaLBL_CopyToDevice(Bq, Bq_tmp, 7*Np*sizeof(double)); - // Need to initialize Aq, Bq, Den, Phi directly - //ScaLBL_CopyToDevice(Phi,phase.data(),7*Np*sizeof(double)); - ScaLBL_CopyToDevice(Aq, Aq_tmp, 7*Np*sizeof(double)); - ScaLBL_CopyToDevice(Bq, Bq_tmp, 7*Np*sizeof(double)); - - return(mass_loss); + return(mass_loss); } -//double ScaLBL_ColorModel::SeedPhaseField(const double seed_water_in_oil){ -// srand(time(NULL)); -// double mass_loss =0.f; -// double count =0.f; -// double *Aq_tmp, *Bq_tmp; -// -// Aq_tmp = new double [7*Np]; -// Bq_tmp = new double [7*Np]; -// -// ScaLBL_CopyToHost(Aq_tmp, Aq, 7*Np*sizeof(double)); -// ScaLBL_CopyToHost(Bq_tmp, Bq, 7*Np*sizeof(double)); -// -///* for (int k=1; kSDs(i,j,k) < 0.f){ -// // skip -// } -// else if (phase(i,j,k) > 0.f ){ -// phase(i,j,k) -= random_value*seed_water_in_oil; -// mass_loss += random_value*seed_water_in_oil; -// count++; -// } -// else { -// -// } -// } -// } -// } -// */ -// for (int n=0; n < ScaLBL_Comm->LastExterior(); n++){ -// double random_value = seed_water_in_oil*double(rand())/ RAND_MAX; -// double dA = Aq_tmp[n] + Aq_tmp[n+Np] + Aq_tmp[n+2*Np] + Aq_tmp[n+3*Np] + Aq_tmp[n+4*Np] + Aq_tmp[n+5*Np] + Aq_tmp[n+6*Np]; -// double dB = Bq_tmp[n] + Bq_tmp[n+Np] + Bq_tmp[n+2*Np] + Bq_tmp[n+3*Np] + Bq_tmp[n+4*Np] + Bq_tmp[n+5*Np] + Bq_tmp[n+6*Np]; -// double phase_id = (dA - dB) / (dA + dB); -// if (phase_id > 0.0){ -// Aq_tmp[n] -= 0.3333333333333333*random_value; -// Aq_tmp[n+Np] -= 0.1111111111111111*random_value; -// Aq_tmp[n+2*Np] -= 0.1111111111111111*random_value; -// 
Aq_tmp[n+3*Np] -= 0.1111111111111111*random_value; -// Aq_tmp[n+4*Np] -= 0.1111111111111111*random_value; -// Aq_tmp[n+5*Np] -= 0.1111111111111111*random_value; -// Aq_tmp[n+6*Np] -= 0.1111111111111111*random_value; -// -// Bq_tmp[n] += 0.3333333333333333*random_value; -// Bq_tmp[n+Np] += 0.1111111111111111*random_value; -// Bq_tmp[n+2*Np] += 0.1111111111111111*random_value; -// Bq_tmp[n+3*Np] += 0.1111111111111111*random_value; -// Bq_tmp[n+4*Np] += 0.1111111111111111*random_value; -// Bq_tmp[n+5*Np] += 0.1111111111111111*random_value; -// Bq_tmp[n+6*Np] += 0.1111111111111111*random_value; -// } -// mass_loss += random_value*seed_water_in_oil; -// } -// -// for (int n=ScaLBL_Comm->FirstInterior(); n < ScaLBL_Comm->LastInterior(); n++){ -// double random_value = seed_water_in_oil*double(rand())/ RAND_MAX; -// double dA = Aq_tmp[n] + Aq_tmp[n+Np] + Aq_tmp[n+2*Np] + Aq_tmp[n+3*Np] + Aq_tmp[n+4*Np] + Aq_tmp[n+5*Np] + Aq_tmp[n+6*Np]; -// double dB = Bq_tmp[n] + Bq_tmp[n+Np] + Bq_tmp[n+2*Np] + Bq_tmp[n+3*Np] + Bq_tmp[n+4*Np] + Bq_tmp[n+5*Np] + Bq_tmp[n+6*Np]; -// double phase_id = (dA - dB) / (dA + dB); -// if (phase_id > 0.0){ -// Aq_tmp[n] -= 0.3333333333333333*random_value; -// Aq_tmp[n+Np] -= 0.1111111111111111*random_value; -// Aq_tmp[n+2*Np] -= 0.1111111111111111*random_value; -// Aq_tmp[n+3*Np] -= 0.1111111111111111*random_value; -// Aq_tmp[n+4*Np] -= 0.1111111111111111*random_value; -// Aq_tmp[n+5*Np] -= 0.1111111111111111*random_value; -// Aq_tmp[n+6*Np] -= 0.1111111111111111*random_value; -// -// Bq_tmp[n] += 0.3333333333333333*random_value; -// Bq_tmp[n+Np] += 0.1111111111111111*random_value; -// Bq_tmp[n+2*Np] += 0.1111111111111111*random_value; -// Bq_tmp[n+3*Np] += 0.1111111111111111*random_value; -// Bq_tmp[n+4*Np] += 0.1111111111111111*random_value; -// Bq_tmp[n+5*Np] += 0.1111111111111111*random_value; -// Bq_tmp[n+6*Np] += 0.1111111111111111*random_value; -// } -// mass_loss += random_value*seed_water_in_oil; -// } -// -// count = Dm->Comm.sumReduce( count ); -// mass_loss = Dm->Comm.sumReduce( mass_loss ); -// if (rank == 0) printf("Remove mass %f from %f voxels \n",mass_loss,count); -// -// // Need to initialize Aq, Bq, Den, Phi directly -// //ScaLBL_CopyToDevice(Phi,phase.data(),7*Np*sizeof(double)); -// ScaLBL_CopyToDevice(Aq, Aq_tmp, 7*Np*sizeof(double)); -// ScaLBL_CopyToDevice(Bq, Bq_tmp, 7*Np*sizeof(double)); -// -// return(mass_loss); -//} - double ScaLBL_ColorModel::MorphInit(const double beta, const double target_delta_volume){ const RankInfoStruct rank_info(rank,nprocx,nprocy,nprocz); From 91f42ab74f14a44883d2d68e59750f93da1f7018 Mon Sep 17 00:00:00 2001 From: James McClure Date: Tue, 7 Apr 2020 08:45:06 -0400 Subject: [PATCH 104/121] condition unpack routines on BC for halo --- common/ScaLBL.cpp | 42 ++++++++++++++++++++------- example/Workflow/ComputeSaturation.py | 37 +++++++++++++++++++++++ 2 files changed, 69 insertions(+), 10 deletions(-) create mode 100755 example/Workflow/ComputeSaturation.py diff --git a/common/ScaLBL.cpp b/common/ScaLBL.cpp index 007a7290..12589ecf 100644 --- a/common/ScaLBL.cpp +++ b/common/ScaLBL.cpp @@ -1497,22 +1497,44 @@ void ScaLBL_Communicator::RecvHalo(double *data){ //................................................................................... 
ScaLBL_Scalar_Unpack(dvcRecvList_x, recvCount_x,recvbuf_x, data, N); ScaLBL_Scalar_Unpack(dvcRecvList_y, recvCount_y,recvbuf_y, data, N); - ScaLBL_Scalar_Unpack(dvcRecvList_z, recvCount_z,recvbuf_z, data, N); ScaLBL_Scalar_Unpack(dvcRecvList_X, recvCount_X,recvbuf_X, data, N); ScaLBL_Scalar_Unpack(dvcRecvList_Y, recvCount_Y,recvbuf_Y, data, N); - ScaLBL_Scalar_Unpack(dvcRecvList_Z, recvCount_Z,recvbuf_Z, data, N); ScaLBL_Scalar_Unpack(dvcRecvList_xy, recvCount_xy,recvbuf_xy, data, N); ScaLBL_Scalar_Unpack(dvcRecvList_xY, recvCount_xY,recvbuf_xY, data, N); ScaLBL_Scalar_Unpack(dvcRecvList_Xy, recvCount_Xy,recvbuf_Xy, data, N); ScaLBL_Scalar_Unpack(dvcRecvList_XY, recvCount_XY,recvbuf_XY, data, N); - ScaLBL_Scalar_Unpack(dvcRecvList_xz, recvCount_xz,recvbuf_xz, data, N); - ScaLBL_Scalar_Unpack(dvcRecvList_xZ, recvCount_xZ,recvbuf_xZ, data, N); - ScaLBL_Scalar_Unpack(dvcRecvList_Xz, recvCount_Xz,recvbuf_Xz, data, N); - ScaLBL_Scalar_Unpack(dvcRecvList_XZ, recvCount_XZ,recvbuf_XZ, data, N); - ScaLBL_Scalar_Unpack(dvcRecvList_yz, recvCount_yz,recvbuf_yz, data, N); - ScaLBL_Scalar_Unpack(dvcRecvList_yZ, recvCount_yZ,recvbuf_yZ, data, N); - ScaLBL_Scalar_Unpack(dvcRecvList_Yz, recvCount_Yz,recvbuf_Yz, data, N); - ScaLBL_Scalar_Unpack(dvcRecvList_YZ, recvCount_YZ,recvbuf_YZ, data, N); + + if (BoundaryCondition > 0){ + if (kproc != 0){ + //...Packing for z face(6,12,13,16,17)................................ + ScaLBL_Scalar_Unpack(dvcRecvList_z, recvCount_z,recvbuf_z, data, N); + ScaLBL_Scalar_Unpack(dvcRecvList_xz, recvCount_xz,recvbuf_xz, data, N); + ScaLBL_Scalar_Unpack(dvcRecvList_Xz, recvCount_Xz,recvbuf_Xz, data, N); + ScaLBL_Scalar_Unpack(dvcRecvList_yz, recvCount_yz,recvbuf_yz, data, N); + ScaLBL_Scalar_Unpack(dvcRecvList_Yz, recvCount_Yz,recvbuf_Yz, data, N); + } + if (kproc != nprocz-1){ + //...Packing for Z face(5,11,14,15,18)................................ + ScaLBL_Scalar_Unpack(dvcRecvList_Z, recvCount_Z,recvbuf_Z, data, N); + ScaLBL_Scalar_Unpack(dvcRecvList_xZ, recvCount_xZ,recvbuf_xZ, data, N); + ScaLBL_Scalar_Unpack(dvcRecvList_XZ, recvCount_XZ,recvbuf_XZ, data, N); + ScaLBL_Scalar_Unpack(dvcRecvList_yZ, recvCount_yZ,recvbuf_yZ, data, N); + ScaLBL_Scalar_Unpack(dvcRecvList_YZ, recvCount_YZ,recvbuf_YZ, data, N); + } + } + else { + ScaLBL_Scalar_Unpack(dvcRecvList_z, recvCount_z,recvbuf_z, data, N); + ScaLBL_Scalar_Unpack(dvcRecvList_xz, recvCount_xz,recvbuf_xz, data, N); + ScaLBL_Scalar_Unpack(dvcRecvList_Xz, recvCount_Xz,recvbuf_Xz, data, N); + ScaLBL_Scalar_Unpack(dvcRecvList_yz, recvCount_yz,recvbuf_yz, data, N); + ScaLBL_Scalar_Unpack(dvcRecvList_Yz, recvCount_Yz,recvbuf_Yz, data, N); + ScaLBL_Scalar_Unpack(dvcRecvList_Z, recvCount_Z,recvbuf_Z, data, N); + ScaLBL_Scalar_Unpack(dvcRecvList_xZ, recvCount_xZ,recvbuf_xZ, data, N); + ScaLBL_Scalar_Unpack(dvcRecvList_XZ, recvCount_XZ,recvbuf_XZ, data, N); + ScaLBL_Scalar_Unpack(dvcRecvList_yZ, recvCount_yZ,recvbuf_yZ, data, N); + ScaLBL_Scalar_Unpack(dvcRecvList_YZ, recvCount_YZ,recvbuf_YZ, data, N); + } + //................................................................................... Lock=false; // unlock the communicator after communications complete //................................................................................... 
diff --git a/example/Workflow/ComputeSaturation.py b/example/Workflow/ComputeSaturation.py new file mode 100755 index 00000000..56a34ece --- /dev/null +++ b/example/Workflow/ComputeSaturation.py @@ -0,0 +1,37 @@ +import sys +import numpy as np +import matplotlib.pylab as plt + +FILENAME=sys.argv[1] +Nx=int(sys.argv[2]) +Ny=int(sys.argv[3]) +Nz=int(sys.argv[4]) + +# read the input image +Output = np.fromfile(FILENAME,dtype = np.uint8) +Output.shape = (Nz,Ny,Nx) + +Oil=np.count_nonzero(Output==1) +Water=np.count_nonzero(Output==2) +Sw=Water/(Oil+Water) + +Porosity=1.0-(Oil+Water)/(Nx*Ny*Nz) + +print(FILENAME,"Porosity=", Porosity) + +SaturationProfile=np.zeros(Nz) +PorosityProfile=np.zeros(Nz) +# Compute saturation slice by slice +for idx in range(0, Nz): + Slice = Output[idx,:,:] + Oil=np.count_nonzero(Slice==1) + Water=np.count_nonzero(Slice==2) + SaturationProfile[idx]=Water/(Oil+Water) + PorosityProfile[idx]=(Oil+Water)/(Nx*Ny) + + +plt.figure() +plt.plot(SaturationProfile) +plt.xlabel('Position (z)') +plt.ylabel('Water Saturation') +plt.show() From a82c8995fe64cc9c34e39e5400a53c3e2b687a3e Mon Sep 17 00:00:00 2001 From: James McClure Date: Tue, 7 Apr 2020 09:27:32 -0400 Subject: [PATCH 105/121] make sure not to remove solid for reflection BC --- common/Domain.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/common/Domain.cpp b/common/Domain.cpp index 03d0c5ca..33d6117a 100644 --- a/common/Domain.cpp +++ b/common/Domain.cpp @@ -592,10 +592,10 @@ void Domain::Decomp( const std::string& Filename ) double sum; double sum_local=0.0; double iVol_global = 1.0/(1.0*(Nx-2)*(Ny-2)*(Nz-2)*nprocs); - if (BoundaryCondition > 0) iVol_global = 1.0/(1.0*(Nx-2)*nprocx*(Ny-2)*nprocy*((Nz-2)*nprocz-6)); + if (BoundaryCondition > 0 && BoundaryCondition !=5) iVol_global = 1.0/(1.0*(Nx-2)*nprocx*(Ny-2)*nprocy*((Nz-2)*nprocz-6)); //......................................................... // If external boundary conditions are applied remove solid - if (BoundaryCondition > 0 && kproc() == 0){ + if (BoundaryCondition > 0 && BoundaryCondition !=5 && kproc() == 0){ if (inlet_layers_z < 4){ inlet_layers_z=4; if(RANK==0){ @@ -611,7 +611,7 @@ void Domain::Decomp( const std::string& Filename ) } } } - if (BoundaryCondition > 0 && kproc() == nprocz-1){ + if (BoundaryCondition > 0 && BoundaryCondition !=5 && kproc() == nprocz-1){ if (outlet_layers_z < 4){ outlet_layers_z=4; if(RANK==nprocs-1){ @@ -1061,10 +1061,10 @@ void Domain::ReadIDs(){ double sum; double sum_local=0.0; double iVol_global = 1.0/(1.0*(Nx-2)*(Ny-2)*(Nz-2)*nprocs); - if (BoundaryCondition > 0) iVol_global = 1.0/(1.0*(Nx-2)*nprocx()*(Ny-2)*nprocy()*((Nz-2)*nprocz()-6)); + if (BoundaryCondition > 0 && BoundaryCondition !=5) iVol_global = 1.0/(1.0*(Nx-2)*nprocx()*(Ny-2)*nprocy()*((Nz-2)*nprocz()-6)); //......................................................... 
// If external boundary conditions are applied remove solid - if (BoundaryCondition > 0 && kproc() == 0){ + if (BoundaryCondition > 0 && BoundaryCondition !=5 && kproc() == 0){ if (inlet_layers_z < 4) inlet_layers_z=4; for (int k=0; k 0 && kproc() == nprocz()-1){ + if (BoundaryCondition > 0 && BoundaryCondition !=5 && kproc() == nprocz()-1){ if (outlet_layers_z < 4) outlet_layers_z=4; for (int k=Nz-outlet_layers_z; k Date: Tue, 7 Apr 2020 09:58:32 -0400 Subject: [PATCH 106/121] disable periodic BC override --- models/ColorModel.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/models/ColorModel.cpp b/models/ColorModel.cpp index 21a1f597..def4ab4e 100644 --- a/models/ColorModel.cpp +++ b/models/ColorModel.cpp @@ -128,21 +128,21 @@ void ScaLBL_ColorModel::ReadParams(string filename){ // Override user-specified boundary condition for specific protocols auto protocol = color_db->getWithDefault( "protocol", "none" ); if (protocol == "seed water"){ - if (BoundaryCondition != 0 ){ + if (BoundaryCondition != 0 && BoundaryCondition != 5){ BoundaryCondition = 0; if (rank==0) printf("WARNING: protocol (seed water) supports only full periodic boundary condition \n"); } domain_db->putScalar( "BC", BoundaryCondition ); } else if (protocol == "open connected oil"){ - if (BoundaryCondition != 0 ){ + if (BoundaryCondition != 0 && BoundaryCondition != 5){ BoundaryCondition = 0; if (rank==0) printf("WARNING: protocol (open connected oil) supports only full periodic boundary condition \n"); } domain_db->putScalar( "BC", BoundaryCondition ); } else if (protocol == "shell aggregation"){ - if (BoundaryCondition != 0 ){ + if (BoundaryCondition != 0 && BoundaryCondition != 5){ BoundaryCondition = 0; if (rank==0) printf("WARNING: protocol (shell aggregation) supports only full periodic boundary condition \n"); } From af8b2d799aaa27859f257ae75e52651325687814 Mon Sep 17 00:00:00 2001 From: James McClure Date: Tue, 7 Apr 2020 10:06:54 -0400 Subject: [PATCH 107/121] enable target Ca for reflection BC --- models/ColorModel.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/models/ColorModel.cpp b/models/ColorModel.cpp index def4ab4e..83ddc5d4 100644 --- a/models/ColorModel.cpp +++ b/models/ColorModel.cpp @@ -567,8 +567,8 @@ void ScaLBL_ColorModel::Run(){ if (color_db->keyExists( "timestep" )){ timestep = color_db->getScalar( "timestep" ); } - if (BoundaryCondition != 0 && SET_CAPILLARY_NUMBER==true){ - if (rank == 0) printf("WARINING: capillary number target only supported for BC = 0 \n"); + if (BoundaryCondition != 0 && BoundaryCondition != 5 && SET_CAPILLARY_NUMBER==true){ + if (rank == 0) printf("WARINING: capillary number target only supported for BC = 0 or 5 \n"); SET_CAPILLARY_NUMBER=false; } if (analysis_db->keyExists( "seed_water" )){ From 3b006fbc3c8aa7ed81b0e34b6f0a67687e58881e Mon Sep 17 00:00:00 2001 From: James McClure Date: Tue, 7 Apr 2020 10:38:21 -0400 Subject: [PATCH 108/121] reflect BC for D3Q7 --- common/ScaLBL.cpp | 11 ++++++++++- common/ScaLBL.h | 4 ++++ cpu/D3Q7.cpp | 15 +++++++++++++++ gpu/D3Q7.cu | 37 +++++++++++++++++++++++++++++++++++++ 4 files changed, 66 insertions(+), 1 deletion(-) diff --git a/common/ScaLBL.cpp b/common/ScaLBL.cpp index 12589ecf..ef9b2341 100644 --- a/common/ScaLBL.cpp +++ b/common/ScaLBL.cpp @@ -1286,7 +1286,16 @@ void ScaLBL_Communicator::BiRecvD3Q7AA(double *Aq, double *Bq){ ScaLBL_D3Q7_Unpack(5,dvcRecvDist_Z,0,recvCount_Z,recvbuf_Z,Aq,N); ScaLBL_D3Q7_Unpack(5,dvcRecvDist_Z,recvCount_Z,recvCount_Z,recvbuf_Z,Bq,N); } - + if 
(BoundaryCondition == 5){ + if (kproc == 0){ + ScaLBL_D3Q7_Reflection_BC_z(dvcSendList_z, Aq, sendCount_z, N); + ScaLBL_D3Q7_Reflection_BC_z(dvcSendList_z, Bq, sendCount_z, N); + } + if (kproc == nprocz-1){ + ScaLBL_D3Q7_Reflection_BC_Z(dvcSendList_Z, Aq, sendCount_Z, N); + ScaLBL_D3Q7_Reflection_BC_Z(dvcSendList_Z, Bq, sendCount_Z, N); + } + } //................................................................................... Lock=false; // unlock the communicator after communications complete //................................................................................... diff --git a/common/ScaLBL.h b/common/ScaLBL.h index 92956f1f..dec8b3d1 100644 --- a/common/ScaLBL.h +++ b/common/ScaLBL.h @@ -141,6 +141,10 @@ extern "C" void ScaLBL_D3Q19_Reflection_BC_z(int *list, double *dist, int count, extern "C" void ScaLBL_D3Q19_Reflection_BC_Z(int *list, double *dist, int count, int Np); +extern "C" void ScaLBL_D3Q7_Reflection_BC_z(int *list, double *dist, int count, int Np); + +extern "C" void ScaLBL_D3Q7_Reflection_BC_Z(int *list, double *dist, int count, int Np); + extern "C" void ScaLBL_SetSlice_z(double *Phi, double value, int Nx, int Ny, int Nz, int Slice); extern "C" void ScaLBL_CopySlice_z(double *Phi, int Nx, int Ny, int Nz, int Source, int Destination); diff --git a/cpu/D3Q7.cpp b/cpu/D3Q7.cpp index 344e6851..0940b3b6 100644 --- a/cpu/D3Q7.cpp +++ b/cpu/D3Q7.cpp @@ -72,6 +72,21 @@ extern "C" void ScaLBL_UnpackDenD3Q7(int *list, int count, double *recvbuf, int } } +extern "C" void ScaLBL_D3Q7_Reflection_BC_z(int *list, double *dist, int count, int Np){ + for (int idx=0; idx>>(list, dist, count, Np); + cudaError_t err = cudaGetLastError(); + if (cudaSuccess != err){ + printf("CUDA error in ScaLBL_D3Q7_Reflection_BC_z (kernel): %s \n",cudaGetErrorString(err)); + } +} + +extern "C" void ScaLBL_D3Q7_Reflection_BC_Z(int *list, double *dist, int count, int Np){ + int GRID = count / 512 + 1; + dvc_ScaLBL_D3Q7_Reflection_BC_Z<<>>(list, dist, count, Np); + cudaError_t err = cudaGetLastError(); + if (cudaSuccess != err){ + printf("CUDA error in ScaLBL_D3Q7_Reflection_BC_Z (kernel): %s \n",cudaGetErrorString(err)); + } +} + extern "C" void ScaLBL_D3Q7_Unpack(int q, int *list, int start, int count, double *recvbuf, double *dist, int N){ int GRID = count / 512 + 1; dvc_ScaLBL_D3Q7_Unpack <<>>(q, list, start, count, recvbuf, dist, N); From 7636220a4894a1a7c485e289e928408a50efd54e Mon Sep 17 00:00:00 2001 From: James McClure Date: Tue, 7 Apr 2020 10:43:01 -0400 Subject: [PATCH 109/121] add header for print --- gpu/D3Q7.cu | 1 + 1 file changed, 1 insertion(+) diff --git a/gpu/D3Q7.cu b/gpu/D3Q7.cu index c10a865b..8a551f78 100644 --- a/gpu/D3Q7.cu +++ b/gpu/D3Q7.cu @@ -1,4 +1,5 @@ // GPU Functions for D3Q7 Lattice Boltzmann Methods +#include #define NBLOCKS 560 #define NTHREADS 128 From cfa40bdcba7fb109136dfd619da098ff8dd8596d Mon Sep 17 00:00:00 2001 From: James McClure Date: Tue, 7 Apr 2020 13:57:29 -0400 Subject: [PATCH 110/121] fix declare --- cpu/D3Q7.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/cpu/D3Q7.cpp b/cpu/D3Q7.cpp index 0940b3b6..48f71495 100644 --- a/cpu/D3Q7.cpp +++ b/cpu/D3Q7.cpp @@ -73,6 +73,7 @@ extern "C" void ScaLBL_UnpackDenD3Q7(int *list, int count, double *recvbuf, int } extern "C" void ScaLBL_D3Q7_Reflection_BC_z(int *list, double *dist, int count, int Np){ + int n; for (int idx=0; idx Date: Fri, 10 Apr 2020 15:03:15 -0400 Subject: [PATCH 111/121] debugging strange mass conservation issue --- analysis/SubPhase.cpp | 6 +- common/ScaLBL.cpp | 14 
+- models/ColorModel.cpp | 49 +++---- tests/TestMassConservationD3Q7.cpp | 206 +++++++++++++++-------------- tests/lbpm_color_simulator.cpp | 2 +- 5 files changed, 143 insertions(+), 134 deletions(-) diff --git a/analysis/SubPhase.cpp b/analysis/SubPhase.cpp index 76541ffd..7ef8194b 100644 --- a/analysis/SubPhase.cpp +++ b/analysis/SubPhase.cpp @@ -280,7 +280,7 @@ void SubPhase::Basic(){ dir_y = 0.0; dir_z = 1.0; } - if (Dm->BoundaryCondition > 0 ){ + if (Dm->BoundaryCondition == 1 || Dm->BoundaryCondition == 2 || Dm->BoundaryCondition == 3 || Dm->BoundaryCondition == 4 ){ // compute the pressure drop double pressure_drop = (Pressure(Nx*Ny + Nx + 1) - 1.0) / 3.0; double length = ((Nz-2)*Dm->nprocz()); @@ -376,8 +376,8 @@ void SubPhase::Full(){ // If external boundary conditions are set, do not average over the inlet kmin=1; kmax=Nz-1; - if (Dm->BoundaryCondition > 0 && Dm->kproc() == 0) kmin=4; - if (Dm->BoundaryCondition > 0 && Dm->kproc() == Dm->nprocz()-1) kmax=Nz-4; + if (Dm->BoundaryCondition > 0 && Dm->BoundaryCondition != 5 && Dm->kproc() == 0) kmin=4; + if (Dm->BoundaryCondition > 0 && Dm->BoundaryCondition != 5 && Dm->kproc() == Dm->nprocz()-1) kmax=Nz-4; imin=jmin=1; // If inlet layers exist use these as default diff --git a/common/ScaLBL.cpp b/common/ScaLBL.cpp index ef9b2341..07aa3f1d 100644 --- a/common/ScaLBL.cpp +++ b/common/ScaLBL.cpp @@ -1286,7 +1286,7 @@ void ScaLBL_Communicator::BiRecvD3Q7AA(double *Aq, double *Bq){ ScaLBL_D3Q7_Unpack(5,dvcRecvDist_Z,0,recvCount_Z,recvbuf_Z,Aq,N); ScaLBL_D3Q7_Unpack(5,dvcRecvDist_Z,recvCount_Z,recvCount_Z,recvbuf_Z,Bq,N); } - if (BoundaryCondition == 5){ +/* if (BoundaryCondition == 5){ if (kproc == 0){ ScaLBL_D3Q7_Reflection_BC_z(dvcSendList_z, Aq, sendCount_z, N); ScaLBL_D3Q7_Reflection_BC_z(dvcSendList_z, Bq, sendCount_z, N); @@ -1296,6 +1296,7 @@ void ScaLBL_Communicator::BiRecvD3Q7AA(double *Aq, double *Bq){ ScaLBL_D3Q7_Reflection_BC_Z(dvcSendList_Z, Bq, sendCount_Z, N); } } + */ //................................................................................... Lock=false; // unlock the communicator after communications complete //................................................................................... @@ -1543,10 +1544,15 @@ void ScaLBL_Communicator::RecvHalo(double *data){ ScaLBL_Scalar_Unpack(dvcRecvList_yZ, recvCount_yZ,recvbuf_yZ, data, N); ScaLBL_Scalar_Unpack(dvcRecvList_YZ, recvCount_YZ,recvbuf_YZ, data, N); } - //................................................................................... Lock=false; // unlock the communicator after communications complete //................................................................................... 
+ if (BoundaryCondition == 5 && kproc == 0){ + ScaLBL_CopySlice_z(data,Nx,Ny,Nz,1,0); + } + if (BoundaryCondition == 5 && kproc == nprocz-1){ + ScaLBL_CopySlice_z(data,Nx,Ny,Nz,Nz-2,Nz-1); + } } void ScaLBL_Communicator::RegularLayout(IntArray map, const double *data, DoubleArray ®data){ @@ -1583,7 +1589,7 @@ void ScaLBL_Communicator::RegularLayout(IntArray map, const double *data, Double void ScaLBL_Communicator::Color_BC_z(int *Map, double *Phi, double *Den, double vA, double vB){ if (kproc == 0) { if (BoundaryCondition == 5){ - ScaLBL_CopySlice_z(Phi,Nx,Ny,Nz,1,0); + //ScaLBL_CopySlice_z(Phi,Nx,Ny,Nz,1,0); } else { // Set the phase indicator field and density on the z inlet @@ -1596,7 +1602,7 @@ void ScaLBL_Communicator::Color_BC_z(int *Map, double *Phi, double *Den, double void ScaLBL_Communicator::Color_BC_Z(int *Map, double *Phi, double *Den, double vA, double vB){ if (kproc == nprocz-1){ if (BoundaryCondition == 5){ - ScaLBL_CopySlice_z(Phi,Nx,Ny,Nz,Nz-2,Nz-1); + //ScaLBL_CopySlice_z(Phi,Nx,Ny,Nz,Nz-2,Nz-1); } else { // Set the phase indicator field and density on the Z outlet diff --git a/models/ColorModel.cpp b/models/ColorModel.cpp index 83ddc5d4..a0f339c6 100644 --- a/models/ColorModel.cpp +++ b/models/ColorModel.cpp @@ -470,7 +470,8 @@ void ScaLBL_ColorModel::Initialize(){ ScaLBL_PhaseField_Init(dvcMap, Phi, Den, Aq, Bq, 0, ScaLBL_Comm->LastExterior(), Np); ScaLBL_PhaseField_Init(dvcMap, Phi, Den, Aq, Bq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); - if (BoundaryCondition >0 ){ + // establish reservoirs for external bC + if (BoundaryCondition == 1 || BoundaryCondition == 2 || BoundaryCondition == 3 || BoundaryCondition == 4 ){ if (Dm->kproc()==0){ ScaLBL_SetSlice_z(Phi,1.0,Nx,Ny,Nz,0); ScaLBL_SetSlice_z(Phi,1.0,Nx,Ny,Nz,1); @@ -743,7 +744,7 @@ void ScaLBL_ColorModel::Run(){ //************************************************************************ PROFILE_STOP("Update"); - if (rank==0 && timestep%analysis_interval == 0 && BoundaryCondition > 0){ + if (rank==0 && timestep%analysis_interval == 0 && BoundaryCondition == 4){ printf("%i %f \n",timestep,din); } // Run the analysis @@ -1159,7 +1160,7 @@ double ScaLBL_ColorModel::MorphOpenConnected(double target_volume_change){ ScaLBL_CopyToDevice(Phi,phase.data(),N*sizeof(double)); ScaLBL_PhaseField_Init(dvcMap, Phi, Den, Aq, Bq, 0, ScaLBL_Comm->LastExterior(), Np); ScaLBL_PhaseField_Init(dvcMap, Phi, Den, Aq, Bq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); - if (BoundaryCondition >0 ){ + if (BoundaryCondition == 1 || BoundaryCondition == 2 || BoundaryCondition == 3 || BoundaryCondition == 4){ if (Dm->kproc()==0){ ScaLBL_SetSlice_z(Phi,1.0,Nx,Ny,Nz,0); ScaLBL_SetSlice_z(Phi,1.0,Nx,Ny,Nz,1); @@ -1447,7 +1448,7 @@ double ScaLBL_ColorModel::MorphInit(const double beta, const double target_delta // 7. 
Re-initialize phase field and density ScaLBL_PhaseField_Init(dvcMap, Phi, Den, Aq, Bq, 0, ScaLBL_Comm->LastExterior(), Np); ScaLBL_PhaseField_Init(dvcMap, Phi, Den, Aq, Bq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); - if (BoundaryCondition >0 ){ + if (BoundaryCondition == 1 || BoundaryCondition == 2 || BoundaryCondition == 3 || BoundaryCondition == 4){ if (Dm->kproc()==0){ ScaLBL_SetSlice_z(Phi,1.0,Nx,Ny,Nz,0); ScaLBL_SetSlice_z(Phi,1.0,Nx,Ny,Nz,1); @@ -1516,25 +1517,25 @@ void ScaLBL_ColorModel::WriteDebug(){ fwrite(PhaseField.data(),8,N,VELZ_FILE); fclose(VELZ_FILE); -// ScaLBL_Comm->RegularLayout(Map,&ColorGrad[0],PhaseField); -// FILE *CGX_FILE; -// sprintf(LocalRankFilename,"Gradient_X.%05i.raw",rank); -// CGX_FILE = fopen(LocalRankFilename,"wb"); -// fwrite(PhaseField.data(),8,N,CGX_FILE); -// fclose(CGX_FILE); -// -// ScaLBL_Comm->RegularLayout(Map,&ColorGrad[Np],PhaseField); -// FILE *CGY_FILE; -// sprintf(LocalRankFilename,"Gradient_Y.%05i.raw",rank); -// CGY_FILE = fopen(LocalRankFilename,"wb"); -// fwrite(PhaseField.data(),8,N,CGY_FILE); -// fclose(CGY_FILE); -// -// ScaLBL_Comm->RegularLayout(Map,&ColorGrad[2*Np],PhaseField); -// FILE *CGZ_FILE; -// sprintf(LocalRankFilename,"Gradient_Z.%05i.raw",rank); -// CGZ_FILE = fopen(LocalRankFilename,"wb"); -// fwrite(PhaseField.data(),8,N,CGZ_FILE); -// fclose(CGZ_FILE); +/* ScaLBL_Comm->RegularLayout(Map,&ColorGrad[0],PhaseField); + FILE *CGX_FILE; + sprintf(LocalRankFilename,"Gradient_X.%05i.raw",rank); + CGX_FILE = fopen(LocalRankFilename,"wb"); + fwrite(PhaseField.data(),8,N,CGX_FILE); + fclose(CGX_FILE); + ScaLBL_Comm->RegularLayout(Map,&ColorGrad[Np],PhaseField); + FILE *CGY_FILE; + sprintf(LocalRankFilename,"Gradient_Y.%05i.raw",rank); + CGY_FILE = fopen(LocalRankFilename,"wb"); + fwrite(PhaseField.data(),8,N,CGY_FILE); + fclose(CGY_FILE); + + ScaLBL_Comm->RegularLayout(Map,&ColorGrad[2*Np],PhaseField); + FILE *CGZ_FILE; + sprintf(LocalRankFilename,"Gradient_Z.%05i.raw",rank); + CGZ_FILE = fopen(LocalRankFilename,"wb"); + fwrite(PhaseField.data(),8,N,CGZ_FILE); + fclose(CGZ_FILE); +*/ } diff --git a/tests/TestMassConservationD3Q7.cpp b/tests/TestMassConservationD3Q7.cpp index bbfe8cae..35e42c1c 100644 --- a/tests/TestMassConservationD3Q7.cpp +++ b/tests/TestMassConservationD3Q7.cpp @@ -69,10 +69,11 @@ int main(int argc, char **argv) // Initialize MPI int rank,nprocs; MPI_Init(&argc,&argv); - MPI_Comm comm = MPI_COMM_WORLD; + MPI_Comm comm = MPI_COMM_WORLD; MPI_Comm_rank(comm,&rank); MPI_Comm_size(comm,&nprocs); // parallel domain size (# of sub-domains) + int CleanCheck = 0; if (rank == 0){ printf("********************************************************\n"); @@ -84,68 +85,68 @@ int main(int argc, char **argv) } } { - auto filename = argv[1]; - ScaLBL_ColorModel CM(rank,nprocs,comm); - CM.ReadParams(filename); - CM.SetDomain(); - int i,j,k,n; - int Nx,Ny,Nz,N,Np; - Nx = CM.Nx; - Ny = CM.Ny; - Nz = CM.Nz; - N = Nx*Ny*Nz; + auto filename = argv[1]; + ScaLBL_ColorModel CM(rank,nprocs,comm); + CM.ReadParams(filename); + CM.SetDomain(); + int i,j,k,n; + int Nx,Ny,Nz,N,Np; + Nx = CM.Nx; + Ny = CM.Ny; + Nz = CM.Nz; + N = Nx*Ny*Nz; - //CM.ReadInput(); - double radius=0.4*double(Nx); - InitializeBubble(CM,radius); - CM.Create(); // creating the model will create data structure to match the pore structure and allocate variables - CM.Initialize(); // initializing the model will set initial conditions for variables - //CM.Run(); - //CM.WriteDebug(); + //CM.ReadInput(); + double radius=0.4*double(Nx); + 
InitializeBubble(CM,radius); + CM.Create(); // creating the model will create data structure to match the pore structure and allocate variables + CM.Initialize(); // initializing the model will set initial conditions for variables + //CM.Run(); + //CM.WriteDebug(); - CM.timestepMax = 10; - CM.Run(); + CM.timestepMax = 10; + CM.Run(); - Np = CM.Np; - double *DenOriginal, *DenFinal; - DenOriginal = new double [2*Np]; - DenFinal = new double [2*Np]; + Np = CM.Np; + double *DenOriginal, *DenFinal; + DenOriginal = new double [2*Np]; + DenFinal = new double [2*Np]; - // Run the odd timestep - ScaLBL_CopyToHost(DenOriginal,CM.Den,2*Np*sizeof(double)); - /* + // Run the odd timestep + ScaLBL_CopyToHost(DenOriginal,CM.Den,2*Np*sizeof(double)); + /* CM.ScaLBL_Comm->BiSendD3Q7AA(CM.Aq,CM.Bq); //READ FROM NORMAL ScaLBL_D3Q7_AAodd_PhaseField(CM.NeighborList, CM.dvcMap, CM.Aq, CM.Bq, CM.Den, CM.Phi, CM.ScaLBL_Comm->FirstInterior(), CM.ScaLBL_Comm->LastInterior(), CM.Np); CM.ScaLBL_Comm->BiRecvD3Q7AA(CM.Aq,CM.Bq); //WRITE INTO OPPOSITE ScaLBL_DeviceBarrier(); ScaLBL_D3Q7_AAodd_PhaseField(CM.NeighborList, CM.dvcMap, CM.Aq, CM.Bq, CM.Den, CM.Phi, 0, CM.ScaLBL_Comm->LastExterior(), CM.Np); - */ + */ - CM.timestepMax = 2; - CM.Run(); - int D3Q7[7][3]={{0,0,0},{1,0,0},{-1,0,0},{0,1,0},{0,-1,0},{0,0,1},{0,0,-1}}; - // Compare and make sure mass is conserved at every lattice site - auto Error = new double[N]; - auto A_q = new double[7*Np]; - //auto B_q = new double[7*Np]; - bool CleanCheck = true; - double original,final, sum_q; - double total_mass_A_0 = 0.0; - double total_mass_B_0= 0.0; - double total_mass_A_1 = 0.0; - double total_mass_B_1= 0.0; - int count_negative_A = 0; - int count_negative_B = 0; - ScaLBL_CopyToHost(DenFinal,CM.Den,2*Np*sizeof(double)); - ScaLBL_CopyToHost(A_q,CM.Aq,7*Np*sizeof(double)); - for (i=0; i-1){ - //printf("idx=%i\n",idx); + CM.timestepMax = 2; + CM.timestep = 0; + CM.Run(); + int D3Q7[7][3]={{0,0,0},{1,0,0},{-1,0,0},{0,1,0},{0,-1,0},{0,0,1},{0,0,-1}}; + // Compare and make sure mass is conserved at every lattice site + auto Error = new double[N]; + auto A_q = new double[7*Np]; + //auto B_q = new double[7*Np]; + double original,final, sum_q; + double total_mass_A_0 = 0.0; + double total_mass_B_0= 0.0; + double total_mass_A_1 = 0.0; + double total_mass_B_1= 0.0; + int count_negative_A = 0; + int count_negative_B = 0; + ScaLBL_CopyToHost(DenFinal,CM.Den,2*Np*sizeof(double)); + ScaLBL_CopyToHost(A_q,CM.Aq,7*Np*sizeof(double)); + for (i=0; i-1){ + //printf("idx=%i\n",idx); final = DenFinal[idx]; if (final < 0.0) count_negative_A++; original = DenOriginal[idx]; @@ -153,60 +154,61 @@ int main(int argc, char **argv) total_mass_A_1 += final; sum_q = A_q[idx]; for (int q=1; q<7; q++){ - int Cqx = D3Q7[q][0]; - int Cqy = D3Q7[q][1]; - int Cqz = D3Q7[q][2]; - int iq = CM.Map(i-Cqx,j-Cqy,k-Cqz); - if (iq < Np && iq > -1){ - sum_q += A_q[q*Np+iq]; - } - else if (q%2==0){ - sum_q += A_q[(q-1)*Np+idx]; - } - else{ - sum_q += A_q[(q+1)*Np+idx]; - } + int Cqx = D3Q7[q][0]; + int Cqy = D3Q7[q][1]; + int Cqz = D3Q7[q][2]; + int iq = CM.Map(i-Cqx,j-Cqy,k-Cqz); + if (iq < Np && iq > -1){ + sum_q += A_q[q*Np+iq]; + } + else if (q%2==0){ + sum_q += A_q[(q-1)*Np+idx]; + } + else{ + sum_q += A_q[(q+1)*Np+idx]; + } } Error[n] = sum_q - original; - - /*if (fabs(DenFinal[idx] - DenOriginal[idx]) > 1e-15){ - //if (CM.Dm->id[n] == 0) printf("Solid phase! \n"); - //if (CM.Dm->id[n] == 1) printf("Wetting phase! \n"); - //if (CM.Dm->id[n] == 2) printf("Non-wetting phase! 
\n"); - printf("Mass not conserved: WP density, site=%i,%i,%i, original = %f, final = %f \n",i,j,k,original,final); - CleanCheck=false; - Error[n] += final-original; - }*/ + + if (fabs(DenFinal[idx] - DenOriginal[idx]) > 1e-15){ + //if (CM.Dm->id[n] == 0) printf("Solid phase! \n"); + //if (CM.Dm->id[n] == 1) printf("Wetting phase! \n"); + //if (CM.Dm->id[n] == 2) printf("Non-wetting phase! \n"); + //printf("Mass not conserved: WP density, site=%i,%i,%i, original = %f, final = %f \n",i,j,k,original,final); + CleanCheck=false; + Error[n] += final-original; + } final = DenFinal[Np+idx]; if (final < 0.0) count_negative_B++; original = DenOriginal[Np+idx]; total_mass_B_0 += original; total_mass_B_1 += final; - /*if (fabs(DenFinal[Np+idx] - DenOriginal[Np+idx]) > 1e-15){ - //if (CM.Dm->id[n] == 0) printf("Solid phase! \n"); - //if (CM.Dm->id[n] == 1) printf("Wetting phase! \n"); - //if (CM.Dm->id[n] == 2) printf("Non-wetting phase! \n"); - printf("Mass not conserved: NWP density, site=%i,%i,%i, original = %f, final = %f \n",i,j,k,original,final); - CleanCheck=false; - Error[n] += final-original; - }*/ + if (fabs(DenFinal[Np+idx] - DenOriginal[Np+idx]) > 1e-15){ + //if (CM.Dm->id[n] == 0) printf("Solid phase! \n"); + //if (CM.Dm->id[n] == 1) printf("Wetting phase! \n"); + //if (CM.Dm->id[n] == 2) printf("Non-wetting phase! \n"); + //printf("Mass not conserved: NWP density, site=%i,%i,%i, original = %f, final = %f \n",i,j,k,original,final); + CleanCheck=false; + Error[n] += final-original; + + } + } } } } - } - printf("Negative density values for A = %i \n",count_negative_A); - printf("Negative density values for B = %i \n",count_negative_B); - printf("Global mass difference A = %.5g\n",total_mass_A_1-total_mass_A_0); - printf("Global mass difference B = %.5g\n",total_mass_B_1-total_mass_B_0); + printf("Negative density values for A = %i \n",count_negative_A); + printf("Negative density values for B = %i \n",count_negative_B); + printf("Global mass difference A = %.5g\n",total_mass_A_1-total_mass_A_0); + printf("Global mass difference B = %.5g\n",total_mass_B_1-total_mass_B_0); - if (count_negative_A > 0 ||count_negative_B > 0) CleanCheck=1; - if (fabs(total_mass_A_1-total_mass_A_0) > 1.0e-15||fabs(total_mass_B_1-total_mass_B_0) > 1.0e-15 ) CleanCheck=2; + if (count_negative_A > 0 ||count_negative_B > 0) CleanCheck=1; + if (fabs(total_mass_A_1-total_mass_A_0) > 1.0e-13||fabs(total_mass_B_1-total_mass_B_0) > 1.0e-13 ) CleanCheck=2; - /* - FILE *OUTFILE; - OUTFILE = fopen("error.raw","wb"); - fwrite(Error,8,N,OUTFILE); - fclose(OUTFILE); + FILE *OUTFILE; + OUTFILE = fopen("error.raw","wb"); + fwrite(Error,8,N,OUTFILE); + fclose(OUTFILE); + /* if (rank==0) printf("Checking that the correct velocity is retained \n"); // Swap convention is observed -- velocity is negative @@ -256,15 +258,15 @@ int main(int argc, char **argv) } } } -*/ - if (CleanCheck){ - if (rank==0) printf("Test passed: mass conservation for D3Q7 \n"); - } - else { - if (rank==0) printf("Test failed!: mass conservation for D3Q7 \n"); + */ + if (CleanCheck == 0){ + if (rank==0) printf("Test passed: mass conservation for D3Q7 \n"); + } + else { + if (rank==0) printf("Test failed!: mass conservation for D3Q7 \n"); + } } -} // **************************************************** MPI_Barrier(comm); MPI_Finalize(); diff --git a/tests/lbpm_color_simulator.cpp b/tests/lbpm_color_simulator.cpp index 1f63c653..79b2a718 100644 --- a/tests/lbpm_color_simulator.cpp +++ b/tests/lbpm_color_simulator.cpp @@ -58,7 +58,7 @@ int main(int argc, char 
**argv) ColorModel.Create(); // creating the model will create data structure to match the pore structure and allocate variables ColorModel.Initialize(); // initializing the model will set initial conditions for variables ColorModel.Run(); - //ColorModel.WriteDebug(); + ColorModel.WriteDebug(); PROFILE_STOP("Main"); PROFILE_SAVE("lbpm_color_simulator",1); From d1d92ea6bbc793fe66e007fffb3f79e3aec75a74 Mon Sep 17 00:00:00 2001 From: James McClure Date: Fri, 10 Apr 2020 21:31:44 -0400 Subject: [PATCH 112/121] debug mass conservation test --- tests/TestMassConservationD3Q7.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/TestMassConservationD3Q7.cpp b/tests/TestMassConservationD3Q7.cpp index 35e42c1c..6186fd60 100644 --- a/tests/TestMassConservationD3Q7.cpp +++ b/tests/TestMassConservationD3Q7.cpp @@ -41,7 +41,7 @@ inline void InitializeBubble(ScaLBL_ColorModel &ColorModel, double BubbleRadius) int jglobal= j+(Ny-2)*ColorModel.Mask->jproc(); int kglobal= k+(Nz-2)*ColorModel.Mask->kproc(); // Initialize phase position field for parallel bubble test - if (jglobal < 40){ + if (kglobal < 40){ ColorModel.Mask->id[n] = 0; } else if ((iglobal-0.5*(Nx-2)*nprocx)*(iglobal-0.5*(Nx-2)*nprocx) @@ -183,15 +183,15 @@ int main(int argc, char **argv) original = DenOriginal[Np+idx]; total_mass_B_0 += original; total_mass_B_1 += final; - if (fabs(DenFinal[Np+idx] - DenOriginal[Np+idx]) > 1e-15){ + /*if (fabs(DenFinal[Np+idx] - DenOriginal[Np+idx]) > 1e-15){ //if (CM.Dm->id[n] == 0) printf("Solid phase! \n"); //if (CM.Dm->id[n] == 1) printf("Wetting phase! \n"); //if (CM.Dm->id[n] == 2) printf("Non-wetting phase! \n"); //printf("Mass not conserved: NWP density, site=%i,%i,%i, original = %f, final = %f \n",i,j,k,original,final); CleanCheck=false; Error[n] += final-original; - - } + } + */ } } } From d6a8647ee1d0fd4f8ef30a079b7526ede60aa0c4 Mon Sep 17 00:00:00 2001 From: James McClure Date: Fri, 17 Apr 2020 12:21:29 -0400 Subject: [PATCH 113/121] cannot exclude inlet / outlet without screwing up topology --- analysis/SubPhase.cpp | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/analysis/SubPhase.cpp b/analysis/SubPhase.cpp index 7ef8194b..2a5e3350 100644 --- a/analysis/SubPhase.cpp +++ b/analysis/SubPhase.cpp @@ -161,12 +161,12 @@ void SubPhase::Basic(){ // If external boundary conditions are set, do not average over the inlet kmin=1; kmax=Nz-1; imin=jmin=1; - // If inlet/outlet layers exist use these as default + /*// If inlet/outlet layers exist use these as default if (Dm->inlet_layers_x > 0) imin = Dm->inlet_layers_x; if (Dm->inlet_layers_y > 0) jmin = Dm->inlet_layers_y; if (Dm->inlet_layers_z > 0 && Dm->kproc() == 0) kmin += Dm->inlet_layers_z; if (Dm->outlet_layers_z > 0 && Dm->kproc() == Dm->nprocz()-1) kmax -= Dm->outlet_layers_z; - + */ nb.reset(); wb.reset(); double count_w = 0.0; @@ -376,16 +376,17 @@ void SubPhase::Full(){ // If external boundary conditions are set, do not average over the inlet kmin=1; kmax=Nz-1; - if (Dm->BoundaryCondition > 0 && Dm->BoundaryCondition != 5 && Dm->kproc() == 0) kmin=4; + /*if (Dm->BoundaryCondition > 0 && Dm->BoundaryCondition != 5 && Dm->kproc() == 0) kmin=4; if (Dm->BoundaryCondition > 0 && Dm->BoundaryCondition != 5 && Dm->kproc() == Dm->nprocz()-1) kmax=Nz-4; - + */ imin=jmin=1; - // If inlet layers exist use these as default + /*// If inlet layers exist use these as default + * NOTE -- excluding inlet / outlet will screw up topological averages!!! 
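
The per-site bookkeeping behind the TestMassConservationD3Q7 changes above works like this: for each fluid site the test sums the seven D3Q7 distributions that stream into it, substituting the opposite-direction population stored at the site itself whenever the upstream neighbor is solid (the half-way bounce-back rule), and records the difference from the original density in Error[n]. A minimal sketch of that sum, assuming a Map callable that returns a negative index for solid sites and the q-major layout used for A_q in the test (the helper name siteMassD3Q7 is introduced here for illustration only):

    #include <functional>

    // Post-streaming mass that should arrive at fluid site (i,j,k), reconstructed
    // from the D3Q7 distributions of component A. 'Map' gives the compressed index
    // of a site or a negative value for solid; 'A_q' is stored q-major with Np
    // entries per direction.
    double siteMassD3Q7(const std::function<int(int,int,int)> &Map,
                        const double *A_q, int Np, int i, int j, int k)
    {
        static const int D3Q7[7][3] = {{0,0,0},{1,0,0},{-1,0,0},
                                       {0,1,0},{0,-1,0},{0,0,1},{0,0,-1}};
        int idx = Map(i, j, k);
        if (idx < 0 || idx >= Np) return 0.0;      // solid site: nothing to check
        double sum_q = A_q[idx];                   // rest population, q = 0
        for (int q = 1; q < 7; q++){
            int iq = Map(i - D3Q7[q][0], j - D3Q7[q][1], k - D3Q7[q][2]);
            if (iq > -1 && iq < Np)
                sum_q += A_q[q*Np + iq];           // streamed in from a fluid neighbor
            else if (q % 2 == 0)
                sum_q += A_q[(q-1)*Np + idx];      // solid neighbor: take the opposite
            else                                   // direction at the local site
                sum_q += A_q[(q+1)*Np + idx];
        }
        return sum_q;  // equals DenOriginal[idx] to round-off when mass is conserved
    }

In the test only component A is reconstructed this way; component B is checked through the before/after density difference and the global mass totals.
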
if (Dm->inlet_layers_x > 0) imin = Dm->inlet_layers_x; if (Dm->inlet_layers_y > 0) jmin = Dm->inlet_layers_y; if (Dm->inlet_layers_z > 0 && Dm->kproc() == 0) kmin += Dm->inlet_layers_z; if (Dm->outlet_layers_z > 0 && Dm->kproc() == Dm->nprocz()-1) kmax -= Dm->outlet_layers_z; - + */ nd.reset(); nc.reset(); wd.reset(); wc.reset(); iwn.reset(); iwnc.reset(); Dm->CommunicateMeshHalo(Phi); From 8dc9aed0abc0ca518a902f61dc38d60445e93e96 Mon Sep 17 00:00:00 2001 From: James McClure Date: Fri, 17 Apr 2020 17:55:00 -0400 Subject: [PATCH 114/121] fix mass conservation test --- tests/TestMassConservationD3Q7.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/TestMassConservationD3Q7.cpp b/tests/TestMassConservationD3Q7.cpp index 6186fd60..17c50d19 100644 --- a/tests/TestMassConservationD3Q7.cpp +++ b/tests/TestMassConservationD3Q7.cpp @@ -202,7 +202,7 @@ int main(int argc, char **argv) printf("Global mass difference B = %.5g\n",total_mass_B_1-total_mass_B_0); if (count_negative_A > 0 ||count_negative_B > 0) CleanCheck=1; - if (fabs(total_mass_A_1-total_mass_A_0) > 1.0e-13||fabs(total_mass_B_1-total_mass_B_0) > 1.0e-13 ) CleanCheck=2; + if (fabs(total_mass_A_1-total_mass_A_0) > 1.0e-8 || fabs(total_mass_B_1-total_mass_B_0) > 1.0e-8) CleanCheck=2; FILE *OUTFILE; OUTFILE = fopen("error.raw","wb"); From 67896fcbe291d792950545747aeeee4602856e9b Mon Sep 17 00:00:00 2001 From: James McClure Date: Fri, 17 Apr 2020 18:35:47 -0400 Subject: [PATCH 115/121] remove debug dump --- tests/lbpm_color_simulator.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/lbpm_color_simulator.cpp b/tests/lbpm_color_simulator.cpp index 79b2a718..1f63c653 100644 --- a/tests/lbpm_color_simulator.cpp +++ b/tests/lbpm_color_simulator.cpp @@ -58,7 +58,7 @@ int main(int argc, char **argv) ColorModel.Create(); // creating the model will create data structure to match the pore structure and allocate variables ColorModel.Initialize(); // initializing the model will set initial conditions for variables ColorModel.Run(); - ColorModel.WriteDebug(); + //ColorModel.WriteDebug(); PROFILE_STOP("Main"); PROFILE_SAVE("lbpm_color_simulator",1); From b495c9916c2be425fe739c899299e5de38fc0674 Mon Sep 17 00:00:00 2001 From: James McClure Date: Fri, 17 Apr 2020 19:12:08 -0400 Subject: [PATCH 116/121] seed water morphdelta negative by default --- models/ColorModel.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/models/ColorModel.cpp b/models/ColorModel.cpp index a0f339c6..189f0059 100644 --- a/models/ColorModel.cpp +++ b/models/ColorModel.cpp @@ -546,18 +546,18 @@ void ScaLBL_ColorModel::Run(){ USE_MORPH = true; } else if (protocol == "seed water"){ - morph_delta = 0.05; + morph_delta = -0.05; seed_water = 0.01; USE_SEED = true; USE_MORPH = true; } else if (protocol == "open connected oil"){ - morph_delta = 0.05; + morph_delta = -0.05; USE_MORPH = true; USE_MORPHOPEN_OIL = true; } else if (protocol == "shell aggregation"){ - morph_delta = 0.05; + morph_delta = -0.05; USE_MORPH = true; } if (color_db->keyExists( "capillary_number" )){ From 16e187e1dc0d637a28d1efce7f2c1716a3bc1017 Mon Sep 17 00:00:00 2001 From: James McClure Date: Mon, 4 May 2020 14:50:23 -0400 Subject: [PATCH 117/121] add pseudo-reflection --- common/Domain.cpp | 220 ++++++++++++++++++++++++++-------------------- 1 file changed, 127 insertions(+), 93 deletions(-) diff --git a/common/Domain.cpp b/common/Domain.cpp index 33d6117a..32e13501 100644 --- a/common/Domain.cpp +++ b/common/Domain.cpp @@ -256,6 +256,7 @@ 
void Domain::Decomp( const std::string& Filename ) int64_t i,j,k,n; int64_t xStart,yStart,zStart; int checkerSize; + bool USE_CHECKER = false; //int inlet_layers_x, inlet_layers_y, inlet_layers_z; //int outlet_layers_x, outlet_layers_y, outlet_layers_z; xStart=yStart=zStart=0; @@ -295,6 +296,7 @@ void Domain::Decomp( const std::string& Filename ) } if (database->keyExists( "checkerSize" )){ checkerSize = database->getScalar( "checkerSize" ); + USE_CHECKER = true; } else { checkerSize = SIZE[0]; @@ -367,7 +369,7 @@ void Domain::Decomp( const std::string& Filename ) } } printf("Read segmented data from %s \n",Filename.c_str()); - + // relabel the data std::vector LabelCount(ReadValues.size(),0); for (int k = 0; k 0){ - // use checkerboard pattern - printf("Checkerboard pattern at x inlet for %i layers \n",inlet_layers_x); - for (int k = 0; k 0){ + // use checkerboard pattern + printf("Checkerboard pattern at x inlet for %i layers \n",inlet_layers_x); + for (int k = 0; k 0){ + printf("Checkerboard pattern at y inlet for %i layers \n",inlet_layers_y); + // use checkerboard pattern + for (int k = 0; k 0){ + printf("Checkerboard pattern at z inlet for %i layers, saturated with phase label=%i \n",inlet_layers_z,inlet_layers_phase); + // use checkerboard pattern + for (int k = zStart; k < zStart+inlet_layers_z; k++){ + for (int j = 0; j 0){ + // use checkerboard pattern + printf("Checkerboard pattern at x outlet for %i layers \n",outlet_layers_x); + for (int k = 0; k 0){ + printf("Checkerboard pattern at y outlet for %i layers \n",outlet_layers_y); + // use checkerboard pattern + for (int k = 0; k 0){ + printf("Checkerboard pattern at z outlet for %i layers, saturated with phase label=%i \n",outlet_layers_z,outlet_layers_phase); + // use checkerboard pattern + for (int k = zStart + nz*nprocz - outlet_layers_z; k < zStart + nz*nprocz; k++){ + for (int j = 0; j 0){ - printf("Checkerboard pattern at y inlet for %i layers \n",inlet_layers_y); - // use checkerboard pattern - for (int k = 0; k 0){ - printf("Checkerboard pattern at z inlet for %i layers, saturated with phase label=%i \n",inlet_layers_z,inlet_layers_phase); - // use checkerboard pattern + printf("Mixed reflection pattern at z inlet for %i layers, saturated with phase label=%i \n",inlet_layers_z,inlet_layers_phase); for (int k = zStart; k < zStart+inlet_layers_z; k++){ for (int j = 0; j 0){ + SegData[k*global_Nx*global_Ny+j*global_Nx+i] = reflection_id; } } } } } - - if (outlet_layers_x > 0){ - // use checkerboard pattern - printf("Checkerboard pattern at x outlet for %i layers \n",outlet_layers_x); - for (int k = 0; k 0){ - printf("Checkerboard pattern at y outlet for %i layers \n",outlet_layers_y); - // use checkerboard pattern - for (int k = 0; k 0){ - printf("Checkerboard pattern at z outlet for %i layers, saturated with phase label=%i \n",outlet_layers_z,outlet_layers_phase); - // use checkerboard pattern + printf("Mixed reflection pattern at z outlet for %i layers, saturated with phase label=%i \n",outlet_layers_z,outlet_layers_phase); for (int k = zStart + nz*nprocz - outlet_layers_z; k < zStart + nz*nprocz; k++){ for (int j = 0; j 0){ + SegData[k*global_Nx*global_Ny+j*global_Nx+i] = reflection_id; } } } From d424771849c46bb15ff0831c29e7ef77afb21ff7 Mon Sep 17 00:00:00 2001 From: James McClure Date: Mon, 4 May 2020 15:12:50 -0400 Subject: [PATCH 118/121] fix scope in Domain inlet/outlet --- common/Domain.cpp | 53 ++++++++++++++++++++++------------------------- 1 file changed, 25 insertions(+), 28 deletions(-) diff --git 
a/common/Domain.cpp b/common/Domain.cpp index 32e13501..e355310f 100644 --- a/common/Domain.cpp +++ b/common/Domain.cpp @@ -390,14 +390,11 @@ void Domain::Decomp( const std::string& Filename ) } } } - if (RANK==0){ - for (size_t idx=0; idx 0){ // use checkerboard pattern @@ -516,31 +513,31 @@ void Domain::Decomp( const std::string& Filename ) } } } - } - else { - if (inlet_layers_z > 0){ - printf("Mixed reflection pattern at z inlet for %i layers, saturated with phase label=%i \n",inlet_layers_z,inlet_layers_phase); - for (int k = zStart; k < zStart+inlet_layers_z; k++){ - for (int j = 0; j 0){ - SegData[k*global_Nx*global_Ny+j*global_Nx+i] = reflection_id; + else { + if (inlet_layers_z > 0){ + printf("Mixed reflection pattern at z inlet for %i layers, saturated with phase label=%i \n",inlet_layers_z,inlet_layers_phase); + for (int k = zStart; k < zStart+inlet_layers_z; k++){ + for (int j = 0; j 0){ + SegData[k*global_Nx*global_Ny+j*global_Nx+i] = reflection_id; + } } } } } - } - if (outlet_layers_z > 0){ - printf("Mixed reflection pattern at z outlet for %i layers, saturated with phase label=%i \n",outlet_layers_z,outlet_layers_phase); - for (int k = zStart + nz*nprocz - outlet_layers_z; k < zStart + nz*nprocz; k++){ - for (int j = 0; j 0){ - SegData[k*global_Nx*global_Ny+j*global_Nx+i] = reflection_id; + if (outlet_layers_z > 0){ + printf("Mixed reflection pattern at z outlet for %i layers, saturated with phase label=%i \n",outlet_layers_z,outlet_layers_phase); + for (int k = zStart + nz*nprocz - outlet_layers_z; k < zStart + nz*nprocz; k++){ + for (int j = 0; j 0){ + SegData[k*global_Nx*global_Ny+j*global_Nx+i] = reflection_id; + } } } } From 7b731e327be8989fe1983e21d813627272ea05eb Mon Sep 17 00:00:00 2001 From: James McClure Date: Mon, 4 May 2020 15:26:41 -0400 Subject: [PATCH 119/121] update pseudo-reflection --- common/Domain.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/common/Domain.cpp b/common/Domain.cpp index e355310f..3dec0128 100644 --- a/common/Domain.cpp +++ b/common/Domain.cpp @@ -520,7 +520,7 @@ void Domain::Decomp( const std::string& Filename ) for (int j = 0; j 0){ SegData[k*global_Nx*global_Ny+j*global_Nx+i] = reflection_id; } @@ -534,7 +534,7 @@ void Domain::Decomp( const std::string& Filename ) for (int j = 0; j 0){ SegData[k*global_Nx*global_Ny+j*global_Nx+i] = reflection_id; } From 214917021aaf9d89c6efa6d2f795683df9421ff3 Mon Sep 17 00:00:00 2001 From: James McClure Date: Wed, 6 May 2020 14:10:57 -0400 Subject: [PATCH 120/121] rescale force after user time interval --- models/ColorModel.cpp | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/models/ColorModel.cpp b/models/ColorModel.cpp index 189f0059..49fc635c 100644 --- a/models/ColorModel.cpp +++ b/models/ColorModel.cpp @@ -494,6 +494,7 @@ void ScaLBL_ColorModel::Run(){ int IMAGE_COUNT = 0; std::vector ImageList; bool SET_CAPILLARY_NUMBER = false; + bool RESCALE_FORCE = false; bool MORPH_ADAPT = false; bool USE_MORPH = false; bool USE_SEED = false; @@ -502,6 +503,7 @@ void ScaLBL_ColorModel::Run(){ int MAX_MORPH_TIMESTEPS = 50000; // maximum number of LBM timesteps to spend in morphological adaptation routine int MIN_STEADY_TIMESTEPS = 100000; int MAX_STEADY_TIMESTEPS = 200000; + int RESCALE_FORCE_AFTER_TIMESTEP = 0; int RAMP_TIMESTEPS = 0;//50000; // number of timesteps to run initially (to get a reasonable velocity field before other pieces kick in) int CURRENT_MORPH_TIMESTEPS=0; // counter for number of timesteps spent in morphological 
adaptation routine (reset each time) int CURRENT_STEADY_TIMESTEPS=0; // counter for number of timesteps spent in morphological adaptation routine (reset each time) @@ -563,7 +565,9 @@ void ScaLBL_ColorModel::Run(){ if (color_db->keyExists( "capillary_number" )){ capillary_number = color_db->getScalar( "capillary_number" ); SET_CAPILLARY_NUMBER=true; - //RESCALE_FORCE_MAX = 1; + } + if (color_db->keyExists( "rescale_force_after_timestep" )){ + RESCALE_FORCE_AFTER_TIMESTEP = color_db->getScalar( "rescale_force_after_timestep" ); } if (color_db->keyExists( "timestep" )){ timestep = color_db->getScalar( "timestep" ); @@ -791,7 +795,20 @@ void ScaLBL_ColorModel::Run(){ isSteady = true; if (CURRENT_STEADY_TIMESTEPS > MAX_STEADY_TIMESTEPS) isSteady = true; - + if (RESCALE_FORCE == true && SET_CAPILLARY_NUMBER == true && CURRENT_STEADY_TIMESTEPS > RESCALE_FORCE_AFTER_TIMESTEP){ + RESCALE_FORCE = false; + Fx *= capillary_number / Ca; + Fy *= capillary_number / Ca; + Fz *= capillary_number / Ca; + if (force_mag > 1e-3){ + Fx *= 1e-3/force_mag; // impose ceiling for stability + Fy *= 1e-3/force_mag; + Fz *= 1e-3/force_mag; + } + if (rank == 0) printf(" -- adjust force by factor %f \n ",capillary_number / Ca); + Averages->SetParams(rhoA,rhoB,tauA,tauB,Fx,Fy,Fz,alpha,beta); + color_db->putVector("F",{Fx,Fy,Fz}); + } if ( isSteady ){ MORPH_ADAPT = true; CURRENT_MORPH_TIMESTEPS=0; @@ -952,12 +969,17 @@ void ScaLBL_ColorModel::Run(){ CURRENT_STEADY_TIMESTEPS=0; initial_volume = volA*Dm->Volume; delta_volume = 0.0; + if (RESCALE_FORCE_AFTER_TIMESTEP > 0) + RESCALE_FORCE = true; } else if (!(USE_DIRECT) && CURRENT_MORPH_TIMESTEPS > MAX_MORPH_TIMESTEPS) { MORPH_ADAPT = false; CURRENT_STEADY_TIMESTEPS=0; initial_volume = volA*Dm->Volume; delta_volume = 0.0; + RESCALE_FORCE = true; + if (RESCALE_FORCE_AFTER_TIMESTEP > 0) + RESCALE_FORCE = true; } } morph_timesteps += analysis_interval; From 09a9a05a8780941240fd790b6825cb3f771d73c3 Mon Sep 17 00:00:00 2001 From: James McClure Date: Wed, 6 May 2020 15:12:50 -0400 Subject: [PATCH 121/121] enable force adaptation --- models/ColorModel.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/models/ColorModel.cpp b/models/ColorModel.cpp index 49fc635c..7b883657 100644 --- a/models/ColorModel.cpp +++ b/models/ColorModel.cpp @@ -568,6 +568,7 @@ void ScaLBL_ColorModel::Run(){ } if (color_db->keyExists( "rescale_force_after_timestep" )){ RESCALE_FORCE_AFTER_TIMESTEP = color_db->getScalar( "rescale_force_after_timestep" ); + RESCALE_FORCE = true; } if (color_db->keyExists( "timestep" )){ timestep = color_db->getScalar( "timestep" );
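
The rescale_force_after_timestep option introduced in the last two patches lets the color model adjust its body force toward the user-specified capillary number once the flow has had time to develop: after the requested number of steady timesteps the force is multiplied by capillary_number/Ca, with a ceiling on the force magnitude for stability. A minimal sketch of that adjustment, with the arithmetic pulled out of ScaLBL_ColorModel::Run() into a standalone helper (the name rescaleForce is illustrative, and force_mag is recomputed here after the adjustment rather than reused from earlier in the loop):

    #include <cmath>

    // Scale the body force so the measured capillary number Ca moves toward the
    // target value, then cap the force magnitude to keep the LBM scheme stable.
    void rescaleForce(double &Fx, double &Fy, double &Fz,
                      double Ca, double capillary_number)
    {
        double scale = capillary_number / Ca;      // proportional adjustment
        Fx *= scale;  Fy *= scale;  Fz *= scale;
        double force_mag = std::sqrt(Fx*Fx + Fy*Fy + Fz*Fz);
        if (force_mag > 1.0e-3){                   // impose ceiling for stability
            Fx *= 1.0e-3 / force_mag;
            Fy *= 1.0e-3 / force_mag;
            Fz *= 1.0e-3 / force_mag;
        }
    }

With both capillary_number and rescale_force_after_timestep set in the input database, the adjusted force is pushed back into the analysis parameters and the database (Averages->SetParams and color_db->putVector("F", ...)) so that subsequent analysis and restarts see the updated value.
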