Updated TestCommD3Q19 to ensure clean MPI exit

This commit is contained in:
James E McClure 2016-01-08 12:57:42 -05:00
parent a043dd82c2
commit b03fa6dc16

View File

@ -158,232 +158,233 @@ int main(int argc, char **argv)
// Initialize MPI // Initialize MPI
int rank,nprocs; int rank,nprocs;
MPI_Init(&argc,&argv); MPI_Init(&argc,&argv);
MPI_Comm comm = MPI_COMM_WORLD; MPI_Comm comm = MPI_COMM_WORLD;
MPI_Comm_rank(comm,&rank); MPI_Comm_rank(comm,&rank);
MPI_Comm_size(comm,&nprocs); MPI_Comm_size(comm,&nprocs);
// parallel domain size (# of sub-domains) {
int nprocx,nprocy,nprocz; // parallel domain size (# of sub-domains)
int iproc,jproc,kproc; int nprocx,nprocy,nprocz;
//***************************************** int iproc,jproc,kproc;
// MPI ranks for all 18 neighbors //*****************************************
//********************************** // MPI ranks for all 18 neighbors
int rank_x,rank_y,rank_z,rank_X,rank_Y,rank_Z; //**********************************
int rank_xy,rank_XY,rank_xY,rank_Xy; int rank_x,rank_y,rank_z,rank_X,rank_Y,rank_Z;
int rank_xz,rank_XZ,rank_xZ,rank_Xz; int rank_xy,rank_XY,rank_xY,rank_Xy;
int rank_yz,rank_YZ,rank_yZ,rank_Yz; int rank_xz,rank_XZ,rank_xZ,rank_Xz;
//********************************** int rank_yz,rank_YZ,rank_yZ,rank_Yz;
MPI_Request req1[18],req2[18]; //**********************************
MPI_Status stat1[18],stat2[18]; MPI_Request req1[18],req2[18];
MPI_Status stat1[18],stat2[18];
if (rank == 0){ if (rank == 0){
printf("********************************************************\n"); printf("********************************************************\n");
printf("Running Unit Test for D3Q19 MPI Communication \n"); printf("Running Unit Test for D3Q19 MPI Communication \n");
printf("********************************************************\n"); printf("********************************************************\n");
} }
// BGK Model parameters // BGK Model parameters
string FILENAME; string FILENAME;
unsigned int nBlocks, nthreads; unsigned int nBlocks, nthreads;
int timestepMax, interval; int timestepMax, interval;
double tau,Fx,Fy,Fz,tol; double tau,Fx,Fy,Fz,tol;
// Domain variables // Domain variables
double Lx,Ly,Lz; double Lx,Ly,Lz;
int nspheres; int nspheres;
int Nx,Ny,Nz; int Nx,Ny,Nz;
int i,j,k,n; int i,j,k,n;
if (rank==0){
//.......................................................................
// Reading the domain information file
//.......................................................................
ifstream domain("Domain.in");
if (domain.good()){
domain >> nprocx;
domain >> nprocy;
domain >> nprocz;
domain >> Nx;
domain >> Ny;
domain >> Nz;
domain >> nspheres;
domain >> Lx;
domain >> Ly;
domain >> Lz;
}
else if (nprocs==1){
nprocx=nprocy=nprocz=1;
Nx=Ny=Nz=50;
nspheres=0;
Lx=Ly=Lz=1;
}
else if (nprocs==2){
nprocx=nprocy=1;
nprocz=2;
Nx=Ny=Nz=50;
nspheres=0;
Lx=Ly=Lz=1;
}
else if (nprocs==4){
nprocx=nprocy=2;
nprocz=1;
Nx=Ny=Nz=50;
nspheres=0;
Lx=Ly=Lz=1;
}
else if (nprocs==8){
nprocx=nprocy=nprocz=2;
Nx=Ny=Nz=50;
nspheres=0;
Lx=Ly=Lz=1;
}
//.......................................................................
}
// **************************************************************
// Broadcast simulation parameters from rank 0 to all other procs
MPI_Barrier(comm);
//.................................................
MPI_Bcast(&Nx,1,MPI_INT,0,comm);
MPI_Bcast(&Ny,1,MPI_INT,0,comm);
MPI_Bcast(&Nz,1,MPI_INT,0,comm);
MPI_Bcast(&nBlocks,1,MPI_INT,0,comm);
MPI_Bcast(&nthreads,1,MPI_INT,0,comm);
MPI_Bcast(&timestepMax,1,MPI_INT,0,comm);
MPI_Bcast(&Nx,1,MPI_INT,0,comm);
MPI_Bcast(&Ny,1,MPI_INT,0,comm);
MPI_Bcast(&Nz,1,MPI_INT,0,comm);
MPI_Bcast(&nprocx,1,MPI_INT,0,comm);
MPI_Bcast(&nprocy,1,MPI_INT,0,comm);
MPI_Bcast(&nprocz,1,MPI_INT,0,comm);
MPI_Bcast(&nspheres,1,MPI_INT,0,comm);
MPI_Bcast(&Lx,1,MPI_DOUBLE,0,comm);
MPI_Bcast(&Ly,1,MPI_DOUBLE,0,comm);
MPI_Bcast(&Lz,1,MPI_DOUBLE,0,comm);
//.................................................
MPI_Barrier(comm);
// **************************************************************
// **************************************************************
if (nprocs != nprocx*nprocy*nprocz){
printf("nprocx = %i \n",nprocx);
printf("nprocy = %i \n",nprocy);
printf("nprocz = %i \n",nprocz);
INSIST(nprocs == nprocx*nprocy*nprocz,"Fatal error in processor count!");
}
if (rank==0){
printf("********************************************************\n");
printf("Sub-domain size = %i x %i x %i\n",Nz,Nz,Nz);
printf("Parallel domain size = %i x %i x %i\n",nprocx,nprocy,nprocz);
printf("********************************************************\n");
}
MPI_Barrier(comm);
kproc = rank/(nprocx*nprocy);
jproc = (rank-nprocx*nprocy*kproc)/nprocx;
iproc = rank-nprocx*nprocy*kproc-nprocz*jproc;
double iVol_global = 1.0/Nx/Ny/Nz/nprocx/nprocy/nprocz;
int BoundaryCondition=0;
Domain Dm(Nx,Ny,Nz,rank,nprocx,nprocy,nprocz,Lx,Ly,Lz,BoundaryCondition);
InitializeRanks( rank, nprocx, nprocy, nprocz, iproc, jproc, kproc,
rank_x, rank_y, rank_z, rank_X, rank_Y, rank_Z,
rank_xy, rank_XY, rank_xY, rank_Xy, rank_xz, rank_XZ, rank_xZ, rank_Xz,
rank_yz, rank_YZ, rank_yZ, rank_Yz );
Nx += 2;
Ny += 2;
Nz += 2;
int N = Nx*Ny*Nz;
int dist_mem_size = N*sizeof(double);
if (rank==0){
//....................................................................... //.......................................................................
// Reading the domain information file // Assign the phase ID field
//....................................................................... //.......................................................................
ifstream domain("Domain.in"); char LocalRankString[8];
if (domain.good()){ sprintf(LocalRankString,"%05d",rank);
domain >> nprocx; char LocalRankFilename[40];
domain >> nprocy; sprintf(LocalRankFilename,"ID.%05i",rank);
domain >> nprocz;
domain >> Nx;
domain >> Ny;
domain >> Nz;
domain >> nspheres;
domain >> Lx;
domain >> Ly;
domain >> Lz;
}
else if (nprocs==1){
nprocx=nprocy=nprocz=1;
Nx=Ny=Nz=50;
nspheres=0;
Lx=Ly=Lz=1;
}
else if (nprocs==2){
nprocx=nprocy=1;
nprocz=2;
Nx=Ny=Nz=50;
nspheres=0;
Lx=Ly=Lz=1;
}
else if (nprocs==4){
nprocx=nprocy=2;
nprocz=1;
Nx=Ny=Nz=50;
nspheres=0;
Lx=Ly=Lz=1;
}
else if (nprocs==8){
nprocx=nprocy=nprocz=2;
Nx=Ny=Nz=50;
nspheres=0;
Lx=Ly=Lz=1;
}
//.......................................................................
}
// **************************************************************
// Broadcast simulation parameters from rank 0 to all other procs
MPI_Barrier(comm);
//.................................................
MPI_Bcast(&Nx,1,MPI_INT,0,comm);
MPI_Bcast(&Ny,1,MPI_INT,0,comm);
MPI_Bcast(&Nz,1,MPI_INT,0,comm);
MPI_Bcast(&nBlocks,1,MPI_INT,0,comm);
MPI_Bcast(&nthreads,1,MPI_INT,0,comm);
MPI_Bcast(&timestepMax,1,MPI_INT,0,comm);
MPI_Bcast(&Nx,1,MPI_INT,0,comm); char *id;
MPI_Bcast(&Ny,1,MPI_INT,0,comm); id = new char[Nx*Ny*Nz];
MPI_Bcast(&Nz,1,MPI_INT,0,comm);
MPI_Bcast(&nprocx,1,MPI_INT,0,comm);
MPI_Bcast(&nprocy,1,MPI_INT,0,comm);
MPI_Bcast(&nprocz,1,MPI_INT,0,comm);
MPI_Bcast(&nspheres,1,MPI_INT,0,comm);
MPI_Bcast(&Lx,1,MPI_DOUBLE,0,comm);
MPI_Bcast(&Ly,1,MPI_DOUBLE,0,comm);
MPI_Bcast(&Lz,1,MPI_DOUBLE,0,comm);
//.................................................
MPI_Barrier(comm);
// **************************************************************
// **************************************************************
if (nprocs != nprocx*nprocy*nprocz){ /*
printf("nprocx = %i \n",nprocx); * if (rank==0) printf("Assigning phase ID from file \n");
printf("nprocy = %i \n",nprocy); * if (rank==0) printf("Initialize from segmented data: solid=0, NWP=1, WP=2 \n");
printf("nprocz = %i \n",nprocz);
INSIST(nprocs == nprocx*nprocy*nprocz,"Fatal error in processor count!");
}
if (rank==0){
printf("********************************************************\n");
printf("Sub-domain size = %i x %i x %i\n",Nz,Nz,Nz);
printf("Parallel domain size = %i x %i x %i\n",nprocx,nprocy,nprocz);
printf("********************************************************\n");
}
MPI_Barrier(comm);
kproc = rank/(nprocx*nprocy);
jproc = (rank-nprocx*nprocy*kproc)/nprocx;
iproc = rank-nprocx*nprocy*kproc-nprocz*jproc;
double iVol_global = 1.0/Nx/Ny/Nz/nprocx/nprocy/nprocz;
int BoundaryCondition=0;
Domain Dm(Nx,Ny,Nz,rank,nprocx,nprocy,nprocz,Lx,Ly,Lz,BoundaryCondition);
InitializeRanks( rank, nprocx, nprocy, nprocz, iproc, jproc, kproc,
rank_x, rank_y, rank_z, rank_X, rank_Y, rank_Z,
rank_xy, rank_XY, rank_xY, rank_Xy, rank_xz, rank_XZ, rank_xZ, rank_Xz,
rank_yz, rank_YZ, rank_yZ, rank_Yz );
Nx += 2;
Ny += 2;
Nz += 2;
int N = Nx*Ny*Nz;
int dist_mem_size = N*sizeof(double);
//.......................................................................
// Assign the phase ID field
//.......................................................................
char LocalRankString[8];
sprintf(LocalRankString,"%05d",rank);
char LocalRankFilename[40];
sprintf(LocalRankFilename,"ID.%05i",rank);
char *id;
id = new char[Nx*Ny*Nz];
/*
* if (rank==0) printf("Assigning phase ID from file \n");
* if (rank==0) printf("Initialize from segmented data: solid=0, NWP=1, WP=2 \n");
FILE *IDFILE = fopen(LocalRankFilename,"rb"); FILE *IDFILE = fopen(LocalRankFilename,"rb");
if (IDFILE==NULL) ERROR("Error opening file: ID.xxxxx"); if (IDFILE==NULL) ERROR("Error opening file: ID.xxxxx");
fread(id,1,N,IDFILE); fread(id,1,N,IDFILE);
fclose(IDFILE); fclose(IDFILE);
*/ */
// Setup the domain // Setup the domain
for (k=0;k<Nz;k++){ for (k=0;k<Nz;k++){
for (j=0;j<Ny;j++){ for (j=0;j<Ny;j++){
for (i=0;i<Nx;i++){ for (i=0;i<Nx;i++){
n = k*Nx*Ny+j*Nx+i; n = k*Nx*Ny+j*Nx+i;
id[n] = 1; id[n] = 1;
Dm.id[n] = id[n]; Dm.id[n] = id[n];
}
}
}
Dm.CommInit(comm);
//.......................................................................
// Compute the media porosity
//.......................................................................
double sum;
double sum_local=0.0, porosity;
char component = 0; // solid phase
for (k=1;k<Nz-1;k++){
for (j=1;j<Ny-1;j++){
for (i=1;i<Nx-1;i++){
n = k*Nx*Ny+j*Nx+i;
if (id[n] == component){
sum_local+=1.0;
} }
} }
} }
} Dm.CommInit(comm);
MPI_Allreduce(&sum_local,&sum,1,MPI_DOUBLE,MPI_SUM,comm);
porosity = 1.0-sum*iVol_global;
if (rank==0) printf("Media porosity = %f \n",porosity);
//.......................................................................
//........................................................................... //.......................................................................
MPI_Barrier(comm); // Compute the media porosity
if (rank == 0) cout << "Domain set." << endl; //.......................................................................
//........................................................................... double sum;
double sum_local=0.0, porosity;
char component = 0; // solid phase
for (k=1;k<Nz-1;k++){
for (j=1;j<Ny-1;j++){
for (i=1;i<Nx-1;i++){
n = k*Nx*Ny+j*Nx+i;
if (id[n] == component){
sum_local+=1.0;
}
}
}
}
MPI_Allreduce(&sum_local,&sum,1,MPI_DOUBLE,MPI_SUM,comm);
porosity = 1.0-sum*iVol_global;
if (rank==0) printf("Media porosity = %f \n",porosity);
//.......................................................................
//........................................................................... //...........................................................................
if (rank==0) printf ("Create ScaLBL_Communicator \n"); MPI_Barrier(comm);
// Create a communicator for the device if (rank == 0) cout << "Domain set." << endl;
ScaLBL_Communicator ScaLBL_Comm(Dm); //...........................................................................
//...........device phase ID.................................................
if (rank==0) printf ("Copying phase ID to device \n");
char *ID;
AllocateDeviceMemory((void **) &ID, N); // Allocate device memory
// Copy to the device
CopyToDevice(ID, id, N);
//...........................................................................
//........................................................................... //...........................................................................
// MAIN VARIABLES ALLOCATED HERE if (rank==0) printf ("Create ScaLBL_Communicator \n");
//........................................................................... // Create a communicator for the device
// LBM variables ScaLBL_Communicator ScaLBL_Comm(Dm);
if (rank==0) printf ("Allocating distributions \n");
//......................device distributions.................................
double *f_even,*f_odd;
//...........................................................................
AllocateDeviceMemory((void **) &f_even, 10*dist_mem_size); // Allocate device memory
AllocateDeviceMemory((void **) &f_odd, 9*dist_mem_size); // Allocate device memory
//...........................................................................
double *f_even_host,*f_odd_host;
f_even_host = new double [10*N];
f_odd_host = new double [9*N];
//...........................................................................
/* // Write the communcation structure into a file for debugging //...........device phase ID.................................................
if (rank==0) printf ("Copying phase ID to device \n");
char *ID;
AllocateDeviceMemory((void **) &ID, N); // Allocate device memory
// Copy to the device
CopyToDevice(ID, id, N);
//...........................................................................
//...........................................................................
// MAIN VARIABLES ALLOCATED HERE
//...........................................................................
// LBM variables
if (rank==0) printf ("Allocating distributions \n");
//......................device distributions.................................
double *f_even,*f_odd;
//...........................................................................
AllocateDeviceMemory((void **) &f_even, 10*dist_mem_size); // Allocate device memory
AllocateDeviceMemory((void **) &f_odd, 9*dist_mem_size); // Allocate device memory
//...........................................................................
double *f_even_host,*f_odd_host;
f_even_host = new double [10*N];
f_odd_host = new double [9*N];
//...........................................................................
/* // Write the communcation structure into a file for debugging
char LocalCommFile[40]; char LocalCommFile[40];
sprintf(LocalCommFile,"%s%s","Comm.",LocalRankString); sprintf(LocalCommFile,"%s%s","Comm.",LocalRankString);
FILE *CommFile; FILE *CommFile;
@ -410,47 +411,14 @@ int main(int argc, char **argv)
fprintf(CommFile,"Yz=%d, ",rank_Yz); fprintf(CommFile,"Yz=%d, ",rank_Yz);
fprintf(CommFile,"\n"); fprintf(CommFile,"\n");
fclose(CommFile); fclose(CommFile);
*/ */
if (rank==0) printf("Setting the distributions, size = : %i\n", N); if (rank==0) printf("Setting the distributions, size = : %i\n", N);
//........................................................................... //...........................................................................
GlobalFlipInitD3Q19(f_even_host, f_odd_host, Nx-2, Ny-2, Nz-2,iproc,jproc,kproc,nprocx,nprocy,nprocz); GlobalFlipInitD3Q19(f_even_host, f_odd_host, Nx-2, Ny-2, Nz-2,iproc,jproc,kproc,nprocx,nprocy,nprocz);
CopyToDevice(f_even, f_even_host, 10*dist_mem_size); CopyToDevice(f_even, f_even_host, 10*dist_mem_size);
CopyToDevice(f_odd, f_odd_host, 9*dist_mem_size); CopyToDevice(f_odd, f_odd_host, 9*dist_mem_size);
DeviceBarrier(); DeviceBarrier();
MPI_Barrier(comm); MPI_Barrier(comm);
//*************************************************************************
// Pack and send the D3Q19 distributions
ScaLBL_Comm.SendD3Q19(f_even, f_odd);
//*************************************************************************
// Swap the distributions for momentum transport
//*************************************************************************
SwapD3Q19(ID, f_even, f_odd, Nx, Ny, Nz);
//*************************************************************************
// Wait for communications to complete and unpack the distributions
ScaLBL_Comm.RecvD3Q19(f_even, f_odd);
//*************************************************************************
//...........................................................................
int check;
CopyToHost(f_even_host,f_even,10*N*sizeof(double));
CopyToHost(f_odd_host,f_odd,9*N*sizeof(double));
check = GlobalCheckDebugDist(f_even_host, f_odd_host, Nx-2, Ny-2, Nz-2,iproc,jproc,kproc,nprocx,nprocy,nprocz);
//...........................................................................
int timestep = 0;
if (rank==0) printf("********************************************************\n");
if (rank==0) printf("No. of timesteps for timing: %i \n", 100);
//.......create and start timer............
double starttime,stoptime,cputime;
MPI_Barrier(comm);
starttime = MPI_Wtime();
//.........................................
//************ MAIN ITERATION LOOP (timing communications)***************************************/
while (timestep < 100){
//************************************************************************* //*************************************************************************
// Pack and send the D3Q19 distributions // Pack and send the D3Q19 distributions
ScaLBL_Comm.SendD3Q19(f_even, f_odd); ScaLBL_Comm.SendD3Q19(f_even, f_odd);
@ -463,35 +431,68 @@ int main(int argc, char **argv)
ScaLBL_Comm.RecvD3Q19(f_even, f_odd); ScaLBL_Comm.RecvD3Q19(f_even, f_odd);
//************************************************************************* //*************************************************************************
DeviceBarrier(); //...........................................................................
int check;
CopyToHost(f_even_host,f_even,10*N*sizeof(double));
CopyToHost(f_odd_host,f_odd,9*N*sizeof(double));
check = GlobalCheckDebugDist(f_even_host, f_odd_host, Nx-2, Ny-2, Nz-2,iproc,jproc,kproc,nprocx,nprocy,nprocz);
//...........................................................................
int timestep = 0;
if (rank==0) printf("********************************************************\n");
if (rank==0) printf("No. of timesteps for timing: %i \n", 100);
//.......create and start timer............
double starttime,stoptime,cputime;
MPI_Barrier(comm); MPI_Barrier(comm);
// Iteration completed! starttime = MPI_Wtime();
timestep++; //.........................................
//...................................................................
//************ MAIN ITERATION LOOP (timing communications)***************************************/
while (timestep < 100){
//*************************************************************************
// Pack and send the D3Q19 distributions
ScaLBL_Comm.SendD3Q19(f_even, f_odd);
//*************************************************************************
// Swap the distributions for momentum transport
//*************************************************************************
SwapD3Q19(ID, f_even, f_odd, Nx, Ny, Nz);
//*************************************************************************
// Wait for communications to complete and unpack the distributions
ScaLBL_Comm.RecvD3Q19(f_even, f_odd);
//*************************************************************************
DeviceBarrier();
MPI_Barrier(comm);
// Iteration completed!
timestep++;
//...................................................................
}
//************************************************************************/
stoptime = MPI_Wtime();
// cout << "CPU time: " << (stoptime - starttime) << " seconds" << endl;
cputime = stoptime - starttime;
// cout << "Lattice update rate: "<< double(Nx*Ny*Nz*timestep)/cputime/1000000 << " MLUPS" << endl;
double MLUPS = double(Nx*Ny*Nz*timestep)/cputime/1000000;
if (rank==0) printf("********************************************************\n");
if (rank==0) printf("CPU time = %f \n", cputime);
if (rank==0) printf("Lattice update rate (per process)= %f MLUPS \n", MLUPS);
MLUPS *= nprocs;
if (rank==0) printf("Lattice update rate (process)= %f MLUPS \n", MLUPS);
if (rank==0) printf("********************************************************\n");
// Number of memory references from the swap algorithm (per timestep)
// 18 reads and 18 writes for each lattice site
double MemoryRefs = (Nx-2)*(Ny-2)*(Nz-2)*36;
// number of memory references for the swap algorithm - GigaBytes / second
if (rank==0) printf("DRAM bandwidth (per process)= %f GB/sec \n",MemoryRefs*8*timestep/1e9);
// Report bandwidth in Gigabits per second
// communication bandwidth includes both send and recieve
if (rank==0) printf("Communication bandwidth (per process)= %f Gbit/sec \n",ScaLBL_Comm.CommunicationCount*64*timestep/1e9);
if (rank==0) printf("Aggregated communication bandwidth = %f Gbit/sec \n",nprocs*ScaLBL_Comm.CommunicationCount*64*timestep/1e9);
} }
//************************************************************************/
stoptime = MPI_Wtime();
// cout << "CPU time: " << (stoptime - starttime) << " seconds" << endl;
cputime = stoptime - starttime;
// cout << "Lattice update rate: "<< double(Nx*Ny*Nz*timestep)/cputime/1000000 << " MLUPS" << endl;
double MLUPS = double(Nx*Ny*Nz*timestep)/cputime/1000000;
if (rank==0) printf("********************************************************\n");
if (rank==0) printf("CPU time = %f \n", cputime);
if (rank==0) printf("Lattice update rate (per process)= %f MLUPS \n", MLUPS);
MLUPS *= nprocs;
if (rank==0) printf("Lattice update rate (process)= %f MLUPS \n", MLUPS);
if (rank==0) printf("********************************************************\n");
// Number of memory references from the swap algorithm (per timestep)
// 18 reads and 18 writes for each lattice site
double MemoryRefs = (Nx-2)*(Ny-2)*(Nz-2)*36;
// number of memory references for the swap algorithm - GigaBytes / second
if (rank==0) printf("DRAM bandwidth (per process)= %f GB/sec \n",MemoryRefs*8*timestep/1e9);
// Report bandwidth in Gigabits per second
// communication bandwidth includes both send and recieve
if (rank==0) printf("Communication bandwidth (per process)= %f Gbit/sec \n",ScaLBL_Comm.CommunicationCount*64*timestep/1e9);
if (rank==0) printf("Aggregated communication bandwidth = %f Gbit/sec \n",nprocs*ScaLBL_Comm.CommunicationCount*64*timestep/1e9);
// **************************************************** // ****************************************************
MPI_Barrier(comm); MPI_Barrier(comm);
MPI_Finalize(); MPI_Finalize();