Merge branch 'ScaLBL' of github.com:JamesEMcClure/LBPM-WIA into ScaLBL

This commit is contained in:
James E McClure 2018-03-19 10:41:03 -04:00
commit 4debc1dbe2
17 changed files with 63416 additions and 140 deletions

View File

@ -0,0 +1,6 @@
1.0 1.0
1.0 1.0
1.0e-3 0.95
0.0 0.0 1.0e-6
0 0 10.0 1.0
100 5000 1e-5

View File

@ -0,0 +1,3 @@
10 10 12
320 320 320
1.0 1.0 1.0

View File

@ -0,0 +1,6 @@
1.0 1.0
1.0 1.0
1.0e-3 0.95
0.0 0.0 1.0e-6
0 0 10.0 1.0
100 5000 1e-5

View File

@ -0,0 +1,3 @@
15 15 16
320 320 320
1.0 1.0 1.0

View File

@ -0,0 +1,5 @@
1.0
0.0 0.0 1.0e-6
0 0 1.0 1.0
100 1000 1.0e-5

View File

@ -0,0 +1,3 @@
1 1 1
320 320 320
1.0 1.0 1.0

View File

@ -0,0 +1,13 @@
#!/bin/bash
# Replicate the single-rank template files ID.00000 and SignDist.00000 into
# the directory "<NRANKS>p", one copy per rank, named ID.xxxxx / SignDist.xxxxx
# with a five-digit zero-padded rank suffix.
NRANKS=3600
echo "$NRANKS"
DIR="${NRANKS}p"
mkdir -p "$DIR"
BASEDIST="SignDist."
BASEID="ID."

# Stop early when a template is missing instead of failing once per rank.
for template in ID.00000 SignDist.00000; do
    if [ ! -f "$template" ]; then
        echo "Missing template file: $template" >&2
        exit 1
    fi
done

# Ranks run from 0 to NRANKS-1 (the previous `seq -w 0 $NRANKS` produced one
# extra file).  printf fixes the suffix at five digits regardless of how many
# digits NRANKS itself has.
for ((i = 0; i < NRANKS; i++)); do
    printf -v idfile '%s%05d' "$BASEID" "$i"
    echo "$idfile"
    cp ID.00000 "$DIR/$idfile"
done
for ((i = 0; i < NRANKS; i++)); do
    printf -v distfile '%s%05d' "$BASEDIST" "$i"
    echo "$distfile"
    cp SignDist.00000 "$DIR/$distfile"
done

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,48 @@
#!/bin/bash
#BSUB -P CSC275MCCLURE
#BSUB -J spheres
#BSUB -o spheres.o%J
#BSUB -W 10
#BSUB -nnodes 1
##BSUB -env "all,JOB_FEATURE=NVME"
# Batch job: run GenerateSphereTest once, then replicate the rank-0 files
# (ID.00000, SignDist.00000) into per-rank copies for the 1200- and 3600-rank
# cases.
# NOTE(review): assumes GenerateSphereTest writes ID.00000 and SignDist.00000
# into the current working directory -- confirm against the test source.

# replicate_case NRANKS
# Create "<NRANKS>p" and fill it with ID.xxxxx and SignDist.xxxxx copies for
# ranks 0 .. NRANKS-1, echoing each file name as it is created.
replicate_case() {
    local nranks=$1
    local dir="${nranks}p"
    local i name

    echo "$nranks"
    mkdir -p "$dir"
    # Upper bound is exclusive: the previous `seq -w 0 $NRANKS` made one file
    # too many.  %05d keeps the suffix five digits wide for any rank count.
    for ((i = 0; i < nranks; i++)); do
        printf -v name 'ID.%05d' "$i"
        echo "$name"
        cp ID.00000 "$dir/$name"
    done
    for ((i = 0; i < nranks; i++)); do
        printf -v name 'SignDist.%05d' "$i"
        echo "$name"
        cp SignDist.00000 "$dir/$name"
    done
}

date
module load gcc cuda
export SCALBL_DIR=$HOME/summit/build/LBPM-WIA/tests
jsrun -n1 -r1 -g1 -c1 -brs "$SCALBL_DIR/GenerateSphereTest" 1896

# Without the templates every cp below would fail; report it once and stop.
for template in ID.00000 SignDist.00000; do
    if [ ! -f "$template" ]; then
        echo "Missing template file: $template" >&2
        exit 1
    fi
done

# Create the 1200 GPU case
replicate_case 1200
# Create the 3600 GPU case
replicate_case 3600
exit

View File

@ -0,0 +1,26 @@
#!/bin/bash
#BSUB -P CSC275MCCLURE
#BSUB -J COLOR
#BSUB -o test-1200g.o%J
#BSUB -W 10
#BSUB -nnodes 200
##BSUB -env "all,JOB_FEATURE=NVME"
# Batch job: run TestCommD3Q19 on 1200 resource sets from the 1200p
# weak-scaling directory.
date
module load gcc cuda
#source $OLCF_SPECTRUM_MPI_ROOT/jsm_pmix/bin/export_smpi_env -gpu
#cd /ccs/home/mcclurej/summit/build/ScaLBL/example/Sph1896/
#cp Domain.in.8g Domain.in
export LBPM_WIA_DIR=$HOME/summit/build/LBPM-WIA/tests
# Abort if the case directory is missing so the ranks are not launched from
# whatever directory the job happened to start in.
cd /gpfs/alpinetds/csc275/scratch/mcclurej/SCALING/WEAK/1200p || {
    echo "Cannot enter case directory 1200p" >&2
    exit 1
}
# -n1200 resource sets, -r6 per node, one GPU (-g1) and one core (-c1) each.
jsrun -n1200 -r6 -g1 -c1 -brs --smpiargs="-gpu" "$LBPM_WIA_DIR/TestCommD3Q19"
exit

View File

@ -0,0 +1,26 @@
#!/bin/bash
#BSUB -P CSC275MCCLURE
#BSUB -J COLOR
#BSUB -o color-3600g.o%J
#BSUB -W 10
#BSUB -nnodes 600
##BSUB -env "all,JOB_FEATURE=NVME"
# Batch job: run TestCommD3Q19 on 3600 resource sets from the 3600p
# weak-scaling directory.
date
module load gcc cuda
#source $OLCF_SPECTRUM_MPI_ROOT/jsm_pmix/bin/export_smpi_env -gpu
#cd /ccs/home/mcclurej/summit/build/ScaLBL/example/Sph1896/
#cp Domain.in.8g Domain.in
export LBPM_WIA_DIR=$HOME/summit/build/LBPM-WIA/tests
# Abort if the case directory is missing so the ranks are not launched from
# whatever directory the job happened to start in.
cd /gpfs/alpinetds/csc275/scratch/mcclurej/SCALING/WEAK/3600p || {
    echo "Cannot enter case directory 3600p" >&2
    exit 1
}
# -n3600 resource sets, -r6 per node, one GPU (-g1) and one core (-c1) each.
jsrun -n3600 -r6 -g1 -c1 -brs --smpiargs="-gpu" "$LBPM_WIA_DIR/TestCommD3Q19"
exit

View File

@ -0,0 +1,247 @@
Fri Mar 16 12:44:21 EDT 2018
********************************************************
Running Unit Test for D3Q19 MPI Communication
********************************************************
********************************************************
Sub-domain size = 320 x 320 x 320
Parallel domain size = 10 x 10 x 12
********************************************************
Assigning phase ID from file
Initialize from segmented data: solid=0, NWP=1, WP=2
Media porosity = 0.359970
Domain set.
Create ScaLBL_Communicator
Set up memory efficient layout
Allocating distributions
Setting up device map and neighbor list
Setting the distributions, size = : 11795503
********************************************************
No. of timesteps for timing: 100
********************************************************
CPU time = 1.330030
Lattice update rate (per process)= 886.860134 MLUPS
Lattice update rate (process)= 1064232.160581 MLUPS
********************************************************
DRAM bandwidth (per process)= 339.710486 GB/sec
Communication bandwidth (per process)= 2.813606 Gbit/sec
Aggregated communication bandwidth = 3376.327680 Gbit/sec
******************************************
error in distribution q = 14
i,j,k= 1281, 486, 2097
dist = 1.14
n= 1
------------------------------------------------------------
Sender: LSF System <lsfadmin@batch2>
Subject: Job 43347: <COLOR> in cluster <summit> Exited
Job <COLOR> was submitted from host <login2> by user <mcclurej> in cluster <summit> at Fri Mar 16 12:43:59 2018
Job was executed on host(s) <1*batch2>, in queue <batch>, as user <mcclurej> in cluster <summit> at Fri Mar 16 12:44:01 2018
<42*a02n03>
<42*a02n04>
<42*a02n06>
<42*a02n07>
<42*a02n08>
<42*a02n09>
<42*a02n10>
<42*a02n18>
<42*a26n01>
<42*a26n02>
<42*a26n03>
<42*a26n04>
<42*a26n05>
<42*a26n06>
<42*a26n07>
<42*a26n08>
<42*a26n09>
<42*a26n10>
<42*a26n11>
<42*a26n12>
<42*a26n13>
<42*a26n14>
<42*a26n15>
<42*a26n16>
<42*a26n17>
<42*a26n18>
<42*a27n01>
<42*a27n02>
<42*a27n03>
<42*a27n04>
<42*a27n05>
<42*a27n06>
<42*a27n07>
<42*a27n08>
<42*a27n09>
<42*a27n10>
<42*a27n11>
<42*a27n13>
<42*a27n14>
<42*a27n15>
<42*a27n17>
<42*a27n18>
<42*a28n01>
<42*a28n02>
<42*a28n03>
<42*a28n04>
<42*a28n05>
<42*a28n06>
<42*a28n07>
<42*a28n08>
<42*a28n09>
<42*a28n10>
<42*a28n11>
<42*a28n12>
<42*a28n13>
<42*a28n14>
<42*a28n15>
<42*a28n16>
<42*a28n17>
<42*a28n18>
<42*a29n01>
<42*a29n02>
<42*a29n03>
<42*a29n04>
<42*a29n05>
<42*a29n06>
<42*a29n07>
<42*a29n08>
<42*a29n09>
<42*a29n10>
<42*a29n11>
<42*a29n12>
<42*a29n13>
<42*a29n15>
<42*a29n16>
<42*a29n17>
<42*a29n18>
<42*a30n01>
<42*a30n02>
<42*a30n03>
<42*a30n04>
<42*a30n05>
<42*a30n06>
<42*a30n07>
<42*a30n08>
<42*a30n09>
<42*a30n10>
<42*a30n11>
<42*a30n12>
<42*a30n13>
<42*a30n14>
<42*a30n15>
<42*a30n16>
<42*a30n17>
<42*a30n18>
<42*a31n01>
<42*a31n02>
<42*a31n03>
<42*a31n04>
<42*a31n05>
<42*a31n06>
<42*a31n07>
<42*a31n08>
<42*a31n09>
<42*a31n10>
<42*a31n11>
<42*a31n12>
<42*a31n13>
<42*a31n14>
<42*a31n15>
<42*a31n16>
<42*a31n17>
<42*a31n18>
<42*a32n01>
<42*a32n02>
<42*a32n03>
<42*a32n04>
<42*a32n05>
<42*a32n06>
<42*a32n07>
<42*a32n08>
<42*a32n09>
<42*a32n10>
<42*a32n11>
<42*a32n12>
<42*a32n13>
<42*a32n14>
<42*a32n15>
<42*a32n16>
<42*a32n17>
<42*a32n18>
<42*a33n01>
<42*a33n02>
<42*a33n03>
<42*a33n04>
<42*a33n05>
<42*a33n06>
<42*a33n07>
<42*a33n08>
<42*a33n09>
<42*a33n10>
<42*a33n11>
<42*a33n12>
<42*a33n13>
<42*a33n14>
<42*a33n15>
<42*a33n16>
<42*a33n17>
<42*a33n18>
<42*a34n01>
<42*a34n02>
<42*a34n03>
<42*a34n05>
<42*a34n06>
<42*a34n07>
<42*a34n08>
<42*a34n09>
<42*a34n10>
<42*a34n11>
<42*a34n12>
<42*a34n13>
<42*a34n14>
<42*a34n16>
<42*a34n17>
<42*a34n18>
<42*a35n01>
<42*a35n02>
<42*a35n03>
<42*a35n04>
<42*a35n05>
<42*a35n06>
<42*a35n07>
<42*a35n08>
<42*a35n09>
<42*a35n10>
<42*a35n11>
<42*a35n12>
<42*a35n13>
<42*a35n14>
<42*a35n15>
<42*a35n16>
<42*a35n17>
<42*a35n18>
<42*a36n01>
<42*a36n02>
<42*a36n03>
<42*a36n04>
<42*a36n05>
<42*a36n06>
<42*a36n07>
<42*a36n09>
<42*a36n10>
<42*a36n11>
<42*a36n12>
<42*a36n13>
<42*a36n14>
<42*a36n15>
<42*a36n17>
<42*a36n18>
<42*b01n01>
</ccs/home/mcclurej> was used as the home directory.
</gpfs/alpinetds/csc275/scratch/mcclurej/SCALING/WEAK> was used as the working directory.
Started at Fri Mar 16 12:44:01 2018
Terminated at Fri Mar 16 12:44:49 2018
Results reported at Fri Mar 16 12:44:49 2018
The output (if any) is above this job summary.

File diff suppressed because it is too large Load Diff

View File

@ -52,7 +52,7 @@ ADD_LBPM_TEST_1_2_4( TestBlobIdentify )
#ADD_LBPM_TEST_PARALLEL( TestTwoPhase 8 )
ADD_LBPM_TEST_PARALLEL( TestBlobAnalyze 8 )
ADD_LBPM_TEST_PARALLEL( TestSegDist 8 )
#ADD_LBPM_TEST_PARALLEL( TestCommD3Q19 8 )
ADD_LBPM_TEST_PARALLEL( TestCommD3Q19 8 )
#ADD_LBPM_TEST_PARALLEL( TestMassConservationD3Q7 1 )
ADD_LBPM_TEST_1_2_4( testCommunication )
ADD_LBPM_TEST_1_2_4( testUtilities )

View File

@ -11,7 +11,7 @@
using namespace std;
extern void GlobalFlipScaLBL_D3Q19_Init(double *dist_even, double *dist_odd, int Nx, int Ny, int Nz,
extern void GlobalFlipScaLBL_D3Q19_Init(double *dist, IntArray Map, int Np, int Nx, int Ny, int Nz,
int iproc, int jproc, int kproc, int nprocx, int nprocy, int nprocz)
{
// Set of Discrete velocities for the D3Q19 Model
@ -24,64 +24,57 @@ extern void GlobalFlipScaLBL_D3Q19_Init(double *dist_even, double *dist_odd, int
int x,y,z; // Global indices
int xn,yn,zn; // Global indices of neighbor
int X,Y,Z; // Global size
int idx;
X = Nx*nprocx;
Y = Ny*nprocy;
Z = Nz*nprocz;
NULL_USE(Z);
N = (Nx+2)*(Ny+2)*(Nz+2); // size of the array including halo
for (k=0; k<Nz; k++){
for (j=0; j<Ny; j++){
for (i=0; i<Nx; i++){
n = (k+1)*(Nx+2)*(Ny+2) + (j+1)*(Nx+2) + i+1;
// Get the 'global' index
x = iproc*Nx+i;
y = jproc*Ny+j;
z = kproc*Nz+k;
for (q=0; q<9; q++){
// Odd distribution
Cqx = D3Q19[2*q][0];
Cqy = D3Q19[2*q][1];
Cqz = D3Q19[2*q][2];
xn = x - Cqx;
yn = y - Cqy;
zn = z - Cqz;
if (xn < 0) xn += nprocx*Nx;
if (yn < 0) yn += nprocy*Ny;
if (zn < 0) zn += nprocz*Nz;
if (!(xn < nprocx*Nx)) xn -= nprocx*Nx;
if (!(yn < nprocy*Ny)) yn -= nprocy*Ny;
if (!(zn < nprocz*Nz)) zn -= nprocz*Nz;
dist_even[(q+1)*N+n] = (zn*X*Y+yn*X+xn) + (2*q+1)*0.01;
// Odd distribution
xn = x + Cqx;
yn = y + Cqy;
zn = z + Cqz;
if (xn < 0) xn += nprocx*Nx;
if (yn < 0) yn += nprocy*Ny;
if (zn < 0) zn += nprocz*Nz;
if (!(xn < nprocx*Nx)) xn -= nprocx*Nx;
if (!(yn < nprocy*Ny)) yn -= nprocy*Ny;
if (!(zn < nprocz*Nz)) zn -= nprocz*Nz;
dist_odd[q*N+n] = (zn*X*Y+yn*X+xn) + 2*(q+1)*0.01;
//n = (k+1)*(Nx+2)*(Ny+2) + (j+1)*(Nx+2) + i+1;
idx=Map(i,j,k);
if (idx > 0){
// Get the 'global' index
x = iproc*Nx+i;
y = jproc*Ny+j;
z = kproc*Nz+k;
for (q=0; q<18; q++){
// Odd distribution
Cqx = D3Q19[q][0];
Cqy = D3Q19[q][1];
Cqz = D3Q19[q][2];
xn = x - Cqx;
yn = y - Cqy;
zn = z - Cqz;
xn=x; yn=y;zn=z;
if (xn < 0) xn += nprocx*Nx;
if (yn < 0) yn += nprocy*Ny;
if (zn < 0) zn += nprocz*Nz;
if (!(xn < nprocx*Nx)) xn -= nprocx*Nx;
if (!(yn < nprocy*Ny)) yn -= nprocy*Ny;
if (!(zn < nprocz*Nz)) zn -= nprocz*Nz;
dist[(q+1)*Np+idx] = (zn*X*Y+yn*X+xn) + (q+1)*0.01;
}
}
}
}
}
}
extern int GlobalCheckDebugDist(double *dist_even, double *dist_odd, int Nx, int Ny, int Nz,
int iproc, int jproc, int kproc, int nprocx, int nprocy, int nprocz)
extern int GlobalCheckDebugDist(double *dist, IntArray Map, int Np, int Nx, int Ny, int Nz,
int iproc, int jproc, int kproc, int nprocx, int nprocy, int nprocz, int start, int finish)
{
int returnValue = 0;
int q,i,j,k,n,N;
int q,i,j,k,n,N,idx;
int Cqx,Cqy,Cqz; // Discrete velocity
int x,y,z; // Global indices
int xn,yn,zn; // Global indices of neighbor
@ -89,37 +82,30 @@ extern int GlobalCheckDebugDist(double *dist_even, double *dist_odd, int Nx, int
X = Nx*nprocx;
Y = Ny*nprocy;
Z = Nz*nprocz;
NULL_USE(Z);
NULL_USE(Z);
N = (Nx+2)*(Ny+2)*(Nz+2); // size of the array including halo
for (k=0; k<Nz; k++){
for (j=0; j<Ny; j++){
for (i=0; i<Nx; i++){
n = (k+1)*(Nx+2)*(Ny+2) + (j+1)*(Nx+2) + i+1;
idx=Map(i,j,k);
// Get the 'global' index
x = iproc*Nx+i;
y = jproc*Ny+j;
z = kproc*Nz+k;
for (q=0; q<9; q++){
if (idx > start && idx< finish){
// Get the 'global' index
x = iproc*Nx+i;
y = jproc*Ny+j;
z = kproc*Nz+k;
for (q=0; q<18; q++){
if (dist_even[(q+1)*N+n] != (z*X*Y+y*X+x) + 2*(q+1)*0.01){
printf("******************************************\n");
printf("error in even distribution q = %i \n", 2*(q+1));
printf("i,j,k= %i, %i, %i \n", x,y,z);
printf("dist = %5.2f \n", dist_even[(q+1)*N+n]);
printf("n= %i \n",z*X*Y+y*X+x);
returnValue++;
}
if (dist[(q+1)*Np+idx] != (z*X*Y+y*X+x) + (q+1)*0.01){
printf("******************************************\n");
printf("error in distribution q = %i \n", (q+1));
printf("i,j,k= %i, %i, %i \n", x,y,z);
printf("dist = %5.2f \n", dist[(q+1)*Np+idx]);
printf("n= %i \n",z*X*Y+y*X+x);
returnValue++;
}
if (dist_odd[q*N+n] != (z*X*Y+y*X+x) + (2*q+1)*0.01){
printf("******************************************\n");
printf("error in odd distribution q = %i \n", 2*q+1);
printf("i,j,k= %i, %i, %i \n", x,y,z);
printf("dist = %5.2f \n", dist_odd[q*N+n]);
printf("n= %i \n",z*X*Y+y*X+x);
returnValue++;
}
}
}
@ -311,20 +297,20 @@ int main(int argc, char **argv)
char *id;
id = new char[Nx*Ny*Nz];
/*
* if (rank==0) printf("Assigning phase ID from file \n");
* if (rank==0) printf("Initialize from segmented data: solid=0, NWP=1, WP=2 \n");
FILE *IDFILE = fopen(LocalRankFilename,"rb");
if (IDFILE==NULL) ERROR("Error opening file: ID.xxxxx");
fread(id,1,N,IDFILE);
fclose(IDFILE);
*/
if (rank==0) printf("Assigning phase ID from file \n");
if (rank==0) printf("Initialize from segmented data: solid=0, NWP=1, WP=2 \n");
FILE *IDFILE = fopen(LocalRankFilename,"rb");
if (IDFILE==NULL) ERROR("Error opening file: ID.xxxxx");
fread(id,1,N,IDFILE);
fclose(IDFILE);
// Setup the domain
for (k=0;k<Nz;k++){
for (j=0;j<Ny;j++){
for (i=0;i<Nx;i++){
n = k*Nx*Ny+j*Nx+i;
id[n] = 1;
//id[n] = 1;
Dm.id[n] = id[n];
}
}
@ -337,6 +323,7 @@ int main(int argc, char **argv)
double sum;
double sum_local=0.0, porosity;
char component = 0; // solid phase
int Np=0;
for (k=1;k<Nz-1;k++){
for (j=1;j<Ny-1;j++){
for (i=1;i<Nx-1;i++){
@ -344,6 +331,7 @@ int main(int argc, char **argv)
if (id[n] == component){
sum_local+=1.0;
}
else Np++;
}
}
}
@ -357,33 +345,53 @@ int main(int argc, char **argv)
if (rank == 0) cout << "Domain set." << endl;
//...........................................................................
//...........................................................................
if (rank==0) printf ("Create ScaLBL_Communicator \n");
// Create a communicator for the device
// Create a communicator for the device (will use optimized layout)
ScaLBL_Communicator ScaLBL_Comm(Dm);
//...........device phase ID.................................................
if (rank==0) printf ("Copying phase ID to device \n");
char *ID;
ScaLBL_AllocateDeviceMemory((void **) &ID, N); // Allocate device memory
// Copy to the device
ScaLBL_CopyToDevice(ID, id, N);
//...........................................................................
//...........................................................................
// MAIN VARIABLES ALLOCATED HERE
//...........................................................................
// LBM variables
if (rank==0) printf ("Allocating distributions \n");
if (rank==0) printf ("Set up memory efficient layout \n");
int neighborSize=18*Np*sizeof(int);
int *neighborList;
IntArray Map(Nx,Ny,Nz);
neighborList= new int[18*Np];
ScaLBL_Comm.MemoryOptimizedLayoutAA(Map,neighborList,Dm.id,Np);
MPI_Barrier(comm);
//......................device distributions.................................
double *f_even,*f_odd;
dist_mem_size = Np*sizeof(double);
if (rank==0) printf ("Allocating distributions \n");
int *NeighborList;
int *dvcMap;
double *fq;
//...........................................................................
ScaLBL_AllocateDeviceMemory((void **) &f_even, 10*dist_mem_size); // Allocate device memory
ScaLBL_AllocateDeviceMemory((void **) &f_odd, 9*dist_mem_size); // Allocate device memory
ScaLBL_AllocateDeviceMemory((void **) &NeighborList, neighborSize);
ScaLBL_AllocateDeviceMemory((void **) &dvcMap, sizeof(int)*Np);
ScaLBL_AllocateDeviceMemory((void **) &fq, 19*dist_mem_size);
//...........................................................................
double *f_even_host,*f_odd_host;
f_even_host = new double [10*N];
f_odd_host = new double [9*N];
double *fq_host;
fq_host = new double [19*Np];
// Update GPU data structures
if (rank==0) printf ("Setting up device map and neighbor list \n");
int *TmpMap;
TmpMap=new int[Np];
for (k=1; k<Nz-1; k++){
for (j=1; j<Ny-1; j++){
for (i=1; i<Nx-1; i++){
int idx=Map(i,j,k);
if (!(idx < 0))
TmpMap[idx] = k*Nx*Ny+j*Nx+i;
}
}
}
ScaLBL_CopyToDevice(dvcMap, TmpMap, sizeof(int)*Np);
ScaLBL_DeviceBarrier();
delete [] TmpMap;
//...........................................................................
/* // Write the communcation structure into a file for debugging
@ -414,29 +422,27 @@ int main(int argc, char **argv)
fprintf(CommFile,"\n");
fclose(CommFile);
*/
if (rank==0) printf("Setting the distributions, size = : %i\n", N);
if (rank==0) printf("Setting the distributions, size = : %i\n", Np);
//...........................................................................
GlobalFlipScaLBL_D3Q19_Init(f_even_host, f_odd_host, Nx-2, Ny-2, Nz-2,iproc,jproc,kproc,nprocx,nprocy,nprocz);
ScaLBL_CopyToDevice(f_even, f_even_host, 10*dist_mem_size);
ScaLBL_CopyToDevice(f_odd, f_odd_host, 9*dist_mem_size);
GlobalFlipScaLBL_D3Q19_Init(fq_host, Map, Np, Nx-2, Ny-2, Nz-2,iproc,jproc,kproc,nprocx,nprocy,nprocz);
ScaLBL_CopyToDevice(fq, fq_host, 19*dist_mem_size);
ScaLBL_DeviceBarrier();
MPI_Barrier(comm);
//*************************************************************************
// Pack and send the D3Q19 distributions
ScaLBL_Comm.SendD3Q19(f_even, f_odd);
//*************************************************************************
// Swap the distributions for momentum transport
//*************************************************************************
ScaLBL_D3Q19_Swap(ID, f_even, f_odd, Nx, Ny, Nz);
//*************************************************************************
// Wait for communications to complete and unpack the distributions
ScaLBL_Comm.RecvD3Q19(f_even, f_odd);
//*************************************************************************
// First timestep
ScaLBL_Comm.SendD3Q19AA(fq); //READ FROM NORMAL
ScaLBL_Comm.RecvD3Q19AA(fq); //WRITE INTO OPPOSITE
// Second timestep
ScaLBL_Comm.SendD3Q19AA(fq); //READ FROM NORMAL
ScaLBL_Comm.RecvD3Q19AA(fq); //WRITE INTO OPPOSITE
//...........................................................................
ScaLBL_CopyToHost(f_even_host,f_even,10*N*sizeof(double));
ScaLBL_CopyToHost(f_odd_host,f_odd,9*N*sizeof(double));
check = GlobalCheckDebugDist(f_even_host, f_odd_host, Nx-2, Ny-2, Nz-2,iproc,jproc,kproc,nprocx,nprocy,nprocz);
ScaLBL_CopyToHost(fq_host,fq,19*Np*sizeof(double));
check = GlobalCheckDebugDist(fq_host, Map, Np, Nx-2, Ny-2, Nz-2,iproc,jproc,kproc,nprocx,nprocy,nprocz,0,ScaLBL_Comm.next);
//...........................................................................
int timestep = 0;
@ -453,17 +459,16 @@ int main(int argc, char **argv)
//************ MAIN ITERATION LOOP (timing communications)***************************************/
while (timestep < 100){
//*************************************************************************
// Pack and send the D3Q19 distributions
ScaLBL_Comm.SendD3Q19(f_even, f_odd);
//*************************************************************************
// Swap the distributions for momentum transport
//*************************************************************************
ScaLBL_D3Q19_Swap(ID, f_even, f_odd, Nx, Ny, Nz);
//*************************************************************************
// Wait for communications to complete and unpack the distributions
ScaLBL_Comm.RecvD3Q19(f_even, f_odd);
//*************************************************************************
// First timestep
ScaLBL_Comm.SendD3Q19AA(fq); //READ FROM NORMAL
ScaLBL_Comm.RecvD3Q19AA(fq); //WRITE INTO OPPOSITE
// Second timestep
ScaLBL_Comm.SendD3Q19AA(fq); //READ FROM NORMAL
ScaLBL_Comm.RecvD3Q19AA(fq); //WRITE INTO OPPOSITE
//*********************************************
ScaLBL_DeviceBarrier();
MPI_Barrier(comm);
@ -476,7 +481,7 @@ int main(int argc, char **argv)
// cout << "CPU time: " << (stoptime - starttime) << " seconds" << endl;
cputime = stoptime - starttime;
// cout << "Lattice update rate: "<< double(Nx*Ny*Nz*timestep)/cputime/1000000 << " MLUPS" << endl;
double MLUPS = double(Nx*Ny*Nz*timestep)/cputime/1000000;
double MLUPS = double(Np)*double(timestep)/cputime*1e-6;
if (rank==0) printf("********************************************************\n");
if (rank==0) printf("CPU time = %f \n", cputime);
if (rank==0) printf("Lattice update rate (per process)= %f MLUPS \n", MLUPS);
@ -486,9 +491,9 @@ int main(int argc, char **argv)
// Number of memory references from the swap algorithm (per timestep)
// 18 reads and 18 writes for each lattice site
double MemoryRefs = (Nx-2)*(Ny-2)*(Nz-2)*36;
double MemoryRefs = double(Np)*36;
// number of memory references for the swap algorithm - GigaBytes / second
if (rank==0) printf("DRAM bandwidth (per process)= %f GB/sec \n",MemoryRefs*8*timestep/1e9);
if (rank==0) printf("DRAM bandwidth (per process)= %f GB/sec \n",MemoryRefs*8*double(timestep)*1e-9);
// Report bandwidth in Gigabits per second
// communication bandwidth includes both send and recieve
if (rank==0) printf("Communication bandwidth (per process)= %f Gbit/sec \n",ScaLBL_Comm.CommunicationCount*64*timestep/1e9);

View File

@ -645,7 +645,7 @@ int main(int argc, char **argv)
// Compute the walltime per timestep
cputime = (stoptime - starttime)/timestep;
// Performance obtained from each node
double MLUPS = double(Nx*Ny*Nz)/cputime/1000000;
double MLUPS = double(Np)/cputime/1000000;
if (rank==0) printf("********************************************************\n");
if (rank==0) printf("CPU time = %f \n", cputime);
@ -655,16 +655,6 @@ int main(int argc, char **argv)
if (rank==0) printf("********************************************************\n");
// ************************************************************************
double *PHASE;
int SIZE=Nx*Ny*Nz*sizeof(double);
PHASE= new double [Nx*Ny*Nz];
ScaLBL_CopyToHost(&PHASE[0],&Phi[0],SIZE);
FILE *OUTFILE;
sprintf(LocalRankFilename,"Phase.%05i.raw",rank);
OUTFILE = fopen(LocalRankFilename,"wb");
fwrite(PHASE,8,N,OUTFILE);
fclose(OUTFILE);
PROFILE_STOP("Main");
PROFILE_SAVE("lbpm_color_simulator",1);

View File

@ -424,7 +424,7 @@ int main(int argc, char **argv)
// Compute the walltime per timestep
cputime = (stoptime - starttime)/timestep;
// Performance obtained from each node
double MLUPS = double(Nx*Ny*Nz)/cputime/1000000;
double MLUPS = double(Np)/cputime/1000000;
if (rank==0) printf("********************************************************\n");
if (rank==0) printf("CPU time = %f \n", cputime);