Merge branch 'ScaLBL' of github.com:JamesEMcClure/LBPM-WIA into ScaLBL
This commit is contained in:
commit
4debc1dbe2
6
example/systems/summit/1200p/Color.in
Normal file
6
example/systems/summit/1200p/Color.in
Normal file
@ -0,0 +1,6 @@
|
||||
1.0 1.0
|
||||
1.0 1.0
|
||||
1.0e-3 0.95
|
||||
0.0 0.0 1.0e-6
|
||||
0 0 10.0 1.0
|
||||
100 5000 1e-5
|
3
example/systems/summit/1200p/Domain.in
Normal file
3
example/systems/summit/1200p/Domain.in
Normal file
@ -0,0 +1,3 @@
|
||||
10 10 12
|
||||
320 320 320
|
||||
1.0 1.0 1.0
|
6
example/systems/summit/3600p/Color.in
Normal file
6
example/systems/summit/3600p/Color.in
Normal file
@ -0,0 +1,6 @@
|
||||
1.0 1.0
|
||||
1.0 1.0
|
||||
1.0e-3 0.95
|
||||
0.0 0.0 1.0e-6
|
||||
0 0 10.0 1.0
|
||||
100 5000 1e-5
|
3
example/systems/summit/3600p/Domain.in
Normal file
3
example/systems/summit/3600p/Domain.in
Normal file
@ -0,0 +1,3 @@
|
||||
15 15 16
|
||||
320 320 320
|
||||
1.0 1.0 1.0
|
5
example/systems/summit/3600p/Permeability.in
Normal file
5
example/systems/summit/3600p/Permeability.in
Normal file
@ -0,0 +1,5 @@
|
||||
1.0
|
||||
0.0 0.0 1.0e-6
|
||||
0 0 1.0 1.0
|
||||
100 1000 1.0e-5
|
||||
|
3
example/systems/summit/Domain.in
Normal file
3
example/systems/summit/Domain.in
Normal file
@ -0,0 +1,3 @@
|
||||
1 1 1
|
||||
320 320 320
|
||||
1.0 1.0 1.0
|
13
example/systems/summit/GeneratePeriodicCase.sh
Executable file
13
example/systems/summit/GeneratePeriodicCase.sh
Executable file
@ -0,0 +1,13 @@
|
||||
#!/bin/bash
# Replicate the single-rank input files ID.00000 and SignDist.00000 (expected
# in the current directory) into a "<NRANKS>p" sub-directory, producing one
# copy per MPI rank so that every rank sees the same sub-domain.
#
# Usage: GeneratePeriodicCase.sh [NRANKS]
#   NRANKS  number of ranks to generate files for (default: 3600)

NRANKS=${1:-3600}

# Reject anything that is not a plain non-negative integer before looping on it.
case "$NRANKS" in
    ''|*[!0-9]*)
        echo "usage: $0 [NRANKS]" >&2
        exit 1
        ;;
esac
echo "$NRANKS"

DIR="${NRANKS}p"
mkdir -p "$DIR"

BASEDIST="SignDist."
BASEID="ID."

# Ranks are numbered 0 .. NRANKS-1, so exactly NRANKS copies are needed.
# The suffix is always zero-padded to five digits (e.g. ID.00042); the former
# `seq -w 0 $NRANKS` loop produced one file too many and only yielded a
# five-digit suffix when NRANKS happened to have four digits.
for ((i = 0; i < NRANKS; i++)); do
    idfile=$(printf '%s%05d' "$BASEID" "$i")
    echo "$idfile"
    cp ID.00000 "$DIR/$idfile"
done

for ((i = 0; i < NRANKS; i++)); do
    distfile=$(printf '%s%05d' "$BASEDIST" "$i")
    echo "$distfile"
    cp SignDist.00000 "$DIR/$distfile"
done
|
1901
example/systems/summit/pack.out
Normal file
1901
example/systems/summit/pack.out
Normal file
File diff suppressed because it is too large
Load Diff
48
example/systems/summit/summit-spheres-1g.lsf
Normal file
48
example/systems/summit/summit-spheres-1g.lsf
Normal file
@ -0,0 +1,48 @@
|
||||
#!/bin/bash
#BSUB -P CSC275MCCLURE
#BSUB -J spheres
#BSUB -o spheres.o%J
#BSUB -W 10
#BSUB -nnodes 1
##BSUB -env "all,JOB_FEATURE=NVME"

# LSF job script: runs GenerateSphereTest once on a single resource set, then
# replicates ID.00000 / SignDist.00000 from the working directory into
# per-rank copies for the 1200- and 3600-rank cases.
# NOTE(review): this presumes GenerateSphereTest writes ID.00000 and
# SignDist.00000 into the working directory - confirm against the test source.

date

module load gcc cuda

export SCALBL_DIR=$HOME/summit/build/LBPM-WIA/tests

jsrun -n1 -r1 -g1 -c1 -brs "$SCALBL_DIR/GenerateSphereTest" 1896

# replicate_case NRANKS
#   Copy ID.00000 and SignDist.00000 into "<NRANKS>p/", once per rank
#   0 .. NRANKS-1, using a five-digit zero-padded rank suffix.
#   (The former `seq -w 0 $NRANKS` loops created one extra file per case,
#   for a rank index that does not exist.)
replicate_case() {
    local nranks=$1
    local dir="${nranks}p"
    local i name

    echo "$nranks"
    mkdir -p "$dir"

    for ((i = 0; i < nranks; i++)); do
        name=$(printf 'ID.%05d' "$i")
        echo "$name"
        cp ID.00000 "$dir/$name"
    done

    for ((i = 0; i < nranks; i++)); do
        name=$(printf 'SignDist.%05d' "$i")
        echo "$name"
        cp SignDist.00000 "$dir/$name"
    done
}

# Create the 1200 GPU case
replicate_case 1200

# Create the 3600 GPU case
replicate_case 3600

exit;
|
||||
|
26
example/systems/summit/summit-test-1200g.lsf
Normal file
26
example/systems/summit/summit-test-1200g.lsf
Normal file
@ -0,0 +1,26 @@
|
||||
#!/bin/bash

#BSUB -P CSC275MCCLURE
#BSUB -J COLOR
#BSUB -o test-1200g.o%J
#BSUB -W 10
#BSUB -nnodes 200
##BSUB -env "all,JOB_FEATURE=NVME"

# LSF job script: launches TestCommD3Q19 on 1200 resource sets
# (6 per node x 200 nodes, one GPU and one core each) from the 1200p
# weak-scaling directory.

date

module load gcc cuda
#source $OLCF_SPECTRUM_MPI_ROOT/jsm_pmix/bin/export_smpi_env -gpu

#cd /ccs/home/mcclurej/summit/build/ScaLBL/example/Sph1896/

#cp Domain.in.8g Domain.in

export LBPM_WIA_DIR=$HOME/summit/build/LBPM-WIA/tests

# Abort rather than launch 1200 ranks from the wrong directory if the
# case directory is missing or unreadable.
RUN_DIR=/gpfs/alpinetds/csc275/scratch/mcclurej/SCALING/WEAK/1200p
cd "$RUN_DIR" || { echo "cannot cd to $RUN_DIR" >&2; exit 1; }

jsrun -n1200 -r6 -g1 -c1 -brs --smpiargs="-gpu" "$LBPM_WIA_DIR/TestCommD3Q19"


exit;
|
26
example/systems/summit/summit-test-3600g.lsf
Normal file
26
example/systems/summit/summit-test-3600g.lsf
Normal file
@ -0,0 +1,26 @@
|
||||
#!/bin/bash

#BSUB -P CSC275MCCLURE
#BSUB -J COLOR
#BSUB -o color-3600g.o%J
#BSUB -W 10
#BSUB -nnodes 600
##BSUB -env "all,JOB_FEATURE=NVME"

# LSF job script: launches TestCommD3Q19 on 3600 resource sets
# (6 per node x 600 nodes, one GPU and one core each) from the 3600p
# weak-scaling directory.

date

module load gcc cuda
#source $OLCF_SPECTRUM_MPI_ROOT/jsm_pmix/bin/export_smpi_env -gpu

#cd /ccs/home/mcclurej/summit/build/ScaLBL/example/Sph1896/

#cp Domain.in.8g Domain.in

export LBPM_WIA_DIR=$HOME/summit/build/LBPM-WIA/tests

# Abort rather than launch 3600 ranks from the wrong directory if the
# case directory is missing or unreadable.
RUN_DIR=/gpfs/alpinetds/csc275/scratch/mcclurej/SCALING/WEAK/3600p
cd "$RUN_DIR" || { echo "cannot cd to $RUN_DIR" >&2; exit 1; }

jsrun -n3600 -r6 -g1 -c1 -brs --smpiargs="-gpu" "$LBPM_WIA_DIR/TestCommD3Q19"


exit;
|
247
example/systems/summit/test-1200g.log
Normal file
247
example/systems/summit/test-1200g.log
Normal file
@ -0,0 +1,247 @@
|
||||
Fri Mar 16 12:44:21 EDT 2018
|
||||
********************************************************
|
||||
Running Unit Test for D3Q19 MPI Communication
|
||||
********************************************************
|
||||
********************************************************
|
||||
Sub-domain size = 320 x 320 x 320
|
||||
Parallel domain size = 10 x 10 x 12
|
||||
********************************************************
|
||||
Assigning phase ID from file
|
||||
Initialize from segmented data: solid=0, NWP=1, WP=2
|
||||
Media porosity = 0.359970
|
||||
Domain set.
|
||||
Create ScaLBL_Communicator
|
||||
Set up memory efficient layout
|
||||
Allocating distributions
|
||||
Setting up device map and neighbor list
|
||||
Setting the distributions, size = : 11795503
|
||||
********************************************************
|
||||
No. of timesteps for timing: 100
|
||||
********************************************************
|
||||
CPU time = 1.330030
|
||||
Lattice update rate (per process)= 886.860134 MLUPS
|
||||
Lattice update rate (process)= 1064232.160581 MLUPS
|
||||
********************************************************
|
||||
DRAM bandwidth (per process)= 339.710486 GB/sec
|
||||
Communication bandwidth (per process)= 2.813606 Gbit/sec
|
||||
Aggregated communication bandwidth = 3376.327680 Gbit/sec
|
||||
******************************************
|
||||
error in distribution q = 14
|
||||
i,j,k= 1281, 486, 2097
|
||||
dist = 1.14
|
||||
n= 1
|
||||
|
||||
------------------------------------------------------------
|
||||
Sender: LSF System <lsfadmin@batch2>
|
||||
Subject: Job 43347: <COLOR> in cluster <summit> Exited
|
||||
|
||||
Job <COLOR> was submitted from host <login2> by user <mcclurej> in cluster <summit> at Fri Mar 16 12:43:59 2018
|
||||
Job was executed on host(s) <1*batch2>, in queue <batch>, as user <mcclurej> in cluster <summit> at Fri Mar 16 12:44:01 2018
|
||||
<42*a02n03>
|
||||
<42*a02n04>
|
||||
<42*a02n06>
|
||||
<42*a02n07>
|
||||
<42*a02n08>
|
||||
<42*a02n09>
|
||||
<42*a02n10>
|
||||
<42*a02n18>
|
||||
<42*a26n01>
|
||||
<42*a26n02>
|
||||
<42*a26n03>
|
||||
<42*a26n04>
|
||||
<42*a26n05>
|
||||
<42*a26n06>
|
||||
<42*a26n07>
|
||||
<42*a26n08>
|
||||
<42*a26n09>
|
||||
<42*a26n10>
|
||||
<42*a26n11>
|
||||
<42*a26n12>
|
||||
<42*a26n13>
|
||||
<42*a26n14>
|
||||
<42*a26n15>
|
||||
<42*a26n16>
|
||||
<42*a26n17>
|
||||
<42*a26n18>
|
||||
<42*a27n01>
|
||||
<42*a27n02>
|
||||
<42*a27n03>
|
||||
<42*a27n04>
|
||||
<42*a27n05>
|
||||
<42*a27n06>
|
||||
<42*a27n07>
|
||||
<42*a27n08>
|
||||
<42*a27n09>
|
||||
<42*a27n10>
|
||||
<42*a27n11>
|
||||
<42*a27n13>
|
||||
<42*a27n14>
|
||||
<42*a27n15>
|
||||
<42*a27n17>
|
||||
<42*a27n18>
|
||||
<42*a28n01>
|
||||
<42*a28n02>
|
||||
<42*a28n03>
|
||||
<42*a28n04>
|
||||
<42*a28n05>
|
||||
<42*a28n06>
|
||||
<42*a28n07>
|
||||
<42*a28n08>
|
||||
<42*a28n09>
|
||||
<42*a28n10>
|
||||
<42*a28n11>
|
||||
<42*a28n12>
|
||||
<42*a28n13>
|
||||
<42*a28n14>
|
||||
<42*a28n15>
|
||||
<42*a28n16>
|
||||
<42*a28n17>
|
||||
<42*a28n18>
|
||||
<42*a29n01>
|
||||
<42*a29n02>
|
||||
<42*a29n03>
|
||||
<42*a29n04>
|
||||
<42*a29n05>
|
||||
<42*a29n06>
|
||||
<42*a29n07>
|
||||
<42*a29n08>
|
||||
<42*a29n09>
|
||||
<42*a29n10>
|
||||
<42*a29n11>
|
||||
<42*a29n12>
|
||||
<42*a29n13>
|
||||
<42*a29n15>
|
||||
<42*a29n16>
|
||||
<42*a29n17>
|
||||
<42*a29n18>
|
||||
<42*a30n01>
|
||||
<42*a30n02>
|
||||
<42*a30n03>
|
||||
<42*a30n04>
|
||||
<42*a30n05>
|
||||
<42*a30n06>
|
||||
<42*a30n07>
|
||||
<42*a30n08>
|
||||
<42*a30n09>
|
||||
<42*a30n10>
|
||||
<42*a30n11>
|
||||
<42*a30n12>
|
||||
<42*a30n13>
|
||||
<42*a30n14>
|
||||
<42*a30n15>
|
||||
<42*a30n16>
|
||||
<42*a30n17>
|
||||
<42*a30n18>
|
||||
<42*a31n01>
|
||||
<42*a31n02>
|
||||
<42*a31n03>
|
||||
<42*a31n04>
|
||||
<42*a31n05>
|
||||
<42*a31n06>
|
||||
<42*a31n07>
|
||||
<42*a31n08>
|
||||
<42*a31n09>
|
||||
<42*a31n10>
|
||||
<42*a31n11>
|
||||
<42*a31n12>
|
||||
<42*a31n13>
|
||||
<42*a31n14>
|
||||
<42*a31n15>
|
||||
<42*a31n16>
|
||||
<42*a31n17>
|
||||
<42*a31n18>
|
||||
<42*a32n01>
|
||||
<42*a32n02>
|
||||
<42*a32n03>
|
||||
<42*a32n04>
|
||||
<42*a32n05>
|
||||
<42*a32n06>
|
||||
<42*a32n07>
|
||||
<42*a32n08>
|
||||
<42*a32n09>
|
||||
<42*a32n10>
|
||||
<42*a32n11>
|
||||
<42*a32n12>
|
||||
<42*a32n13>
|
||||
<42*a32n14>
|
||||
<42*a32n15>
|
||||
<42*a32n16>
|
||||
<42*a32n17>
|
||||
<42*a32n18>
|
||||
<42*a33n01>
|
||||
<42*a33n02>
|
||||
<42*a33n03>
|
||||
<42*a33n04>
|
||||
<42*a33n05>
|
||||
<42*a33n06>
|
||||
<42*a33n07>
|
||||
<42*a33n08>
|
||||
<42*a33n09>
|
||||
<42*a33n10>
|
||||
<42*a33n11>
|
||||
<42*a33n12>
|
||||
<42*a33n13>
|
||||
<42*a33n14>
|
||||
<42*a33n15>
|
||||
<42*a33n16>
|
||||
<42*a33n17>
|
||||
<42*a33n18>
|
||||
<42*a34n01>
|
||||
<42*a34n02>
|
||||
<42*a34n03>
|
||||
<42*a34n05>
|
||||
<42*a34n06>
|
||||
<42*a34n07>
|
||||
<42*a34n08>
|
||||
<42*a34n09>
|
||||
<42*a34n10>
|
||||
<42*a34n11>
|
||||
<42*a34n12>
|
||||
<42*a34n13>
|
||||
<42*a34n14>
|
||||
<42*a34n16>
|
||||
<42*a34n17>
|
||||
<42*a34n18>
|
||||
<42*a35n01>
|
||||
<42*a35n02>
|
||||
<42*a35n03>
|
||||
<42*a35n04>
|
||||
<42*a35n05>
|
||||
<42*a35n06>
|
||||
<42*a35n07>
|
||||
<42*a35n08>
|
||||
<42*a35n09>
|
||||
<42*a35n10>
|
||||
<42*a35n11>
|
||||
<42*a35n12>
|
||||
<42*a35n13>
|
||||
<42*a35n14>
|
||||
<42*a35n15>
|
||||
<42*a35n16>
|
||||
<42*a35n17>
|
||||
<42*a35n18>
|
||||
<42*a36n01>
|
||||
<42*a36n02>
|
||||
<42*a36n03>
|
||||
<42*a36n04>
|
||||
<42*a36n05>
|
||||
<42*a36n06>
|
||||
<42*a36n07>
|
||||
<42*a36n09>
|
||||
<42*a36n10>
|
||||
<42*a36n11>
|
||||
<42*a36n12>
|
||||
<42*a36n13>
|
||||
<42*a36n14>
|
||||
<42*a36n15>
|
||||
<42*a36n17>
|
||||
<42*a36n18>
|
||||
<42*b01n01>
|
||||
</ccs/home/mcclurej> was used as the home directory.
|
||||
</gpfs/alpinetds/csc275/scratch/mcclurej/SCALING/WEAK> was used as the working directory.
|
||||
Started at Fri Mar 16 12:44:01 2018
|
||||
Terminated at Fri Mar 16 12:44:49 2018
|
||||
Results reported at Fri Mar 16 12:44:49 2018
|
||||
|
||||
The output (if any) is above this job summary.
|
||||
|
60994
example/systems/summit/test-3600g.log
Normal file
60994
example/systems/summit/test-3600g.log
Normal file
File diff suppressed because it is too large
Load Diff
@ -52,7 +52,7 @@ ADD_LBPM_TEST_1_2_4( TestBlobIdentify )
|
||||
#ADD_LBPM_TEST_PARALLEL( TestTwoPhase 8 )
|
||||
ADD_LBPM_TEST_PARALLEL( TestBlobAnalyze 8 )
|
||||
ADD_LBPM_TEST_PARALLEL( TestSegDist 8 )
|
||||
#ADD_LBPM_TEST_PARALLEL( TestCommD3Q19 8 )
|
||||
ADD_LBPM_TEST_PARALLEL( TestCommD3Q19 8 )
|
||||
#ADD_LBPM_TEST_PARALLEL( TestMassConservationD3Q7 1 )
|
||||
ADD_LBPM_TEST_1_2_4( testCommunication )
|
||||
ADD_LBPM_TEST_1_2_4( testUtilities )
|
||||
|
@ -11,7 +11,7 @@
|
||||
using namespace std;
|
||||
|
||||
|
||||
extern void GlobalFlipScaLBL_D3Q19_Init(double *dist_even, double *dist_odd, int Nx, int Ny, int Nz,
|
||||
extern void GlobalFlipScaLBL_D3Q19_Init(double *dist, IntArray Map, int Np, int Nx, int Ny, int Nz,
|
||||
int iproc, int jproc, int kproc, int nprocx, int nprocy, int nprocz)
|
||||
{
|
||||
// Set of Discrete velocities for the D3Q19 Model
|
||||
@ -24,64 +24,57 @@ extern void GlobalFlipScaLBL_D3Q19_Init(double *dist_even, double *dist_odd, int
|
||||
int x,y,z; // Global indices
|
||||
int xn,yn,zn; // Global indices of neighbor
|
||||
int X,Y,Z; // Global size
|
||||
int idx;
|
||||
X = Nx*nprocx;
|
||||
Y = Ny*nprocy;
|
||||
Z = Nz*nprocz;
|
||||
NULL_USE(Z);
|
||||
N = (Nx+2)*(Ny+2)*(Nz+2); // size of the array including halo
|
||||
|
||||
|
||||
for (k=0; k<Nz; k++){
|
||||
for (j=0; j<Ny; j++){
|
||||
for (i=0; i<Nx; i++){
|
||||
|
||||
n = (k+1)*(Nx+2)*(Ny+2) + (j+1)*(Nx+2) + i+1;
|
||||
//n = (k+1)*(Nx+2)*(Ny+2) + (j+1)*(Nx+2) + i+1;
|
||||
idx=Map(i,j,k);
|
||||
|
||||
// Get the 'global' index
|
||||
x = iproc*Nx+i;
|
||||
y = jproc*Ny+j;
|
||||
z = kproc*Nz+k;
|
||||
for (q=0; q<9; q++){
|
||||
// Odd distribution
|
||||
Cqx = D3Q19[2*q][0];
|
||||
Cqy = D3Q19[2*q][1];
|
||||
Cqz = D3Q19[2*q][2];
|
||||
xn = x - Cqx;
|
||||
yn = y - Cqy;
|
||||
zn = z - Cqz;
|
||||
if (xn < 0) xn += nprocx*Nx;
|
||||
if (yn < 0) yn += nprocy*Ny;
|
||||
if (zn < 0) zn += nprocz*Nz;
|
||||
if (!(xn < nprocx*Nx)) xn -= nprocx*Nx;
|
||||
if (!(yn < nprocy*Ny)) yn -= nprocy*Ny;
|
||||
if (!(zn < nprocz*Nz)) zn -= nprocz*Nz;
|
||||
if (idx > 0){
|
||||
// Get the 'global' index
|
||||
x = iproc*Nx+i;
|
||||
y = jproc*Ny+j;
|
||||
z = kproc*Nz+k;
|
||||
for (q=0; q<18; q++){
|
||||
// Odd distribution
|
||||
Cqx = D3Q19[q][0];
|
||||
Cqy = D3Q19[q][1];
|
||||
Cqz = D3Q19[q][2];
|
||||
xn = x - Cqx;
|
||||
yn = y - Cqy;
|
||||
zn = z - Cqz;
|
||||
xn=x; yn=y;zn=z;
|
||||
if (xn < 0) xn += nprocx*Nx;
|
||||
if (yn < 0) yn += nprocy*Ny;
|
||||
if (zn < 0) zn += nprocz*Nz;
|
||||
if (!(xn < nprocx*Nx)) xn -= nprocx*Nx;
|
||||
if (!(yn < nprocy*Ny)) yn -= nprocy*Ny;
|
||||
if (!(zn < nprocz*Nz)) zn -= nprocz*Nz;
|
||||
|
||||
dist_even[(q+1)*N+n] = (zn*X*Y+yn*X+xn) + (2*q+1)*0.01;
|
||||
|
||||
// Odd distribution
|
||||
xn = x + Cqx;
|
||||
yn = y + Cqy;
|
||||
zn = z + Cqz;
|
||||
if (xn < 0) xn += nprocx*Nx;
|
||||
if (yn < 0) yn += nprocy*Ny;
|
||||
if (zn < 0) zn += nprocz*Nz;
|
||||
if (!(xn < nprocx*Nx)) xn -= nprocx*Nx;
|
||||
if (!(yn < nprocy*Ny)) yn -= nprocy*Ny;
|
||||
if (!(zn < nprocz*Nz)) zn -= nprocz*Nz;
|
||||
|
||||
dist_odd[q*N+n] = (zn*X*Y+yn*X+xn) + 2*(q+1)*0.01;
|
||||
dist[(q+1)*Np+idx] = (zn*X*Y+yn*X+xn) + (q+1)*0.01;
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
extern int GlobalCheckDebugDist(double *dist_even, double *dist_odd, int Nx, int Ny, int Nz,
|
||||
int iproc, int jproc, int kproc, int nprocx, int nprocy, int nprocz)
|
||||
extern int GlobalCheckDebugDist(double *dist, IntArray Map, int Np, int Nx, int Ny, int Nz,
|
||||
int iproc, int jproc, int kproc, int nprocx, int nprocy, int nprocz, int start, int finish)
|
||||
{
|
||||
|
||||
int returnValue = 0;
|
||||
int q,i,j,k,n,N;
|
||||
int q,i,j,k,n,N,idx;
|
||||
int Cqx,Cqy,Cqz; // Discrete velocity
|
||||
int x,y,z; // Global indices
|
||||
int xn,yn,zn; // Global indices of neighbor
|
||||
@ -89,37 +82,30 @@ extern int GlobalCheckDebugDist(double *dist_even, double *dist_odd, int Nx, int
|
||||
X = Nx*nprocx;
|
||||
Y = Ny*nprocy;
|
||||
Z = Nz*nprocz;
|
||||
NULL_USE(Z);
|
||||
NULL_USE(Z);
|
||||
N = (Nx+2)*(Ny+2)*(Nz+2); // size of the array including halo
|
||||
for (k=0; k<Nz; k++){
|
||||
for (j=0; j<Ny; j++){
|
||||
for (i=0; i<Nx; i++){
|
||||
|
||||
n = (k+1)*(Nx+2)*(Ny+2) + (j+1)*(Nx+2) + i+1;
|
||||
idx=Map(i,j,k);
|
||||
|
||||
// Get the 'global' index
|
||||
x = iproc*Nx+i;
|
||||
y = jproc*Ny+j;
|
||||
z = kproc*Nz+k;
|
||||
for (q=0; q<9; q++){
|
||||
if (idx > start && idx< finish){
|
||||
// Get the 'global' index
|
||||
x = iproc*Nx+i;
|
||||
y = jproc*Ny+j;
|
||||
z = kproc*Nz+k;
|
||||
for (q=0; q<18; q++){
|
||||
|
||||
if (dist_even[(q+1)*N+n] != (z*X*Y+y*X+x) + 2*(q+1)*0.01){
|
||||
printf("******************************************\n");
|
||||
printf("error in even distribution q = %i \n", 2*(q+1));
|
||||
printf("i,j,k= %i, %i, %i \n", x,y,z);
|
||||
printf("dist = %5.2f \n", dist_even[(q+1)*N+n]);
|
||||
printf("n= %i \n",z*X*Y+y*X+x);
|
||||
returnValue++;
|
||||
}
|
||||
if (dist[(q+1)*Np+idx] != (z*X*Y+y*X+x) + (q+1)*0.01){
|
||||
printf("******************************************\n");
|
||||
printf("error in distribution q = %i \n", (q+1));
|
||||
printf("i,j,k= %i, %i, %i \n", x,y,z);
|
||||
printf("dist = %5.2f \n", dist[(q+1)*Np+idx]);
|
||||
printf("n= %i \n",z*X*Y+y*X+x);
|
||||
returnValue++;
|
||||
}
|
||||
|
||||
|
||||
if (dist_odd[q*N+n] != (z*X*Y+y*X+x) + (2*q+1)*0.01){
|
||||
printf("******************************************\n");
|
||||
printf("error in odd distribution q = %i \n", 2*q+1);
|
||||
printf("i,j,k= %i, %i, %i \n", x,y,z);
|
||||
printf("dist = %5.2f \n", dist_odd[q*N+n]);
|
||||
printf("n= %i \n",z*X*Y+y*X+x);
|
||||
returnValue++;
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -311,20 +297,20 @@ int main(int argc, char **argv)
|
||||
char *id;
|
||||
id = new char[Nx*Ny*Nz];
|
||||
|
||||
/*
|
||||
* if (rank==0) printf("Assigning phase ID from file \n");
|
||||
* if (rank==0) printf("Initialize from segmented data: solid=0, NWP=1, WP=2 \n");
|
||||
FILE *IDFILE = fopen(LocalRankFilename,"rb");
|
||||
if (IDFILE==NULL) ERROR("Error opening file: ID.xxxxx");
|
||||
fread(id,1,N,IDFILE);
|
||||
fclose(IDFILE);
|
||||
*/
|
||||
|
||||
if (rank==0) printf("Assigning phase ID from file \n");
|
||||
if (rank==0) printf("Initialize from segmented data: solid=0, NWP=1, WP=2 \n");
|
||||
FILE *IDFILE = fopen(LocalRankFilename,"rb");
|
||||
if (IDFILE==NULL) ERROR("Error opening file: ID.xxxxx");
|
||||
fread(id,1,N,IDFILE);
|
||||
fclose(IDFILE);
|
||||
|
||||
// Setup the domain
|
||||
for (k=0;k<Nz;k++){
|
||||
for (j=0;j<Ny;j++){
|
||||
for (i=0;i<Nx;i++){
|
||||
n = k*Nx*Ny+j*Nx+i;
|
||||
id[n] = 1;
|
||||
//id[n] = 1;
|
||||
Dm.id[n] = id[n];
|
||||
}
|
||||
}
|
||||
@ -337,6 +323,7 @@ int main(int argc, char **argv)
|
||||
double sum;
|
||||
double sum_local=0.0, porosity;
|
||||
char component = 0; // solid phase
|
||||
int Np=0;
|
||||
for (k=1;k<Nz-1;k++){
|
||||
for (j=1;j<Ny-1;j++){
|
||||
for (i=1;i<Nx-1;i++){
|
||||
@ -344,6 +331,7 @@ int main(int argc, char **argv)
|
||||
if (id[n] == component){
|
||||
sum_local+=1.0;
|
||||
}
|
||||
else Np++;
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -357,33 +345,53 @@ int main(int argc, char **argv)
|
||||
if (rank == 0) cout << "Domain set." << endl;
|
||||
//...........................................................................
|
||||
|
||||
|
||||
//...........................................................................
|
||||
if (rank==0) printf ("Create ScaLBL_Communicator \n");
|
||||
// Create a communicator for the device
|
||||
// Create a communicator for the device (will use optimized layout)
|
||||
ScaLBL_Communicator ScaLBL_Comm(Dm);
|
||||
|
||||
//...........device phase ID.................................................
|
||||
if (rank==0) printf ("Copying phase ID to device \n");
|
||||
char *ID;
|
||||
ScaLBL_AllocateDeviceMemory((void **) &ID, N); // Allocate device memory
|
||||
// Copy to the device
|
||||
ScaLBL_CopyToDevice(ID, id, N);
|
||||
if (rank==0) printf ("Set up memory efficient layout \n");
|
||||
int neighborSize=18*Np*sizeof(int);
|
||||
int *neighborList;
|
||||
IntArray Map(Nx,Ny,Nz);
|
||||
neighborList= new int[18*Np];
|
||||
ScaLBL_Comm.MemoryOptimizedLayoutAA(Map,neighborList,Dm.id,Np);
|
||||
MPI_Barrier(comm);
|
||||
|
||||
//......................device distributions.................................
|
||||
dist_mem_size = Np*sizeof(double);
|
||||
if (rank==0) printf ("Allocating distributions \n");
|
||||
|
||||
int *NeighborList;
|
||||
int *dvcMap;
|
||||
double *fq;
|
||||
//...........................................................................
|
||||
ScaLBL_AllocateDeviceMemory((void **) &NeighborList, neighborSize);
|
||||
ScaLBL_AllocateDeviceMemory((void **) &dvcMap, sizeof(int)*Np);
|
||||
ScaLBL_AllocateDeviceMemory((void **) &fq, 19*dist_mem_size);
|
||||
//...........................................................................
|
||||
|
||||
//...........................................................................
|
||||
// MAIN VARIABLES ALLOCATED HERE
|
||||
//...........................................................................
|
||||
// LBM variables
|
||||
if (rank==0) printf ("Allocating distributions \n");
|
||||
//......................device distributions.................................
|
||||
double *f_even,*f_odd;
|
||||
//...........................................................................
|
||||
ScaLBL_AllocateDeviceMemory((void **) &f_even, 10*dist_mem_size); // Allocate device memory
|
||||
ScaLBL_AllocateDeviceMemory((void **) &f_odd, 9*dist_mem_size); // Allocate device memory
|
||||
//...........................................................................
|
||||
double *f_even_host,*f_odd_host;
|
||||
f_even_host = new double [10*N];
|
||||
f_odd_host = new double [9*N];
|
||||
double *fq_host;
|
||||
fq_host = new double [19*Np];
|
||||
|
||||
// Update GPU data structures
|
||||
if (rank==0) printf ("Setting up device map and neighbor list \n");
|
||||
int *TmpMap;
|
||||
TmpMap=new int[Np];
|
||||
for (k=1; k<Nz-1; k++){
|
||||
for (j=1; j<Ny-1; j++){
|
||||
for (i=1; i<Nx-1; i++){
|
||||
int idx=Map(i,j,k);
|
||||
if (!(idx < 0))
|
||||
TmpMap[idx] = k*Nx*Ny+j*Nx+i;
|
||||
}
|
||||
}
|
||||
}
|
||||
ScaLBL_CopyToDevice(dvcMap, TmpMap, sizeof(int)*Np);
|
||||
ScaLBL_DeviceBarrier();
|
||||
delete [] TmpMap;
|
||||
|
||||
//...........................................................................
|
||||
|
||||
/* // Write the communication structure into a file for debugging
|
||||
@ -414,29 +422,27 @@ int main(int argc, char **argv)
|
||||
fprintf(CommFile,"\n");
|
||||
fclose(CommFile);
|
||||
*/
|
||||
if (rank==0) printf("Setting the distributions, size = : %i\n", N);
|
||||
if (rank==0) printf("Setting the distributions, size = : %i\n", Np);
|
||||
//...........................................................................
|
||||
GlobalFlipScaLBL_D3Q19_Init(f_even_host, f_odd_host, Nx-2, Ny-2, Nz-2,iproc,jproc,kproc,nprocx,nprocy,nprocz);
|
||||
ScaLBL_CopyToDevice(f_even, f_even_host, 10*dist_mem_size);
|
||||
ScaLBL_CopyToDevice(f_odd, f_odd_host, 9*dist_mem_size);
|
||||
GlobalFlipScaLBL_D3Q19_Init(fq_host, Map, Np, Nx-2, Ny-2, Nz-2,iproc,jproc,kproc,nprocx,nprocy,nprocz);
|
||||
ScaLBL_CopyToDevice(fq, fq_host, 19*dist_mem_size);
|
||||
ScaLBL_DeviceBarrier();
|
||||
MPI_Barrier(comm);
|
||||
//*************************************************************************
|
||||
// Pack and send the D3Q19 distributions
|
||||
ScaLBL_Comm.SendD3Q19(f_even, f_odd);
|
||||
//*************************************************************************
|
||||
// Swap the distributions for momentum transport
|
||||
//*************************************************************************
|
||||
ScaLBL_D3Q19_Swap(ID, f_even, f_odd, Nx, Ny, Nz);
|
||||
//*************************************************************************
|
||||
// Wait for communications to complete and unpack the distributions
|
||||
ScaLBL_Comm.RecvD3Q19(f_even, f_odd);
|
||||
//*************************************************************************
|
||||
// First timestep
|
||||
ScaLBL_Comm.SendD3Q19AA(fq); //READ FROM NORMAL
|
||||
|
||||
ScaLBL_Comm.RecvD3Q19AA(fq); //WRITE INTO OPPOSITE
|
||||
|
||||
// Second timestep
|
||||
ScaLBL_Comm.SendD3Q19AA(fq); //READ FROM NORMAL
|
||||
|
||||
ScaLBL_Comm.RecvD3Q19AA(fq); //WRITE INTO OPPOSITE
|
||||
|
||||
|
||||
//...........................................................................
|
||||
ScaLBL_CopyToHost(f_even_host,f_even,10*N*sizeof(double));
|
||||
ScaLBL_CopyToHost(f_odd_host,f_odd,9*N*sizeof(double));
|
||||
check = GlobalCheckDebugDist(f_even_host, f_odd_host, Nx-2, Ny-2, Nz-2,iproc,jproc,kproc,nprocx,nprocy,nprocz);
|
||||
ScaLBL_CopyToHost(fq_host,fq,19*Np*sizeof(double));
|
||||
check = GlobalCheckDebugDist(fq_host, Map, Np, Nx-2, Ny-2, Nz-2,iproc,jproc,kproc,nprocx,nprocy,nprocz,0,ScaLBL_Comm.next);
|
||||
//...........................................................................
|
||||
|
||||
int timestep = 0;
|
||||
@ -453,17 +459,16 @@ int main(int argc, char **argv)
|
||||
//************ MAIN ITERATION LOOP (timing communications)***************************************/
|
||||
while (timestep < 100){
|
||||
|
||||
//*************************************************************************
|
||||
// Pack and send the D3Q19 distributions
|
||||
ScaLBL_Comm.SendD3Q19(f_even, f_odd);
|
||||
//*************************************************************************
|
||||
// Swap the distributions for momentum transport
|
||||
//*************************************************************************
|
||||
ScaLBL_D3Q19_Swap(ID, f_even, f_odd, Nx, Ny, Nz);
|
||||
//*************************************************************************
|
||||
// Wait for communications to complete and unpack the distributions
|
||||
ScaLBL_Comm.RecvD3Q19(f_even, f_odd);
|
||||
//*************************************************************************
|
||||
// First timestep
|
||||
ScaLBL_Comm.SendD3Q19AA(fq); //READ FROM NORMAL
|
||||
|
||||
ScaLBL_Comm.RecvD3Q19AA(fq); //WRITE INTO OPPOSITE
|
||||
|
||||
// Second timestep
|
||||
ScaLBL_Comm.SendD3Q19AA(fq); //READ FROM NORMAL
|
||||
|
||||
ScaLBL_Comm.RecvD3Q19AA(fq); //WRITE INTO OPPOSITE
|
||||
//*********************************************
|
||||
|
||||
ScaLBL_DeviceBarrier();
|
||||
MPI_Barrier(comm);
|
||||
@ -476,7 +481,7 @@ int main(int argc, char **argv)
|
||||
// cout << "CPU time: " << (stoptime - starttime) << " seconds" << endl;
|
||||
cputime = stoptime - starttime;
|
||||
// cout << "Lattice update rate: "<< double(Nx*Ny*Nz*timestep)/cputime/1000000 << " MLUPS" << endl;
|
||||
double MLUPS = double(Nx*Ny*Nz*timestep)/cputime/1000000;
|
||||
double MLUPS = double(Np)*double(timestep)/cputime*1e-6;
|
||||
if (rank==0) printf("********************************************************\n");
|
||||
if (rank==0) printf("CPU time = %f \n", cputime);
|
||||
if (rank==0) printf("Lattice update rate (per process)= %f MLUPS \n", MLUPS);
|
||||
@ -486,9 +491,9 @@ int main(int argc, char **argv)
|
||||
|
||||
// Number of memory references from the swap algorithm (per timestep)
|
||||
// 18 reads and 18 writes for each lattice site
|
||||
double MemoryRefs = (Nx-2)*(Ny-2)*(Nz-2)*36;
|
||||
double MemoryRefs = double(Np)*36;
|
||||
// number of memory references for the swap algorithm - GigaBytes / second
|
||||
if (rank==0) printf("DRAM bandwidth (per process)= %f GB/sec \n",MemoryRefs*8*timestep/1e9);
|
||||
if (rank==0) printf("DRAM bandwidth (per process)= %f GB/sec \n",MemoryRefs*8*double(timestep)*1e-9);
|
||||
// Report bandwidth in Gigabits per second
|
||||
// communication bandwidth includes both send and receive
|
||||
if (rank==0) printf("Communication bandwidth (per process)= %f Gbit/sec \n",ScaLBL_Comm.CommunicationCount*64*timestep/1e9);
|
||||
|
@ -645,7 +645,7 @@ int main(int argc, char **argv)
|
||||
// Compute the walltime per timestep
|
||||
cputime = (stoptime - starttime)/timestep;
|
||||
// Performance obtained from each node
|
||||
double MLUPS = double(Nx*Ny*Nz)/cputime/1000000;
|
||||
double MLUPS = double(Np)/cputime/1000000;
|
||||
|
||||
if (rank==0) printf("********************************************************\n");
|
||||
if (rank==0) printf("CPU time = %f \n", cputime);
|
||||
@ -655,16 +655,6 @@ int main(int argc, char **argv)
|
||||
if (rank==0) printf("********************************************************\n");
|
||||
|
||||
// ************************************************************************
|
||||
double *PHASE;
|
||||
int SIZE=Nx*Ny*Nz*sizeof(double);
|
||||
PHASE= new double [Nx*Ny*Nz];
|
||||
ScaLBL_CopyToHost(&PHASE[0],&Phi[0],SIZE);
|
||||
|
||||
FILE *OUTFILE;
|
||||
sprintf(LocalRankFilename,"Phase.%05i.raw",rank);
|
||||
OUTFILE = fopen(LocalRankFilename,"wb");
|
||||
fwrite(PHASE,8,N,OUTFILE);
|
||||
fclose(OUTFILE);
|
||||
|
||||
PROFILE_STOP("Main");
|
||||
PROFILE_SAVE("lbpm_color_simulator",1);
|
||||
|
@ -424,7 +424,7 @@ int main(int argc, char **argv)
|
||||
// Compute the walltime per timestep
|
||||
cputime = (stoptime - starttime)/timestep;
|
||||
// Performance obtained from each node
|
||||
double MLUPS = double(Nx*Ny*Nz)/cputime/1000000;
|
||||
double MLUPS = double(Np)/cputime/1000000;
|
||||
|
||||
if (rank==0) printf("********************************************************\n");
|
||||
if (rank==0) printf("CPU time = %f \n", cputime);
|
||||
|
Loading…
Reference in New Issue
Block a user