#include #include #include #include #include #include #include #include #include "pmmc.h" #include "Domain.h" #include "Extras.h" #include "D3Q19.h" #include "D3Q7.h" #include "Color.h" #include "Communication.h" //#define CBUB //#define WRITE_SURFACES //#define USE_EXP_CONTACT_ANGLE using namespace std; //************************************************************************* // Implementation of Two-Phase Immiscible LBM using CUDA //************************************************************************* inline void PackID(int *list, int count, char *sendbuf, char *ID){ // Fill in the phase ID values from neighboring processors // This packs up the values that need to be sent from one processor to another int idx,n; for (idx=0; idx> tau; // Viscosity parameter input >> alpha; // Surface Tension parameter input >> beta; // Width of the interface input >> phi_s; // value of phi at the solid surface // Line 4: wetting phase saturation to initialize input >> wp_saturation; // Line 5: External force components (Fx,Fy, Fz) input >> Fx; input >> Fy; input >> Fz; // Line 6: Pressure Boundary conditions input >> Restart; input >> pBC; input >> din; input >> dout; // Line 7: time-stepping criteria input >> timestepMax; // max no. of timesteps input >> interval; // restart interval input >> tol; // error tolerance // Line 8: Number of external forces to use input >> SimCount; // Number of paired forces to simulate input >> SimDelta; // Delta - percentage increase for force pair //............................................................. //....................................................................... // Reading the domain information file //....................................................................... ifstream domain("Domain.in"); domain >> nprocx; domain >> nprocy; domain >> nprocz; domain >> Nx; domain >> Ny; domain >> Nz; domain >> nspheres; domain >> Lx; domain >> Ly; domain >> Lz; //....................................................................... } // ************************************************************** // Broadcast simulation parameters from rank 0 to all other procs MPI_Barrier(MPI_COMM_WORLD); //................................................. MPI_Bcast(&tau,1,MPI_DOUBLE,0,MPI_COMM_WORLD); MPI_Bcast(&alpha,1,MPI_DOUBLE,0,MPI_COMM_WORLD); MPI_Bcast(&beta,1,MPI_DOUBLE,0,MPI_COMM_WORLD); MPI_Bcast(&das,1,MPI_DOUBLE,0,MPI_COMM_WORLD); MPI_Bcast(&dbs,1,MPI_DOUBLE,0,MPI_COMM_WORLD); MPI_Bcast(&phi_s,1,MPI_DOUBLE,0,MPI_COMM_WORLD); MPI_Bcast(&wp_saturation,1,MPI_DOUBLE,0,MPI_COMM_WORLD); MPI_Bcast(&pBC,1,MPI_LOGICAL,0,MPI_COMM_WORLD); MPI_Bcast(&Restart,1,MPI_LOGICAL,0,MPI_COMM_WORLD); MPI_Bcast(&din,1,MPI_DOUBLE,0,MPI_COMM_WORLD); MPI_Bcast(&dout,1,MPI_DOUBLE,0,MPI_COMM_WORLD); MPI_Bcast(&Fx,1,MPI_DOUBLE,0,MPI_COMM_WORLD); MPI_Bcast(&Fy,1,MPI_DOUBLE,0,MPI_COMM_WORLD); MPI_Bcast(&Fz,1,MPI_DOUBLE,0,MPI_COMM_WORLD); MPI_Bcast(×tepMax,1,MPI_INT,0,MPI_COMM_WORLD); MPI_Bcast(&interval,1,MPI_INT,0,MPI_COMM_WORLD); MPI_Bcast(&tol,1,MPI_DOUBLE,0,MPI_COMM_WORLD); MPI_Bcast(&SimCount,1,MPI_INT,0,MPI_COMM_WORLD); MPI_Bcast(&SimDelta,1,MPI_DOUBLE,0,MPI_COMM_WORLD); // Computational domain MPI_Bcast(&Nx,1,MPI_INT,0,MPI_COMM_WORLD); MPI_Bcast(&Ny,1,MPI_INT,0,MPI_COMM_WORLD); MPI_Bcast(&Nz,1,MPI_INT,0,MPI_COMM_WORLD); MPI_Bcast(&nprocx,1,MPI_INT,0,MPI_COMM_WORLD); MPI_Bcast(&nprocy,1,MPI_INT,0,MPI_COMM_WORLD); MPI_Bcast(&nprocz,1,MPI_INT,0,MPI_COMM_WORLD); MPI_Bcast(&nspheres,1,MPI_INT,0,MPI_COMM_WORLD); MPI_Bcast(&Lx,1,MPI_DOUBLE,0,MPI_COMM_WORLD); MPI_Bcast(&Ly,1,MPI_DOUBLE,0,MPI_COMM_WORLD); MPI_Bcast(&Lz,1,MPI_DOUBLE,0,MPI_COMM_WORLD); //................................................. MPI_Barrier(MPI_COMM_WORLD); RESTART_INTERVAL=interval; // ************************************************************** // ************************************************************** double Ps = -(das-dbs)/(das+dbs); double rlxA = 1.f/tau; double rlxB = 8.f*(2.f-rlxA)/(8.f-rlxA); double xIntPos; xIntPos = log((1.0+phi_s)/(1.0-phi_s))/(2.0*beta); // Set the density values inside the solid based on the input value phi_s das = (phi_s+1.0)*0.5; dbs = 1.0 - das; if (nprocs != nprocx*nprocy*nprocz){ printf("nprocx = %i \n",nprocx); printf("nprocy = %i \n",nprocy); printf("nprocz = %i \n",nprocz); INSIST(nprocs == nprocx*nprocy*nprocz,"Fatal error in processor count!"); } if (rank==0){ printf("********************************************************\n"); printf("tau = %f \n", tau); printf("alpha = %f \n", alpha); printf("beta = %f \n", beta); printf("das = %f \n", das); printf("dbs = %f \n", dbs); printf("Value of phi at solid surface = %f \n", phi_s); printf("gamma_{wn} = %f \n", 5.796*alpha); // printf("cos theta_c = %f \n", 1.05332*Ps); printf("Initial Force(x) = %f \n", Fx); printf("Initial Force(y) = %f \n", Fy); printf("Initial Force(z) = %f \n", Fz); printf("Sub-domain size = %i x %i x %i\n",Nz,Nz,Nz); printf("Parallel domain size = %i x %i x %i\n",nprocx,nprocy,nprocz); printf("********************************************************\n"); if (rank==0) printf("Number of paired forces = %i \n", SimCount); if (rank==0) printf("Percent difference for force pair = %f \n", SimDelta); if (rank==0) printf("********************************************************\n"); } InitializeRanks( rank, nprocx, nprocy, nprocz, iproc, jproc, kproc, rank_x, rank_y, rank_z, rank_X, rank_Y, rank_Z, rank_xy, rank_XY, rank_xY, rank_Xy, rank_xz, rank_XZ, rank_xZ, rank_Xz, rank_yz, rank_YZ, rank_yZ, rank_Yz ); MPI_Barrier(MPI_COMM_WORLD); Nz += 2; Nx = Ny = Nz; // Cubic domain int N = Nx*Ny*Nz; int dist_mem_size = N*sizeof(double); //....................................................................... if (rank == 0) printf("Read input media... \n"); //....................................................................... //....................................................................... // Filenames used char LocalRankString[8]; char LocalRankFilename[40]; char LocalRestartFile[40]; char tmpstr[10]; sprintf(LocalRankString,"%05d",rank); sprintf(LocalRankFilename,"%s%s","ID.",LocalRankString); sprintf(LocalRestartFile,"%s%s","Restart.",LocalRankString); // .......... READ THE INPUT FILE ....................................... // char value; char *id; id = new char[N]; int sum = 0; double sum_local; double iVol_global = 1.0/(1.0*(Nx-2)*(Ny-2)*(Nz-2)*nprocs); double porosity; DoubleArray SignDist(Nx,Ny,Nz); //....................................................................... #ifdef CBUB // Initializes a constrained bubble test double BubbleBot = 20.0; // How big to make the NWP bubble double BubbleTop = 60.0; // How big to make the NWP bubble double TubeRadius = 15.5; // Radius of the capillary tube sum=0; for (k=0;k 0.0){ id[n] = 2; } // compute the porosity (actual interface location used) if (SignDist.data[n] > 1.0){ sum++; } } } } //......................................................... // If pressure boundary conditions are applied remove solid if (pBC && kproc == 0){ for (k=0; k<3; k++){ for (j=0;j fluid_isovalue) int cube[8][3] = {{0,0,0},{1,0,0},{0,1,0},{1,1,0},{0,0,1},{1,0,1},{0,1,1},{1,1,1}}; // cube corners DoubleArray CubeValues(2,2,2); // int count_in=0,count_out=0; // int nodx,nody,nodz; // initialize lists for vertices for surfaces, common line DTMutableList nw_pts(20); DTMutableList ns_pts(20); DTMutableList ws_pts(20); DTMutableList nws_pts(20); // initialize triangle lists for surfaces IntArray nw_tris(3,20); IntArray ns_tris(3,20); IntArray ws_tris(3,20); // initialize list for line segments IntArray nws_seg(2,20); DTMutableList tmp(20); DoubleArray Values(20); DoubleArray ContactAngle(20); DoubleArray Curvature(20); DoubleArray DistValues(20); DoubleArray InterfaceSpeed(20); DoubleArray NormalVector(60); // IntArray store; int n_nw_pts=0,n_ns_pts=0,n_ws_pts=0,n_nws_pts=0; int n_nw_tris=0, n_ns_tris=0, n_ws_tris=0, n_nws_seg=0; // double s,s1,s2,s3; // Triangle sides (lengths) Point A,B,C,P; // double area; // Initialize arrays for local solid surface DTMutableList local_sol_pts(20); int n_local_sol_pts = 0; IntArray local_sol_tris(3,18); int n_local_sol_tris; DoubleArray values(20); DTMutableList local_nws_pts(20); int n_local_nws_pts; //int n_nw_tris_beg, n_ns_tris_beg, n_ws_tris_beg; int c; //int newton_steps = 0; //........................................................................... int ncubes = (Nx-2)*(Ny-2)*(Nz-2); // Exclude the "upper" halo IntArray cubeList(3,ncubes); int nc=0; //........................................................................... // Set up the cube list (very regular in this case due to lack of blob-ID) // Set up kstart, kfinish so that the reservoirs are excluded from averaging int kstart,kfinish; kstart = 1; kfinish = Nz-1; // if (pBC && kproc==0) kstart = 4; // if (pBC && kproc==nprocz-1) kfinish = Nz-4; for (k=kstart; k CPU //........................................................................... DeviceBarrier(); ComputePressureD3Q19(ID,f_even,f_odd,Pressure,Nx,Ny,Nz); CopyToHost(Phase.data,Phi,N*sizeof(double)); CopyToHost(Press.data,Pressure,N*sizeof(double)); CopyToHost(Vel_x.data,&Velocity[0],N*sizeof(double)); CopyToHost(Vel_y.data,&Velocity[N],N*sizeof(double)); CopyToHost(Vel_z.data,&Velocity[2*N],N*sizeof(double)); MPI_Barrier(MPI_COMM_WORLD); //........................................................................... int timestep = 0; if (rank==0) printf("********************************************************\n"); if (rank==0) printf("No. of timesteps: %i \n", timestepMax); //.......create and start timer............ double starttime,stoptime,cputime; sendtag = recvtag = 5; FILE *TIMELOG; FILE *FINALSTATE; FILE *SPEED; if (rank==0){ TIMELOG= fopen("timelog.tcat","a+"); FINALSTATE= fopen("finalstate.tcat","a"); if (fseek(TIMELOG,0,SEEK_CUR) == fseek(TIMELOG,0,SEEK_SET)){ // If timelog is empty, write a short header to list the averages fprintf(TIMELOG,"--------------------------------------------------------------------------------------\n"); fprintf(TIMELOG,"timestep dEs "); // Timestep, Change in Surface Energy fprintf(TIMELOG,"sw pw pn awn ans aws Jwn Kwn lwns efawns "); // Scalar averages fprintf(TIMELOG,"vw[x, y, z] vn[x, y, z] vwn[x, y, z]"); // Velocity averages fprintf(TIMELOG,"Gwn [xx, yy, zz, xy, xz, yz] "); // Orientation tensors fprintf(TIMELOG,"Gws [xx, yy, zz, xy, xz, yz] "); fprintf(TIMELOG,"Gns [xx, yy, zz, xy, xz, yz] "); fprintf(TIMELOG,"trJwn trawn trRwn pc Fx Fy Fz \n"); fprintf(TIMELOG,"--------------------------------------------------------------------------------------\n"); } } err = 1.0; sat_w_previous = 1.01; // slightly impossible value! if (rank==0) printf("Begin timesteps: error tolerance is %f \n", tol); for (int SimNumber=0; SimNumber < 2*SimCount; SimNumber++){ // Increment the external force if (SimNumber%2 == 0){ Fx = 2.0*Fx/(1.0+SimDelta); Fy = 2.0*Fy/(1.0+SimDelta); Fz = 2.0*Fz/(1.0+SimDelta); } else{ Fx += SimDelta*Fx; Fy += SimDelta*Fy; Fz += SimDelta*Fz; } if (rank==0) printf("Simulating {Fx,Fy,Fz} = %f, %f, %f \n", Fx,Fy,Fz); //************ MAIN ITERATION LOOP ***************************************/ MPI_Barrier(MPI_COMM_WORLD); starttime = MPI_Wtime(); //......................................... timestep = 0; while (timestep < timestepMax+5 ){ //************************************************************************* // Fused Color Gradient and Collision //************************************************************************* ColorCollideOpt( ID,f_even,f_odd,Phi,ColorGrad, Velocity,Nx,Ny,Nz,rlxA,rlxB,alpha,beta,Fx,Fy,Fz); //************************************************************************* //................................................................................... PackDist(1,dvcSendList_x,0,sendCount_x,sendbuf_x,f_even,N); PackDist(4,dvcSendList_x,sendCount_x,sendCount_x,sendbuf_x,f_even,N); PackDist(5,dvcSendList_x,2*sendCount_x,sendCount_x,sendbuf_x,f_even,N); PackDist(6,dvcSendList_x,3*sendCount_x,sendCount_x,sendbuf_x,f_even,N); PackDist(7,dvcSendList_x,4*sendCount_x,sendCount_x,sendbuf_x,f_even,N); //...Packing for X face(1,7,9,11,13)................................ PackDist(0,dvcSendList_X,0,sendCount_X,sendbuf_X,f_odd,N); PackDist(3,dvcSendList_X,sendCount_X,sendCount_X,sendbuf_X,f_odd,N); PackDist(4,dvcSendList_X,2*sendCount_X,sendCount_X,sendbuf_X,f_odd,N); PackDist(5,dvcSendList_X,3*sendCount_X,sendCount_X,sendbuf_X,f_odd,N); PackDist(6,dvcSendList_X,4*sendCount_X,sendCount_X,sendbuf_X,f_odd,N); //...Packing for y face(4,8,9,16,18)................................. PackDist(2,dvcSendList_y,0,sendCount_y,sendbuf_y,f_even,N); PackDist(4,dvcSendList_y,sendCount_y,sendCount_y,sendbuf_y,f_even,N); PackDist(4,dvcSendList_y,2*sendCount_y,sendCount_y,sendbuf_y,f_odd,N); PackDist(8,dvcSendList_y,3*sendCount_y,sendCount_y,sendbuf_y,f_even,N); PackDist(9,dvcSendList_y,4*sendCount_y,sendCount_y,sendbuf_y,f_even,N); //...Packing for Y face(3,7,10,15,17)................................. PackDist(1,dvcSendList_Y,0,sendCount_Y,sendbuf_Y,f_odd,N); PackDist(3,dvcSendList_Y,sendCount_Y,sendCount_Y,sendbuf_Y,f_odd,N); PackDist(5,dvcSendList_Y,2*sendCount_Y,sendCount_Y,sendbuf_Y,f_even,N); PackDist(7,dvcSendList_Y,3*sendCount_Y,sendCount_Y,sendbuf_Y,f_odd,N); PackDist(8,dvcSendList_Y,4*sendCount_Y,sendCount_Y,sendbuf_Y,f_odd,N); //...Packing for z face(6,12,13,16,17)................................ PackDist(3,dvcSendList_z,0,sendCount_z,sendbuf_z,f_even,N); PackDist(6,dvcSendList_z,sendCount_z,sendCount_z,sendbuf_z,f_even,N); PackDist(6,dvcSendList_z,2*sendCount_z,sendCount_z,sendbuf_z,f_odd,N); PackDist(8,dvcSendList_z,3*sendCount_z,sendCount_z,sendbuf_z,f_even,N); PackDist(8,dvcSendList_z,4*sendCount_z,sendCount_z,sendbuf_z,f_odd,N); //...Packing for Z face(5,11,14,15,18)................................ PackDist(2,dvcSendList_Z,0,sendCount_Z,sendbuf_Z,f_odd,N); PackDist(5,dvcSendList_Z,sendCount_Z,sendCount_Z,sendbuf_Z,f_odd,N); PackDist(7,dvcSendList_Z,2*sendCount_Z,sendCount_Z,sendbuf_Z,f_even,N); PackDist(7,dvcSendList_Z,3*sendCount_Z,sendCount_Z,sendbuf_Z,f_odd,N); PackDist(9,dvcSendList_Z,4*sendCount_Z,sendCount_Z,sendbuf_Z,f_even,N); //...Pack the xy edge (8)................................ PackDist(4,dvcSendList_xy,0,sendCount_xy,sendbuf_xy,f_even,N); //...Pack the Xy edge (9)................................ PackDist(4,dvcSendList_Xy,0,sendCount_Xy,sendbuf_Xy,f_odd,N); //...Pack the xY edge (10)................................ PackDist(5,dvcSendList_xY,0,sendCount_xY,sendbuf_xY,f_even,N); //...Pack the XY edge (7)................................ PackDist(3,dvcSendList_XY,0,sendCount_XY,sendbuf_XY,f_odd,N); //...Pack the xz edge (12)................................ PackDist(6,dvcSendList_xz,0,sendCount_xz,sendbuf_xz,f_even,N); //...Pack the xZ edge (14)................................ PackDist(7,dvcSendList_xZ,0,sendCount_xZ,sendbuf_xZ,f_even,N); //...Pack the Xz edge (13)................................ PackDist(6,dvcSendList_Xz,0,sendCount_Xz,sendbuf_Xz,f_odd,N); //...Pack the XZ edge (11)................................ PackDist(5,dvcSendList_XZ,0,sendCount_XZ,sendbuf_XZ,f_odd,N); //...Pack the xz edge (12)................................ //...Pack the yz edge (16)................................ PackDist(8,dvcSendList_yz,0,sendCount_yz,sendbuf_yz,f_even,N); //...Pack the yZ edge (18)................................ PackDist(9,dvcSendList_yZ,0,sendCount_yZ,sendbuf_yZ,f_even,N); //...Pack the Yz edge (17)................................ PackDist(8,dvcSendList_Yz,0,sendCount_Yz,sendbuf_Yz,f_odd,N); //...Pack the YZ edge (15)................................ PackDist(7,dvcSendList_YZ,0,sendCount_YZ,sendbuf_YZ,f_odd,N); //................................................................................... //................................................................................... // Send all the distributions MPI_Isend(sendbuf_x, 5*sendCount_x,MPI_DOUBLE,rank_x,sendtag,MPI_COMM_WORLD,&req1[0]); MPI_Irecv(recvbuf_X, 5*recvCount_X,MPI_DOUBLE,rank_X,recvtag,MPI_COMM_WORLD,&req2[0]); MPI_Isend(sendbuf_X, 5*sendCount_X,MPI_DOUBLE,rank_X,sendtag,MPI_COMM_WORLD,&req1[1]); MPI_Irecv(recvbuf_x, 5*recvCount_x,MPI_DOUBLE,rank_x,recvtag,MPI_COMM_WORLD,&req2[1]); MPI_Isend(sendbuf_y, 5*sendCount_y,MPI_DOUBLE,rank_y,sendtag,MPI_COMM_WORLD,&req1[2]); MPI_Irecv(recvbuf_Y, 5*recvCount_Y,MPI_DOUBLE,rank_Y,recvtag,MPI_COMM_WORLD,&req2[2]); MPI_Isend(sendbuf_Y, 5*sendCount_Y,MPI_DOUBLE,rank_Y,sendtag,MPI_COMM_WORLD,&req1[3]); MPI_Irecv(recvbuf_y, 5*recvCount_y,MPI_DOUBLE,rank_y,recvtag,MPI_COMM_WORLD,&req2[3]); MPI_Isend(sendbuf_z, 5*sendCount_z,MPI_DOUBLE,rank_z,sendtag,MPI_COMM_WORLD,&req1[4]); MPI_Irecv(recvbuf_Z, 5*recvCount_Z,MPI_DOUBLE,rank_Z,recvtag,MPI_COMM_WORLD,&req2[4]); MPI_Isend(sendbuf_Z, 5*sendCount_Z,MPI_DOUBLE,rank_Z,sendtag,MPI_COMM_WORLD,&req1[5]); MPI_Irecv(recvbuf_z, 5*recvCount_z,MPI_DOUBLE,rank_z,recvtag,MPI_COMM_WORLD,&req2[5]); MPI_Isend(sendbuf_xy, sendCount_xy,MPI_DOUBLE,rank_xy,sendtag,MPI_COMM_WORLD,&req1[6]); MPI_Irecv(recvbuf_XY, recvCount_XY,MPI_DOUBLE,rank_XY,recvtag,MPI_COMM_WORLD,&req2[6]); MPI_Isend(sendbuf_XY, sendCount_XY,MPI_DOUBLE,rank_XY,sendtag,MPI_COMM_WORLD,&req1[7]); MPI_Irecv(recvbuf_xy, recvCount_xy,MPI_DOUBLE,rank_xy,recvtag,MPI_COMM_WORLD,&req2[7]); MPI_Isend(sendbuf_Xy, sendCount_Xy,MPI_DOUBLE,rank_Xy,sendtag,MPI_COMM_WORLD,&req1[8]); MPI_Irecv(recvbuf_xY, recvCount_xY,MPI_DOUBLE,rank_xY,recvtag,MPI_COMM_WORLD,&req2[8]); MPI_Isend(sendbuf_xY, sendCount_xY,MPI_DOUBLE,rank_xY,sendtag,MPI_COMM_WORLD,&req1[9]); MPI_Irecv(recvbuf_Xy, recvCount_Xy,MPI_DOUBLE,rank_Xy,recvtag,MPI_COMM_WORLD,&req2[9]); MPI_Isend(sendbuf_xz, sendCount_xz,MPI_DOUBLE,rank_xz,sendtag,MPI_COMM_WORLD,&req1[10]); MPI_Irecv(recvbuf_XZ, recvCount_XZ,MPI_DOUBLE,rank_XZ,recvtag,MPI_COMM_WORLD,&req2[10]); MPI_Isend(sendbuf_XZ, sendCount_XZ,MPI_DOUBLE,rank_XZ,sendtag,MPI_COMM_WORLD,&req1[11]); MPI_Irecv(recvbuf_xz, recvCount_xz,MPI_DOUBLE,rank_xz,recvtag,MPI_COMM_WORLD,&req2[11]); MPI_Isend(sendbuf_Xz, sendCount_Xz,MPI_DOUBLE,rank_Xz,sendtag,MPI_COMM_WORLD,&req1[12]); MPI_Irecv(recvbuf_xZ, recvCount_xZ,MPI_DOUBLE,rank_xZ,recvtag,MPI_COMM_WORLD,&req2[12]); MPI_Isend(sendbuf_xZ, sendCount_xZ,MPI_DOUBLE,rank_xZ,sendtag,MPI_COMM_WORLD,&req1[13]); MPI_Irecv(recvbuf_Xz, recvCount_Xz,MPI_DOUBLE,rank_Xz,recvtag,MPI_COMM_WORLD,&req2[13]); MPI_Isend(sendbuf_yz, sendCount_yz,MPI_DOUBLE,rank_yz,sendtag,MPI_COMM_WORLD,&req1[14]); MPI_Irecv(recvbuf_YZ, recvCount_YZ,MPI_DOUBLE,rank_YZ,recvtag,MPI_COMM_WORLD,&req2[14]); MPI_Isend(sendbuf_YZ, sendCount_YZ,MPI_DOUBLE,rank_YZ,sendtag,MPI_COMM_WORLD,&req1[15]); MPI_Irecv(recvbuf_yz, recvCount_yz,MPI_DOUBLE,rank_yz,recvtag,MPI_COMM_WORLD,&req2[15]); MPI_Isend(sendbuf_Yz, sendCount_Yz,MPI_DOUBLE,rank_Yz,sendtag,MPI_COMM_WORLD,&req1[16]); MPI_Irecv(recvbuf_yZ, recvCount_yZ,MPI_DOUBLE,rank_yZ,recvtag,MPI_COMM_WORLD,&req2[16]); MPI_Isend(sendbuf_yZ, sendCount_yZ,MPI_DOUBLE,rank_yZ,sendtag,MPI_COMM_WORLD,&req1[17]); MPI_Irecv(recvbuf_Yz, recvCount_Yz,MPI_DOUBLE,rank_Yz,recvtag,MPI_COMM_WORLD,&req2[17]); //................................................................................... //************************************************************************* // Carry out the density streaming step for mass transport //************************************************************************* // DensityStreamD3Q7(ID, Den, Copy, Phi, ColorGrad, Velocity, beta, Nx, Ny, Nz, pBC, S); //************************************************************************* MassColorCollideD3Q7(ID, A_even, A_odd, B_even, B_odd, Den, Phi, ColorGrad, Velocity, beta, N, pBC); //************************************************************************* // Swap the distributions for momentum transport //************************************************************************* SwapD3Q19(ID, f_even, f_odd, Nx, Ny, Nz); //************************************************************************* //................................................................................... // Wait for completion of D3Q19 communication MPI_Waitall(18,req1,stat1); MPI_Waitall(18,req2,stat2); //................................................................................... // Unpack the distributions on the device //................................................................................... //...Map recieve list for the X face: q=2,8,10,12,13 ................................. UnpackDist(0,-1,0,0,dvcRecvList_X,0,recvCount_X,recvbuf_X,f_odd,Nx,Ny,Nz); UnpackDist(3,-1,-1,0,dvcRecvList_X,recvCount_X,recvCount_X,recvbuf_X,f_odd,Nx,Ny,Nz); UnpackDist(4,-1,1,0,dvcRecvList_X,2*recvCount_X,recvCount_X,recvbuf_X,f_odd,Nx,Ny,Nz); UnpackDist(5,-1,0,-1,dvcRecvList_X,3*recvCount_X,recvCount_X,recvbuf_X,f_odd,Nx,Ny,Nz); UnpackDist(6,-1,0,1,dvcRecvList_X,4*recvCount_X,recvCount_X,recvbuf_X,f_odd,Nx,Ny,Nz); //................................................................................... //...Map recieve list for the x face: q=1,7,9,11,13.................................. UnpackDist(1,1,0,0,dvcRecvList_x,0,recvCount_x,recvbuf_x,f_even,Nx,Ny,Nz); UnpackDist(4,1,1,0,dvcRecvList_x,recvCount_x,recvCount_x,recvbuf_x,f_even,Nx,Ny,Nz); UnpackDist(5,1,-1,0,dvcRecvList_x,2*recvCount_x,recvCount_x,recvbuf_x,f_even,Nx,Ny,Nz); UnpackDist(6,1,0,1,dvcRecvList_x,3*recvCount_x,recvCount_x,recvbuf_x,f_even,Nx,Ny,Nz); UnpackDist(7,1,0,-1,dvcRecvList_x,4*recvCount_x,recvCount_x,recvbuf_x,f_even,Nx,Ny,Nz); //................................................................................... //...Map recieve list for the y face: q=4,8,9,16,18 ................................... UnpackDist(1,0,-1,0,dvcRecvList_Y,0,recvCount_Y,recvbuf_Y,f_odd,Nx,Ny,Nz); UnpackDist(3,-1,-1,0,dvcRecvList_Y,recvCount_Y,recvCount_Y,recvbuf_Y,f_odd,Nx,Ny,Nz); UnpackDist(5,1,-1,0,dvcRecvList_Y,2*recvCount_Y,recvCount_Y,recvbuf_Y,f_even,Nx,Ny,Nz); UnpackDist(7,0,-1,-1,dvcRecvList_Y,3*recvCount_Y,recvCount_Y,recvbuf_Y,f_odd,Nx,Ny,Nz); UnpackDist(8,0,-1,1,dvcRecvList_Y,4*recvCount_Y,recvCount_Y,recvbuf_Y,f_odd,Nx,Ny,Nz); //................................................................................... //...Map recieve list for the Y face: q=3,7,10,15,17 .................................. UnpackDist(2,0,1,0,dvcRecvList_y,0,recvCount_y,recvbuf_y,f_even,Nx,Ny,Nz); UnpackDist(4,1,1,0,dvcRecvList_y,recvCount_y,recvCount_y,recvbuf_y,f_even,Nx,Ny,Nz); UnpackDist(4,-1,1,0,dvcRecvList_y,2*recvCount_y,recvCount_y,recvbuf_y,f_odd,Nx,Ny,Nz); UnpackDist(8,0,1,1,dvcRecvList_y,3*recvCount_y,recvCount_y,recvbuf_y,f_even,Nx,Ny,Nz); UnpackDist(9,0,1,-1,dvcRecvList_y,4*recvCount_y,recvCount_y,recvbuf_y,f_even,Nx,Ny,Nz); //................................................................................... //...Map recieve list for the z face<<<6,12,13,16,17).............................................. UnpackDist(2,0,0,-1,dvcRecvList_Z,0,recvCount_Z,recvbuf_Z,f_odd,Nx,Ny,Nz); UnpackDist(5,-1,0,-1,dvcRecvList_Z,recvCount_Z,recvCount_Z,recvbuf_Z,f_odd,Nx,Ny,Nz); UnpackDist(7,1,0,-1,dvcRecvList_Z,2*recvCount_Z,recvCount_Z,recvbuf_Z,f_even,Nx,Ny,Nz); UnpackDist(7,0,-1,-1,dvcRecvList_Z,3*recvCount_Z,recvCount_Z,recvbuf_Z,f_odd,Nx,Ny,Nz); UnpackDist(9,0,1,-1,dvcRecvList_Z,4*recvCount_Z,recvCount_Z,recvbuf_Z,f_even,Nx,Ny,Nz); //...Map recieve list for the Z face<<<5,11,14,15,18).............................................. UnpackDist(3,0,0,1,dvcRecvList_z,0,recvCount_z,recvbuf_z,f_even,Nx,Ny,Nz); UnpackDist(6,1,0,1,dvcRecvList_z,recvCount_z,recvCount_z,recvbuf_z,f_even,Nx,Ny,Nz); UnpackDist(6,-1,0,1,dvcRecvList_z,2*recvCount_z,recvCount_z,recvbuf_z,f_odd,Nx,Ny,Nz); UnpackDist(8,0,1,1,dvcRecvList_z,3*recvCount_z,recvCount_z,recvbuf_z,f_even,Nx,Ny,Nz); UnpackDist(8,0,-1,1,dvcRecvList_z,4*recvCount_z,recvCount_z,recvbuf_z,f_odd,Nx,Ny,Nz); //.................................................................................. //...Map recieve list for the xy edge <<<8)................................ UnpackDist(3,-1,-1,0,dvcRecvList_XY,0,recvCount_XY,recvbuf_XY,f_odd,Nx,Ny,Nz); //...Map recieve list for the Xy edge <<<9)................................ UnpackDist(5,1,-1,0,dvcRecvList_xY,0,recvCount_xY,recvbuf_xY,f_even,Nx,Ny,Nz); //...Map recieve list for the xY edge <<<10)................................ UnpackDist(4,-1,1,0,dvcRecvList_Xy,0,recvCount_Xy,recvbuf_Xy,f_odd,Nx,Ny,Nz); //...Map recieve list for the XY edge <<<7)................................ UnpackDist(4,1,1,0,dvcRecvList_xy,0,recvCount_xy,recvbuf_xy,f_even,Nx,Ny,Nz); //...Map recieve list for the xz edge <<<12)................................ UnpackDist(5,-1,0,-1,dvcRecvList_XZ,0,recvCount_XZ,recvbuf_XZ,f_odd,Nx,Ny,Nz); //...Map recieve list for the xZ edge <<<14)................................ UnpackDist(6,-1,0,1,dvcRecvList_Xz,0,recvCount_Xz,recvbuf_Xz,f_odd,Nx,Ny,Nz); //...Map recieve list for the Xz edge <<<13)................................ UnpackDist(7,1,0,-1,dvcRecvList_xZ,0,recvCount_xZ,recvbuf_xZ,f_even,Nx,Ny,Nz); //...Map recieve list for the XZ edge <<<11)................................ UnpackDist(6,1,0,1,dvcRecvList_xz,0,recvCount_xz,recvbuf_xz,f_even,Nx,Ny,Nz); //...Map recieve list for the yz edge <<<16)................................ UnpackDist(7,0,-1,-1,dvcRecvList_YZ,0,recvCount_YZ,recvbuf_YZ,f_odd,Nx,Ny,Nz); //...Map recieve list for the yZ edge <<<18)................................ UnpackDist(8,0,-1,1,dvcRecvList_Yz,0,recvCount_Yz,recvbuf_Yz,f_odd,Nx,Ny,Nz); //...Map recieve list for the Yz edge <<<17)................................ UnpackDist(9,0,1,-1,dvcRecvList_yZ,0,recvCount_yZ,recvbuf_yZ,f_even,Nx,Ny,Nz); //...Map recieve list for the YZ edge <<<15)................................ UnpackDist(8,0,1,1,dvcRecvList_yz,0,recvCount_yz,recvbuf_yz,f_even,Nx,Ny,Nz); //................................................................................... //................................................................................... PackDist(1,dvcSendList_x,0,sendCount_x,sendbuf_x,A_even,N); PackDist(1,dvcSendList_x,sendCount_x,sendCount_x,sendbuf_x,B_even,N); //...Packing for X face(1,7,9,11,13)................................ PackDist(0,dvcSendList_X,0,sendCount_X,sendbuf_X,A_odd,N); PackDist(0,dvcSendList_X,sendCount_X,sendCount_X,sendbuf_X,B_odd,N); //...Packing for y face(4,8,9,16,18)................................. PackDist(2,dvcSendList_y,0,sendCount_y,sendbuf_y,A_even,N); PackDist(2,dvcSendList_y,sendCount_y,sendCount_y,sendbuf_y,B_even,N); //...Packing for Y face(3,7,10,15,17)................................. PackDist(1,dvcSendList_Y,0,sendCount_Y,sendbuf_Y,A_odd,N); PackDist(1,dvcSendList_Y,sendCount_Y,sendCount_Y,sendbuf_Y,B_odd,N); //...Packing for z face(6,12,13,16,17)................................ PackDist(3,dvcSendList_z,0,sendCount_z,sendbuf_z,A_even,N); PackDist(3,dvcSendList_z,sendCount_z,sendCount_z,sendbuf_z,B_even,N); //...Packing for Z face(5,11,14,15,18)................................ PackDist(2,dvcSendList_Z,0,sendCount_Z,sendbuf_Z,A_odd,N); PackDist(2,dvcSendList_Z,sendCount_Z,sendCount_Z,sendbuf_Z,B_odd,N); //................................................................................... //................................................................................... // Send all the distributions MPI_Isend(sendbuf_x, 2*sendCount_x,MPI_DOUBLE,rank_x,sendtag,MPI_COMM_WORLD,&req1[0]); MPI_Irecv(recvbuf_X, 2*recvCount_X,MPI_DOUBLE,rank_X,recvtag,MPI_COMM_WORLD,&req2[0]); MPI_Isend(sendbuf_X, 2*sendCount_X,MPI_DOUBLE,rank_X,sendtag,MPI_COMM_WORLD,&req1[1]); MPI_Irecv(recvbuf_x, 2*recvCount_x,MPI_DOUBLE,rank_x,recvtag,MPI_COMM_WORLD,&req2[1]); MPI_Isend(sendbuf_y, 2*sendCount_y,MPI_DOUBLE,rank_y,sendtag,MPI_COMM_WORLD,&req1[2]); MPI_Irecv(recvbuf_Y, 2*recvCount_Y,MPI_DOUBLE,rank_Y,recvtag,MPI_COMM_WORLD,&req2[2]); MPI_Isend(sendbuf_Y, 2*sendCount_Y,MPI_DOUBLE,rank_Y,sendtag,MPI_COMM_WORLD,&req1[3]); MPI_Irecv(recvbuf_y, 2*recvCount_y,MPI_DOUBLE,rank_y,recvtag,MPI_COMM_WORLD,&req2[3]); MPI_Isend(sendbuf_z, 2*sendCount_z,MPI_DOUBLE,rank_z,sendtag,MPI_COMM_WORLD,&req1[4]); MPI_Irecv(recvbuf_Z, 2*recvCount_Z,MPI_DOUBLE,rank_Z,recvtag,MPI_COMM_WORLD,&req2[4]); MPI_Isend(sendbuf_Z, 2*sendCount_Z,MPI_DOUBLE,rank_Z,sendtag,MPI_COMM_WORLD,&req1[5]); MPI_Irecv(recvbuf_z, 2*recvCount_z,MPI_DOUBLE,rank_z,recvtag,MPI_COMM_WORLD,&req2[5]); //................................................................................... SwapD3Q7(ID, A_even, A_odd, Nx, Ny, Nz); SwapD3Q7(ID, B_even, B_odd, Nx, Ny, Nz); //................................................................................... // Wait for completion of D3Q19 communication MPI_Waitall(6,req1,stat1); MPI_Waitall(6,req2,stat2); //................................................................................... // Unpack the distributions on the device //................................................................................... //...Map recieve list for the X face: q=2,8,10,12,13 ................................. UnpackDist(0,-1,0,0,dvcRecvList_X,0,recvCount_X,recvbuf_X,A_odd,Nx,Ny,Nz); UnpackDist(0,-1,0,0,dvcRecvList_X,recvCount_X,recvCount_X,recvbuf_X,B_odd,Nx,Ny,Nz); //................................................................................... //...Map recieve list for the x face: q=1,7,9,11,13.................................. UnpackDist(1,1,0,0,dvcRecvList_x,0,recvCount_x,recvbuf_x,A_even,Nx,Ny,Nz); UnpackDist(1,1,0,0,dvcRecvList_x,recvCount_x,recvCount_x,recvbuf_x,B_even,Nx,Ny,Nz); //................................................................................... //...Map recieve list for the y face: q=4,8,9,16,18 ................................... UnpackDist(1,0,-1,0,dvcRecvList_Y,0,recvCount_Y,recvbuf_Y,A_odd,Nx,Ny,Nz); UnpackDist(1,0,-1,0,dvcRecvList_Y,recvCount_Y,recvCount_Y,recvbuf_Y,B_odd,Nx,Ny,Nz); //................................................................................... //...Map recieve list for the Y face: q=3,7,10,15,17 .................................. UnpackDist(2,0,1,0,dvcRecvList_y,0,recvCount_y,recvbuf_y,A_even,Nx,Ny,Nz); UnpackDist(2,0,1,0,dvcRecvList_y,recvCount_y,recvCount_y,recvbuf_y,B_even,Nx,Ny,Nz); //................................................................................... //...Map recieve list for the z face<<<6,12,13,16,17).............................................. UnpackDist(2,0,0,-1,dvcRecvList_Z,0,recvCount_Z,recvbuf_Z,A_odd,Nx,Ny,Nz); UnpackDist(2,0,0,-1,dvcRecvList_Z,recvCount_Z,recvCount_Z,recvbuf_Z,B_odd,Nx,Ny,Nz); //...Map recieve list for the Z face<<<5,11,14,15,18).............................................. UnpackDist(3,0,0,1,dvcRecvList_z,0,recvCount_z,recvbuf_z,A_even,Nx,Ny,Nz); UnpackDist(3,0,0,1,dvcRecvList_z,recvCount_z,recvCount_z,recvbuf_z,B_even,Nx,Ny,Nz); //.................................................................................. //.................................................................................. ComputeDensityD3Q7(ID, A_even, A_odd, &Den[0], Nx, Ny, Nz); ComputeDensityD3Q7(ID, B_even, B_odd, &Den[N], Nx, Ny, Nz); //************************************************************************* // Compute the phase indicator field //************************************************************************* // ComputePhi(ID, Phi, Copy, Den, N); ComputePhi(ID, Phi, Den, N); //************************************************************************* //................................................................................... PackValues(dvcSendList_x, sendCount_x,sendbuf_x, Phi, N); PackValues(dvcSendList_y, sendCount_y,sendbuf_y, Phi, N); PackValues(dvcSendList_z, sendCount_z,sendbuf_z, Phi, N); PackValues(dvcSendList_X, sendCount_X,sendbuf_X, Phi, N); PackValues(dvcSendList_Y, sendCount_Y,sendbuf_Y, Phi, N); PackValues(dvcSendList_Z, sendCount_Z,sendbuf_Z, Phi, N); PackValues(dvcSendList_xy, sendCount_xy,sendbuf_xy, Phi, N); PackValues(dvcSendList_xY, sendCount_xY,sendbuf_xY, Phi, N); PackValues(dvcSendList_Xy, sendCount_Xy,sendbuf_Xy, Phi, N); PackValues(dvcSendList_XY, sendCount_XY,sendbuf_XY, Phi, N); PackValues(dvcSendList_xz, sendCount_xz,sendbuf_xz, Phi, N); PackValues(dvcSendList_xZ, sendCount_xZ,sendbuf_xZ, Phi, N); PackValues(dvcSendList_Xz, sendCount_Xz,sendbuf_Xz, Phi, N); PackValues(dvcSendList_XZ, sendCount_XZ,sendbuf_XZ, Phi, N); PackValues(dvcSendList_yz, sendCount_yz,sendbuf_yz, Phi, N); PackValues(dvcSendList_yZ, sendCount_yZ,sendbuf_yZ, Phi, N); PackValues(dvcSendList_Yz, sendCount_Yz,sendbuf_Yz, Phi, N); PackValues(dvcSendList_YZ, sendCount_YZ,sendbuf_YZ, Phi, N); //................................................................................... // Send / Recv all the phase indcator field values //................................................................................... MPI_Isend(sendbuf_x, sendCount_x,MPI_DOUBLE,rank_x,sendtag,MPI_COMM_WORLD,&req1[0]); MPI_Irecv(recvbuf_X, recvCount_X,MPI_DOUBLE,rank_X,recvtag,MPI_COMM_WORLD,&req2[0]); MPI_Isend(sendbuf_X, sendCount_X,MPI_DOUBLE,rank_X,sendtag,MPI_COMM_WORLD,&req1[1]); MPI_Irecv(recvbuf_x, recvCount_x,MPI_DOUBLE,rank_x,recvtag,MPI_COMM_WORLD,&req2[1]); MPI_Isend(sendbuf_y, sendCount_y,MPI_DOUBLE,rank_y,sendtag,MPI_COMM_WORLD,&req1[2]); MPI_Irecv(recvbuf_Y, recvCount_Y,MPI_DOUBLE,rank_Y,recvtag,MPI_COMM_WORLD,&req2[2]); MPI_Isend(sendbuf_Y, sendCount_Y,MPI_DOUBLE,rank_Y,sendtag,MPI_COMM_WORLD,&req1[3]); MPI_Irecv(recvbuf_y, recvCount_y,MPI_DOUBLE,rank_y,recvtag,MPI_COMM_WORLD,&req2[3]); MPI_Isend(sendbuf_z, sendCount_z,MPI_DOUBLE,rank_z,sendtag,MPI_COMM_WORLD,&req1[4]); MPI_Irecv(recvbuf_Z, recvCount_Z,MPI_DOUBLE,rank_Z,recvtag,MPI_COMM_WORLD,&req2[4]); MPI_Isend(sendbuf_Z, sendCount_Z,MPI_DOUBLE,rank_Z,sendtag,MPI_COMM_WORLD,&req1[5]); MPI_Irecv(recvbuf_z, recvCount_z,MPI_DOUBLE,rank_z,recvtag,MPI_COMM_WORLD,&req2[5]); MPI_Isend(sendbuf_xy, sendCount_xy,MPI_DOUBLE,rank_xy,sendtag,MPI_COMM_WORLD,&req1[6]); MPI_Irecv(recvbuf_XY, recvCount_XY,MPI_DOUBLE,rank_XY,recvtag,MPI_COMM_WORLD,&req2[6]); MPI_Isend(sendbuf_XY, sendCount_XY,MPI_DOUBLE,rank_XY,sendtag,MPI_COMM_WORLD,&req1[7]); MPI_Irecv(recvbuf_xy, recvCount_xy,MPI_DOUBLE,rank_xy,recvtag,MPI_COMM_WORLD,&req2[7]); MPI_Isend(sendbuf_Xy, sendCount_Xy,MPI_DOUBLE,rank_Xy,sendtag,MPI_COMM_WORLD,&req1[8]); MPI_Irecv(recvbuf_xY, recvCount_xY,MPI_DOUBLE,rank_xY,recvtag,MPI_COMM_WORLD,&req2[8]); MPI_Isend(sendbuf_xY, sendCount_xY,MPI_DOUBLE,rank_xY,sendtag,MPI_COMM_WORLD,&req1[9]); MPI_Irecv(recvbuf_Xy, recvCount_Xy,MPI_DOUBLE,rank_Xy,recvtag,MPI_COMM_WORLD,&req2[9]); MPI_Isend(sendbuf_xz, sendCount_xz,MPI_DOUBLE,rank_xz,sendtag,MPI_COMM_WORLD,&req1[10]); MPI_Irecv(recvbuf_XZ, recvCount_XZ,MPI_DOUBLE,rank_XZ,recvtag,MPI_COMM_WORLD,&req2[10]); MPI_Isend(sendbuf_XZ, sendCount_XZ,MPI_DOUBLE,rank_XZ,sendtag,MPI_COMM_WORLD,&req1[11]); MPI_Irecv(recvbuf_xz, recvCount_xz,MPI_DOUBLE,rank_xz,recvtag,MPI_COMM_WORLD,&req2[11]); MPI_Isend(sendbuf_Xz, sendCount_Xz,MPI_DOUBLE,rank_Xz,sendtag,MPI_COMM_WORLD,&req1[12]); MPI_Irecv(recvbuf_xZ, recvCount_xZ,MPI_DOUBLE,rank_xZ,recvtag,MPI_COMM_WORLD,&req2[12]); MPI_Isend(sendbuf_xZ, sendCount_xZ,MPI_DOUBLE,rank_xZ,sendtag,MPI_COMM_WORLD,&req1[13]); MPI_Irecv(recvbuf_Xz, recvCount_Xz,MPI_DOUBLE,rank_Xz,recvtag,MPI_COMM_WORLD,&req2[13]); MPI_Isend(sendbuf_yz, sendCount_yz,MPI_DOUBLE,rank_yz,sendtag,MPI_COMM_WORLD,&req1[14]); MPI_Irecv(recvbuf_YZ, recvCount_YZ,MPI_DOUBLE,rank_YZ,recvtag,MPI_COMM_WORLD,&req2[14]); MPI_Isend(sendbuf_YZ, sendCount_YZ,MPI_DOUBLE,rank_YZ,sendtag,MPI_COMM_WORLD,&req1[15]); MPI_Irecv(recvbuf_yz, recvCount_yz,MPI_DOUBLE,rank_yz,recvtag,MPI_COMM_WORLD,&req2[15]); MPI_Isend(sendbuf_Yz, sendCount_Yz,MPI_DOUBLE,rank_Yz,sendtag,MPI_COMM_WORLD,&req1[16]); MPI_Irecv(recvbuf_yZ, recvCount_yZ,MPI_DOUBLE,rank_yZ,recvtag,MPI_COMM_WORLD,&req2[16]); MPI_Isend(sendbuf_yZ, sendCount_yZ,MPI_DOUBLE,rank_yZ,sendtag,MPI_COMM_WORLD,&req1[17]); MPI_Irecv(recvbuf_Yz, recvCount_Yz,MPI_DOUBLE,rank_Yz,recvtag,MPI_COMM_WORLD,&req2[17]); //................................................................................... //................................................................................... // Wait for completion of Indicator Field communication //................................................................................... MPI_Waitall(18,req1,stat1); MPI_Waitall(18,req2,stat2); DeviceBarrier(); //................................................................................... //................................................................................... UnpackValues(dvcRecvList_x, recvCount_x,recvbuf_x, Phi, N); UnpackValues(dvcRecvList_y, recvCount_y,recvbuf_y, Phi, N); UnpackValues(dvcRecvList_z, recvCount_z,recvbuf_z, Phi, N); UnpackValues(dvcRecvList_X, recvCount_X,recvbuf_X, Phi, N); UnpackValues(dvcRecvList_Y, recvCount_Y,recvbuf_Y, Phi, N); UnpackValues(dvcRecvList_Z, recvCount_Z,recvbuf_Z, Phi, N); UnpackValues(dvcRecvList_xy, recvCount_xy,recvbuf_xy, Phi, N); UnpackValues(dvcRecvList_xY, recvCount_xY,recvbuf_xY, Phi, N); UnpackValues(dvcRecvList_Xy, recvCount_Xy,recvbuf_Xy, Phi, N); UnpackValues(dvcRecvList_XY, recvCount_XY,recvbuf_XY, Phi, N); UnpackValues(dvcRecvList_xz, recvCount_xz,recvbuf_xz, Phi, N); UnpackValues(dvcRecvList_xZ, recvCount_xZ,recvbuf_xZ, Phi, N); UnpackValues(dvcRecvList_Xz, recvCount_Xz,recvbuf_Xz, Phi, N); UnpackValues(dvcRecvList_XZ, recvCount_XZ,recvbuf_XZ, Phi, N); UnpackValues(dvcRecvList_yz, recvCount_yz,recvbuf_yz, Phi, N); UnpackValues(dvcRecvList_yZ, recvCount_yZ,recvbuf_yZ, Phi, N); UnpackValues(dvcRecvList_Yz, recvCount_Yz,recvbuf_Yz, Phi, N); UnpackValues(dvcRecvList_YZ, recvCount_YZ,recvbuf_YZ, Phi, N); //................................................................................... if (pBC && kproc == 0) { PressureBC_inlet(f_even,f_odd,din,Nx,Ny,Nz); ColorBC_inlet(Phi,Den,A_even,A_odd,B_even,B_odd,Nx,Ny,Nz); } if (pBC && kproc == nprocz-1){ PressureBC_outlet(f_even,f_odd,dout,Nx,Ny,Nz,Nx*Ny*(Nz-2)); ColorBC_outlet(Phi,Den,A_even,A_odd,B_even,B_odd,Nx,Ny,Nz); } //................................................................................... MPI_Barrier(MPI_COMM_WORLD); // Timestep completed! timestep++; //................................................................... if (timestep%1000 == 995){ //........................................................................... // Copy the phase indicator field for the earlier timestep DeviceBarrier(); CopyToHost(Phase_tplus.data,Phi,N*sizeof(double)); //........................................................................... } if (timestep%1000 == 0){ //........................................................................... // Copy the data for for the analysis timestep //........................................................................... // Copy the phase from the GPU -> CPU //........................................................................... DeviceBarrier(); ComputePressureD3Q19(ID,f_even,f_odd,Pressure,Nx,Ny,Nz); CopyToHost(Phase.data,Phi,N*sizeof(double)); CopyToHost(Press.data,Pressure,N*sizeof(double)); CopyToHost(Vel_x.data,&Velocity[0],N*sizeof(double)); CopyToHost(Vel_y.data,&Velocity[N],N*sizeof(double)); CopyToHost(Vel_z.data,&Velocity[2*N],N*sizeof(double)); MPI_Barrier(MPI_COMM_WORLD); } if (timestep%1000 == 5){ //........................................................................... // Copy the phase indicator field for the later timestep DeviceBarrier(); CopyToHost(Phase_tminus.data,Phi,N*sizeof(double)); //........................................................................... // Calculate the time derivative of the phase indicator field for (n=0; n 0.0 ){ // 1-D index for this cube corner n = i + j*Nx + k*Nx*Ny; // Compute the non-wetting phase volume contribution if ( Phase(i,j,k) > 0.0 ) nwp_volume += 1.0; // volume averages over the non-wetting phase if ( Phase(i,j,k) > 0.999 ){ // volume the excludes the interfacial region vol_n += 1.0; // pressure pan += Press(i,j,k); // velocity van(0) += Vel_x(i,j,k); van(1) += Vel_y(i,j,k); van(2) += Vel_z(i,j,k); } // volume averages over the wetting phase if ( Phase(i,j,k) < -0.999 ){ // volume the excludes the interfacial region vol_w += 1.0; // pressure paw += Press(i,j,k); // velocity vaw(0) += Vel_x(i,j,k); vaw(1) += Vel_y(i,j,k); vaw(2) += Vel_z(i,j,k); } } } } } for (c=0;c 0.0) trJwn_global /= trawn_global; if (trawn_global > 0.0) trRwn_global /= trawn_global; trRwn_global = 2.0*fabs(trRwn_global); trJwn_global = fabs(trJwn_global); if (awn_global > 0.0) for (i=0; i<3; i++) vawn_global(i) /= awn_global; if (awn_global > 0.0) for (i=0; i<6; i++) Gwn_global(i) /= awn_global; if (ans_global > 0.0) for (i=0; i<6; i++) Gns_global(i) /= ans_global; if (aws_global > 0.0) for (i=0; i<6; i++) Gws_global(i) /= aws_global; sat_w = 1.0 - nwp_volume_global*iVol_global/porosity; // Compute the specific interfacial areas and common line length (dimensionless per unit volume) awn_global = awn_global*iVol_global*D; ans_global = ans_global*iVol_global*D; aws_global = aws_global*iVol_global*D; dEs = dEs*iVol_global*D; lwns_global = lwns_global*iVol_global*D*D; pc_global = (pan_global-paw_global)*D/(5.796*alpha); Jwn_global *= D; trJwn_global *= D; //......................................................................... if (rank==0){ fprintf(TIMELOG,"%i %.5g ",timestep-5+timestepMax*SimNumber,dEs); // change in surface energy fprintf(TIMELOG,"%.5g %.5g %.5g ",sat_w,paw_global,pan_global); // saturation and pressure fprintf(TIMELOG,"%.5g %.5g %.5g ",awn_global,ans_global,aws_global); // interfacial areas fprintf(TIMELOG,"%.5g %5g ",Jwn_global, Kwn_global); // curvature of wn interface fprintf(TIMELOG,"%.5g ",lwns_global); // common curve length fprintf(TIMELOG,"%.5g ",efawns_global); // average contact angle fprintf(TIMELOG,"%.5g %.5g %.5g ",vaw_global(0),vaw_global(1),vaw_global(2)); // average velocity of w phase fprintf(TIMELOG,"%.5g %.5g %.5g ",van_global(0),van_global(1),van_global(2)); // average velocity of n phase fprintf(TIMELOG,"%.5g %.5g %.5g ",vawn_global(0),vawn_global(1),vawn_global(2)); // velocity of wn interface fprintf(TIMELOG,"%.5g %.5g %.5g %.5g %.5g %.5g ", Gwn_global(0),Gwn_global(1),Gwn_global(2),Gwn_global(3),Gwn_global(4),Gwn_global(5)); // orientation of wn interface fprintf(TIMELOG,"%.5g %.5g %.5g %.5g %.5g %.5g ", Gns_global(0),Gns_global(1),Gns_global(2),Gns_global(3),Gns_global(4),Gns_global(5)); // orientation of ns interface fprintf(TIMELOG,"%.5g %.5g %.5g %.5g %.5g %.5g ", Gws_global(0),Gws_global(1),Gws_global(2),Gws_global(3),Gws_global(4),Gws_global(5)); // orientation of ws interface fprintf(TIMELOG,"%.5g %.5g %.5g %.5g ",trawn_global, trJwn_global, trRwn_global, pc_global); // Trimmed curvature fprintf(TIMELOG,"%.5g %.5g %.5g\n",Fx, Fy, Fz); // External force fflush(TIMELOG); } } } //************************************************************************/ DeviceBarrier(); MPI_Barrier(MPI_COMM_WORLD); stoptime = MPI_Wtime(); if (rank==0){ fprintf(FINALSTATE,"%i %.5g ",timestep-5,dEs); // change in surface energy fprintf(FINALSTATE,"%.5g %.5g %.5g ",sat_w,paw_global,pan_global); // saturation and pressure fprintf(FINALSTATE,"%.5g %.5g %.5g ",awn_global,ans_global,aws_global); // interfacial areas fprintf(FINALSTATE,"%.5g %5g ",Jwn_global, Kwn_global); // curvature of wn interface fprintf(FINALSTATE,"%.5g ",lwns_global); // common curve length fprintf(FINALSTATE,"%.5g ",efawns_global); // average contact angle fprintf(FINALSTATE,"%.5g %.5g %.5g ",vaw_global(0),vaw_global(1),vaw_global(2)); // average velocity of w phase fprintf(FINALSTATE,"%.5g %.5g %.5g ",van_global(0),van_global(1),van_global(2)); // average velocity of n phase fprintf(FINALSTATE,"%.5g %.5g %.5g ",vawn_global(0),vawn_global(1),vawn_global(2)); // velocity of wn interface fprintf(FINALSTATE,"%.5g %.5g %.5g %.5g %.5g %.5g ", Gwn_global(0),Gwn_global(1),Gwn_global(2),Gwn_global(3),Gwn_global(4),Gwn_global(5)); // orientation of wn interface fprintf(FINALSTATE,"%.5g %.5g %.5g %.5g %.5g %.5g ", Gns_global(0),Gns_global(1),Gns_global(2),Gns_global(3),Gns_global(4),Gns_global(5)); // orientation of ns interface fprintf(FINALSTATE,"%.5g %.5g %.5g %.5g %.5g %.5g ", Gws_global(0),Gws_global(1),Gws_global(2),Gws_global(3),Gws_global(4),Gws_global(5)); // orientation of ws interface fprintf(FINALSTATE,"%.5g %.5g %.5g %.5g ",trawn_global, trJwn_global, trRwn_global, pc_global); // Trimmed curvature fprintf(FINALSTATE,"%.5g %.5g %.5g \n",Fx, Fy, Fz); // External force } sprintf(tmpstr,"Sim%03d",SimNumber); if (rank==0){ mkdir(tmpstr,0777); } MPI_Barrier(MPI_COMM_WORLD); // Copy the data to the CPU CopyToHost(cDistEven,f_even,10*N*sizeof(double)); CopyToHost(cDistOdd,f_odd,9*N*sizeof(double)); CopyToHost(cDen,Den,2*N*sizeof(double)); // Read in the restart file to CPU buffers sprintf(LocalRestartFile,"%s/%s%s",tmpstr,"Restart.",LocalRankString); WriteCheckpoint(LocalRestartFile, cDen, cDistEven, cDistOdd, N); sprintf(LocalRankFilename,"%s/%s%s",tmpstr,"dPdt.",LocalRankString); SPEED = fopen(LocalRankFilename,"wb"); fwrite(dPdt.data,8,N,SPEED); fclose(SPEED); } fclose(TIMELOG); fclose(FINALSTATE); if (rank==0) printf("-------------------------------------------------------------------\n"); // Compute the walltime per timestep cputime = (stoptime - starttime)/timestep; // Performance obtained from each node double MLUPS = double(Nx*Ny*Nz)/cputime/1000000; if (rank==0) printf("********************************************************\n"); if (rank==0) printf("CPU time = %f \n", cputime); if (rank==0) printf("Lattice update rate (per core)= %f MLUPS \n", MLUPS); MLUPS *= nprocs; if (rank==0) printf("Lattice update rate (total)= %f MLUPS \n", MLUPS); if (rank==0) printf("********************************************************\n"); // **************************************************** MPI_Barrier(MPI_COMM_WORLD); MPI_Finalize(); // **************************************************** }