#include "common/ScaLBL.h"
// NOTE(review): the header name of this include was missing from the reviewed
// text; <chrono> is assumed because GetPerformance() uses std::chrono -- confirm.
#include <chrono>

/**
 * Construct a communicator from a decomposed domain.
 *
 * Copies the grid size, neighbour ranks and per-direction send/receive counts
 * out of Dm, allocates the send/receive buffers and index lists with
 * ScaLBL_AllocateZeroCopy, copies the send/receive site lists into them, builds
 * the receive distribution maps with D3Q19_MapRecv, and finally totals the
 * send/receive counts.
 *
 * Direction suffixes follow the convention used throughout this class: a lower
 * case letter and an upper case letter denote the two opposite neighbours along
 * that axis, and two letters denote an edge neighbour.
 *
 * @param Dm  domain description that supplies sizes, ranks, counts and lists.
 *            NOTE(review): the template argument of std::shared_ptr was missing
 *            from the reviewed text; Domain is assumed -- confirm against the
 *            declaration in common/ScaLBL.h.
 */
ScaLBL_Communicator::ScaLBL_Communicator(std::shared_ptr<Domain> Dm){
    //......................................................................................
    Lock=false; // unlock the communicator
    //......................................................................................
    // Keep a separate duplicate of the domain communicator for this object
    MPI_COMM_SCALBL = Dm->Comm.dup();
    //......................................................................................
    // Copy the domain size and communication information directly from Dm
    Nx = Dm->Nx;
    Ny = Dm->Ny;
    Nz = Dm->Nz;
    N = Nx*Ny*Nz;
    next=0;
    rank=Dm->rank();
    // Face neighbours
    rank_x=Dm->rank_x();
    rank_y=Dm->rank_y();
    rank_z=Dm->rank_z();
    rank_X=Dm->rank_X();
    rank_Y=Dm->rank_Y();
    rank_Z=Dm->rank_Z();
    // Edge neighbours
    rank_xy=Dm->rank_xy();
    rank_XY=Dm->rank_XY();
    rank_xY=Dm->rank_xY();
    rank_Xy=Dm->rank_Xy();
    rank_xz=Dm->rank_xz();
    rank_XZ=Dm->rank_XZ();
    rank_xZ=Dm->rank_xZ();
    rank_Xz=Dm->rank_Xz();
    rank_yz=Dm->rank_yz();
    rank_YZ=Dm->rank_YZ();
    rank_yZ=Dm->rank_yZ();
    rank_Yz=Dm->rank_Yz();
    // Number of sites sent in each direction
    sendCount_x=Dm->sendCount("x");
    sendCount_y=Dm->sendCount("y");
    sendCount_z=Dm->sendCount("z");
    sendCount_X=Dm->sendCount("X");
    sendCount_Y=Dm->sendCount("Y");
    sendCount_Z=Dm->sendCount("Z");
    sendCount_xy=Dm->sendCount("xy");
    sendCount_yz=Dm->sendCount("yz");
    sendCount_xz=Dm->sendCount("xz");
    sendCount_Xy=Dm->sendCount("Xy");
    sendCount_Yz=Dm->sendCount("Yz");
    sendCount_xZ=Dm->sendCount("xZ");
    sendCount_xY=Dm->sendCount("xY");
    sendCount_yZ=Dm->sendCount("yZ");
    sendCount_Xz=Dm->sendCount("Xz");
    sendCount_XY=Dm->sendCount("XY");
    sendCount_YZ=Dm->sendCount("YZ");
    sendCount_XZ=Dm->sendCount("XZ");
    // Number of sites received from each direction
    recvCount_x=Dm->recvCount("x");
    recvCount_y=Dm->recvCount("y");
    recvCount_z=Dm->recvCount("z");
    recvCount_X=Dm->recvCount("X");
    recvCount_Y=Dm->recvCount("Y");
    recvCount_Z=Dm->recvCount("Z");
    recvCount_xy=Dm->recvCount("xy");
    recvCount_yz=Dm->recvCount("yz");
    recvCount_xz=Dm->recvCount("xz");
    recvCount_Xy=Dm->recvCount("Xy");
    recvCount_Yz=Dm->recvCount("Yz");
    recvCount_xZ=Dm->recvCount("xZ");
    recvCount_xY=Dm->recvCount("xY");
    recvCount_yZ=Dm->recvCount("yZ");
    recvCount_Xz=Dm->recvCount("Xz");
    recvCount_XY=Dm->recvCount("XY");
    recvCount_YZ=Dm->recvCount("YZ");
    recvCount_XZ=Dm->recvCount("XZ");
    // Position in, and extent of, the process grid
    iproc = Dm->iproc();
    jproc = Dm->jproc();
    kproc = Dm->kproc();
    nprocx = Dm->nprocx();
    nprocy = Dm->nprocy();
    nprocz = Dm->nprocz();
    BoundaryCondition = Dm->BoundaryCondition;
    //......................................................................................
    // Send buffers: faces hold 2*5 doubles per site, edges hold 2 doubles per site
    ScaLBL_AllocateZeroCopy((void **) &sendbuf_x, 2*5*sendCount_x*sizeof(double));
    ScaLBL_AllocateZeroCopy((void **) &sendbuf_X, 2*5*sendCount_X*sizeof(double));
    ScaLBL_AllocateZeroCopy((void **) &sendbuf_y, 2*5*sendCount_y*sizeof(double));
    ScaLBL_AllocateZeroCopy((void **) &sendbuf_Y, 2*5*sendCount_Y*sizeof(double));
    ScaLBL_AllocateZeroCopy((void **) &sendbuf_z, 2*5*sendCount_z*sizeof(double));
    ScaLBL_AllocateZeroCopy((void **) &sendbuf_Z, 2*5*sendCount_Z*sizeof(double));
    ScaLBL_AllocateZeroCopy((void **) &sendbuf_xy, 2*sendCount_xy*sizeof(double));
    ScaLBL_AllocateZeroCopy((void **) &sendbuf_xY, 2*sendCount_xY*sizeof(double));
    ScaLBL_AllocateZeroCopy((void **) &sendbuf_Xy, 2*sendCount_Xy*sizeof(double));
    ScaLBL_AllocateZeroCopy((void **) &sendbuf_XY, 2*sendCount_XY*sizeof(double));
    ScaLBL_AllocateZeroCopy((void **) &sendbuf_xz, 2*sendCount_xz*sizeof(double));
    ScaLBL_AllocateZeroCopy((void **) &sendbuf_xZ, 2*sendCount_xZ*sizeof(double));
    ScaLBL_AllocateZeroCopy((void **) &sendbuf_Xz, 2*sendCount_Xz*sizeof(double));
    ScaLBL_AllocateZeroCopy((void **) &sendbuf_XZ, 2*sendCount_XZ*sizeof(double));
    ScaLBL_AllocateZeroCopy((void **) &sendbuf_yz, 2*sendCount_yz*sizeof(double));
    ScaLBL_AllocateZeroCopy((void **) &sendbuf_yZ, 2*sendCount_yZ*sizeof(double));
    ScaLBL_AllocateZeroCopy((void **) &sendbuf_Yz, 2*sendCount_Yz*sizeof(double));
    ScaLBL_AllocateZeroCopy((void **) &sendbuf_YZ, 2*sendCount_YZ*sizeof(double));
    //......................................................................................
    // Receive buffers, sized like the send buffers but from the receive counts
    ScaLBL_AllocateZeroCopy((void **) &recvbuf_x, 2*5*recvCount_x*sizeof(double));
    ScaLBL_AllocateZeroCopy((void **) &recvbuf_X, 2*5*recvCount_X*sizeof(double));
    ScaLBL_AllocateZeroCopy((void **) &recvbuf_y, 2*5*recvCount_y*sizeof(double));
    ScaLBL_AllocateZeroCopy((void **) &recvbuf_Y, 2*5*recvCount_Y*sizeof(double));
    ScaLBL_AllocateZeroCopy((void **) &recvbuf_z, 2*5*recvCount_z*sizeof(double));
    ScaLBL_AllocateZeroCopy((void **) &recvbuf_Z, 2*5*recvCount_Z*sizeof(double));
    ScaLBL_AllocateZeroCopy((void **) &recvbuf_xy, 2*recvCount_xy*sizeof(double));
    ScaLBL_AllocateZeroCopy((void **) &recvbuf_xY, 2*recvCount_xY*sizeof(double));
    ScaLBL_AllocateZeroCopy((void **) &recvbuf_Xy, 2*recvCount_Xy*sizeof(double));
    ScaLBL_AllocateZeroCopy((void **) &recvbuf_XY, 2*recvCount_XY*sizeof(double));
    ScaLBL_AllocateZeroCopy((void **) &recvbuf_xz, 2*recvCount_xz*sizeof(double));
    ScaLBL_AllocateZeroCopy((void **) &recvbuf_xZ, 2*recvCount_xZ*sizeof(double));
    ScaLBL_AllocateZeroCopy((void **) &recvbuf_Xz, 2*recvCount_Xz*sizeof(double));
    ScaLBL_AllocateZeroCopy((void **) &recvbuf_XZ, 2*recvCount_XZ*sizeof(double));
    ScaLBL_AllocateZeroCopy((void **) &recvbuf_yz, 2*recvCount_yz*sizeof(double));
    ScaLBL_AllocateZeroCopy((void **) &recvbuf_yZ, 2*recvCount_yZ*sizeof(double));
    ScaLBL_AllocateZeroCopy((void **) &recvbuf_Yz, 2*recvCount_Yz*sizeof(double));
    ScaLBL_AllocateZeroCopy((void **) &recvbuf_YZ, 2*recvCount_YZ*sizeof(double));
    //......................................................................................
    // Send site lists: one int per sent site
    ScaLBL_AllocateZeroCopy((void **) &dvcSendList_x, sendCount_x*sizeof(int));
    ScaLBL_AllocateZeroCopy((void **) &dvcSendList_X, sendCount_X*sizeof(int));
    ScaLBL_AllocateZeroCopy((void **) &dvcSendList_y, sendCount_y*sizeof(int));
    ScaLBL_AllocateZeroCopy((void **) &dvcSendList_Y, sendCount_Y*sizeof(int));
    ScaLBL_AllocateZeroCopy((void **) &dvcSendList_z, sendCount_z*sizeof(int));
    ScaLBL_AllocateZeroCopy((void **) &dvcSendList_Z, sendCount_Z*sizeof(int));
    ScaLBL_AllocateZeroCopy((void **) &dvcSendList_xy, sendCount_xy*sizeof(int));
    ScaLBL_AllocateZeroCopy((void **) &dvcSendList_xY, sendCount_xY*sizeof(int));
    ScaLBL_AllocateZeroCopy((void **) &dvcSendList_Xy, sendCount_Xy*sizeof(int));
    ScaLBL_AllocateZeroCopy((void **) &dvcSendList_XY, sendCount_XY*sizeof(int));
    ScaLBL_AllocateZeroCopy((void **) &dvcSendList_xz, sendCount_xz*sizeof(int));
    ScaLBL_AllocateZeroCopy((void **) &dvcSendList_xZ, sendCount_xZ*sizeof(int));
    ScaLBL_AllocateZeroCopy((void **) &dvcSendList_Xz, sendCount_Xz*sizeof(int));
    ScaLBL_AllocateZeroCopy((void **) &dvcSendList_XZ, sendCount_XZ*sizeof(int));
    ScaLBL_AllocateZeroCopy((void **) &dvcSendList_yz, sendCount_yz*sizeof(int));
    ScaLBL_AllocateZeroCopy((void **) &dvcSendList_yZ, sendCount_yZ*sizeof(int));
    ScaLBL_AllocateZeroCopy((void **) &dvcSendList_Yz, sendCount_Yz*sizeof(int));
    ScaLBL_AllocateZeroCopy((void **) &dvcSendList_YZ, sendCount_YZ*sizeof(int));
    //......................................................................................
    // Receive site lists: one int per received site
    ScaLBL_AllocateZeroCopy((void **) &dvcRecvList_x, recvCount_x*sizeof(int));
    ScaLBL_AllocateZeroCopy((void **) &dvcRecvList_X, recvCount_X*sizeof(int));
    ScaLBL_AllocateZeroCopy((void **) &dvcRecvList_y, recvCount_y*sizeof(int));
    ScaLBL_AllocateZeroCopy((void **) &dvcRecvList_Y, recvCount_Y*sizeof(int));
    ScaLBL_AllocateZeroCopy((void **) &dvcRecvList_z, recvCount_z*sizeof(int));
    ScaLBL_AllocateZeroCopy((void **) &dvcRecvList_Z, recvCount_Z*sizeof(int));
    ScaLBL_AllocateZeroCopy((void **) &dvcRecvList_xy, recvCount_xy*sizeof(int));
    ScaLBL_AllocateZeroCopy((void **) &dvcRecvList_xY, recvCount_xY*sizeof(int));
    ScaLBL_AllocateZeroCopy((void **) &dvcRecvList_Xy, recvCount_Xy*sizeof(int));
    ScaLBL_AllocateZeroCopy((void **) &dvcRecvList_XY, recvCount_XY*sizeof(int));
    ScaLBL_AllocateZeroCopy((void **) &dvcRecvList_xz, recvCount_xz*sizeof(int));
    ScaLBL_AllocateZeroCopy((void **) &dvcRecvList_xZ, recvCount_xZ*sizeof(int));
    ScaLBL_AllocateZeroCopy((void **) &dvcRecvList_Xz, recvCount_Xz*sizeof(int));
    ScaLBL_AllocateZeroCopy((void **) &dvcRecvList_XZ, recvCount_XZ*sizeof(int));
    ScaLBL_AllocateZeroCopy((void **) &dvcRecvList_yz, recvCount_yz*sizeof(int));
    ScaLBL_AllocateZeroCopy((void **) &dvcRecvList_yZ, recvCount_yZ*sizeof(int));
    ScaLBL_AllocateZeroCopy((void **) &dvcRecvList_Yz, recvCount_Yz*sizeof(int));
    ScaLBL_AllocateZeroCopy((void **) &dvcRecvList_YZ, recvCount_YZ*sizeof(int));
    //......................................................................................
    // Receive distribution maps: 5 entries per site on faces (filled below in five
    // segments of recvCount each), 1 entry per site on edges
    ScaLBL_AllocateZeroCopy((void **) &dvcRecvDist_x, 5*recvCount_x*sizeof(int));
    ScaLBL_AllocateZeroCopy((void **) &dvcRecvDist_X, 5*recvCount_X*sizeof(int));
    ScaLBL_AllocateZeroCopy((void **) &dvcRecvDist_y, 5*recvCount_y*sizeof(int));
    ScaLBL_AllocateZeroCopy((void **) &dvcRecvDist_Y, 5*recvCount_Y*sizeof(int));
    ScaLBL_AllocateZeroCopy((void **) &dvcRecvDist_z, 5*recvCount_z*sizeof(int));
    ScaLBL_AllocateZeroCopy((void **) &dvcRecvDist_Z, 5*recvCount_Z*sizeof(int));
    ScaLBL_AllocateZeroCopy((void **) &dvcRecvDist_xy, recvCount_xy*sizeof(int));
    ScaLBL_AllocateZeroCopy((void **) &dvcRecvDist_xY, recvCount_xY*sizeof(int));
    ScaLBL_AllocateZeroCopy((void **) &dvcRecvDist_Xy, recvCount_Xy*sizeof(int));
    ScaLBL_AllocateZeroCopy((void **) &dvcRecvDist_XY, recvCount_XY*sizeof(int));
    ScaLBL_AllocateZeroCopy((void **) &dvcRecvDist_xz, recvCount_xz*sizeof(int));
    ScaLBL_AllocateZeroCopy((void **) &dvcRecvDist_xZ, recvCount_xZ*sizeof(int));
    ScaLBL_AllocateZeroCopy((void **) &dvcRecvDist_Xz, recvCount_Xz*sizeof(int));
    ScaLBL_AllocateZeroCopy((void **) &dvcRecvDist_XZ, recvCount_XZ*sizeof(int));
    ScaLBL_AllocateZeroCopy((void **) &dvcRecvDist_yz, recvCount_yz*sizeof(int));
    ScaLBL_AllocateZeroCopy((void **) &dvcRecvDist_yZ, recvCount_yZ*sizeof(int));
    ScaLBL_AllocateZeroCopy((void **) &dvcRecvDist_Yz, recvCount_Yz*sizeof(int));
    ScaLBL_AllocateZeroCopy((void **) &dvcRecvDist_YZ, recvCount_YZ*sizeof(int));
    //......................................................................................
    // Copy the send lists supplied by the domain
    ScaLBL_CopyToZeroCopy(dvcSendList_x,Dm->sendList("x"),sendCount_x*sizeof(int));
    ScaLBL_CopyToZeroCopy(dvcSendList_X,Dm->sendList("X"),sendCount_X*sizeof(int));
    ScaLBL_CopyToZeroCopy(dvcSendList_y,Dm->sendList("y"),sendCount_y*sizeof(int));
    ScaLBL_CopyToZeroCopy(dvcSendList_Y,Dm->sendList("Y"),sendCount_Y*sizeof(int));
    ScaLBL_CopyToZeroCopy(dvcSendList_z,Dm->sendList("z"),sendCount_z*sizeof(int));
    ScaLBL_CopyToZeroCopy(dvcSendList_Z,Dm->sendList("Z"),sendCount_Z*sizeof(int));
    ScaLBL_CopyToZeroCopy(dvcSendList_xy,Dm->sendList("xy"),sendCount_xy*sizeof(int));
    ScaLBL_CopyToZeroCopy(dvcSendList_XY,Dm->sendList("XY"),sendCount_XY*sizeof(int));
    ScaLBL_CopyToZeroCopy(dvcSendList_xY,Dm->sendList("xY"),sendCount_xY*sizeof(int));
    ScaLBL_CopyToZeroCopy(dvcSendList_Xy,Dm->sendList("Xy"),sendCount_Xy*sizeof(int));
    ScaLBL_CopyToZeroCopy(dvcSendList_xz,Dm->sendList("xz"),sendCount_xz*sizeof(int));
    ScaLBL_CopyToZeroCopy(dvcSendList_XZ,Dm->sendList("XZ"),sendCount_XZ*sizeof(int));
    ScaLBL_CopyToZeroCopy(dvcSendList_xZ,Dm->sendList("xZ"),sendCount_xZ*sizeof(int));
    ScaLBL_CopyToZeroCopy(dvcSendList_Xz,Dm->sendList("Xz"),sendCount_Xz*sizeof(int));
    ScaLBL_CopyToZeroCopy(dvcSendList_yz,Dm->sendList("yz"),sendCount_yz*sizeof(int));
    ScaLBL_CopyToZeroCopy(dvcSendList_YZ,Dm->sendList("YZ"),sendCount_YZ*sizeof(int));
    ScaLBL_CopyToZeroCopy(dvcSendList_yZ,Dm->sendList("yZ"),sendCount_yZ*sizeof(int));
    ScaLBL_CopyToZeroCopy(dvcSendList_Yz,Dm->sendList("Yz"),sendCount_Yz*sizeof(int));
    //......................................................................................
    // Copy the receive lists supplied by the domain
    ScaLBL_CopyToZeroCopy(dvcRecvList_x,Dm->recvList("x"),recvCount_x*sizeof(int));
    ScaLBL_CopyToZeroCopy(dvcRecvList_X,Dm->recvList("X"),recvCount_X*sizeof(int));
    ScaLBL_CopyToZeroCopy(dvcRecvList_y,Dm->recvList("y"),recvCount_y*sizeof(int));
    ScaLBL_CopyToZeroCopy(dvcRecvList_Y,Dm->recvList("Y"),recvCount_Y*sizeof(int));
    ScaLBL_CopyToZeroCopy(dvcRecvList_z,Dm->recvList("z"),recvCount_z*sizeof(int));
    ScaLBL_CopyToZeroCopy(dvcRecvList_Z,Dm->recvList("Z"),recvCount_Z*sizeof(int));
    ScaLBL_CopyToZeroCopy(dvcRecvList_xy,Dm->recvList("xy"),recvCount_xy*sizeof(int));
    ScaLBL_CopyToZeroCopy(dvcRecvList_XY,Dm->recvList("XY"),recvCount_XY*sizeof(int));
    ScaLBL_CopyToZeroCopy(dvcRecvList_xY,Dm->recvList("xY"),recvCount_xY*sizeof(int));
    ScaLBL_CopyToZeroCopy(dvcRecvList_Xy,Dm->recvList("Xy"),recvCount_Xy*sizeof(int));
    ScaLBL_CopyToZeroCopy(dvcRecvList_xz,Dm->recvList("xz"),recvCount_xz*sizeof(int));
    ScaLBL_CopyToZeroCopy(dvcRecvList_XZ,Dm->recvList("XZ"),recvCount_XZ*sizeof(int));
    ScaLBL_CopyToZeroCopy(dvcRecvList_xZ,Dm->recvList("xZ"),recvCount_xZ*sizeof(int));
    ScaLBL_CopyToZeroCopy(dvcRecvList_Xz,Dm->recvList("Xz"),recvCount_Xz*sizeof(int));
    ScaLBL_CopyToZeroCopy(dvcRecvList_yz,Dm->recvList("yz"),recvCount_yz*sizeof(int));
    ScaLBL_CopyToZeroCopy(dvcRecvList_YZ,Dm->recvList("YZ"),recvCount_YZ*sizeof(int));
    ScaLBL_CopyToZeroCopy(dvcRecvList_yZ,Dm->recvList("yZ"),recvCount_yZ*sizeof(int));
    ScaLBL_CopyToZeroCopy(dvcRecvList_Yz,Dm->recvList("Yz"),recvCount_Yz*sizeof(int));
    //......................................................................................
    MPI_COMM_SCALBL.barrier();
    //...................................................................................
    // Set up the receive distribution lists.
    // Each face map has five segments of recvCount entries, one per (Cqx,Cqy,Cqz)
    // direction passed to D3Q19_MapRecv; each edge map has a single segment.
    //...................................................................................
    //...Receive map for recvList("X"): q=2,8,10,12,14
    D3Q19_MapRecv(-1,0,0, Dm->recvList("X"),0,recvCount_X,dvcRecvDist_X);
    D3Q19_MapRecv(-1,-1,0,Dm->recvList("X"),recvCount_X,recvCount_X,dvcRecvDist_X);
    D3Q19_MapRecv(-1,1,0, Dm->recvList("X"),2*recvCount_X,recvCount_X,dvcRecvDist_X);
    D3Q19_MapRecv(-1,0,-1,Dm->recvList("X"),3*recvCount_X,recvCount_X,dvcRecvDist_X);
    D3Q19_MapRecv(-1,0,1, Dm->recvList("X"),4*recvCount_X,recvCount_X,dvcRecvDist_X);
    //...Receive map for recvList("x"): q=1,7,9,11,13
    D3Q19_MapRecv(1,0,0, Dm->recvList("x"),0,recvCount_x,dvcRecvDist_x);
    D3Q19_MapRecv(1,1,0, Dm->recvList("x"),recvCount_x,recvCount_x,dvcRecvDist_x);
    D3Q19_MapRecv(1,-1,0,Dm->recvList("x"),2*recvCount_x,recvCount_x,dvcRecvDist_x);
    D3Q19_MapRecv(1,0,1, Dm->recvList("x"),3*recvCount_x,recvCount_x,dvcRecvDist_x);
    D3Q19_MapRecv(1,0,-1,Dm->recvList("x"),4*recvCount_x,recvCount_x,dvcRecvDist_x);
    //...Receive map for recvList("Y"): q=4,8,9,16,18
    D3Q19_MapRecv(0,-1,0, Dm->recvList("Y"),0,recvCount_Y,dvcRecvDist_Y);
    D3Q19_MapRecv(-1,-1,0,Dm->recvList("Y"),recvCount_Y,recvCount_Y,dvcRecvDist_Y);
    D3Q19_MapRecv(1,-1,0, Dm->recvList("Y"),2*recvCount_Y,recvCount_Y,dvcRecvDist_Y);
    D3Q19_MapRecv(0,-1,-1,Dm->recvList("Y"),3*recvCount_Y,recvCount_Y,dvcRecvDist_Y);
    D3Q19_MapRecv(0,-1,1, Dm->recvList("Y"),4*recvCount_Y,recvCount_Y,dvcRecvDist_Y);
    //...Receive map for recvList("y"): q=3,7,10,15,17
    D3Q19_MapRecv(0,1,0, Dm->recvList("y"),0,recvCount_y,dvcRecvDist_y);
    D3Q19_MapRecv(1,1,0, Dm->recvList("y"),recvCount_y,recvCount_y,dvcRecvDist_y);
    D3Q19_MapRecv(-1,1,0,Dm->recvList("y"),2*recvCount_y,recvCount_y,dvcRecvDist_y);
    D3Q19_MapRecv(0,1,1, Dm->recvList("y"),3*recvCount_y,recvCount_y,dvcRecvDist_y);
    D3Q19_MapRecv(0,1,-1,Dm->recvList("y"),4*recvCount_y,recvCount_y,dvcRecvDist_y);
    //...Receive map for recvList("Z"): q=6,12,13,16,17
    D3Q19_MapRecv(0,0,-1, Dm->recvList("Z"),0,recvCount_Z,dvcRecvDist_Z);
    D3Q19_MapRecv(-1,0,-1,Dm->recvList("Z"),recvCount_Z,recvCount_Z,dvcRecvDist_Z);
    D3Q19_MapRecv(1,0,-1, Dm->recvList("Z"),2*recvCount_Z,recvCount_Z,dvcRecvDist_Z);
    D3Q19_MapRecv(0,-1,-1,Dm->recvList("Z"),3*recvCount_Z,recvCount_Z,dvcRecvDist_Z);
    D3Q19_MapRecv(0,1,-1, Dm->recvList("Z"),4*recvCount_Z,recvCount_Z,dvcRecvDist_Z);
    //...Receive map for recvList("z"): q=5,11,14,15,18
    D3Q19_MapRecv(0,0,1, Dm->recvList("z"),0,recvCount_z,dvcRecvDist_z);
    D3Q19_MapRecv(1,0,1, Dm->recvList("z"),recvCount_z,recvCount_z,dvcRecvDist_z);
    D3Q19_MapRecv(-1,0,1,Dm->recvList("z"),2*recvCount_z,recvCount_z,dvcRecvDist_z);
    D3Q19_MapRecv(0,1,1, Dm->recvList("z"),3*recvCount_z,recvCount_z,dvcRecvDist_z);
    D3Q19_MapRecv(0,-1,1,Dm->recvList("z"),4*recvCount_z,recvCount_z,dvcRecvDist_z);
    //..................................................................................
    //...Receive map for recvList("XY"): q=8
    D3Q19_MapRecv(-1,-1,0,Dm->recvList("XY"),0,recvCount_XY,dvcRecvDist_XY);
    //...Receive map for recvList("xY"): q=9
    D3Q19_MapRecv(1,-1,0,Dm->recvList("xY"),0,recvCount_xY,dvcRecvDist_xY);
    //...Receive map for recvList("Xy"): q=10
    D3Q19_MapRecv(-1,1,0,Dm->recvList("Xy"),0,recvCount_Xy,dvcRecvDist_Xy);
    //...Receive map for recvList("xy"): q=7
    D3Q19_MapRecv(1,1,0,Dm->recvList("xy"),0,recvCount_xy,dvcRecvDist_xy);
    //...Receive map for recvList("XZ"): q=12
    D3Q19_MapRecv(-1,0,-1,Dm->recvList("XZ"),0,recvCount_XZ,dvcRecvDist_XZ);
    //...Receive map for recvList("Xz"): q=14
    D3Q19_MapRecv(-1,0,1,Dm->recvList("Xz"),0,recvCount_Xz,dvcRecvDist_Xz);
    //...Receive map for recvList("xZ"): q=13
    D3Q19_MapRecv(1,0,-1,Dm->recvList("xZ"),0,recvCount_xZ,dvcRecvDist_xZ);
    //...Receive map for recvList("xz"): q=11
    D3Q19_MapRecv(1,0,1,Dm->recvList("xz"),0,recvCount_xz,dvcRecvDist_xz);
    //...Receive map for recvList("YZ"): q=16
    D3Q19_MapRecv(0,-1,-1,Dm->recvList("YZ"),0,recvCount_YZ,dvcRecvDist_YZ);
    //...Receive map for recvList("Yz"): q=18
    D3Q19_MapRecv(0,-1,1,Dm->recvList("Yz"),0,recvCount_Yz,dvcRecvDist_Yz);
    //...Receive map for recvList("yZ"): q=17
    D3Q19_MapRecv(0,1,-1,Dm->recvList("yZ"),0,recvCount_yZ,dvcRecvDist_yZ);
    //...Receive map for recvList("yz"): q=15
    D3Q19_MapRecv(0,1,1,Dm->recvList("yz"),0,recvCount_yz,dvcRecvDist_yz);
    //...................................................................................
    MPI_COMM_SCALBL.barrier();
    ScaLBL_DeviceBarrier();
    //......................................................................................
    // Totals over all 18 directions. Every direction must appear exactly once:
    // six faces, then the four xy, four xz and four yz edges.
    SendCount = sendCount_x+sendCount_X+sendCount_y+sendCount_Y+sendCount_z+sendCount_Z+
                sendCount_xy+sendCount_Xy+sendCount_xY+sendCount_XY+
                sendCount_xz+sendCount_Xz+sendCount_xZ+sendCount_XZ+
                sendCount_yz+sendCount_Yz+sendCount_yZ+sendCount_YZ;

    RecvCount = recvCount_x+recvCount_X+recvCount_y+recvCount_Y+recvCount_z+recvCount_Z+
                recvCount_xy+recvCount_Xy+recvCount_xY+recvCount_XY+
                recvCount_xz+recvCount_Xz+recvCount_xZ+recvCount_XZ+
                recvCount_yz+recvCount_Yz+recvCount_yZ+recvCount_YZ;

    CommunicationCount = SendCount+RecvCount;
    //......................................................................................
}

// Release the buffers and index lists allocated by the constructor.
ScaLBL_Communicator::~ScaLBL_Communicator()
{
    ScaLBL_FreeDeviceMemory( sendbuf_x );
    ScaLBL_FreeDeviceMemory( sendbuf_X );
    ScaLBL_FreeDeviceMemory( sendbuf_y );
    ScaLBL_FreeDeviceMemory( sendbuf_Y );
    ScaLBL_FreeDeviceMemory( sendbuf_z );
    ScaLBL_FreeDeviceMemory( sendbuf_Z );
    ScaLBL_FreeDeviceMemory( sendbuf_xy );
    ScaLBL_FreeDeviceMemory( sendbuf_xY );
    ScaLBL_FreeDeviceMemory( sendbuf_Xy );
    ScaLBL_FreeDeviceMemory( sendbuf_XY );
    ScaLBL_FreeDeviceMemory( sendbuf_xz );
    ScaLBL_FreeDeviceMemory( sendbuf_xZ );
    ScaLBL_FreeDeviceMemory( sendbuf_Xz );
    ScaLBL_FreeDeviceMemory( sendbuf_XZ );
    ScaLBL_FreeDeviceMemory( sendbuf_yz );
    ScaLBL_FreeDeviceMemory( sendbuf_yZ );
    ScaLBL_FreeDeviceMemory( sendbuf_Yz );
    ScaLBL_FreeDeviceMemory( sendbuf_YZ );
    ScaLBL_FreeDeviceMemory( recvbuf_x );
    ScaLBL_FreeDeviceMemory( recvbuf_X );
    ScaLBL_FreeDeviceMemory( recvbuf_y );
    ScaLBL_FreeDeviceMemory( recvbuf_Y );
    ScaLBL_FreeDeviceMemory( recvbuf_z );
    ScaLBL_FreeDeviceMemory( recvbuf_Z );
    ScaLBL_FreeDeviceMemory( recvbuf_xy );
    ScaLBL_FreeDeviceMemory( recvbuf_xY );
    ScaLBL_FreeDeviceMemory( recvbuf_Xy );
    ScaLBL_FreeDeviceMemory( recvbuf_XY );
    ScaLBL_FreeDeviceMemory( recvbuf_xz );
    ScaLBL_FreeDeviceMemory( recvbuf_xZ );
    ScaLBL_FreeDeviceMemory( recvbuf_Xz );
    ScaLBL_FreeDeviceMemory( recvbuf_XZ );
    ScaLBL_FreeDeviceMemory( recvbuf_yz );
    ScaLBL_FreeDeviceMemory( recvbuf_yZ );
ScaLBL_FreeDeviceMemory( recvbuf_Yz ); ScaLBL_FreeDeviceMemory( recvbuf_YZ ); ScaLBL_FreeDeviceMemory( dvcSendList_x ); ScaLBL_FreeDeviceMemory( dvcSendList_X ); ScaLBL_FreeDeviceMemory( dvcSendList_y ); ScaLBL_FreeDeviceMemory( dvcSendList_Y ); ScaLBL_FreeDeviceMemory( dvcSendList_z ); ScaLBL_FreeDeviceMemory( dvcSendList_Z ); ScaLBL_FreeDeviceMemory( dvcSendList_xy ); ScaLBL_FreeDeviceMemory( dvcSendList_xY ); ScaLBL_FreeDeviceMemory( dvcSendList_Xy ); ScaLBL_FreeDeviceMemory( dvcSendList_XY ); ScaLBL_FreeDeviceMemory( dvcSendList_xz ); ScaLBL_FreeDeviceMemory( dvcSendList_xZ ); ScaLBL_FreeDeviceMemory( dvcSendList_Xz ); ScaLBL_FreeDeviceMemory( dvcSendList_XZ ); ScaLBL_FreeDeviceMemory( dvcSendList_yz ); ScaLBL_FreeDeviceMemory( dvcSendList_yZ ); ScaLBL_FreeDeviceMemory( dvcSendList_Yz ); ScaLBL_FreeDeviceMemory( dvcSendList_YZ ); ScaLBL_FreeDeviceMemory( dvcRecvList_x ); ScaLBL_FreeDeviceMemory( dvcRecvList_X ); ScaLBL_FreeDeviceMemory( dvcRecvList_y ); ScaLBL_FreeDeviceMemory( dvcRecvList_Y ); ScaLBL_FreeDeviceMemory( dvcRecvList_z ); ScaLBL_FreeDeviceMemory( dvcRecvList_Z ); ScaLBL_FreeDeviceMemory( dvcRecvList_xy ); ScaLBL_FreeDeviceMemory( dvcRecvList_xY ); ScaLBL_FreeDeviceMemory( dvcRecvList_Xy ); ScaLBL_FreeDeviceMemory( dvcRecvList_XY ); ScaLBL_FreeDeviceMemory( dvcRecvList_xz ); ScaLBL_FreeDeviceMemory( dvcRecvList_xZ ); ScaLBL_FreeDeviceMemory( dvcRecvList_Xz ); ScaLBL_FreeDeviceMemory( dvcRecvList_XZ ); ScaLBL_FreeDeviceMemory( dvcRecvList_yz ); ScaLBL_FreeDeviceMemory( dvcRecvList_yZ ); ScaLBL_FreeDeviceMemory( dvcRecvList_Yz ); ScaLBL_FreeDeviceMemory( dvcRecvList_YZ ); ScaLBL_FreeDeviceMemory( dvcRecvDist_x ); ScaLBL_FreeDeviceMemory( dvcRecvDist_X ); ScaLBL_FreeDeviceMemory( dvcRecvDist_y ); ScaLBL_FreeDeviceMemory( dvcRecvDist_Y ); ScaLBL_FreeDeviceMemory( dvcRecvDist_z ); ScaLBL_FreeDeviceMemory( dvcRecvDist_Z ); ScaLBL_FreeDeviceMemory( dvcRecvDist_xy ); ScaLBL_FreeDeviceMemory( dvcRecvDist_xY ); ScaLBL_FreeDeviceMemory( dvcRecvDist_Xy ); 
ScaLBL_FreeDeviceMemory( dvcRecvDist_XY ); ScaLBL_FreeDeviceMemory( dvcRecvDist_xz ); ScaLBL_FreeDeviceMemory( dvcRecvDist_xZ ); ScaLBL_FreeDeviceMemory( dvcRecvDist_Xz ); ScaLBL_FreeDeviceMemory( dvcRecvDist_XZ ); ScaLBL_FreeDeviceMemory( dvcRecvDist_yz ); ScaLBL_FreeDeviceMemory( dvcRecvDist_yZ ); ScaLBL_FreeDeviceMemory( dvcRecvDist_Yz ); ScaLBL_FreeDeviceMemory( dvcRecvDist_YZ ); } double ScaLBL_Communicator::GetPerformance(int *NeighborList, double *fq, int Np){ /* EACH MPI PROCESS GETS ITS OWN MEASUREMENT*/ /* use MRT kernels to check performance without communication / synchronization */ int TIMESTEPS=500; double RLX_SETA=1.0; double RLX_SETB = 8.f*(2.f-RLX_SETA)/(8.f-RLX_SETA); double FX = 0.0; double FY = 0.0; double FZ = 0.0; ScaLBL_D3Q19_Init(fq, Np); //.......create and start timer............ Barrier(); auto t1 = std::chrono::system_clock::now(); for (int t=0; t( t2 - t1 ).count(); double cputime = 0.5*diff/TIMESTEPS; // Performance obtained from each node double MLUPS = double(Np)/cputime/1000000; return MLUPS; } int ScaLBL_Communicator::LastExterior(){ return next; } int ScaLBL_Communicator::FirstInterior(){ return first_interior; } int ScaLBL_Communicator::LastInterior(){ return last_interior; } void ScaLBL_Communicator::D3Q19_MapRecv(int Cqx, int Cqy, int Cqz, const int *list, int start, int count, int *d3q19_recvlist){ int i,j,k,n,nn,idx; int * ReturnDist; ReturnDist=new int [count]; for (idx=0; idx 0) Map(i,j,k) = -2; // this label is for parallel communication sites else Map(i,j,k) = -1; // this label is for solid bounce-back sites } } } //printf("Exterior... 
\n"); // ********* Exterior ********** // Step 1/2: Index the outer walls of the grid only idx=0; next=0; for (k=1; k 0){ // Counts for the six faces if (i>0 && i<=width) Map(n)=idx++; else if (j>0 && j<=width) Map(n)=idx++; else if (k>0 && k<=width) Map(n)=idx++; else if (i>Nx-width-2 && iNy-width-2 && jNz-width-2 && k 0 ){ Map(n) = idx++; //neighborList[idx++] = n; // index of self in regular layout } } } } last_interior=idx; Np = (last_interior/16 + 1)*16; //printf(" Np=%i \n",Np); // Now use Map to determine the neighbors for each lattice direction for (k=1;k Np) printf("ScaLBL_Communicator::MemoryOptimizedLayout: Map(%i,%i,%i) = %i > %i \n",i,j,k,Map(i,j,k),Np); else if (!(idx<0)){ // store the idx associated with each neighbor // store idx for self if neighbor is in solid or out of domain //D3Q19 = {{1,0,0},{-1,0,0} // {0,1,0},{0,-1,0} // {0,0,1},{0,0,-1}, // {1,1,0},{-1,-1,0}, // {1,-1,0},{-1,1,0}, // {1,0,1},{-1,0,-1}, // {1,0,-1},{-1,0,1}, // {0,1,1},{0,-1,-1}, // {0,1,-1},{0,-1,1}}; int neighbor; // cycle through the neighbors of lattice site idx neighbor=Map(i-1,j,k); if (neighbor<0) neighborList[idx]=idx + 2*Np; else neighborList[idx]=neighbor + 1*Np; neighbor=Map(i+1,j,k); if (neighbor<0) neighborList[Np+idx] = idx + 1*Np; else neighborList[Np+idx]= neighbor + 2*Np; neighbor=Map(i,j-1,k); if (neighbor<0) neighborList[2*Np+idx]=idx + 4*Np; else neighborList[2*Np+idx]=neighbor + 3*Np; neighbor=Map(i,j+1,k); if (neighbor<0) neighborList[3*Np+idx]=idx + 3*Np; else neighborList[3*Np+idx]=neighbor + 4*Np; neighbor=Map(i,j,k-1); if (neighbor<0) neighborList[4*Np+idx]=idx + 6*Np; else neighborList[4*Np+idx]=neighbor + 5*Np; neighbor=Map(i,j,k+1); if (neighbor<0) neighborList[5*Np+idx]=idx + 5*Np; else neighborList[5*Np+idx]=neighbor + 6*Np; neighbor=Map(i-1,j-1,k); if (neighbor<0) neighborList[6*Np+idx]=idx + 8*Np; else neighborList[6*Np+idx]=neighbor + 7*Np; neighbor=Map(i+1,j+1,k); if (neighbor<0) neighborList[7*Np+idx]=idx + 7*Np; else 
neighborList[7*Np+idx]=neighbor+8*Np; neighbor=Map(i-1,j+1,k); if (neighbor<0) neighborList[8*Np+idx]=idx + 10*Np; else neighborList[8*Np+idx]=neighbor + 9*Np; neighbor=Map(i+1,j-1,k); if (neighbor<0) neighborList[9*Np+idx]=idx + 9*Np; else neighborList[9*Np+idx]=neighbor + 10*Np; neighbor=Map(i-1,j,k-1); if (neighbor<0) neighborList[10*Np+idx]=idx + 12*Np; else neighborList[10*Np+idx]=neighbor + 11*Np; neighbor=Map(i+1,j,k+1); if (neighbor<0) neighborList[11*Np+idx]=idx + 11*Np; else neighborList[11*Np+idx]=neighbor + 12*Np; neighbor=Map(i-1,j,k+1); if (neighbor<0) neighborList[12*Np+idx]=idx + 14*Np; else neighborList[12*Np+idx]=neighbor + 13*Np; neighbor=Map(i+1,j,k-1); if (neighbor<0) neighborList[13*Np+idx]=idx + 13*Np; else neighborList[13*Np+idx]=neighbor + 14*Np; neighbor=Map(i,j-1,k-1); if (neighbor<0) neighborList[14*Np+idx]=idx + 16*Np; else neighborList[14*Np+idx]=neighbor + 15*Np; neighbor=Map(i,j+1,k+1); if (neighbor<0) neighborList[15*Np+idx]=idx + 15*Np; else neighborList[15*Np+idx]=neighbor + 16*Np; neighbor=Map(i,j-1,k+1); if (neighbor<0) neighborList[16*Np+idx]=idx + 18*Np; else neighborList[16*Np+idx]=neighbor + 17*Np; neighbor=Map(i,j+1,k-1); if (neighbor<0) neighborList[17*Np+idx]=idx + 17*Np; else neighborList[17*Np+idx]=neighbor + 18*Np; } } } } //for (idx=0; idx 0 && kproc == 0){ // don't unpack little z //...Packing for Z face(5,11,14,15,18)................................ ScaLBL_D3Q7_Unpack(5,dvcRecvDist_Z,0,recvCount_Z,recvbuf_Z,Aq,N); ScaLBL_D3Q7_Unpack(5,dvcRecvDist_Z,recvCount_Z,recvCount_Z,recvbuf_Z,Bq,N); } else if (BoundaryCondition > 0 && kproc == nprocz-1){ // don't unpack big z //...Packing for z face(6,12,13,16,17)................................ ScaLBL_D3Q7_Unpack(6,dvcRecvDist_z,0,recvCount_z,recvbuf_z,Aq,N); ScaLBL_D3Q7_Unpack(6,dvcRecvDist_z,recvCount_z,recvCount_z,recvbuf_z,Bq,N); } else { //...Packing for z face(6,12,13,16,17)................................ 
ScaLBL_D3Q7_Unpack(6,dvcRecvDist_z,0,recvCount_z,recvbuf_z,Aq,N); ScaLBL_D3Q7_Unpack(6,dvcRecvDist_z,recvCount_z,recvCount_z,recvbuf_z,Bq,N); //...Packing for Z face(5,11,14,15,18)................................ ScaLBL_D3Q7_Unpack(5,dvcRecvDist_Z,0,recvCount_Z,recvbuf_Z,Aq,N); ScaLBL_D3Q7_Unpack(5,dvcRecvDist_Z,recvCount_Z,recvCount_Z,recvbuf_Z,Bq,N); } //................................................................................... Lock=false; // unlock the communicator after communications complete //................................................................................... } void ScaLBL_Communicator::SendD3Q7AA(double *Aq, int Component){ // NOTE: the center distribution f0 must NOT be at the start of feven, provide offset to start of f2 if (Lock==true){ ERROR("ScaLBL Error (SendD3Q7): ScaLBL_Communicator is locked -- did you forget to match Send/Recv calls?"); } else{ Lock=true; } // assign tag of 19 to D3Q19 communication sendtag = recvtag = 7; ScaLBL_DeviceBarrier(); // Pack the distributions //...Packing for x face(2,8,10,12,14)................................ ScaLBL_D3Q19_Pack(2,dvcSendList_x,0,sendCount_x,sendbuf_x,&Aq[Component*7*N],N); req1[0] = MPI_COMM_SCALBL.Isend(sendbuf_x, sendCount_x,rank_x,sendtag); req2[0] = MPI_COMM_SCALBL.Irecv(recvbuf_X, recvCount_X,rank_X,recvtag); //...Packing for X face(1,7,9,11,13)................................ ScaLBL_D3Q19_Pack(1,dvcSendList_X,0,sendCount_X,sendbuf_X,&Aq[Component*7*N],N); req1[1] = MPI_COMM_SCALBL.Isend(sendbuf_X, sendCount_X,rank_X,sendtag); req2[1] = MPI_COMM_SCALBL.Irecv(recvbuf_x, recvCount_x,rank_x,recvtag); //...Packing for y face(4,8,9,16,18)................................. ScaLBL_D3Q19_Pack(4,dvcSendList_y,0,sendCount_y,sendbuf_y,&Aq[Component*7*N],N); req1[2] = MPI_COMM_SCALBL.Isend(sendbuf_y, sendCount_y,rank_y,sendtag); req2[2] = MPI_COMM_SCALBL.Irecv(recvbuf_Y, recvCount_Y,rank_Y,recvtag); //...Packing for Y face(3,7,10,15,17)................................. 
ScaLBL_D3Q19_Pack(3,dvcSendList_Y,0,sendCount_Y,sendbuf_Y,&Aq[Component*7*N],N); req1[3] = MPI_COMM_SCALBL.Isend(sendbuf_Y, sendCount_Y,rank_Y,sendtag); req2[3] = MPI_COMM_SCALBL.Irecv(recvbuf_y, recvCount_y,rank_y,recvtag); //...Packing for z face(6,12,13,16,17)................................ ScaLBL_D3Q19_Pack(6,dvcSendList_z,0,sendCount_z,sendbuf_z,&Aq[Component*7*N],N); req1[4] = MPI_COMM_SCALBL.Isend(sendbuf_z, sendCount_z,rank_z,sendtag); req2[4] = MPI_COMM_SCALBL.Irecv(recvbuf_Z, recvCount_Z,rank_Z,recvtag); //...Packing for Z face(5,11,14,15,18)................................ ScaLBL_D3Q19_Pack(5,dvcSendList_Z,0,sendCount_Z,sendbuf_Z,&Aq[Component*7*N],N); req1[5] = MPI_COMM_SCALBL.Isend(sendbuf_Z, sendCount_Z,rank_Z,sendtag); req2[5] = MPI_COMM_SCALBL.Irecv(recvbuf_z, recvCount_z,rank_z,recvtag); } void ScaLBL_Communicator::RecvD3Q7AA(double *Aq, int Component){ // NOTE: the center distribution f0 must NOT be at the start of feven, provide offset to start of f2 //................................................................................... // Wait for completion of D3Q19 communication MPI_COMM_SCALBL.waitAll(6,req1); MPI_COMM_SCALBL.waitAll(6,req2); ScaLBL_DeviceBarrier(); //................................................................................... // NOTE: AA Routine writes to opposite // Unpack the distributions on the device //................................................................................... //...Unpacking for x face(2,8,10,12,14)................................ ScaLBL_D3Q7_Unpack(2,dvcRecvDist_x,0,recvCount_x,recvbuf_x,&Aq[Component*7*N],N); //................................................................................... //...Packing for X face(1,7,9,11,13)................................ ScaLBL_D3Q7_Unpack(1,dvcRecvDist_X,0,recvCount_X,recvbuf_X,&Aq[Component*7*N],N); //................................................................................... 
//...Packing for y face(4,8,9,16,18)................................. ScaLBL_D3Q7_Unpack(4,dvcRecvDist_y,0,recvCount_y,recvbuf_y,&Aq[Component*7*N],N); //................................................................................... //...Packing for Y face(3,7,10,15,17)................................. ScaLBL_D3Q7_Unpack(3,dvcRecvDist_Y,0,recvCount_Y,recvbuf_Y,&Aq[Component*7*N],N); //................................................................................... if (BoundaryCondition > 0){ if (kproc != 0){ //...Packing for z face(6,12,13,16,17)................................ ScaLBL_D3Q7_Unpack(6,dvcRecvDist_z,0,recvCount_z,recvbuf_z,&Aq[Component*7*N],N); } if (kproc != nprocz-1){ //...Packing for Z face(5,11,14,15,18)................................ ScaLBL_D3Q7_Unpack(5,dvcRecvDist_Z,0,recvCount_Z,recvbuf_Z,&Aq[Component*7*N],N); } } else { //...Packing for z face(6,12,13,16,17)................................ ScaLBL_D3Q7_Unpack(6,dvcRecvDist_z,0,recvCount_z,recvbuf_z,&Aq[Component*7*N],N); //...Packing for Z face(5,11,14,15,18)................................ ScaLBL_D3Q7_Unpack(5,dvcRecvDist_Z,0,recvCount_Z,recvbuf_Z,&Aq[Component*7*N],N); } //................................................................................... Lock=false; // unlock the communicator after communications complete //................................................................................... } void ScaLBL_Communicator::TriSendD3Q7AA(double *Aq, double *Bq, double *Cq){ // NOTE: the center distribution f0 must NOT be at the start of feven, provide offset to start of f2 if (Lock==true){ ERROR("ScaLBL Error (SendD3Q19): ScaLBL_Communicator is locked -- did you forget to match Send/Recv calls?"); } else{ Lock=true; } // assign tag of 19 to D3Q19 communication sendtag = recvtag = 15; ScaLBL_DeviceBarrier(); // Pack the distributions //...Packing for x face(2,8,10,12,14)................................ 
ScaLBL_D3Q19_Pack(2,dvcSendList_x,0,sendCount_x,sendbuf_x,Aq,N); ScaLBL_D3Q19_Pack(2,dvcSendList_x,sendCount_x,sendCount_x,sendbuf_x,Bq,N); ScaLBL_D3Q19_Pack(2,dvcSendList_x,2*sendCount_x,sendCount_x,sendbuf_x,Cq,N); //...Packing for X face(1,7,9,11,13)................................ ScaLBL_D3Q19_Pack(1,dvcSendList_X,0,sendCount_X,sendbuf_X,Aq,N); ScaLBL_D3Q19_Pack(1,dvcSendList_X,sendCount_X,sendCount_X,sendbuf_X,Bq,N); ScaLBL_D3Q19_Pack(1,dvcSendList_X,2*sendCount_X,sendCount_X,sendbuf_X,Cq,N); //...Packing for y face(4,8,9,16,18)................................. ScaLBL_D3Q19_Pack(4,dvcSendList_y,0,sendCount_y,sendbuf_y,Aq,N); ScaLBL_D3Q19_Pack(4,dvcSendList_y,sendCount_y,sendCount_y,sendbuf_y,Bq,N); ScaLBL_D3Q19_Pack(4,dvcSendList_y,2*sendCount_y,sendCount_y,sendbuf_y,Cq,N); //...Packing for Y face(3,7,10,15,17)................................. ScaLBL_D3Q19_Pack(3,dvcSendList_Y,0,sendCount_Y,sendbuf_Y,Aq,N); ScaLBL_D3Q19_Pack(3,dvcSendList_Y,sendCount_Y,sendCount_Y,sendbuf_Y,Bq,N); ScaLBL_D3Q19_Pack(3,dvcSendList_Y,2*sendCount_Y,sendCount_Y,sendbuf_Y,Cq,N); //...Packing for z face(6,12,13,16,17)................................ ScaLBL_D3Q19_Pack(6,dvcSendList_z,0,sendCount_z,sendbuf_z,Aq,N); ScaLBL_D3Q19_Pack(6,dvcSendList_z,sendCount_z,sendCount_z,sendbuf_z,Bq,N); ScaLBL_D3Q19_Pack(6,dvcSendList_z,2*sendCount_z,sendCount_z,sendbuf_z,Cq,N); //...Packing for Z face(5,11,14,15,18)................................ ScaLBL_D3Q19_Pack(5,dvcSendList_Z,0,sendCount_Z,sendbuf_Z,Aq,N); ScaLBL_D3Q19_Pack(5,dvcSendList_Z,sendCount_Z,sendCount_Z,sendbuf_Z,Bq,N); ScaLBL_D3Q19_Pack(5,dvcSendList_Z,2*sendCount_Z,sendCount_Z,sendbuf_Z,Cq,N); //................................................................................... 
// Send all the distributions req1[0] = MPI_COMM_SCALBL.Isend(sendbuf_x, 3*sendCount_x,rank_x,sendtag); req2[0] = MPI_COMM_SCALBL.Irecv(recvbuf_X, 3*recvCount_X,rank_X,recvtag); req1[1] = MPI_COMM_SCALBL.Isend(sendbuf_X, 3*sendCount_X,rank_X,sendtag); req2[1] = MPI_COMM_SCALBL.Irecv(recvbuf_x, 3*recvCount_x,rank_x,recvtag); req1[2] = MPI_COMM_SCALBL.Isend(sendbuf_y, 3*sendCount_y,rank_y,sendtag); req2[2] = MPI_COMM_SCALBL.Irecv(recvbuf_Y, 3*recvCount_Y,rank_Y,recvtag); req1[3] = MPI_COMM_SCALBL.Isend(sendbuf_Y, 3*sendCount_Y,rank_Y,sendtag); req2[3] = MPI_COMM_SCALBL.Irecv(recvbuf_y, 3*recvCount_y,rank_y,recvtag); req1[4] = MPI_COMM_SCALBL.Isend(sendbuf_z, 3*sendCount_z,rank_z,sendtag); req2[4] = MPI_COMM_SCALBL.Irecv(recvbuf_Z, 3*recvCount_Z,rank_Z,recvtag); req1[5] = MPI_COMM_SCALBL.Isend(sendbuf_Z, 3*sendCount_Z,rank_Z,sendtag); req2[5] = MPI_COMM_SCALBL.Irecv(recvbuf_z, 3*recvCount_z,rank_z,recvtag); } void ScaLBL_Communicator::TriRecvD3Q7AA(double *Aq, double *Bq, double *Cq){ // NOTE: the center distribution f0 must NOT be at the start of feven, provide offset to start of f2 //................................................................................... // Wait for completion of D3Q19 communication MPI_COMM_SCALBL.waitAll(6,req1); MPI_COMM_SCALBL.waitAll(6,req2); ScaLBL_DeviceBarrier(); //................................................................................... // NOTE: AA Routine writes to opposite // Unpack the distributions on the device //................................................................................... //...Unpacking for x face(2,8,10,12,14)................................ ScaLBL_D3Q7_Unpack(2,dvcRecvDist_x,0,recvCount_x,recvbuf_x,Aq,N); ScaLBL_D3Q7_Unpack(2,dvcRecvDist_x,recvCount_x,recvCount_x,recvbuf_x,Bq,N); ScaLBL_D3Q7_Unpack(2,dvcRecvDist_x,2*recvCount_x,recvCount_x,recvbuf_x,Cq,N); //................................................................................... 
//...Packing for X face(1,7,9,11,13)................................ ScaLBL_D3Q7_Unpack(1,dvcRecvDist_X,0,recvCount_X,recvbuf_X,Aq,N); ScaLBL_D3Q7_Unpack(1,dvcRecvDist_X,recvCount_X,recvCount_X,recvbuf_X,Bq,N); ScaLBL_D3Q7_Unpack(1,dvcRecvDist_X,2*recvCount_X,recvCount_X,recvbuf_X,Cq,N); //................................................................................... //...Packing for y face(4,8,9,16,18)................................. ScaLBL_D3Q7_Unpack(4,dvcRecvDist_y,0,recvCount_y,recvbuf_y,Aq,N); ScaLBL_D3Q7_Unpack(4,dvcRecvDist_y,recvCount_y,recvCount_y,recvbuf_y,Bq,N); ScaLBL_D3Q7_Unpack(4,dvcRecvDist_y,2*recvCount_y,recvCount_y,recvbuf_y,Cq,N); //................................................................................... //...Packing for Y face(3,7,10,15,17)................................. ScaLBL_D3Q7_Unpack(3,dvcRecvDist_Y,0,recvCount_Y,recvbuf_Y,Aq,N); ScaLBL_D3Q7_Unpack(3,dvcRecvDist_Y,recvCount_Y,recvCount_Y,recvbuf_Y,Bq,N); ScaLBL_D3Q7_Unpack(3,dvcRecvDist_Y,2*recvCount_Y,recvCount_Y,recvbuf_Y,Cq,N); //................................................................................... if (BoundaryCondition > 0 && kproc == 0){ // don't unpack little z //...Packing for Z face(5,11,14,15,18)................................ ScaLBL_D3Q7_Unpack(5,dvcRecvDist_Z,0,recvCount_Z,recvbuf_Z,Aq,N); ScaLBL_D3Q7_Unpack(5,dvcRecvDist_Z,recvCount_Z,recvCount_Z,recvbuf_Z,Bq,N); ScaLBL_D3Q7_Unpack(5,dvcRecvDist_Z,2*recvCount_Z,recvCount_Z,recvbuf_Z,Cq,N); } else if (BoundaryCondition > 0 && kproc == nprocz-1){ // don't unpack big z //...Packing for z face(6,12,13,16,17)................................ ScaLBL_D3Q7_Unpack(6,dvcRecvDist_z,0,recvCount_z,recvbuf_z,Aq,N); ScaLBL_D3Q7_Unpack(6,dvcRecvDist_z,recvCount_z,recvCount_z,recvbuf_z,Bq,N); ScaLBL_D3Q7_Unpack(6,dvcRecvDist_z,2*recvCount_z,recvCount_z,recvbuf_z,Cq,N); } else { //...Packing for z face(6,12,13,16,17)................................ 
ScaLBL_D3Q7_Unpack(6,dvcRecvDist_z,0,recvCount_z,recvbuf_z,Aq,N); ScaLBL_D3Q7_Unpack(6,dvcRecvDist_z,recvCount_z,recvCount_z,recvbuf_z,Bq,N); ScaLBL_D3Q7_Unpack(6,dvcRecvDist_z,2*recvCount_z,recvCount_z,recvbuf_z,Cq,N); //...Packing for Z face(5,11,14,15,18)................................ ScaLBL_D3Q7_Unpack(5,dvcRecvDist_Z,0,recvCount_Z,recvbuf_Z,Aq,N); ScaLBL_D3Q7_Unpack(5,dvcRecvDist_Z,recvCount_Z,recvCount_Z,recvbuf_Z,Bq,N); ScaLBL_D3Q7_Unpack(5,dvcRecvDist_Z,2*recvCount_Z,recvCount_Z,recvbuf_Z,Cq,N); } //................................................................................... Lock=false; // unlock the communicator after communications complete //................................................................................... } void ScaLBL_Communicator::SendHalo(double *data){ //................................................................................... if (Lock==true){ ERROR("ScaLBL Error (SendHalo): ScaLBL_Communicator is locked -- did you forget to match Send/Recv calls?"); } else{ Lock=true; } ScaLBL_DeviceBarrier(); //................................................................................... sendtag = recvtag = 1; //................................................................................... 
ScaLBL_Scalar_Pack(dvcSendList_x, sendCount_x,sendbuf_x, data, N); ScaLBL_Scalar_Pack(dvcSendList_y, sendCount_y,sendbuf_y, data, N); ScaLBL_Scalar_Pack(dvcSendList_z, sendCount_z,sendbuf_z, data, N); ScaLBL_Scalar_Pack(dvcSendList_X, sendCount_X,sendbuf_X, data, N); ScaLBL_Scalar_Pack(dvcSendList_Y, sendCount_Y,sendbuf_Y, data, N); ScaLBL_Scalar_Pack(dvcSendList_Z, sendCount_Z,sendbuf_Z, data, N); ScaLBL_Scalar_Pack(dvcSendList_xy, sendCount_xy,sendbuf_xy, data, N); ScaLBL_Scalar_Pack(dvcSendList_xY, sendCount_xY,sendbuf_xY, data, N); ScaLBL_Scalar_Pack(dvcSendList_Xy, sendCount_Xy,sendbuf_Xy, data, N); ScaLBL_Scalar_Pack(dvcSendList_XY, sendCount_XY,sendbuf_XY, data, N); ScaLBL_Scalar_Pack(dvcSendList_xz, sendCount_xz,sendbuf_xz, data, N); ScaLBL_Scalar_Pack(dvcSendList_xZ, sendCount_xZ,sendbuf_xZ, data, N); ScaLBL_Scalar_Pack(dvcSendList_Xz, sendCount_Xz,sendbuf_Xz, data, N); ScaLBL_Scalar_Pack(dvcSendList_XZ, sendCount_XZ,sendbuf_XZ, data, N); ScaLBL_Scalar_Pack(dvcSendList_yz, sendCount_yz,sendbuf_yz, data, N); ScaLBL_Scalar_Pack(dvcSendList_yZ, sendCount_yZ,sendbuf_yZ, data, N); ScaLBL_Scalar_Pack(dvcSendList_Yz, sendCount_Yz,sendbuf_Yz, data, N); ScaLBL_Scalar_Pack(dvcSendList_YZ, sendCount_YZ,sendbuf_YZ, data, N); //................................................................................... // Send / Recv all the phase indcator field values //................................................................................... 
req1[0] = MPI_COMM_SCALBL.Isend(sendbuf_x, sendCount_x,rank_x,sendtag); req2[0] = MPI_COMM_SCALBL.Irecv(recvbuf_X, recvCount_X,rank_X,recvtag); req1[1] = MPI_COMM_SCALBL.Isend(sendbuf_X, sendCount_X,rank_X,sendtag); req2[1] = MPI_COMM_SCALBL.Irecv(recvbuf_x, recvCount_x,rank_x,recvtag); req1[2] = MPI_COMM_SCALBL.Isend(sendbuf_y, sendCount_y,rank_y,sendtag); req2[2] = MPI_COMM_SCALBL.Irecv(recvbuf_Y, recvCount_Y,rank_Y,recvtag); req1[3] = MPI_COMM_SCALBL.Isend(sendbuf_Y, sendCount_Y,rank_Y,sendtag); req2[3] = MPI_COMM_SCALBL.Irecv(recvbuf_y, recvCount_y,rank_y,recvtag); req1[4] = MPI_COMM_SCALBL.Isend(sendbuf_z, sendCount_z,rank_z,sendtag); req2[4] = MPI_COMM_SCALBL.Irecv(recvbuf_Z, recvCount_Z,rank_Z,recvtag); req1[5] = MPI_COMM_SCALBL.Isend(sendbuf_Z, sendCount_Z,rank_Z,sendtag); req2[5] = MPI_COMM_SCALBL.Irecv(recvbuf_z, recvCount_z,rank_z,recvtag); req1[6] = MPI_COMM_SCALBL.Isend(sendbuf_xy, sendCount_xy,rank_xy,sendtag); req2[6] = MPI_COMM_SCALBL.Irecv(recvbuf_XY, recvCount_XY,rank_XY,recvtag); req1[7] = MPI_COMM_SCALBL.Isend(sendbuf_XY, sendCount_XY,rank_XY,sendtag); req2[7] = MPI_COMM_SCALBL.Irecv(recvbuf_xy, recvCount_xy,rank_xy,recvtag); req1[8] = MPI_COMM_SCALBL.Isend(sendbuf_Xy, sendCount_Xy,rank_Xy,sendtag); req2[8] = MPI_COMM_SCALBL.Irecv(recvbuf_xY, recvCount_xY,rank_xY,recvtag); req1[9] = MPI_COMM_SCALBL.Isend(sendbuf_xY, sendCount_xY,rank_xY,sendtag); req2[9] = MPI_COMM_SCALBL.Irecv(recvbuf_Xy, recvCount_Xy,rank_Xy,recvtag); req1[10] = MPI_COMM_SCALBL.Isend(sendbuf_xz, sendCount_xz,rank_xz,sendtag); req2[10] = MPI_COMM_SCALBL.Irecv(recvbuf_XZ, recvCount_XZ,rank_XZ,recvtag); req1[11] = MPI_COMM_SCALBL.Isend(sendbuf_XZ, sendCount_XZ,rank_XZ,sendtag); req2[11] = MPI_COMM_SCALBL.Irecv(recvbuf_xz, recvCount_xz,rank_xz,recvtag); req1[12] = MPI_COMM_SCALBL.Isend(sendbuf_Xz, sendCount_Xz,rank_Xz,sendtag); req2[12] = MPI_COMM_SCALBL.Irecv(recvbuf_xZ, recvCount_xZ,rank_xZ,recvtag); req1[13] = MPI_COMM_SCALBL.Isend(sendbuf_xZ, sendCount_xZ,rank_xZ,sendtag); 
req2[13] = MPI_COMM_SCALBL.Irecv(recvbuf_Xz, recvCount_Xz,rank_Xz,recvtag); req1[14] = MPI_COMM_SCALBL.Isend(sendbuf_yz, sendCount_yz,rank_yz,sendtag); req2[14] = MPI_COMM_SCALBL.Irecv(recvbuf_YZ, recvCount_YZ,rank_YZ,recvtag); req1[15] = MPI_COMM_SCALBL.Isend(sendbuf_YZ, sendCount_YZ,rank_YZ,sendtag); req2[15] = MPI_COMM_SCALBL.Irecv(recvbuf_yz, recvCount_yz,rank_yz,recvtag); req1[16] = MPI_COMM_SCALBL.Isend(sendbuf_Yz, sendCount_Yz,rank_Yz,sendtag); req2[16] = MPI_COMM_SCALBL.Irecv(recvbuf_yZ, recvCount_yZ,rank_yZ,recvtag); req1[17] = MPI_COMM_SCALBL.Isend(sendbuf_yZ, sendCount_yZ,rank_yZ,sendtag); req2[17] = MPI_COMM_SCALBL.Irecv(recvbuf_Yz, recvCount_Yz,rank_Yz,recvtag); //................................................................................... } void ScaLBL_Communicator::RecvHalo(double *data){ //................................................................................... MPI_COMM_SCALBL.waitAll(18,req1); MPI_COMM_SCALBL.waitAll(18,req2); ScaLBL_DeviceBarrier(); //................................................................................... //................................................................................... 
ScaLBL_Scalar_Unpack(dvcRecvList_x, recvCount_x,recvbuf_x, data, N); ScaLBL_Scalar_Unpack(dvcRecvList_y, recvCount_y,recvbuf_y, data, N); ScaLBL_Scalar_Unpack(dvcRecvList_z, recvCount_z,recvbuf_z, data, N); ScaLBL_Scalar_Unpack(dvcRecvList_X, recvCount_X,recvbuf_X, data, N); ScaLBL_Scalar_Unpack(dvcRecvList_Y, recvCount_Y,recvbuf_Y, data, N); ScaLBL_Scalar_Unpack(dvcRecvList_Z, recvCount_Z,recvbuf_Z, data, N); ScaLBL_Scalar_Unpack(dvcRecvList_xy, recvCount_xy,recvbuf_xy, data, N); ScaLBL_Scalar_Unpack(dvcRecvList_xY, recvCount_xY,recvbuf_xY, data, N); ScaLBL_Scalar_Unpack(dvcRecvList_Xy, recvCount_Xy,recvbuf_Xy, data, N); ScaLBL_Scalar_Unpack(dvcRecvList_XY, recvCount_XY,recvbuf_XY, data, N); ScaLBL_Scalar_Unpack(dvcRecvList_xz, recvCount_xz,recvbuf_xz, data, N); ScaLBL_Scalar_Unpack(dvcRecvList_xZ, recvCount_xZ,recvbuf_xZ, data, N); ScaLBL_Scalar_Unpack(dvcRecvList_Xz, recvCount_Xz,recvbuf_Xz, data, N); ScaLBL_Scalar_Unpack(dvcRecvList_XZ, recvCount_XZ,recvbuf_XZ, data, N); ScaLBL_Scalar_Unpack(dvcRecvList_yz, recvCount_yz,recvbuf_yz, data, N); ScaLBL_Scalar_Unpack(dvcRecvList_yZ, recvCount_yZ,recvbuf_yZ, data, N); ScaLBL_Scalar_Unpack(dvcRecvList_Yz, recvCount_Yz,recvbuf_Yz, data, N); ScaLBL_Scalar_Unpack(dvcRecvList_YZ, recvCount_YZ,recvbuf_YZ, data, N); //................................................................................... Lock=false; // unlock the communicator after communications complete //................................................................................... } void ScaLBL_Communicator::RegularLayout(IntArray map, const double *data, DoubleArray ®data){ // Gets data from the device and stores in regular layout int i,j,k,idx; int Nx = map.size(0); int Ny = map.size(1); int Nz = map.size(2); // initialize the array regdata.fill(0.f); double *TmpDat; double value; TmpDat = new double [N]; ScaLBL_CopyToHost(&TmpDat[0],&data[0], N*sizeof(double)); for (k=0; k