/* ScaLBL.h
 *  Header file for Scalable Lattice Boltzmann Library
 *  Separate implementations for GPU and CPU must both follow the conventions defined in this header
 *  This library contains the essential components of the LBM
 *     - streaming implementations
 *     - collision terms to model various physics
 *     - communication framework for the LBM
 *  Refer to Domain.h for setup of parallel domains
 */
// Guard against multiple inclusion: this header declares a class, so including it
// twice in one translation unit would otherwise be a redefinition error.
#pragma once

#include <stddef.h>   // size_t, used by the device-memory routines below
#include "Domain.h"

// ---------------------------------------------------------------------------------------
// DEVICE MEMORY MANAGEMENT
// All functions in this section of the header have C linkage; their definitions live in
// the CPU / GPU back ends, not in this file.
//   ScaLBL_AllocateDeviceMemory : stores a newly allocated device pointer in *address;
//                                 callers in this file pass `size` as a byte count
//                                 (e.g. count*sizeof(double)).
//   ScaLBL_CopyToDevice / ScaLBL_CopyToHost : copy `size` bytes from `source` to `dest`.
//   NOTE(review): the exact semantics of ScaLBL_DeviceBarrier are set by the back end
//   (presumably it waits for outstanding device work) -- confirm against the implementation.
// ---------------------------------------------------------------------------------------
extern "C" void ScaLBL_AllocateDeviceMemory(void** address, size_t size);

extern "C" void ScaLBL_FreeDeviceMemory(void* pointer);

extern "C" void ScaLBL_CopyToDevice(void* dest, const void* source, size_t size);

extern "C" void ScaLBL_CopyToHost(void* dest, const void* source, size_t size);

extern "C" void ScaLBL_DeviceBarrier();

// ---------------------------------------------------------------------------------------
// PACK / UNPACK ROUTINES FOR COMMUNICATION BUFFERS
// NOTE(review): argument meanings are defined by the back-end implementations; presumably
// `list` holds `count` site indices, `start` is an offset into the buffer and `q` selects
// a distribution -- confirm against the CPU/GPU sources.
// ---------------------------------------------------------------------------------------
extern "C" void ScaLBL_D3Q19_Pack(int q, int *list, int start, int count, double *sendbuf, double *dist, int N);

extern "C" void ScaLBL_D3Q19_Unpack(int q, int *list, int start, int count, double *recvbuf, double *dist, int N);

extern "C" void ScaLBL_D3Q7_Unpack(int q, int *list, int start, int count, double *recvbuf, double *dist, int N);

extern "C" void ScaLBL_Scalar_Pack(int *list, int count, double *sendbuf, double *Data, int N);

extern "C" void ScaLBL_Scalar_Unpack(int *list, int count, double *recvbuf, double *Data, int N);

extern "C" void ScaLBL_PackDenD3Q7(int *list, int count, double *sendbuf, int number, double *Data, int N);

extern "C" void ScaLBL_UnpackDenD3Q7(int *list, int count, double *recvbuf, int number, double *Data, int N);

// ---------------------------------------------------------------------------------------
// D3Q19 INITIALIZATION AND DERIVED QUANTITIES
// ---------------------------------------------------------------------------------------
extern "C" void ScaLBL_D3Q19_Init(double *Dist, int Np);

extern "C" void ScaLBL_D3Q19_Momentum(double *dist, double *vel, int Np);

extern "C" void ScaLBL_D3Q19_Pressure(const char *ID, const double *disteven, const double *distodd, double *Pressure, int Nx, int Ny, int Nz);

// ---------------------------------------------------------------------------------------
// MRT MODEL
// Each model provides an AAeven and an AAodd routine; the AAodd variant additionally
// takes a neighbor list as its first argument.
// ---------------------------------------------------------------------------------------
extern "C" void ScaLBL_D3Q19_AAeven_MRT(double *dist, int start, int finish, int Np, double rlx_setA, double rlx_setB, double Fx,
        double Fy, double Fz);

extern "C" void ScaLBL_D3Q19_AAodd_MRT(int *d_neighborList, double *dist, int start, int finish, int Np,
        double rlx_setA, double rlx_setB, double Fx, double Fy, double Fz);

// ---------------------------------------------------------------------------------------
// COLOR MODEL
// ---------------------------------------------------------------------------------------
extern "C" void ScaLBL_D3Q19_AAeven_Color(int *Map, double *dist, double *Aq, double *Bq, double *Den, double *Phi,
        double *Vel, double rhoA, double rhoB, double tauA, double tauB, double alpha, double beta,
        double Fx, double Fy, double Fz, int strideY, int strideZ, int start, int finish, int Np);

extern "C" void ScaLBL_D3Q19_AAodd_Color(int *d_neighborList, int *Map, double *dist, double *Aq, double *Bq, double *Den,
        double *Phi, double *Vel, double rhoA, double rhoB, double tauA, double tauB, double alpha, double beta,
        double Fx, double Fy, double Fz, int strideY, int strideZ, int start, int finish, int Np);

extern "C" void ScaLBL_D3Q7_AAodd_PhaseField(int *NeighborList, int *Map, double *Aq, double *Bq,
        double *Den, double *Phi, int start, int finish, int Np);

extern "C" void ScaLBL_D3Q7_AAeven_PhaseField(int *Map, double *Aq, double *Bq, double *Den, double *Phi,
        int start, int finish, int Np);

extern "C" void ScaLBL_D3Q19_Gradient(int *Map, double *Phi, double *ColorGrad, int start, int finish, int Np, int Nx, int Ny, int Nz);

extern "C" void ScaLBL_PhaseField_Init(int *Map, double *Phi, double *Den, double *Aq, double *Bq, int Np);

// ---------------------------------------------------------------------------------------
// BOUNDARY CONDITION ROUTINES
// The two Flux_BC routines return a double; its meaning is defined by the implementation.
// ---------------------------------------------------------------------------------------

//extern "C" void ScaLBL_D3Q19_Pressure_BC_z(double *disteven, double *distodd, double din,
//        int Nx, int Ny, int Nz);
//extern "C" void ScaLBL_D3Q19_Pressure_BC_Z(double *disteven, double *distodd, double dout,
//        int Nx, int Ny, int Nz, int outlet);

extern "C" void ScaLBL_D3Q19_AAodd_Pressure_BC_z(int *neighborList, int *list, double *dist, double din, int count, int Np);

extern "C" void ScaLBL_D3Q19_AAodd_Pressure_BC_Z(int *neighborList, int *list, double *dist, double dout, int count, int Np);

extern "C" void ScaLBL_D3Q19_AAeven_Pressure_BC_z(int *list, double *dist, double din, int count, int Np);

extern "C" void ScaLBL_D3Q19_AAeven_Pressure_BC_Z(int *list, double *dist, double dout, int count, int Np);

extern "C" double ScaLBL_D3Q19_AAodd_Flux_BC_z(int *neighborList, int *list, double *dist, double flux,
        double area, int count, int N);

extern "C" double ScaLBL_D3Q19_AAeven_Flux_BC_z(int *list, double *dist, double flux, double area,
        int count, int N);

extern "C" void ScaLBL_Color_BC(int *list, int *Map, double *Phi, double *Den, double vA, double vB, int count, int Np);

extern "C" void ScaLBL_SetSlice_z(double *Phi, double value, int Nx, int Ny, int Nz, int Slice);

// LIST OF DEPRECATED FUNCTIONS (probably delete)
//extern "C" double ScaLBL_D3Q19_Flux_BC_z(double *disteven, double *distodd, double flux,
//        int Nx, int Ny, int Nz);
//extern "C" double ScaLBL_D3Q19_Flux_BC_Z(double *disteven, double *distodd, double flux,
//        int Nx, int Ny, int Nz, int outlet);
//extern "C" void ScaLBL_D3Q19_Velocity_BC_z(double *disteven, double *distodd, double uz,
//        int Nx, int Ny, int Nz);
//extern "C" void ScaLBL_D3Q19_Velocity_BC_Z(double *disteven, double *distodd, double uz,
//        int Nx, int Ny, int Nz, int outlet);
//extern "C" void ScaLBL_Color_BC_z(double *Phi, double *Den, double *A_even, double *A_odd,
//        double *B_even, double *B_odd, int Nx, int Ny, int Nz);
//extern "C" void ScaLBL_Color_BC_Z(double *Phi, double *Den, double *A_even, double *A_odd,
//        double *B_even, double *B_odd, int Nx, int Ny, int Nz);
//extern "C" void ScaLBL_D3Q19_AAeven_Compact(char *ID, double *d_dist, int Np);
//extern "C" void ScaLBL_D3Q19_AAodd_Compact(char *ID,int *d_neighborList, double *d_dist, int Np);
//extern "C" void ScaLBL_ComputePhaseField(char *ID, double *Phi, double *Den, int N);
//extern "C" void ScaLBL_D3Q7_Init(double *Dist, double *Den, int Np);
//extern "C" void ScaLBL_D3Q7_Init(char *ID, double *f_even, double *f_odd, double *Den, int Nx, int Ny, int Nz);
//extern "C" void ScaLBL_D3Q7_Swap(char *ID, double *disteven, double *distodd, int Nx, int Ny, int Nz);
//extern "C" void ScaLBL_D3Q7_Density(char *ID, double *disteven, double *distodd, double *Den,
//        int Nx, int Ny, int Nz);
//extern "C" void ScaLBL_D3Q19_AA_Init(double *f_even, double *f_odd, int Np);
//extern "C" void ScaLBL_D3Q19_Init(char *ID, double *f_even, double *f_odd, int Nx, int Ny, int Nz);
//extern "C" void ScaLBL_D3Q19_Swap(char *ID, double *disteven, double *distodd, int Nx, int Ny, int Nz);
//extern "C" void ScaLBL_D3Q19_Swap_Compact(int *neighborList, double *disteven, double *distodd, int Np);
//extern "C" void ScaLBL_D3Q19_MRT(char *ID, double *f_even, double *f_odd, double rlxA, double rlxB,
//        double Fx, double Fy, double Fz,int Nx, int Ny, int Nz);
//extern "C" void ScaLBL_Color_Init(char *ID, double *Den, double *Phi, double das, double dbs, int Nx, int Ny, int Nz);
//extern "C" void ScaLBL_ColorDistance_Init(char *ID, double *Den, double *Phi, double *Distance,
//        double das, double dbs, double beta, double xp, int Nx, int Ny, int Nz);
//extern "C" void ScaLBL_D3Q19_ColorGradient(char *ID, double *phi, double *ColorGrad, int Nx, int Ny, int Nz);
//extern "C" void ScaLBL_D3Q19_ColorCollide( char *ID, double *disteven, double *distodd, double *phi, double *ColorGrad,
//        double *Velocity, int Nx, int Ny, int Nz,double rlx_setA, double rlx_setB,
//        double alpha, double beta, double Fx, double Fy, double Fz);
//extern "C" void ScaLBL_D3Q7_ColorCollideMass(char *ID, double *A_even, double *A_odd, double *B_even, double *B_odd,
//        double *Den, double *Phi, double *ColorGrad, double *Velocity, double beta, int N, bool pBC);
//extern "C" void ScaLBL_D3Q19_AAeven_Color(double *dist, double *Aq, double *Bq, double *Den, double *Vel,
//        double *ColorGrad, double rhoA, double rhoB, double tauA, double tauB, double alpha, double beta,
//        double Fx, double Fy, double Fz, int start, int finish, int Np);
//extern "C" void ScaLBL_D3Q19_AAodd_Color(int *d_neighborList, double *dist, double *Aq, double *Bq, double *Den, double *Vel,
//        double *ColorGrad, double rhoA, double rhoB, double tauA, double tauB, double alpha, double beta,
//        double Fx, double Fy, double Fz, int start, int finish, int Np);

/* Disabled code, kept for reference: ColorMomentum / ColorMass prototypes and a draft
   ScaLBL_Color class.

extern "C" void ScaLBL_D3Q19_AAeven_ColorMomentum(double *dist, double *Den, double *Vel,
        double *ColorGrad, double rhoA, double rhoB, double tauA, double tauB, double alpha, double beta,
        double Fx, double Fy, double Fz, int start, int finish, int Np);

extern "C" void ScaLBL_D3Q19_AAodd_ColorMomentum(int *d_neighborList, double *dist, double *Den, double *Vel,
        double *ColorGrad, double rhoA, double rhoB, double tauA, double tauB, double alpha, double beta,
        double Fx, double Fy, double Fz, int start, int finish, int Np);

extern "C" void ScaLBL_D3Q19_AAeven_ColorMass(double *Aq, double *Bq, double *Den, double *Vel,
        double *ColorGrad, double beta, int start, int finish, int Np);

extern "C" void ScaLBL_D3Q19_AAodd_ColorMass(int *d_neighborList, double *Aq, double *Bq, double *Den, double *Vel,
        double *ColorGrad, double beta, int start, int finish, int Np);

class ScaLBL_Color{
public:
    ScaLBL_Color(Domain &Dm);
    ~ScaLBL_Color();

    int *NeighborList;
    int *dvcMap;
//  double *f_even,*f_odd;
    double *fq, *Aq, *Bq;
    double *Den, *Phi;
    double *ColorGrad;
    double *Vel;
    double *Pressure;

    ScaLBL_Communicator ScaLBL_Comm;
    //Create a second communicator based on the regular data layout
    ScaLBL_Communicator ScaLBL_Comm_Regular;

    void Initialize(Domain &Dm);
    void Run(int &timestep);

private:

};

void ScaLBL_Color::ScaLBL_Color(Domain &Dm){

}

void ScaLBL_Color::Initialize(Domain &Dm){

}

void ScaLBL_Color::Run(int &timestep){

}
*/

/*
 * ScaLBL_Communicator
 *
 * Communication object built from a Domain. The constructor defined below
 *   - duplicates Dm.Comm into the private communicator MPI_COMM_SCALBL,
 *   - copies the domain size, the neighbor ranks and the send/recv counts from Dm,
 *   - allocates the buffers and index lists declared here with ScaLBL_AllocateDeviceMemory,
 *   - copies the Dm send/recv lists into the dvcSendList_* / dvcRecvList_* arrays, and
 *   - fills the dvcRecvDist_* arrays through D3Q19_MapRecv.
 *
 * Most members come in sets of 18, one per neighbor: the six single-letter suffixes
 * (x,y,z,X,Y,Z) are the ones the constructor comments call "faces", the twelve
 * two-letter suffixes (xy, Xy, ...) the ones it calls "edges".
 * NOTE(review): lower/upper case presumably distinguishes the two directions along an
 * axis -- confirm against Domain.h.
 *
 * The bodies of the member functions other than the constructor are not visible in this
 * part of the file, so they are listed here without behavioral documentation.
 */
class ScaLBL_Communicator{
public:
    //......................................................................................
    // Build the communicator for the domain Dm (see the summary above).
    ScaLBL_Communicator(Domain &Dm);

    //ScaLBL_Communicator(Domain &Dm, IntArray &Map);
    ~ScaLBL_Communicator();
    //......................................................................................
    unsigned long int CommunicationCount,SendCount,RecvCount;
    // Domain size copied from Dm by the constructor; N = Nx*Ny*Nz
    int Nx,Ny,Nz,N;
    // set to 0 by the constructor
    int next;
    //......................................................................................
    //  Set up for D3Q19 distributions
    //      - determines how much memory is allocated
    //      - buffers are reused to send D3Q7 distributions and halo exchange as needed
    //......................................................................................
    // Buffers to store data sent and received by this MPI process.
    // The constructor allocates 5*count doubles for each face buffer and count doubles
    // for each edge buffer (count being the matching sendCount_* / recvCount_*).
    double *sendbuf_x, *sendbuf_y, *sendbuf_z, *sendbuf_X, *sendbuf_Y, *sendbuf_Z;
    double *sendbuf_xy, *sendbuf_yz, *sendbuf_xz, *sendbuf_Xy, *sendbuf_Yz, *sendbuf_xZ;
    double *sendbuf_xY, *sendbuf_yZ, *sendbuf_Xz, *sendbuf_XY, *sendbuf_YZ, *sendbuf_XZ;
    double *recvbuf_x, *recvbuf_y, *recvbuf_z, *recvbuf_X, *recvbuf_Y, *recvbuf_Z;
    double *recvbuf_xy, *recvbuf_yz, *recvbuf_xz, *recvbuf_Xy, *recvbuf_Yz, *recvbuf_xZ;
    double *recvbuf_xY, *recvbuf_yZ, *recvbuf_Xz, *recvbuf_XY, *recvbuf_YZ, *recvbuf_XZ;
    //......................................................................................

    // Data layout routines
    void MemoryOptimizedLayoutAA(IntArray &Map, int *neighborList, char *id, int Np);
    void MemoryOptimizedLayout(IntArray &Map, int *neighborList, char *id, int Np);
    void MemoryOptimizedLayoutFull(IntArray &Map, int *neighborList, char *id, int Np);
    void MemoryDenseLayout(IntArray &Map, int *neighborList, char *id, int Np);
    void MemoryDenseLayoutFull(IntArray &Map, int *neighborList, char *id, int Np);

    // Send / Recv pairs.
    // NOTE(review): presumably each Send must be completed by the matching Recv before
    // another Send is issued on the same instance (see Lock below) -- confirm.
    void SendD3Q19(double *f_even, double *f_odd);
    void RecvD3Q19(double *f_even, double *f_odd);
    void SendD3Q19AA(double *f_even, double *f_odd);
    void RecvD3Q19AA(double *f_even, double *f_odd);
    void SendD3Q19AA(double *dist);
    void RecvD3Q19AA(double *dist);
    void BiSendD3Q7(double *A_even, double *A_odd, double *B_even, double *B_odd);
    void BiRecvD3Q7(double *A_even, double *A_odd, double *B_even, double *B_odd);
    void BiSendD3Q7AA(double *Aq, double *Bq);
    void BiRecvD3Q7AA(double *Aq, double *Bq);
    void SendHalo(double *data);
    void RecvHalo(double *data);
    // Note: `map` is taken by value here, unlike the layout routines above.
    void RegularLayout(IntArray map, double *data, double *regdata);

    // Routines to set boundary conditions
    void Color_BC_z(int *Map, double *Phi, double *Den, double vA, double vB);
    void Color_BC_Z(int *Map, double *Phi, double *Den, double vA, double vB);
    void D3Q19_Pressure_BC_z(int *neighborList, double *fq, double din, int time);
    void D3Q19_Pressure_BC_Z(int *neighborList, double *fq, double dout, int time);
    double D3Q19_Flux_BC_z(int *neighborList, double *fq, double flux, int time);

    void TestSendD3Q19(double *f_even, double *f_odd);
    void TestRecvD3Q19(double *f_even, double *f_odd);

    // Debugging and unit testing functions
    void PrintD3Q19();

private:
    //void D3Q19_MapRecv_OLD(int q, int Cqx, int Cqy, int Cqz, int *list, int start, int count, int *d3q19_recvlist);
    // Called by the constructor once per (direction, receive list) pair to fill the
    // dvcRecvDist_* arrays; (Cqx,Cqy,Cqz) is the direction triple passed at each call.
    void D3Q19_MapRecv(int Cqx, int Cqy, int Cqz, int *list, int start, int count, int *d3q19_recvlist);

    bool Lock;  // use Lock to make sure only one call at a time to protect data in transit
    // only one set of Send requests can be active at any time (per instance)
    // (the constructor initializes Lock to false, i.e. unlocked)
    int i,j,k,n;
    int iproc,jproc,kproc;
    int nprocx,nprocy,nprocz;
    int sendtag,recvtag;
    // Give the object its own MPI communicator
    RankInfoStruct rank_info;
    MPI_Group Group;    // Group of processors associated with this domain
    MPI_Comm MPI_COMM_SCALBL;       // MPI Communicator for this domain (duplicate of Dm.Comm)
    MPI_Request req1[18],req2[18];
    MPI_Status stat1[18],stat2[18];
    //......................................................................................
    // MPI ranks for all 18 neighbors
    //......................................................................................
    // These variables are all private to prevent external things from modifying them!!
    //......................................................................................
    int rank;
    int rank_x,rank_y,rank_z,rank_X,rank_Y,rank_Z;
    int rank_xy,rank_XY,rank_xY,rank_Xy;
    int rank_xz,rank_XZ,rank_xZ,rank_Xz;
    int rank_yz,rank_YZ,rank_yZ,rank_Yz;
    //......................................................................................
    //......................................................................................
    // Send / receive counts per neighbor, copied from Dm by the constructor
    int sendCount_x, sendCount_y, sendCount_z, sendCount_X, sendCount_Y, sendCount_Z;
    int sendCount_xy, sendCount_yz, sendCount_xz, sendCount_Xy, sendCount_Yz, sendCount_xZ;
    int sendCount_xY, sendCount_yZ, sendCount_Xz, sendCount_XY, sendCount_YZ, sendCount_XZ;
    //......................................................................................
    int recvCount_x, recvCount_y, recvCount_z, recvCount_X, recvCount_Y, recvCount_Z;
    int recvCount_xy, recvCount_yz, recvCount_xz, recvCount_Xy, recvCount_Yz, recvCount_xZ;
    int recvCount_xY, recvCount_yZ, recvCount_Xz, recvCount_XY, recvCount_YZ, recvCount_XZ;
    //......................................................................................
    // Send lists that reside on the compute device (copies of Dm.sendList_*)
    int *dvcSendList_x, *dvcSendList_y, *dvcSendList_z, *dvcSendList_X, *dvcSendList_Y, *dvcSendList_Z;
    int *dvcSendList_xy, *dvcSendList_yz, *dvcSendList_xz, *dvcSendList_Xy, *dvcSendList_Yz, *dvcSendList_xZ;
    int *dvcSendList_xY, *dvcSendList_yZ, *dvcSendList_Xz, *dvcSendList_XY, *dvcSendList_YZ, *dvcSendList_XZ;
    // Receive lists that reside on the compute device (copies of Dm.recvList_*)
    int *dvcRecvList_x, *dvcRecvList_y, *dvcRecvList_z, *dvcRecvList_X, *dvcRecvList_Y, *dvcRecvList_Z;
    int *dvcRecvList_xy, *dvcRecvList_yz, *dvcRecvList_xz, *dvcRecvList_Xy, *dvcRecvList_Yz, *dvcRecvList_xZ;
    int *dvcRecvList_xY, *dvcRecvList_yZ, *dvcRecvList_Xz, *dvcRecvList_XY, *dvcRecvList_YZ, *dvcRecvList_XZ;
    // Receive lists for the distributions (5*recvCount ints per face, recvCount per edge;
    // filled by D3Q19_MapRecv in the constructor)
    int *dvcRecvDist_x, *dvcRecvDist_y, *dvcRecvDist_z, *dvcRecvDist_X, *dvcRecvDist_Y, *dvcRecvDist_Z;
    int *dvcRecvDist_xy, *dvcRecvDist_yz, *dvcRecvDist_xz, *dvcRecvDist_Xy, *dvcRecvDist_Yz, *dvcRecvDist_xZ;
    int *dvcRecvDist_xY, *dvcRecvDist_yZ, *dvcRecvDist_Xz, *dvcRecvDist_XY, *dvcRecvDist_YZ, *dvcRecvDist_XZ;
    //......................................................................................

};

ScaLBL_Communicator::ScaLBL_Communicator(Domain &Dm){
    //......................................................................................
    Lock=false; // unlock the communicator
    //......................................................................................
    // Create a separate copy of the communicator for the device
    //MPI_Comm_group(Dm.Comm,&Group);
    //MPI_Comm_create(Dm.Comm,Group,&MPI_COMM_SCALBL);
    MPI_Comm_dup(Dm.Comm,&MPI_COMM_SCALBL);
    //......................................................................................
    // Copy the domain size and communication information directly from Dm
    Nx = Dm.Nx;
    Ny = Dm.Ny;
    Nz = Dm.Nz;
    N = Nx*Ny*Nz;
    next=0;
    rank=Dm.rank;
    rank_x=Dm.rank_x;
    rank_y=Dm.rank_y;
    rank_z=Dm.rank_z;
    rank_X=Dm.rank_X;
    rank_Y=Dm.rank_Y;
    rank_Z=Dm.rank_Z;
    rank_xy=Dm.rank_xy;
    rank_XY=Dm.rank_XY;
    rank_xY=Dm.rank_xY;
    rank_Xy=Dm.rank_Xy;
    rank_xz=Dm.rank_xz;
    rank_XZ=Dm.rank_XZ;
    rank_xZ=Dm.rank_xZ;
    rank_Xz=Dm.rank_Xz;
    rank_yz=Dm.rank_yz;
    rank_YZ=Dm.rank_YZ;
    rank_yZ=Dm.rank_yZ;
    rank_Yz=Dm.rank_Yz;
    sendCount_x=Dm.sendCount_x;
    sendCount_y=Dm.sendCount_y;
    sendCount_z=Dm.sendCount_z;
    sendCount_X=Dm.sendCount_X;
    sendCount_Y=Dm.sendCount_Y;
    sendCount_Z=Dm.sendCount_Z;
    sendCount_xy=Dm.sendCount_xy;
    sendCount_yz=Dm.sendCount_yz;
    sendCount_xz=Dm.sendCount_xz;
    sendCount_Xy=Dm.sendCount_Xy;
    sendCount_Yz=Dm.sendCount_Yz;
    sendCount_xZ=Dm.sendCount_xZ;
    sendCount_xY=Dm.sendCount_xY;
    sendCount_yZ=Dm.sendCount_yZ;
    sendCount_Xz=Dm.sendCount_Xz;
    sendCount_XY=Dm.sendCount_XY;
    sendCount_YZ=Dm.sendCount_YZ;
    sendCount_XZ=Dm.sendCount_XZ;
    recvCount_x=Dm.recvCount_x;
    recvCount_y=Dm.recvCount_y;
    recvCount_z=Dm.recvCount_z;
    recvCount_X=Dm.recvCount_X;
    recvCount_Y=Dm.recvCount_Y;
    recvCount_Z=Dm.recvCount_Z;
    recvCount_xy=Dm.recvCount_xy;
    recvCount_yz=Dm.recvCount_yz;
    recvCount_xz=Dm.recvCount_xz;
    recvCount_Xy=Dm.recvCount_Xy;
    recvCount_Yz=Dm.recvCount_Yz;
    recvCount_xZ=Dm.recvCount_xZ;
    recvCount_xY=Dm.recvCount_xY;
    recvCount_yZ=Dm.recvCount_yZ;
recvCount_Xz=Dm.recvCount_Xz; recvCount_XY=Dm.recvCount_XY; recvCount_YZ=Dm.recvCount_YZ; recvCount_XZ=Dm.recvCount_XZ; iproc = Dm.iproc; jproc = Dm.jproc; kproc = Dm.kproc; nprocx = Dm.nprocx; nprocy = Dm.nprocy; nprocz = Dm.nprocz; //...................................................................................... ScaLBL_AllocateDeviceMemory((void **) &sendbuf_x, 5*sendCount_x*sizeof(double)); // Allocate device memory ScaLBL_AllocateDeviceMemory((void **) &sendbuf_X, 5*sendCount_X*sizeof(double)); // Allocate device memory ScaLBL_AllocateDeviceMemory((void **) &sendbuf_y, 5*sendCount_y*sizeof(double)); // Allocate device memory ScaLBL_AllocateDeviceMemory((void **) &sendbuf_Y, 5*sendCount_Y*sizeof(double)); // Allocate device memory ScaLBL_AllocateDeviceMemory((void **) &sendbuf_z, 5*sendCount_z*sizeof(double)); // Allocate device memory ScaLBL_AllocateDeviceMemory((void **) &sendbuf_Z, 5*sendCount_Z*sizeof(double)); // Allocate device memory ScaLBL_AllocateDeviceMemory((void **) &sendbuf_xy, sendCount_xy*sizeof(double)); // Allocate device memory ScaLBL_AllocateDeviceMemory((void **) &sendbuf_xY, sendCount_xY*sizeof(double)); // Allocate device memory ScaLBL_AllocateDeviceMemory((void **) &sendbuf_Xy, sendCount_Xy*sizeof(double)); // Allocate device memory ScaLBL_AllocateDeviceMemory((void **) &sendbuf_XY, sendCount_XY*sizeof(double)); // Allocate device memory ScaLBL_AllocateDeviceMemory((void **) &sendbuf_xz, sendCount_xz*sizeof(double)); // Allocate device memory ScaLBL_AllocateDeviceMemory((void **) &sendbuf_xZ, sendCount_xZ*sizeof(double)); // Allocate device memory ScaLBL_AllocateDeviceMemory((void **) &sendbuf_Xz, sendCount_Xz*sizeof(double)); // Allocate device memory ScaLBL_AllocateDeviceMemory((void **) &sendbuf_XZ, sendCount_XZ*sizeof(double)); // Allocate device memory ScaLBL_AllocateDeviceMemory((void **) &sendbuf_yz, sendCount_yz*sizeof(double)); // Allocate device memory ScaLBL_AllocateDeviceMemory((void **) &sendbuf_yZ, 
sendCount_yZ*sizeof(double)); // Allocate device memory ScaLBL_AllocateDeviceMemory((void **) &sendbuf_Yz, sendCount_Yz*sizeof(double)); // Allocate device memory ScaLBL_AllocateDeviceMemory((void **) &sendbuf_YZ, sendCount_YZ*sizeof(double)); // Allocate device memory //...................................................................................... ScaLBL_AllocateDeviceMemory((void **) &recvbuf_x, 5*recvCount_x*sizeof(double)); // Allocate device memory ScaLBL_AllocateDeviceMemory((void **) &recvbuf_X, 5*recvCount_X*sizeof(double)); // Allocate device memory ScaLBL_AllocateDeviceMemory((void **) &recvbuf_y, 5*recvCount_y*sizeof(double)); // Allocate device memory ScaLBL_AllocateDeviceMemory((void **) &recvbuf_Y, 5*recvCount_Y*sizeof(double)); // Allocate device memory ScaLBL_AllocateDeviceMemory((void **) &recvbuf_z, 5*recvCount_z*sizeof(double)); // Allocate device memory ScaLBL_AllocateDeviceMemory((void **) &recvbuf_Z, 5*recvCount_Z*sizeof(double)); // Allocate device memory ScaLBL_AllocateDeviceMemory((void **) &recvbuf_xy, recvCount_xy*sizeof(double)); // Allocate device memory ScaLBL_AllocateDeviceMemory((void **) &recvbuf_xY, recvCount_xY*sizeof(double)); // Allocate device memory ScaLBL_AllocateDeviceMemory((void **) &recvbuf_Xy, recvCount_Xy*sizeof(double)); // Allocate device memory ScaLBL_AllocateDeviceMemory((void **) &recvbuf_XY, recvCount_XY*sizeof(double)); // Allocate device memory ScaLBL_AllocateDeviceMemory((void **) &recvbuf_xz, recvCount_xz*sizeof(double)); // Allocate device memory ScaLBL_AllocateDeviceMemory((void **) &recvbuf_xZ, recvCount_xZ*sizeof(double)); // Allocate device memory ScaLBL_AllocateDeviceMemory((void **) &recvbuf_Xz, recvCount_Xz*sizeof(double)); // Allocate device memory ScaLBL_AllocateDeviceMemory((void **) &recvbuf_XZ, recvCount_XZ*sizeof(double)); // Allocate device memory ScaLBL_AllocateDeviceMemory((void **) &recvbuf_yz, recvCount_yz*sizeof(double)); // Allocate device memory ScaLBL_AllocateDeviceMemory((void 
**) &recvbuf_yZ, recvCount_yZ*sizeof(double)); // Allocate device memory ScaLBL_AllocateDeviceMemory((void **) &recvbuf_Yz, recvCount_Yz*sizeof(double)); // Allocate device memory ScaLBL_AllocateDeviceMemory((void **) &recvbuf_YZ, recvCount_YZ*sizeof(double)); // Allocate device memory //...................................................................................... ScaLBL_AllocateDeviceMemory((void **) &dvcSendList_x, sendCount_x*sizeof(int)); // Allocate device memory ScaLBL_AllocateDeviceMemory((void **) &dvcSendList_X, sendCount_X*sizeof(int)); // Allocate device memory ScaLBL_AllocateDeviceMemory((void **) &dvcSendList_y, sendCount_y*sizeof(int)); // Allocate device memory ScaLBL_AllocateDeviceMemory((void **) &dvcSendList_Y, sendCount_Y*sizeof(int)); // Allocate device memory ScaLBL_AllocateDeviceMemory((void **) &dvcSendList_z, sendCount_z*sizeof(int)); // Allocate device memory ScaLBL_AllocateDeviceMemory((void **) &dvcSendList_Z, sendCount_Z*sizeof(int)); // Allocate device memory ScaLBL_AllocateDeviceMemory((void **) &dvcSendList_xy, sendCount_xy*sizeof(int)); // Allocate device memory ScaLBL_AllocateDeviceMemory((void **) &dvcSendList_xY, sendCount_xY*sizeof(int)); // Allocate device memory ScaLBL_AllocateDeviceMemory((void **) &dvcSendList_Xy, sendCount_Xy*sizeof(int)); // Allocate device memory ScaLBL_AllocateDeviceMemory((void **) &dvcSendList_XY, sendCount_XY*sizeof(int)); // Allocate device memory ScaLBL_AllocateDeviceMemory((void **) &dvcSendList_xz, sendCount_xz*sizeof(int)); // Allocate device memory ScaLBL_AllocateDeviceMemory((void **) &dvcSendList_xZ, sendCount_xZ*sizeof(int)); // Allocate device memory ScaLBL_AllocateDeviceMemory((void **) &dvcSendList_Xz, sendCount_Xz*sizeof(int)); // Allocate device memory ScaLBL_AllocateDeviceMemory((void **) &dvcSendList_XZ, sendCount_XZ*sizeof(int)); // Allocate device memory ScaLBL_AllocateDeviceMemory((void **) &dvcSendList_yz, sendCount_yz*sizeof(int)); // Allocate device memory 
ScaLBL_AllocateDeviceMemory((void **) &dvcSendList_yZ, sendCount_yZ*sizeof(int)); // Allocate device memory ScaLBL_AllocateDeviceMemory((void **) &dvcSendList_Yz, sendCount_Yz*sizeof(int)); // Allocate device memory ScaLBL_AllocateDeviceMemory((void **) &dvcSendList_YZ, sendCount_YZ*sizeof(int)); // Allocate device memory //...................................................................................... ScaLBL_AllocateDeviceMemory((void **) &dvcRecvList_x, recvCount_x*sizeof(int)); // Allocate device memory ScaLBL_AllocateDeviceMemory((void **) &dvcRecvList_X, recvCount_X*sizeof(int)); // Allocate device memory ScaLBL_AllocateDeviceMemory((void **) &dvcRecvList_y, recvCount_y*sizeof(int)); // Allocate device memory ScaLBL_AllocateDeviceMemory((void **) &dvcRecvList_Y, recvCount_Y*sizeof(int)); // Allocate device memory ScaLBL_AllocateDeviceMemory((void **) &dvcRecvList_z, recvCount_z*sizeof(int)); // Allocate device memory ScaLBL_AllocateDeviceMemory((void **) &dvcRecvList_Z, recvCount_Z*sizeof(int)); // Allocate device memory ScaLBL_AllocateDeviceMemory((void **) &dvcRecvList_xy, recvCount_xy*sizeof(int)); // Allocate device memory ScaLBL_AllocateDeviceMemory((void **) &dvcRecvList_xY, recvCount_xY*sizeof(int)); // Allocate device memory ScaLBL_AllocateDeviceMemory((void **) &dvcRecvList_Xy, recvCount_Xy*sizeof(int)); // Allocate device memory ScaLBL_AllocateDeviceMemory((void **) &dvcRecvList_XY, recvCount_XY*sizeof(int)); // Allocate device memory ScaLBL_AllocateDeviceMemory((void **) &dvcRecvList_xz, recvCount_xz*sizeof(int)); // Allocate device memory ScaLBL_AllocateDeviceMemory((void **) &dvcRecvList_xZ, recvCount_xZ*sizeof(int)); // Allocate device memory ScaLBL_AllocateDeviceMemory((void **) &dvcRecvList_Xz, recvCount_Xz*sizeof(int)); // Allocate device memory ScaLBL_AllocateDeviceMemory((void **) &dvcRecvList_XZ, recvCount_XZ*sizeof(int)); // Allocate device memory ScaLBL_AllocateDeviceMemory((void **) &dvcRecvList_yz, recvCount_yz*sizeof(int)); // 
Allocate device memory ScaLBL_AllocateDeviceMemory((void **) &dvcRecvList_yZ, recvCount_yZ*sizeof(int)); // Allocate device memory ScaLBL_AllocateDeviceMemory((void **) &dvcRecvList_Yz, recvCount_Yz*sizeof(int)); // Allocate device memory ScaLBL_AllocateDeviceMemory((void **) &dvcRecvList_YZ, recvCount_YZ*sizeof(int)); // Allocate device memory //...................................................................................... ScaLBL_AllocateDeviceMemory((void **) &dvcRecvDist_x, 5*recvCount_x*sizeof(int)); // Allocate device memory ScaLBL_AllocateDeviceMemory((void **) &dvcRecvDist_X, 5*recvCount_X*sizeof(int)); // Allocate device memory ScaLBL_AllocateDeviceMemory((void **) &dvcRecvDist_y, 5*recvCount_y*sizeof(int)); // Allocate device memory ScaLBL_AllocateDeviceMemory((void **) &dvcRecvDist_Y, 5*recvCount_Y*sizeof(int)); // Allocate device memory ScaLBL_AllocateDeviceMemory((void **) &dvcRecvDist_z, 5*recvCount_z*sizeof(int)); // Allocate device memory ScaLBL_AllocateDeviceMemory((void **) &dvcRecvDist_Z, 5*recvCount_Z*sizeof(int)); // Allocate device memory ScaLBL_AllocateDeviceMemory((void **) &dvcRecvDist_xy, recvCount_xy*sizeof(int)); // Allocate device memory ScaLBL_AllocateDeviceMemory((void **) &dvcRecvDist_xY, recvCount_xY*sizeof(int)); // Allocate device memory ScaLBL_AllocateDeviceMemory((void **) &dvcRecvDist_Xy, recvCount_Xy*sizeof(int)); // Allocate device memory ScaLBL_AllocateDeviceMemory((void **) &dvcRecvDist_XY, recvCount_XY*sizeof(int)); // Allocate device memory ScaLBL_AllocateDeviceMemory((void **) &dvcRecvDist_xz, recvCount_xz*sizeof(int)); // Allocate device memory ScaLBL_AllocateDeviceMemory((void **) &dvcRecvDist_xZ, recvCount_xZ*sizeof(int)); // Allocate device memory ScaLBL_AllocateDeviceMemory((void **) &dvcRecvDist_Xz, recvCount_Xz*sizeof(int)); // Allocate device memory ScaLBL_AllocateDeviceMemory((void **) &dvcRecvDist_XZ, recvCount_XZ*sizeof(int)); // Allocate device memory ScaLBL_AllocateDeviceMemory((void **) 
&dvcRecvDist_yz, recvCount_yz*sizeof(int)); // Allocate device memory ScaLBL_AllocateDeviceMemory((void **) &dvcRecvDist_yZ, recvCount_yZ*sizeof(int)); // Allocate device memory ScaLBL_AllocateDeviceMemory((void **) &dvcRecvDist_Yz, recvCount_Yz*sizeof(int)); // Allocate device memory ScaLBL_AllocateDeviceMemory((void **) &dvcRecvDist_YZ, recvCount_YZ*sizeof(int)); // Allocate device memory //...................................................................................... ScaLBL_CopyToDevice(dvcSendList_x,Dm.sendList_x,sendCount_x*sizeof(int)); ScaLBL_CopyToDevice(dvcSendList_X,Dm.sendList_X,sendCount_X*sizeof(int)); ScaLBL_CopyToDevice(dvcSendList_y,Dm.sendList_y,sendCount_y*sizeof(int)); ScaLBL_CopyToDevice(dvcSendList_Y,Dm.sendList_Y,sendCount_Y*sizeof(int)); ScaLBL_CopyToDevice(dvcSendList_z,Dm.sendList_z,sendCount_z*sizeof(int)); ScaLBL_CopyToDevice(dvcSendList_Z,Dm.sendList_Z,sendCount_Z*sizeof(int)); ScaLBL_CopyToDevice(dvcSendList_xy,Dm.sendList_xy,sendCount_xy*sizeof(int)); ScaLBL_CopyToDevice(dvcSendList_XY,Dm.sendList_XY,sendCount_XY*sizeof(int)); ScaLBL_CopyToDevice(dvcSendList_xY,Dm.sendList_xY,sendCount_xY*sizeof(int)); ScaLBL_CopyToDevice(dvcSendList_Xy,Dm.sendList_Xy,sendCount_Xy*sizeof(int)); ScaLBL_CopyToDevice(dvcSendList_xz,Dm.sendList_xz,sendCount_xz*sizeof(int)); ScaLBL_CopyToDevice(dvcSendList_XZ,Dm.sendList_XZ,sendCount_XZ*sizeof(int)); ScaLBL_CopyToDevice(dvcSendList_xZ,Dm.sendList_xZ,sendCount_xZ*sizeof(int)); ScaLBL_CopyToDevice(dvcSendList_Xz,Dm.sendList_Xz,sendCount_Xz*sizeof(int)); ScaLBL_CopyToDevice(dvcSendList_yz,Dm.sendList_yz,sendCount_yz*sizeof(int)); ScaLBL_CopyToDevice(dvcSendList_YZ,Dm.sendList_YZ,sendCount_YZ*sizeof(int)); ScaLBL_CopyToDevice(dvcSendList_yZ,Dm.sendList_yZ,sendCount_yZ*sizeof(int)); ScaLBL_CopyToDevice(dvcSendList_Yz,Dm.sendList_Yz,sendCount_Yz*sizeof(int)); //...................................................................................... 
ScaLBL_CopyToDevice(dvcRecvList_x,Dm.recvList_x,recvCount_x*sizeof(int)); ScaLBL_CopyToDevice(dvcRecvList_X,Dm.recvList_X,recvCount_X*sizeof(int)); ScaLBL_CopyToDevice(dvcRecvList_y,Dm.recvList_y,recvCount_y*sizeof(int)); ScaLBL_CopyToDevice(dvcRecvList_Y,Dm.recvList_Y,recvCount_Y*sizeof(int)); ScaLBL_CopyToDevice(dvcRecvList_z,Dm.recvList_z,recvCount_z*sizeof(int)); ScaLBL_CopyToDevice(dvcRecvList_Z,Dm.recvList_Z,recvCount_Z*sizeof(int)); ScaLBL_CopyToDevice(dvcRecvList_xy,Dm.recvList_xy,recvCount_xy*sizeof(int)); ScaLBL_CopyToDevice(dvcRecvList_XY,Dm.recvList_XY,recvCount_XY*sizeof(int)); ScaLBL_CopyToDevice(dvcRecvList_xY,Dm.recvList_xY,recvCount_xY*sizeof(int)); ScaLBL_CopyToDevice(dvcRecvList_Xy,Dm.recvList_Xy,recvCount_Xy*sizeof(int)); ScaLBL_CopyToDevice(dvcRecvList_xz,Dm.recvList_xz,recvCount_xz*sizeof(int)); ScaLBL_CopyToDevice(dvcRecvList_XZ,Dm.recvList_XZ,recvCount_XZ*sizeof(int)); ScaLBL_CopyToDevice(dvcRecvList_xZ,Dm.recvList_xZ,recvCount_xZ*sizeof(int)); ScaLBL_CopyToDevice(dvcRecvList_Xz,Dm.recvList_Xz,recvCount_Xz*sizeof(int)); ScaLBL_CopyToDevice(dvcRecvList_yz,Dm.recvList_yz,recvCount_yz*sizeof(int)); ScaLBL_CopyToDevice(dvcRecvList_YZ,Dm.recvList_YZ,recvCount_YZ*sizeof(int)); ScaLBL_CopyToDevice(dvcRecvList_yZ,Dm.recvList_yZ,recvCount_yZ*sizeof(int)); ScaLBL_CopyToDevice(dvcRecvList_Yz,Dm.recvList_Yz,recvCount_Yz*sizeof(int)); //...................................................................................... MPI_Barrier(MPI_COMM_SCALBL); //................................................................................... // Set up the recieve distribution lists //................................................................................... //...Map recieve list for the X face: q=2,8,10,12,14 ................................. 
D3Q19_MapRecv(-1,0,0,Dm.recvList_X,0,recvCount_X,dvcRecvDist_X); D3Q19_MapRecv(-1,-1,0,Dm.recvList_X,recvCount_X,recvCount_X,dvcRecvDist_X); D3Q19_MapRecv(-1,1,0,Dm.recvList_X,2*recvCount_X,recvCount_X,dvcRecvDist_X); D3Q19_MapRecv(-1,0,-1,Dm.recvList_X,3*recvCount_X,recvCount_X,dvcRecvDist_X); D3Q19_MapRecv(-1,0,1,Dm.recvList_X,4*recvCount_X,recvCount_X,dvcRecvDist_X); //................................................................................... //...Map recieve list for the x face: q=1,7,9,11,13.................................. D3Q19_MapRecv(1,0,0,Dm.recvList_x,0,recvCount_x,dvcRecvDist_x); D3Q19_MapRecv(1,1,0,Dm.recvList_x,recvCount_x,recvCount_x,dvcRecvDist_x); D3Q19_MapRecv(1,-1,0,Dm.recvList_x,2*recvCount_x,recvCount_x,dvcRecvDist_x); D3Q19_MapRecv(1,0,1,Dm.recvList_x,3*recvCount_x,recvCount_x,dvcRecvDist_x); D3Q19_MapRecv(1,0,-1,Dm.recvList_x,4*recvCount_x,recvCount_x,dvcRecvDist_x); //................................................................................... //...Map recieve list for the y face: q=4,8,9,16,18 ................................... D3Q19_MapRecv(0,-1,0,Dm.recvList_Y,0,recvCount_Y,dvcRecvDist_Y); D3Q19_MapRecv(-1,-1,0,Dm.recvList_Y,recvCount_Y,recvCount_Y,dvcRecvDist_Y); D3Q19_MapRecv(1,-1,0,Dm.recvList_Y,2*recvCount_Y,recvCount_Y,dvcRecvDist_Y); D3Q19_MapRecv(0,-1,-1,Dm.recvList_Y,3*recvCount_Y,recvCount_Y,dvcRecvDist_Y); D3Q19_MapRecv(0,-1,1,Dm.recvList_Y,4*recvCount_Y,recvCount_Y,dvcRecvDist_Y); //................................................................................... //...Map recieve list for the Y face: q=3,7,10,15,17 .................................. 
D3Q19_MapRecv(0,1,0,Dm.recvList_y,0,recvCount_y,dvcRecvDist_y); D3Q19_MapRecv(1,1,0,Dm.recvList_y,recvCount_y,recvCount_y,dvcRecvDist_y); D3Q19_MapRecv(-1,1,0,Dm.recvList_y,2*recvCount_y,recvCount_y,dvcRecvDist_y); D3Q19_MapRecv(0,1,1,Dm.recvList_y,3*recvCount_y,recvCount_y,dvcRecvDist_y); D3Q19_MapRecv(0,1,-1,Dm.recvList_y,4*recvCount_y,recvCount_y,dvcRecvDist_y); //................................................................................... //...Map recieve list for the z face<<<6,12,13,16,17).............................................. D3Q19_MapRecv(0,0,-1,Dm.recvList_Z,0,recvCount_Z,dvcRecvDist_Z); D3Q19_MapRecv(-1,0,-1,Dm.recvList_Z,recvCount_Z,recvCount_Z,dvcRecvDist_Z); D3Q19_MapRecv(1,0,-1,Dm.recvList_Z,2*recvCount_Z,recvCount_Z,dvcRecvDist_Z); D3Q19_MapRecv(0,-1,-1,Dm.recvList_Z,3*recvCount_Z,recvCount_Z,dvcRecvDist_Z); D3Q19_MapRecv(0,1,-1,Dm.recvList_Z,4*recvCount_Z,recvCount_Z,dvcRecvDist_Z); //...Map recieve list for the Z face<<<5,11,14,15,18).............................................. D3Q19_MapRecv(0,0,1,Dm.recvList_z,0,recvCount_z,dvcRecvDist_z); D3Q19_MapRecv(1,0,1,Dm.recvList_z,recvCount_z,recvCount_z,dvcRecvDist_z); D3Q19_MapRecv(-1,0,1,Dm.recvList_z,2*recvCount_z,recvCount_z,dvcRecvDist_z); D3Q19_MapRecv(0,1,1,Dm.recvList_z,3*recvCount_z,recvCount_z,dvcRecvDist_z); D3Q19_MapRecv(0,-1,1,Dm.recvList_z,4*recvCount_z,recvCount_z,dvcRecvDist_z); //.................................................................................. //...Map recieve list for the xy edge <<<8)................................ D3Q19_MapRecv(-1,-1,0,Dm.recvList_XY,0,recvCount_XY,dvcRecvDist_XY); //...Map recieve list for the Xy edge <<<9)................................ D3Q19_MapRecv(1,-1,0,Dm.recvList_xY,0,recvCount_xY,dvcRecvDist_xY); //...Map recieve list for the xY edge <<<10)................................ D3Q19_MapRecv(-1,1,0,Dm.recvList_Xy,0,recvCount_Xy,dvcRecvDist_Xy); //...Map recieve list for the XY edge <<<7)................................ 
D3Q19_MapRecv(1,1,0,Dm.recvList_xy,0,recvCount_xy,dvcRecvDist_xy); //...Map recieve list for the xz edge <<<12)................................ D3Q19_MapRecv(-1,0,-1,Dm.recvList_XZ,0,recvCount_XZ,dvcRecvDist_XZ); //...Map recieve list for the xZ edge <<<14)................................ D3Q19_MapRecv(-1,0,1,Dm.recvList_Xz,0,recvCount_Xz,dvcRecvDist_Xz); //...Map recieve list for the Xz edge <<<13)................................ D3Q19_MapRecv(1,0,-1,Dm.recvList_xZ,0,recvCount_xZ,dvcRecvDist_xZ); //...Map recieve list for the XZ edge <<<11)................................ D3Q19_MapRecv(1,0,1,Dm.recvList_xz,0,recvCount_xz,dvcRecvDist_xz); //...Map recieve list for the yz edge <<<16)................................ D3Q19_MapRecv(0,-1,-1,Dm.recvList_YZ,0,recvCount_YZ,dvcRecvDist_YZ); //...Map recieve list for the yZ edge <<<18)................................ D3Q19_MapRecv(0,-1,1,Dm.recvList_Yz,0,recvCount_Yz,dvcRecvDist_Yz); //...Map recieve list for the Yz edge <<<17)................................ D3Q19_MapRecv(0,1,-1,Dm.recvList_yZ,0,recvCount_yZ,dvcRecvDist_yZ); //...Map recieve list for the YZ edge <<<15)................................ D3Q19_MapRecv(0,1,1,Dm.recvList_yz,0,recvCount_yz,dvcRecvDist_yz); //................................................................................... //...................................................................................... MPI_Barrier(MPI_COMM_SCALBL); ScaLBL_DeviceBarrier(); //...................................................................................... 
SendCount = sendCount_x+sendCount_X+sendCount_y+sendCount_Y+sendCount_z+sendCount_Z+ sendCount_xy+sendCount_Xy+sendCount_xY+sendCount_XY+ sendCount_xZ+sendCount_Xz+sendCount_xZ+sendCount_XZ+ sendCount_yz+sendCount_Yz+sendCount_yZ+sendCount_YZ; RecvCount = recvCount_x+recvCount_X+recvCount_y+recvCount_Y+recvCount_z+recvCount_Z+ recvCount_xy+recvCount_Xy+recvCount_xY+recvCount_XY+ recvCount_xZ+recvCount_Xz+recvCount_xZ+recvCount_XZ+ recvCount_yz+recvCount_Yz+recvCount_yZ+recvCount_YZ; CommunicationCount = SendCount+RecvCount; //...................................................................................... } ScaLBL_Communicator::~ScaLBL_Communicator(){ // destrutor does nothing (bad idea) // -- note that there needs to be a way to free memory allocated on the device!!! } void ScaLBL_Communicator::D3Q19_MapRecv(int Cqx, int Cqy, int Cqz, int *list, int start, int count, int *d3q19_recvlist){ int i,j,k,n,nn,idx; int * ReturnDist; ReturnDist=new int [count]; for (idx=0; idx Np ){ ERROR("ScaLBL_Communicator::MemoryDenseLayout: Failed to create memory efficient layout!\n"); } // for (k=1;k Np) printf("ScaLBL_Communicator::MemoryDenseLayout: Map(%i,%i,%i) = %i > %i \n",i,j,k,Map(i,j,k),Np); else if (!(idx<0)){ // store the idx associated with each neighbor // store idx for self if neighbor is in solid or out of domain //D3Q19 = {{1,0,0},{-1,0,0} // {0,1,0},{0,-1,0} // {0,0,1},{0,0,-1}, // {1,1,0},{-1,-1,0}, // {1,-1,0},{-1,1,0}, // {1,0,1},{-1,0,-1}, // {1,0,-1},{-1,0,1}, // {0,1,1},{0,-1,-1}, // {0,1,-1},{0,-1,1}}; // note that only odd distributions need to be stored to execute the swap algorithm int neighbor; // cycle through the neighbors of lattice site idx neighbor=Map(i+1,j,k); if (neighbor==-2) neighborList[idx]=-1; else if (neighbor<0) neighborList[idx]=idx; else neighborList[idx]=neighbor; neighbor=Map(i,j+1,k); if (neighbor==-2) neighborList[Np+idx]=-1; else if (neighbor<0) neighborList[Np+idx]=idx; else neighborList[Np+idx]=neighbor; 
neighbor=Map(i,j,k+1); if (neighbor==-2) neighborList[2*Np+idx]=-1; else if (neighbor<0) neighborList[2*Np+idx]=idx; else neighborList[2*Np+idx]=neighbor; neighbor=Map(i+1,j+1,k); if (neighbor==-2) neighborList[3*Np+idx]=-1; else if (neighbor<0) neighborList[3*Np+idx]=idx; else neighborList[3*Np+idx]=neighbor; neighbor=Map(i+1,j-1,k); if (neighbor==-2) neighborList[4*Np+idx]=-1; else if (neighbor<0) neighborList[4*Np+idx]=idx; else neighborList[4*Np+idx]=neighbor; neighbor=Map(i+1,j,k+1); if (neighbor==-2) neighborList[5*Np+idx]=-1; else if (neighbor<0) neighborList[5*Np+idx]=idx; else neighborList[5*Np+idx]=neighbor; neighbor=Map(i+1,j,k-1); if (neighbor==-2) neighborList[6*Np+idx]=-1; else if (neighbor<0) neighborList[6*Np+idx]=idx; else neighborList[6*Np+idx]=neighbor; neighbor=Map(i,j+1,k+1); if (neighbor==-2) neighborList[7*Np+idx]=-1; else if (neighbor<0) neighborList[7*Np+idx]=idx; else neighborList[7*Np+idx]=neighbor; neighbor=Map(i,j+1,k-1); if (neighbor==-2) neighborList[8*Np+idx]=-1; else if (neighbor<0) neighborList[8*Np+idx]=idx; else neighborList[8*Np+idx]=neighbor; } } } } //for (idx=0; idx Np ){ ERROR("ScaLBL_Communicator::MemoryDenseLayoutFull: Failed to create memory efficient layout!\n"); } // if (rank == 0) { // printf("* Displaying the final map from rank %d\n",rank); // // for (k=1;k Np) printf("ScaLBL_Communicator::MemoryDenseLayoutFull: Map(%i,%i,%i) = %i > %i \n",i,j,k,Map(i,j,k),Np); else if (!(idx<0)){ // store the idx associated with each neighbor // store idx for self if neighbor is in solid or out of domain //D3Q19 = {{1,0,0},{-1,0,0} // {0,1,0},{0,-1,0} // {0,0,1},{0,0,-1}, // {1,1,0},{-1,-1,0}, // {1,-1,0},{-1,1,0}, // {1,0,1},{-1,0,-1}, // {1,0,-1},{-1,0,1}, // {0,1,1},{0,-1,-1}, // {0,1,-1},{0,-1,1}}; /* * Storing the full neighbor list. The AA algorithm may require fewer neighbors but I'm saving everything for now... 
* */ int neighbor; // cycle through the neighbors of lattice site idx neighbor=Map(i+1,j,k); if (neighbor==-2) neighborList[idx]=-1; else if (neighbor<0) neighborList[idx]=idx; else neighborList[idx]=neighbor; // 2 neighbor=Map(i-1,j,k); if (neighbor==-2) neighborList[Np+idx]=-1; else if (neighbor<0) neighborList[Np+idx]=idx; else neighborList[Np+idx]=neighbor; neighbor=Map(i,j+1,k); if (neighbor==-2) neighborList[2*Np+idx]=-1; else if (neighbor<0) neighborList[2*Np+idx]=idx; else neighborList[2*Np+idx]=neighbor; // 4 neighbor=Map(i,j-1,k); if (neighbor==-2) neighborList[3*Np+idx]=-1; else if (neighbor<0) neighborList[3*Np+idx]=idx; else neighborList[3*Np+idx]=neighbor; neighbor=Map(i,j,k+1); if (neighbor==-2) neighborList[4*Np+idx]=-1; else if (neighbor<0) neighborList[4*Np+idx]=idx; else neighborList[4*Np+idx]=neighbor; // 6 neighbor=Map(i,j,k-1); if (neighbor==-2) neighborList[5*Np+idx]=-1; else if (neighbor<0) neighborList[5*Np+idx]=idx; else neighborList[5*Np+idx]=neighbor; neighbor=Map(i+1,j+1,k); if (neighbor==-2) neighborList[6*Np+idx]=-1; else if (neighbor<0) neighborList[6*Np+idx]=idx; else neighborList[6*Np+idx]=neighbor; // 8 neighbor=Map(i-1,j-1,k); if (neighbor==-2) neighborList[7*Np+idx]=-1; else if (neighbor<0) neighborList[7*Np+idx]=idx; else neighborList[7*Np+idx]=neighbor; neighbor=Map(i+1,j-1,k); if (neighbor==-2) neighborList[8*Np+idx]=-1; else if (neighbor<0) neighborList[8*Np+idx]=idx; else neighborList[8*Np+idx]=neighbor; // 10 neighbor=Map(i-1,j+1,k); if (neighbor==-2) neighborList[9*Np+idx]=-1; else if (neighbor<0) neighborList[9*Np+idx]=idx; else neighborList[9*Np+idx]=neighbor; neighbor=Map(i+1,j,k+1); if (neighbor==-2) neighborList[10*Np+idx]=-1; else if (neighbor<0) neighborList[10*Np+idx]=idx; else neighborList[10*Np+idx]=neighbor; // 12 neighbor=Map(i-1,j,k-1); if (neighbor==-2) neighborList[11*Np+idx]=-1; else if (neighbor<0) neighborList[11*Np+idx]=idx; else neighborList[11*Np+idx]=neighbor; neighbor=Map(i+1,j,k-1); if 
(neighbor==-2) neighborList[12*Np+idx]=-1; else if (neighbor<0) neighborList[12*Np+idx]=idx; else neighborList[12*Np+idx]=neighbor; // 14 neighbor=Map(i-1,j,k+1); if (neighbor==-2) neighborList[13*Np+idx]=-1; else if (neighbor<0) neighborList[13*Np+idx]=idx; else neighborList[13*Np+idx]=neighbor; neighbor=Map(i,j+1,k+1); if (neighbor==-2) neighborList[14*Np+idx]=-1; else if (neighbor<0) neighborList[14*Np+idx]=idx; else neighborList[14*Np+idx]=neighbor; // 16 neighbor=Map(i,j-1,k-1); if (neighbor==-2) neighborList[15*Np+idx]=-1; else if (neighbor<0) neighborList[15*Np+idx]=idx; else neighborList[15*Np+idx]=neighbor; neighbor=Map(i,j+1,k-1); if (neighbor==-2) neighborList[16*Np+idx]=-1; else if (neighbor<0) neighborList[16*Np+idx]=idx; else neighborList[16*Np+idx]=neighbor; // 18 neighbor=Map(i,j-1,k+1); if (neighbor==-2) neighborList[17*Np+idx]=-1; else if (neighbor<0) neighborList[17*Np+idx]=idx; else neighborList[17*Np+idx]=neighbor; } } } } //....................................................................... // Now map through SendList and RecvList to update indices // First loop over the send lists int *TempBuffer; TempBuffer = new int [5*RecvCount]; //....................................................................... 
// Re-index the send lists ScaLBL_CopyToHost(TempBuffer,dvcSendList_x,sendCount_x*sizeof(int)); for (i=0; i Np ){ ERROR("ScaLBL_Communicator::MemoryOptimizedLayout: Failed to create memory efficient layout!\n"); } // for (k=1;k Np) printf("ScaLBL_Communicator::MemoryOptimizedLayout: Map(%i,%i,%i) = %i > %i \n",i,j,k,Map(i,j,k),Np); else if (!(idx<0)){ // store the idx associated with each neighbor // store idx for self if neighbor is in solid or out of domain //D3Q19 = {{1,0,0},{-1,0,0} // {0,1,0},{0,-1,0} // {0,0,1},{0,0,-1}, // {1,1,0},{-1,-1,0}, // {1,-1,0},{-1,1,0}, // {1,0,1},{-1,0,-1}, // {1,0,-1},{-1,0,1}, // {0,1,1},{0,-1,-1}, // {0,1,-1},{0,-1,1}}; // note that only odd distributions need to be stored to execute the swap algorithm int neighbor; // cycle through the neighbors of lattice site idx neighbor=Map(i+1,j,k); if (neighbor==-2) neighborList[idx]=-1; else if (neighbor<0) neighborList[idx]=idx; else neighborList[idx]=neighbor; neighbor=Map(i,j+1,k); if (neighbor==-2) neighborList[Np+idx]=-1; else if (neighbor<0) neighborList[Np+idx]=idx; else neighborList[Np+idx]=neighbor; neighbor=Map(i,j,k+1); if (neighbor==-2) neighborList[2*Np+idx]=-1; else if (neighbor<0) neighborList[2*Np+idx]=idx; else neighborList[2*Np+idx]=neighbor; neighbor=Map(i+1,j+1,k); if (neighbor==-2) neighborList[3*Np+idx]=-1; else if (neighbor<0) neighborList[3*Np+idx]=idx; else neighborList[3*Np+idx]=neighbor; neighbor=Map(i+1,j-1,k); if (neighbor==-2) neighborList[4*Np+idx]=-1; else if (neighbor<0) neighborList[4*Np+idx]=idx; else neighborList[4*Np+idx]=neighbor; neighbor=Map(i+1,j,k+1); if (neighbor==-2) neighborList[5*Np+idx]=-1; else if (neighbor<0) neighborList[5*Np+idx]=idx; else neighborList[5*Np+idx]=neighbor; neighbor=Map(i+1,j,k-1); if (neighbor==-2) neighborList[6*Np+idx]=-1; else if (neighbor<0) neighborList[6*Np+idx]=idx; else neighborList[6*Np+idx]=neighbor; neighbor=Map(i,j+1,k+1); if (neighbor==-2) neighborList[7*Np+idx]=-1; else if (neighbor<0) 
neighborList[7*Np+idx]=idx; else neighborList[7*Np+idx]=neighbor; neighbor=Map(i,j+1,k-1); if (neighbor==-2) neighborList[8*Np+idx]=-1; else if (neighbor<0) neighborList[8*Np+idx]=idx; else neighborList[8*Np+idx]=neighbor; } } } } //for (idx=0; idx Np ){ ERROR("ScaLBL_Communicator::MemoryOptimizedLayout: Failed to create memory efficient layout!\n"); } // for (k=1;k Np) printf("ScaLBL_Communicator::MemoryOptimizedLayout: Map(%i,%i,%i) = %i > %i \n",i,j,k,Map(i,j,k),Np); else if (!(idx<0)){ // store the idx associated with each neighbor // store idx for self if neighbor is in solid or out of domain //D3Q19 = {{1,0,0},{-1,0,0} // {0,1,0},{0,-1,0} // {0,0,1},{0,0,-1}, // {1,1,0},{-1,-1,0}, // {1,-1,0},{-1,1,0}, // {1,0,1},{-1,0,-1}, // {1,0,-1},{-1,0,1}, // {0,1,1},{0,-1,-1}, // {0,1,-1},{0,-1,1}}; // note that only odd distributions need to be stored to execute the swap algorithm int neighbor; // cycle through the neighbors of lattice site idx neighbor=Map(i+1,j,k); if (neighbor==-2) neighborList[idx]=-1; else if (neighbor<0) neighborList[idx]=idx; else neighborList[idx]=neighbor; neighbor=Map(i-1,j,k); if (neighbor==-2) neighborList[Np+idx]=-1; else if (neighbor<0) neighborList[Np+idx]=idx; else neighborList[Np+idx]=neighbor; neighbor=Map(i,j+1,k); if (neighbor==-2) neighborList[2*Np+idx]=-1; else if (neighbor<0) neighborList[2*Np+idx]=idx; else neighborList[2*Np+idx]=neighbor; neighbor=Map(i,j-1,k); if (neighbor==-2) neighborList[3*Np+idx]=-1; else if (neighbor<0) neighborList[3*Np+idx]=idx; else neighborList[3*Np+idx]=neighbor; neighbor=Map(i,j,k+1); if (neighbor==-2) neighborList[4*Np+idx]=-1; else if (neighbor<0) neighborList[4*Np+idx]=idx; else neighborList[4*Np+idx]=neighbor; neighbor=Map(i,j,k-1); if (neighbor==-2) neighborList[5*Np+idx]=-1; else if (neighbor<0) neighborList[5*Np+idx]=idx; else neighborList[5*Np+idx]=neighbor; neighbor=Map(i+1,j+1,k); if (neighbor==-2) neighborList[6*Np+idx]=-1; else if (neighbor<0) neighborList[6*Np+idx]=idx; else 
neighborList[6*Np+idx]=neighbor; neighbor=Map(i-1,j-1,k); if (neighbor==-2) neighborList[7*Np+idx]=-1; else if (neighbor<0) neighborList[7*Np+idx]=idx; else neighborList[7*Np+idx]=neighbor; neighbor=Map(i+1,j-1,k); if (neighbor==-2) neighborList[8*Np+idx]=-1; else if (neighbor<0) neighborList[8*Np+idx]=idx; else neighborList[8*Np+idx]=neighbor; neighbor=Map(i-1,j+1,k); if (neighbor==-2) neighborList[9*Np+idx]=-1; else if (neighbor<0) neighborList[9*Np+idx]=idx; else neighborList[9*Np+idx]=neighbor; neighbor=Map(i+1,j,k+1); if (neighbor==-2) neighborList[10*Np+idx]=-1; else if (neighbor<0) neighborList[10*Np+idx]=idx; else neighborList[10*Np+idx]=neighbor; neighbor=Map(i-1,j,k-1); if (neighbor==-2) neighborList[11*Np+idx]=-1; else if (neighbor<0) neighborList[11*Np+idx]=idx; else neighborList[11*Np+idx]=neighbor; neighbor=Map(i+1,j,k-1); if (neighbor==-2) neighborList[12*Np+idx]=-1; else if (neighbor<0) neighborList[12*Np+idx]=idx; else neighborList[12*Np+idx]=neighbor; neighbor=Map(i-1,j,k+1); if (neighbor==-2) neighborList[13*Np+idx]=-1; else if (neighbor<0) neighborList[13*Np+idx]=idx; else neighborList[13*Np+idx]=neighbor; neighbor=Map(i,j+1,k+1); if (neighbor==-2) neighborList[14*Np+idx]=-1; else if (neighbor<0) neighborList[14*Np+idx]=idx; else neighborList[14*Np+idx]=neighbor; neighbor=Map(i,j-1,k-1); if (neighbor==-2) neighborList[15*Np+idx]=-1; else if (neighbor<0) neighborList[15*Np+idx]=idx; else neighborList[15*Np+idx]=neighbor; neighbor=Map(i,j+1,k-1); if (neighbor==-2) neighborList[16*Np+idx]=-1; else if (neighbor<0) neighborList[16*Np+idx]=idx; else neighborList[16*Np+idx]=neighbor; neighbor=Map(i,j-1,k+1); if (neighbor==-2) neighborList[17*Np+idx]=-1; else if (neighbor<0) neighborList[17*Np+idx]=idx; else neighborList[17*Np+idx]=neighbor; } } } } //for (idx=0; idx Np ){ ERROR("ScaLBL_Communicator::MemoryOptimizedLayout: Failed to create memory efficient layout!\n"); } /* for (k=1;k Np) printf("ScaLBL_Communicator::MemoryOptimizedLayout: Map(%i,%i,%i) 
= %i > %i \n",i,j,k,Map(i,j,k),Np); else if (!(idx<0)){ // store the idx associated with each neighbor // store idx for self if neighbor is in solid or out of domain //D3Q19 = {{1,0,0},{-1,0,0} // {0,1,0},{0,-1,0} // {0,0,1},{0,0,-1}, // {1,1,0},{-1,-1,0}, // {1,-1,0},{-1,1,0}, // {1,0,1},{-1,0,-1}, // {1,0,-1},{-1,0,1}, // {0,1,1},{0,-1,-1}, // {0,1,-1},{0,-1,1}}; int neighbor; // cycle through the neighbors of lattice site idx neighbor=Map(i-1,j,k); if (neighbor<0) neighborList[idx]=idx + 2*Np; else neighborList[idx]=neighbor + 1*Np; neighbor=Map(i+1,j,k); if (neighbor<0) neighborList[Np+idx] = idx + 1*Np; else neighborList[Np+idx]= neighbor + 2*Np; neighbor=Map(i,j-1,k); if (neighbor<0) neighborList[2*Np+idx]=idx + 4*Np; else neighborList[2*Np+idx]=neighbor + 3*Np; neighbor=Map(i,j+1,k); if (neighbor<0) neighborList[3*Np+idx]=idx + 3*Np; else neighborList[3*Np+idx]=neighbor + 4*Np; neighbor=Map(i,j,k-1); if (neighbor<0) neighborList[4*Np+idx]=idx + 6*Np; else neighborList[4*Np+idx]=neighbor + 5*Np; neighbor=Map(i,j,k+1); if (neighbor<0) neighborList[5*Np+idx]=idx + 5*Np; else neighborList[5*Np+idx]=neighbor + 6*Np; neighbor=Map(i-1,j-1,k); if (neighbor<0) neighborList[6*Np+idx]=idx + 8*Np; else neighborList[6*Np+idx]=neighbor + 7*Np; neighbor=Map(i+1,j+1,k); if (neighbor<0) neighborList[7*Np+idx]=idx + 7*Np; else neighborList[7*Np+idx]=neighbor+8*Np; neighbor=Map(i-1,j+1,k); if (neighbor<0) neighborList[8*Np+idx]=idx + 10*Np; else neighborList[8*Np+idx]=neighbor + 9*Np; neighbor=Map(i+1,j-1,k); if (neighbor<0) neighborList[9*Np+idx]=idx + 9*Np; else neighborList[9*Np+idx]=neighbor + 10*Np; neighbor=Map(i-1,j,k-1); if (neighbor<0) neighborList[10*Np+idx]=idx + 12*Np; else neighborList[10*Np+idx]=neighbor + 11*Np; neighbor=Map(i+1,j,k+1); if (neighbor<0) neighborList[11*Np+idx]=idx + 11*Np; else neighborList[11*Np+idx]=neighbor + 12*Np; neighbor=Map(i-1,j,k+1); if (neighbor<0) neighborList[12*Np+idx]=idx + 14*Np; else neighborList[12*Np+idx]=neighbor + 13*Np; 
neighbor=Map(i+1,j,k-1); if (neighbor<0) neighborList[13*Np+idx]=idx + 13*Np; else neighborList[13*Np+idx]=neighbor + 14*Np; neighbor=Map(i,j-1,k-1); if (neighbor<0) neighborList[14*Np+idx]=idx + 16*Np; else neighborList[14*Np+idx]=neighbor + 15*Np; neighbor=Map(i,j+1,k+1); if (neighbor<0) neighborList[15*Np+idx]=idx + 15*Np; else neighborList[15*Np+idx]=neighbor + 16*Np; neighbor=Map(i,j-1,k+1); if (neighbor<0) neighborList[16*Np+idx]=idx + 18*Np; else neighborList[16*Np+idx]=neighbor + 17*Np; neighbor=Map(i,j+1,k-1); if (neighbor<0) neighborList[17*Np+idx]=idx + 17*Np; else neighborList[17*Np+idx]=neighbor + 18*Np; } } } } //for (idx=0; idx