add wide halo class

This commit is contained in:
James McClure 2020-09-29 10:39:05 -04:00
parent a69504f96a
commit 6b335eaf28
2 changed files with 483 additions and 0 deletions

387
common/WideHalo.cpp Normal file
View File

@ -0,0 +1,387 @@
/*
This class implements support for halo widths larger than 1
*/
ScaLBLWideHalo_Communicator::ScaLBLWideHalo_Communicator(std::shared_ptr <Domain> Dm, int width)
{
//......................................................................................
Lock=false; // unlock the communicator
//......................................................................................
// Create a separate copy of the communicator for the device
MPI_Comm_dup(Dm->Comm,&MPI_COMM_SCALBL);
//......................................................................................
// Copy the domain size and communication information directly from Dm
Nx = Dm->Nx;
Ny = Dm->Ny;
Nz = Dm->Nz;
N = Nx*Ny*Nz;
Nxh = Nx + 2*(width - 1);
Nyh = Ny + 2*(width - 1);
Nzh = Nz + 2*(width - 1);
Nh = Nxh*Nyh*Nzh;
next=0;
rank=Dm->rank();
iproc = Dm->iproc();
jproc = Dm->jproc();
kproc = Dm->kproc();
nprocx = Dm->nprocx();
nprocy = Dm->nprocy();
nprocz = Dm->nprocz();
rank_info = RankInfoStruct(myrank,nprocx,nprocy,nprocz);
rank = rank_info.rank[1][1][1];
rank_X = rank_info.rank[2][1][1];
rank_x = rank_info.rank[0][1][1];
rank_Y = rank_info.rank[1][2][1];
rank_y = rank_info.rank[1][0][1];
rank_Z = rank_info.rank[1][1][2];
rank_z = rank_info.rank[1][1][0];
rank_XY = rank_info.rank[2][2][1];
rank_xy = rank_info.rank[0][0][1];
rank_Xy = rank_info.rank[2][0][1];
rank_xY = rank_info.rank[0][2][1];
rank_XZ = rank_info.rank[2][1][2];
rank_xz = rank_info.rank[0][1][0];
rank_Xz = rank_info.rank[2][1][0];
rank_xZ = rank_info.rank[0][1][2];
rank_YZ = rank_info.rank[1][2][2];
rank_yz = rank_info.rank[1][0][0];
rank_Yz = rank_info.rank[1][2][0];
rank_yZ = rank_info.rank[1][0][2];
rank_XYz = rank_info.rank[2][2][0];
rank_xyz = rank_info.rank[0][0][0];
rank_Xyz = rank_info.rank[2][0][0];
rank_xYz = rank_info.rank[0][2][0];
rank_XYZ = rank_info.rank[2][2][2];
rank_xyZ = rank_info.rank[0][0][2];
rank_XyZ = rank_info.rank[2][0][2];
rank_xYZ = rank_info.rank[0][2][2];
sendCount_x = (Ny-2)*(Nz-2)*width;
sendCount_y = (Nx-2)*(Nz-2)*width;
sendCount_z = (Nx-2)*(Ny-2)*width;
sendCount_X = (Ny-2)*(Nz-2)*width;
sendCount_Y = (Nx-2)*(Nz-2)*width;
sendCount_Z = (Nx-2)*(Ny-2)*width;
sendCount_xy = (Nz-2)*width*width;
sendCount_yz = (Nx-2)*width*width;
sendCount_xz = (Ny-2)*width*width;
sendCount_Xy = (Nz-2)*width*width;
sendCount_Yz = (Nx-2)*width*width;
sendCount_xZ = (Ny-2)*width*width;
sendCount_xY = (Nz-2)*width*width;
sendCount_yZ = (Nx-2)*width*width;
sendCount_Xz = (Ny-2)*width*width;
sendCount_XY = (Nz-2)*width*width;
sendCount_YZ = (Nx-2)*width*width;
sendCount_XZ = (Ny-2)*width*width;
sendCount_xyz = width*width*width;
sendCount_Xyz = width*width*width;
sendCount_xYz = width*width*width;
sendCount_XYz = width*width*width;
sendCount_xyZ = width*width*width;
sendCount_XyZ = width*width*width;
sendCount_xYZ = width*width*width;
sendCount_XYZ = width*width*width;
RecvCount_x = (Ny-2)*(Nz-2)*width;
RecvCount_y = (Nx-2)*(Nz-2)*width;
RecvCount_z = (Nx-2)*(Ny-2)*width;
RecvCount_X = (Ny-2)*(Nz-2)*width;
RecvCount_Y = (Nx-2)*(Nz-2)*width;
RecvCount_Z = (Nx-2)*(Ny-2)*width;
RecvCount_xy = (Nz-2)*width*width;
RecvCount_yz = (Nx-2)*width*width;
RecvCount_xz = (Ny-2)*width*width;
RecvCount_Xy = (Nz-2)*width*width;
RecvCount_Yz = (Nx-2)*width*width;
RecvCount_xZ = (Ny-2)*width*width;
RecvCount_xY = (Nz-2)*width*width;
RecvCount_yZ = (Nx-2)*width*width;
RecvCount_Xz = (Ny-2)*width*width;
RecvCount_XY = (Nz-2)*width*width;
RecvCount_YZ = (Nx-2)*width*width;
RecvCount_XZ = (Ny-2)*width*width;
RecvCount_xyz = width*width*width;
RecvCount_Xyz = width*width*width;
RecvCount_xYz = width*width*width;
RecvCount_XYz = width*width*width;
RecvCount_xyZ = width*width*width;
RecvCount_XyZ = width*width*width;
RecvCount_xYZ = width*width*width;
RecvCount_XYZ = width*width*width;
//......................................................................................
ScaLBL_AllocateZeroCopy((void **) &sendbuf_x, sendCount_x*sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &sendbuf_X, sendCount_X*sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &sendbuf_y, sendCount_y*sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &sendbuf_Y, sendCount_Y*sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &sendbuf_z, sendCount_z*sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &sendbuf_Z, sendCount_Z*sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &sendbuf_xy, sendCount_xy*sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &sendbuf_xY, sendCount_xY*sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &sendbuf_Xy, sendCount_Xy*sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &sendbuf_XY, sendCount_XY*sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &sendbuf_xz, sendCount_xz*sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &sendbuf_xZ, sendCount_xZ*sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &sendbuf_Xz, sendCount_Xz*sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &sendbuf_XZ, sendCount_XZ*sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &sendbuf_yz, sendCount_yz*sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &sendbuf_yZ, sendCount_yZ*sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &sendbuf_Yz, sendCount_Yz*sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &sendbuf_YZ, sendCount_YZ*sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &sendbuf_xyz, sendCount_xyz*sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &sendbuf_xYz, sendCount_xYz*sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &sendbuf_Xyz, sendCount_Xyz*sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &sendbuf_XYz, sendCount_XYz*sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &sendbuf_xyZ, sendCount_xyZ*sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &sendbuf_xYZ, sendCount_xYZ*sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &sendbuf_XyZ, sendCount_XyZ*sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &sendbuf_XYZ, sendCount_XYZ*sizeof(double)); // Allocate device memory
//......................................................................................
ScaLBL_AllocateZeroCopy((void **) &recvbuf_x, recvCount_x*sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &recvbuf_X, recvCount_X*sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &recvbuf_y, recvCount_y*sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &recvbuf_Y, recvCount_Y*sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &recvbuf_z, recvCount_z*sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &recvbuf_Z, recvCount_Z*sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &recvbuf_xy, recvCount_xy*sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &recvbuf_xY, recvCount_xY*sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &recvbuf_Xy, recvCount_Xy*sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &recvbuf_XY, recvCount_XY*sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &recvbuf_xz, recvCount_xz*sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &recvbuf_xZ, recvCount_xZ*sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &recvbuf_Xz, recvCount_Xz*sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &recvbuf_XZ, recvCount_XZ*sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &recvbuf_yz, recvCount_yz*sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &recvbuf_yZ, recvCount_yZ*sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &recvbuf_Yz, recvCount_Yz*sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &recvbuf_YZ, recvCount_YZ*sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &recvbuf_xyz, recvCount_xyz*sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &recvbuf_xYz, recvCount_xYz*sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &recvbuf_Xyz, recvCount_Xyz*sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &recvbuf_XYz, recvCount_XYz*sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &recvbuf_xyZ, recvCount_xyZ*sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &recvbuf_xYZ, recvCount_xYZ*sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &recvbuf_XyZ, recvCount_XyZ*sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &recvbuf_XYZ, recvCount_XYZ*sizeof(double)); // Allocate device memory
//......................................................................................
ScaLBL_AllocateZeroCopy((void **) &dvcSendList_x, sendCount_x*sizeof(int)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &dvcSendList_X, sendCount_X*sizeof(int)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &dvcSendList_y, sendCount_y*sizeof(int)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &dvcSendList_Y, sendCount_Y*sizeof(int)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &dvcSendList_z, sendCount_z*sizeof(int)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &dvcSendList_Z, sendCount_Z*sizeof(int)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &dvcSendList_xy, sendCount_xy*sizeof(int)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &dvcSendList_xY, sendCount_xY*sizeof(int)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &dvcSendList_Xy, sendCount_Xy*sizeof(int)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &dvcSendList_XY, sendCount_XY*sizeof(int)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &dvcSendList_xz, sendCount_xz*sizeof(int)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &dvcSendList_xZ, sendCount_xZ*sizeof(int)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &dvcSendList_Xz, sendCount_Xz*sizeof(int)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &dvcSendList_XZ, sendCount_XZ*sizeof(int)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &dvcSendList_yz, sendCount_yz*sizeof(int)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &dvcSendList_yZ, sendCount_yZ*sizeof(int)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &dvcSendList_Yz, sendCount_Yz*sizeof(int)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &dvcSendList_YZ, sendCount_YZ*sizeof(int)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &dvcSendList_xyz, sendCount_xyz*sizeof(int)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &dvcSendList_xYz, sendCount_xYz*sizeof(int)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &dvcSendList_Xyz, sendCount_Xyz*sizeof(int)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &dvcSendList_XYz, sendCount_XYz*sizeof(int)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &dvcSendList_xyZ, sendCount_xyZ*sizeof(int)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &dvcSendList_xYZ, sendCount_xYZ*sizeof(int)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &dvcSendList_XyZ, sendCount_XyZ*sizeof(int)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &dvcSendList_XYZ, sendCount_XYZ*sizeof(int)); // Allocate device memory
//......................................................................................
ScaLBL_AllocateZeroCopy((void **) &dvcRecvList_x, recvCount_x*sizeof(int)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &dvcRecvList_X, recvCount_X*sizeof(int)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &dvcRecvList_y, recvCount_y*sizeof(int)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &dvcRecvList_Y, recvCount_Y*sizeof(int)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &dvcRecvList_z, recvCount_z*sizeof(int)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &dvcRecvList_Z, recvCount_Z*sizeof(int)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &dvcRecvList_xy, recvCount_xy*sizeof(int)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &dvcRecvList_xY, recvCount_xY*sizeof(int)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &dvcRecvList_Xy, recvCount_Xy*sizeof(int)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &dvcRecvList_XY, recvCount_XY*sizeof(int)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &dvcRecvList_xz, recvCount_xz*sizeof(int)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &dvcRecvList_xZ, recvCount_xZ*sizeof(int)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &dvcRecvList_Xz, recvCount_Xz*sizeof(int)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &dvcRecvList_XZ, recvCount_XZ*sizeof(int)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &dvcRecvList_yz, recvCount_yz*sizeof(int)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &dvcRecvList_yZ, recvCount_yZ*sizeof(int)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &dvcRecvList_Yz, recvCount_Yz*sizeof(int)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &dvcRecvList_YZ, recvCount_YZ*sizeof(int)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &dvcRecvList_xyz, recvCount_xyZ*sizeof(int)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &dvcRecvList_xYz, recvCount_xYZ*sizeof(int)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &dvcRecvList_Xyz, recvCount_XyZ*sizeof(int)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &dvcRecvList_XYz, recvCount_XYZ*sizeof(int)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &dvcRecvList_xyZ, recvCount_xyZ*sizeof(int)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &dvcRecvList_xYZ, recvCount_xYZ*sizeof(int)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &dvcRecvList_XyZ, recvCount_XyZ*sizeof(int)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &dvcRecvList_XYZ, recvCount_XYZ*sizeof(int)); // Allocate device memory
//......................................................................................
MPI_Barrier(MPI_COMM_SCALBL);
/* Fill in communications patterns for the lists */
int *sendList, *recvList;
sendList = new int [width*max(Nxh,Nyh)*max(Nxh,Nzh)];
/* x face */
int count = 0;
for (int k=0; k<width; k++){
for (j=2; j<Nyh-2; j++){
for (i=2; i<Nxh-2; i++){
SendList[count] = (k+width)*Nxh*Nyh + j*Nxh + i;
RecvList[count++] = k*Nxh*Nyh + j*Nxh + i;
}
}
}
ScaLBL_CopyToZeroCopy(dvcSendList_x,sendList,sendCount_x*sizeof(int));
ScaLBL_CopyToZeroCopy(dvcRecvList_x,recvList,recvCount_x*sizeof(int));
/* X face */
count = 0;
for (int k=0; k<width; k++){
for (j=2; j<Nyh-2; j++){
for (i=2; i<Nxh-2; i++){
SendList[count] = (Nzh-width-k)*Nxh*Nyh + j*Nxh + i;
RecvList[count++] = (Nzh-k)*Nxh*Nyh + j*Nxh + i;
}
}
}
ScaLBL_CopyToZeroCopy(dvcSendList_X,sendList,sendCount_X*sizeof(int));
ScaLBL_CopyToZeroCopy(dvcRecvList_X,recvList,recvCount_X*sizeof(int));
/* y face */
count = 0;
for (k=2; k<Nzh-2; k++){
for (int j=0; j<width; j++){
for (i=2; i<Nxh-2; i++){
SendList[count] = k*Nxh*Nyh + (j+width)*Nxh + i;
RecvList[count++] = k*Nxh*Nyh + j*Nxh + i;
}
}
}
ScaLBL_CopyToZeroCopy(dvcSendList_y,sendList,sendCount_y*sizeof(int));
ScaLBL_CopyToZeroCopy(dvcRecvList_y,recvList,recvCount_y*sizeof(int));
/* Y face */
count = 0;
for (k=2; k<Nzh-2; k++){
for (int j=0; j<width; j++){
for (i=2; i<Nxh-2; i++){
SendList[count] = k*Nxh*Nyh + (Nyh-width-j)*Nxh + i;
RecvList[count++] = k*Nxh*Nyh + (Nyh-j)*Nxh + i;
}
}
}
ScaLBL_CopyToZeroCopy(dvcSendList_Y,sendList,sendCount_Y*sizeof(int));
ScaLBL_CopyToZeroCopy(dvcRecvList_Y,recvList,recvCount_Y*sizeof(int));
}
void ScaLBLWideHalo_Communicator::SendHalo(double *data){
//...................................................................................
if (Lock==true){
ERROR("ScaLBL Error (SendHalo): ScaLBL_Communicator is locked -- did you forget to match Send/Recv calls?");
}
else{
Lock=true;
}
ScaLBL_DeviceBarrier();
//...................................................................................
sendtag = recvtag = 1;
//...................................................................................
ScaLBL_Scalar_Pack(dvcSendList_x, sendCount_x,sendbuf_x, data, N);
ScaLBL_Scalar_Pack(dvcSendList_y, sendCount_y,sendbuf_y, data, N);
ScaLBL_Scalar_Pack(dvcSendList_z, sendCount_z,sendbuf_z, data, N);
ScaLBL_Scalar_Pack(dvcSendList_X, sendCount_X,sendbuf_X, data, N);
ScaLBL_Scalar_Pack(dvcSendList_Y, sendCount_Y,sendbuf_Y, data, N);
ScaLBL_Scalar_Pack(dvcSendList_Z, sendCount_Z,sendbuf_Z, data, N);
ScaLBL_Scalar_Pack(dvcSendList_xy, sendCount_xy,sendbuf_xy, data, N);
ScaLBL_Scalar_Pack(dvcSendList_xY, sendCount_xY,sendbuf_xY, data, N);
ScaLBL_Scalar_Pack(dvcSendList_Xy, sendCount_Xy,sendbuf_Xy, data, N);
ScaLBL_Scalar_Pack(dvcSendList_XY, sendCount_XY,sendbuf_XY, data, N);
ScaLBL_Scalar_Pack(dvcSendList_xz, sendCount_xz,sendbuf_xz, data, N);
ScaLBL_Scalar_Pack(dvcSendList_xZ, sendCount_xZ,sendbuf_xZ, data, N);
ScaLBL_Scalar_Pack(dvcSendList_Xz, sendCount_Xz,sendbuf_Xz, data, N);
ScaLBL_Scalar_Pack(dvcSendList_XZ, sendCount_XZ,sendbuf_XZ, data, N);
ScaLBL_Scalar_Pack(dvcSendList_yz, sendCount_yz,sendbuf_yz, data, N);
ScaLBL_Scalar_Pack(dvcSendList_yZ, sendCount_yZ,sendbuf_yZ, data, N);
ScaLBL_Scalar_Pack(dvcSendList_Yz, sendCount_Yz,sendbuf_Yz, data, N);
ScaLBL_Scalar_Pack(dvcSendList_YZ, sendCount_YZ,sendbuf_YZ, data, N);
//...................................................................................
// Send / Recv all the phase indcator field values
//...................................................................................
MPI_Isend(sendbuf_x, sendCount_x,MPI_DOUBLE,rank_x,sendtag,MPI_COMM_SCALBL,&req1[0]);
MPI_Irecv(recvbuf_X, recvCount_X,MPI_DOUBLE,rank_X,recvtag,MPI_COMM_SCALBL,&req2[0]);
MPI_Isend(sendbuf_X, sendCount_X,MPI_DOUBLE,rank_X,sendtag,MPI_COMM_SCALBL,&req1[1]);
MPI_Irecv(recvbuf_x, recvCount_x,MPI_DOUBLE,rank_x,recvtag,MPI_COMM_SCALBL,&req2[1]);
MPI_Isend(sendbuf_y, sendCount_y,MPI_DOUBLE,rank_y,sendtag,MPI_COMM_SCALBL,&req1[2]);
MPI_Irecv(recvbuf_Y, recvCount_Y,MPI_DOUBLE,rank_Y,recvtag,MPI_COMM_SCALBL,&req2[2]);
MPI_Isend(sendbuf_Y, sendCount_Y,MPI_DOUBLE,rank_Y,sendtag,MPI_COMM_SCALBL,&req1[3]);
MPI_Irecv(recvbuf_y, recvCount_y,MPI_DOUBLE,rank_y,recvtag,MPI_COMM_SCALBL,&req2[3]);
MPI_Isend(sendbuf_z, sendCount_z,MPI_DOUBLE,rank_z,sendtag,MPI_COMM_SCALBL,&req1[4]);
MPI_Irecv(recvbuf_Z, recvCount_Z,MPI_DOUBLE,rank_Z,recvtag,MPI_COMM_SCALBL,&req2[4]);
MPI_Isend(sendbuf_Z, sendCount_Z,MPI_DOUBLE,rank_Z,sendtag,MPI_COMM_SCALBL,&req1[5]);
MPI_Irecv(recvbuf_z, recvCount_z,MPI_DOUBLE,rank_z,recvtag,MPI_COMM_SCALBL,&req2[5]);
MPI_Isend(sendbuf_xy, sendCount_xy,MPI_DOUBLE,rank_xy,sendtag,MPI_COMM_SCALBL,&req1[6]);
MPI_Irecv(recvbuf_XY, recvCount_XY,MPI_DOUBLE,rank_XY,recvtag,MPI_COMM_SCALBL,&req2[6]);
MPI_Isend(sendbuf_XY, sendCount_XY,MPI_DOUBLE,rank_XY,sendtag,MPI_COMM_SCALBL,&req1[7]);
MPI_Irecv(recvbuf_xy, recvCount_xy,MPI_DOUBLE,rank_xy,recvtag,MPI_COMM_SCALBL,&req2[7]);
MPI_Isend(sendbuf_Xy, sendCount_Xy,MPI_DOUBLE,rank_Xy,sendtag,MPI_COMM_SCALBL,&req1[8]);
MPI_Irecv(recvbuf_xY, recvCount_xY,MPI_DOUBLE,rank_xY,recvtag,MPI_COMM_SCALBL,&req2[8]);
MPI_Isend(sendbuf_xY, sendCount_xY,MPI_DOUBLE,rank_xY,sendtag,MPI_COMM_SCALBL,&req1[9]);
MPI_Irecv(recvbuf_Xy, recvCount_Xy,MPI_DOUBLE,rank_Xy,recvtag,MPI_COMM_SCALBL,&req2[9]);
MPI_Isend(sendbuf_xz, sendCount_xz,MPI_DOUBLE,rank_xz,sendtag,MPI_COMM_SCALBL,&req1[10]);
MPI_Irecv(recvbuf_XZ, recvCount_XZ,MPI_DOUBLE,rank_XZ,recvtag,MPI_COMM_SCALBL,&req2[10]);
MPI_Isend(sendbuf_XZ, sendCount_XZ,MPI_DOUBLE,rank_XZ,sendtag,MPI_COMM_SCALBL,&req1[11]);
MPI_Irecv(recvbuf_xz, recvCount_xz,MPI_DOUBLE,rank_xz,recvtag,MPI_COMM_SCALBL,&req2[11]);
MPI_Isend(sendbuf_Xz, sendCount_Xz,MPI_DOUBLE,rank_Xz,sendtag,MPI_COMM_SCALBL,&req1[12]);
MPI_Irecv(recvbuf_xZ, recvCount_xZ,MPI_DOUBLE,rank_xZ,recvtag,MPI_COMM_SCALBL,&req2[12]);
MPI_Isend(sendbuf_xZ, sendCount_xZ,MPI_DOUBLE,rank_xZ,sendtag,MPI_COMM_SCALBL,&req1[13]);
MPI_Irecv(recvbuf_Xz, recvCount_Xz,MPI_DOUBLE,rank_Xz,recvtag,MPI_COMM_SCALBL,&req2[13]);
MPI_Isend(sendbuf_yz, sendCount_yz,MPI_DOUBLE,rank_yz,sendtag,MPI_COMM_SCALBL,&req1[14]);
MPI_Irecv(recvbuf_YZ, recvCount_YZ,MPI_DOUBLE,rank_YZ,recvtag,MPI_COMM_SCALBL,&req2[14]);
MPI_Isend(sendbuf_YZ, sendCount_YZ,MPI_DOUBLE,rank_YZ,sendtag,MPI_COMM_SCALBL,&req1[15]);
MPI_Irecv(recvbuf_yz, recvCount_yz,MPI_DOUBLE,rank_yz,recvtag,MPI_COMM_SCALBL,&req2[15]);
MPI_Isend(sendbuf_Yz, sendCount_Yz,MPI_DOUBLE,rank_Yz,sendtag,MPI_COMM_SCALBL,&req1[16]);
MPI_Irecv(recvbuf_yZ, recvCount_yZ,MPI_DOUBLE,rank_yZ,recvtag,MPI_COMM_SCALBL,&req2[16]);
MPI_Isend(sendbuf_yZ, sendCount_yZ,MPI_DOUBLE,rank_yZ,sendtag,MPI_COMM_SCALBL,&req1[17]);
MPI_Irecv(recvbuf_Yz, recvCount_Yz,MPI_DOUBLE,rank_Yz,recvtag,MPI_COMM_SCALBL,&req2[17]);
//...................................................................................
}
void ScaLBLWideHalo_Communicator::RecvHalo(double *data){
//...................................................................................
MPI_Waitall(18,req1,stat1);
MPI_Waitall(18,req2,stat2);
ScaLBL_DeviceBarrier();
//...................................................................................
//...................................................................................
ScaLBL_Scalar_Unpack(dvcRecvList_x, recvCount_x,recvbuf_x, data, N);
ScaLBL_Scalar_Unpack(dvcRecvList_y, recvCount_y,recvbuf_y, data, N);
ScaLBL_Scalar_Unpack(dvcRecvList_X, recvCount_X,recvbuf_X, data, N);
ScaLBL_Scalar_Unpack(dvcRecvList_Y, recvCount_Y,recvbuf_Y, data, N);
ScaLBL_Scalar_Unpack(dvcRecvList_xy, recvCount_xy,recvbuf_xy, data, N);
ScaLBL_Scalar_Unpack(dvcRecvList_xY, recvCount_xY,recvbuf_xY, data, N);
ScaLBL_Scalar_Unpack(dvcRecvList_Xy, recvCount_Xy,recvbuf_Xy, data, N);
ScaLBL_Scalar_Unpack(dvcRecvList_XY, recvCount_XY,recvbuf_XY, data, N);
ScaLBL_Scalar_Unpack(dvcRecvList_z, recvCount_z,recvbuf_z, data, N);
ScaLBL_Scalar_Unpack(dvcRecvList_xz, recvCount_xz,recvbuf_xz, data, N);
ScaLBL_Scalar_Unpack(dvcRecvList_Xz, recvCount_Xz,recvbuf_Xz, data, N);
ScaLBL_Scalar_Unpack(dvcRecvList_yz, recvCount_yz,recvbuf_yz, data, N);
ScaLBL_Scalar_Unpack(dvcRecvList_Yz, recvCount_Yz,recvbuf_Yz, data, N);
ScaLBL_Scalar_Unpack(dvcRecvList_Z, recvCount_Z,recvbuf_Z, data, N);
ScaLBL_Scalar_Unpack(dvcRecvList_xZ, recvCount_xZ,recvbuf_xZ, data, N);
ScaLBL_Scalar_Unpack(dvcRecvList_XZ, recvCount_XZ,recvbuf_XZ, data, N);
ScaLBL_Scalar_Unpack(dvcRecvList_yZ, recvCount_yZ,recvbuf_yZ, data, N);
ScaLBL_Scalar_Unpack(dvcRecvList_YZ, recvCount_YZ,recvbuf_YZ, data, N);
//...................................................................................
Lock=false; // unlock the communicator after communications complete
//...................................................................................
}
inline int getHaloBlock(){
}
}

96
common/WideHalo.h Normal file
View File

@ -0,0 +1,96 @@
/*
This class implements support for halo widths larger than 1
*/
class ScaLBLWideHalo_Communicator{
public:
//......................................................................................
ScaLBLWideHalo_Communicator(std::shared_ptr <Domain> Dm, int width);
~ScaLBLWideHalo_Communicator();
//......................................................................................
MPI_Comm MPI_COMM_SCALBL; // MPI Communicator
unsigned long int CommunicationCount,SendCount,RecvCount;
int Nx,Ny,Nz,N; // original domain structure
int Nxh,Nyh,Nzh,Nh; // with wide halo
RankInfoStruct rank_info;
int first_interior,last_interior;
//......................................................................................
// Set up for D3Q19 distributions -- all 27 neighbors are needed
//......................................................................................
// Buffers to store data sent and recieved by this MPI process
double *sendbuf_x, *sendbuf_y, *sendbuf_z, *sendbuf_X, *sendbuf_Y, *sendbuf_Z;
double *sendbuf_xy, *sendbuf_yz, *sendbuf_xz, *sendbuf_Xy, *sendbuf_Yz, *sendbuf_xZ;
double *sendbuf_xY, *sendbuf_yZ, *sendbuf_Xz, *sendbuf_XY, *sendbuf_YZ, *sendbuf_XZ;
double *sendbuf_xyz, *sendbuf_Xyz, *sendbuf_xYz, *sendbuf_XYy;
double *sendbuf_xyZ, *sendbuf_XyZ, *sendbuf_xYZ, *sendbuf_XYZ;
double *recvbuf_x, *recvbuf_y, *recvbuf_z, *recvbuf_X, *recvbuf_Y, *recvbuf_Z;
double *recvbuf_xy, *recvbuf_yz, *recvbuf_xz, *recvbuf_Xy, *recvbuf_Yz, *recvbuf_xZ;
double *recvbuf_xY, *recvbuf_yZ, *recvbuf_Xz, *recvbuf_XY, *recvbuf_YZ, *recvbuf_XZ;
double *recvbuf_xyz, *recvbuf_Xyz, *recvbuf_xYz, *recvbuf_XYy;
double *recvbuf_xyZ, *recvbuf_XyZ, *recvbuf_xYZ, *recvbuf_XYZ;
//......................................................................................
int LastExterior();
int FirstInterior();
int LastInterior();
void Send(double *data);
void Recv(double *data);
// Debugging and unit testing functions
void PrintDebug());
private:
bool Lock; // use Lock to make sure only one call at a time to protect data in transit
// only one set of Send requests can be active at any time (per instance)
int i,j,k,n;
int iproc,jproc,kproc;
int nprocx,nprocy,nprocz;
int sendtag,recvtag;
// Give the object it's own MPI communicator
RankInfoStruct rank_info;
MPI_Group Group; // Group of processors associated with this domain
MPI_Request req1[26],req2[26];
MPI_Status stat1[26],stat2[26];
//......................................................................................
// MPI ranks for all 18 neighbors
//......................................................................................
// These variables are all private to prevent external things from modifying them!!
//......................................................................................
int rank;
int rank_x,rank_y,rank_z,rank_X,rank_Y,rank_Z;
int rank_xy,rank_XY,rank_xY,rank_Xy;
int rank_xz,rank_XZ,rank_xZ,rank_Xz;
int rank_yz,rank_YZ,rank_yZ,rank_Yz;
int rank_xyz,rank_Xyz,rank_xYz,rank_XYz;
int rank_xyZ,rank_XyZ,rank_xYZ,rank_XYZ;
//......................................................................................
//......................................................................................
int sendCount_x, sendCount_y, sendCount_z, sendCount_X, sendCount_Y, sendCount_Z;
int sendCount_xy, sendCount_yz, sendCount_xz, sendCount_Xy, sendCount_Yz, sendCount_xZ;
int sendCount_xY, sendCount_yZ, sendCount_Xz, sendCount_XY, sendCount_YZ, sendCount_XZ;
int sendCount_xyz,sendCount_Xyz,sendCount_xYz,sendCount_XYz;
int sendCount_xyZ,sendCount_XyZ,sendCount_xYZ,sendCount_XYZ;
//......................................................................................
int recvCount_x, recvCount_y, recvCount_z, recvCount_X, recvCount_Y, recvCount_Z;
int recvCount_xy, recvCount_yz, recvCount_xz, recvCount_Xy, recvCount_Yz, recvCount_xZ;
int recvCount_xY, recvCount_yZ, recvCount_Xz, recvCount_XY, recvCount_YZ, recvCount_XZ;
int recvCount_xyz,recvCount_Xyz,recvCount_xYz,recvCount_XYz;
int recvCount_xyZ,recvCount_XyZ,recvCount_xYZ,recvCount_XYZ;
//......................................................................................
// Send buffers that reside on the compute device
int *dvcSendList_x, *dvcSendList_y, *dvcSendList_z, *dvcSendList_X, *dvcSendList_Y, *dvcSendList_Z;
int *dvcSendList_xy, *dvcSendList_yz, *dvcSendList_xz, *dvcSendList_Xy, *dvcSendList_Yz, *dvcSendList_xZ;
int *dvcSendList_xY, *dvcSendList_yZ, *dvcSendList_Xz, *dvcSendList_XY, *dvcSendList_YZ, *dvcSendList_XZ;
int *dvcSendList_xyz,*dvcSendList_Xyz,*dvcSendList_xYz,*dvcSendList_XYz;
int *dvcSendList_xyZ,*dvcSendList_XyZ,*dvcSendList_xYZ,*dvcSendList_XYZ;
// Recieve buffers that reside on the compute device
int *dvcRecvList_x, *dvcRecvList_y, *dvcRecvList_z, *dvcRecvList_X, *dvcRecvList_Y, *dvcRecvList_Z;
int *dvcRecvList_xy, *dvcRecvList_yz, *dvcRecvList_xz, *dvcRecvList_Xy, *dvcRecvList_Yz, *dvcRecvList_xZ;
int *dvcRecvList_xY, *dvcRecvList_yZ, *dvcRecvList_Xz, *dvcRecvList_XY, *dvcRecvList_YZ, *dvcRecvList_XZ;
int *dvcRecvList_xyz,*dvcRecvList_Xyz,*dvcRecvList_xYz,*dvcRecvList_XYz;
int *dvcRecvList_xyZ,*dvcRecvList_XyZ,*dvcRecvList_xYZ,*dvcRecvList_XYZ;
//......................................................................................
};