diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 8bc5d90e..b6374e89 100755 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -47,6 +47,7 @@ ADD_LBPM_EXECUTABLE( TestPNP_Stokes ) ADD_LBPM_EXECUTABLE( TestMixedGrad ) ADD_LBPM_EXECUTABLE( TestCrusher ) ADD_LBPM_EXECUTABLE( TestCrusher2 ) +ADD_LBPM_EXECUTABLE( TestCrusher3 ) diff --git a/tests/TestCrusher2.cpp b/tests/TestCrusher2.cpp index eb2a5d7f..b131abae 100644 --- a/tests/TestCrusher2.cpp +++ b/tests/TestCrusher2.cpp @@ -1,38 +1,16 @@ -#include -#include -#include -#include -#include -#include -#include -#include #include #include -#include +#include +#include #include +#include +#include +#include +#include - -#include "mpi.h" - - -#define ASSERT(EXP) \ - do { \ - if (!(EXP)) { \ - std::stringstream tboxos; \ - tboxos << "Failed assertion: " << #EXP << std::ends; \ - throw std::logic_error( tboxos.str() ); \ - } \ - } while (0) -#define INSIST(EXP, MSG) \ - do { \ - if (!(EXP)) { \ - std::stringstream tboxos; \ - tboxos << "Failed insist: " << #EXP << std::endl; \ - tboxos << "Message: " << MSG << std::ends; \ - throw std::logic_error( tboxos.str() ); \ - } \ - } while (0) - +#include "common/Database.h" +#include "common/Utilities.h" +#include "common/MPI.h" struct RankInfoStruct2 { @@ -43,9 +21,7 @@ struct RankInfoStruct2 { int jy; //!< The index of the current process in the y direction int kz; //!< The index of the current process in the z direction int rank[3][3][3]; //!< The rank for the neighbor [i][j][k] - RankInfoStruct2() { - memset(this, 0, sizeof(RankInfoStruct2)); - } + RankInfoStruct2() : RankInfoStruct2( 1, 0, 0, 0 ) {} RankInfoStruct2(int rank0, int nprocx, int nprocy, int nprocz) { memset(this, 0, sizeof(RankInfoStruct2)); nx = nprocx; @@ -88,29 +64,46 @@ struct RankInfoStruct2 { class Domain2 { public: - Domain2(std::array nproc, std::array n, MPI_Comm Communicator) { - MPI_Comm_dup(Communicator, &Comm); - Nx = n[0] + 2; - Ny = n[1] + 2; - Nz = n[2] + 2; + Domain2(std::shared_ptr db, const Utilities::MPI &Communicator) { + Comm = Communicator.dup(); + auto nproc = db->getVector("nproc"); + auto n = db->getVector("n"); + ASSERT(n.size() == 3u); + ASSERT(nproc.size() == 3u); + int nx = n[0]; + int ny = n[1]; + int nz = n[2]; + Nx = nx + 2; + Ny = ny + 2; + Nz = nz + 2; N = Nx * Ny * Nz; - int rank, size; - MPI_Comm_rank( Comm, &rank ); - MPI_Comm_size( Comm, &size ); - rank_info = RankInfoStruct2( rank, nproc[0], nproc[1], nproc[2] ); - INSIST(size == nproc[0] * nproc[1] * nproc[2], "Fatal error in processor count!"); + int myrank = Comm.getRank(); + rank_info = RankInfoStruct2(myrank, nproc[0], nproc[1], nproc[2]); + int nprocs = Comm.getSize(); + INSIST(nprocs == nproc[0] * nproc[1] * nproc[2], "Fatal error in processor count!"); } Domain2() = delete; Domain2(const Domain2 &) = delete; - ~Domain2() { - int err = MPI_Comm_free(&Comm); - INSIST( err == MPI_SUCCESS, "Problem free'ing MPI_Comm object" ); - } + ~Domain2() = default; -public: +public: // Public variables (need to create accessors instead) + int Nx, Ny, Nz, N; + RankInfoStruct2 rank_info; + Utilities::MPI Comm; // MPI Communicator for this domain + + + //********************************** // MPI ranks for all 18 neighbors + //********************************** + inline int iproc() const { return rank_info.ix; } + inline int jproc() const { return rank_info.jy; } + inline int kproc() const { return rank_info.kz; } + inline int nprocx() const { return rank_info.nx; } + inline int nprocy() const { return rank_info.ny; } + inline int nprocz() const { return rank_info.nz; } + inline int rank() const { return rank_info.rank[1][1][1]; } inline int rank_X() const { return rank_info.rank[2][1][1]; } inline int rank_x() const { return rank_info.rank[0][1][1]; } inline int rank_Y() const { return rank_info.rank[1][2][1]; } @@ -132,7 +125,10 @@ public: // Initialize communication data structures within Domain object. void CommInit() { - MPI_Status status[18]; + int i, j, k, n; + int sendtag = 21; + int recvtag = 21; + //...................................................................................... int sendCount_x, sendCount_y, sendCount_z, sendCount_X, sendCount_Y, sendCount_Z; int sendCount_xy, sendCount_yz, sendCount_xz, sendCount_Xy, sendCount_Yz, sendCount_xZ; int sendCount_xY, sendCount_yZ, sendCount_Xz, sendCount_XY, sendCount_YZ, sendCount_XZ; @@ -140,49 +136,49 @@ public: sendCount_xy = sendCount_yz = sendCount_xz = sendCount_Xy = sendCount_Yz = sendCount_xZ = 0; sendCount_xY = sendCount_yZ = sendCount_Xz = sendCount_XY = sendCount_YZ = sendCount_XZ = 0; //...................................................................................... - for (int k = 1; k < Nz - 1; k++) { - for (int j = 1; j < Ny - 1; j++) { - for (int i = 1; i < Nx - 1; i++) { - // Counts for the six faces - if (i == 1) - sendCount_x++; - if (j == 1) - sendCount_y++; - if (k == 1) - sendCount_z++; - if (i == Nx - 2) - sendCount_X++; - if (j == Ny - 2) - sendCount_Y++; - if (k == Nz - 2) - sendCount_Z++; - // Counts for the twelve edges - if (i == 1 && j == 1) - sendCount_xy++; - if (i == 1 && j == Ny - 2) - sendCount_xY++; - if (i == Nx - 2 && j == 1) - sendCount_Xy++; - if (i == Nx - 2 && j == Ny - 2) - sendCount_XY++; + for (k = 1; k < Nz - 1; k++) { + for (j = 1; j < Ny - 1; j++) { + for (i = 1; i < Nx - 1; i++) { + // Counts for the six faces + if (i == 1) + sendCount_x++; + if (j == 1) + sendCount_y++; + if (k == 1) + sendCount_z++; + if (i == Nx - 2) + sendCount_X++; + if (j == Ny - 2) + sendCount_Y++; + if (k == Nz - 2) + sendCount_Z++; + // Counts for the twelve edges + if (i == 1 && j == 1) + sendCount_xy++; + if (i == 1 && j == Ny - 2) + sendCount_xY++; + if (i == Nx - 2 && j == 1) + sendCount_Xy++; + if (i == Nx - 2 && j == Ny - 2) + sendCount_XY++; - if (i == 1 && k == 1) - sendCount_xz++; - if (i == 1 && k == Nz - 2) - sendCount_xZ++; - if (i == Nx - 2 && k == 1) - sendCount_Xz++; - if (i == Nx - 2 && k == Nz - 2) - sendCount_XZ++; + if (i == 1 && k == 1) + sendCount_xz++; + if (i == 1 && k == Nz - 2) + sendCount_xZ++; + if (i == Nx - 2 && k == 1) + sendCount_Xz++; + if (i == Nx - 2 && k == Nz - 2) + sendCount_XZ++; - if (j == 1 && k == 1) - sendCount_yz++; - if (j == 1 && k == Nz - 2) - sendCount_yZ++; - if (j == Ny - 2 && k == 1) - sendCount_Yz++; - if (j == Ny - 2 && k == Nz - 2) - sendCount_YZ++; + if (j == 1 && k == 1) + sendCount_yz++; + if (j == 1 && k == Nz - 2) + sendCount_yZ++; + if (j == Ny - 2 && k == 1) + sendCount_Yz++; + if (j == Ny - 2 && k == Nz - 2) + sendCount_YZ++; } } } @@ -210,51 +206,51 @@ public: sendCount_x = sendCount_y = sendCount_z = sendCount_X = sendCount_Y = sendCount_Z = 0; sendCount_xy = sendCount_yz = sendCount_xz = sendCount_Xy = sendCount_Yz = sendCount_xZ = 0; sendCount_xY = sendCount_yZ = sendCount_Xz = sendCount_XY = sendCount_YZ = sendCount_XZ = 0; - for (int k = 1; k < Nz - 1; k++) { - for (int j = 1; j < Ny - 1; j++) { - for (int i = 1; i < Nx - 1; i++) { + for (k = 1; k < Nz - 1; k++) { + for (j = 1; j < Ny - 1; j++) { + for (i = 1; i < Nx - 1; i++) { // Local value to send - int n = k * Nx * Ny + j * Nx + i; - // Counts for the six faces - if (i == 1) - sendList_x[sendCount_x++] = n; - if (j == 1) - sendList_y[sendCount_y++] = n; - if (k == 1) - sendList_z[sendCount_z++] = n; - if (i == Nx - 2) - sendList_X[sendCount_X++] = n; - if (j == Ny - 2) - sendList_Y[sendCount_Y++] = n; - if (k == Nz - 2) - sendList_Z[sendCount_Z++] = n; - // Counts for the twelve edges - if (i == 1 && j == 1) - sendList_xy[sendCount_xy++] = n; - if (i == 1 && j == Ny - 2) - sendList_xY[sendCount_xY++] = n; - if (i == Nx - 2 && j == 1) - sendList_Xy[sendCount_Xy++] = n; - if (i == Nx - 2 && j == Ny - 2) - sendList_XY[sendCount_XY++] = n; + n = k * Nx * Ny + j * Nx + i; + // Counts for the six faces + if (i == 1) + sendList_x[sendCount_x++] = n; + if (j == 1) + sendList_y[sendCount_y++] = n; + if (k == 1) + sendList_z[sendCount_z++] = n; + if (i == Nx - 2) + sendList_X[sendCount_X++] = n; + if (j == Ny - 2) + sendList_Y[sendCount_Y++] = n; + if (k == Nz - 2) + sendList_Z[sendCount_Z++] = n; + // Counts for the twelve edges + if (i == 1 && j == 1) + sendList_xy[sendCount_xy++] = n; + if (i == 1 && j == Ny - 2) + sendList_xY[sendCount_xY++] = n; + if (i == Nx - 2 && j == 1) + sendList_Xy[sendCount_Xy++] = n; + if (i == Nx - 2 && j == Ny - 2) + sendList_XY[sendCount_XY++] = n; - if (i == 1 && k == 1) - sendList_xz[sendCount_xz++] = n; - if (i == 1 && k == Nz - 2) - sendList_xZ[sendCount_xZ++] = n; - if (i == Nx - 2 && k == 1) - sendList_Xz[sendCount_Xz++] = n; - if (i == Nx - 2 && k == Nz - 2) - sendList_XZ[sendCount_XZ++] = n; + if (i == 1 && k == 1) + sendList_xz[sendCount_xz++] = n; + if (i == 1 && k == Nz - 2) + sendList_xZ[sendCount_xZ++] = n; + if (i == Nx - 2 && k == 1) + sendList_Xz[sendCount_Xz++] = n; + if (i == Nx - 2 && k == Nz - 2) + sendList_XZ[sendCount_XZ++] = n; - if (j == 1 && k == 1) - sendList_yz[sendCount_yz++] = n; - if (j == 1 && k == Nz - 2) - sendList_yZ[sendCount_yZ++] = n; - if (j == Ny - 2 && k == 1) - sendList_Yz[sendCount_Yz++] = n; - if (j == Ny - 2 && k == Nz - 2) - sendList_YZ[sendCount_YZ++] = n; + if (j == 1 && k == 1) + sendList_yz[sendCount_yz++] = n; + if (j == 1 && k == Nz - 2) + sendList_yZ[sendCount_yZ++] = n; + if (j == Ny - 2 && k == 1) + sendList_Yz[sendCount_Yz++] = n; + if (j == Ny - 2 && k == Nz - 2) + sendList_YZ[sendCount_YZ++] = n; } } } @@ -263,45 +259,45 @@ public: int recvCount_x, recvCount_y, recvCount_z, recvCount_X, recvCount_Y, recvCount_Z; int recvCount_xy, recvCount_yz, recvCount_xz, recvCount_Xy, recvCount_Yz, recvCount_xZ; int recvCount_xY, recvCount_yZ, recvCount_Xz, recvCount_XY, recvCount_YZ, recvCount_XZ; - MPI_Isend(&sendCount_x, 1, MPI_INT, rank_x(), 0, Comm, &req1[0] ); - MPI_Irecv(&recvCount_X, 1, MPI_INT, rank_X(), 0, Comm, &req2[0] ); - MPI_Isend(&sendCount_X, 1, MPI_INT, rank_X(), 1, Comm, &req1[1] ); - MPI_Irecv(&recvCount_x, 1, MPI_INT, rank_x(), 1, Comm, &req2[1] ); - MPI_Isend(&sendCount_y, 1, MPI_INT, rank_y(), 2, Comm, &req1[2] ); - MPI_Irecv(&recvCount_Y, 1, MPI_INT, rank_Y(), 2, Comm, &req2[2] ); - MPI_Isend(&sendCount_Y, 1, MPI_INT, rank_Y(), 3, Comm, &req1[3] ); - MPI_Irecv(&recvCount_y, 1, MPI_INT, rank_y(), 3, Comm, &req2[3] ); - MPI_Isend(&sendCount_z, 1, MPI_INT, rank_z(), 4, Comm, &req1[4] ); - MPI_Irecv(&recvCount_Z, 1, MPI_INT, rank_Z(), 4, Comm, &req2[4] ); - MPI_Isend(&sendCount_Z, 1, MPI_INT, rank_Z(), 5, Comm, &req1[5] ); - MPI_Irecv(&recvCount_z, 1, MPI_INT, rank_z(), 5, Comm, &req2[5] ); - MPI_Isend(&sendCount_xy, 1, MPI_INT, rank_xy(), 6, Comm, &req1[6] ); - MPI_Irecv(&recvCount_XY, 1, MPI_INT, rank_XY(), 6, Comm, &req2[6] ); - MPI_Isend(&sendCount_XY, 1, MPI_INT, rank_XY(), 7, Comm, &req1[7] ); - MPI_Irecv(&recvCount_xy, 1, MPI_INT, rank_xy(), 7, Comm, &req2[7] ); - MPI_Isend(&sendCount_Xy, 1, MPI_INT, rank_Xy(), 8, Comm, &req1[8] ); - MPI_Irecv(&recvCount_xY, 1, MPI_INT, rank_xY(), 8, Comm, &req2[8] ); - MPI_Isend(&sendCount_xY, 1, MPI_INT, rank_xY(), 9, Comm, &req1[9] ); - MPI_Irecv(&recvCount_Xy, 1, MPI_INT, rank_Xy(), 9, Comm, &req2[9] ); - MPI_Isend(&sendCount_xz, 1, MPI_INT, rank_xz(), 10, Comm, &req1[10] ); - MPI_Irecv(&recvCount_XZ, 1, MPI_INT, rank_XZ(), 10, Comm, &req2[10] ); - MPI_Isend(&sendCount_XZ, 1, MPI_INT, rank_XZ(), 11, Comm, &req1[11] ); - MPI_Irecv(&recvCount_xz, 1, MPI_INT, rank_xz(), 11, Comm, &req2[11] ); - MPI_Isend(&sendCount_Xz, 1, MPI_INT, rank_Xz(), 12, Comm, &req1[12] ); - MPI_Irecv(&recvCount_xZ, 1, MPI_INT, rank_xZ(), 12, Comm, &req2[12] ); - MPI_Isend(&sendCount_xZ, 1, MPI_INT, rank_xZ(), 13, Comm, &req1[13] ); - MPI_Irecv(&recvCount_Xz, 1, MPI_INT, rank_Xz(), 13, Comm, &req2[13] ); - MPI_Isend(&sendCount_yz, 1, MPI_INT, rank_yz(), 14, Comm, &req1[14] ); - MPI_Irecv(&recvCount_YZ, 1, MPI_INT, rank_YZ(), 14, Comm, &req2[14] ); - MPI_Isend(&sendCount_YZ, 1, MPI_INT, rank_YZ(), 15, Comm, &req1[15] ); - MPI_Irecv(&recvCount_yz, 1, MPI_INT, rank_yz(), 15, Comm, &req2[15] ); - MPI_Isend(&sendCount_Yz, 1, MPI_INT, rank_Yz(), 16, Comm, &req1[16] ); - MPI_Irecv(&recvCount_yZ, 1, MPI_INT, rank_yZ(), 16, Comm, &req2[16] ); - MPI_Isend(&sendCount_yZ, 1, MPI_INT, rank_yZ(), 17, Comm, &req1[17] ); - MPI_Irecv(&recvCount_Yz, 1, MPI_INT, rank_Yz(), 17, Comm, &req2[17] ); - MPI_Waitall( 18, req1, status ); - MPI_Waitall( 18, req2, status ); - MPI_Barrier( Comm ); + req1[0] = Comm.Isend(&sendCount_x, 1, rank_x(), sendtag + 0); + req2[0] = Comm.Irecv(&recvCount_X, 1, rank_X(), recvtag + 0); + req1[1] = Comm.Isend(&sendCount_X, 1, rank_X(), sendtag + 1); + req2[1] = Comm.Irecv(&recvCount_x, 1, rank_x(), recvtag + 1); + req1[2] = Comm.Isend(&sendCount_y, 1, rank_y(), sendtag + 2); + req2[2] = Comm.Irecv(&recvCount_Y, 1, rank_Y(), recvtag + 2); + req1[3] = Comm.Isend(&sendCount_Y, 1, rank_Y(), sendtag + 3); + req2[3] = Comm.Irecv(&recvCount_y, 1, rank_y(), recvtag + 3); + req1[4] = Comm.Isend(&sendCount_z, 1, rank_z(), sendtag + 4); + req2[4] = Comm.Irecv(&recvCount_Z, 1, rank_Z(), recvtag + 4); + req1[5] = Comm.Isend(&sendCount_Z, 1, rank_Z(), sendtag + 5); + req2[5] = Comm.Irecv(&recvCount_z, 1, rank_z(), recvtag + 5); + req1[6] = Comm.Isend(&sendCount_xy, 1, rank_xy(), sendtag + 6); + req2[6] = Comm.Irecv(&recvCount_XY, 1, rank_XY(), recvtag + 6); + req1[7] = Comm.Isend(&sendCount_XY, 1, rank_XY(), sendtag + 7); + req2[7] = Comm.Irecv(&recvCount_xy, 1, rank_xy(), recvtag + 7); + req1[8] = Comm.Isend(&sendCount_Xy, 1, rank_Xy(), sendtag + 8); + req2[8] = Comm.Irecv(&recvCount_xY, 1, rank_xY(), recvtag + 8); + req1[9] = Comm.Isend(&sendCount_xY, 1, rank_xY(), sendtag + 9); + req2[9] = Comm.Irecv(&recvCount_Xy, 1, rank_Xy(), recvtag + 9); + req1[10] = Comm.Isend(&sendCount_xz, 1, rank_xz(), sendtag + 10); + req2[10] = Comm.Irecv(&recvCount_XZ, 1, rank_XZ(), recvtag + 10); + req1[11] = Comm.Isend(&sendCount_XZ, 1, rank_XZ(), sendtag + 11); + req2[11] = Comm.Irecv(&recvCount_xz, 1, rank_xz(), recvtag + 11); + req1[12] = Comm.Isend(&sendCount_Xz, 1, rank_Xz(), sendtag + 12); + req2[12] = Comm.Irecv(&recvCount_xZ, 1, rank_xZ(), recvtag + 12); + req1[13] = Comm.Isend(&sendCount_xZ, 1, rank_xZ(), sendtag + 13); + req2[13] = Comm.Irecv(&recvCount_Xz, 1, rank_Xz(), recvtag + 13); + req1[14] = Comm.Isend(&sendCount_yz, 1, rank_yz(), sendtag + 14); + req2[14] = Comm.Irecv(&recvCount_YZ, 1, rank_YZ(), recvtag + 14); + req1[15] = Comm.Isend(&sendCount_YZ, 1, rank_YZ(), sendtag + 15); + req2[15] = Comm.Irecv(&recvCount_yz, 1, rank_yz(), recvtag + 15); + req1[16] = Comm.Isend(&sendCount_Yz, 1, rank_Yz(), sendtag + 16); + req2[16] = Comm.Irecv(&recvCount_yZ, 1, rank_yZ(), recvtag + 16); + req1[17] = Comm.Isend(&sendCount_yZ, 1, rank_yZ(), sendtag + 17); + req2[17] = Comm.Irecv(&recvCount_Yz, 1, rank_Yz(), recvtag + 17); + Comm.waitAll(18, req1); + Comm.waitAll(18, req2); + Comm.barrier(); // allocate recv lists recvList_x.resize(recvCount_x, 0); recvList_y.resize(recvCount_y, 0); @@ -322,51 +318,85 @@ public: recvList_YZ.resize(recvCount_YZ, 0); recvList_XZ.resize(recvCount_XZ, 0); //...................................................................................... - MPI_Isend(sendList_x.data(), sendCount_x, MPI_INT, rank_x(), 0, Comm, &req1[0] ); - MPI_Irecv(recvList_X.data(), recvCount_X, MPI_INT, rank_X(), 0, Comm, &req2[0] ); - MPI_Isend(sendList_X.data(), sendCount_X, MPI_INT, rank_X(), 1, Comm, &req1[1] ); - MPI_Irecv(recvList_x.data(), recvCount_x, MPI_INT, rank_x(), 1, Comm, &req2[1] ); - MPI_Isend(sendList_y.data(), sendCount_y, MPI_INT, rank_y(), 2, Comm, &req1[2] ); - MPI_Irecv(recvList_Y.data(), recvCount_Y, MPI_INT, rank_Y(), 2, Comm, &req2[2] ); - MPI_Isend(sendList_Y.data(), sendCount_Y, MPI_INT, rank_Y(), 3, Comm, &req1[3] ); - MPI_Irecv(recvList_y.data(), recvCount_y, MPI_INT, rank_y(), 3, Comm, &req2[3] ); - MPI_Isend(sendList_z.data(), sendCount_z, MPI_INT, rank_z(), 4, Comm, &req1[4] ); - MPI_Irecv(recvList_Z.data(), recvCount_Z, MPI_INT, rank_Z(), 4, Comm, &req2[4] ); - MPI_Isend(sendList_Z.data(), sendCount_Z, MPI_INT, rank_Z(), 5, Comm, &req1[5] ); - MPI_Irecv(recvList_z.data(), recvCount_z, MPI_INT, rank_z(), 5, Comm, &req2[5] ); - MPI_Isend(sendList_xy.data(), sendCount_xy, MPI_INT, rank_xy(), 6, Comm, &req1[6] ); - MPI_Irecv(recvList_XY.data(), recvCount_XY, MPI_INT, rank_XY(), 6, Comm, &req2[6] ); - MPI_Isend(sendList_XY.data(), sendCount_XY, MPI_INT, rank_XY(), 7, Comm, &req1[7] ); - MPI_Irecv(recvList_xy.data(), recvCount_xy, MPI_INT, rank_xy(), 7, Comm, &req2[7] ); - MPI_Isend(sendList_Xy.data(), sendCount_Xy, MPI_INT, rank_Xy(), 8, Comm, &req1[8] ); - MPI_Irecv(recvList_xY.data(), recvCount_xY, MPI_INT, rank_xY(), 8, Comm, &req2[8] ); - MPI_Isend(sendList_xY.data(), sendCount_xY, MPI_INT, rank_xY(), 9, Comm, &req1[9] ); - MPI_Irecv(recvList_Xy.data(), recvCount_Xy, MPI_INT, rank_Xy(), 9, Comm, &req2[9] ); - MPI_Isend(sendList_xz.data(), sendCount_xz, MPI_INT, rank_xz(), 10, Comm, &req1[10] ); - MPI_Irecv(recvList_XZ.data(), recvCount_XZ, MPI_INT, rank_XZ(), 10, Comm, &req2[10] ); - MPI_Isend(sendList_XZ.data(), sendCount_XZ, MPI_INT, rank_XZ(), 11, Comm, &req1[11] ); - MPI_Irecv(recvList_xz.data(), recvCount_xz, MPI_INT, rank_xz(), 11, Comm, &req2[11] ); - MPI_Isend(sendList_Xz.data(), sendCount_Xz, MPI_INT, rank_Xz(), 12, Comm, &req1[12] ); - MPI_Irecv(recvList_xZ.data(), recvCount_xZ, MPI_INT, rank_xZ(), 12, Comm, &req2[12] ); - MPI_Isend(sendList_xZ.data(), sendCount_xZ, MPI_INT, rank_xZ(), 13, Comm, &req1[13] ); - MPI_Irecv(recvList_Xz.data(), recvCount_Xz, MPI_INT, rank_Xz(), 13, Comm, &req2[13] ); - MPI_Isend(sendList_yz.data(), sendCount_yz, MPI_INT, rank_yz(), 14, Comm, &req1[14] ); - MPI_Irecv(recvList_YZ.data(), recvCount_YZ, MPI_INT, rank_YZ(), 14, Comm, &req2[14] ); - MPI_Isend(sendList_YZ.data(), sendCount_YZ, MPI_INT, rank_YZ(), 15, Comm, &req1[15] ); - MPI_Irecv(recvList_yz.data(), recvCount_yz, MPI_INT, rank_yz(), 15, Comm, &req2[15] ); - MPI_Isend(sendList_Yz.data(), sendCount_Yz, MPI_INT, rank_Yz(), 16, Comm, &req1[16] ); - MPI_Irecv(recvList_yZ.data(), recvCount_yZ, MPI_INT, rank_yZ(), 16, Comm, &req2[16] ); - MPI_Isend(sendList_yZ.data(), sendCount_yZ, MPI_INT, rank_yZ(), 17, Comm, &req1[17] ); - MPI_Irecv(recvList_Yz.data(), recvCount_Yz, MPI_INT, rank_Yz(), 17, Comm, &req2[17] ); - MPI_Waitall( 18, req1, status ); - MPI_Waitall( 18, req2, status ); - MPI_Barrier( Comm ); + req1[0] = Comm.Isend(sendList_x.data(), sendCount_x, rank_x(), sendtag); + req2[0] = Comm.Irecv(recvList_X.data(), recvCount_X, rank_X(), recvtag); + req1[1] = Comm.Isend(sendList_X.data(), sendCount_X, rank_X(), sendtag); + req2[1] = Comm.Irecv(recvList_x.data(), recvCount_x, rank_x(), recvtag); + req1[2] = Comm.Isend(sendList_y.data(), sendCount_y, rank_y(), sendtag); + req2[2] = Comm.Irecv(recvList_Y.data(), recvCount_Y, rank_Y(), recvtag); + req1[3] = Comm.Isend(sendList_Y.data(), sendCount_Y, rank_Y(), sendtag); + req2[3] = Comm.Irecv(recvList_y.data(), recvCount_y, rank_y(), recvtag); + req1[4] = Comm.Isend(sendList_z.data(), sendCount_z, rank_z(), sendtag); + req2[4] = Comm.Irecv(recvList_Z.data(), recvCount_Z, rank_Z(), recvtag); + req1[5] = Comm.Isend(sendList_Z.data(), sendCount_Z, rank_Z(), sendtag); + req2[5] = Comm.Irecv(recvList_z.data(), recvCount_z, rank_z(), recvtag); + req1[6] = Comm.Isend(sendList_xy.data(), sendCount_xy, rank_xy(), sendtag); + req2[6] = Comm.Irecv(recvList_XY.data(), recvCount_XY, rank_XY(), recvtag); + req1[7] = Comm.Isend(sendList_XY.data(), sendCount_XY, rank_XY(), sendtag); + req2[7] = Comm.Irecv(recvList_xy.data(), recvCount_xy, rank_xy(), recvtag); + req1[8] = Comm.Isend(sendList_Xy.data(), sendCount_Xy, rank_Xy(), sendtag); + req2[8] = Comm.Irecv(recvList_xY.data(), recvCount_xY, rank_xY(), recvtag); + req1[9] = Comm.Isend(sendList_xY.data(), sendCount_xY, rank_xY(), sendtag); + req2[9] = Comm.Irecv(recvList_Xy.data(), recvCount_Xy, rank_Xy(), recvtag); + req1[10] = Comm.Isend(sendList_xz.data(), sendCount_xz, rank_xz(), sendtag); + req2[10] = Comm.Irecv(recvList_XZ.data(), recvCount_XZ, rank_XZ(), recvtag); + req1[11] = Comm.Isend(sendList_XZ.data(), sendCount_XZ, rank_XZ(), sendtag); + req2[11] = Comm.Irecv(recvList_xz.data(), recvCount_xz, rank_xz(), recvtag); + req1[12] = Comm.Isend(sendList_Xz.data(), sendCount_Xz, rank_Xz(), sendtag); + req2[12] = Comm.Irecv(recvList_xZ.data(), recvCount_xZ, rank_xZ(), recvtag); + req1[13] = Comm.Isend(sendList_xZ.data(), sendCount_xZ, rank_xZ(), sendtag); + req2[13] = Comm.Irecv(recvList_Xz.data(), recvCount_Xz, rank_Xz(), recvtag); + req1[14] = Comm.Isend(sendList_yz.data(), sendCount_yz, rank_yz(), sendtag); + req2[14] = Comm.Irecv(recvList_YZ.data(), recvCount_YZ, rank_YZ(), recvtag); + req1[15] = Comm.Isend(sendList_YZ.data(), sendCount_YZ, rank_YZ(), sendtag); + req2[15] = Comm.Irecv(recvList_yz.data(), recvCount_yz, rank_yz(), recvtag); + req1[16] = Comm.Isend(sendList_Yz.data(), sendCount_Yz, rank_Yz(), sendtag); + req2[16] = Comm.Irecv(recvList_yZ.data(), recvCount_yZ, rank_yZ(), recvtag); + req1[17] = Comm.Isend(sendList_yZ.data(), sendCount_yZ, rank_yZ(), sendtag); + req2[17] = Comm.Irecv(recvList_Yz.data(), recvCount_Yz, rank_Yz(), recvtag); + Comm.waitAll(18, req1); + Comm.waitAll(18, req2); + //...................................................................................... + for (int idx = 0; idx < recvCount_x; idx++) + recvList_x[idx] -= (Nx - 2); + for (int idx = 0; idx < recvCount_X; idx++) + recvList_X[idx] += (Nx - 2); + for (int idx = 0; idx < recvCount_y; idx++) + recvList_y[idx] -= (Ny - 2) * Nx; + for (int idx = 0; idx < recvCount_Y; idx++) + recvList_Y[idx] += (Ny - 2) * Nx; + for (int idx = 0; idx < recvCount_z; idx++) + recvList_z[idx] -= (Nz - 2) * Nx * Ny; + for (int idx = 0; idx < recvCount_Z; idx++) + recvList_Z[idx] += (Nz - 2) * Nx * Ny; + for (int idx = 0; idx < recvCount_xy; idx++) + recvList_xy[idx] -= (Nx - 2) + (Ny - 2) * Nx; + for (int idx = 0; idx < recvCount_XY; idx++) + recvList_XY[idx] += (Nx - 2) + (Ny - 2) * Nx; + for (int idx = 0; idx < recvCount_xY; idx++) + recvList_xY[idx] -= (Nx - 2) - (Ny - 2) * Nx; + for (int idx = 0; idx < recvCount_Xy; idx++) + recvList_Xy[idx] += (Nx - 2) - (Ny - 2) * Nx; + for (int idx = 0; idx < recvCount_xz; idx++) + recvList_xz[idx] -= (Nx - 2) + (Nz - 2) * Nx * Ny; + for (int idx = 0; idx < recvCount_XZ; idx++) + recvList_XZ[idx] += (Nx - 2) + (Nz - 2) * Nx * Ny; + for (int idx = 0; idx < recvCount_xZ; idx++) + recvList_xZ[idx] -= (Nx - 2) - (Nz - 2) * Nx * Ny; + for (int idx = 0; idx < recvCount_Xz; idx++) + recvList_Xz[idx] += (Nx - 2) - (Nz - 2) * Nx * Ny; + for (int idx = 0; idx < recvCount_yz; idx++) + recvList_yz[idx] -= (Ny - 2) * Nx + (Nz - 2) * Nx * Ny; + for (int idx = 0; idx < recvCount_YZ; idx++) + recvList_YZ[idx] += (Ny - 2) * Nx + (Nz - 2) * Nx * Ny; + for (int idx = 0; idx < recvCount_yZ; idx++) + recvList_yZ[idx] -= (Ny - 2) * Nx - (Nz - 2) * Nx * Ny; + for (int idx = 0; idx < recvCount_Yz; idx++) + recvList_Yz[idx] += (Ny - 2) * Nx - (Nz - 2) * Nx * Ny; } private: - int Nx, Ny, Nz, N; - RankInfoStruct2 rank_info; - MPI_Comm Comm; // MPI Communicator for this domain + MPI_Request req1[18], req2[18]; std::vector sendList_x, sendList_y, sendList_z, sendList_X, sendList_Y, sendList_Z; std::vector sendList_xy, sendList_yz, sendList_xz, sendList_Xy, sendList_Yz, sendList_xZ; @@ -374,59 +404,28 @@ private: std::vector recvList_x, recvList_y, recvList_z, recvList_X, recvList_Y, recvList_Z; std::vector recvList_xy, recvList_yz, recvList_xz, recvList_Xy, recvList_Yz, recvList_xZ; std::vector recvList_xY, recvList_yZ, recvList_Xz, recvList_XY, recvList_YZ, recvList_XZ; + const std::vector &getRecvList(const char *dir) const; + const std::vector &getSendList(const char *dir) const; }; -std::array get_nproc( int P ) -{ - if ( P == 1 ) - return { 1, 1, 1 }; - else if ( P == 2 ) - return { 2, 1, 1 }; - else if ( P == 4 ) - return { 2, 2, 1 }; - else if ( P == 8 ) - return { 2, 2, 2 }; - else if ( P == 64 ) - return { 4, 4, 4 }; - else - throw std::logic_error("proccessor count not supported yet"); -} - int main(int argc, char **argv) { - // Start MPI - bool multiple = true; - if (multiple) { - int provided; - MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided); - if (provided < MPI_THREAD_MULTIPLE) { - int rank; - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - if (rank == 0) - std::cerr << "Warning: Failed to start MPI with thread support\n"; - } - } else { - MPI_Init(&argc, &argv); - } - - // Run the problem - int size = 0; - MPI_Comm_size( MPI_COMM_WORLD, &size ); + Utilities::startup( argc, argv, true ); + Utilities::MPI comm( MPI_COMM_WORLD ); { - auto nproc = get_nproc( size ); - std::array n = { 10, 20, 30 }; - auto Dm = std::make_shared(nproc,n,MPI_COMM_WORLD); + auto filename = argv[1]; + auto input_db = std::make_shared( filename ); + auto db = input_db->getDatabase( "Domain" ); + auto Dm = std::make_shared(db,comm); Dm->CommInit(); std::cout << "step 1" << std::endl << std::flush; } std::cout << "step 2" << std::endl << std::flush; - MPI_Barrier( MPI_COMM_WORLD ); + comm.barrier(); std::cout << "step 3" << std::endl << std::flush; - - // Shutdown MPI - MPI_Finalize(); + Utilities::shutdown(); std::cout << "step 4" << std::endl << std::flush; return 0; } diff --git a/tests/TestCrusher3.cpp b/tests/TestCrusher3.cpp new file mode 100644 index 00000000..eb2a5d7f --- /dev/null +++ b/tests/TestCrusher3.cpp @@ -0,0 +1,432 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +#include "mpi.h" + + +#define ASSERT(EXP) \ + do { \ + if (!(EXP)) { \ + std::stringstream tboxos; \ + tboxos << "Failed assertion: " << #EXP << std::ends; \ + throw std::logic_error( tboxos.str() ); \ + } \ + } while (0) +#define INSIST(EXP, MSG) \ + do { \ + if (!(EXP)) { \ + std::stringstream tboxos; \ + tboxos << "Failed insist: " << #EXP << std::endl; \ + tboxos << "Message: " << MSG << std::ends; \ + throw std::logic_error( tboxos.str() ); \ + } \ + } while (0) + + + +struct RankInfoStruct2 { + int nx; //!< The number of processors in the x direction + int ny; //!< The number of processors in the y direction + int nz; //!< The number of processors in the z direction + int ix; //!< The index of the current process in the x direction + int jy; //!< The index of the current process in the y direction + int kz; //!< The index of the current process in the z direction + int rank[3][3][3]; //!< The rank for the neighbor [i][j][k] + RankInfoStruct2() { + memset(this, 0, sizeof(RankInfoStruct2)); + } + RankInfoStruct2(int rank0, int nprocx, int nprocy, int nprocz) { + memset(this, 0, sizeof(RankInfoStruct2)); + nx = nprocx; + ny = nprocy; + nz = nprocz; + if (rank0 >= nprocx * nprocy * nprocz) { + ix = -1; + jy = -1; + kz = -1; + for (int i = -1; i <= 1; i++) { + for (int j = -1; j <= 1; j++) { + for (int k = -1; k <= 1; k++) { + rank[i + 1][j + 1][k + 1] = -1; + } + } + } + } else { + ix = rank0 % nprocx; + jy = (rank0 / nprocx) % nprocy; + kz = rank0 / (nprocx * nprocy); + for (int i = -1; i <= 1; i++) { + for (int j = -1; j <= 1; j++) { + for (int k = -1; k <= 1; k++) { + rank[i + 1][j + 1][k + 1] = + getRankForBlock(ix + i, jy + j, kz + k); + } + } + } + ASSERT(rank[1][1][1] == rank0); + } + } + int getRankForBlock(int i, int j, int k) const { + int i2 = (i + nx) % nx; + int j2 = (j + ny) % ny; + int k2 = (k + nz) % nz; + return i2 + j2 * nx + k2 * nx * ny; + } +}; + + +class Domain2 { +public: + Domain2(std::array nproc, std::array n, MPI_Comm Communicator) { + MPI_Comm_dup(Communicator, &Comm); + Nx = n[0] + 2; + Ny = n[1] + 2; + Nz = n[2] + 2; + N = Nx * Ny * Nz; + int rank, size; + MPI_Comm_rank( Comm, &rank ); + MPI_Comm_size( Comm, &size ); + rank_info = RankInfoStruct2( rank, nproc[0], nproc[1], nproc[2] ); + INSIST(size == nproc[0] * nproc[1] * nproc[2], "Fatal error in processor count!"); + } + + Domain2() = delete; + Domain2(const Domain2 &) = delete; + ~Domain2() { + int err = MPI_Comm_free(&Comm); + INSIST( err == MPI_SUCCESS, "Problem free'ing MPI_Comm object" ); + } + +public: + + // MPI ranks for all 18 neighbors + inline int rank_X() const { return rank_info.rank[2][1][1]; } + inline int rank_x() const { return rank_info.rank[0][1][1]; } + inline int rank_Y() const { return rank_info.rank[1][2][1]; } + inline int rank_y() const { return rank_info.rank[1][0][1]; } + inline int rank_Z() const { return rank_info.rank[1][1][2]; } + inline int rank_z() const { return rank_info.rank[1][1][0]; } + inline int rank_XY() const { return rank_info.rank[2][2][1]; } + inline int rank_xy() const { return rank_info.rank[0][0][1]; } + inline int rank_Xy() const { return rank_info.rank[2][0][1]; } + inline int rank_xY() const { return rank_info.rank[0][2][1]; } + inline int rank_XZ() const { return rank_info.rank[2][1][2]; } + inline int rank_xz() const { return rank_info.rank[0][1][0]; } + inline int rank_Xz() const { return rank_info.rank[2][1][0]; } + inline int rank_xZ() const { return rank_info.rank[0][1][2]; } + inline int rank_YZ() const { return rank_info.rank[1][2][2]; } + inline int rank_yz() const { return rank_info.rank[1][0][0]; } + inline int rank_Yz() const { return rank_info.rank[1][2][0]; } + inline int rank_yZ() const { return rank_info.rank[1][0][2]; } + + // Initialize communication data structures within Domain object. + void CommInit() { + MPI_Status status[18]; + int sendCount_x, sendCount_y, sendCount_z, sendCount_X, sendCount_Y, sendCount_Z; + int sendCount_xy, sendCount_yz, sendCount_xz, sendCount_Xy, sendCount_Yz, sendCount_xZ; + int sendCount_xY, sendCount_yZ, sendCount_Xz, sendCount_XY, sendCount_YZ, sendCount_XZ; + sendCount_x = sendCount_y = sendCount_z = sendCount_X = sendCount_Y = sendCount_Z = 0; + sendCount_xy = sendCount_yz = sendCount_xz = sendCount_Xy = sendCount_Yz = sendCount_xZ = 0; + sendCount_xY = sendCount_yZ = sendCount_Xz = sendCount_XY = sendCount_YZ = sendCount_XZ = 0; + //...................................................................................... + for (int k = 1; k < Nz - 1; k++) { + for (int j = 1; j < Ny - 1; j++) { + for (int i = 1; i < Nx - 1; i++) { + // Counts for the six faces + if (i == 1) + sendCount_x++; + if (j == 1) + sendCount_y++; + if (k == 1) + sendCount_z++; + if (i == Nx - 2) + sendCount_X++; + if (j == Ny - 2) + sendCount_Y++; + if (k == Nz - 2) + sendCount_Z++; + // Counts for the twelve edges + if (i == 1 && j == 1) + sendCount_xy++; + if (i == 1 && j == Ny - 2) + sendCount_xY++; + if (i == Nx - 2 && j == 1) + sendCount_Xy++; + if (i == Nx - 2 && j == Ny - 2) + sendCount_XY++; + + if (i == 1 && k == 1) + sendCount_xz++; + if (i == 1 && k == Nz - 2) + sendCount_xZ++; + if (i == Nx - 2 && k == 1) + sendCount_Xz++; + if (i == Nx - 2 && k == Nz - 2) + sendCount_XZ++; + + if (j == 1 && k == 1) + sendCount_yz++; + if (j == 1 && k == Nz - 2) + sendCount_yZ++; + if (j == Ny - 2 && k == 1) + sendCount_Yz++; + if (j == Ny - 2 && k == Nz - 2) + sendCount_YZ++; + } + } + } + + // allocate send lists + sendList_x.resize(sendCount_x, 0); + sendList_y.resize(sendCount_y, 0); + sendList_z.resize(sendCount_z, 0); + sendList_X.resize(sendCount_X, 0); + sendList_Y.resize(sendCount_Y, 0); + sendList_Z.resize(sendCount_Z, 0); + sendList_xy.resize(sendCount_xy, 0); + sendList_yz.resize(sendCount_yz, 0); + sendList_xz.resize(sendCount_xz, 0); + sendList_Xy.resize(sendCount_Xy, 0); + sendList_Yz.resize(sendCount_Yz, 0); + sendList_xZ.resize(sendCount_xZ, 0); + sendList_xY.resize(sendCount_xY, 0); + sendList_yZ.resize(sendCount_yZ, 0); + sendList_Xz.resize(sendCount_Xz, 0); + sendList_XY.resize(sendCount_XY, 0); + sendList_YZ.resize(sendCount_YZ, 0); + sendList_XZ.resize(sendCount_XZ, 0); + // Populate the send list + sendCount_x = sendCount_y = sendCount_z = sendCount_X = sendCount_Y = sendCount_Z = 0; + sendCount_xy = sendCount_yz = sendCount_xz = sendCount_Xy = sendCount_Yz = sendCount_xZ = 0; + sendCount_xY = sendCount_yZ = sendCount_Xz = sendCount_XY = sendCount_YZ = sendCount_XZ = 0; + for (int k = 1; k < Nz - 1; k++) { + for (int j = 1; j < Ny - 1; j++) { + for (int i = 1; i < Nx - 1; i++) { + // Local value to send + int n = k * Nx * Ny + j * Nx + i; + // Counts for the six faces + if (i == 1) + sendList_x[sendCount_x++] = n; + if (j == 1) + sendList_y[sendCount_y++] = n; + if (k == 1) + sendList_z[sendCount_z++] = n; + if (i == Nx - 2) + sendList_X[sendCount_X++] = n; + if (j == Ny - 2) + sendList_Y[sendCount_Y++] = n; + if (k == Nz - 2) + sendList_Z[sendCount_Z++] = n; + // Counts for the twelve edges + if (i == 1 && j == 1) + sendList_xy[sendCount_xy++] = n; + if (i == 1 && j == Ny - 2) + sendList_xY[sendCount_xY++] = n; + if (i == Nx - 2 && j == 1) + sendList_Xy[sendCount_Xy++] = n; + if (i == Nx - 2 && j == Ny - 2) + sendList_XY[sendCount_XY++] = n; + + if (i == 1 && k == 1) + sendList_xz[sendCount_xz++] = n; + if (i == 1 && k == Nz - 2) + sendList_xZ[sendCount_xZ++] = n; + if (i == Nx - 2 && k == 1) + sendList_Xz[sendCount_Xz++] = n; + if (i == Nx - 2 && k == Nz - 2) + sendList_XZ[sendCount_XZ++] = n; + + if (j == 1 && k == 1) + sendList_yz[sendCount_yz++] = n; + if (j == 1 && k == Nz - 2) + sendList_yZ[sendCount_yZ++] = n; + if (j == Ny - 2 && k == 1) + sendList_Yz[sendCount_Yz++] = n; + if (j == Ny - 2 && k == Nz - 2) + sendList_YZ[sendCount_YZ++] = n; + } + } + } + + //...................................................................................... + int recvCount_x, recvCount_y, recvCount_z, recvCount_X, recvCount_Y, recvCount_Z; + int recvCount_xy, recvCount_yz, recvCount_xz, recvCount_Xy, recvCount_Yz, recvCount_xZ; + int recvCount_xY, recvCount_yZ, recvCount_Xz, recvCount_XY, recvCount_YZ, recvCount_XZ; + MPI_Isend(&sendCount_x, 1, MPI_INT, rank_x(), 0, Comm, &req1[0] ); + MPI_Irecv(&recvCount_X, 1, MPI_INT, rank_X(), 0, Comm, &req2[0] ); + MPI_Isend(&sendCount_X, 1, MPI_INT, rank_X(), 1, Comm, &req1[1] ); + MPI_Irecv(&recvCount_x, 1, MPI_INT, rank_x(), 1, Comm, &req2[1] ); + MPI_Isend(&sendCount_y, 1, MPI_INT, rank_y(), 2, Comm, &req1[2] ); + MPI_Irecv(&recvCount_Y, 1, MPI_INT, rank_Y(), 2, Comm, &req2[2] ); + MPI_Isend(&sendCount_Y, 1, MPI_INT, rank_Y(), 3, Comm, &req1[3] ); + MPI_Irecv(&recvCount_y, 1, MPI_INT, rank_y(), 3, Comm, &req2[3] ); + MPI_Isend(&sendCount_z, 1, MPI_INT, rank_z(), 4, Comm, &req1[4] ); + MPI_Irecv(&recvCount_Z, 1, MPI_INT, rank_Z(), 4, Comm, &req2[4] ); + MPI_Isend(&sendCount_Z, 1, MPI_INT, rank_Z(), 5, Comm, &req1[5] ); + MPI_Irecv(&recvCount_z, 1, MPI_INT, rank_z(), 5, Comm, &req2[5] ); + MPI_Isend(&sendCount_xy, 1, MPI_INT, rank_xy(), 6, Comm, &req1[6] ); + MPI_Irecv(&recvCount_XY, 1, MPI_INT, rank_XY(), 6, Comm, &req2[6] ); + MPI_Isend(&sendCount_XY, 1, MPI_INT, rank_XY(), 7, Comm, &req1[7] ); + MPI_Irecv(&recvCount_xy, 1, MPI_INT, rank_xy(), 7, Comm, &req2[7] ); + MPI_Isend(&sendCount_Xy, 1, MPI_INT, rank_Xy(), 8, Comm, &req1[8] ); + MPI_Irecv(&recvCount_xY, 1, MPI_INT, rank_xY(), 8, Comm, &req2[8] ); + MPI_Isend(&sendCount_xY, 1, MPI_INT, rank_xY(), 9, Comm, &req1[9] ); + MPI_Irecv(&recvCount_Xy, 1, MPI_INT, rank_Xy(), 9, Comm, &req2[9] ); + MPI_Isend(&sendCount_xz, 1, MPI_INT, rank_xz(), 10, Comm, &req1[10] ); + MPI_Irecv(&recvCount_XZ, 1, MPI_INT, rank_XZ(), 10, Comm, &req2[10] ); + MPI_Isend(&sendCount_XZ, 1, MPI_INT, rank_XZ(), 11, Comm, &req1[11] ); + MPI_Irecv(&recvCount_xz, 1, MPI_INT, rank_xz(), 11, Comm, &req2[11] ); + MPI_Isend(&sendCount_Xz, 1, MPI_INT, rank_Xz(), 12, Comm, &req1[12] ); + MPI_Irecv(&recvCount_xZ, 1, MPI_INT, rank_xZ(), 12, Comm, &req2[12] ); + MPI_Isend(&sendCount_xZ, 1, MPI_INT, rank_xZ(), 13, Comm, &req1[13] ); + MPI_Irecv(&recvCount_Xz, 1, MPI_INT, rank_Xz(), 13, Comm, &req2[13] ); + MPI_Isend(&sendCount_yz, 1, MPI_INT, rank_yz(), 14, Comm, &req1[14] ); + MPI_Irecv(&recvCount_YZ, 1, MPI_INT, rank_YZ(), 14, Comm, &req2[14] ); + MPI_Isend(&sendCount_YZ, 1, MPI_INT, rank_YZ(), 15, Comm, &req1[15] ); + MPI_Irecv(&recvCount_yz, 1, MPI_INT, rank_yz(), 15, Comm, &req2[15] ); + MPI_Isend(&sendCount_Yz, 1, MPI_INT, rank_Yz(), 16, Comm, &req1[16] ); + MPI_Irecv(&recvCount_yZ, 1, MPI_INT, rank_yZ(), 16, Comm, &req2[16] ); + MPI_Isend(&sendCount_yZ, 1, MPI_INT, rank_yZ(), 17, Comm, &req1[17] ); + MPI_Irecv(&recvCount_Yz, 1, MPI_INT, rank_Yz(), 17, Comm, &req2[17] ); + MPI_Waitall( 18, req1, status ); + MPI_Waitall( 18, req2, status ); + MPI_Barrier( Comm ); + // allocate recv lists + recvList_x.resize(recvCount_x, 0); + recvList_y.resize(recvCount_y, 0); + recvList_z.resize(recvCount_z, 0); + recvList_X.resize(recvCount_X, 0); + recvList_Y.resize(recvCount_Y, 0); + recvList_Z.resize(recvCount_Z, 0); + recvList_xy.resize(recvCount_xy, 0); + recvList_yz.resize(recvCount_yz, 0); + recvList_xz.resize(recvCount_xz, 0); + recvList_Xy.resize(recvCount_Xy, 0); + recvList_Yz.resize(recvCount_Yz, 0); + recvList_xZ.resize(recvCount_xZ, 0); + recvList_xY.resize(recvCount_xY, 0); + recvList_yZ.resize(recvCount_yZ, 0); + recvList_Xz.resize(recvCount_Xz, 0); + recvList_XY.resize(recvCount_XY, 0); + recvList_YZ.resize(recvCount_YZ, 0); + recvList_XZ.resize(recvCount_XZ, 0); + //...................................................................................... + MPI_Isend(sendList_x.data(), sendCount_x, MPI_INT, rank_x(), 0, Comm, &req1[0] ); + MPI_Irecv(recvList_X.data(), recvCount_X, MPI_INT, rank_X(), 0, Comm, &req2[0] ); + MPI_Isend(sendList_X.data(), sendCount_X, MPI_INT, rank_X(), 1, Comm, &req1[1] ); + MPI_Irecv(recvList_x.data(), recvCount_x, MPI_INT, rank_x(), 1, Comm, &req2[1] ); + MPI_Isend(sendList_y.data(), sendCount_y, MPI_INT, rank_y(), 2, Comm, &req1[2] ); + MPI_Irecv(recvList_Y.data(), recvCount_Y, MPI_INT, rank_Y(), 2, Comm, &req2[2] ); + MPI_Isend(sendList_Y.data(), sendCount_Y, MPI_INT, rank_Y(), 3, Comm, &req1[3] ); + MPI_Irecv(recvList_y.data(), recvCount_y, MPI_INT, rank_y(), 3, Comm, &req2[3] ); + MPI_Isend(sendList_z.data(), sendCount_z, MPI_INT, rank_z(), 4, Comm, &req1[4] ); + MPI_Irecv(recvList_Z.data(), recvCount_Z, MPI_INT, rank_Z(), 4, Comm, &req2[4] ); + MPI_Isend(sendList_Z.data(), sendCount_Z, MPI_INT, rank_Z(), 5, Comm, &req1[5] ); + MPI_Irecv(recvList_z.data(), recvCount_z, MPI_INT, rank_z(), 5, Comm, &req2[5] ); + MPI_Isend(sendList_xy.data(), sendCount_xy, MPI_INT, rank_xy(), 6, Comm, &req1[6] ); + MPI_Irecv(recvList_XY.data(), recvCount_XY, MPI_INT, rank_XY(), 6, Comm, &req2[6] ); + MPI_Isend(sendList_XY.data(), sendCount_XY, MPI_INT, rank_XY(), 7, Comm, &req1[7] ); + MPI_Irecv(recvList_xy.data(), recvCount_xy, MPI_INT, rank_xy(), 7, Comm, &req2[7] ); + MPI_Isend(sendList_Xy.data(), sendCount_Xy, MPI_INT, rank_Xy(), 8, Comm, &req1[8] ); + MPI_Irecv(recvList_xY.data(), recvCount_xY, MPI_INT, rank_xY(), 8, Comm, &req2[8] ); + MPI_Isend(sendList_xY.data(), sendCount_xY, MPI_INT, rank_xY(), 9, Comm, &req1[9] ); + MPI_Irecv(recvList_Xy.data(), recvCount_Xy, MPI_INT, rank_Xy(), 9, Comm, &req2[9] ); + MPI_Isend(sendList_xz.data(), sendCount_xz, MPI_INT, rank_xz(), 10, Comm, &req1[10] ); + MPI_Irecv(recvList_XZ.data(), recvCount_XZ, MPI_INT, rank_XZ(), 10, Comm, &req2[10] ); + MPI_Isend(sendList_XZ.data(), sendCount_XZ, MPI_INT, rank_XZ(), 11, Comm, &req1[11] ); + MPI_Irecv(recvList_xz.data(), recvCount_xz, MPI_INT, rank_xz(), 11, Comm, &req2[11] ); + MPI_Isend(sendList_Xz.data(), sendCount_Xz, MPI_INT, rank_Xz(), 12, Comm, &req1[12] ); + MPI_Irecv(recvList_xZ.data(), recvCount_xZ, MPI_INT, rank_xZ(), 12, Comm, &req2[12] ); + MPI_Isend(sendList_xZ.data(), sendCount_xZ, MPI_INT, rank_xZ(), 13, Comm, &req1[13] ); + MPI_Irecv(recvList_Xz.data(), recvCount_Xz, MPI_INT, rank_Xz(), 13, Comm, &req2[13] ); + MPI_Isend(sendList_yz.data(), sendCount_yz, MPI_INT, rank_yz(), 14, Comm, &req1[14] ); + MPI_Irecv(recvList_YZ.data(), recvCount_YZ, MPI_INT, rank_YZ(), 14, Comm, &req2[14] ); + MPI_Isend(sendList_YZ.data(), sendCount_YZ, MPI_INT, rank_YZ(), 15, Comm, &req1[15] ); + MPI_Irecv(recvList_yz.data(), recvCount_yz, MPI_INT, rank_yz(), 15, Comm, &req2[15] ); + MPI_Isend(sendList_Yz.data(), sendCount_Yz, MPI_INT, rank_Yz(), 16, Comm, &req1[16] ); + MPI_Irecv(recvList_yZ.data(), recvCount_yZ, MPI_INT, rank_yZ(), 16, Comm, &req2[16] ); + MPI_Isend(sendList_yZ.data(), sendCount_yZ, MPI_INT, rank_yZ(), 17, Comm, &req1[17] ); + MPI_Irecv(recvList_Yz.data(), recvCount_Yz, MPI_INT, rank_Yz(), 17, Comm, &req2[17] ); + MPI_Waitall( 18, req1, status ); + MPI_Waitall( 18, req2, status ); + MPI_Barrier( Comm ); + } + +private: + int Nx, Ny, Nz, N; + RankInfoStruct2 rank_info; + MPI_Comm Comm; // MPI Communicator for this domain + MPI_Request req1[18], req2[18]; + std::vector sendList_x, sendList_y, sendList_z, sendList_X, sendList_Y, sendList_Z; + std::vector sendList_xy, sendList_yz, sendList_xz, sendList_Xy, sendList_Yz, sendList_xZ; + std::vector sendList_xY, sendList_yZ, sendList_Xz, sendList_XY, sendList_YZ, sendList_XZ; + std::vector recvList_x, recvList_y, recvList_z, recvList_X, recvList_Y, recvList_Z; + std::vector recvList_xy, recvList_yz, recvList_xz, recvList_Xy, recvList_Yz, recvList_xZ; + std::vector recvList_xY, recvList_yZ, recvList_Xz, recvList_XY, recvList_YZ, recvList_XZ; +}; + + +std::array get_nproc( int P ) +{ + if ( P == 1 ) + return { 1, 1, 1 }; + else if ( P == 2 ) + return { 2, 1, 1 }; + else if ( P == 4 ) + return { 2, 2, 1 }; + else if ( P == 8 ) + return { 2, 2, 2 }; + else if ( P == 64 ) + return { 4, 4, 4 }; + else + throw std::logic_error("proccessor count not supported yet"); +} + + +int main(int argc, char **argv) +{ + // Start MPI + bool multiple = true; + if (multiple) { + int provided; + MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided); + if (provided < MPI_THREAD_MULTIPLE) { + int rank; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + if (rank == 0) + std::cerr << "Warning: Failed to start MPI with thread support\n"; + } + } else { + MPI_Init(&argc, &argv); + } + + // Run the problem + int size = 0; + MPI_Comm_size( MPI_COMM_WORLD, &size ); + { + auto nproc = get_nproc( size ); + std::array n = { 10, 20, 30 }; + auto Dm = std::make_shared(nproc,n,MPI_COMM_WORLD); + Dm->CommInit(); + std::cout << "step 1" << std::endl << std::flush; + } + std::cout << "step 2" << std::endl << std::flush; + MPI_Barrier( MPI_COMM_WORLD ); + std::cout << "step 3" << std::endl << std::flush; + + // Shutdown MPI + MPI_Finalize(); + std::cout << "step 4" << std::endl << std::flush; + return 0; +}