diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 08ea575d..c945ddbc 100755 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -45,8 +45,12 @@ ADD_LBPM_EXECUTABLE( TestIonModel ) ADD_LBPM_EXECUTABLE( TestNernstPlanck ) ADD_LBPM_EXECUTABLE( TestPNP_Stokes ) ADD_LBPM_EXECUTABLE( TestMixedGrad ) + +# Temporary tests for crusher MPI bug ADD_LBPM_EXECUTABLE( TestCrusher ) ADD_LBPM_EXECUTABLE( TestCrusher2 ) +ADD_LBPM_EXECUTABLE( TestCrusher3 ) +ADD_LBPM_EXECUTABLE( TestCrusher4 ) diff --git a/tests/TestCrusher.cpp b/tests/TestCrusher.cpp index 83f55b25..11a35efd 100644 --- a/tests/TestCrusher.cpp +++ b/tests/TestCrusher.cpp @@ -1,6 +1,6 @@ #include #include -#include "common/ScaLBL.h" +#include "common/Domain.h" #include "common/MPI.h" diff --git a/tests/TestCrusher2.cpp b/tests/TestCrusher2.cpp index 845efed7..17f6c535 100644 --- a/tests/TestCrusher2.cpp +++ b/tests/TestCrusher2.cpp @@ -1,80 +1,100 @@ -#include -#include +#include +#include #include #include -#include #include -#include -#include +#include +#include #include +#include +#include +#include +#include -#include "common/Array.h" +#include "common/Database.h" #include "common/Utilities.h" #include "common/MPI.h" -#include "common/Communication.h" -#include "common/Database.h" + + +inline MPI_Request Isend( const Utilities::MPI &comm, const int *buf, int count, int rank, int tag ) +{ + return comm.Isend( buf, count, rank, tag ); +} +inline MPI_Request Irecv( const Utilities::MPI &comm, int *buf, int count, int rank, int tag ) +{ + return comm.Irecv( buf, count, rank, tag ); +} + + +struct RankInfoStruct2 { + int nx; //!< The number of processors in the x direction + int ny; //!< The number of processors in the y direction + int nz; //!< The number of processors in the z direction + int ix; //!< The index of the current process in the x direction + int jy; //!< The index of the current process in the y direction + int kz; //!< The index of the current process in the z direction + int rank[3][3][3]; //!< The rank for the neighbor [i][j][k] + RankInfoStruct2() : RankInfoStruct2( 1, 0, 0, 0 ) {} + RankInfoStruct2(int rank0, int nprocx, int nprocy, int nprocz) { + memset(this, 0, sizeof(RankInfoStruct2)); + nx = nprocx; + ny = nprocy; + nz = nprocz; + if (rank0 >= nprocx * nprocy * nprocz) { + ix = -1; + jy = -1; + kz = -1; + for (int i = -1; i <= 1; i++) { + for (int j = -1; j <= 1; j++) { + for (int k = -1; k <= 1; k++) { + rank[i + 1][j + 1][k + 1] = -1; + } + } + } + } else { + ix = rank0 % nprocx; + jy = (rank0 / nprocx) % nprocy; + kz = rank0 / (nprocx * nprocy); + for (int i = -1; i <= 1; i++) { + for (int j = -1; j <= 1; j++) { + for (int k = -1; k <= 1; k++) { + rank[i + 1][j + 1][k + 1] = + getRankForBlock(ix + i, jy + j, kz + k); + } + } + } + ASSERT(rank[1][1][1] == rank0); + } + } + int getRankForBlock(int i, int j, int k) const { + int i2 = (i + nx) % nx; + int j2 = (j + ny) % ny; + int k2 = (k + nz) % nz; + return i2 + j2 * nx + k2 * nx * ny; + } +}; class Domain2 { public: - Domain2(std::shared_ptr db, const Utilities::MPI &Communicator) { + Domain2( std::array nproc, std::array n, const Utilities::MPI &Communicator ) { Comm = Communicator.dup(); - int myrank = Comm.getRank(); - initialize(db); - rank_info = RankInfoStruct(myrank, rank_info.nx, rank_info.ny, rank_info.nz); - Comm.barrier(); + Nx = n[0] + 2; + Ny = n[1] + 2; + Nz = n[2] + 2; + N = Nx * Ny * Nz; + int rank = Comm.getRank(); + int size = Comm.getSize(); + rank_info = RankInfoStruct2( rank, nproc[0], nproc[1], nproc[2] ); + INSIST(size == nproc[0] * nproc[1] * nproc[2], "Fatal error in processor count!"); } Domain2() = delete; Domain2(const Domain2 &) = delete; ~Domain2() = default; -private: - // initialize from database - void initialize(std::shared_ptr db) { - d_db = db; - auto nproc = d_db->getVector("nproc"); - auto n = d_db->getVector("n"); - - ASSERT(n.size() == 3u); - ASSERT(nproc.size() == 3u); - int nx = n[0]; - int ny = n[1]; - int nz = n[2]; - Nx = nx + 2; - Ny = ny + 2; - Nz = nz + 2; - N = Nx * Ny * Nz; - // Initialize ranks - int myrank = Comm.getRank(); - rank_info = RankInfoStruct(myrank, nproc[0], nproc[1], nproc[2]); - - // Fill remaining variables - id = std::vector(N, 0); - int nprocs = Comm.getSize(); - INSIST(nprocs == nproc[0] * nproc[1] * nproc[2], "Fatal error in processor count!"); - } - - std::shared_ptr d_db; - -public: // Public variables (need to create accessors instead) - std::shared_ptr database; - int Nx, Ny, Nz, N; - RankInfoStruct rank_info; - - Utilities::MPI Comm; // MPI Communicator for this domain - - - //********************************** +public: // MPI ranks for all 18 neighbors - //********************************** - inline int iproc() const { return rank_info.ix; } - inline int jproc() const { return rank_info.jy; } - inline int kproc() const { return rank_info.kz; } - inline int nprocx() const { return rank_info.nx; } - inline int nprocy() const { return rank_info.ny; } - inline int nprocz() const { return rank_info.nz; } - inline int rank() const { return rank_info.rank[1][1][1]; } inline int rank_X() const { return rank_info.rank[2][1][1]; } inline int rank_x() const { return rank_info.rank[0][1][1]; } inline int rank_Y() const { return rank_info.rank[1][2][1]; } @@ -94,16 +114,8 @@ public: // Public variables (need to create accessors instead) inline int rank_Yz() const { return rank_info.rank[1][2][0]; } inline int rank_yZ() const { return rank_info.rank[1][0][2]; } - //...................................................................................... - // Solid indicator function - std::vector id; - // Initialize communication data structures within Domain object. void CommInit() { - int i, j, k, n; - int sendtag = 21; - int recvtag = 21; - //...................................................................................... int sendCount_x, sendCount_y, sendCount_z, sendCount_X, sendCount_Y, sendCount_Z; int sendCount_xy, sendCount_yz, sendCount_xz, sendCount_Xy, sendCount_Yz, sendCount_xZ; int sendCount_xY, sendCount_yZ, sendCount_Xz, sendCount_XY, sendCount_YZ, sendCount_XZ; @@ -111,52 +123,49 @@ public: // Public variables (need to create accessors instead) sendCount_xy = sendCount_yz = sendCount_xz = sendCount_Xy = sendCount_Yz = sendCount_xZ = 0; sendCount_xY = sendCount_yZ = sendCount_Xz = sendCount_XY = sendCount_YZ = sendCount_XZ = 0; //...................................................................................... - for (k = 1; k < Nz - 1; k++) { - for (j = 1; j < Ny - 1; j++) { - for (i = 1; i < Nx - 1; i++) { - // Check the phase ID - if (id[k * Nx * Ny + j * Nx + i] > 0) { - // Counts for the six faces - if (i == 1) - sendCount_x++; - if (j == 1) - sendCount_y++; - if (k == 1) - sendCount_z++; - if (i == Nx - 2) - sendCount_X++; - if (j == Ny - 2) - sendCount_Y++; - if (k == Nz - 2) - sendCount_Z++; - // Counts for the twelve edges - if (i == 1 && j == 1) - sendCount_xy++; - if (i == 1 && j == Ny - 2) - sendCount_xY++; - if (i == Nx - 2 && j == 1) - sendCount_Xy++; - if (i == Nx - 2 && j == Ny - 2) - sendCount_XY++; + for (int k = 1; k < Nz - 1; k++) { + for (int j = 1; j < Ny - 1; j++) { + for (int i = 1; i < Nx - 1; i++) { + // Counts for the six faces + if (i == 1) + sendCount_x++; + if (j == 1) + sendCount_y++; + if (k == 1) + sendCount_z++; + if (i == Nx - 2) + sendCount_X++; + if (j == Ny - 2) + sendCount_Y++; + if (k == Nz - 2) + sendCount_Z++; + // Counts for the twelve edges + if (i == 1 && j == 1) + sendCount_xy++; + if (i == 1 && j == Ny - 2) + sendCount_xY++; + if (i == Nx - 2 && j == 1) + sendCount_Xy++; + if (i == Nx - 2 && j == Ny - 2) + sendCount_XY++; - if (i == 1 && k == 1) - sendCount_xz++; - if (i == 1 && k == Nz - 2) - sendCount_xZ++; - if (i == Nx - 2 && k == 1) - sendCount_Xz++; - if (i == Nx - 2 && k == Nz - 2) - sendCount_XZ++; + if (i == 1 && k == 1) + sendCount_xz++; + if (i == 1 && k == Nz - 2) + sendCount_xZ++; + if (i == Nx - 2 && k == 1) + sendCount_Xz++; + if (i == Nx - 2 && k == Nz - 2) + sendCount_XZ++; - if (j == 1 && k == 1) - sendCount_yz++; - if (j == 1 && k == Nz - 2) - sendCount_yZ++; - if (j == Ny - 2 && k == 1) - sendCount_Yz++; - if (j == Ny - 2 && k == Nz - 2) - sendCount_YZ++; - } + if (j == 1 && k == 1) + sendCount_yz++; + if (j == 1 && k == Nz - 2) + sendCount_yZ++; + if (j == Ny - 2 && k == 1) + sendCount_Yz++; + if (j == Ny - 2 && k == Nz - 2) + sendCount_YZ++; } } } @@ -181,106 +190,98 @@ public: // Public variables (need to create accessors instead) sendList_YZ.resize(sendCount_YZ, 0); sendList_XZ.resize(sendCount_XZ, 0); // Populate the send list - sendCount_x = sendCount_y = sendCount_z = sendCount_X = sendCount_Y = - sendCount_Z = 0; - sendCount_xy = sendCount_yz = sendCount_xz = sendCount_Xy = sendCount_Yz = - sendCount_xZ = 0; - sendCount_xY = sendCount_yZ = sendCount_Xz = sendCount_XY = sendCount_YZ = - sendCount_XZ = 0; - for (k = 1; k < Nz - 1; k++) { - for (j = 1; j < Ny - 1; j++) { - for (i = 1; i < Nx - 1; i++) { + sendCount_x = sendCount_y = sendCount_z = sendCount_X = sendCount_Y = sendCount_Z = 0; + sendCount_xy = sendCount_yz = sendCount_xz = sendCount_Xy = sendCount_Yz = sendCount_xZ = 0; + sendCount_xY = sendCount_yZ = sendCount_Xz = sendCount_XY = sendCount_YZ = sendCount_XZ = 0; + for (int k = 1; k < Nz - 1; k++) { + for (int j = 1; j < Ny - 1; j++) { + for (int i = 1; i < Nx - 1; i++) { // Local value to send - n = k * Nx * Ny + j * Nx + i; - if (id[n] > 0) { - // Counts for the six faces - if (i == 1) - sendList_x[sendCount_x++] = n; - if (j == 1) - sendList_y[sendCount_y++] = n; - if (k == 1) - sendList_z[sendCount_z++] = n; - if (i == Nx - 2) - sendList_X[sendCount_X++] = n; - if (j == Ny - 2) - sendList_Y[sendCount_Y++] = n; - if (k == Nz - 2) - sendList_Z[sendCount_Z++] = n; - // Counts for the twelve edges - if (i == 1 && j == 1) - sendList_xy[sendCount_xy++] = n; - if (i == 1 && j == Ny - 2) - sendList_xY[sendCount_xY++] = n; - if (i == Nx - 2 && j == 1) - sendList_Xy[sendCount_Xy++] = n; - if (i == Nx - 2 && j == Ny - 2) - sendList_XY[sendCount_XY++] = n; + int n = k * Nx * Ny + j * Nx + i; + // Counts for the six faces + if (i == 1) + sendList_x[sendCount_x++] = n; + if (j == 1) + sendList_y[sendCount_y++] = n; + if (k == 1) + sendList_z[sendCount_z++] = n; + if (i == Nx - 2) + sendList_X[sendCount_X++] = n; + if (j == Ny - 2) + sendList_Y[sendCount_Y++] = n; + if (k == Nz - 2) + sendList_Z[sendCount_Z++] = n; + // Counts for the twelve edges + if (i == 1 && j == 1) + sendList_xy[sendCount_xy++] = n; + if (i == 1 && j == Ny - 2) + sendList_xY[sendCount_xY++] = n; + if (i == Nx - 2 && j == 1) + sendList_Xy[sendCount_Xy++] = n; + if (i == Nx - 2 && j == Ny - 2) + sendList_XY[sendCount_XY++] = n; - if (i == 1 && k == 1) - sendList_xz[sendCount_xz++] = n; - if (i == 1 && k == Nz - 2) - sendList_xZ[sendCount_xZ++] = n; - if (i == Nx - 2 && k == 1) - sendList_Xz[sendCount_Xz++] = n; - if (i == Nx - 2 && k == Nz - 2) - sendList_XZ[sendCount_XZ++] = n; + if (i == 1 && k == 1) + sendList_xz[sendCount_xz++] = n; + if (i == 1 && k == Nz - 2) + sendList_xZ[sendCount_xZ++] = n; + if (i == Nx - 2 && k == 1) + sendList_Xz[sendCount_Xz++] = n; + if (i == Nx - 2 && k == Nz - 2) + sendList_XZ[sendCount_XZ++] = n; - if (j == 1 && k == 1) - sendList_yz[sendCount_yz++] = n; - if (j == 1 && k == Nz - 2) - sendList_yZ[sendCount_yZ++] = n; - if (j == Ny - 2 && k == 1) - sendList_Yz[sendCount_Yz++] = n; - if (j == Ny - 2 && k == Nz - 2) - sendList_YZ[sendCount_YZ++] = n; - } + if (j == 1 && k == 1) + sendList_yz[sendCount_yz++] = n; + if (j == 1 && k == Nz - 2) + sendList_yZ[sendCount_yZ++] = n; + if (j == Ny - 2 && k == 1) + sendList_Yz[sendCount_Yz++] = n; + if (j == Ny - 2 && k == Nz - 2) + sendList_YZ[sendCount_YZ++] = n; } } } //...................................................................................... - int recvCount_x, recvCount_y, recvCount_z, recvCount_X, recvCount_Y, - recvCount_Z; - int recvCount_xy, recvCount_yz, recvCount_xz, recvCount_Xy, recvCount_Yz, - recvCount_xZ; - int recvCount_xY, recvCount_yZ, recvCount_Xz, recvCount_XY, recvCount_YZ, - recvCount_XZ; - req1[0] = Comm.Isend(&sendCount_x, 1, rank_x(), sendtag + 0); - req2[0] = Comm.Irecv(&recvCount_X, 1, rank_X(), recvtag + 0); - req1[1] = Comm.Isend(&sendCount_X, 1, rank_X(), sendtag + 1); - req2[1] = Comm.Irecv(&recvCount_x, 1, rank_x(), recvtag + 1); - req1[2] = Comm.Isend(&sendCount_y, 1, rank_y(), sendtag + 2); - req2[2] = Comm.Irecv(&recvCount_Y, 1, rank_Y(), recvtag + 2); - req1[3] = Comm.Isend(&sendCount_Y, 1, rank_Y(), sendtag + 3); - req2[3] = Comm.Irecv(&recvCount_y, 1, rank_y(), recvtag + 3); - req1[4] = Comm.Isend(&sendCount_z, 1, rank_z(), sendtag + 4); - req2[4] = Comm.Irecv(&recvCount_Z, 1, rank_Z(), recvtag + 4); - req1[5] = Comm.Isend(&sendCount_Z, 1, rank_Z(), sendtag + 5); - req2[5] = Comm.Irecv(&recvCount_z, 1, rank_z(), recvtag + 5); - req1[6] = Comm.Isend(&sendCount_xy, 1, rank_xy(), sendtag + 6); - req2[6] = Comm.Irecv(&recvCount_XY, 1, rank_XY(), recvtag + 6); - req1[7] = Comm.Isend(&sendCount_XY, 1, rank_XY(), sendtag + 7); - req2[7] = Comm.Irecv(&recvCount_xy, 1, rank_xy(), recvtag + 7); - req1[8] = Comm.Isend(&sendCount_Xy, 1, rank_Xy(), sendtag + 8); - req2[8] = Comm.Irecv(&recvCount_xY, 1, rank_xY(), recvtag + 8); - req1[9] = Comm.Isend(&sendCount_xY, 1, rank_xY(), sendtag + 9); - req2[9] = Comm.Irecv(&recvCount_Xy, 1, rank_Xy(), recvtag + 9); - req1[10] = Comm.Isend(&sendCount_xz, 1, rank_xz(), sendtag + 10); - req2[10] = Comm.Irecv(&recvCount_XZ, 1, rank_XZ(), recvtag + 10); - req1[11] = Comm.Isend(&sendCount_XZ, 1, rank_XZ(), sendtag + 11); - req2[11] = Comm.Irecv(&recvCount_xz, 1, rank_xz(), recvtag + 11); - req1[12] = Comm.Isend(&sendCount_Xz, 1, rank_Xz(), sendtag + 12); - req2[12] = Comm.Irecv(&recvCount_xZ, 1, rank_xZ(), recvtag + 12); - req1[13] = Comm.Isend(&sendCount_xZ, 1, rank_xZ(), sendtag + 13); - req2[13] = Comm.Irecv(&recvCount_Xz, 1, rank_Xz(), recvtag + 13); - req1[14] = Comm.Isend(&sendCount_yz, 1, rank_yz(), sendtag + 14); - req2[14] = Comm.Irecv(&recvCount_YZ, 1, rank_YZ(), recvtag + 14); - req1[15] = Comm.Isend(&sendCount_YZ, 1, rank_YZ(), sendtag + 15); - req2[15] = Comm.Irecv(&recvCount_yz, 1, rank_yz(), recvtag + 15); - req1[16] = Comm.Isend(&sendCount_Yz, 1, rank_Yz(), sendtag + 16); - req2[16] = Comm.Irecv(&recvCount_yZ, 1, rank_yZ(), recvtag + 16); - req1[17] = Comm.Isend(&sendCount_yZ, 1, rank_yZ(), sendtag + 17); - req2[17] = Comm.Irecv(&recvCount_Yz, 1, rank_Yz(), recvtag + 17); + int recvCount_x, recvCount_y, recvCount_z, recvCount_X, recvCount_Y, recvCount_Z; + int recvCount_xy, recvCount_yz, recvCount_xz, recvCount_Xy, recvCount_Yz, recvCount_xZ; + int recvCount_xY, recvCount_yZ, recvCount_Xz, recvCount_XY, recvCount_YZ, recvCount_XZ; + req1[0] = Isend( Comm, &sendCount_x, 1, rank_x(), 0 ); + req2[0] = Irecv( Comm, &recvCount_X, 1, rank_X(), 0 ); + req1[1] = Isend( Comm, &sendCount_X, 1, rank_X(), 1 ); + req2[1] = Irecv( Comm, &recvCount_x, 1, rank_x(), 1 ); + req1[2] = Isend( Comm, &sendCount_y, 1, rank_y(), 2 ); + req2[2] = Irecv( Comm, &recvCount_Y, 1, rank_Y(), 2 ); + req1[3] = Isend( Comm, &sendCount_Y, 1, rank_Y(), 3 ); + req2[3] = Irecv( Comm, &recvCount_y, 1, rank_y(), 3 ); + req1[4] = Isend( Comm, &sendCount_z, 1, rank_z(), 4 ); + req2[4] = Irecv( Comm, &recvCount_Z, 1, rank_Z(), 4 ); + req1[5] = Isend( Comm, &sendCount_Z, 1, rank_Z(), 5 ); + req2[5] = Irecv( Comm, &recvCount_z, 1, rank_z(), 5 ); + req1[6] = Isend( Comm, &sendCount_xy, 1, rank_xy(), 6 ); + req2[6] = Irecv( Comm, &recvCount_XY, 1, rank_XY(), 6 ); + req1[7] = Isend( Comm, &sendCount_XY, 1, rank_XY(), 7 ); + req2[7] = Irecv( Comm, &recvCount_xy, 1, rank_xy(), 7 ); + req1[8] = Isend( Comm, &sendCount_Xy, 1, rank_Xy(), 8 ); + req2[8] = Irecv( Comm, &recvCount_xY, 1, rank_xY(), 8 ); + req1[9] = Isend( Comm, &sendCount_xY, 1, rank_xY(), 9 ); + req2[9] = Irecv( Comm, &recvCount_Xy, 1, rank_Xy(), 9 ); + req1[10] = Isend( Comm, &sendCount_xz, 1, rank_xz(), 10 ); + req2[10] = Irecv( Comm, &recvCount_XZ, 1, rank_XZ(), 10 ); + req1[11] = Isend( Comm, &sendCount_XZ, 1, rank_XZ(), 11 ); + req2[11] = Irecv( Comm, &recvCount_xz, 1, rank_xz(), 11 ); + req1[12] = Isend( Comm, &sendCount_Xz, 1, rank_Xz(), 12 ); + req2[12] = Irecv( Comm, &recvCount_xZ, 1, rank_xZ(), 12 ); + req1[13] = Isend( Comm, &sendCount_xZ, 1, rank_xZ(), 13 ); + req2[13] = Irecv( Comm, &recvCount_Xz, 1, rank_Xz(), 13 ); + req1[14] = Isend( Comm, &sendCount_yz, 1, rank_yz(), 14 ); + req2[14] = Irecv( Comm, &recvCount_YZ, 1, rank_YZ(), 14 ); + req1[15] = Isend( Comm, &sendCount_YZ, 1, rank_YZ(), 15 ); + req2[15] = Irecv( Comm, &recvCount_yz, 1, rank_yz(), 15 ); + req1[16] = Isend( Comm, &sendCount_Yz, 1, rank_Yz(), 16 ); + req2[16] = Irecv( Comm, &recvCount_yZ, 1, rank_yZ(), 16 ); + req1[17] = Isend( Comm, &sendCount_yZ, 1, rank_yZ(), 17 ); + req2[17] = Irecv( Comm, &recvCount_Yz, 1, rank_Yz(), 17 ); Comm.waitAll(18, req1); Comm.waitAll(18, req2); Comm.barrier(); @@ -304,42 +305,42 @@ public: // Public variables (need to create accessors instead) recvList_YZ.resize(recvCount_YZ, 0); recvList_XZ.resize(recvCount_XZ, 0); //...................................................................................... - req1[0] = Comm.Isend(sendList_x.data(), sendCount_x, rank_x(), sendtag); - req2[0] = Comm.Irecv(recvList_X.data(), recvCount_X, rank_X(), recvtag); - req1[1] = Comm.Isend(sendList_X.data(), sendCount_X, rank_X(), sendtag); - req2[1] = Comm.Irecv(recvList_x.data(), recvCount_x, rank_x(), recvtag); - req1[2] = Comm.Isend(sendList_y.data(), sendCount_y, rank_y(), sendtag); - req2[2] = Comm.Irecv(recvList_Y.data(), recvCount_Y, rank_Y(), recvtag); - req1[3] = Comm.Isend(sendList_Y.data(), sendCount_Y, rank_Y(), sendtag); - req2[3] = Comm.Irecv(recvList_y.data(), recvCount_y, rank_y(), recvtag); - req1[4] = Comm.Isend(sendList_z.data(), sendCount_z, rank_z(), sendtag); - req2[4] = Comm.Irecv(recvList_Z.data(), recvCount_Z, rank_Z(), recvtag); - req1[5] = Comm.Isend(sendList_Z.data(), sendCount_Z, rank_Z(), sendtag); - req2[5] = Comm.Irecv(recvList_z.data(), recvCount_z, rank_z(), recvtag); - req1[6] = Comm.Isend(sendList_xy.data(), sendCount_xy, rank_xy(), sendtag); - req2[6] = Comm.Irecv(recvList_XY.data(), recvCount_XY, rank_XY(), recvtag); - req1[7] = Comm.Isend(sendList_XY.data(), sendCount_XY, rank_XY(), sendtag); - req2[7] = Comm.Irecv(recvList_xy.data(), recvCount_xy, rank_xy(), recvtag); - req1[8] = Comm.Isend(sendList_Xy.data(), sendCount_Xy, rank_Xy(), sendtag); - req2[8] = Comm.Irecv(recvList_xY.data(), recvCount_xY, rank_xY(), recvtag); - req1[9] = Comm.Isend(sendList_xY.data(), sendCount_xY, rank_xY(), sendtag); - req2[9] = Comm.Irecv(recvList_Xy.data(), recvCount_Xy, rank_Xy(), recvtag); - req1[10] = Comm.Isend(sendList_xz.data(), sendCount_xz, rank_xz(), sendtag); - req2[10] = Comm.Irecv(recvList_XZ.data(), recvCount_XZ, rank_XZ(), recvtag); - req1[11] = Comm.Isend(sendList_XZ.data(), sendCount_XZ, rank_XZ(), sendtag); - req2[11] = Comm.Irecv(recvList_xz.data(), recvCount_xz, rank_xz(), recvtag); - req1[12] = Comm.Isend(sendList_Xz.data(), sendCount_Xz, rank_Xz(), sendtag); - req2[12] = Comm.Irecv(recvList_xZ.data(), recvCount_xZ, rank_xZ(), recvtag); - req1[13] = Comm.Isend(sendList_xZ.data(), sendCount_xZ, rank_xZ(), sendtag); - req2[13] = Comm.Irecv(recvList_Xz.data(), recvCount_Xz, rank_Xz(), recvtag); - req1[14] = Comm.Isend(sendList_yz.data(), sendCount_yz, rank_yz(), sendtag); - req2[14] = Comm.Irecv(recvList_YZ.data(), recvCount_YZ, rank_YZ(), recvtag); - req1[15] = Comm.Isend(sendList_YZ.data(), sendCount_YZ, rank_YZ(), sendtag); - req2[15] = Comm.Irecv(recvList_yz.data(), recvCount_yz, rank_yz(), recvtag); - req1[16] = Comm.Isend(sendList_Yz.data(), sendCount_Yz, rank_Yz(), sendtag); - req2[16] = Comm.Irecv(recvList_yZ.data(), recvCount_yZ, rank_yZ(), recvtag); - req1[17] = Comm.Isend(sendList_yZ.data(), sendCount_yZ, rank_yZ(), sendtag); - req2[17] = Comm.Irecv(recvList_Yz.data(), recvCount_Yz, rank_Yz(), recvtag); + req1[0] = Isend( Comm, sendList_x.data(), sendCount_x, rank_x(), 0 ); + req2[0] = Irecv( Comm, recvList_X.data(), recvCount_X, rank_X(), 0 ); + req1[1] = Isend( Comm, sendList_X.data(), sendCount_X, rank_X(), 1 ); + req2[1] = Irecv( Comm, recvList_x.data(), recvCount_x, rank_x(), 1 ); + req1[2] = Isend( Comm, sendList_y.data(), sendCount_y, rank_y(), 2 ); + req2[2] = Irecv( Comm, recvList_Y.data(), recvCount_Y, rank_Y(), 2 ); + req1[3] = Isend( Comm, sendList_Y.data(), sendCount_Y, rank_Y(), 3 ); + req2[3] = Irecv( Comm, recvList_y.data(), recvCount_y, rank_y(), 3 ); + req1[4] = Isend( Comm, sendList_z.data(), sendCount_z, rank_z(), 4 ); + req2[4] = Irecv( Comm, recvList_Z.data(), recvCount_Z, rank_Z(), 4 ); + req1[5] = Isend( Comm, sendList_Z.data(), sendCount_Z, rank_Z(), 5 ); + req2[5] = Irecv( Comm, recvList_z.data(), recvCount_z, rank_z(), 5 ); + req1[6] = Isend( Comm, sendList_xy.data(), sendCount_xy, rank_xy(), 6 ); + req2[6] = Irecv( Comm, recvList_XY.data(), recvCount_XY, rank_XY(), 6 ); + req1[7] = Isend( Comm, sendList_XY.data(), sendCount_XY, rank_XY(), 7 ); + req2[7] = Irecv( Comm, recvList_xy.data(), recvCount_xy, rank_xy(), 7 ); + req1[8] = Isend( Comm, sendList_Xy.data(), sendCount_Xy, rank_Xy(), 8 ); + req2[8] = Irecv( Comm, recvList_xY.data(), recvCount_xY, rank_xY(), 8 ); + req1[9] = Isend( Comm, sendList_xY.data(), sendCount_xY, rank_xY(), 9 ); + req2[9] = Irecv( Comm, recvList_Xy.data(), recvCount_Xy, rank_Xy(), 9 ); + req1[10] = Isend( Comm, sendList_xz.data(), sendCount_xz, rank_xz(), 10 ); + req2[10] = Irecv( Comm, recvList_XZ.data(), recvCount_XZ, rank_XZ(), 10 ); + req1[11] = Isend( Comm, sendList_XZ.data(), sendCount_XZ, rank_XZ(), 11 ); + req2[11] = Irecv( Comm, recvList_xz.data(), recvCount_xz, rank_xz(), 11 ); + req1[12] = Isend( Comm, sendList_Xz.data(), sendCount_Xz, rank_Xz(), 12 ); + req2[12] = Irecv( Comm, recvList_xZ.data(), recvCount_xZ, rank_xZ(), 12 ); + req1[13] = Isend( Comm, sendList_xZ.data(), sendCount_xZ, rank_xZ(), 13 ); + req2[13] = Irecv( Comm, recvList_Xz.data(), recvCount_Xz, rank_Xz(), 13 ); + req1[14] = Isend( Comm, sendList_yz.data(), sendCount_yz, rank_yz(), 14 ); + req2[14] = Irecv( Comm, recvList_YZ.data(), recvCount_YZ, rank_YZ(), 14 ); + req1[15] = Isend( Comm, sendList_YZ.data(), sendCount_YZ, rank_YZ(), 15 ); + req2[15] = Irecv( Comm, recvList_yz.data(), recvCount_yz, rank_yz(), 15 ); + req1[16] = Isend( Comm, sendList_Yz.data(), sendCount_Yz, rank_Yz(), 16 ); + req2[16] = Irecv( Comm, recvList_yZ.data(), recvCount_yZ, rank_yZ(), 16 ); + req1[17] = Isend( Comm, sendList_yZ.data(), sendCount_yZ, rank_yZ(), 17 ); + req2[17] = Irecv( Comm, recvList_Yz.data(), recvCount_Yz, rank_Yz(), 17 ); Comm.waitAll(18, req1); Comm.waitAll(18, req2); //...................................................................................... @@ -382,7 +383,9 @@ public: // Public variables (need to create accessors instead) } private: - + int Nx, Ny, Nz, N; + RankInfoStruct2 rank_info; + Utilities::MPI Comm; // MPI Communicator for this domain MPI_Request req1[18], req2[18]; std::vector sendList_x, sendList_y, sendList_z, sendList_X, sendList_Y, sendList_Z; std::vector sendList_xy, sendList_yz, sendList_xz, sendList_Xy, sendList_Yz, sendList_xZ; @@ -395,40 +398,43 @@ private: }; +std::array get_nproc( int P ) +{ + if ( P == 1 ) + return { 1, 1, 1 }; + else if ( P == 2 ) + return { 2, 1, 1 }; + else if ( P == 4 ) + return { 2, 2, 1 }; + else if ( P == 8 ) + return { 2, 2, 2 }; + else if ( P == 64 ) + return { 4, 4, 4 }; + else + throw std::logic_error("proccessor count not supported yet"); +} + int main(int argc, char **argv) { + // Start MPI Utilities::startup( argc, argv, true ); - Utilities::MPI comm( MPI_COMM_WORLD ); - int rank = comm.getRank(); + + // Run the problem + int size = 0; + MPI_Comm_size( MPI_COMM_WORLD, &size ); { - auto filename = argv[1]; - auto input_db = std::make_shared( filename ); - auto db = input_db->getDatabase( "Domain" ); - auto Dm = std::make_shared(db,comm); - int Nx = db->getVector( "n" )[0] + 2; - int Ny = db->getVector( "n" )[1] + 2; - int Nz = db->getVector( "n" )[2] + 2; - char LocalRankString[8]; - sprintf(LocalRankString,"%05d",rank); - char LocalRankFilename[40]; - sprintf(LocalRankFilename,"ID.%05i",rank); - auto id = new char[Nx*Ny*Nz]; - for (int k=0;kid[n] = id[n]; - } - } - } + auto nproc = get_nproc( size ); + std::array n = { 10, 20, 30 }; + auto Dm = std::make_shared(nproc,n,MPI_COMM_WORLD); Dm->CommInit(); std::cout << "step 1" << std::endl << std::flush; } std::cout << "step 2" << std::endl << std::flush; - comm.barrier(); + MPI_Barrier( MPI_COMM_WORLD ); std::cout << "step 3" << std::endl << std::flush; + + // Shutdown MPI Utilities::shutdown(); std::cout << "step 4" << std::endl << std::flush; return 0; diff --git a/tests/TestCrusher3.cpp b/tests/TestCrusher3.cpp new file mode 100644 index 00000000..c923fb0c --- /dev/null +++ b/tests/TestCrusher3.cpp @@ -0,0 +1,447 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "StackTrace/StackTrace.h" +#include "StackTrace/ErrorHandlers.h" + +#include "mpi.h" + + +#define ASSERT(EXP) \ + do { \ + if (!(EXP)) { \ + std::stringstream tboxos; \ + tboxos << "Failed assertion: " << #EXP << std::ends; \ + throw std::logic_error( tboxos.str() ); \ + } \ + } while (0) +#define INSIST(EXP, MSG) \ + do { \ + if (!(EXP)) { \ + std::stringstream tboxos; \ + tboxos << "Failed insist: " << #EXP << std::endl; \ + tboxos << "Message: " << MSG << std::ends; \ + throw std::logic_error( tboxos.str() ); \ + } \ + } while (0) + + + +inline MPI_Request Isend( MPI_Comm comm, const int *buf, int count, int rank, int tag ) +{ + MPI_Request req; + MPI_Isend( buf, count, MPI_INT, rank, tag, comm, &req ); + return req; +} +inline MPI_Request Irecv( MPI_Comm comm, int *buf, int count, int rank, int tag ) +{ + MPI_Request req; + MPI_Irecv( buf, count, MPI_INT, rank, tag, comm, &req ); + return req; +} + + +struct RankInfoStruct2 { + int nx; //!< The number of processors in the x direction + int ny; //!< The number of processors in the y direction + int nz; //!< The number of processors in the z direction + int ix; //!< The index of the current process in the x direction + int jy; //!< The index of the current process in the y direction + int kz; //!< The index of the current process in the z direction + int rank[3][3][3]; //!< The rank for the neighbor [i][j][k] + RankInfoStruct2() { + memset(this, 0, sizeof(RankInfoStruct2)); + } + RankInfoStruct2(int rank0, int nprocx, int nprocy, int nprocz) { + memset(this, 0, sizeof(RankInfoStruct2)); + nx = nprocx; + ny = nprocy; + nz = nprocz; + if (rank0 >= nprocx * nprocy * nprocz) { + ix = -1; + jy = -1; + kz = -1; + for (int i = -1; i <= 1; i++) { + for (int j = -1; j <= 1; j++) { + for (int k = -1; k <= 1; k++) { + rank[i + 1][j + 1][k + 1] = -1; + } + } + } + } else { + ix = rank0 % nprocx; + jy = (rank0 / nprocx) % nprocy; + kz = rank0 / (nprocx * nprocy); + for (int i = -1; i <= 1; i++) { + for (int j = -1; j <= 1; j++) { + for (int k = -1; k <= 1; k++) { + rank[i + 1][j + 1][k + 1] = + getRankForBlock(ix + i, jy + j, kz + k); + } + } + } + ASSERT(rank[1][1][1] == rank0); + } + } + int getRankForBlock(int i, int j, int k) const { + int i2 = (i + nx) % nx; + int j2 = (j + ny) % ny; + int k2 = (k + nz) % nz; + return i2 + j2 * nx + k2 * nx * ny; + } +}; + + +class Domain2 { +public: + Domain2( std::array nproc, std::array n, MPI_Comm Communicator ) { + MPI_Comm_dup(Communicator, &Comm); + Nx = n[0] + 2; + Ny = n[1] + 2; + Nz = n[2] + 2; + N = Nx * Ny * Nz; + int rank, size; + MPI_Comm_rank( Comm, &rank ); + MPI_Comm_size( Comm, &size ); + rank_info = RankInfoStruct2( rank, nproc[0], nproc[1], nproc[2] ); + INSIST(size == nproc[0] * nproc[1] * nproc[2], "Fatal error in processor count!"); + } + + Domain2() = delete; + Domain2(const Domain2 &) = delete; + ~Domain2() { + int err = MPI_Comm_free(&Comm); + INSIST( err == MPI_SUCCESS, "Problem free'ing MPI_Comm object" ); + } + +public: + + // MPI ranks for all 18 neighbors + inline int rank_X() const { return rank_info.rank[2][1][1]; } + inline int rank_x() const { return rank_info.rank[0][1][1]; } + inline int rank_Y() const { return rank_info.rank[1][2][1]; } + inline int rank_y() const { return rank_info.rank[1][0][1]; } + inline int rank_Z() const { return rank_info.rank[1][1][2]; } + inline int rank_z() const { return rank_info.rank[1][1][0]; } + inline int rank_XY() const { return rank_info.rank[2][2][1]; } + inline int rank_xy() const { return rank_info.rank[0][0][1]; } + inline int rank_Xy() const { return rank_info.rank[2][0][1]; } + inline int rank_xY() const { return rank_info.rank[0][2][1]; } + inline int rank_XZ() const { return rank_info.rank[2][1][2]; } + inline int rank_xz() const { return rank_info.rank[0][1][0]; } + inline int rank_Xz() const { return rank_info.rank[2][1][0]; } + inline int rank_xZ() const { return rank_info.rank[0][1][2]; } + inline int rank_YZ() const { return rank_info.rank[1][2][2]; } + inline int rank_yz() const { return rank_info.rank[1][0][0]; } + inline int rank_Yz() const { return rank_info.rank[1][2][0]; } + inline int rank_yZ() const { return rank_info.rank[1][0][2]; } + + // Initialize communication data structures within Domain object. + void CommInit() { + MPI_Status status[18]; + int sendCount_x, sendCount_y, sendCount_z, sendCount_X, sendCount_Y, sendCount_Z; + int sendCount_xy, sendCount_yz, sendCount_xz, sendCount_Xy, sendCount_Yz, sendCount_xZ; + int sendCount_xY, sendCount_yZ, sendCount_Xz, sendCount_XY, sendCount_YZ, sendCount_XZ; + sendCount_x = sendCount_y = sendCount_z = sendCount_X = sendCount_Y = sendCount_Z = 0; + sendCount_xy = sendCount_yz = sendCount_xz = sendCount_Xy = sendCount_Yz = sendCount_xZ = 0; + sendCount_xY = sendCount_yZ = sendCount_Xz = sendCount_XY = sendCount_YZ = sendCount_XZ = 0; + //...................................................................................... + for (int k = 1; k < Nz - 1; k++) { + for (int j = 1; j < Ny - 1; j++) { + for (int i = 1; i < Nx - 1; i++) { + // Counts for the six faces + if (i == 1) + sendCount_x++; + if (j == 1) + sendCount_y++; + if (k == 1) + sendCount_z++; + if (i == Nx - 2) + sendCount_X++; + if (j == Ny - 2) + sendCount_Y++; + if (k == Nz - 2) + sendCount_Z++; + // Counts for the twelve edges + if (i == 1 && j == 1) + sendCount_xy++; + if (i == 1 && j == Ny - 2) + sendCount_xY++; + if (i == Nx - 2 && j == 1) + sendCount_Xy++; + if (i == Nx - 2 && j == Ny - 2) + sendCount_XY++; + + if (i == 1 && k == 1) + sendCount_xz++; + if (i == 1 && k == Nz - 2) + sendCount_xZ++; + if (i == Nx - 2 && k == 1) + sendCount_Xz++; + if (i == Nx - 2 && k == Nz - 2) + sendCount_XZ++; + + if (j == 1 && k == 1) + sendCount_yz++; + if (j == 1 && k == Nz - 2) + sendCount_yZ++; + if (j == Ny - 2 && k == 1) + sendCount_Yz++; + if (j == Ny - 2 && k == Nz - 2) + sendCount_YZ++; + } + } + } + + // allocate send lists + sendList_x.resize(sendCount_x, 0); + sendList_y.resize(sendCount_y, 0); + sendList_z.resize(sendCount_z, 0); + sendList_X.resize(sendCount_X, 0); + sendList_Y.resize(sendCount_Y, 0); + sendList_Z.resize(sendCount_Z, 0); + sendList_xy.resize(sendCount_xy, 0); + sendList_yz.resize(sendCount_yz, 0); + sendList_xz.resize(sendCount_xz, 0); + sendList_Xy.resize(sendCount_Xy, 0); + sendList_Yz.resize(sendCount_Yz, 0); + sendList_xZ.resize(sendCount_xZ, 0); + sendList_xY.resize(sendCount_xY, 0); + sendList_yZ.resize(sendCount_yZ, 0); + sendList_Xz.resize(sendCount_Xz, 0); + sendList_XY.resize(sendCount_XY, 0); + sendList_YZ.resize(sendCount_YZ, 0); + sendList_XZ.resize(sendCount_XZ, 0); + // Populate the send list + sendCount_x = sendCount_y = sendCount_z = sendCount_X = sendCount_Y = sendCount_Z = 0; + sendCount_xy = sendCount_yz = sendCount_xz = sendCount_Xy = sendCount_Yz = sendCount_xZ = 0; + sendCount_xY = sendCount_yZ = sendCount_Xz = sendCount_XY = sendCount_YZ = sendCount_XZ = 0; + for (int k = 1; k < Nz - 1; k++) { + for (int j = 1; j < Ny - 1; j++) { + for (int i = 1; i < Nx - 1; i++) { + // Local value to send + int n = k * Nx * Ny + j * Nx + i; + // Counts for the six faces + if (i == 1) + sendList_x[sendCount_x++] = n; + if (j == 1) + sendList_y[sendCount_y++] = n; + if (k == 1) + sendList_z[sendCount_z++] = n; + if (i == Nx - 2) + sendList_X[sendCount_X++] = n; + if (j == Ny - 2) + sendList_Y[sendCount_Y++] = n; + if (k == Nz - 2) + sendList_Z[sendCount_Z++] = n; + // Counts for the twelve edges + if (i == 1 && j == 1) + sendList_xy[sendCount_xy++] = n; + if (i == 1 && j == Ny - 2) + sendList_xY[sendCount_xY++] = n; + if (i == Nx - 2 && j == 1) + sendList_Xy[sendCount_Xy++] = n; + if (i == Nx - 2 && j == Ny - 2) + sendList_XY[sendCount_XY++] = n; + + if (i == 1 && k == 1) + sendList_xz[sendCount_xz++] = n; + if (i == 1 && k == Nz - 2) + sendList_xZ[sendCount_xZ++] = n; + if (i == Nx - 2 && k == 1) + sendList_Xz[sendCount_Xz++] = n; + if (i == Nx - 2 && k == Nz - 2) + sendList_XZ[sendCount_XZ++] = n; + + if (j == 1 && k == 1) + sendList_yz[sendCount_yz++] = n; + if (j == 1 && k == Nz - 2) + sendList_yZ[sendCount_yZ++] = n; + if (j == Ny - 2 && k == 1) + sendList_Yz[sendCount_Yz++] = n; + if (j == Ny - 2 && k == Nz - 2) + sendList_YZ[sendCount_YZ++] = n; + } + } + } + + //...................................................................................... + int recvCount_x, recvCount_y, recvCount_z, recvCount_X, recvCount_Y, recvCount_Z; + int recvCount_xy, recvCount_yz, recvCount_xz, recvCount_Xy, recvCount_Yz, recvCount_xZ; + int recvCount_xY, recvCount_yZ, recvCount_Xz, recvCount_XY, recvCount_YZ, recvCount_XZ; + req1[0] = Isend( Comm, &sendCount_x, 1, rank_x(), 0 ); + req2[0] = Irecv( Comm, &recvCount_X, 1, rank_X(), 0 ); + req1[1] = Isend( Comm, &sendCount_X, 1, rank_X(), 1 ); + req2[1] = Irecv( Comm, &recvCount_x, 1, rank_x(), 1 ); + req1[2] = Isend( Comm, &sendCount_y, 1, rank_y(), 2 ); + req2[2] = Irecv( Comm, &recvCount_Y, 1, rank_Y(), 2 ); + req1[3] = Isend( Comm, &sendCount_Y, 1, rank_Y(), 3 ); + req2[3] = Irecv( Comm, &recvCount_y, 1, rank_y(), 3 ); + req1[4] = Isend( Comm, &sendCount_z, 1, rank_z(), 4 ); + req2[4] = Irecv( Comm, &recvCount_Z, 1, rank_Z(), 4 ); + req1[5] = Isend( Comm, &sendCount_Z, 1, rank_Z(), 5 ); + req2[5] = Irecv( Comm, &recvCount_z, 1, rank_z(), 5 ); + req1[6] = Isend( Comm, &sendCount_xy, 1, rank_xy(), 6 ); + req2[6] = Irecv( Comm, &recvCount_XY, 1, rank_XY(), 6 ); + req1[7] = Isend( Comm, &sendCount_XY, 1, rank_XY(), 7 ); + req2[7] = Irecv( Comm, &recvCount_xy, 1, rank_xy(), 7 ); + req1[8] = Isend( Comm, &sendCount_Xy, 1, rank_Xy(), 8 ); + req2[8] = Irecv( Comm, &recvCount_xY, 1, rank_xY(), 8 ); + req1[9] = Isend( Comm, &sendCount_xY, 1, rank_xY(), 9 ); + req2[9] = Irecv( Comm, &recvCount_Xy, 1, rank_Xy(), 9 ); + req1[10] = Isend( Comm, &sendCount_xz, 1, rank_xz(), 10 ); + req2[10] = Irecv( Comm, &recvCount_XZ, 1, rank_XZ(), 10 ); + req1[11] = Isend( Comm, &sendCount_XZ, 1, rank_XZ(), 11 ); + req2[11] = Irecv( Comm, &recvCount_xz, 1, rank_xz(), 11 ); + req1[12] = Isend( Comm, &sendCount_Xz, 1, rank_Xz(), 12 ); + req2[12] = Irecv( Comm, &recvCount_xZ, 1, rank_xZ(), 12 ); + req1[13] = Isend( Comm, &sendCount_xZ, 1, rank_xZ(), 13 ); + req2[13] = Irecv( Comm, &recvCount_Xz, 1, rank_Xz(), 13 ); + req1[14] = Isend( Comm, &sendCount_yz, 1, rank_yz(), 14 ); + req2[14] = Irecv( Comm, &recvCount_YZ, 1, rank_YZ(), 14 ); + req1[15] = Isend( Comm, &sendCount_YZ, 1, rank_YZ(), 15 ); + req2[15] = Irecv( Comm, &recvCount_yz, 1, rank_yz(), 15 ); + req1[16] = Isend( Comm, &sendCount_Yz, 1, rank_Yz(), 16 ); + req2[16] = Irecv( Comm, &recvCount_yZ, 1, rank_yZ(), 16 ); + req1[17] = Isend( Comm, &sendCount_yZ, 1, rank_yZ(), 17 ); + req2[17] = Irecv( Comm, &recvCount_Yz, 1, rank_Yz(), 17 ); + MPI_Waitall( 18, req1, status ); + MPI_Waitall( 18, req2, status ); + MPI_Barrier( Comm ); + // allocate recv lists + recvList_x.resize(recvCount_x, 0); + recvList_y.resize(recvCount_y, 0); + recvList_z.resize(recvCount_z, 0); + recvList_X.resize(recvCount_X, 0); + recvList_Y.resize(recvCount_Y, 0); + recvList_Z.resize(recvCount_Z, 0); + recvList_xy.resize(recvCount_xy, 0); + recvList_yz.resize(recvCount_yz, 0); + recvList_xz.resize(recvCount_xz, 0); + recvList_Xy.resize(recvCount_Xy, 0); + recvList_Yz.resize(recvCount_Yz, 0); + recvList_xZ.resize(recvCount_xZ, 0); + recvList_xY.resize(recvCount_xY, 0); + recvList_yZ.resize(recvCount_yZ, 0); + recvList_Xz.resize(recvCount_Xz, 0); + recvList_XY.resize(recvCount_XY, 0); + recvList_YZ.resize(recvCount_YZ, 0); + recvList_XZ.resize(recvCount_XZ, 0); + //...................................................................................... + req1[0] = Isend( Comm, sendList_x.data(), sendCount_x, rank_x(), 0 ); + req2[0] = Irecv( Comm, recvList_X.data(), recvCount_X, rank_X(), 0 ); + req1[1] = Isend( Comm, sendList_X.data(), sendCount_X, rank_X(), 1 ); + req2[1] = Irecv( Comm, recvList_x.data(), recvCount_x, rank_x(), 1 ); + req1[2] = Isend( Comm, sendList_y.data(), sendCount_y, rank_y(), 2 ); + req2[2] = Irecv( Comm, recvList_Y.data(), recvCount_Y, rank_Y(), 2 ); + req1[3] = Isend( Comm, sendList_Y.data(), sendCount_Y, rank_Y(), 3 ); + req2[3] = Irecv( Comm, recvList_y.data(), recvCount_y, rank_y(), 3 ); + req1[4] = Isend( Comm, sendList_z.data(), sendCount_z, rank_z(), 4 ); + req2[4] = Irecv( Comm, recvList_Z.data(), recvCount_Z, rank_Z(), 4 ); + req1[5] = Isend( Comm, sendList_Z.data(), sendCount_Z, rank_Z(), 5 ); + req2[5] = Irecv( Comm, recvList_z.data(), recvCount_z, rank_z(), 5 ); + req1[6] = Isend( Comm, sendList_xy.data(), sendCount_xy, rank_xy(), 6 ); + req2[6] = Irecv( Comm, recvList_XY.data(), recvCount_XY, rank_XY(), 6 ); + req1[7] = Isend( Comm, sendList_XY.data(), sendCount_XY, rank_XY(), 7 ); + req2[7] = Irecv( Comm, recvList_xy.data(), recvCount_xy, rank_xy(), 7 ); + req1[8] = Isend( Comm, sendList_Xy.data(), sendCount_Xy, rank_Xy(), 8 ); + req2[8] = Irecv( Comm, recvList_xY.data(), recvCount_xY, rank_xY(), 8 ); + req1[9] = Isend( Comm, sendList_xY.data(), sendCount_xY, rank_xY(), 9 ); + req2[9] = Irecv( Comm, recvList_Xy.data(), recvCount_Xy, rank_Xy(), 9 ); + req1[10] = Isend( Comm, sendList_xz.data(), sendCount_xz, rank_xz(), 10 ); + req2[10] = Irecv( Comm, recvList_XZ.data(), recvCount_XZ, rank_XZ(), 10 ); + req1[11] = Isend( Comm, sendList_XZ.data(), sendCount_XZ, rank_XZ(), 11 ); + req2[11] = Irecv( Comm, recvList_xz.data(), recvCount_xz, rank_xz(), 11 ); + req1[12] = Isend( Comm, sendList_Xz.data(), sendCount_Xz, rank_Xz(), 12 ); + req2[12] = Irecv( Comm, recvList_xZ.data(), recvCount_xZ, rank_xZ(), 12 ); + req1[13] = Isend( Comm, sendList_xZ.data(), sendCount_xZ, rank_xZ(), 13 ); + req2[13] = Irecv( Comm, recvList_Xz.data(), recvCount_Xz, rank_Xz(), 13 ); + req1[14] = Isend( Comm, sendList_yz.data(), sendCount_yz, rank_yz(), 14 ); + req2[14] = Irecv( Comm, recvList_YZ.data(), recvCount_YZ, rank_YZ(), 14 ); + req1[15] = Isend( Comm, sendList_YZ.data(), sendCount_YZ, rank_YZ(), 15 ); + req2[15] = Irecv( Comm, recvList_yz.data(), recvCount_yz, rank_yz(), 15 ); + req1[16] = Isend( Comm, sendList_Yz.data(), sendCount_Yz, rank_Yz(), 16 ); + req2[16] = Irecv( Comm, recvList_yZ.data(), recvCount_yZ, rank_yZ(), 16 ); + req1[17] = Isend( Comm, sendList_yZ.data(), sendCount_yZ, rank_yZ(), 17 ); + req2[17] = Irecv( Comm, recvList_Yz.data(), recvCount_Yz, rank_Yz(), 17 ); + MPI_Waitall( 18, req1, status ); + MPI_Waitall( 18, req2, status ); + MPI_Barrier( Comm ); + } + +private: + int Nx, Ny, Nz, N; + RankInfoStruct2 rank_info; + MPI_Comm Comm; // MPI Communicator for this domain + MPI_Request req1[18], req2[18]; + std::vector sendList_x, sendList_y, sendList_z, sendList_X, sendList_Y, sendList_Z; + std::vector sendList_xy, sendList_yz, sendList_xz, sendList_Xy, sendList_Yz, sendList_xZ; + std::vector sendList_xY, sendList_yZ, sendList_Xz, sendList_XY, sendList_YZ, sendList_XZ; + std::vector recvList_x, recvList_y, recvList_z, recvList_X, recvList_Y, recvList_Z; + std::vector recvList_xy, recvList_yz, recvList_xz, recvList_Xy, recvList_Yz, recvList_xZ; + std::vector recvList_xY, recvList_yZ, recvList_Xz, recvList_XY, recvList_YZ, recvList_XZ; +}; + + +std::array get_nproc( int P ) +{ + if ( P == 1 ) + return { 1, 1, 1 }; + else if ( P == 2 ) + return { 2, 1, 1 }; + else if ( P == 4 ) + return { 2, 2, 1 }; + else if ( P == 8 ) + return { 2, 2, 2 }; + else if ( P == 64 ) + return { 4, 4, 4 }; + else + throw std::logic_error("proccessor count not supported yet"); +} + + +int main(int argc, char **argv) +{ + // Start MPI + bool multiple = true; + if (multiple) { + int provided; + MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided); + if (provided < MPI_THREAD_MULTIPLE) + std::cerr << "Warning: Failed to start MPI with thread support\n"; + StackTrace::globalCallStackInitialize(MPI_COMM_WORLD); + } else { + MPI_Init(&argc, &argv); + } + + // Run the problem + int size = 0; + MPI_Comm_size( MPI_COMM_WORLD, &size ); + { + auto nproc = get_nproc( size ); + std::array n = { 222, 222, 222 }; + auto Dm = std::make_shared(nproc,n,MPI_COMM_WORLD); + Dm->CommInit(); + std::cout << "step 1" << std::endl << std::flush; + } + std::cout << "step 2" << std::endl << std::flush; + MPI_Barrier( MPI_COMM_WORLD ); + std::cout << "step 3" << std::endl << std::flush; + + // Shutdown MPI + StackTrace::globalCallStackFinalize(); + MPI_Barrier(MPI_COMM_WORLD); + MPI_Finalize(); + std::cout << "step 4" << std::endl << std::flush; + return 0; +} diff --git a/tests/TestCrusher4.cpp b/tests/TestCrusher4.cpp new file mode 100644 index 00000000..b0236897 --- /dev/null +++ b/tests/TestCrusher4.cpp @@ -0,0 +1,502 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +#include "mpi.h" + + +#define ASSERT(EXP) \ + do { \ + if (!(EXP)) { \ + std::stringstream tboxos; \ + tboxos << "Failed assertion: " << #EXP << std::ends; \ + throw std::logic_error( tboxos.str() ); \ + } \ + } while (0) +#define INSIST(EXP, MSG) \ + do { \ + if (!(EXP)) { \ + std::stringstream tboxos; \ + tboxos << "Failed insist: " << #EXP << std::endl; \ + tboxos << "Message: " << MSG << std::ends; \ + throw std::logic_error( tboxos.str() ); \ + } \ + } while (0) + + + +inline MPI_Request Isend( MPI_Comm comm, const int *buf, int count, int rank, int tag ) +{ + MPI_Request req; + MPI_Isend( buf, count, MPI_INT, rank, tag, comm, &req ); + return req; +} +inline MPI_Request Irecv( MPI_Comm comm, int *buf, int count, int rank, int tag ) +{ + MPI_Request req; + MPI_Irecv( buf, count, MPI_INT, rank, tag, comm, &req ); + return req; +} + + +struct RankInfoStruct2 { + int nx; //!< The number of processors in the x direction + int ny; //!< The number of processors in the y direction + int nz; //!< The number of processors in the z direction + int ix; //!< The index of the current process in the x direction + int jy; //!< The index of the current process in the y direction + int kz; //!< The index of the current process in the z direction + int rank[3][3][3]; //!< The rank for the neighbor [i][j][k] + RankInfoStruct2() { + memset(this, 0, sizeof(RankInfoStruct2)); + } + RankInfoStruct2(int rank0, int nprocx, int nprocy, int nprocz) { + memset(this, 0, sizeof(RankInfoStruct2)); + nx = nprocx; + ny = nprocy; + nz = nprocz; + if (rank0 >= nprocx * nprocy * nprocz) { + ix = -1; + jy = -1; + kz = -1; + for (int i = -1; i <= 1; i++) { + for (int j = -1; j <= 1; j++) { + for (int k = -1; k <= 1; k++) { + rank[i + 1][j + 1][k + 1] = -1; + } + } + } + } else { + ix = rank0 % nprocx; + jy = (rank0 / nprocx) % nprocy; + kz = rank0 / (nprocx * nprocy); + for (int i = -1; i <= 1; i++) { + for (int j = -1; j <= 1; j++) { + for (int k = -1; k <= 1; k++) { + rank[i + 1][j + 1][k + 1] = + getRankForBlock(ix + i, jy + j, kz + k); + } + } + } + ASSERT(rank[1][1][1] == rank0); + } + } + int getRankForBlock(int i, int j, int k) const { + int i2 = (i + nx) % nx; + int j2 = (j + ny) % ny; + int k2 = (k + nz) % nz; + return i2 + j2 * nx + k2 * nx * ny; + } +}; + + +class Domain2 { +public: + Domain2( std::array nproc, std::array n, MPI_Comm Communicator ) { + MPI_Comm_dup(Communicator, &Comm); + Nx = n[0] + 2; + Ny = n[1] + 2; + Nz = n[2] + 2; + N = Nx * Ny * Nz; + int rank, size; + MPI_Comm_rank( Comm, &rank ); + MPI_Comm_size( Comm, &size ); + rank_info = RankInfoStruct2( rank, nproc[0], nproc[1], nproc[2] ); + INSIST(size == nproc[0] * nproc[1] * nproc[2], "Fatal error in processor count!"); + } + + Domain2() = delete; + Domain2(const Domain2 &) = delete; + ~Domain2() { + int err = MPI_Comm_free(&Comm); + INSIST( err == MPI_SUCCESS, "Problem free'ing MPI_Comm object" ); + } + +public: + + // MPI ranks for all 18 neighbors + inline int rank_X() const { return rank_info.rank[2][1][1]; } + inline int rank_x() const { return rank_info.rank[0][1][1]; } + inline int rank_Y() const { return rank_info.rank[1][2][1]; } + inline int rank_y() const { return rank_info.rank[1][0][1]; } + inline int rank_Z() const { return rank_info.rank[1][1][2]; } + inline int rank_z() const { return rank_info.rank[1][1][0]; } + inline int rank_XY() const { return rank_info.rank[2][2][1]; } + inline int rank_xy() const { return rank_info.rank[0][0][1]; } + inline int rank_Xy() const { return rank_info.rank[2][0][1]; } + inline int rank_xY() const { return rank_info.rank[0][2][1]; } + inline int rank_XZ() const { return rank_info.rank[2][1][2]; } + inline int rank_xz() const { return rank_info.rank[0][1][0]; } + inline int rank_Xz() const { return rank_info.rank[2][1][0]; } + inline int rank_xZ() const { return rank_info.rank[0][1][2]; } + inline int rank_YZ() const { return rank_info.rank[1][2][2]; } + inline int rank_yz() const { return rank_info.rank[1][0][0]; } + inline int rank_Yz() const { return rank_info.rank[1][2][0]; } + inline int rank_yZ() const { return rank_info.rank[1][0][2]; } + + // Initialize communication data structures within Domain object. + void CommInit() { + MPI_Status status[18]; + int sendCount_x, sendCount_y, sendCount_z, sendCount_X, sendCount_Y, sendCount_Z; + int sendCount_xy, sendCount_yz, sendCount_xz, sendCount_Xy, sendCount_Yz, sendCount_xZ; + int sendCount_xY, sendCount_yZ, sendCount_Xz, sendCount_XY, sendCount_YZ, sendCount_XZ; + sendCount_x = sendCount_y = sendCount_z = sendCount_X = sendCount_Y = sendCount_Z = 0; + sendCount_xy = sendCount_yz = sendCount_xz = sendCount_Xy = sendCount_Yz = sendCount_xZ = 0; + sendCount_xY = sendCount_yZ = sendCount_Xz = sendCount_XY = sendCount_YZ = sendCount_XZ = 0; + //...................................................................................... + for (int k = 1; k < Nz - 1; k++) { + for (int j = 1; j < Ny - 1; j++) { + for (int i = 1; i < Nx - 1; i++) { + // Counts for the six faces + if (i == 1) + sendCount_x++; + if (j == 1) + sendCount_y++; + if (k == 1) + sendCount_z++; + if (i == Nx - 2) + sendCount_X++; + if (j == Ny - 2) + sendCount_Y++; + if (k == Nz - 2) + sendCount_Z++; + // Counts for the twelve edges + if (i == 1 && j == 1) + sendCount_xy++; + if (i == 1 && j == Ny - 2) + sendCount_xY++; + if (i == Nx - 2 && j == 1) + sendCount_Xy++; + if (i == Nx - 2 && j == Ny - 2) + sendCount_XY++; + + if (i == 1 && k == 1) + sendCount_xz++; + if (i == 1 && k == Nz - 2) + sendCount_xZ++; + if (i == Nx - 2 && k == 1) + sendCount_Xz++; + if (i == Nx - 2 && k == Nz - 2) + sendCount_XZ++; + + if (j == 1 && k == 1) + sendCount_yz++; + if (j == 1 && k == Nz - 2) + sendCount_yZ++; + if (j == Ny - 2 && k == 1) + sendCount_Yz++; + if (j == Ny - 2 && k == Nz - 2) + sendCount_YZ++; + } + } + } + + // allocate send lists + sendList_x.resize(sendCount_x, 0); + sendList_y.resize(sendCount_y, 0); + sendList_z.resize(sendCount_z, 0); + sendList_X.resize(sendCount_X, 0); + sendList_Y.resize(sendCount_Y, 0); + sendList_Z.resize(sendCount_Z, 0); + sendList_xy.resize(sendCount_xy, 0); + sendList_yz.resize(sendCount_yz, 0); + sendList_xz.resize(sendCount_xz, 0); + sendList_Xy.resize(sendCount_Xy, 0); + sendList_Yz.resize(sendCount_Yz, 0); + sendList_xZ.resize(sendCount_xZ, 0); + sendList_xY.resize(sendCount_xY, 0); + sendList_yZ.resize(sendCount_yZ, 0); + sendList_Xz.resize(sendCount_Xz, 0); + sendList_XY.resize(sendCount_XY, 0); + sendList_YZ.resize(sendCount_YZ, 0); + sendList_XZ.resize(sendCount_XZ, 0); + // Populate the send list + sendCount_x = sendCount_y = sendCount_z = sendCount_X = sendCount_Y = sendCount_Z = 0; + sendCount_xy = sendCount_yz = sendCount_xz = sendCount_Xy = sendCount_Yz = sendCount_xZ = 0; + sendCount_xY = sendCount_yZ = sendCount_Xz = sendCount_XY = sendCount_YZ = sendCount_XZ = 0; + for (int k = 1; k < Nz - 1; k++) { + for (int j = 1; j < Ny - 1; j++) { + for (int i = 1; i < Nx - 1; i++) { + // Local value to send + int n = k * Nx * Ny + j * Nx + i; + // Counts for the six faces + if (i == 1) + sendList_x[sendCount_x++] = n; + if (j == 1) + sendList_y[sendCount_y++] = n; + if (k == 1) + sendList_z[sendCount_z++] = n; + if (i == Nx - 2) + sendList_X[sendCount_X++] = n; + if (j == Ny - 2) + sendList_Y[sendCount_Y++] = n; + if (k == Nz - 2) + sendList_Z[sendCount_Z++] = n; + // Counts for the twelve edges + if (i == 1 && j == 1) + sendList_xy[sendCount_xy++] = n; + if (i == 1 && j == Ny - 2) + sendList_xY[sendCount_xY++] = n; + if (i == Nx - 2 && j == 1) + sendList_Xy[sendCount_Xy++] = n; + if (i == Nx - 2 && j == Ny - 2) + sendList_XY[sendCount_XY++] = n; + + if (i == 1 && k == 1) + sendList_xz[sendCount_xz++] = n; + if (i == 1 && k == Nz - 2) + sendList_xZ[sendCount_xZ++] = n; + if (i == Nx - 2 && k == 1) + sendList_Xz[sendCount_Xz++] = n; + if (i == Nx - 2 && k == Nz - 2) + sendList_XZ[sendCount_XZ++] = n; + + if (j == 1 && k == 1) + sendList_yz[sendCount_yz++] = n; + if (j == 1 && k == Nz - 2) + sendList_yZ[sendCount_yZ++] = n; + if (j == Ny - 2 && k == 1) + sendList_Yz[sendCount_Yz++] = n; + if (j == Ny - 2 && k == Nz - 2) + sendList_YZ[sendCount_YZ++] = n; + } + } + } + + //...................................................................................... + int recvCount_x, recvCount_y, recvCount_z, recvCount_X, recvCount_Y, recvCount_Z; + int recvCount_xy, recvCount_yz, recvCount_xz, recvCount_Xy, recvCount_Yz, recvCount_xZ; + int recvCount_xY, recvCount_yZ, recvCount_Xz, recvCount_XY, recvCount_YZ, recvCount_XZ; + req1[0] = Isend( Comm, &sendCount_x, 1, rank_x(), 0 ); + req2[0] = Irecv( Comm, &recvCount_X, 1, rank_X(), 0 ); + req1[1] = Isend( Comm, &sendCount_X, 1, rank_X(), 1 ); + req2[1] = Irecv( Comm, &recvCount_x, 1, rank_x(), 1 ); + req1[2] = Isend( Comm, &sendCount_y, 1, rank_y(), 2 ); + req2[2] = Irecv( Comm, &recvCount_Y, 1, rank_Y(), 2 ); + req1[3] = Isend( Comm, &sendCount_Y, 1, rank_Y(), 3 ); + req2[3] = Irecv( Comm, &recvCount_y, 1, rank_y(), 3 ); + req1[4] = Isend( Comm, &sendCount_z, 1, rank_z(), 4 ); + req2[4] = Irecv( Comm, &recvCount_Z, 1, rank_Z(), 4 ); + req1[5] = Isend( Comm, &sendCount_Z, 1, rank_Z(), 5 ); + req2[5] = Irecv( Comm, &recvCount_z, 1, rank_z(), 5 ); + req1[6] = Isend( Comm, &sendCount_xy, 1, rank_xy(), 6 ); + req2[6] = Irecv( Comm, &recvCount_XY, 1, rank_XY(), 6 ); + req1[7] = Isend( Comm, &sendCount_XY, 1, rank_XY(), 7 ); + req2[7] = Irecv( Comm, &recvCount_xy, 1, rank_xy(), 7 ); + req1[8] = Isend( Comm, &sendCount_Xy, 1, rank_Xy(), 8 ); + req2[8] = Irecv( Comm, &recvCount_xY, 1, rank_xY(), 8 ); + req1[9] = Isend( Comm, &sendCount_xY, 1, rank_xY(), 9 ); + req2[9] = Irecv( Comm, &recvCount_Xy, 1, rank_Xy(), 9 ); + req1[10] = Isend( Comm, &sendCount_xz, 1, rank_xz(), 10 ); + req2[10] = Irecv( Comm, &recvCount_XZ, 1, rank_XZ(), 10 ); + req1[11] = Isend( Comm, &sendCount_XZ, 1, rank_XZ(), 11 ); + req2[11] = Irecv( Comm, &recvCount_xz, 1, rank_xz(), 11 ); + req1[12] = Isend( Comm, &sendCount_Xz, 1, rank_Xz(), 12 ); + req2[12] = Irecv( Comm, &recvCount_xZ, 1, rank_xZ(), 12 ); + req1[13] = Isend( Comm, &sendCount_xZ, 1, rank_xZ(), 13 ); + req2[13] = Irecv( Comm, &recvCount_Xz, 1, rank_Xz(), 13 ); + req1[14] = Isend( Comm, &sendCount_yz, 1, rank_yz(), 14 ); + req2[14] = Irecv( Comm, &recvCount_YZ, 1, rank_YZ(), 14 ); + req1[15] = Isend( Comm, &sendCount_YZ, 1, rank_YZ(), 15 ); + req2[15] = Irecv( Comm, &recvCount_yz, 1, rank_yz(), 15 ); + req1[16] = Isend( Comm, &sendCount_Yz, 1, rank_Yz(), 16 ); + req2[16] = Irecv( Comm, &recvCount_yZ, 1, rank_yZ(), 16 ); + req1[17] = Isend( Comm, &sendCount_yZ, 1, rank_yZ(), 17 ); + req2[17] = Irecv( Comm, &recvCount_Yz, 1, rank_Yz(), 17 ); + MPI_Waitall( 18, req1, status ); + MPI_Waitall( 18, req2, status ); + MPI_Barrier( Comm ); + // allocate recv lists + recvList_x.resize(recvCount_x, 0); + recvList_y.resize(recvCount_y, 0); + recvList_z.resize(recvCount_z, 0); + recvList_X.resize(recvCount_X, 0); + recvList_Y.resize(recvCount_Y, 0); + recvList_Z.resize(recvCount_Z, 0); + recvList_xy.resize(recvCount_xy, 0); + recvList_yz.resize(recvCount_yz, 0); + recvList_xz.resize(recvCount_xz, 0); + recvList_Xy.resize(recvCount_Xy, 0); + recvList_Yz.resize(recvCount_Yz, 0); + recvList_xZ.resize(recvCount_xZ, 0); + recvList_xY.resize(recvCount_xY, 0); + recvList_yZ.resize(recvCount_yZ, 0); + recvList_Xz.resize(recvCount_Xz, 0); + recvList_XY.resize(recvCount_XY, 0); + recvList_YZ.resize(recvCount_YZ, 0); + recvList_XZ.resize(recvCount_XZ, 0); + //...................................................................................... + req1[0] = Isend( Comm, sendList_x.data(), sendCount_x, rank_x(), 0 ); + req2[0] = Irecv( Comm, recvList_X.data(), recvCount_X, rank_X(), 0 ); + req1[1] = Isend( Comm, sendList_X.data(), sendCount_X, rank_X(), 1 ); + req2[1] = Irecv( Comm, recvList_x.data(), recvCount_x, rank_x(), 1 ); + req1[2] = Isend( Comm, sendList_y.data(), sendCount_y, rank_y(), 2 ); + req2[2] = Irecv( Comm, recvList_Y.data(), recvCount_Y, rank_Y(), 2 ); + req1[3] = Isend( Comm, sendList_Y.data(), sendCount_Y, rank_Y(), 3 ); + req2[3] = Irecv( Comm, recvList_y.data(), recvCount_y, rank_y(), 3 ); + req1[4] = Isend( Comm, sendList_z.data(), sendCount_z, rank_z(), 4 ); + req2[4] = Irecv( Comm, recvList_Z.data(), recvCount_Z, rank_Z(), 4 ); + req1[5] = Isend( Comm, sendList_Z.data(), sendCount_Z, rank_Z(), 5 ); + req2[5] = Irecv( Comm, recvList_z.data(), recvCount_z, rank_z(), 5 ); + req1[6] = Isend( Comm, sendList_xy.data(), sendCount_xy, rank_xy(), 6 ); + req2[6] = Irecv( Comm, recvList_XY.data(), recvCount_XY, rank_XY(), 6 ); + req1[7] = Isend( Comm, sendList_XY.data(), sendCount_XY, rank_XY(), 7 ); + req2[7] = Irecv( Comm, recvList_xy.data(), recvCount_xy, rank_xy(), 7 ); + req1[8] = Isend( Comm, sendList_Xy.data(), sendCount_Xy, rank_Xy(), 8 ); + req2[8] = Irecv( Comm, recvList_xY.data(), recvCount_xY, rank_xY(), 8 ); + req1[9] = Isend( Comm, sendList_xY.data(), sendCount_xY, rank_xY(), 9 ); + req2[9] = Irecv( Comm, recvList_Xy.data(), recvCount_Xy, rank_Xy(), 9 ); + req1[10] = Isend( Comm, sendList_xz.data(), sendCount_xz, rank_xz(), 10 ); + req2[10] = Irecv( Comm, recvList_XZ.data(), recvCount_XZ, rank_XZ(), 10 ); + req1[11] = Isend( Comm, sendList_XZ.data(), sendCount_XZ, rank_XZ(), 11 ); + req2[11] = Irecv( Comm, recvList_xz.data(), recvCount_xz, rank_xz(), 11 ); + req1[12] = Isend( Comm, sendList_Xz.data(), sendCount_Xz, rank_Xz(), 12 ); + req2[12] = Irecv( Comm, recvList_xZ.data(), recvCount_xZ, rank_xZ(), 12 ); + req1[13] = Isend( Comm, sendList_xZ.data(), sendCount_xZ, rank_xZ(), 13 ); + req2[13] = Irecv( Comm, recvList_Xz.data(), recvCount_Xz, rank_Xz(), 13 ); + req1[14] = Isend( Comm, sendList_yz.data(), sendCount_yz, rank_yz(), 14 ); + req2[14] = Irecv( Comm, recvList_YZ.data(), recvCount_YZ, rank_YZ(), 14 ); + req1[15] = Isend( Comm, sendList_YZ.data(), sendCount_YZ, rank_YZ(), 15 ); + req2[15] = Irecv( Comm, recvList_yz.data(), recvCount_yz, rank_yz(), 15 ); + req1[16] = Isend( Comm, sendList_Yz.data(), sendCount_Yz, rank_Yz(), 16 ); + req2[16] = Irecv( Comm, recvList_yZ.data(), recvCount_yZ, rank_yZ(), 16 ); + req1[17] = Isend( Comm, sendList_yZ.data(), sendCount_yZ, rank_yZ(), 17 ); + req2[17] = Irecv( Comm, recvList_Yz.data(), recvCount_Yz, rank_Yz(), 17 ); + MPI_Waitall( 18, req1, status ); + MPI_Waitall( 18, req2, status ); + MPI_Barrier( Comm ); + } + +private: + int Nx, Ny, Nz, N; + RankInfoStruct2 rank_info; + MPI_Comm Comm; // MPI Communicator for this domain + MPI_Request req1[18], req2[18]; + std::vector sendList_x, sendList_y, sendList_z, sendList_X, sendList_Y, sendList_Z; + std::vector sendList_xy, sendList_yz, sendList_xz, sendList_Xy, sendList_Yz, sendList_xZ; + std::vector sendList_xY, sendList_yZ, sendList_Xz, sendList_XY, sendList_YZ, sendList_XZ; + std::vector recvList_x, recvList_y, recvList_z, recvList_X, recvList_Y, recvList_Z; + std::vector recvList_xy, recvList_yz, recvList_xz, recvList_Xy, recvList_Yz, recvList_xZ; + std::vector recvList_xY, recvList_yZ, recvList_Xz, recvList_XY, recvList_YZ, recvList_XZ; +}; + + +// Code to mimic StackTrace backend +static MPI_Comm globalCommForGlobalCommStack = MPI_COMM_NULL; +static volatile int globalMonitorThreadStatus = -1; +static std::shared_ptr globalMonitorThread; +static void runGlobalMonitorThread() +{ + int rank = 0; + int size = 1; + MPI_Comm_size( globalCommForGlobalCommStack, &size ); + MPI_Comm_rank( globalCommForGlobalCommStack, &rank ); + while ( globalMonitorThreadStatus == 1 ) { + // Check for any messages + int flag = 0; + MPI_Status status; + int err = MPI_Iprobe( MPI_ANY_SOURCE, 1, globalCommForGlobalCommStack, &flag, &status ); + if ( err != MPI_SUCCESS ) { + printf( "Internal error in runGlobalMonitorThread\n" ); + break; + } else if ( flag != 0 ) { + // We received a request + int src_rank = status.MPI_SOURCE; + int tag; + MPI_Recv( &tag, 1, MPI_INT, src_rank, 1, globalCommForGlobalCommStack, &status ); + // Do stuff + throw std::logic_error( "This should not happen for this test" ); + } else { + // No requests received + std::this_thread::sleep_for( std::chrono::milliseconds( 50 ) ); + } + } +} +void globalCallStackInitialize( MPI_Comm comm ) +{ + globalMonitorThreadStatus = 3; + // Check that we have support to get call stacks from threads + int N_threads = 2; + MPI_Bcast( &N_threads, 1, MPI_INT, 0, comm ); + // Create the communicator and initialize the helper thread + globalMonitorThreadStatus = 1; + MPI_Comm_dup( comm, &globalCommForGlobalCommStack ); + globalMonitorThread.reset( new std::thread( runGlobalMonitorThread ) ); + std::this_thread::sleep_for( std::chrono::milliseconds( 50 ) ); +} +void globalCallStackFinalize() +{ + if ( globalMonitorThread ) { + globalMonitorThreadStatus = 2; + globalMonitorThread->join(); + globalMonitorThread.reset(); + } + if ( globalCommForGlobalCommStack != MPI_COMM_NULL ) + MPI_Comm_free( &globalCommForGlobalCommStack ); + globalCommForGlobalCommStack = MPI_COMM_NULL; +} + + +std::array get_nproc( int P ) +{ + if ( P == 1 ) + return { 1, 1, 1 }; + else if ( P == 2 ) + return { 2, 1, 1 }; + else if ( P == 4 ) + return { 2, 2, 1 }; + else if ( P == 8 ) + return { 2, 2, 2 }; + else if ( P == 64 ) + return { 4, 4, 4 }; + else + throw std::logic_error("proccessor count not supported yet"); +} + + +int main(int argc, char **argv) +{ + // Start MPI + bool multiple = true; + if (multiple) { + int provided; + MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided); + if (provided < MPI_THREAD_MULTIPLE) + std::cerr << "Warning: Failed to start MPI with thread support\n"; + globalCallStackInitialize(MPI_COMM_WORLD); + } else { + MPI_Init(&argc, &argv); + } + + // Run the problem + int size = 0; + MPI_Comm_size( MPI_COMM_WORLD, &size ); + { + auto nproc = get_nproc( size ); + std::array n = { 222, 222, 222 }; + auto Dm = std::make_shared(nproc,n,MPI_COMM_WORLD); + Dm->CommInit(); + std::cout << "step 1" << std::endl << std::flush; + } + std::cout << "step 2" << std::endl << std::flush; + MPI_Barrier( MPI_COMM_WORLD ); + std::cout << "step 3" << std::endl << std::flush; + + // Shutdown MPI + globalCallStackFinalize(); + MPI_Barrier(MPI_COMM_WORLD); + MPI_Finalize(); + std::cout << "step 4" << std::endl << std::flush; + return 0; +}