diff --git a/common/Communication.hpp b/common/Communication.hpp index a59c8335..26139c66 100644 --- a/common/Communication.hpp +++ b/common/Communication.hpp @@ -4,7 +4,7 @@ #include "common/Communication.h" #include "common/MPI_Helpers.h" #include "common/Utilities.h" -#include "ProfilerApp.h" +//#include "ProfilerApp.h" /******************************************************** @@ -104,7 +104,7 @@ fillHalo::~fillHalo( ) template void fillHalo::fill( Array& data ) { - PROFILE_START("fillHalo::fill",1); + //PROFILE_START("fillHalo::fill",1); int depth2 = data.size(3); ASSERT((int)data.size(0)==nx+2*ngx); ASSERT((int)data.size(1)==ny+2*ngy); @@ -156,7 +156,7 @@ void fillHalo::fill( Array& data ) } } } - PROFILE_STOP("fillHalo::fill",1); + //PROFILE_STOP("fillHalo::fill",1); } template void fillHalo::pack( const Array& data, int i0, int j0, int k0, TYPE *buffer ) @@ -207,7 +207,7 @@ template template void fillHalo::copy( const Array& src, Array& dst ) { - PROFILE_START("fillHalo::copy",1); + //PROFILE_START("fillHalo::copy",1); ASSERT( (int)src.size(0)==nx || (int)src.size(0)==nx+2*ngx ); ASSERT( (int)dst.size(0)==nx || (int)dst.size(0)==nx+2*ngx ); bool src_halo = (int)src.size(0)==nx+2*ngx; @@ -254,7 +254,7 @@ void fillHalo::copy( const Array& src, Array& dst ) } fill(dst); } - PROFILE_STOP("fillHalo::copy",1); + //PROFILE_STOP("fillHalo::copy",1); } diff --git a/common/Domain.cpp b/common/Domain.cpp index 6416ab52..381bc588 100644 --- a/common/Domain.cpp +++ b/common/Domain.cpp @@ -15,17 +15,12 @@ #include "common/MPI_Helpers.h" #include "common/Communication.h" -static int MAX_BLOB_COUNT=50; - using namespace std; - - - // Reading the domain information file void read_domain( int rank, int nprocs, MPI_Comm comm, - int& nprocx, int& nprocy, int& nprocz, int& nx, int& ny, int& nz, - int& nspheres, double& Lx, double& Ly, double& Lz ) + int& nprocx, int& nprocy, int& nprocz, int& nx, int& ny, int& nz, + int& nspheres, double& Lx, double& Ly, double& Lz ) { if (rank==0){ ifstream domain("Domain.in"); @@ -59,696 +54,1173 @@ void read_domain( int rank, int nprocs, MPI_Comm comm, /******************************************************** -* Constructor/Destructor * -********************************************************/ + * Constructor/Destructor * + ********************************************************/ Domain::Domain(int nx, int ny, int nz, int rnk, int npx, int npy, int npz, - double lx, double ly, double lz, int BC): - Nx(0), Ny(0), Nz(0), iproc(0), jproc(0), nprocx(0), nprocy(0), nprocz(0), - Lx(0), Ly(0), Lz(0), Volume(0), rank(0), BoundaryCondition(0), - Group(MPI_GROUP_NULL), Comm(MPI_COMM_NULL), - rank_x(0), rank_y(0), rank_z(0), rank_X(0), rank_Y(0), rank_Z(0), - rank_xy(0), rank_XY(0), rank_xY(0), rank_Xy(0), - rank_xz(0), rank_XZ(0), rank_xZ(0), rank_Xz(0), - rank_yz(0), rank_YZ(0), rank_yZ(0), rank_Yz(0), - sendCount_x(0), sendCount_y(0), sendCount_z(0), sendCount_X(0), sendCount_Y(0), sendCount_Z(0), - sendCount_xy(0), sendCount_yz(0), sendCount_xz(0), sendCount_Xy(0), sendCount_Yz(0), sendCount_xZ(0), - sendCount_xY(0), sendCount_yZ(0), sendCount_Xz(0), sendCount_XY(0), sendCount_YZ(0), sendCount_XZ(0), - sendList_x(NULL), sendList_y(NULL), sendList_z(NULL), sendList_X(NULL), sendList_Y(NULL), sendList_Z(NULL), - sendList_xy(NULL), sendList_yz(NULL), sendList_xz(NULL), sendList_Xy(NULL), sendList_Yz(NULL), sendList_xZ(NULL), - sendList_xY(NULL), sendList_yZ(NULL), sendList_Xz(NULL), sendList_XY(NULL), sendList_YZ(NULL), sendList_XZ(NULL), - sendBuf_x(NULL), sendBuf_y(NULL), sendBuf_z(NULL), sendBuf_X(NULL), sendBuf_Y(NULL), sendBuf_Z(NULL), - sendBuf_xy(NULL), sendBuf_yz(NULL), sendBuf_xz(NULL), sendBuf_Xy(NULL), sendBuf_Yz(NULL), sendBuf_xZ(NULL), - sendBuf_xY(NULL), sendBuf_yZ(NULL), sendBuf_Xz(NULL), sendBuf_XY(NULL), sendBuf_YZ(NULL), sendBuf_XZ(NULL), - recvCount_x(0), recvCount_y(0), recvCount_z(0), recvCount_X(0), recvCount_Y(0), recvCount_Z(0), - recvCount_xy(0), recvCount_yz(0), recvCount_xz(0), recvCount_Xy(0), recvCount_Yz(0), recvCount_xZ(0), - recvCount_xY(0), recvCount_yZ(0), recvCount_Xz(0), recvCount_XY(0), recvCount_YZ(0), recvCount_XZ(0), - recvList_x(NULL), recvList_y(NULL), recvList_z(NULL), recvList_X(NULL), recvList_Y(NULL), recvList_Z(NULL), - recvList_xy(NULL), recvList_yz(NULL), recvList_xz(NULL), recvList_Xy(NULL), recvList_Yz(NULL), recvList_xZ(NULL), - recvList_xY(NULL), recvList_yZ(NULL), recvList_Xz(NULL), recvList_XY(NULL), recvList_YZ(NULL), recvList_XZ(NULL), - recvBuf_x(NULL), recvBuf_y(NULL), recvBuf_z(NULL), recvBuf_X(NULL), recvBuf_Y(NULL), recvBuf_Z(NULL), - recvBuf_xy(NULL), recvBuf_yz(NULL), recvBuf_xz(NULL), recvBuf_Xy(NULL), recvBuf_Yz(NULL), recvBuf_xZ(NULL), - recvBuf_xY(NULL), recvBuf_yZ(NULL), recvBuf_Xz(NULL), recvBuf_XY(NULL), recvBuf_YZ(NULL), recvBuf_XZ(NULL), - sendData_x(NULL), sendData_y(NULL), sendData_z(NULL), sendData_X(NULL), sendData_Y(NULL), sendData_Z(NULL), - sendData_xy(NULL), sendData_yz(NULL), sendData_xz(NULL), sendData_Xy(NULL), sendData_Yz(NULL), sendData_xZ(NULL), - sendData_xY(NULL), sendData_yZ(NULL), sendData_Xz(NULL), sendData_XY(NULL), sendData_YZ(NULL), sendData_XZ(NULL), - recvData_x(NULL), recvData_y(NULL), recvData_z(NULL), recvData_X(NULL), recvData_Y(NULL), recvData_Z(NULL), - recvData_xy(NULL), recvData_yz(NULL), recvData_xz(NULL), recvData_Xy(NULL), recvData_Yz(NULL), recvData_xZ(NULL), - recvData_xY(NULL), recvData_yZ(NULL), recvData_Xz(NULL), recvData_XY(NULL), recvData_YZ(NULL), recvData_XZ(NULL), - id(NULL) + double lx, double ly, double lz, int BC): + Nx(0), Ny(0), Nz(0), iproc(0), jproc(0), nprocx(0), nprocy(0), nprocz(0), + Lx(0), Ly(0), Lz(0), Volume(0), rank(0), BoundaryCondition(0), + Group(MPI_GROUP_NULL), Comm(MPI_COMM_NULL), + rank_x(0), rank_y(0), rank_z(0), rank_X(0), rank_Y(0), rank_Z(0), + rank_xy(0), rank_XY(0), rank_xY(0), rank_Xy(0), + rank_xz(0), rank_XZ(0), rank_xZ(0), rank_Xz(0), + rank_yz(0), rank_YZ(0), rank_yZ(0), rank_Yz(0), + sendCount_x(0), sendCount_y(0), sendCount_z(0), sendCount_X(0), sendCount_Y(0), sendCount_Z(0), + sendCount_xy(0), sendCount_yz(0), sendCount_xz(0), sendCount_Xy(0), sendCount_Yz(0), sendCount_xZ(0), + sendCount_xY(0), sendCount_yZ(0), sendCount_Xz(0), sendCount_XY(0), sendCount_YZ(0), sendCount_XZ(0), + sendList_x(NULL), sendList_y(NULL), sendList_z(NULL), sendList_X(NULL), sendList_Y(NULL), sendList_Z(NULL), + sendList_xy(NULL), sendList_yz(NULL), sendList_xz(NULL), sendList_Xy(NULL), sendList_Yz(NULL), sendList_xZ(NULL), + sendList_xY(NULL), sendList_yZ(NULL), sendList_Xz(NULL), sendList_XY(NULL), sendList_YZ(NULL), sendList_XZ(NULL), + sendBuf_x(NULL), sendBuf_y(NULL), sendBuf_z(NULL), sendBuf_X(NULL), sendBuf_Y(NULL), sendBuf_Z(NULL), + sendBuf_xy(NULL), sendBuf_yz(NULL), sendBuf_xz(NULL), sendBuf_Xy(NULL), sendBuf_Yz(NULL), sendBuf_xZ(NULL), + sendBuf_xY(NULL), sendBuf_yZ(NULL), sendBuf_Xz(NULL), sendBuf_XY(NULL), sendBuf_YZ(NULL), sendBuf_XZ(NULL), + recvCount_x(0), recvCount_y(0), recvCount_z(0), recvCount_X(0), recvCount_Y(0), recvCount_Z(0), + recvCount_xy(0), recvCount_yz(0), recvCount_xz(0), recvCount_Xy(0), recvCount_Yz(0), recvCount_xZ(0), + recvCount_xY(0), recvCount_yZ(0), recvCount_Xz(0), recvCount_XY(0), recvCount_YZ(0), recvCount_XZ(0), + recvList_x(NULL), recvList_y(NULL), recvList_z(NULL), recvList_X(NULL), recvList_Y(NULL), recvList_Z(NULL), + recvList_xy(NULL), recvList_yz(NULL), recvList_xz(NULL), recvList_Xy(NULL), recvList_Yz(NULL), recvList_xZ(NULL), + recvList_xY(NULL), recvList_yZ(NULL), recvList_Xz(NULL), recvList_XY(NULL), recvList_YZ(NULL), recvList_XZ(NULL), + recvBuf_x(NULL), recvBuf_y(NULL), recvBuf_z(NULL), recvBuf_X(NULL), recvBuf_Y(NULL), recvBuf_Z(NULL), + recvBuf_xy(NULL), recvBuf_yz(NULL), recvBuf_xz(NULL), recvBuf_Xy(NULL), recvBuf_Yz(NULL), recvBuf_xZ(NULL), + recvBuf_xY(NULL), recvBuf_yZ(NULL), recvBuf_Xz(NULL), recvBuf_XY(NULL), recvBuf_YZ(NULL), recvBuf_XZ(NULL), + sendData_x(NULL), sendData_y(NULL), sendData_z(NULL), sendData_X(NULL), sendData_Y(NULL), sendData_Z(NULL), + sendData_xy(NULL), sendData_yz(NULL), sendData_xz(NULL), sendData_Xy(NULL), sendData_Yz(NULL), sendData_xZ(NULL), + sendData_xY(NULL), sendData_yZ(NULL), sendData_Xz(NULL), sendData_XY(NULL), sendData_YZ(NULL), sendData_XZ(NULL), + recvData_x(NULL), recvData_y(NULL), recvData_z(NULL), recvData_X(NULL), recvData_Y(NULL), recvData_Z(NULL), + recvData_xy(NULL), recvData_yz(NULL), recvData_xz(NULL), recvData_Xy(NULL), recvData_Yz(NULL), recvData_xZ(NULL), + recvData_xY(NULL), recvData_yZ(NULL), recvData_Xz(NULL), recvData_XY(NULL), recvData_YZ(NULL), recvData_XZ(NULL), + id(NULL) { - Volume = nx*ny*nx*npx*npy*npz*1.0; - Nx = nx+2; Ny = ny+2; Nz = nz+2; - Lx = lx, Ly = ly, Lz = lz; - rank = rnk; - nprocx=npx; nprocy=npy; nprocz=npz; - N = Nx*Ny*Nz; - id = new char[N]; - memset(id,0,N); - BlobLabel.resize(Nx,Ny,Nz); - BlobGraph.resize(18,MAX_BLOB_COUNT,MAX_BLOB_COUNT); - BoundaryCondition = BC; - rank_info=RankInfoStruct(rank,nprocx,nprocy,nprocz); + Volume = nx*ny*nx*npx*npy*npz*1.0; + Nx = nx+2; Ny = ny+2; Nz = nz+2; + Lx = lx, Ly = ly, Lz = lz; + rank = rnk; + nprocx=npx; nprocy=npy; nprocz=npz; + N = Nx*Ny*Nz; + id = new char[N]; + memset(id,0,N); + BoundaryCondition = BC; + rank_info=RankInfoStruct(rank,nprocx,nprocy,nprocz); } Domain::~Domain() { - // Free sendList - delete [] sendList_x; delete [] sendList_y; delete [] sendList_z; - delete [] sendList_X; delete [] sendList_Y; delete [] sendList_Z; - delete [] sendList_xy; delete [] sendList_yz; delete [] sendList_xz; - delete [] sendList_Xy; delete [] sendList_Yz; delete [] sendList_xZ; - delete [] sendList_xY; delete [] sendList_yZ; delete [] sendList_Xz; - delete [] sendList_XY; delete [] sendList_YZ; delete [] sendList_XZ; - // Free sendBuf - delete [] sendBuf_x; delete [] sendBuf_y; delete [] sendBuf_z; - delete [] sendBuf_X; delete [] sendBuf_Y; delete [] sendBuf_Z; - delete [] sendBuf_xy; delete [] sendBuf_yz; delete [] sendBuf_xz; - delete [] sendBuf_Xy; delete [] sendBuf_Yz; delete [] sendBuf_xZ; - delete [] sendBuf_xY; delete [] sendBuf_yZ; delete [] sendBuf_Xz; - delete [] sendBuf_XY; delete [] sendBuf_YZ; delete [] sendBuf_XZ; - // Free recvList - delete [] recvList_x; delete [] recvList_y; delete [] recvList_z; - delete [] recvList_X; delete [] recvList_Y; delete [] recvList_Z; - delete [] recvList_xy; delete [] recvList_yz; delete [] recvList_xz; - delete [] recvList_Xy; delete [] recvList_Yz; delete [] recvList_xZ; - delete [] recvList_xY; delete [] recvList_yZ; delete [] recvList_Xz; - delete [] recvList_XY; delete [] recvList_YZ; delete [] recvList_XZ; - // Free recvBuf - delete [] recvBuf_x; delete [] recvBuf_y; delete [] recvBuf_z; - delete [] recvBuf_X; delete [] recvBuf_Y; delete [] recvBuf_Z; - delete [] recvBuf_xy; delete [] recvBuf_yz; delete [] recvBuf_xz; - delete [] recvBuf_Xy; delete [] recvBuf_Yz; delete [] recvBuf_xZ; - delete [] recvBuf_xY; delete [] recvBuf_yZ; delete [] recvBuf_Xz; - delete [] recvBuf_XY; delete [] recvBuf_YZ; delete [] recvBuf_XZ; - // Free sendData - delete [] sendData_x; delete [] sendData_y; delete [] sendData_z; - delete [] sendData_X; delete [] sendData_Y; delete [] sendData_Z; - delete [] sendData_xy; delete [] sendData_xY; delete [] sendData_Xy; - delete [] sendData_XY; delete [] sendData_xz; delete [] sendData_xZ; - delete [] sendData_Xz; delete [] sendData_XZ; delete [] sendData_yz; - delete [] sendData_yZ; delete [] sendData_Yz; delete [] sendData_YZ; - // Free recvData - delete [] recvData_x; delete [] recvData_y; delete [] recvData_z; - delete [] recvData_X; delete [] recvData_Y; delete [] recvData_Z; - delete [] recvData_xy; delete [] recvData_xY; delete [] recvData_Xy; - delete [] recvData_XY; delete [] recvData_xz; delete [] recvData_xZ; - delete [] recvData_Xz; delete [] recvData_XZ; delete [] recvData_yz; - delete [] recvData_yZ; delete [] recvData_Yz; delete [] recvData_YZ; - // Free id - delete [] id; - // Free the communicator - if ( Group!=MPI_GROUP_NULL ) { - MPI_Group_free(&Group); - MPI_Comm_free(&Comm); - } -} - - - -int Domain::VoxelConnection(int n) -{ - const int d[26][3] = {{1,0,0},{-1,0,0},{0,1,0},{0,-1,0},{0,0,1},{0,0,-1}, - {1,1,0},{1,-1,0},{-1,1,0},{-1,-1,0},{1,0,1},{-1,0,1}, - {1,0,-1},{-1,0,-1},{0,1,1},{0,-1,1},{0,1,-1},{0,-1,-1}, - {1,1,1},{1,1,-1},{1,-1,1},{1,-1,-1},{-1,1,1},{-1,1,-1}, - {-1,-1,1},{-1,-1,-1}}; // directions to neighbors - - int returnVal = -1; - int x,y,z; - // Get the 3-D indices - x = n%Nx; - y = (n/Nx)%Ny; - z = n/(Nx*Ny); - int nodx,nody,nodz; - for (int p=0;p<26;p++){ - nodx=x+d[p][0]; - // Get the neighbor and guarantee it is in the domain - if (nodx < 0 ){ nodx = 0; } - if (nodx > Nx-1 ){ nodx = Nx-1; } - nody=y+d[p][1]; - if (nody < 0 ){ nody = 0; } - if (nody > Ny-1 ){ nody = Ny-1; } - nodz=z+d[p][2]; - if (nodz < 0 ){ nodz = 0; } - if (nodz > Nz-1 ){ nodz = Nz-1; } - - if (BlobLabel(nodx,nody,nodz) > returnVal ) returnVal = BlobLabel(nodx,nody,nodz); - } - return returnVal; -} - -void Domain::getBlobConnections(int * List, int count, int neighbor, int direction){ - - int idx,n,localValue,neighborValue; - int x,y,z; - for (idx=0; idx -1){ - localValue = VoxelConnection(n); - printf("Blob (%i,%i) connects to neighbor blob (%i,%i)", localValue, rank, neighborValue, neighbor); - BlobGraph(direction,localValue,neighbor) = 1; // Set the BlobGraph to TRUE for this pair - } - } + // Free sendList + delete [] sendList_x; delete [] sendList_y; delete [] sendList_z; + delete [] sendList_X; delete [] sendList_Y; delete [] sendList_Z; + delete [] sendList_xy; delete [] sendList_yz; delete [] sendList_xz; + delete [] sendList_Xy; delete [] sendList_Yz; delete [] sendList_xZ; + delete [] sendList_xY; delete [] sendList_yZ; delete [] sendList_Xz; + delete [] sendList_XY; delete [] sendList_YZ; delete [] sendList_XZ; + // Free sendBuf + delete [] sendBuf_x; delete [] sendBuf_y; delete [] sendBuf_z; + delete [] sendBuf_X; delete [] sendBuf_Y; delete [] sendBuf_Z; + delete [] sendBuf_xy; delete [] sendBuf_yz; delete [] sendBuf_xz; + delete [] sendBuf_Xy; delete [] sendBuf_Yz; delete [] sendBuf_xZ; + delete [] sendBuf_xY; delete [] sendBuf_yZ; delete [] sendBuf_Xz; + delete [] sendBuf_XY; delete [] sendBuf_YZ; delete [] sendBuf_XZ; + // Free recvList + delete [] recvList_x; delete [] recvList_y; delete [] recvList_z; + delete [] recvList_X; delete [] recvList_Y; delete [] recvList_Z; + delete [] recvList_xy; delete [] recvList_yz; delete [] recvList_xz; + delete [] recvList_Xy; delete [] recvList_Yz; delete [] recvList_xZ; + delete [] recvList_xY; delete [] recvList_yZ; delete [] recvList_Xz; + delete [] recvList_XY; delete [] recvList_YZ; delete [] recvList_XZ; + // Free recvBuf + delete [] recvBuf_x; delete [] recvBuf_y; delete [] recvBuf_z; + delete [] recvBuf_X; delete [] recvBuf_Y; delete [] recvBuf_Z; + delete [] recvBuf_xy; delete [] recvBuf_yz; delete [] recvBuf_xz; + delete [] recvBuf_Xy; delete [] recvBuf_Yz; delete [] recvBuf_xZ; + delete [] recvBuf_xY; delete [] recvBuf_yZ; delete [] recvBuf_Xz; + delete [] recvBuf_XY; delete [] recvBuf_YZ; delete [] recvBuf_XZ; + // Free sendData + delete [] sendData_x; delete [] sendData_y; delete [] sendData_z; + delete [] sendData_X; delete [] sendData_Y; delete [] sendData_Z; + delete [] sendData_xy; delete [] sendData_xY; delete [] sendData_Xy; + delete [] sendData_XY; delete [] sendData_xz; delete [] sendData_xZ; + delete [] sendData_Xz; delete [] sendData_XZ; delete [] sendData_yz; + delete [] sendData_yZ; delete [] sendData_Yz; delete [] sendData_YZ; + // Free recvData + delete [] recvData_x; delete [] recvData_y; delete [] recvData_z; + delete [] recvData_X; delete [] recvData_Y; delete [] recvData_Z; + delete [] recvData_xy; delete [] recvData_xY; delete [] recvData_Xy; + delete [] recvData_XY; delete [] recvData_xz; delete [] recvData_xZ; + delete [] recvData_Xz; delete [] recvData_XZ; delete [] recvData_yz; + delete [] recvData_yZ; delete [] recvData_Yz; delete [] recvData_YZ; + // Free id + delete [] id; + // Free the communicator + if ( Group!=MPI_GROUP_NULL ) { + MPI_Group_free(&Group); + MPI_Comm_free(&Comm); + } } void Domain::InitializeRanks() { - int i,j,k; - // map the rank to the block index - kproc = rank/(nprocx*nprocy); - jproc = (rank-nprocx*nprocy*kproc)/nprocx; - iproc = rank-nprocx*nprocy*kproc-nprocx*jproc; - - // set up the neighbor ranks - i = iproc; - j = jproc; - k = kproc; - - rank_X = getRankForBlock(i+1,j,k); - rank_x = getRankForBlock(i-1,j,k); - rank_Y = getRankForBlock(i,j+1,k); - rank_y = getRankForBlock(i,j-1,k); - rank_Z = getRankForBlock(i,j,k+1); - rank_z = getRankForBlock(i,j,k-1); - rank_XY = getRankForBlock(i+1,j+1,k); - rank_xy = getRankForBlock(i-1,j-1,k); - rank_Xy = getRankForBlock(i+1,j-1,k); - rank_xY = getRankForBlock(i-1,j+1,k); - rank_XZ = getRankForBlock(i+1,j,k+1); - rank_xz = getRankForBlock(i-1,j,k-1); - rank_Xz = getRankForBlock(i+1,j,k-1); - rank_xZ = getRankForBlock(i-1,j,k+1); - rank_YZ = getRankForBlock(i,j+1,k+1); - rank_yz = getRankForBlock(i,j-1,k-1); - rank_Yz = getRankForBlock(i,j+1,k-1); - rank_yZ = getRankForBlock(i,j-1,k+1); + int i,j,k; + kproc = rank/(nprocx*nprocy); + jproc = (rank-nprocx*nprocy*kproc)/nprocx; + iproc = rank-nprocx*nprocy*kproc-nprocx*jproc; + + // set up the neighbor ranks + i = iproc; + j = jproc; + k = kproc; + + rank_X = getRankForBlock(i+1,j,k); + rank_x = getRankForBlock(i-1,j,k); + rank_Y = getRankForBlock(i,j+1,k); + rank_y = getRankForBlock(i,j-1,k); + rank_Z = getRankForBlock(i,j,k+1); + rank_z = getRankForBlock(i,j,k-1); + rank_XY = getRankForBlock(i+1,j+1,k); + rank_xy = getRankForBlock(i-1,j-1,k); + rank_Xy = getRankForBlock(i+1,j-1,k); + rank_xY = getRankForBlock(i-1,j+1,k); + rank_XZ = getRankForBlock(i+1,j,k+1); + rank_xz = getRankForBlock(i-1,j,k-1); + rank_Xz = getRankForBlock(i+1,j,k-1); + rank_xZ = getRankForBlock(i-1,j,k+1); + rank_YZ = getRankForBlock(i,j+1,k+1); + rank_yz = getRankForBlock(i,j-1,k-1); + rank_Yz = getRankForBlock(i,j+1,k-1); + rank_yZ = getRankForBlock(i,j-1,k+1); } void Domain::CommInit(MPI_Comm Communicator){ - int i,j,k,n; - int sendtag = 21; - int recvtag = 21; + int i,j,k,n; + int sendtag = 21; + int recvtag = 21; - //...................................................................................... - //Get the ranks of each process and it's neighbors - // map the rank to the block index - kproc = rank/(nprocx*nprocy); - jproc = (rank-nprocx*nprocy*kproc)/nprocx; - iproc = rank-nprocx*nprocy*kproc-nprocx*jproc; - - // set up the neighbor ranks - i = iproc; - j = jproc; - k = kproc; + //...................................................................................... + //Get the ranks of each process and it's neighbors + // map the rank to the block index + //iproc = rank%nprocx; + //jproc = (rank/nprocx)%nprocy; + //kproc = rank/(nprocx*nprocy); - rank_X = getRankForBlock(i+1,j,k); - rank_x = getRankForBlock(i-1,j,k); - rank_Y = getRankForBlock(i,j+1,k); - rank_y = getRankForBlock(i,j-1,k); - rank_Z = getRankForBlock(i,j,k+1); - rank_z = getRankForBlock(i,j,k-1); - rank_XY = getRankForBlock(i+1,j+1,k); - rank_xy = getRankForBlock(i-1,j-1,k); - rank_Xy = getRankForBlock(i+1,j-1,k); - rank_xY = getRankForBlock(i-1,j+1,k); - rank_XZ = getRankForBlock(i+1,j,k+1); - rank_xz = getRankForBlock(i-1,j,k-1); - rank_Xz = getRankForBlock(i+1,j,k-1); - rank_xZ = getRankForBlock(i-1,j,k+1); - rank_YZ = getRankForBlock(i,j+1,k+1); - rank_yz = getRankForBlock(i,j-1,k-1); - rank_Yz = getRankForBlock(i,j+1,k-1); - rank_yZ = getRankForBlock(i,j-1,k+1); - //...................................................................................... + MPI_Barrier(MPI_COMM_WORLD); + kproc = rank/(nprocx*nprocy); + jproc = (rank-nprocx*nprocy*kproc)/nprocx; + iproc = rank-nprocx*nprocy*kproc-nprocx*jproc; + + // set up the neighbor ranks + i = iproc; + j = jproc; + k = kproc; + + rank_X = getRankForBlock(i+1,j,k); + rank_x = getRankForBlock(i-1,j,k); + rank_Y = getRankForBlock(i,j+1,k); + rank_y = getRankForBlock(i,j-1,k); + rank_Z = getRankForBlock(i,j,k+1); + rank_z = getRankForBlock(i,j,k-1); + rank_XY = getRankForBlock(i+1,j+1,k); + rank_xy = getRankForBlock(i-1,j-1,k); + rank_Xy = getRankForBlock(i+1,j-1,k); + rank_xY = getRankForBlock(i-1,j+1,k); + rank_XZ = getRankForBlock(i+1,j,k+1); + rank_xz = getRankForBlock(i-1,j,k-1); + rank_Xz = getRankForBlock(i+1,j,k-1); + rank_xZ = getRankForBlock(i-1,j,k+1); + rank_YZ = getRankForBlock(i,j+1,k+1); + rank_yz = getRankForBlock(i,j-1,k-1); + rank_Yz = getRankForBlock(i,j+1,k-1); + rank_yZ = getRankForBlock(i,j-1,k+1); + //...................................................................................... - MPI_Comm_group(Communicator,&Group); - MPI_Comm_create(Communicator,Group,&Comm); + MPI_Comm_group(Communicator,&Group); + MPI_Comm_create(Communicator,Group,&Comm); - //...................................................................................... - MPI_Request req1[18], req2[18]; - MPI_Status stat1[18],stat2[18]; - //...................................................................................... - sendCount_x = sendCount_y = sendCount_z = sendCount_X = sendCount_Y = sendCount_Z = 0; - sendCount_xy = sendCount_yz = sendCount_xz = sendCount_Xy = sendCount_Yz = sendCount_xZ = 0; - sendCount_xY = sendCount_yZ = sendCount_Xz = sendCount_XY = sendCount_YZ = sendCount_XZ = 0; - //...................................................................................... - for (k=1; k Label; + vector Affinity; + // Read the labels + // if (rank==0){ + /* printf("Component labels:\n"); + ifstream iFILE("ComponentLabels.csv"); + if (iFILE.good()){ + while (!iFILE.eof()){ + iFILE>>VALUE; + iFILE>>AFFINITY; + Label.push_back(VALUE); + Affinity.push_back(AFFINITY); + NLABELS++; + printf("%i %f\n",VALUE,AFFINITY); + } + } + else{ + */ + if (rank ==0 ) { + printf("Using default labels: Solid (0 --> -1.0), NWP (1 --> 1.0), WP (2 --> -1.0)\n"); + } + // Set default values + VALUE=0; AFFINITY=-1.0; + Label.push_back(VALUE); + Affinity.push_back(AFFINITY); + NLABELS++; + VALUE=1; AFFINITY=1.0; + Label.push_back(VALUE); + Affinity.push_back(AFFINITY); + NLABELS++; + VALUE=2; AFFINITY=-1.0; + Label.push_back(VALUE); + Affinity.push_back(AFFINITY); + NLABELS++; + // } +// } + // Broadcast the list + // MPI_Bcast(&NLABELS,1,MPI_INT,0,Comm); + + // Copy into contiguous buffers + //char *LabelList; + //double * AffinityList; + //LabelList=new char[NLABELS]; + //AffinityList=new double[NLABELS]; + //MPI_Bcast(&LabelList,NLABELS,MPI_CHAR,0,Comm); + //MPI_Bcast(&AffinityList,NLABELS,MPI_DOUBLE,0,Comm); + + // Assign the labels + for (int k=0;k> Isending and Ireceiving count data across processors...\n"); + + + + MPI_Barrier(MPI_COMM_WORLD); + + MPI_Isend(&sendCount_x, 1,MPI_INT,rank_x,sendtag+0,Communicator,&req1[0]); + MPI_Irecv(&recvCount_X, 1,MPI_INT,rank_X,recvtag+0,Communicator,&req2[0]); + MPI_Isend(&sendCount_X, 1,MPI_INT,rank_X,sendtag+1,Communicator,&req1[1]); + MPI_Irecv(&recvCount_x, 1,MPI_INT,rank_x,recvtag+1,Communicator,&req2[1]); + + + + + MPI_Isend(&sendCount_y, 1,MPI_INT,rank_y,sendtag+2,Communicator,&req1[2]); + MPI_Irecv(&recvCount_Y, 1,MPI_INT,rank_Y,recvtag+2,Communicator,&req2[2]); + MPI_Isend(&sendCount_Y, 1,MPI_INT,rank_Y,sendtag+3,Communicator,&req1[3]); + MPI_Irecv(&recvCount_y, 1,MPI_INT,rank_y,recvtag+3,Communicator,&req2[3]); + MPI_Isend(&sendCount_z, 1,MPI_INT,rank_z,sendtag+4,Communicator,&req1[4]); + MPI_Irecv(&recvCount_Z, 1,MPI_INT,rank_Z,recvtag+4,Communicator,&req2[4]); + MPI_Isend(&sendCount_Z, 1,MPI_INT,rank_Z,sendtag+5,Communicator,&req1[5]); + MPI_Irecv(&recvCount_z, 1,MPI_INT,rank_z,recvtag+5,Communicator,&req2[5]); + MPI_Isend(&sendCount_xy, 1,MPI_INT,rank_xy,sendtag+6,Communicator,&req1[6]); + MPI_Irecv(&recvCount_XY, 1,MPI_INT,rank_XY,recvtag+6,Communicator,&req2[6]); + MPI_Isend(&sendCount_XY, 1,MPI_INT,rank_XY,sendtag+7,Communicator,&req1[7]); + MPI_Irecv(&recvCount_xy, 1,MPI_INT,rank_xy,recvtag+7,Communicator,&req2[7]); + MPI_Isend(&sendCount_Xy, 1,MPI_INT,rank_Xy,sendtag+8,Communicator,&req1[8]); + MPI_Irecv(&recvCount_xY, 1,MPI_INT,rank_xY,recvtag+8,Communicator,&req2[8]); + MPI_Isend(&sendCount_xY, 1,MPI_INT,rank_xY,sendtag+9,Communicator,&req1[9]); + MPI_Irecv(&recvCount_Xy, 1,MPI_INT,rank_Xy,recvtag+9,Communicator,&req2[9]); + MPI_Isend(&sendCount_xz, 1,MPI_INT,rank_xz,sendtag+10,Communicator,&req1[10]); + MPI_Irecv(&recvCount_XZ, 1,MPI_INT,rank_XZ,recvtag+10,Communicator,&req2[10]); + MPI_Isend(&sendCount_XZ, 1,MPI_INT,rank_XZ,sendtag+11,Communicator,&req1[11]); + MPI_Irecv(&recvCount_xz, 1,MPI_INT,rank_xz,recvtag+11,Communicator,&req2[11]); + MPI_Isend(&sendCount_Xz, 1,MPI_INT,rank_Xz,sendtag+12,Communicator,&req1[12]); + MPI_Irecv(&recvCount_xZ, 1,MPI_INT,rank_xZ,recvtag+12,Communicator,&req2[12]); + MPI_Isend(&sendCount_xZ, 1,MPI_INT,rank_xZ,sendtag+13,Communicator,&req1[13]); + MPI_Irecv(&recvCount_Xz, 1,MPI_INT,rank_Xz,recvtag+13,Communicator,&req2[13]); + MPI_Isend(&sendCount_yz, 1,MPI_INT,rank_yz,sendtag+14,Communicator,&req1[14]); + MPI_Irecv(&recvCount_YZ, 1,MPI_INT,rank_YZ,recvtag+14,Communicator,&req2[14]); + MPI_Isend(&sendCount_YZ, 1,MPI_INT,rank_YZ,sendtag+15,Communicator,&req1[15]); + MPI_Irecv(&recvCount_yz, 1,MPI_INT,rank_yz,recvtag+15,Communicator,&req2[15]); + MPI_Isend(&sendCount_Yz, 1,MPI_INT,rank_Yz,sendtag+16,Communicator,&req1[16]); + MPI_Irecv(&recvCount_yZ, 1,MPI_INT,rank_yZ,recvtag+16,Communicator,&req2[16]); + MPI_Isend(&sendCount_yZ, 1,MPI_INT,rank_yZ,sendtag+17,Communicator,&req1[17]); + MPI_Irecv(&recvCount_Yz, 1,MPI_INT,rank_Yz,recvtag+17,Communicator,&req2[17]); + MPI_Waitall(18,req1,stat1); + MPI_Waitall(18,req2,stat2); + MPI_Barrier(Communicator); + //...................................................................................... + + if (rank == 0) printf("* recvList_# has been allocated through construction of Dm but sizes not determined. Creating arrays with size recvCount_#... *\n"); + + MPI_Barrier(MPI_COMM_WORLD); + + // recv buffers + recvList_x = new int [recvCount_x]; + recvList_y = new int [recvCount_y]; + recvList_z = new int [recvCount_z]; + recvList_X = new int [recvCount_X]; + recvList_Y = new int [recvCount_Y]; + recvList_Z = new int [recvCount_Z]; + recvList_xy = new int [recvCount_xy]; + recvList_yz = new int [recvCount_yz]; + recvList_xz = new int [recvCount_xz]; + recvList_Xy = new int [recvCount_Xy]; + recvList_Yz = new int [recvCount_Yz]; + recvList_xZ = new int [recvCount_xZ]; + recvList_xY = new int [recvCount_xY]; + recvList_yZ = new int [recvCount_yZ]; + recvList_Xz = new int [recvCount_Xz]; + recvList_XY = new int [recvCount_XY]; + recvList_YZ = new int [recvCount_YZ]; + recvList_XZ = new int [recvCount_XZ]; + //...................................................................................... + + if (rank == 0) printf("* >> Isending and Ireceiving list data (of size sendCount_# and recvCount_#) across processors...\n"); + + MPI_Barrier(MPI_COMM_WORLD); + + MPI_Isend(sendList_x, sendCount_x,MPI_INT,rank_x,sendtag,Communicator,&req1[0]); + MPI_Irecv(recvList_X, recvCount_X,MPI_INT,rank_X,recvtag,Communicator,&req2[0]); + MPI_Isend(sendList_X, sendCount_X,MPI_INT,rank_X,sendtag,Communicator,&req1[1]); + MPI_Irecv(recvList_x, recvCount_x,MPI_INT,rank_x,recvtag,Communicator,&req2[1]); + MPI_Isend(sendList_y, sendCount_y,MPI_INT,rank_y,sendtag,Communicator,&req1[2]); + MPI_Irecv(recvList_Y, recvCount_Y,MPI_INT,rank_Y,recvtag,Communicator,&req2[2]); + MPI_Isend(sendList_Y, sendCount_Y,MPI_INT,rank_Y,sendtag,Communicator,&req1[3]); + MPI_Irecv(recvList_y, recvCount_y,MPI_INT,rank_y,recvtag,Communicator,&req2[3]); + MPI_Isend(sendList_z, sendCount_z,MPI_INT,rank_z,sendtag,Communicator,&req1[4]); + MPI_Irecv(recvList_Z, recvCount_Z,MPI_INT,rank_Z,recvtag,Communicator,&req2[4]); + MPI_Isend(sendList_Z, sendCount_Z,MPI_INT,rank_Z,sendtag,Communicator,&req1[5]); + MPI_Irecv(recvList_z, recvCount_z,MPI_INT,rank_z,recvtag,Communicator,&req2[5]); + MPI_Isend(sendList_xy, sendCount_xy,MPI_INT,rank_xy,sendtag,Communicator,&req1[6]); + MPI_Irecv(recvList_XY, recvCount_XY,MPI_INT,rank_XY,recvtag,Communicator,&req2[6]); + MPI_Isend(sendList_XY, sendCount_XY,MPI_INT,rank_XY,sendtag,Communicator,&req1[7]); + MPI_Irecv(recvList_xy, recvCount_xy,MPI_INT,rank_xy,recvtag,Communicator,&req2[7]); + MPI_Isend(sendList_Xy, sendCount_Xy,MPI_INT,rank_Xy,sendtag,Communicator,&req1[8]); + MPI_Irecv(recvList_xY, recvCount_xY,MPI_INT,rank_xY,recvtag,Communicator,&req2[8]); + MPI_Isend(sendList_xY, sendCount_xY,MPI_INT,rank_xY,sendtag,Communicator,&req1[9]); + MPI_Irecv(recvList_Xy, recvCount_Xy,MPI_INT,rank_Xy,recvtag,Communicator,&req2[9]); + MPI_Isend(sendList_xz, sendCount_xz,MPI_INT,rank_xz,sendtag,Communicator,&req1[10]); + MPI_Irecv(recvList_XZ, recvCount_XZ,MPI_INT,rank_XZ,recvtag,Communicator,&req2[10]); + MPI_Isend(sendList_XZ, sendCount_XZ,MPI_INT,rank_XZ,sendtag,Communicator,&req1[11]); + MPI_Irecv(recvList_xz, recvCount_xz,MPI_INT,rank_xz,recvtag,Communicator,&req2[11]); + MPI_Isend(sendList_Xz, sendCount_Xz,MPI_INT,rank_Xz,sendtag,Communicator,&req1[12]); + MPI_Irecv(recvList_xZ, recvCount_xZ,MPI_INT,rank_xZ,recvtag,Communicator,&req2[12]); + MPI_Isend(sendList_xZ, sendCount_xZ,MPI_INT,rank_xZ,sendtag,Communicator,&req1[13]); + MPI_Irecv(recvList_Xz, recvCount_Xz,MPI_INT,rank_Xz,recvtag,Communicator,&req2[13]); + MPI_Isend(sendList_yz, sendCount_yz,MPI_INT,rank_yz,sendtag,Communicator,&req1[14]); + MPI_Irecv(recvList_YZ, recvCount_YZ,MPI_INT,rank_YZ,recvtag,Communicator,&req2[14]); + MPI_Isend(sendList_YZ, sendCount_YZ,MPI_INT,rank_YZ,sendtag,Communicator,&req1[15]); + MPI_Irecv(recvList_yz, recvCount_yz,MPI_INT,rank_yz,recvtag,Communicator,&req2[15]); + MPI_Isend(sendList_Yz, sendCount_Yz,MPI_INT,rank_Yz,sendtag,Communicator,&req1[16]); + MPI_Irecv(recvList_yZ, recvCount_yZ,MPI_INT,rank_yZ,recvtag,Communicator,&req2[16]); + MPI_Isend(sendList_yZ, sendCount_yZ,MPI_INT,rank_yZ,sendtag,Communicator,&req1[17]); + MPI_Irecv(recvList_Yz, recvCount_Yz,MPI_INT,rank_Yz,recvtag,Communicator,&req2[17]); + MPI_Waitall(18,req1,stat1); + MPI_Waitall(18,req2,stat2); + + + if (rank == 0) { + + printf("* recvList_x: %d %d %d %d \n",recvList_x[0],recvList_x[1],recvList_x[2],recvList_x[3]); + + printf("* recvList_X: %d %d %d %d \n",recvList_X[0],recvList_X[1],recvList_X[2],recvList_X[3]); + + printf("\n"); + } + + MPI_Barrier(MPI_COMM_WORLD); + + if (rank == 1) { + + printf("* recvList_x: %d %d %d %d \n",recvList_x[0],recvList_x[1],recvList_x[2],recvList_x[3]); + + printf("* recvList_X: %d %d %d %d \n",recvList_X[0],recvList_X[1],recvList_X[2],recvList_X[3]); + + printf("\n\n"); + } + + + + //...................................................................................... + for (int idx=0; idx fabs(b)) value = b; - - return value; -} - -inline double Eikonal(DoubleArray &Distance, char *ID, Domain &Dm, int timesteps){ - - /* - * This routine converts the data in the Distance array to a signed distance - * by solving the equation df/dt = sign(1-|grad f|), where Distance provides - * the values of f on the mesh associated with domain Dm - * It has been tested with segmented data initialized to values [-1,1] - * and will converge toward the signed distance to the surface bounding the associated phases - * - * Reference: - * Min C (2010) On reinitializing level set functions, Journal of Computational Physics 229 - */ - - int i,j,k; - double dt=0.1; - double Dx,Dy,Dz; - double Dxp,Dxm,Dyp,Dym,Dzp,Dzm; - double Dxxp,Dxxm,Dyyp,Dyym,Dzzp,Dzzm; - double sign,norm; - double LocalVar,GlobalVar,LocalMax,GlobalMax; - - int xdim,ydim,zdim; - xdim=Dm.Nx-2; - ydim=Dm.Ny-2; - zdim=Dm.Nz-2; - fillHalo fillData(Dm.Comm, Dm.rank_info,xdim,ydim,zdim,1,1,1,0,1); - - // Arrays to store the second derivatives - DoubleArray Dxx(Dm.Nx,Dm.Ny,Dm.Nz); - DoubleArray Dyy(Dm.Nx,Dm.Ny,Dm.Nz); - DoubleArray Dzz(Dm.Nx,Dm.Ny,Dm.Nz); - - int count = 0; - while (count < timesteps){ - - // Communicate the halo of values - fillData.fill(Distance); - - // Compute second order derivatives - for (k=1;k 0.f) Dx = Dxp*Dxp; - else Dx = Dxm*Dxm; - - if (Dyp + Dym > 0.f) Dy = Dyp*Dyp; - else Dy = Dym*Dym; - - if (Dzp + Dzm > 0.f) Dz = Dzp*Dzp; - else Dz = Dzm*Dzm; - } - else{ - - if (Dxp + Dxm < 0.f) Dx = Dxp*Dxp; - else Dx = Dxm*Dxm; - - if (Dyp + Dym < 0.f) Dy = Dyp*Dyp; - else Dy = Dym*Dym; - - if (Dzp + Dzm < 0.f) Dz = Dzp*Dzp; - else Dz = Dzm*Dzm; - } - - //Dx = max(Dxp*Dxp,Dxm*Dxm); - //Dy = max(Dyp*Dyp,Dym*Dym); - //Dz = max(Dzp*Dzp,Dzm*Dzm); - - norm=sqrt(Dx + Dy + Dz); - if (norm > 1.0) norm=1.0; - - Distance(i,j,k) += dt*sign*(1.0 - norm); - LocalVar += dt*sign*(1.0 - norm); - - if (fabs(dt*sign*(1.0 - norm)) > LocalMax) - LocalMax = fabs(dt*sign*(1.0 - norm)); - } - } - } - - MPI_Allreduce(&LocalVar,&GlobalVar,1,MPI_DOUBLE,MPI_SUM,Dm.Comm); - MPI_Allreduce(&LocalMax,&GlobalMax,1,MPI_DOUBLE,MPI_MAX,Dm.Comm); - GlobalVar /= (Dm.Nx-2)*(Dm.Ny-2)*(Dm.Nz-2)*Dm.nprocx*Dm.nprocy*Dm.nprocz; - count++; - - - if (count%50 == 0 && Dm.rank==0 ) - printf("Time=%i, Max variation=%f, Global variation=%f \n",count,GlobalMax,GlobalVar); - - if (fabs(GlobalMax) < 1e-5){ - if (Dm.rank==0) printf("Exiting with max tolerance of 1e-5 \n"); - count=timesteps; - } - } - return GlobalVar; -} #endif diff --git a/common/MPI_Helpers.hpp b/common/MPI_Helpers.hpp index 85261cf1..53708368 100644 --- a/common/MPI_Helpers.hpp +++ b/common/MPI_Helpers.hpp @@ -27,7 +27,7 @@ void pack( const std::vector& rhs, char *buffer ) size_t size = rhs.size(); memcpy(buffer,&size,sizeof(size_t)); size_t pos = sizeof(size_t); - for (size_t i=0; i& data, const char *buffer ) data.clear(); data.resize(size); size_t pos = sizeof(size_t); - for (size_t i=0; i Np ){ + ERROR("ScaLBL_Communicator::MemoryDenseLayout: Failed to create memory efficient layout!\n"); + } + + // for (k=1;k Np) printf("ScaLBL_Communicator::MemoryDenseLayout: Map(%i,%i,%i) = %i > %i \n",i,j,k,Map(i,j,k),Np); + else if (!(idx<0)){ + // store the idx associated with each neighbor + // store idx for self if neighbor is in solid or out of domain + //D3Q19 = {{1,0,0},{-1,0,0} + // {0,1,0},{0,-1,0} + // {0,0,1},{0,0,-1}, + // {1,1,0},{-1,-1,0}, + // {1,-1,0},{-1,1,0}, + // {1,0,1},{-1,0,-1}, + // {1,0,-1},{-1,0,1}, + // {0,1,1},{0,-1,-1}, + // {0,1,-1},{0,-1,1}}; + // note that only odd distributions need to be stored to execute the swap algorithm + int neighbor; // cycle through the neighbors of lattice site idx + neighbor=Map(i+1,j,k); + if (neighbor==-2) neighborList[idx]=-1; + else if (neighbor<0) neighborList[idx]=idx; + else neighborList[idx]=neighbor; + + neighbor=Map(i,j+1,k); + if (neighbor==-2) neighborList[Np+idx]=-1; + else if (neighbor<0) neighborList[Np+idx]=idx; + else neighborList[Np+idx]=neighbor; + + neighbor=Map(i,j,k+1); + if (neighbor==-2) neighborList[2*Np+idx]=-1; + else if (neighbor<0) neighborList[2*Np+idx]=idx; + else neighborList[2*Np+idx]=neighbor; + + neighbor=Map(i+1,j+1,k); + if (neighbor==-2) neighborList[3*Np+idx]=-1; + else if (neighbor<0) neighborList[3*Np+idx]=idx; + else neighborList[3*Np+idx]=neighbor; + + neighbor=Map(i+1,j-1,k); + if (neighbor==-2) neighborList[4*Np+idx]=-1; + else if (neighbor<0) neighborList[4*Np+idx]=idx; + else neighborList[4*Np+idx]=neighbor; + + neighbor=Map(i+1,j,k+1); + if (neighbor==-2) neighborList[5*Np+idx]=-1; + else if (neighbor<0) neighborList[5*Np+idx]=idx; + else neighborList[5*Np+idx]=neighbor; + + neighbor=Map(i+1,j,k-1); + if (neighbor==-2) neighborList[6*Np+idx]=-1; + else if (neighbor<0) neighborList[6*Np+idx]=idx; + else neighborList[6*Np+idx]=neighbor; + + neighbor=Map(i,j+1,k+1); + if (neighbor==-2) neighborList[7*Np+idx]=-1; + else if (neighbor<0) neighborList[7*Np+idx]=idx; + else neighborList[7*Np+idx]=neighbor; + + neighbor=Map(i,j+1,k-1); + if (neighbor==-2) neighborList[8*Np+idx]=-1; + else if (neighbor<0) neighborList[8*Np+idx]=idx; + else neighborList[8*Np+idx]=neighbor; + } + } + } + } + + //for (idx=0; idx Np ){ + ERROR("ScaLBL_Communicator::MemoryDenseLayoutFull: Failed to create memory efficient layout!\n"); + } + + // if (rank == 0) { + // printf("* Displaying the final map from rank %d\n",rank); + // + // for (k=1;k Np) printf("ScaLBL_Communicator::MemoryDenseLayoutFull: Map(%i,%i,%i) = %i > %i \n",i,j,k,Map(i,j,k),Np); + else if (!(idx<0)){ + // store the idx associated with each neighbor + // store idx for self if neighbor is in solid or out of domain + //D3Q19 = {{1,0,0},{-1,0,0} + // {0,1,0},{0,-1,0} + // {0,0,1},{0,0,-1}, + // {1,1,0},{-1,-1,0}, + // {1,-1,0},{-1,1,0}, + // {1,0,1},{-1,0,-1}, + // {1,0,-1},{-1,0,1}, + // {0,1,1},{0,-1,-1}, + // {0,1,-1},{0,-1,1}}; + + + /* + * Storing the full neighbor list. The AA algorithm may require fewer neighbors but I'm saving everything for now... + * + */ + + + int neighbor; // cycle through the neighbors of lattice site idx + neighbor=Map(i+1,j,k); + if (neighbor==-2) neighborList[idx]=-1; + else if (neighbor<0) neighborList[idx]=idx; + else neighborList[idx]=neighbor; + + // 2 + neighbor=Map(i-1,j,k); + if (neighbor==-2) neighborList[Np+idx]=-1; + else if (neighbor<0) neighborList[Np+idx]=idx; + else neighborList[Np+idx]=neighbor; + + neighbor=Map(i,j+1,k); + if (neighbor==-2) neighborList[2*Np+idx]=-1; + else if (neighbor<0) neighborList[2*Np+idx]=idx; + else neighborList[2*Np+idx]=neighbor; + + // 4 + neighbor=Map(i,j-1,k); + if (neighbor==-2) neighborList[3*Np+idx]=-1; + else if (neighbor<0) neighborList[3*Np+idx]=idx; + else neighborList[3*Np+idx]=neighbor; + + + neighbor=Map(i,j,k+1); + if (neighbor==-2) neighborList[4*Np+idx]=-1; + else if (neighbor<0) neighborList[4*Np+idx]=idx; + else neighborList[4*Np+idx]=neighbor; + + // 6 + neighbor=Map(i,j,k-1); + if (neighbor==-2) neighborList[5*Np+idx]=-1; + else if (neighbor<0) neighborList[5*Np+idx]=idx; + else neighborList[5*Np+idx]=neighbor; + + neighbor=Map(i+1,j+1,k); + if (neighbor==-2) neighborList[6*Np+idx]=-1; + else if (neighbor<0) neighborList[6*Np+idx]=idx; + else neighborList[6*Np+idx]=neighbor; + + // 8 + neighbor=Map(i-1,j-1,k); + if (neighbor==-2) neighborList[7*Np+idx]=-1; + else if (neighbor<0) neighborList[7*Np+idx]=idx; + else neighborList[7*Np+idx]=neighbor; + + + neighbor=Map(i+1,j-1,k); + if (neighbor==-2) neighborList[8*Np+idx]=-1; + else if (neighbor<0) neighborList[8*Np+idx]=idx; + else neighborList[8*Np+idx]=neighbor; + + // 10 + neighbor=Map(i-1,j+1,k); + if (neighbor==-2) neighborList[9*Np+idx]=-1; + else if (neighbor<0) neighborList[9*Np+idx]=idx; + else neighborList[9*Np+idx]=neighbor; + + + neighbor=Map(i+1,j,k+1); + if (neighbor==-2) neighborList[10*Np+idx]=-1; + else if (neighbor<0) neighborList[10*Np+idx]=idx; + else neighborList[10*Np+idx]=neighbor; + + // 12 + neighbor=Map(i-1,j,k-1); + if (neighbor==-2) neighborList[11*Np+idx]=-1; + else if (neighbor<0) neighborList[11*Np+idx]=idx; + else neighborList[11*Np+idx]=neighbor; + + + neighbor=Map(i+1,j,k-1); + if (neighbor==-2) neighborList[12*Np+idx]=-1; + else if (neighbor<0) neighborList[12*Np+idx]=idx; + else neighborList[12*Np+idx]=neighbor; + + // 14 + neighbor=Map(i-1,j,k+1); + if (neighbor==-2) neighborList[13*Np+idx]=-1; + else if (neighbor<0) neighborList[13*Np+idx]=idx; + else neighborList[13*Np+idx]=neighbor; + + neighbor=Map(i,j+1,k+1); + if (neighbor==-2) neighborList[14*Np+idx]=-1; + else if (neighbor<0) neighborList[14*Np+idx]=idx; + else neighborList[14*Np+idx]=neighbor; + + // 16 + neighbor=Map(i,j-1,k-1); + if (neighbor==-2) neighborList[15*Np+idx]=-1; + else if (neighbor<0) neighborList[15*Np+idx]=idx; + else neighborList[15*Np+idx]=neighbor; + + neighbor=Map(i,j+1,k-1); + if (neighbor==-2) neighborList[16*Np+idx]=-1; + else if (neighbor<0) neighborList[16*Np+idx]=idx; + else neighborList[16*Np+idx]=neighbor; + + // 18 + neighbor=Map(i,j-1,k+1); + if (neighbor==-2) neighborList[17*Np+idx]=-1; + else if (neighbor<0) neighborList[17*Np+idx]=idx; + else neighborList[17*Np+idx]=neighbor; + + + } + } + } + } + + //....................................................................... + // Now map through SendList and RecvList to update indices + // First loop over the send lists + + int *TempBuffer; + TempBuffer = new int [5*RecvCount]; + + //....................................................................... + // Re-index the send lists + ScaLBL_CopyToHost(TempBuffer,dvcSendList_x,sendCount_x*sizeof(int)); + + for (i=0; i Np ){ ERROR("ScaLBL_Communicator::MemoryOptimizedLayout: Failed to create memory efficient layout!\n"); } -/* - for (k=1;k Np ){ + ERROR("ScaLBL_Communicator::MemoryOptimizedLayout: Failed to create memory efficient layout!\n"); + } + + // for (k=1;k Np) printf("ScaLBL_Communicator::MemoryOptimizedLayout: Map(%i,%i,%i) = %i > %i \n",i,j,k,Map(i,j,k),Np); + else if (!(idx<0)){ + // store the idx associated with each neighbor + // store idx for self if neighbor is in solid or out of domain + //D3Q19 = {{1,0,0},{-1,0,0} + // {0,1,0},{0,-1,0} + // {0,0,1},{0,0,-1}, + // {1,1,0},{-1,-1,0}, + // {1,-1,0},{-1,1,0}, + // {1,0,1},{-1,0,-1}, + // {1,0,-1},{-1,0,1}, + // {0,1,1},{0,-1,-1}, + // {0,1,-1},{0,-1,1}}; + // note that only odd distributions need to be stored to execute the swap algorithm + int neighbor; // cycle through the neighbors of lattice site idx + neighbor=Map(i+1,j,k); + if (neighbor==-2) neighborList[idx]=-1; + else if (neighbor<0) neighborList[idx]=idx; + else neighborList[idx]=neighbor; + + neighbor=Map(i-1,j,k); + if (neighbor==-2) neighborList[Np+idx]=-1; + else if (neighbor<0) neighborList[Np+idx]=idx; + else neighborList[Np+idx]=neighbor; + + neighbor=Map(i,j+1,k); + if (neighbor==-2) neighborList[2*Np+idx]=-1; + else if (neighbor<0) neighborList[2*Np+idx]=idx; + else neighborList[2*Np+idx]=neighbor; + + neighbor=Map(i,j-1,k); + if (neighbor==-2) neighborList[3*Np+idx]=-1; + else if (neighbor<0) neighborList[3*Np+idx]=idx; + else neighborList[3*Np+idx]=neighbor; + + + neighbor=Map(i,j,k+1); + if (neighbor==-2) neighborList[4*Np+idx]=-1; + else if (neighbor<0) neighborList[4*Np+idx]=idx; + else neighborList[4*Np+idx]=neighbor; + + neighbor=Map(i,j,k-1); + if (neighbor==-2) neighborList[5*Np+idx]=-1; + else if (neighbor<0) neighborList[5*Np+idx]=idx; + else neighborList[5*Np+idx]=neighbor; + + neighbor=Map(i+1,j+1,k); + if (neighbor==-2) neighborList[6*Np+idx]=-1; + else if (neighbor<0) neighborList[6*Np+idx]=idx; + else neighborList[6*Np+idx]=neighbor; + + neighbor=Map(i-1,j-1,k); + if (neighbor==-2) neighborList[7*Np+idx]=-1; + else if (neighbor<0) neighborList[7*Np+idx]=idx; + else neighborList[7*Np+idx]=neighbor; + + + neighbor=Map(i+1,j-1,k); + if (neighbor==-2) neighborList[8*Np+idx]=-1; + else if (neighbor<0) neighborList[8*Np+idx]=idx; + else neighborList[8*Np+idx]=neighbor; + + neighbor=Map(i-1,j+1,k); + if (neighbor==-2) neighborList[9*Np+idx]=-1; + else if (neighbor<0) neighborList[9*Np+idx]=idx; + else neighborList[9*Np+idx]=neighbor; + + + neighbor=Map(i+1,j,k+1); + if (neighbor==-2) neighborList[10*Np+idx]=-1; + else if (neighbor<0) neighborList[10*Np+idx]=idx; + else neighborList[10*Np+idx]=neighbor; + + neighbor=Map(i-1,j,k-1); + if (neighbor==-2) neighborList[11*Np+idx]=-1; + else if (neighbor<0) neighborList[11*Np+idx]=idx; + else neighborList[11*Np+idx]=neighbor; + + + neighbor=Map(i+1,j,k-1); + if (neighbor==-2) neighborList[12*Np+idx]=-1; + else if (neighbor<0) neighborList[12*Np+idx]=idx; + else neighborList[12*Np+idx]=neighbor; + + neighbor=Map(i-1,j,k+1); + if (neighbor==-2) neighborList[13*Np+idx]=-1; + else if (neighbor<0) neighborList[13*Np+idx]=idx; + else neighborList[13*Np+idx]=neighbor; + + neighbor=Map(i,j+1,k+1); + if (neighbor==-2) neighborList[14*Np+idx]=-1; + else if (neighbor<0) neighborList[14*Np+idx]=idx; + else neighborList[14*Np+idx]=neighbor; + + neighbor=Map(i,j-1,k-1); + if (neighbor==-2) neighborList[15*Np+idx]=-1; + else if (neighbor<0) neighborList[15*Np+idx]=idx; + else neighborList[15*Np+idx]=neighbor; + + neighbor=Map(i,j+1,k-1); + if (neighbor==-2) neighborList[16*Np+idx]=-1; + else if (neighbor<0) neighborList[16*Np+idx]=idx; + else neighborList[16*Np+idx]=neighbor; + + neighbor=Map(i,j-1,k+1); + if (neighbor==-2) neighborList[17*Np+idx]=-1; + else if (neighbor<0) neighborList[17*Np+idx]=idx; + else neighborList[17*Np+idx]=neighbor; + } + } + } + } + + //for (idx=0; idx Np ){ + ERROR("ScaLBL_Communicator::MemoryOptimizedLayout: Failed to create memory efficient layout!\n"); + } + /* + for (k=1;k Np) printf("ScaLBL_Communicator::MemoryOptimizedLayout: Map(%i,%i,%i) = %i > %i \n",i,j,k,Map(i,j,k),Np); + else if (!(idx<0)){ + // store the idx associated with each neighbor + // store idx for self if neighbor is in solid or out of domain + //D3Q19 = {{1,0,0},{-1,0,0} + // {0,1,0},{0,-1,0} + // {0,0,1},{0,0,-1}, + // {1,1,0},{-1,-1,0}, + // {1,-1,0},{-1,1,0}, + // {1,0,1},{-1,0,-1}, + // {1,0,-1},{-1,0,1}, + // {0,1,1},{0,-1,-1}, + // {0,1,-1},{0,-1,1}}; + int neighbor; // cycle through the neighbors of lattice site idx + neighbor=Map(i-1,j,k); + if (neighbor<0) neighborList[idx]=idx + 2*Np; + else neighborList[idx]=neighbor + 1*Np; + + neighbor=Map(i+1,j,k); + if (neighbor<0) neighborList[Np+idx] = idx + 1*Np; + else neighborList[Np+idx]= neighbor + 2*Np; + + neighbor=Map(i,j-1,k); + if (neighbor<0) neighborList[2*Np+idx]=idx + 4*Np; + else neighborList[2*Np+idx]=neighbor + 3*Np; + + neighbor=Map(i,j+1,k); + if (neighbor<0) neighborList[3*Np+idx]=idx + 3*Np; + else neighborList[3*Np+idx]=neighbor + 4*Np; + + neighbor=Map(i,j,k-1); + if (neighbor<0) neighborList[4*Np+idx]=idx + 6*Np; + else neighborList[4*Np+idx]=neighbor + 5*Np; + + neighbor=Map(i,j,k+1); + if (neighbor<0) neighborList[5*Np+idx]=idx + 5*Np; + else neighborList[5*Np+idx]=neighbor + 6*Np; + + neighbor=Map(i-1,j-1,k); + if (neighbor<0) neighborList[6*Np+idx]=idx + 8*Np; + else neighborList[6*Np+idx]=neighbor + 7*Np; + + neighbor=Map(i+1,j+1,k); + if (neighbor<0) neighborList[7*Np+idx]=idx + 7*Np; + else neighborList[7*Np+idx]=neighbor+8*Np; + + neighbor=Map(i-1,j+1,k); + if (neighbor<0) neighborList[8*Np+idx]=idx + 10*Np; + else neighborList[8*Np+idx]=neighbor + 9*Np; + + neighbor=Map(i+1,j-1,k); + if (neighbor<0) neighborList[9*Np+idx]=idx + 9*Np; + else neighborList[9*Np+idx]=neighbor + 10*Np; + + neighbor=Map(i-1,j,k-1); + if (neighbor<0) neighborList[10*Np+idx]=idx + 12*Np; + else neighborList[10*Np+idx]=neighbor + 11*Np; + + neighbor=Map(i+1,j,k+1); + if (neighbor<0) neighborList[11*Np+idx]=idx + 11*Np; + else neighborList[11*Np+idx]=neighbor + 12*Np; + + neighbor=Map(i-1,j,k+1); + if (neighbor<0) neighborList[12*Np+idx]=idx + 14*Np; + else neighborList[12*Np+idx]=neighbor + 13*Np; + + neighbor=Map(i+1,j,k-1); + if (neighbor<0) neighborList[13*Np+idx]=idx + 13*Np; + else neighborList[13*Np+idx]=neighbor + 14*Np; + + neighbor=Map(i,j-1,k-1); + if (neighbor<0) neighborList[14*Np+idx]=idx + 16*Np; + else neighborList[14*Np+idx]=neighbor + 15*Np; + + neighbor=Map(i,j+1,k+1); + if (neighbor<0) neighborList[15*Np+idx]=idx + 15*Np; + else neighborList[15*Np+idx]=neighbor + 16*Np; + + neighbor=Map(i,j-1,k+1); + if (neighbor<0) neighborList[16*Np+idx]=idx + 18*Np; + else neighborList[16*Np+idx]=neighbor + 17*Np; + + neighbor=Map(i,j+1,k-1); + if (neighbor<0) neighborList[17*Np+idx]=idx + 17*Np; + else neighborList[17*Np+idx]=neighbor + 18*Np; + } + } + } + } + + //for (idx=0; idx -#include -#include #include -#include -#include -#include #include -#include -#include -#include -#include +#include +#include +#if __cplusplus > 199711L + #include +#endif -// Detect the OS -// clang-format off -#if defined( WIN32 ) || defined( _WIN32 ) || defined( WIN64 ) || defined( _WIN64 ) || defined( _MSC_VER ) +// Detect the OS and include system dependent headers +#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64) || defined(_MSC_VER) + // Note: windows has not been testeds #define USE_WINDOWS - #define NOMINMAX -#elif defined( __APPLE__ ) + #include + #include + #include + #include + #include + #include + #include + //#pragma comment(lib, psapi.lib) //added + //#pragma comment(linker, /DEFAULTLIB:psapi.lib) +#elif defined(__APPLE__) #define USE_MAC - #define USE_NM -#elif defined( __linux ) || defined( __unix ) || defined( __posix ) + #include + #include + #include + #include + #include + #include + #include + #include + #include + #include +#elif defined(__linux) || defined(__unix) || defined(__posix) #define USE_LINUX #define USE_NM + #include + #include + #include + #include + #include + #include + #include #else #error Unknown OS #endif -// clang-format on - - -// Include/detect MPI -// clang-format off -#ifndef USE_MPI - #ifdef USE_EXT_MPI - #define USE_MPI - #elif defined(__has_include) - #if __has_include("mpi.h") - #define USE_MPI - #endif - #endif -#endif -#ifdef USE_MPI - #include "mpi.h" -#endif -// clang-format on - - -// Include system dependent headers -// clang-format off -// Detect the OS and include system dependent headers -#ifdef USE_WINDOWS - #include - #include - #include - #include - #include - #include - #include - #include - #pragma comment( lib, "version.lib" ) // for "VerQueryValue" -#else - #include - #include - #include - #include - #include - #include - #include -#endif -#ifdef USE_MAC - #include - #include - #include - #include -#endif -// clang-format on #ifdef __GNUC__ -#define USE_ABI -#include + #define USE_ABI + #include #endif - #ifndef NULL_USE -#define NULL_USE( variable ) \ - do { \ - if ( 0 ) { \ - char *temp = (char *) &variable; \ - temp++; \ - } \ - } while ( 0 ) + #define NULL_USE(variable) do { \ + if(0) {char *temp = (char *)&variable; temp++;} \ + }while(0) #endif -// Set the callstack signal -#ifdef SIGRTMIN - #define CALLSTACK_SIG SIGRTMIN+4 -#else - #define CALLSTACK_SIG SIGUSR1 - #define SIGRTMIN SIGUSR1 - #define SIGRTMAX SIGUSR1 -#endif - - -// Utility to break a string by a newline -static inline std::vector breakString( const std::string& str ) -{ - std::vector strvec; - size_t i1 = 0; - size_t i2 = std::min( str.find( '\n', i1 ), str.length() ); - while ( i1 < str.length() ) { - strvec.push_back( str.substr( i1, i2-i1 ) ); - i1 = i2 + 1; - i2 = std::min( str.find( '\n', i1 ), str.length() ); - } - return strvec; -} - - // Utility to strip the path from a filename -static inline std::string stripPath( const std::string &filename ) +inline std::string stripPath( const std::string& filename ) { - if ( filename.empty() ) { - return std::string(); - } - int i = 0; - for ( i = (int) filename.size() - 1; i >= 0 && filename[i] != 47 && filename[i] != 92; i-- ) { - } - i = std::max( 0, i + 1 ); - return filename.substr( i ); + if ( filename.empty() ) { return std::string(); } + int i=0; + for (i=(int)filename.size()-1; i>=0&&filename[i]!=47&&filename[i]!=92; i--) {} + i = std::max(0,i+1); + return filename.substr(i); } // Inline function to subtract two addresses returning the absolute difference -static inline void *subtractAddress( void *a, void *b ) -{ - return reinterpret_cast( - std::abs( reinterpret_cast( a ) - reinterpret_cast( b ) ) ); -} - - -#ifdef USE_WINDOWS -static BOOL __stdcall readProcMem( HANDLE hProcess, - DWORD64 qwBaseAddress, - PVOID lpBuffer, - DWORD nSize, - LPDWORD lpNumberOfBytesRead ) -{ - SIZE_T st; - BOOL bRet = ReadProcessMemory( hProcess, (LPVOID) qwBaseAddress, lpBuffer, nSize, &st ); - *lpNumberOfBytesRead = (DWORD) st; - return bRet; -} -static inline std::string getCurrentDirectory() -{ - char temp[1024] = { 0 }; - GetCurrentDirectoryA( sizeof( temp ), temp ); - return temp; -} -namespace StackTrace { -BOOL GetModuleListTH32( HANDLE hProcess, DWORD pid ); -BOOL GetModuleListPSAPI( HANDLE hProcess ); -DWORD LoadModule( HANDLE hProcess, LPCSTR img, LPCSTR mod, DWORD64 baseAddr, DWORD size ); -void LoadModules(); -}; -#endif - - -// Functions to copy data -static inline char* copy_in( size_t N, const void* data, char *ptr ) -{ - memcpy( ptr, data, N ); - return ptr + N; -} -static inline const char* copy_out( size_t N, void* data, const char *ptr ) -{ - memcpy( data, ptr, N ); - return ptr + N; -} - - -/**************************************************************************** -* Utility to call system command and return output * -****************************************************************************/ -#ifdef USE_WINDOWS -#define popen _popen -#define pclose _pclose -#endif -std::string StackTrace::exec( const std::string& cmd, int& code ) -{ - signal( SIGCHLD, SIG_DFL ); // Clear child exited - FILE* pipe = popen(cmd.c_str(), "r"); - if ( pipe == nullptr ) - return std::string(); - std::string result = ""; - result.reserve(1024); - while ( !feof(pipe) ) { - char buffer[257]; - buffer[256] = 0; - if ( fgets(buffer, 128, pipe) != NULL ) - result += buffer; - } - auto status = pclose( pipe ); - code = WEXITSTATUS(status); - return result; +inline void* subtractAddress( void* a, void* b ) { + return reinterpret_cast( std::abs( + reinterpret_cast(a)-reinterpret_cast(b) ) ); } /**************************************************************************** * stack_info * ****************************************************************************/ -bool StackTrace::stack_info::operator==( const StackTrace::stack_info& rhs ) const -{ - if ( address == rhs.address ) - return true; - if ( address2==rhs.address2 && object==rhs.object ) - return true; - return false; -} -bool StackTrace::stack_info::operator!=( const StackTrace::stack_info& rhs ) const -{ - return !operator==( rhs ); -} std::string StackTrace::stack_info::print() const { char tmp[32]; - sprintf( tmp, "0x%016llx: ", reinterpret_cast( address ) ); - std::string stack( tmp ); - sprintf( tmp, "%i", line ); - std::string line_str( tmp ); - stack += stripPath( object ); - stack.resize( std::max( stack.size(), 38 ), ' ' ); + sprintf(tmp,"0x%016llx: ",reinterpret_cast(address)); + std::string stack(tmp); + sprintf(tmp,"%i",line); + std::string line_str(tmp); + stack += stripPath(object); + stack.resize(std::max(stack.size(),38),' '); stack += " " + function; - if ( !filename.empty() && line > 0 ) { - stack.resize( std::max( stack.size(), 72 ), ' ' ); - stack += " " + stripPath( filename ) + ":" + line_str; + if ( !filename.empty() && line>0 ) { + stack.resize(std::max(stack.size(),70),' '); + stack += " " + stripPath(filename) + ":" + line_str; } else if ( !filename.empty() ) { - stack.resize( std::max( stack.size(), 72 ), ' ' ); - stack += " " + stripPath( filename ); - } else if ( line > 0 ) { + stack.resize(std::max(stack.size(),70),' '); + stack += " " + stripPath(filename); + } else if ( line>0 ) { stack += " : " + line_str; } return stack; } -size_t StackTrace::stack_info::size() const -{ - return 2*sizeof(void*) + 4*sizeof(int) + object.size() + function.size() + filename.size(); -} -char* StackTrace::stack_info::pack( char* ptr ) const -{ - int Nobj = object.size(); - int Nfun = function.size(); - int Nfile = filename.size(); - ptr = copy_in( sizeof(void*), &address, ptr ); - ptr = copy_in( sizeof(void*), &address2, ptr ); - ptr = copy_in( sizeof(int), &Nobj, ptr ); - ptr = copy_in( sizeof(int), &Nfun, ptr ); - ptr = copy_in( sizeof(int), &Nfile, ptr ); - ptr = copy_in( sizeof(int), &line, ptr ); - ptr = copy_in( Nobj, object.data(), ptr ); - ptr = copy_in( Nfun, function.data(), ptr ); - ptr = copy_in( Nfile, filename.data(), ptr ); - return ptr; -} -const char* StackTrace::stack_info::unpack( const char* ptr ) -{ - int Nobj, Nfun, Nfile; - ptr = copy_out( sizeof(void*), &address, ptr ); - ptr = copy_out( sizeof(void*), &address2, ptr ); - ptr = copy_out( sizeof(int), &Nobj, ptr ); - ptr = copy_out( sizeof(int), &Nfun, ptr ); - ptr = copy_out( sizeof(int), &Nfile, ptr ); - ptr = copy_out( sizeof(int), &line, ptr ); - object.resize( Nobj ); - function.resize( Nfun ); - filename.resize( Nfile ); - ptr = copy_out( Nobj, &object.front(), ptr ); - ptr = copy_out( Nfun, &function.front(), ptr ); - ptr = copy_out( Nfile, &filename.front(), ptr ); - return ptr; -} -std::vector StackTrace::stack_info::packArray( const std::vector& data ) -{ - size_t size = sizeof(int); - for (size_t i=0; i vec(size,0); - char* ptr = vec.data(); - int N = data.size(); - ptr = copy_in( sizeof(int), &N, ptr ); - for (size_t i=0; i StackTrace::stack_info::unpackArray( const char* ptr ) -{ - int N; - ptr = copy_out( sizeof(int), &N, ptr ); - std::vector data(N); - for (size_t i=0; i pack( const std::vector>& data ) -{ - size_t size = sizeof(int); - for (size_t i=0; i out( size, 0 ); - char* ptr = out.data(); - int N = data.size(); - ptr = copy_in( sizeof(int), &N, ptr ); - for (int i=0; i> unpack( const std::vector& in ) -{ - const char* ptr = in.data(); - int N; - ptr = copy_out( sizeof(int), &N, ptr ); - std::vector> data( N ); - for (int i=0; i( depth, maxDepth( child ) ); - return depth+1; -}*/ -std::vector StackTrace::multi_stack_info::print( const std::string& prefix ) const -{ - std::vector text; - if ( stack == stack_info() ) { - for ( const auto& child : children ) { - auto tmp = child.print( ); - text.insert( text.end(), tmp.begin(), tmp.end() ); - } - return text; - } - //auto depth = maxDepth( *this ); - //std::string line = prefix + "[" + std::to_string( N ) + "] "; - //for (auto i=1; i1 && j>0 && i 1 ) - children[i].add( len-1, stack ); - return; - } - } - children.resize( children.size()+1 ); - children.back().N = 1; - children.back().stack = s; - if ( len > 1 ) - children.back().add( len-1, stack ); -} /**************************************************************************** * Function to find an entry * ****************************************************************************/ template -inline size_t findfirst( const std::vector &X, TYPE Y ) +inline size_t findfirst( const std::vector& X, TYPE Y ) { if ( X.empty() ) return 0; size_t lower = 0; - size_t upper = X.size() - 1; + size_t upper = X.size()-1; if ( X[lower] >= Y ) return lower; if ( X[upper] < Y ) return upper; - while ( ( upper - lower ) != 1 ) { - size_t value = ( upper + lower ) / 2; + while ( (upper-lower) != 1 ) { + size_t value = (upper+lower)/2; if ( X[value] >= Y ) upper = value; else @@ -434,46 +136,40 @@ inline size_t findfirst( const std::vector &X, TYPE Y ) * exccessive calls to nm. This function also uses a lock to ensure * * thread safety. * ****************************************************************************/ -std::mutex getSymbols_mutex; +#if __cplusplus <= 199711L + class mutex_class { + public: + void lock() {} + void unlock() {} + }; + mutex_class getSymbols_mutex; +#else + std::mutex getSymbols_mutex; +#endif struct global_symbols_struct { - std::vector address; + std::vector address; std::vector type; std::vector obj; int error; } global_symbols; -std::string StackTrace::getExecutable() +static std::string get_executable() { std::string exe; - try { -#ifdef USE_LINUX - char *buf = new char[0x10000]; - int len = ::readlink( "/proc/self/exe", buf, 0x10000 ); - if ( len != -1 ) { - buf[len] = '\0'; - exe = std::string( buf ); - } - delete[] buf; -#elif defined( USE_MAC ) - uint32_t size = 0x10000; - char *buf = new char[size]; - memset( buf, 0, size ); - if ( _NSGetExecutablePath( buf, &size ) == 0 ) - exe = std::string( buf ); - delete[] buf; -#elif defined( USE_WINDOWS ) - DWORD size = 0x10000; - char *buf = new char[size]; - memset( buf, 0, size ); - GetModuleFileName( nullptr, buf, size ); - exe = std::string( buf ); - delete[] buf; -#endif - } catch ( ... ) { - } + try { + #ifdef USE_LINUX + char *buf = new char[0x10000]; + int len = ::readlink("/proc/self/exe",buf,0x10000); + if ( len!=-1 ) { + buf[len] = '\0'; + exe = std::string(buf); + } + delete [] buf; + #endif + } catch (...) {} return exe; } -std::string global_exe_name = StackTrace::getExecutable(); -static const global_symbols_struct &getSymbols2() +std::string global_exe_name = get_executable(); +static const global_symbols_struct& getSymbols2( ) { static bool loaded = false; static global_symbols_struct data; @@ -482,1088 +178,212 @@ static const global_symbols_struct &getSymbols2() getSymbols_mutex.lock(); if ( !loaded ) { loaded = true; -#ifdef USE_NM - try { - char cmd[1024]; -#ifdef USE_LINUX - sprintf( cmd, "nm -n --demangle %s", global_exe_name.c_str() ); -#elif defined( USE_MAC ) - sprintf( cmd, "nm -n %s | c++filt", global_exe_name.c_str() ); -#else -#error Unknown OS using nm -#endif - int code; - auto output = breakString( StackTrace::exec( cmd, code ) ); - for ( const auto& line : output ) { - if ( line.empty() ) - continue; - if ( line[0] == ' ' ) - continue; - char *a = const_cast(line.c_str()); - char *b = strchr( a, ' ' ); - if ( b == nullptr ) - continue; - b[0] = 0; - b++; - char *c = strchr( b, ' ' ); - if ( c == nullptr ) - continue; - c[0] = 0; - c++; - char *d = strchr( c, '\n' ); - if ( d ) - d[0] = 0; - size_t add = strtoul( a, nullptr, 16 ); - data.address.push_back( reinterpret_cast( add ) ); - data.type.push_back( b[0] ); - data.obj.push_back( std::string( c ) ); + #ifdef USE_NM + try { + char cmd[1024]; + sprintf(cmd,"nm --demangle --numeric-sort %s",global_exe_name.c_str()); + FILE *in = popen(cmd,"r"); + if ( in==NULL ) { + data.error = -2; + return data; + } + char *buf = new char[0x100000]; + while ( fgets(buf,0xFFFFF,in)!=NULL ) { + if ( buf[0]==' ' || buf==NULL ) + continue; + char *a = buf; + char *b = strchr(a,' '); if (b==NULL) {continue;} b[0] = 0; b++; + char *c = strchr(b,' '); if (c==NULL) {continue;} c[0] = 0; c++; + char *d = strchr(c,'\n'); if ( d ) { d[0]=0; } + size_t add = strtoul(a,NULL,16); + data.address.push_back( reinterpret_cast(add) ); + data.type.push_back( b[0] ); + data.obj.push_back( std::string(c) ); + } + pclose(in); + delete [] buf; + } catch (...) { + data.error = -3; } - } catch ( ... ) { - data.error = -3; - } - data.error = 0; -#else - data.error = -1; -#endif + data.error = 0; + #else + data.error = -1; + #endif } getSymbols_mutex.unlock(); } return data; } -int StackTrace::getSymbols( - std::vector &address, std::vector &type, std::vector &obj ) +int StackTrace::getSymbols( std::vector& address, std::vector& type, + std::vector& obj ) { - const global_symbols_struct &data = getSymbols2(); - address = data.address; - type = data.type; - obj = data.obj; + const global_symbols_struct& data = getSymbols2(); + address = data.address; + type = data.type; + obj = data.obj; return data.error; } /**************************************************************************** -* Function to get call stack info * +* Function to get the current call stack * ****************************************************************************/ -#ifdef USE_MAC -static void *loadAddress( const std::string& object ) +static void getFileAndLine( StackTrace::stack_info& info ) { - static std::map obj_map; - if ( obj_map.empty() ) { - uint32_t numImages = _dyld_image_count(); - for ( uint32_t i = 0; i < numImages; i++ ) { - const struct mach_header *header = _dyld_get_image_header( i ); - const char *name = _dyld_get_image_name( i ); - const char *p = strrchr( name, '/' ); - struct mach_header *address = const_cast( header ); - obj_map.insert( std::pair( p + 1, address ) ); - // printf(" module=%s, address=%p\n", p + 1, header); - } - } - auto it = obj_map.find( object ); - void *address = 0; - if ( it != obj_map.end() ) { - address = it->second; - } else { - it = obj_map.find( stripPath( object ) ); - if ( it != obj_map.end() ) - address = it->second; - } - // printf("%s: 0x%016llx\n",object.c_str(),address); - return address; -} -static std::tuple split_atos( const std::string &buf ) -{ - if ( buf.empty() ) - return std::tuple(); - // Get the function - size_t index = buf.find( " (in " ); - if ( index == std::string::npos ) - return std::make_tuple( - buf.substr( 0, buf.length() - 1 ), std::string(), std::string(), 0 ); - std::string fun = buf.substr( 0, index ); - std::string tmp = buf.substr( index + 5 ); - // Get the object - index = tmp.find( ')' ); - std::string obj = tmp.substr( 0, index ); - tmp = tmp.substr( index + 1 ); - // Get the filename and line number - size_t p1 = tmp.find( '(' ); - size_t p2 = tmp.find( ')' ); - tmp = tmp.substr( p1 + 1, p2 - p1 - 1 ); - index = tmp.find( ':' ); - std::string file; - int line = 0; - if ( index != std::string::npos ) { - file = tmp.substr( 0, index ); - line = std::stoi( tmp.substr( index + 1 ) ); - } else if ( p1 != std::string::npos ) { - file = tmp; - } - return std::make_tuple( fun, obj, file, line ); -} -#endif -#ifdef USE_LINUX - typedef uint64_t uint_p; -#elif defined(USE_MAC) - typedef unsigned long uint_p; -#endif -#if defined( USE_LINUX ) || defined( USE_MAC ) -static inline std::string generateCmd( const std::string& s1, - const std::string& s2, const std::string& s3, - std::vector addresses, const std::string& s4 ) -{ - std::string cmd = s1 + s2 + s3; - for (size_t i=0; i( addresses[i] ) ); - cmd += tmp; - } - cmd += s4; - return cmd; -} -#endif -// clang-format off -static void getFileAndLineObject( std::vector &info ) -{ - if ( info.empty() ) - return; - // This gets the file and line numbers for multiple stack lines in the same object - #if defined( USE_LINUX ) - // Create the call command - std::vector address_list(info.size(),nullptr); - for (size_t i=0; iaddress; - if ( info[i]->object.find( ".so" ) != std::string::npos ) - address_list[i] = info[i]->address2; - } - std::string cmd = generateCmd( "addr2line -C -e ", info[0]->object, - " -f -i ", address_list, " 2> /dev/null" ); - // Get the function/line/file - int code; - auto cmd_output = StackTrace::exec( cmd, code ); - auto output = breakString( cmd_output ); - if ( output.size() != 2*info.size() ) + #if defined(USE_LINUX) || defined(USE_MAC) + void *address = info.address; + if ( info.object.find(".so")!=std::string::npos ) + address = info.address2; + char buf[4096]; + sprintf(buf, "addr2line -C -e %s -f -i %lx 2> /dev/null", + info.object.c_str(),reinterpret_cast(address)); + FILE* f = popen(buf, "r"); + if (f == NULL) return; - // Add the results to info - for (size_t i=0; ifunction.empty() ) - info[i]->function = output[2*i+0]; - // get file and line - const char *buf = output[2*i+1].c_str(); - if ( buf[0] != '?' && buf[0] != 0 ) { - size_t j = 0; - for ( j = 0; j < 4095 && buf[j] != ':'; j++ ) { - } - info[i]->filename = std::string( buf, j ); - info[i]->line = atoi( &buf[j + 1] ); - } + buf[4095] = 0; + // get function name + char *rtn = fgets(buf,4095,f); + if ( info.function.empty() && rtn==buf ) { + info.function = std::string(buf); + info.function.resize(std::max(info.function.size(),1)-1); } - #elif defined( USE_MAC ) - // Create the call command - void* load_address = loadAddress( info[0]->object ); - if ( load_address == nullptr ) - return; - std::vector address_list(info.size(),nullptr); - for (size_t i=0; iaddress; - // Call atos to get the object info - char tmp[64]; - sprintf( tmp, " -l %lx ", (uint_p) load_address ); - std::string cmd = generateCmd( "atos -o ", info[0]->object, - tmp, address_list, " 2> /dev/null" ); - // Get the function/line/file - int code; - auto cmd_output = StackTrace::exec( cmd, code ); - auto output = breakString( cmd_output ); - if ( output.size() != info.size() ) - return; - // Parse the output for function, file and line info - for ( size_t i=0; ifunction.empty() ) - info[i]->function = std::get<0>(data); - if ( info[i]->object.empty() ) - info[i]->object = std::get<1>(data); - if ( info[i]->filename.empty() ) - info[i]->filename = std::get<2>(data); - if ( info[i]->line==0 ) - info[i]->line = std::get<3>(data); + // get file and line + rtn = fgets(buf,4095,f); + if ( buf[0]!='?' && buf[0]!=0 && rtn==buf ) { + size_t i = 0; + for (i=0; i<4095 && buf[i]!=':'; i++) { } + info.filename = std::string(buf,i); + info.line = atoi(&buf[i+1]); } + pclose(f); #endif } -static void getFileAndLine( std::vector &info ) -{ - // Build a list of stack elements for each object - std::map> obj_map; - for (size_t i=0; i 0 ) - info.object = global_symbols.obj[index - 1]; + info.object = global_symbols.obj[index-1]; else info.object = global_exe_name; } } -static void signal_handler( int sig ) +StackTrace::stack_info StackTrace::getStackInfo( void* address ) { - printf("Signal caught acquiring stack (%i)\n",sig); - StackTrace::setErrorHandlers( [](std::string,StackTrace::terminateType) { exit( -1 ); } ); -} -StackTrace::stack_info StackTrace::getStackInfo( void *address ) -{ - return getStackInfo( std::vector(1,address) )[0]; -} -std::vector StackTrace::getStackInfo( const std::vector& address ) -{ - // Temporarily handle signals to prevent recursion on the stack - auto prev_handler = signal( SIGINT, signal_handler ); - // Get the detailed stack info - std::vector info(address.size()); - try { - #ifdef USE_WINDOWS - IMAGEHLP_SYMBOL64 pSym[1024]; - memset( pSym, 0, sizeof( pSym ) ); - pSym->SizeOfStruct = sizeof( IMAGEHLP_SYMBOL64 ); - pSym->MaxNameLength = 1024; - - IMAGEHLP_MODULE64 Module; - memset( &Module, 0, sizeof( Module ) ); - Module.SizeOfStruct = sizeof( Module ); - - HANDLE pid = GetCurrentProcess(); - - for (size_t i=0; i( address[i] ); - DWORD64 offsetFromSymbol; - if ( SymGetSymFromAddr( pid, address2, &offsetFromSymbol, pSym ) != FALSE ) { - char name[8192]={0}; - DWORD rtn = UnDecorateSymbolName( pSym->Name, name, sizeof(name)-1, UNDNAME_COMPLETE ); - if ( rtn == 0 ) - info[i].function = std::string(pSym->Name); - else - info[i].function = std::string(name); - } else { - printf( "ERROR: SymGetSymFromAddr (%d,%p)\n", GetLastError(), address2 ); - } - - // Get line number - IMAGEHLP_LINE64 Line; - memset( &Line, 0, sizeof( Line ) ); - Line.SizeOfStruct = sizeof( Line ); - DWORD offsetFromLine; - if ( SymGetLineFromAddr64( pid, address2, &offsetFromLine, &Line ) != FALSE ) { - info[i].line = Line.LineNumber; - info[i].filename = std::string( Line.FileName ); - } else { - info[i].line = 0; - info[i].filename = std::string(); - } - - // Get the object - if ( SymGetModuleInfo64( pid, address2, &Module ) != FALSE ) { - //info[i].object = std::string( Module.ModuleName ); - info[i].object = std::string( Module.LoadedImageName ); - //info[i].baseOfImage = Module.BaseOfImage; - } + StackTrace::stack_info info; + info.address = address; + #ifdef _GNU_SOURCE + Dl_info dlinfo; + if ( !dladdr(address, &dlinfo) ) { + getDataFromGlobalSymbols( info ); + getFileAndLine(info); + return info; + } + info.address2 = subtractAddress(info.address,dlinfo.dli_fbase); + info.object = std::string(dlinfo.dli_fname); + #if defined(USE_ABI) + int status; + char *demangled = abi::__cxa_demangle(dlinfo.dli_sname,NULL,0,&status); + if ( status == 0 && demangled!=NULL ) { + info.function = std::string(demangled); + } else if ( dlinfo.dli_sname!=NULL ) { + info.function = std::string(dlinfo.dli_sname); } + free(demangled); #else - for (size_t i=0; i thread_backtrace; -static bool thread_backtrace_finished; -static std::mutex thread_backtrace_mutex; -static void _callstack_signal_handler( int, siginfo_t*, void* ) +std::vector StackTrace::getCallStack() { - thread_backtrace = StackTrace::backtrace( ); - thread_backtrace_finished = true; -} -#endif -std::vector StackTrace::backtrace( std::thread::native_handle_type tid ) -{ - std::vector trace; - #if defined( USE_LINUX ) || defined( USE_MAC ) + std::vector stack_list; + #if defined(USE_LINUX) || defined(USE_MAC) // Get the trace - if ( tid == pthread_self() ) { - trace.resize(1000,nullptr); - int trace_size = ::backtrace( trace.data(), trace.size() ); - trace.resize (trace_size ); - } else { - // Note: this will get the backtrace, but terminates the thread in the process!!! - thread_backtrace_mutex.lock(); - struct sigaction sa; - sigfillset(&sa.sa_mask); - sa.sa_flags = SA_SIGINFO; - sa.sa_sigaction = _callstack_signal_handler; - sigaction(CALLSTACK_SIG, &sa, NULL); - thread_backtrace_finished = false; - pthread_kill( tid, CALLSTACK_SIG ); - auto t1 = std::chrono::high_resolution_clock::now(); - auto t2 = std::chrono::high_resolution_clock::now(); - while ( !thread_backtrace_finished && std::chrono::duration(t2-t1).count()<0.1 ) { - std::this_thread::yield(); - t2 = std::chrono::high_resolution_clock::now(); - } - std::swap( trace, thread_backtrace ); - thread_backtrace_finished = false; - thread_backtrace_mutex.unlock(); - } - #elif defined( USE_WINDOWS ) - #if defined(DBGHELP) - - // Load the modules for the stack trace - LoadModules(); - - // Initialize stackframe for first call - ::CONTEXT context; - memset( &context, 0, sizeof( context ) ); - context.ContextFlags = CONTEXT_FULL; - RtlCaptureContext( &context ); - STACKFRAME64 frame; // in/out stackframe - memset( &frame, 0, sizeof( frame ) ); + void *trace[100]; + memset(trace,0,100*sizeof(void*)); + int trace_size = backtrace(trace,100); + stack_list.reserve(trace_size); + for (int i=0; i( lInfoMemory.AllocationBase ); + ::TCHAR lNameModule[ 1024 ]; + ::HMODULE hBaseAllocation = reinterpret_cast< ::HMODULE >( lBaseAllocation ); + ::GetModuleFileName( hBaseAllocation, lNameModule, 1024 ); + PIMAGE_DOS_HEADER lHeaderDOS = reinterpret_cast( lBaseAllocation ); + if ( lHeaderDOS==NULL ) + continue; + PIMAGE_NT_HEADERS lHeaderNT = reinterpret_cast( lBaseAllocation + lHeaderDOS->e_lfanew ); + PIMAGE_SECTION_HEADER lHeaderSection = IMAGE_FIRST_SECTION( lHeaderNT ); + ::DWORD64 lRVA = lFrameStack.AddrPC.Offset - lBaseAllocation; + ::DWORD64 lNumberSection = ::DWORD64(); + ::DWORD64 lOffsetSection = ::DWORD64(); + for( int lCnt = ::DWORD64(); lCnt < lHeaderNT->FileHeader.NumberOfSections; lCnt++, lHeaderSection++ ) { + ::DWORD64 lSectionBase = lHeaderSection->VirtualAddress; + ::DWORD64 lSectionEnd = lSectionBase + std::max<::DWORD64>( + lHeaderSection->SizeOfRawData, lHeaderSection->Misc.VirtualSize ); + if( ( lRVA >= lSectionBase ) && ( lRVA <= lSectionEnd ) ) { + lNumberSection = lCnt + 1; + lOffsetSection = lRVA - lSectionBase; + //break; + } } - - if ( frame.AddrPC.Offset != 0 ) - trace.push_back( reinterpret_cast( frame.AddrPC.Offset ) ); - - if ( frame.AddrReturn.Offset == 0 ) - break; + StackTrace::stack_info info; + info.object = lNameModule; + info.address = reinterpret_cast(lRVA); + char tmp[20]; + sprintf(tmp,"0x%016llx",static_cast(lOffsetSection)); + info.function = std::to_string(lNumberSection) + ":" + std::string(tmp); + stack_list.push_back(info); } - SetLastError( ERROR_SUCCESS ); #endif #else #warning Stack trace is not supported on this compiler/OS #endif - return trace; -} -std::vector StackTrace::backtrace() -{ - std::vector trace = backtrace( thisThread() ); - return trace; -} -std::vector> StackTrace::backtraceAll() -{ - // Get the list of threads - auto threads = activeThreads( ); - // Get the backtrace of each thread - std::vector> thread_backtrace; - for ( auto thread : threads ) - thread_backtrace.push_back( backtrace( thread ) ); - return thread_backtrace; -} - - -/**************************************************************************** -* Function to get the list of all active threads * -****************************************************************************/ -#if defined( USE_LINUX ) -static std::thread::native_handle_type thread_handle; -static void _activeThreads_signal_handler( int ) -{ - auto handle = StackTrace::thisThread( ); - thread_handle = handle; - thread_backtrace_finished = true; -} -static inline int get_tid( int pid, const std::string& line ) -{ - char buf2[128]; - int i1 = 0; - while ( line[i1]==' ' && line[i1]!=0 ) { i1++; } - int i2 = i1; - while ( line[i2]!=' ' && line[i2]!=0 ) { i2++; } - memcpy(buf2,&line[i1],i2-i1); - buf2[i2-i1+1] = 0; - int pid2 = atoi(buf2); - if ( pid2 != pid ) - return -1; - i1 = i2; - while ( line[i1]==' ' && line[i1]!=0 ) { i1++; } - i2 = i1; - while ( line[i2]!=' ' && line[i2]!=0 ) { i2++; } - memcpy(buf2,&line[i1],i2-i1); - buf2[i2-i1+1] = 0; - int tid = atoi(buf2); - return tid; -} -#endif -std::thread::native_handle_type StackTrace::thisThread( ) -{ - #if defined( USE_LINUX ) || defined( USE_MAC ) - return pthread_self(); - #elif defined( USE_WINDOWS ) - return GetCurrentThread(); - #else - #warning Stack trace is not supported on this compiler/OS - return std::thread::native_handle_type(); - #endif -} -std::set StackTrace::activeThreads( ) -{ - std::set threads; - #if defined( USE_LINUX ) - std::set tid; - int pid = getpid(); - char cmd[128]; - sprintf( cmd, "ps -T -p %i", pid ); - signal( SIGCHLD, SIG_DFL ); // Clear child exited - int code; - auto output = breakString( exec( cmd, code ) ); - for ( const auto& line : output ) { - int tid2 = get_tid( pid, line ); - if ( tid2 != -1 ) - tid.insert( tid2 ); - } - tid.erase( syscall(SYS_gettid) ); - signal( CALLSTACK_SIG, _activeThreads_signal_handler ); - for ( auto tid2 : tid ) { - thread_backtrace_mutex.lock(); - thread_backtrace_finished = false; - thread_handle = thisThread(); - syscall( SYS_tgkill, pid, tid2, CALLSTACK_SIG ); - auto t1 = std::chrono::high_resolution_clock::now(); - auto t2 = std::chrono::high_resolution_clock::now(); - while ( !thread_backtrace_finished && std::chrono::duration(t2-t1).count()<0.1 ) { - std::this_thread::yield(); - t2 = std::chrono::high_resolution_clock::now(); - } - threads.insert( thread_handle ); - thread_backtrace_mutex.unlock(); - } - #elif defined( USE_MAC ) - printf("activeThreads not finished\n"); - #elif defined( USE_WINDOWS ) - HANDLE hThreadSnap = CreateToolhelp32Snapshot( TH32CS_SNAPTHREAD, 0 ); - if( hThreadSnap != INVALID_HANDLE_VALUE ) { - // Fill in the size of the structure before using it - THREADENTRY32 te32 - te32.dwSize = sizeof(THREADENTRY32 ); - // Retrieve information about the first thread, and exit if unsuccessful - if( !Thread32First( hThreadSnap, &te32 ) ) { - printError( TEXT("Thread32First") ); // Show cause of failure - CloseHandle( hThreadSnap ); // Must clean up the snapshot object! - return( FALSE ); - } - // Now walk the thread list of the system - do { - if ( te32.th32OwnerProcessID == dwOwnerPID ) - threads.insert( te32.th32ThreadID ); - } while( Thread32Next(hThreadSnap, &te32 ) ); - CloseHandle( hThreadSnap ); // Must clean up the snapshot object! - } - #else - #warning activeThreads is not yet supported on this compiler/OS - #endif - threads.insert( thisThread() ); - return threads; -} -// clang-format on - - -/**************************************************************************** -* Function to get the current call stack * -****************************************************************************/ -std::vector StackTrace::getCallStack() -{ - auto trace = StackTrace::backtrace(); - auto info = getStackInfo(trace); - return info; -} -std::vector StackTrace::getCallStack( std::thread::native_handle_type id ) -{ - auto trace = StackTrace::backtrace( id ); - auto info = getStackInfo(trace); - return info; -} -static StackTrace::multi_stack_info generateMultiStack( const std::vector>& thread_backtrace ) -{ - // Get the stack data for all pointers - std::set addresses_set; - for (const auto& trace : thread_backtrace ) { - for (auto ptr : trace ) - addresses_set.insert( ptr ); - } - std::vector addresses( addresses_set.begin(), addresses_set.end() ); - auto stack_data = StackTrace::getStackInfo( addresses ); - std::map map_data; - for ( size_t i=0; i stack( trace.size() ); - for (size_t i=0; i 0 ) { - for ( char *p = ( temp + strlen( temp ) - 1 ); p >= temp; --p ) { - // locate the rightmost path separator - if ( ( *p == '\\' ) || ( *p == '/' ) || ( *p == ':' ) ) { - *p = 0; - break; - } - } - if ( strlen( temp ) > 0 ) { - paths += temp; - paths += ";"; - } - } - memset( temp, 0, sizeof( temp ) ); - if ( GetEnvironmentVariableA( "_NT_SYMBOL_PATH", temp, sizeof( temp ) - 1 ) > 0 ) { - paths += temp; - paths += ";"; - } - memset( temp, 0, sizeof( temp ) ); - if ( GetEnvironmentVariableA( "_NT_ALTERNATE_SYMBOL_PATH", temp, sizeof( temp ) - 1 ) > 0 ) { - paths += temp; - paths += ";"; - } - memset( temp, 0, sizeof( temp ) ); - if ( GetEnvironmentVariableA( "SYSTEMROOT", temp, sizeof( temp ) - 1 ) > 0 ) { - paths += temp; - paths += ";"; - // also add the "system32"-directory: - paths += temp; - paths += "\\system32;"; - } - memset( temp, 0, sizeof( temp ) ); - if ( GetEnvironmentVariableA( "SYSTEMDRIVE", temp, sizeof( temp ) - 1 ) > 0 ) { - paths += "SRV*;" + std::string( temp ) + - "\\websymbols*http://msdl.microsoft.com/download/symbols;"; - } else { - paths += "SRV*c:\\websymbols*http://msdl.microsoft.com/download/symbols;"; - } -#endif - return paths; -} - - -/**************************************************************************** -* Load modules for windows * -****************************************************************************/ -#ifdef USE_WINDOWS -BOOL StackTrace::GetModuleListTH32( HANDLE hProcess, DWORD pid ) -{ - // CreateToolhelp32Snapshot() - typedef HANDLE( __stdcall * tCT32S )( DWORD dwFlags, DWORD th32ProcessID ); - // Module32First() - typedef BOOL( __stdcall * tM32F )( HANDLE hSnapshot, LPMODULEENTRY32 lpme ); - // Module32Next() - typedef BOOL( __stdcall * tM32N )( HANDLE hSnapshot, LPMODULEENTRY32 lpme ); - - // try both dlls... - const TCHAR *dllname[] = { _T("kernel32.dll"), _T("tlhelp32.dll") }; - HINSTANCE hToolhelp = nullptr; - tCT32S pCT32S = nullptr; - tM32F pM32F = nullptr; - tM32N pM32N = nullptr; - - HANDLE hSnap; - MODULEENTRY32 me; - me.dwSize = sizeof( me ); - - for ( size_t i = 0; i < ( sizeof( dllname ) / sizeof( dllname[0] ) ); i++ ) { - hToolhelp = LoadLibrary( dllname[i] ); - if ( hToolhelp == nullptr ) - continue; - pCT32S = (tCT32S) GetProcAddress( hToolhelp, "CreateToolhelp32Snapshot" ); - pM32F = (tM32F) GetProcAddress( hToolhelp, "Module32First" ); - pM32N = (tM32N) GetProcAddress( hToolhelp, "Module32Next" ); - if ( ( pCT32S != nullptr ) && ( pM32F != nullptr ) && ( pM32N != nullptr ) ) - break; // found the functions! - FreeLibrary( hToolhelp ); - hToolhelp = nullptr; - } - - if ( hToolhelp == nullptr ) - return FALSE; - - hSnap = pCT32S( TH32CS_SNAPMODULE, pid ); - if ( hSnap == (HANDLE) -1 ) { - FreeLibrary( hToolhelp ); - return FALSE; - } - - bool keepGoing = !!pM32F( hSnap, &me ); - int cnt = 0; - while ( keepGoing ) { - LoadModule( hProcess, me.szExePath, me.szModule, (DWORD64) me.modBaseAddr, me.modBaseSize ); - cnt++; - keepGoing = !!pM32N( hSnap, &me ); - } - CloseHandle( hSnap ); - FreeLibrary( hToolhelp ); - if ( cnt <= 0 ) - return FALSE; - return TRUE; -} -DWORD StackTrace::LoadModule( - HANDLE hProcess, LPCSTR img, LPCSTR mod, DWORD64 baseAddr, DWORD size ) -{ - CHAR *szImg = _strdup( img ); - CHAR *szMod = _strdup( mod ); - DWORD result = ERROR_SUCCESS; - if ( ( szImg == nullptr ) || ( szMod == nullptr ) ) { - result = ERROR_NOT_ENOUGH_MEMORY; - } else { - if ( SymLoadModule( hProcess, 0, szImg, szMod, baseAddr, size ) == 0 ) - result = GetLastError(); - } - ULONGLONG fileVersion = 0; - if ( szImg != nullptr ) { - // try to retrive the file-version: - VS_FIXEDFILEINFO *fInfo = nullptr; - DWORD dwHandle; - DWORD dwSize = GetFileVersionInfoSizeA( szImg, &dwHandle ); - if ( dwSize > 0 ) { - LPVOID vData = malloc( dwSize ); - if ( vData != nullptr ) { - if ( GetFileVersionInfoA( szImg, dwHandle, dwSize, vData ) != 0 ) { - UINT len; - TCHAR szSubBlock[] = _T("\\"); - if ( VerQueryValue( vData, szSubBlock, (LPVOID *) &fInfo, &len ) == 0 ) { - fInfo = nullptr; - } else { - fileVersion = ( (ULONGLONG) fInfo->dwFileVersionLS ) + - ( (ULONGLONG) fInfo->dwFileVersionMS << 32 ); - } - } - free( vData ); - } - } - - // Retrive some additional-infos about the module - IMAGEHLP_MODULE64 Module; - Module.SizeOfStruct = sizeof( IMAGEHLP_MODULE64 ); - SymGetModuleInfo64( hProcess, baseAddr, &Module ); - LPCSTR pdbName = Module.LoadedImageName; - if ( Module.LoadedPdbName[0] != 0 ) - pdbName = Module.LoadedPdbName; - } - if ( szImg != nullptr ) - free( szImg ); - if ( szMod != nullptr ) - free( szMod ); - return result; -} -BOOL StackTrace::GetModuleListPSAPI( HANDLE hProcess ) -{ - DWORD cbNeeded; - HMODULE hMods[1024]; - char tt[8192]; - char tt2[8192]; - if ( !EnumProcessModules( hProcess, hMods, sizeof( hMods ), &cbNeeded ) ) { - return false; - } - if ( cbNeeded > sizeof( hMods ) ) { - printf( "Insufficient memory allocated in GetModuleListPSAPI\n" ); - return false; - } - int cnt = 0; - for ( DWORD i = 0; i < cbNeeded / sizeof( hMods[0] ); i++ ) { - // base address, size - MODULEINFO mi; - GetModuleInformation( hProcess, hMods[i], &mi, sizeof( mi ) ); - // image file name - tt[0] = 0; - GetModuleFileNameExA( hProcess, hMods[i], tt, sizeof( tt ) ); - // module name - tt2[0] = 0; - GetModuleBaseNameA( hProcess, hMods[i], tt2, sizeof( tt2 ) ); - DWORD dwRes = LoadModule( hProcess, tt, tt2, (DWORD64) mi.lpBaseOfDll, mi.SizeOfImage ); - if ( dwRes != ERROR_SUCCESS ) - printf( "ERROR: LoadModule (%d)\n", dwRes ); - cnt++; - } - - return cnt != 0; -} -void StackTrace::LoadModules() -{ - static bool modules_loaded = false; - if ( !modules_loaded ) { - modules_loaded = true; - - // Get the search paths for symbols - std::string paths = StackTrace::getSymPaths(); - - // Initialize the symbols - if ( SymInitialize( GetCurrentProcess(), paths.c_str(), FALSE ) == FALSE ) - printf( "ERROR: SymInitialize (%d)\n", GetLastError() ); - - DWORD symOptions = SymGetOptions(); - symOptions |= SYMOPT_LOAD_LINES | SYMOPT_FAIL_CRITICAL_ERRORS; - symOptions = SymSetOptions( symOptions ); - char buf[1024] = { 0 }; - if ( SymGetSearchPath( GetCurrentProcess(), buf, sizeof( buf ) ) == FALSE ) - printf( "ERROR: SymGetSearchPath (%d)\n", GetLastError() ); - - // First try to load modules from toolhelp32 - BOOL loaded = StackTrace::GetModuleListTH32( GetCurrentProcess(), GetCurrentProcessId() ); - - // Try to load from Psapi - if ( !loaded ) - loaded = StackTrace::GetModuleListPSAPI( GetCurrentProcess() ); - } -} -#endif - - -/**************************************************************************** -* Get the signal name * -****************************************************************************/ -std::string StackTrace::signalName( int sig ) -{ - return std::string( strsignal(sig) ); -} -std::vector StackTrace::allSignalsToCatch() -{ - std::set signals; - for (int i=1; i<32; i++) - signals.insert( i ); - for (int i=SIGRTMIN; i<=SIGRTMAX; i++) - signals.insert( i ); - signals.erase( SIGKILL ); - signals.erase( SIGSTOP ); - return std::vector( signals.begin(), signals.end() ); -} -std::vector StackTrace::defaultSignalsToCatch() -{ - auto tmp = allSignalsToCatch(); - std::set signals( tmp.begin(), tmp.end() ); - signals.erase( SIGWINCH ); // Don't catch window changed by default - signals.erase( SIGCONT ); // Don't catch continue by default - return std::vector( signals.begin(), signals.end() ); -} - - -/**************************************************************************** -* Set the signal handlers * -****************************************************************************/ -static std::function abort_fun; -static std::string rethrow() -{ - std::string last_message; -#ifdef USE_LINUX - try { - static int tried_throw = 0; - if ( tried_throw == 0 ) { - tried_throw = 1; - throw; - } - // No active exception - } catch ( const std::exception &err ) { - // Caught a std::runtime_error - last_message = err.what(); - } catch ( ... ) { - // Caught an unknown exception - last_message = "unknown exception occurred."; - } -#endif - return last_message; -} -static void term_func_abort( int sig ) -{ - std::string msg( "Caught signal: " ); - msg += StackTrace::signalName( sig ); - abort_fun( msg, StackTrace::terminateType::signal ); -} -static std::set signals_set = std::set(); -static void term_func() -{ - std::string last_message = rethrow(); - StackTrace::clearSignals(); - abort_fun( "Unhandled exception:\n" + last_message, StackTrace::terminateType::exception ); -} -void StackTrace::clearSignal( int sig ) -{ - if ( signals_set.find(sig) != signals_set.end() ) { - signal( sig, SIG_DFL ); - signals_set.erase( sig ); - } -} -void StackTrace::clearSignals() -{ - for ( auto sig : signals_set ) - signal( sig, SIG_DFL ); - signals_set.clear(); -} -void StackTrace::setSignals( const std::vector& signals, void (*handler) (int) ) -{ - for ( auto sig : signals ) { - signal( sig, handler ); - signals_set.insert( sig ); - } -} -void StackTrace::setErrorHandlers( - std::function abort ) -{ - abort_fun = abort; - std::set_terminate( term_func ); - setSignals( defaultSignalsToCatch(), &term_func_abort ); - std::set_unexpected( term_func ); -} - - -/**************************************************************************** -* Global call stack functionallity * -****************************************************************************/ -#ifdef USE_MPI -static MPI_Comm globalCommForGlobalCommStack = MPI_COMM_NULL; -static std::shared_ptr globalMonitorThread; -static bool stopGlobalMonitorThread = false; -static void runGlobalMonitorThread() -{ - int rank = 0; - int size = 1; - MPI_Comm_size( globalCommForGlobalCommStack, &size ); - MPI_Comm_rank( globalCommForGlobalCommStack, &rank ); - while ( !stopGlobalMonitorThread ) { - // Check for any messages - int flag = 0; - MPI_Status status; - int err = MPI_Iprobe( MPI_ANY_SOURCE, 1, globalCommForGlobalCommStack, &flag, &status ); - if ( err != MPI_SUCCESS ) { - printf("Internal error in StackTrace::getGlobalCallStacks::runGlobalMonitorThread\n"); - break; - } else if ( flag != 0 ) { - // We received a request - int src_rank = status.MPI_SOURCE; - int tag; - MPI_Recv( &tag, 1, MPI_INT, src_rank, 1, globalCommForGlobalCommStack, &status ); - // Get a trace of all threads (except this) - auto threads = StackTrace::activeThreads( ); - threads.erase( StackTrace::thisThread( ) ); - if ( threads.empty() ) - continue; - // Get the stack trace of each thread - std::vector> stack; - for ( auto thread : threads ) - stack.push_back( StackTrace::getCallStack( thread ) ); - // Pack and send the data - auto data = pack( stack ); - int count = data.size(); - MPI_Send( data.data(), count, MPI_CHAR, src_rank, tag, globalCommForGlobalCommStack ); - } else { - // No requests recieved - std::this_thread::sleep_for( std::chrono::milliseconds(50) ); - } - } -} -void StackTrace::globalCallStackInitialize( MPI_Comm comm ) -{ - #ifdef USE_MPI - MPI_Comm_dup( comm, &globalCommForGlobalCommStack ); - #endif - stopGlobalMonitorThread = false; - globalMonitorThread.reset( new std::thread( runGlobalMonitorThread ) ); -} -void StackTrace::globalCallStackFinalize( ) -{ - stopGlobalMonitorThread = true; - globalMonitorThread->join(); - globalMonitorThread.reset(); - #ifdef USE_MPI - if ( globalCommForGlobalCommStack ) - MPI_Comm_free( &globalCommForGlobalCommStack ); - #endif -} -StackTrace::multi_stack_info StackTrace::getGlobalCallStacks( ) -{ - // Check if we properly initialized the comm - if ( globalMonitorThread == nullptr ) { - printf("Warning: getGlobalCallStacks called without call to globalCallStackInitialize\n"); - return getAllCallStacks( ); - } - if ( activeThreads().size()==1 ) { - printf("Warning: getAllCallStacks not supported on this OS, defaulting to basic call stack\n"); - return getAllCallStacks( ); - } - // Signal all processes that we want their stack for all threads - int rank = 0; - int size = 1; - MPI_Comm_size( globalCommForGlobalCommStack, &size ); - MPI_Comm_rank( globalCommForGlobalCommStack, &rank ); - std::random_device rd; - std::mt19937 gen(rd()); - std::uniform_int_distribution<> dis(2,0x7FFF); - int tag = dis(gen); - std::vector sendRequest( size ); - for (int i=0; inative_handle() ); - StackTrace::multi_stack_info multistack; - for ( auto thread : threads ) { - auto stack = StackTrace::getCallStack( thread ); - multistack.add( stack.size(), stack.data() ); - } - // Recieve the backtrace for all processes/threads - int N_finished = 1; - auto start = std::chrono::steady_clock::now(); - double time = 0; - const double max_time = 2.0 + size*20e-3; - while ( N_finished data( count, 0 ); - MPI_Recv( data.data(), count, MPI_CHAR, src_rank, tag, globalCommForGlobalCommStack, &status ); - auto stack_list = unpack( data ); - for ( const auto& stack : stack_list ) - multistack.add( stack.size(), stack.data() ); - N_finished++; - } else { - auto stop = std::chrono::steady_clock::now(); - time = std::chrono::duration_cast(stop-start).count(); - std::this_thread::yield(); - } - } - return multistack; -} -#else -void StackTrace::globalCallStackInitialize( MPI_Comm ) -{ -} -void StackTrace::globalCallStackFinalize( ) -{ -} -StackTrace::multi_stack_info StackTrace::getGlobalCallStacks( ) -{ - return getAllCallStacks( ); -} -#endif - diff --git a/common/StackTrace.h b/common/StackTrace.h index f3ca5698..1a5d1dac 100644 --- a/common/StackTrace.h +++ b/common/StackTrace.h @@ -1,31 +1,12 @@ -#ifndef included_AtomicStackTrace -#define included_AtomicStackTrace +#ifndef included_StackTrace +#define included_StackTrace -#include -#include #include #include +#include #include -#include -#include -#include -// Check for and include MPI -// clang-format off -#if defined(USE_MPI) || defined(USE_EXT_MPI) - #include "mpi.h" -#elif defined(__has_include) - #if __has_include("mpi.h") - #include "mpi.h" - #else - typedef int MPI_Comm; - #endif -#else - typedef int MPI_Comm; -#endif -// clang-format on - namespace StackTrace { @@ -38,179 +19,29 @@ struct stack_info { std::string filename; int line; //! Default constructor - stack_info() : address( nullptr ), address2( nullptr ), line( 0 ) {} - //! Operator== - bool operator==( const stack_info& rhs ) const; - //! Operator!= - bool operator!=( const stack_info& rhs ) const; + stack_info(): address(NULL), address2(NULL), line(0) {} //! Print the stack info std::string print() const; - //! Compute the number of bytes needed to store the object - size_t size() const; - //! Pack the data to a byte array, returning a pointer to the end of the data - char* pack( char* ptr ) const; - //! Unpack the data from a byte array, returning a pointer to the end of the data - const char* unpack( const char* ptr ); - //! Pack a vector of data to a memory block - static std::vector packArray( const std::vector& data ); - //! Unpack a vector of data from a memory block - static std::vector unpackArray( const char* data ); }; -struct multi_stack_info { - int N; - stack_info stack; - std::vector children; - //! Default constructor - multi_stack_info() : N( 0 ) {} - //! Add the given stack to the multistack - void add( size_t N, const stack_info *stack ); - //! Print the stack info - std::vector print( const std::string& prefix=std::string() ) const; -}; - - -/*! - * @brief Get the current call stack - * @details This function returns the current call stack for the current thread - * @return Returns vector containing the stack - */ +//! Function to return the current call stack std::vector getCallStack(); -/*! - * @brief Get the current call stack for a thread - * @details This function returns the current call stack for the given thread - * @param[in] id The thread id of the stack we want to return - * @return Returns vector containing the stack - */ -std::vector getCallStack( std::thread::native_handle_type id ); - - -/*! - * @brief Get the current call stack for all threads - * @details This function returns the current call stack for all threads - * in the current process. - * Note: This functionality may not be availible on all platforms - * @return Returns vector containing the stack - */ -multi_stack_info getAllCallStacks( ); - - -/*! - * @brief Get the current call stack for all threads/processes - * @details This function returns the current call stack for all threads - * for all processes in the current process. This function requires - * the user to call globalCallStackInitialize() before calling this - * routine, and globalCallStackFinalize() before exiting. - * Note: This functionality may not be availible on all platforms - * @return Returns vector containing the stack - */ -multi_stack_info getGlobalCallStacks( ); - - -//! Function to return the current call stack for the current thread -std::vector backtrace(); - -//! Function to return the current call stack for the given thread -std::vector backtrace( std::thread::native_handle_type id ); - -//! Function to return the current call stack for all threads -std::vector> backtraceAll(); - - //! Function to return the stack info for a given address -stack_info getStackInfo( void *address ); - - -//! Function to return the stack info for a given address -std::vector getStackInfo( const std::vector &address ); - - -//! Function to return the signal name -std::string signalName( int signal ); +stack_info getStackInfo( void* address ); /*! * Return the symbols from the current executable (not availible for all platforms) * @return Returns 0 if sucessful */ -int getSymbols( - std::vector &address, std::vector &type, std::vector &obj ); - - -/*! - * Return the name of the executable - * @return Returns the name of the executable (usually the full path) - */ -std::string getExecutable(); - - -/*! - * Return the search path for the symbols - * @return Returns the search path for the symbols - */ -std::string getSymPaths(); - - -//!< Terminate type -enum class terminateType { signal, exception }; - -/*! - * Set the error handlers - * @param[in] Function to terminate the program: abort(msg,type) - */ -void setErrorHandlers( std::function abort ); - - -/*! - * Set the given signals to the handler - * @param[in] Function to terminate the program: abort(msg,type) - */ -void setSignals( const std::vector& signals, void (*handler) (int) ); - - -//! Clear a signal set by setSignals -void clearSignal( int signal ); - - -//! Clear all signals set by setSignals -void clearSignals( ); - - -//! Return a list of all signals that can be caught -std::vector allSignalsToCatch( ); - -//! Return a default list of signals to catch -std::vector defaultSignalsToCatch( ); - - -//! Get a list of the active threads -std::set activeThreads( ); - -//! Get a handle to this thread -std::thread::native_handle_type thisThread( ); - - -//! Initialize globalCallStack functionallity -void globalCallStackInitialize( MPI_Comm comm ); - -//! Clean up globalCallStack functionallity -void globalCallStackFinalize( ); - - -/*! - * @brief Call system command - * @details This function calls a system command, waits for the program - * to execute, captures and returns the output and exit code. - * @param[in] cmd Command to execute - * @param[out] exit_code Exit code returned from child process - * @return Returns string containing the output - */ -std::string exec( const std::string& cmd, int& exit_code ); +int getSymbols( std::vector& address, std::vector& type, std::vector& obj ); } // namespace StackTrace + #endif + diff --git a/common/Utilities.cpp b/common/Utilities.cpp index f227b8e0..55369141 100644 --- a/common/Utilities.cpp +++ b/common/Utilities.cpp @@ -236,9 +236,9 @@ size_t Utilities::getMemoryUsage() size_t N_bytes = 0; #if defined(USE_LINUX) struct mallinfo meminfo = mallinfo(); - size_t size_hblkhd = static_cast( meminfo.hblkhd ); - size_t size_uordblks = static_cast( meminfo.uordblks ); - N_bytes = size_hblkhd + size_uordblks; + size_t size_hblkhd = static_cast( meminfo.hblkhd ); + size_t size_uordblks = static_cast( meminfo.uordblks ); + N_bytes = static_cast( size_hblkhd + size_uordblks ); #elif defined(USE_MAC) struct task_basic_info t_info; mach_msg_type_number_t t_info_count = TASK_BASIC_INFO_COUNT; @@ -347,11 +347,3 @@ std::vector Utilities::factor(size_t number) std::sort( factors.begin(), factors.end() ); return factors; } - - -// Dummy function to prevent compiler from optimizing away variable -void Utilities::nullUse( void* data ) -{ - NULL_USE(data); -} - diff --git a/common/Utilities.h b/common/Utilities.h index e1f1713d..5c1424e7 100644 --- a/common/Utilities.h +++ b/common/Utilities.h @@ -61,9 +61,6 @@ namespace Utilities //! Factor a number into it's prime factors std::vector factor(size_t number); - //! Print AMP Banner - void nullUse( void* ); - } // namespace Utilities