From 0a49f9ce77d7fff83e40cdb456fc30891b68ae45 Mon Sep 17 00:00:00 2001 From: Mark Berrill Date: Mon, 18 Mar 2019 09:42:44 -0400 Subject: [PATCH] Updating threadpool / StackTrace --- CMakeLists.txt | 6 +- IO/Mesh.cpp | 2 +- IO/Mesh.h | 3 +- IO/MeshDatabase.h | 2 +- IO/Reader.h | 2 +- IO/Writer.cpp | 2 +- StackTrace/ErrorHandlers.h | 42 + StackTrace/Readme.txt | 4 + StackTrace/StackTrace.cpp | 2517 ++++++++++++++++++++++++++ {common => StackTrace}/StackTrace.h | 157 +- StackTrace/Utilities.cpp | 296 +++ StackTrace/Utilities.h | 99 + StackTrace/string_view.h | 193 ++ analysis/Minkowski.cpp | 6 +- analysis/Minkowski.h | 2 +- analysis/TwoPhase.cpp | 7 +- analysis/TwoPhase.h | 7 +- cmake/SharedPtr.cmake | 170 -- common/Array.hpp | 5 +- common/MPI_Helpers.cpp | 10 +- common/StackTrace.cpp | 1876 ------------------- common/Utilities.cpp | 295 --- common/Utilities.h | 85 +- tests/CMakeLists.txt | 1 - tests/TestWriter.cpp | 2 +- tests/testUtilities.cpp | 145 -- threadpool/Readme.txt | 2 + threadpool/atomic_helpers.cpp | 38 + threadpool/atomic_helpers.h | 8 +- threadpool/atomic_list.h | 54 +- threadpool/atomic_list.hpp | 58 +- threadpool/test/CMakeLists.txt | 16 - threadpool/test/test_atomic.cpp | 154 -- threadpool/test/test_atomic_list.cpp | 221 --- threadpool/test/test_thread_pool.cpp | 967 ---------- threadpool/thread_pool.cpp | 470 +++-- threadpool/thread_pool.h | 226 ++- threadpool/thread_pool.hpp | 185 +- 38 files changed, 3849 insertions(+), 4486 deletions(-) create mode 100644 StackTrace/ErrorHandlers.h create mode 100644 StackTrace/Readme.txt create mode 100644 StackTrace/StackTrace.cpp rename {common => StackTrace}/StackTrace.h (59%) create mode 100644 StackTrace/Utilities.cpp create mode 100644 StackTrace/Utilities.h create mode 100644 StackTrace/string_view.h delete mode 100644 cmake/SharedPtr.cmake delete mode 100644 common/StackTrace.cpp delete mode 100644 tests/testUtilities.cpp create mode 100644 threadpool/Readme.txt delete mode 100644 
threadpool/test/CMakeLists.txt delete mode 100644 threadpool/test/test_atomic.cpp delete mode 100644 threadpool/test/test_atomic_list.cpp delete mode 100644 threadpool/test/test_thread_pool.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 2f055989..acc2c2dc 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -112,7 +112,7 @@ ENDIF() ADD_CUSTOM_TARGET( build-test ) ADD_CUSTOM_TARGET( build-examples ) ADD_CUSTOM_TARGET( check COMMAND make test ) -ADD_DISTCLEAN( analysis null_timer tests liblbpm-wia.* cpu gpu example common IO threadpool ) +ADD_DISTCLEAN( analysis null_timer tests liblbpm-wia.* cpu gpu example common IO threadpool StackTrace ) # Check for CUDA @@ -133,8 +133,6 @@ IF ( NOT ONLY_BUILD_DOCS ) CONFIGURE_LBPM() CONFIGURE_TIMER( 0 "${${PROJ}_INSTALL_DIR}/null_timer" ) CONFIGURE_LINE_COVERAGE() - INCLUDE( "${CMAKE_CURRENT_SOURCE_DIR}/cmake/SharedPtr.cmake" ) - CONFIGURE_SHARED_PTR( "${${PROJ}_INSTALL_DIR}/include" "std" ) # Set the external library link list SET( EXTERNAL_LIBS ${EXTERNAL_LIBS} ${TIMER_LIBS} ) ENDIF() @@ -156,6 +154,7 @@ IF ( NOT ONLY_BUILD_DOCS ) ADD_PACKAGE_SUBDIRECTORY( analysis ) ADD_PACKAGE_SUBDIRECTORY( IO ) ADD_PACKAGE_SUBDIRECTORY( threadpool ) + ADD_PACKAGE_SUBDIRECTORY( StackTrace ) ADD_PACKAGE_SUBDIRECTORY( models ) IF ( USE_CUDA ) ADD_PACKAGE_SUBDIRECTORY( gpu ) @@ -164,7 +163,6 @@ IF ( NOT ONLY_BUILD_DOCS ) ENDIF() INSTALL_LBPM_TARGET( lbpm-wia-library ) ADD_SUBDIRECTORY( tests ) - ADD_SUBDIRECTORY( threadpool/test ) ADD_SUBDIRECTORY( example ) #ADD_SUBDIRECTORY( workflows ) INSTALL_PROJ_LIB() diff --git a/IO/Mesh.cpp b/IO/Mesh.cpp index 742dac85..eb712296 100644 --- a/IO/Mesh.cpp +++ b/IO/Mesh.cpp @@ -1,8 +1,8 @@ #include "Mesh.h" #include "common/Utilities.h" -#include "shared_ptr.h" #include +#include #include namespace IO { diff --git a/IO/Mesh.h b/IO/Mesh.h index 604dddfd..b204675a 100644 --- a/IO/Mesh.h +++ b/IO/Mesh.h @@ -2,14 +2,13 @@ #define MESH_INC #include +#include #include #include #include "common/Array.h" 
#include "common/Communication.h" #include "analysis/PointList.h" -#include "shared_ptr.h" - namespace IO { diff --git a/IO/MeshDatabase.h b/IO/MeshDatabase.h index ad696260..9f544925 100644 --- a/IO/MeshDatabase.h +++ b/IO/MeshDatabase.h @@ -3,9 +3,9 @@ #include "IO/Mesh.h" #include "common/MPI_Helpers.h" -#include "shared_ptr.h" #include +#include #include #include #include diff --git a/IO/Reader.h b/IO/Reader.h index ce8dba22..4230ff8f 100644 --- a/IO/Reader.h +++ b/IO/Reader.h @@ -2,12 +2,12 @@ #define READER_INC #include +#include #include #include #include "IO/Mesh.h" #include "IO/MeshDatabase.h" -#include "shared_ptr.h" namespace IO { diff --git a/IO/Writer.cpp b/IO/Writer.cpp index bb522cf6..6581ad42 100644 --- a/IO/Writer.cpp +++ b/IO/Writer.cpp @@ -4,12 +4,12 @@ #include "IO/silo.h" #include "common/MPI_Helpers.h" #include "common/Utilities.h" -#include "shared_ptr.h" #include #include #include #include +#include diff --git a/StackTrace/ErrorHandlers.h b/StackTrace/ErrorHandlers.h new file mode 100644 index 00000000..12b8d7de --- /dev/null +++ b/StackTrace/ErrorHandlers.h @@ -0,0 +1,42 @@ +#ifndef included_StackTraceErrorHandlers +#define included_StackTraceErrorHandlers + + +#include "StackTrace/StackTrace.h" + +#include + +#include "mpi.h" + + +namespace StackTrace +{ + + + /*! + * Set the error handler + * @param[in] abort Function to terminate the program: abort(msg,type) + */ + void setErrorHandler( std::function abort ); + + //! Clear the error handler + void clearErrorHandler(); + + + //! Set an error handler for MPI + void setMPIErrorHandler( MPI_Comm comm ); + + //! Clear an error handler for MPI + void clearMPIErrorHandler( MPI_Comm comm ); + + + //! Initialize globalCallStack functionallity + void globalCallStackInitialize( MPI_Comm comm ); + + //! 
Clean up globalCallStack functionallity + void globalCallStackFinalize(); + + +} // namespace StackTrace + +#endif diff --git a/StackTrace/Readme.txt b/StackTrace/Readme.txt new file mode 100644 index 00000000..264fed62 --- /dev/null +++ b/StackTrace/Readme.txt @@ -0,0 +1,4 @@ +This directory contains code external code released with permission under the license of this project. + +Original code and license are availible at: +https://bitbucket.org/mberrill/StackTrace diff --git a/StackTrace/StackTrace.cpp b/StackTrace/StackTrace.cpp new file mode 100644 index 00000000..e9292990 --- /dev/null +++ b/StackTrace/StackTrace.cpp @@ -0,0 +1,2517 @@ +#include "StackTrace/StackTrace.h" +#include "StackTrace/ErrorHandlers.h" +#include "StackTrace/Utilities.h" + +// Replace sith std::string_view when we switch to c++17 +#include "StackTrace/string_view.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +#define perr std::cerr + +using StackTrace::string_view; + +// Detect the OS +// clang-format off +#if defined( WIN32 ) || defined( _WIN32 ) || defined( WIN64 ) || defined( _WIN64 ) || defined( _MSC_VER ) + #define USE_WINDOWS + #define NOMINMAX +#elif defined( __APPLE__ ) + #define USE_MAC + #define USE_NM +#elif defined( __linux ) || defined( __linux__ ) || defined( __unix ) || defined( __posix ) + #define USE_LINUX + #define USE_NM +#else + #error Unknown OS +#endif +// clang-format on + + +// Include system dependent headers +// clang-format off +// Detect the OS and include system dependent headers +#ifdef USE_WINDOWS + #include + #include + #include + #include + #include + #include + #include + #include + #pragma comment( lib, "version.lib" ) // for "VerQueryValue" +#else + #include + #include + #include + #include + #include + #include + #include +#endif +#ifdef USE_MAC + #include + #include + #include + #include + #define SIGRTMIN SIGUSR1 + #define SIGRTMAX SIGUSR2 +#endif +// 
clang-format on + + +#ifdef __GNUC__ +#define USE_ABI +#include +#endif + + +#ifndef NULL_USE +#define NULL_USE( variable ) \ + do { \ + if ( 0 ) { \ + auto static temp = (char *) &variable; \ + temp++; \ + } \ + } while ( 0 ) +#endif + + +// Mutex for StackTrace opertions that need blocking +static std::mutex StackTrace_mutex; + + +// Helper thread +static std::shared_ptr globalMonitorThread; + + +// Function to replace all instances of a string with another +static constexpr size_t replace( + char *str, size_t N, size_t pos, size_t len, const string_view &r ) noexcept +{ + size_t Nr = r.size(); + auto tmp = str; + size_t k = pos; + for ( size_t i = 0; i < Nr && k < N; i++, k++ ) + str[k] = r[i]; + for ( size_t i = pos + len; i < N && k < N; i++, k++ ) + str[k] = tmp[i]; + for ( size_t m = k; m < N; m++ ) + str[k] = 0; + return k; +} +template +static constexpr size_t replace( + std::array &str, size_t pos, size_t len, const string_view &r ) noexcept +{ + return replace( str.data(), N, pos, len, r ); +} +static constexpr void strrep( + char *str, size_t &N, const string_view &s, const string_view &r ) noexcept +{ + size_t Ns = s.size(); + size_t pos = string_view( str, N ).find( s ); + while ( pos != std::string::npos ) { + N = replace( str, N, pos, Ns, r ); + pos = string_view( str, N ).find( s ); + } +} + +static void cleanupFunctionName( char * ); + + +// Utility to strip the path from a filename +static constexpr const char *stripPath( const char *filename ) noexcept +{ + const char *s = filename; + while ( *s ) { + if ( *s == 47 || *s == 92 ) + filename = s + 1; + ++s; + } + return filename; +} + + +// Functions to hash strings +constexpr uint32_t hashString( const char *s ) +{ + uint32_t c = 0; + uint32_t hash = 5381; + while ( ( c = *s++ ) ) + hash = ( ( hash << 5 ) + hash ) ^ c; + return hash; +} +template +static constexpr uint64_t objHash( + const std::array &obj, const std::array &objPath ) +{ + uint32_t v1 = hashString( obj.data() ); + uint32_t v2 = 
hashString( objPath.data() ); + uint64_t key = ( static_cast( v1 ) << 32 ) + static_cast( v1 ^ v2 ); + return key; +} + + +//! Assign a string to a std::array +template +static constexpr void copy( const char *in, std::array &out ) noexcept +{ + size_t N1 = strlen( in ); + out.fill( 0 ); + if ( N1 < N2 ) { + memcpy( out.data(), in, N1 ); + } else { + memcpy( out.data(), in, N2 - 4 ); + out[N2 - 4] = out[N2 - 3] = out[N2 - 2] = '.'; + } +} +template +static constexpr void copy( const std::array &in, std::array &out ) noexcept +{ + out.fill( 0 ); + if ( N1 < N2 ) { + memcpy( out.data(), in.data(), N1 ); + } else { + memcpy( out.data(), in.data(), N2 - 4 ); + out[N2 - 4] = out[N2 - 3] = out[N2 - 2] = '.'; + } +} +template +static constexpr void copy( + const char *in, std::array &out, std::array &outPath ) noexcept +{ + auto ptr = stripPath( in ); + copy( ptr, out ); + outPath.fill( 0 ); + if ( ptr != in ) { + size_t N = ptr - in - 1; + if ( N < N3 ) { + memcpy( outPath.data(), in, N ); + } else { + memcpy( outPath.data(), in, N3 - 4 ); + outPath[N3 - 4] = outPath[N3 - 3] = outPath[N3 - 2] = '.'; + } + } +} + + +// Inline function to subtract two addresses returning the absolute difference +static inline void *subtractAddress( void *a, void *b ) noexcept +{ + return reinterpret_cast( + std::abs( reinterpret_cast( a ) - reinterpret_cast( b ) ) ); +} + + +#ifdef USE_WINDOWS +static BOOL __stdcall readProcMem( HANDLE hProcess, DWORD64 qwBaseAddress, PVOID lpBuffer, + DWORD nSize, LPDWORD lpNumberOfBytesRead ) +{ + SIZE_T st; + BOOL bRet = ReadProcessMemory( hProcess, (LPVOID) qwBaseAddress, lpBuffer, nSize, &st ); + *lpNumberOfBytesRead = (DWORD) st; + return bRet; +} +static inline std::string getCurrentDirectory() +{ + char temp[1024] = { 0 }; + GetCurrentDirectoryA( sizeof( temp ), temp ); + return temp; +} +namespace StackTrace { +BOOL GetModuleListTH32( HANDLE hProcess, DWORD pid ); +BOOL GetModuleListPSAPI( HANDLE hProcess ); +DWORD LoadModule( HANDLE hProcess, 
LPCSTR img, LPCSTR mod, DWORD64 baseAddr, DWORD size ); +void LoadModules(); +}; // namespace StackTrace +#endif + + +/**************************************************************************** + * Class to replace a std::vector with a fixed capacity * + ****************************************************************************/ +template +class staticVector final +{ +public: + staticVector() : d_size( 0 ) {} + size_t size() const { return d_size; } + bool empty() const { return d_size == 0; } + void push_back( const TYPE &v ) + { + if ( d_size < CAPACITY ) + d_data[d_size++] = v; + } + TYPE &operator[]( size_t i ) { return d_data[i]; } + TYPE *begin() { return d_data; } + TYPE *end() { return d_data + d_size; } + TYPE &back() { return d_data[d_size - 1]; } + TYPE *data() { return d_size == 0 ? nullptr : d_data; } + void pop_back() { d_size = std::max( d_size, 1 ) - 1; } + const TYPE *begin() const { return d_data; } + const TYPE *end() const { return d_data + d_size; } + const TYPE &back() const { return d_data[d_size - 1]; } + void clear() { d_size = 0; } + void resize( size_t N, TYPE x = TYPE() ) + { + if ( N > CAPACITY ) + throw std::logic_error( "Invalid size" ); + for ( size_t i = d_size; i < N; i++ ) + d_data[i] = x; + d_size = N; + } + void erase( const TYPE &x ) + { + size_t N = 0; + for ( size_t i = 0; i < d_size; i++ ) { + if ( d_data[i] != x ) + d_data[N++] = d_data[i]; + } + d_size = N; + } + void insert( const TYPE &x ) + { + if ( std::find( begin(), end(), x ) == end() ) { + push_back( x ); + std::sort( begin(), end() ); + } + } + +private: + size_t d_size; + TYPE d_data[CAPACITY]; +}; + + +/**************************************************************************** + * Utility to temporarily clear a signal in a thread-safe manner * + * If multiple threads attempt to clear a signal, then it will be cleared * + * until all threads are finished * + ****************************************************************************/ +typedef void ( 
*handle_type )( int ); +static std::atomic_int reset_signal_count[128]; +static handle_type reset_signal_handler[128] = { nullptr }; +static bool initialize_reset_signal_count() +{ + for ( int i = 0; i < 128; i++ ) + reset_signal_count[i].store( 0 ); + return true; +} +static bool reset_signal_vars_initialize = initialize_reset_signal_count(); +static void clearSignal( int sig ) +{ + NULL_USE( reset_signal_vars_initialize ); + if ( reset_signal_count[sig].fetch_add( 1 ) == 0 ) + reset_signal_handler[sig] = signal( sig, SIG_IGN ); +} +static void resetSignal( int sig ) +{ + if ( reset_signal_count[sig].fetch_add( -1 ) == 1 ) + signal( sig, reset_signal_handler[sig] ); +} + + +/**************************************************************************** + * Utility to call system command and return output * + ****************************************************************************/ +#ifdef USE_WINDOWS +#define popen _popen +#define pclose _pclose +#endif +template +static inline int exec3( const char *cmd, FUNCTION &fun ) +{ + clearSignal( SIGCHLD ); // Clear child exited + auto pipe = popen( cmd, "r" ); + if ( pipe == nullptr ) + return -1; + while ( !feof( pipe ) ) { + char buffer[0x2000]; + buffer[0] = 0; + auto ptr = fgets( buffer, sizeof( buffer ), pipe ); + NULL_USE( ptr ); + if ( buffer[0] != 0 ) + fun( buffer ); + } + auto status = pclose( pipe ); + int code = WEXITSTATUS( status ); + std::this_thread::yield(); // Allow any signals to process + resetSignal( SIGCHLD ); // Clear child exited + return code; +} +template +static void exec2( const char *cmd, staticVector, blocKSize> &out ) +{ + out.clear(); + auto fun = [&out]( const char *line ) { + size_t N = strlen( line ); + size_t k = out.size(); + out.resize( k + 1 ); + out[k].fill( 0 ); + memcpy( out[k].data(), line, N ); + if ( out[k][N - 1] == '\n' ) + out[k][N - 1] = 0; + }; + exec3( cmd, fun ); +} +std::string StackTrace::exec( const string_view &cmd, int &code ) +{ + std::string result; + auto fun 
= [&result]( const char *line ) { result += line; }; + code = exec3( cmd.data(), fun ); + return result; +} + + +/**************************************************************************** + * stack_info * + ****************************************************************************/ +static_assert( sizeof( StackTrace::stack_info ) <= 512, "Unexpected size for stack_info" ); +StackTrace::stack_info::stack_info() { clear(); } +void StackTrace::stack_info::clear() +{ + line = 0; + address = nullptr; + address2 = nullptr; + object.fill( 0 ); + objectPath.fill( 0 ); + filename.fill( 0 ); + filenamePath.fill( 0 ); + function.fill( 0 ); +} +bool StackTrace::stack_info::operator==( const StackTrace::stack_info &rhs ) const +{ + if ( address == rhs.address ) + return true; + if ( address2 == rhs.address2 && object == rhs.object ) + return true; + return false; +} +bool StackTrace::stack_info::operator!=( const StackTrace::stack_info &rhs ) const +{ + return !operator==( rhs ); +} +int StackTrace::stack_info::getAddressWidth() const +{ + auto addr = reinterpret_cast( address ); + if ( addr <= 0xFFFF ) + return 4; + if ( addr <= 0xFFFFFFFF ) + return 8; + if ( addr <= 0xFFFFFFFFFFFF ) + return 12; + return 16; +} +std::string StackTrace::stack_info::print( int w1, int w2, int w3 ) const +{ + char out[32 + sizeof( stack_info )]; + print2( out, w1, w2, w3 ); + return std::string( out ); +} +void StackTrace::stack_info::print( + std::ostream &out, const std::vector &stack, const StackTrace::string_view &prefix ) +{ + char buf[32 + sizeof( stack_info )]; + for ( const auto &tmp : stack ) { + tmp.print2( buf, 16, 20, 32 ); + out << prefix << buf << std::endl; + } +} +void StackTrace::stack_info::print2( char *out, int w1, int w2, int w3 ) const +{ + char tmp1[16], tmp2[16]; + sprintf( tmp1, "0x%%0%illx: ", w1 ); + sprintf( tmp2, "%%%is %%%is", w2, w3 ); + size_t pos = 0; + pos += sprintf( &out[pos], tmp1, reinterpret_cast( address ) ); + pos += sprintf( &out[pos], tmp2, 
stripPath( object.data() ), function.data() ); + if ( filename[0] != 0 && line > 0 ) { + pos += sprintf( &out[pos], " %s:%u", stripPath( filename.data() ), line ); + } else if ( filename[0] != 0 ) { + pos += sprintf( &out[pos], " %s", stripPath( filename.data() ) ); + } else if ( line > 0 ) { + pos += sprintf( &out[pos], " : %u", line ); + } + NULL_USE( pos ); +} +size_t StackTrace::stack_info::size() const { return sizeof( *this ); } +char *StackTrace::stack_info::pack( char *ptr ) const +{ + memcpy( ptr, this, sizeof( *this ) ); + return ptr + sizeof( *this ); +} +const char *StackTrace::stack_info::unpack( const char *ptr ) +{ + memcpy( this, ptr, sizeof( *this ) ); + return ptr + sizeof( *this ); +} + + +/**************************************************************************** + * multi_stack_info * + ****************************************************************************/ +StackTrace::multi_stack_info::multi_stack_info( const std::vector &rhs ) +{ + operator=( rhs ); +} +StackTrace::multi_stack_info &StackTrace::multi_stack_info::operator=( + const std::vector &rhs ) +{ + clear(); + if ( rhs.empty() ) + return *this; + N = 1; + stack = rhs[0]; + if ( rhs.size() > 1 ) + add( rhs.size() - 1, &rhs[1] ); + return *this; +} +void StackTrace::multi_stack_info::clear() +{ + N = 0; + stack.clear(); + children.clear(); +} +template +void StackTrace::multi_stack_info::print2( int Np, char *prefix, int w[3], bool c, FUN &fun ) const +{ + if ( stack.address != 0 ) { + prefix[Np] = 0; + char line[4096]; + int N2 = sprintf( line, "%s[%i] ", prefix, N ); + stack.print2( &line[N2], w[0], w[1], w[2] ); + fun( line ); + prefix[Np++] = c ? 
'|' : ' '; + prefix[Np++] = ' '; + } + for ( size_t i = 0; i < children.size(); i++ ) { + bool c2 = children.size() > 1 && i < children.size() - 1 && stack.address != 0; + const auto &child = children[i]; + child.print2( Np, prefix, w, c2, fun ); + } +} +std::vector StackTrace::multi_stack_info::print( const string_view &prefix ) const +{ + std::vector text; + int w[3] = { getAddressWidth(), getObjectWidth(), getFunctionWidth() }; + char prefix2[1024]; + memcpy( prefix2, prefix.data(), prefix.size() ); + auto fun = [&text]( const char *line ) { text.push_back( line ); }; + print2( prefix.size(), prefix2, w, false, fun ); + return text; +} +void StackTrace::multi_stack_info::print( std::ostream &out, const string_view &prefix ) const +{ + int w[3] = { getAddressWidth(), getObjectWidth(), getFunctionWidth() }; + char prefix2[1024]; + memcpy( prefix2, prefix.data(), prefix.size() ); + auto fun = [&out]( const char *line ) { out << line << std::endl; }; + print2( prefix.size(), prefix2, w, false, fun ); +} +std::string StackTrace::multi_stack_info::printString( const string_view &prefix ) const +{ + int w[3] = { getAddressWidth(), getObjectWidth(), getFunctionWidth() }; + char prefix2[1024]; + memcpy( prefix2, prefix.data(), prefix.size() ); + std::string out; + out.reserve( 4096 ); + auto fun = [&out]( const char *line ) { + out += line; + out += '\n'; + }; + print2( prefix.size(), prefix2, w, false, fun ); + return out; +} +int StackTrace::multi_stack_info::getAddressWidth() const +{ + int w = stack.getAddressWidth(); + for ( const auto &child : children ) + w = std::max( w, child.getAddressWidth() ); + return w; +} +int StackTrace::multi_stack_info::getObjectWidth() const +{ + int w = std::min( stack.object.size() + 1, 20 ); + for ( const auto &child : children ) + w = std::max( w, child.getObjectWidth() ); + return w; +} +int StackTrace::multi_stack_info::getFunctionWidth() const +{ + int w = std::min( stack.function.size() + 1, 40 ); + for ( const auto &child : 
children ) + w = std::max( w, child.getFunctionWidth() ); + return w; +} +void StackTrace::multi_stack_info::add( size_t len, const stack_info *stack ) +{ + if ( len == 0 ) + return; + const auto &s = stack[len - 1]; + for ( auto &i : children ) { + if ( i.stack == s ) { + i.N++; + if ( len > 1 ) + i.add( len - 1, stack ); + return; + } + } + children.resize( children.size() + 1 ); + children.back().N = 1; + children.back().stack = s; + if ( len > 1 ) + children.back().add( len - 1, stack ); +} +void StackTrace::multi_stack_info::add( const multi_stack_info &rhs ) +{ + N += rhs.N; + for ( const auto &x : rhs.children ) { + bool found = false; + for ( auto &tmp : children ) { + if ( tmp.stack == x.stack ) { + found = true; + tmp.add( x ); + } + } + if ( !found ) + children.push_back( x ); + } +} +size_t StackTrace::multi_stack_info::size() const +{ + size_t bytes = 2 * sizeof( int ) + stack.size(); + for ( const auto &tmp : children ) + bytes += tmp.size(); + return bytes; +} +char *StackTrace::multi_stack_info::pack( char *ptr ) const +{ + int N2 = N; + memcpy( ptr, &N2, sizeof( int ) ); + ptr += sizeof( int ); + ptr = stack.pack( ptr ); + int Nc = children.size(); + memcpy( ptr, &Nc, sizeof( int ) ); + ptr += sizeof( int ); + for ( const auto &tmp : children ) + ptr = tmp.pack( ptr ); + return ptr; +} +const char *StackTrace::multi_stack_info::unpack( const char *ptr ) +{ + int N2, Nc; + memcpy( &N2, ptr, sizeof( int ) ); + ptr += sizeof( int ); + N = N2; + ptr = stack.unpack( ptr ); + memcpy( &Nc, ptr, sizeof( int ) ); + ptr += sizeof( int ); + children.resize( Nc ); + for ( auto &tmp : children ) + ptr = tmp.unpack( ptr ); + return ptr; +} + + +/**************************************************************************** + * Function to get the executable name * + ****************************************************************************/ +static std::array getExecutableName() +{ + std::array exe; + try { +#ifdef USE_LINUX + char buf[0x10000] = { 0 }; + int 
len = ::readlink( "/proc/self/exe", buf, 0x10000 ); + if ( len != -1 ) { + buf[len] = '\0'; + strcpy( exe.data(), buf ); + } +#elif defined( USE_MAC ) + uint32_t size = 0x10000; + char buf[0x10000] = { 0 }; + if ( _NSGetExecutablePath( buf, &size ) == 0 ) + strcpy( exe.data(), buf ); +#elif defined( USE_WINDOWS ) + DWORD size = 0x10000; + char buf[0x10000] = { 0 }; + GetModuleFileName( nullptr, buf, size ); + strcpy( exe.data(), buf ); +#endif + } catch ( ... ) { + } + return exe; +} +static const char *getExecutable2() +{ + static auto execname = getExecutableName(); + return execname.data(); +} +std::string StackTrace::getExecutable() { return std::string( getExecutable2() ); } + + +/**************************************************************************** + * Function to get symbols for the executable from nm (if availible) * + * Note: this function maintains an internal cached copy to prevent * + * exccessive calls to nm. This function also uses a lock to ensure * + * thread safety. 
* + ****************************************************************************/ +static_assert( sizeof( StackTrace::symbols_struct ) <= 128, "Unexpected size for symbols_struct" ); +std::vector global_symbols_data; +static bool global_symbols_loaded = false; +static std::vector getSymbolData() +{ + std::vector data; +#ifdef USE_NM + try { + char cmd[1024]; +#ifdef USE_LINUX + sprintf( cmd, "nm -n --demangle %s", getExecutable2() ); +#elif defined( USE_MAC ) + sprintf( cmd, "nm -n %s | c++filt", getExecutable2() ); +#else +#error Unknown OS using nm +#endif + // Function to process a line of nm output + auto fun = [&data]( char *line ) { + if ( line[0] == ' ' ) + return; + auto *a = line; + char *b = strchr( a, ' ' ); + if ( b == nullptr ) + return; + b[0] = 0; + b++; + char *c = strchr( b, ' ' ); + if ( c == nullptr ) + return; + c[0] = 0; + c++; + char *d = strchr( c, '\n' ); + if ( d ) + d[0] = 0; + size_t add = strtoul( a, nullptr, 16 ); + size_t k = data.size(); + data.resize( k + 1 ); + data[k].address = reinterpret_cast( add ); + data[k].type = b[0]; + copy( c, data[k].obj, data[k].objPath ); + }; + // Call nm + exec3( cmd, fun ); + } catch ( ... 
) { + } +#endif + return data; +} +std::vector StackTrace::getSymbols() +{ + StackTrace_mutex.lock(); + if ( !global_symbols_loaded ) { + global_symbols_data = getSymbolData(); + global_symbols_loaded = true; + } + auto data = global_symbols_data; + StackTrace_mutex.unlock(); + return data; +} +void StackTrace::clearSymbols() +{ + StackTrace_mutex.lock(); + if ( global_symbols_loaded ) { + global_symbols_data = std::vector(); + global_symbols_loaded = false; + } + StackTrace_mutex.unlock(); +} + + +/**************************************************************************** + * Function to get call stack info * + ****************************************************************************/ +#ifdef USE_MAC +static void *loadAddress( const uint32_t &obj_hash ) +{ + static std::map obj_map; + if ( obj_map.empty() ) { + uint32_t numImages = _dyld_image_count(); + for ( uint32_t i = 0; i < numImages; i++ ) { + auto header = _dyld_get_image_header( i ); + auto name = _dyld_get_image_name( i ); + auto p = strrchr( name, '/' ); + auto address = const_cast( header ); + auto hash = hashString( p + 1 ); + obj_map.insert( std::make_pair( hash, address ) ); + } + } + auto it = obj_map.find( obj_hash ); + void *address = 0; + if ( it != obj_map.end() ) { + address = it->second; + } else { + it = obj_map.find( obj_hash ); + if ( it != obj_map.end() ) + address = it->second; + } + return address; +} +static auto split_atos( const std::string &buf ) +{ + int line = 0; + std::array fun; + std::array obj, file, objPath, filePath; + if ( buf.empty() ) + return std::tie( fun, obj, objPath, file, filePath, line ); + // Get the function + size_t index = buf.find( " (in " ); + if ( index == std::string::npos ) { + copy( buf.c_str(), fun ); + cleanupFunctionName( fun ); + return std::tie( fun, obj, objPath, file, filePath, line ); + } + copy( buf.substr( 0, index ).c_str(), fun ); + cleanupFunctionName( fun ); + std::string tmp = buf.substr( index + 5 ); + // Get the object + index = 
tmp.find( ')' ); + copy( tmp.substr( 0, index ).c_str(), obj, objPath ); + tmp = tmp.substr( index + 1 ); + // Get the filename and line number + size_t p1 = tmp.find( '(' ); + size_t p2 = tmp.find( ')' ); + tmp = tmp.substr( p1 + 1, p2 - p1 - 1 ); + index = tmp.find( ':' ); + if ( index != std::string::npos ) { + copy( tmp.substr( 0, index ).c_str(), file, filePath ); + line = std::stoi( tmp.substr( index + 1 ) ); + } else if ( p1 != std::string::npos ) { + copy( tmp.c_str(), file, filePath ); + } + return std::tie( fun, obj, objPath, file, filePath, line ); +} +#endif +// clang-format off +template +static void getFileAndLineObject( staticVector &info ) +{ + if ( info.empty() ) + return; + // This gets the file and line numbers for multiple stack lines in the same object + #if defined( USE_LINUX ) + // Create the call command + uint32_t N; + char cmd[4096]; + static_assert( sizeof(unsigned long) == sizeof(size_t), "Unxpected size for ul" ); + if ( info[0]->objectPath[0] == 0 ) + N = sprintf(cmd,"addr2line -C -e %s -f",info[0]->object.data()); + else + N = sprintf(cmd,"addr2line -C -e %s/%s -f",info[0]->objectPath.data(),info[0]->object.data()); + for (size_t i=0; i( info[i]->address ), + reinterpret_cast( info[i]->address2 ) ); + } + N += sprintf(&cmd[N]," 2> /dev/null"); + // Get the function/line/file + staticVector,4*blockSize> output; + exec2( cmd, output ); + if ( output.size() != 4*info.size() ) + return; + // Add the results to info + for (size_t i=0; ifunction.empty() ) { + cleanupFunctionName( tmp1 ); + copy( tmp1, info[i]->function ); + } + // get file and line + char *buf = tmp2; + if ( buf[0] != '?' 
&& buf[0] != 0 ) { + size_t j = 0; + for ( j = 0; j < 4095 && buf[j] != ':'; j++ ) { + } + buf[j] = 0; + copy( buf, info[i]->filename, info[i]->filenamePath ); + info[i]->line = atoi( &buf[j + 1] ); + } + } + #elif defined( USE_MAC ) + // Create the call command + void* load_address = loadAddress( hashString( info[0]->object.data() ) ); + if ( load_address == nullptr ) + return; + // Call atos to get the object info + uint32_t N; + char cmd[4096]; + static_assert( sizeof(unsigned long) == sizeof(size_t), "Unxpected size for ul" ); + auto addr = reinterpret_cast( load_address ); + if ( info[0]->objectPath[0] == 0 ) + N = sprintf( cmd, "atos -o %s -f -l %lx", info[0]->object.data(), addr ); + else + N = sprintf( cmd, "atos -o %s/%s -f -l %lx", info[0]->objectPath.data(), info[0]->object.data(), addr ); + for (size_t i=0; i( info[i]->address ) ); + N += sprintf(&cmd[N]," 2> /dev/null"); + // Get the function/line/file + staticVector,blockSize> output; + exec2( cmd, output ); + if ( output.size() != info.size() ) + return; + // Parse the output for function, file and line info + for ( size_t i=0; ifunction.empty() ) + info[i]->function = std::get<0>(data); + if ( info[i]->object.empty() ) { + info[i]->object = std::get<1>(data); + info[i]->objectPath = std::get<2>(data); + } + if ( info[i]->filename.empty() ) { + info[i]->filename = std::get<3>(data); + info[i]->filenamePath = std::get<4>(data); + } + if ( info[i]->line==0 ) + info[i]->line = std::get<5>(data); + } + #endif +} +static void getFileAndLine( size_t N, StackTrace::stack_info *info ) +{ + constexpr size_t blockSize = 1024; + // Operate on blocks + size_t i0 = 0; + while ( i0 < N ) { + // Get a list of objects + staticVector objectHash; + for ( size_t i = i0; i list; + for ( size_t i = i0; i= info.address ) + upper = value; + else + lower = value; + } + if ( upper > 0 ) { + copy( data[lower].obj, info.object ); + copy( data[lower].objPath, info.objectPath ); + } else { + copy( getExecutable2(), info.object, 
info.objectPath ); + } + } +} +static void signal_handler( int sig ) +{ + printf("Signal caught acquiring stack (%i)\n",sig); + StackTrace::setErrorHandler( [](const StackTrace::abort_error &err) { std::cerr << err.what(); exit( -1 ); } ); +} +static void getStackInfo2( size_t N, void* const* address, StackTrace::stack_info *info ) +{ + // Temporarily handle signals to prevent recursion on the stack + auto prev_handler = signal( SIGINT, signal_handler ); + // Get the detailed stack info + try { + #ifdef USE_WINDOWS + IMAGEHLP_SYMBOL64 pSym[1024]; + memset( pSym, 0, sizeof( pSym ) ); + pSym->SizeOfStruct = sizeof( IMAGEHLP_SYMBOL64 ); + pSym->MaxNameLength = 1024; + + IMAGEHLP_MODULE64 Module; + memset( &Module, 0, sizeof( Module ) ); + Module.SizeOfStruct = sizeof( Module ); + + HANDLE pid = GetCurrentProcess(); + + for (size_t i=0; i( address[i] ); + DWORD64 offsetFromSymbol; + if ( SymGetSymFromAddr( pid, address2, &offsetFromSymbol, pSym ) != FALSE ) { + char name[8192]={0}; + DWORD rtn = UnDecorateSymbolName( pSym->Name, name, sizeof(name)-1, UNDNAME_COMPLETE ); + if ( rtn == 0 ) { + cleanupFunctionName( pSym->Name ); + copy( pSym->Name, info[i].function ); + } else { + info[i].function.fill( 0 ); + } + } else { + printf( "ERROR: SymGetSymFromAddr (%d,%p)\n", GetLastError(), address2 ); + } + + // Get line number + IMAGEHLP_LINE64 Line; + memset( &Line, 0, sizeof( Line ) ); + Line.SizeOfStruct = sizeof( Line ); + DWORD offsetFromLine; + if ( SymGetLineFromAddr64( pid, address2, &offsetFromLine, &Line ) != FALSE ) { + info[i].line = Line.LineNumber; + copy( Line.FileName, info[i].filename, info[i].filenamePath ); + } else { + info[i].line = 0; + copy( nullptr, info[i].filename, info[i].filenamePath ); + } + + // Get the object + if ( SymGetModuleInfo64( pid, address2, &Module ) != FALSE ) { + copy( Module.LoadedImageName, info[i].object, info[i].objectPath ); + } + } + #else + for (size_t i=0; i tmp; + copy( dlinfo.dli_sname, tmp ); + cleanupFunctionName( 
tmp.data() ); + copy( tmp, info[i].function ); + } + #else + getDataFromGlobalSymbols( info[i] ); + #endif + } + // Get the filename / line numbers for each item on the stack + getFileAndLine( N, info ); + #endif + } catch ( ... ) { + } + signal( SIGINT, prev_handler ) ; +} +StackTrace::stack_info StackTrace::getStackInfo( void *address ) +{ + StackTrace::stack_info info; + getStackInfo2( 1, &address, &info ); + return info; +} +std::vector StackTrace::getStackInfo( const std::vector& address ) +{ + std::vector info( address.size() ); + getStackInfo2( address.size(), address.data(), info.data() ); + return info; +} + + +/**************************************************************************** +* Helper functions for controlling interal signals * +****************************************************************************/ +static int backtrace_thread( const std::thread::native_handle_type&, void**, size_t ); +#if defined( USE_LINUX ) || defined( USE_MAC ) +static int global_thread_backtrace_count; +static void* global_thread_backtrace[1000]; +static void _callstack_signal_handler( int, siginfo_t*, void* ) +{ + global_thread_backtrace_count = backtrace_thread( StackTrace::thisThread(), global_thread_backtrace, 1000 ); +} +static int get_thread_callstack_signal() +{ + if ( 39 >= SIGRTMIN && 39 <= SIGRTMAX ) + return 39; + return std::min( SIGRTMIN+4, SIGRTMAX ); +} +static int thread_callstack_signal = get_thread_callstack_signal(); +#endif + + +/**************************************************************************** +* Function to get the list of all active threads * +****************************************************************************/ +#if defined( USE_LINUX ) || defined( USE_MAC ) +static std::thread::native_handle_type thread_handle; +static bool thread_id_finished; +static void _activeThreads_signal_handler( int ) +{ + auto handle = StackTrace::thisThread( ); + thread_handle = handle; + thread_id_finished = true; +} +#endif +#ifdef USE_LINUX 
+static constexpr int get_tid( int pid, const char *line ) +{ + char buf2[128]={0}; + int i1 = 0; + while ( line[i1]==' ' ) { i1++; } + int i2 = i1; + while ( line[i2]!=' ' ) { i2++; } + memcpy(buf2,&line[i1],i2-i1); + buf2[i2-i1+1] = 0; + int pid2 = atoi(buf2); + if ( pid2 != pid ) + return -1; + i1 = i2; + while ( line[i1]==' ' ) { i1++; } + i2 = i1; + while ( line[i2]!=' ' ) { i2++; } + memcpy(buf2,&line[i1],i2-i1); + buf2[i2-i1+1] = 0; + int tid = atoi(buf2); + return tid; +} +#endif +std::thread::native_handle_type StackTrace::thisThread( ) +{ + #if defined( USE_LINUX ) || defined( USE_MAC ) + return pthread_self(); + #elif defined( USE_WINDOWS ) + return GetCurrentThread(); + #else + #warning Stack trace is not supported on this compiler/OS + return std::thread::native_handle_type(); + #endif +} +static staticVector getActiveThreads( ) +{ + staticVector threads; + #if defined( USE_LINUX ) + int N_tid = 0, tid[1024]; + int pid = getpid(); + char cmd[128]; + sprintf( cmd, "ps -T -p %i", pid ); + auto fun = [&N_tid,&tid,pid]( const char* line ) { + int id = get_tid( pid, line ); + if ( id != -1 && N_tid < 1024 ) + tid[N_tid++] = id; + }; + exec3( cmd, fun ); + int myid = syscall(SYS_gettid); + for ( int i=0; i(t2-t1).count()<0.1 ) { + std::this_thread::yield(); + t2 = std::chrono::high_resolution_clock::now(); + } + threads.push_back( thread_handle ); + StackTrace_mutex.unlock(); + } + signal( thread_callstack_signal, old ); + #elif defined( USE_MAC ) + thread_act_port_array_t thread_list; + mach_msg_type_number_t thread_count = 0; + task_threads(mach_task_self(), &thread_list, &thread_count); + auto old = signal( thread_callstack_signal, _activeThreads_signal_handler ); + for ( int i=0; i(t2-t1).count()<0.1 ) { + std::this_thread::yield(); + t2 = std::chrono::high_resolution_clock::now(); + } + threads.push_back( thread_handle ); + StackTrace_mutex.unlock();*/ + } + signal( thread_callstack_signal, old ); + #elif defined( USE_WINDOWS ) + HANDLE hThreadSnap = 
CreateToolhelp32Snapshot( TH32CS_SNAPTHREAD, 0 ); + if( hThreadSnap != INVALID_HANDLE_VALUE ) { + // Fill in the size of the structure before using it + THREADENTRY32 te32 + te32.dwSize = sizeof(THREADENTRY32 ); + // Retrieve information about the first thread, and exit if unsuccessful + if( !Thread32First( hThreadSnap, &te32 ) ) { + printError( TEXT("Thread32First") ); // Show cause of failure + CloseHandle( hThreadSnap ); // Must clean up the snapshot object! + return( FALSE ); + } + // Now walk the thread list of the system + do { + if ( te32.th32OwnerProcessID == dwOwnerPID ) + threads.push_back( te32.th32ThreadID ); + } while( Thread32Next(hThreadSnap, &te32 ) ); + CloseHandle( hThreadSnap ); // Must clean up the snapshot object! + } + #else + #warning activeThreads is not yet supported on this compiler/OS + #endif + // Add the current thread + threads.push_back( StackTrace::thisThread() ); + // Remove the globalMonitorThread + if ( globalMonitorThread ) { + auto globalThreadId = globalMonitorThread->native_handle(); + for ( int i = threads.size() - 1; i >= 0; i-- ) { + if ( threads[i] == globalThreadId ) { + std::swap( threads[i], threads.back() ); + threads.pop_back(); + } + } + } + // Sort the threads, remove any duplicates and remove the globalMonitorThread + std::sort( threads.begin(), threads.end() ); + return threads; +} +// clang-format on +std::vector StackTrace::activeThreads() +{ + auto threads = getActiveThreads(); + std::sort( threads.begin(), threads.end() ); + return std::vector( threads.begin(), threads.end() ); +} + + +/**************************************************************************** + * Function to get the backtrace * + ****************************************************************************/ +static int backtrace_thread( + const std::thread::native_handle_type &tid, void **buffer, size_t size ) +{ + int count = 0; +#if defined( USE_LINUX ) || defined( USE_MAC ) + // Get the trace + if ( tid == pthread_self() ) { + count = 
::backtrace( buffer, size ); + } else { + // Note: this will get the backtrace, but terminates the thread in the process!!! + StackTrace_mutex.lock(); + struct sigaction sa; + sigfillset( &sa.sa_mask ); + sa.sa_flags = SA_SIGINFO; + sa.sa_sigaction = _callstack_signal_handler; + sigaction( thread_callstack_signal, &sa, nullptr ); + global_thread_backtrace_count = -1; + pthread_kill( tid, thread_callstack_signal ); + auto t1 = std::chrono::high_resolution_clock::now(); + auto t2 = std::chrono::high_resolution_clock::now(); + while ( global_thread_backtrace_count == -1 && + std::chrono::duration( t2 - t1 ).count() < 0.15 ) { + std::this_thread::yield(); + t2 = std::chrono::high_resolution_clock::now(); + } + count = std::max( global_thread_backtrace_count, 0 ); + memcpy( buffer, global_thread_backtrace, count * sizeof( void * ) ); + global_thread_backtrace_count = -1; + StackTrace_mutex.unlock(); + } +#elif defined( USE_WINDOWS ) +#if defined( DBGHELP ) + + // Load the modules for the stack trace + LoadModules(); + + // Initialize stackframe for first call + ::CONTEXT context; + memset( &context, 0, sizeof( context ) ); + context.ContextFlags = CONTEXT_FULL; + RtlCaptureContext( &context ); + STACKFRAME64 frame; // in/out stackframe + memset( &frame, 0, sizeof( frame ) ); +#ifdef _M_IX86 + DWORD imageType = IMAGE_FILE_MACHINE_I386; + frame.AddrPC.Offset = context.Eip; + frame.AddrPC.Mode = AddrModeFlat; + frame.AddrFrame.Offset = context.Ebp; + frame.AddrFrame.Mode = AddrModeFlat; + frame.AddrStack.Offset = context.Esp; + frame.AddrStack.Mode = AddrModeFlat; +#elif _M_X64 + DWORD imageType = IMAGE_FILE_MACHINE_AMD64; + frame.AddrPC.Offset = context.Rip; + frame.AddrPC.Mode = AddrModeFlat; + frame.AddrFrame.Offset = context.Rsp; + frame.AddrFrame.Mode = AddrModeFlat; + frame.AddrStack.Offset = context.Rsp; + frame.AddrStack.Mode = AddrModeFlat; +#elif _M_IA64 + DWORD imageType = IMAGE_FILE_MACHINE_IA64; + frame.AddrPC.Offset = context.StIIP; + frame.AddrPC.Mode = 
AddrModeFlat; + frame.AddrFrame.Offset = context.IntSp; + frame.AddrFrame.Mode = AddrModeFlat; + frame.AddrBStore.Offset = context.RsBSP; + frame.AddrBStore.Mode = AddrModeFlat; + frame.AddrStack.Offset = context.IntSp; + frame.AddrStack.Mode = AddrModeFlat; +#else +#error "Platform not supported!" +#endif + + auto pid = GetCurrentProcess(); + for ( int frameNum = 0; frameNum < 1024; ++frameNum ) { + BOOL rtn = StackWalk64( imageType, pid, tid, &frame, &context, readProcMem, + SymFunctionTableAccess, SymGetModuleBase64, NULL ); + if ( !rtn ) { + printf( "ERROR: StackWalk64 (%p)\n", frame.AddrPC.Offset ); + break; + } + if ( frame.AddrPC.Offset != 0 ) { + buffer[count] = reinterpret_cast( frame.AddrPC.Offset ) ); + count++; + } + if ( frame.AddrReturn.Offset == 0 ) + break; + } + SetLastError( ERROR_SUCCESS ); +#endif +#else +#warning Stack trace is not supported on this compiler/OS +#endif + return count; +} +std::vector StackTrace::backtrace( std::thread::native_handle_type tid ) +{ + std::vector trace( 1000, nullptr ); + size_t count = backtrace_thread( tid, trace.data(), trace.size() ); + trace.resize( count ); + return trace; +} +std::vector StackTrace::backtrace() +{ + std::vector trace( 1000, nullptr ); + size_t count = backtrace_thread( thisThread(), trace.data(), trace.size() ); + trace.resize( count ); + return trace; +} +std::vector> StackTrace::backtraceAll() +{ + // Get the list of threads + auto threads = getActiveThreads(); + // Get the backtrace of each thread + std::vector> trace( threads.size() ); + for ( size_t i = 0; i < threads.size(); i++ ) { + trace[i].resize( 1000 ); + size_t count = backtrace_thread( threads[i], trace[i].data(), trace[i].size() ); + trace[i].resize( count ); + } + return trace; +} + + +/**************************************************************************** + * Function to get the current call stack * + ****************************************************************************/ +std::vector StackTrace::getCallStack() 
+{ + void *trace[1000]; + size_t count = backtrace_thread( thisThread(), trace, 1000 ); + std::vector info( count ); + getStackInfo2( count, trace, info.data() ); + return info; +} +std::vector StackTrace::getCallStack( std::thread::native_handle_type id ) +{ + void *trace[1000]; + size_t count = backtrace_thread( id, trace, 1000 ); + std::vector info( count ); + getStackInfo2( count, trace, info.data() ); + return info; +} +static std::vector> generateStacks( + const std::vector> &trace ) +{ + // Function to find an address + auto find = []( const auto &data, auto x ) { + for ( size_t i = 0; i < data.size(); i++ ) { + if ( data[i] == x ) + return static_cast( i ); + } + return -1; + }; + // Get the stack data for all pointers + std::vector addresses; + addresses.reserve( 1024 ); + for ( const auto &tmp : trace ) { + for ( auto ptr : tmp ) { + if ( find( addresses, ptr ) == -1 ) + addresses.push_back( ptr ); + } + } + auto stack_data = StackTrace::getStackInfo( addresses ); + // Create the stack traces + std::vector> stack( trace.size() ); + for ( size_t i = 0; i < trace.size(); i++ ) { + // Create the stack for the given thread trace + stack[i].resize( trace[i].size() ); + for ( size_t j = 0; j < trace[i].size(); j++ ) { + int k = find( addresses, trace[i][j] ); + stack[i][j] = stack_data[k]; + } + } + return stack; +} +static StackTrace::multi_stack_info generateMultiStack( + const std::vector> &trace ) +{ + // Get the stack data for all pointers + auto stack = generateStacks( trace ); + // Create the multi-stack trace + StackTrace::multi_stack_info multistack; + multistack.N = stack.size(); + for ( const auto &tmp : stack ) + multistack.add( tmp.size(), tmp.data() ); + return multistack; +} +static StackTrace::multi_stack_info generateMultiStack( + const staticVector &threads ) +{ + // Get the stack data for all pointers + std::vector> trace( threads.size() ); + auto it = threads.begin(); + for ( size_t i = 0; i < threads.size(); i++, ++it ) + trace[i] = 
StackTrace::backtrace( *it ); + // Create the multi-stack trace + return generateMultiStack( trace ); +} +StackTrace::multi_stack_info StackTrace::getAllCallStacks() +{ + // Get the list of active thread + auto threads = getActiveThreads(); + // Create the multi-stack strucutre + auto stack = generateMultiStack( threads ); + return stack; +} + + +/**************************************************************************** + * Function to get system search paths * + ****************************************************************************/ +std::string StackTrace::getSymPaths() +{ + std::string paths; +#ifdef USE_WINDOWS + // Create the path list (seperated by ';' ) + paths = std::string( ".;" ); + paths.reserve( 1000 ); + // Add the current directory + paths += getCurrentDirectory() + ";"; + // Now add the path for the main-module: + char temp[1024]; + memset( temp, 0, sizeof( temp ) ); + if ( GetModuleFileNameA( nullptr, temp, sizeof( temp ) - 1 ) > 0 ) { + for ( char *p = ( temp + strlen( temp ) - 1 ); p >= temp; --p ) { + // locate the rightmost path separator + if ( ( *p == '\\' ) || ( *p == '/' ) || ( *p == ':' ) ) { + *p = 0; + break; + } + } + if ( strlen( temp ) > 0 ) { + paths += temp; + paths += ";"; + } + } + memset( temp, 0, sizeof( temp ) ); + if ( GetEnvironmentVariableA( "_NT_SYMBOL_PATH", temp, sizeof( temp ) - 1 ) > 0 ) { + paths += temp; + paths += ";"; + } + memset( temp, 0, sizeof( temp ) ); + if ( GetEnvironmentVariableA( "_NT_ALTERNATE_SYMBOL_PATH", temp, sizeof( temp ) - 1 ) > 0 ) { + paths += temp; + paths += ";"; + } + memset( temp, 0, sizeof( temp ) ); + if ( GetEnvironmentVariableA( "SYSTEMROOT", temp, sizeof( temp ) - 1 ) > 0 ) { + paths += temp; + paths += ";"; + // also add the "system32"-directory: + paths += temp; + paths += "\\system32;"; + } + memset( temp, 0, sizeof( temp ) ); + if ( GetEnvironmentVariableA( "SYSTEMDRIVE", temp, sizeof( temp ) - 1 ) > 0 ) { + paths += "SRV*;" + std::string( temp ) + + 
"\\websymbols*http://msdl.microsoft.com/download/symbols;"; + } else { + paths += "SRV*c:\\websymbols*http://msdl.microsoft.com/download/symbols;"; + } +#endif + return paths; +} + + +/**************************************************************************** + * Load modules for windows * + ****************************************************************************/ +#ifdef USE_WINDOWS +BOOL StackTrace::GetModuleListTH32( HANDLE hProcess, DWORD pid ) +{ + // CreateToolhelp32Snapshot() + typedef HANDLE( __stdcall * tCT32S )( DWORD dwFlags, DWORD th32ProcessID ); + // Module32First() + typedef BOOL( __stdcall * tM32F )( HANDLE hSnapshot, LPMODULEENTRY32 lpme ); + // Module32Next() + typedef BOOL( __stdcall * tM32N )( HANDLE hSnapshot, LPMODULEENTRY32 lpme ); + + // try both dlls... + const TCHAR *dllname[] = { _T("kernel32.dll"), _T("tlhelp32.dll") }; + HINSTANCE hToolhelp = nullptr; + tCT32S pCT32S = nullptr; + tM32F pM32F = nullptr; + tM32N pM32N = nullptr; + + HANDLE hSnap; + MODULEENTRY32 me; + me.dwSize = sizeof( me ); + + for ( size_t i = 0; i < ( sizeof( dllname ) / sizeof( dllname[0] ) ); i++ ) { + hToolhelp = LoadLibrary( dllname[i] ); + if ( hToolhelp == nullptr ) + continue; + pCT32S = (tCT32S) GetProcAddress( hToolhelp, "CreateToolhelp32Snapshot" ); + pM32F = (tM32F) GetProcAddress( hToolhelp, "Module32First" ); + pM32N = (tM32N) GetProcAddress( hToolhelp, "Module32Next" ); + if ( ( pCT32S != nullptr ) && ( pM32F != nullptr ) && ( pM32N != nullptr ) ) + break; // found the functions! 
+ FreeLibrary( hToolhelp ); + hToolhelp = nullptr; + } + + if ( hToolhelp == nullptr ) + return FALSE; + + hSnap = pCT32S( TH32CS_SNAPMODULE, pid ); + if ( hSnap == (HANDLE) -1 ) { + FreeLibrary( hToolhelp ); + return FALSE; + } + + bool keepGoing = !!pM32F( hSnap, &me ); + int cnt = 0; + while ( keepGoing ) { + LoadModule( hProcess, me.szExePath, me.szModule, (DWORD64) me.modBaseAddr, me.modBaseSize ); + cnt++; + keepGoing = !!pM32N( hSnap, &me ); + } + CloseHandle( hSnap ); + FreeLibrary( hToolhelp ); + if ( cnt <= 0 ) + return FALSE; + return TRUE; +} +DWORD StackTrace::LoadModule( + HANDLE hProcess, LPCSTR img, LPCSTR mod, DWORD64 baseAddr, DWORD size ) +{ + CHAR *szImg = _strdup( img ); + CHAR *szMod = _strdup( mod ); + DWORD result = ERROR_SUCCESS; + if ( ( szImg == nullptr ) || ( szMod == nullptr ) ) { + result = ERROR_NOT_ENOUGH_MEMORY; + } else { + if ( SymLoadModule( hProcess, 0, szImg, szMod, baseAddr, size ) == 0 ) + result = GetLastError(); + } + ULONGLONG fileVersion = 0; + if ( szImg != nullptr ) { + // try to retrive the file-version: + VS_FIXEDFILEINFO *fInfo = nullptr; + DWORD dwHandle; + DWORD dwSize = GetFileVersionInfoSizeA( szImg, &dwHandle ); + if ( dwSize > 0 ) { + LPVOID vData = malloc( dwSize ); + if ( vData != nullptr ) { + if ( GetFileVersionInfoA( szImg, dwHandle, dwSize, vData ) != 0 ) { + UINT len; + TCHAR szSubBlock[] = _T("\\"); + if ( VerQueryValue( vData, szSubBlock, (LPVOID *) &fInfo, &len ) == 0 ) { + fInfo = nullptr; + } else { + fileVersion = ( (ULONGLONG) fInfo->dwFileVersionLS ) + + ( (ULONGLONG) fInfo->dwFileVersionMS << 32 ); + } + } + free( vData ); + } + } + + // Retrive some additional-infos about the module + IMAGEHLP_MODULE64 Module; + Module.SizeOfStruct = sizeof( IMAGEHLP_MODULE64 ); + SymGetModuleInfo64( hProcess, baseAddr, &Module ); + LPCSTR pdbName = Module.LoadedImageName; + if ( Module.LoadedPdbName[0] != 0 ) + pdbName = Module.LoadedPdbName; + } + if ( szImg != nullptr ) + free( szImg ); + if ( szMod != 
nullptr ) + free( szMod ); + return result; +} +BOOL StackTrace::GetModuleListPSAPI( HANDLE hProcess ) +{ + DWORD cbNeeded; + HMODULE hMods[1024]; + char tt[8192]; + char tt2[8192]; + if ( !EnumProcessModules( hProcess, hMods, sizeof( hMods ), &cbNeeded ) ) { + return false; + } + if ( cbNeeded > sizeof( hMods ) ) { + printf( "Insufficient memory allocated in GetModuleListPSAPI\n" ); + return false; + } + int cnt = 0; + for ( DWORD i = 0; i < cbNeeded / sizeof( hMods[0] ); i++ ) { + // base address, size + MODULEINFO mi; + GetModuleInformation( hProcess, hMods[i], &mi, sizeof( mi ) ); + // image file name + tt[0] = 0; + GetModuleFileNameExA( hProcess, hMods[i], tt, sizeof( tt ) ); + // module name + tt2[0] = 0; + GetModuleBaseNameA( hProcess, hMods[i], tt2, sizeof( tt2 ) ); + DWORD dwRes = LoadModule( hProcess, tt, tt2, (DWORD64) mi.lpBaseOfDll, mi.SizeOfImage ); + if ( dwRes != ERROR_SUCCESS ) + printf( "ERROR: LoadModule (%d)\n", dwRes ); + cnt++; + } + + return cnt != 0; +} +void StackTrace::LoadModules() +{ + static bool modules_loaded = false; + if ( !modules_loaded ) { + modules_loaded = true; + + // Get the search paths for symbols + std::string paths = StackTrace::getSymPaths(); + + // Initialize the symbols + if ( SymInitialize( GetCurrentProcess(), paths.c_str(), FALSE ) == FALSE ) + printf( "ERROR: SymInitialize (%d)\n", GetLastError() ); + + DWORD symOptions = SymGetOptions(); + symOptions |= SYMOPT_LOAD_LINES | SYMOPT_FAIL_CRITICAL_ERRORS; + symOptions = SymSetOptions( symOptions ); + char buf[1024] = { 0 }; + if ( SymGetSearchPath( GetCurrentProcess(), buf, sizeof( buf ) ) == FALSE ) + printf( "ERROR: SymGetSearchPath (%d)\n", GetLastError() ); + + // First try to load modules from toolhelp32 + BOOL loaded = StackTrace::GetModuleListTH32( GetCurrentProcess(), GetCurrentProcessId() ); + + // Try to load from Psapi + if ( !loaded ) + loaded = StackTrace::GetModuleListPSAPI( GetCurrentProcess() ); + } +} +#endif + + 
+/**************************************************************************** + * Get the signal name * + ****************************************************************************/ +static char signalNames[128][32]; +const char *StackTrace::signalName( int sig ) +{ + static bool initialized = false; + if ( !initialized ) { + StackTrace_mutex.lock(); + memset( signalNames, 0, sizeof( signalNames ) ); + for ( int i = 0; i < 128; i++ ) + strcpy( signalNames[i], strsignal( i + 1 ) ); + StackTrace_mutex.unlock(); + initialized = true; + } + bool valid = sig > 0 && sig <= 128; + return valid ? signalNames[sig - 1] : nullptr; +} +std::vector StackTrace::allSignalsToCatch() +{ + std::vector signals; + signals.reserve( SIGRTMAX ); + for ( int i = 1; i < 32; i++ ) { + if ( i == SIGKILL || i == SIGSTOP ) + continue; + signals.push_back( i ); + } + for ( int i = SIGRTMIN; i <= SIGRTMAX; i++ ) { + if ( i == SIGKILL || i == SIGSTOP ) + continue; + signals.push_back( i ); + } + return signals; +} +template +static inline void erase( std::vector &x, TYPE y ) +{ + x.erase( std::find( x.begin(), x.end(), y ) ); +} +std::vector StackTrace::defaultSignalsToCatch() +{ + auto signals = allSignalsToCatch(); + erase( signals, SIGWINCH ); // Don't catch window changed by default + erase( signals, SIGCONT ); // Don't catch continue by default + erase( signals, SIGCHLD ); // Don't catch child exited by default + return signals; +} + + +/**************************************************************************** + * Set the signal handlers * + ****************************************************************************/ +static std::function abort_fun; +static StackTrace::abort_error rethrow() +{ + StackTrace::abort_error error; +#ifdef USE_LINUX + try { + static int tried_throw = 0; + if ( tried_throw == 0 ) { + tried_throw = 1; + throw; + } + // No active exception + } catch ( const StackTrace::abort_error &err ) { + // Caught a std::runtime_error + error = err; + } catch ( const 
std::exception &err ) { + // Caught a std::runtime_error + error.message = err.what(); + } catch ( ... ) { + // Caught an unknown exception + error.message = "Unknown exception"; + } +#else + error.message = "Unknown exception"; +#endif + if ( error.type == StackTrace::terminateType::unknown ) + error.type = StackTrace::terminateType::exception; + if ( error.bytes == 0 ) + error.bytes = StackTrace::Utilities::getMemoryUsage(); + if ( error.stack.empty() ) { + error.stackType = StackTrace::printStackType::local; + error.stack = StackTrace::backtrace(); + } + return error; +} +static void term_func_abort( int sig ) +{ + StackTrace::abort_error err; + err.type = StackTrace::terminateType::signal; + err.signal = sig; + err.bytes = StackTrace::Utilities::getMemoryUsage(); + err.stack = StackTrace::backtrace(); + err.stackType = StackTrace::printStackType::global; + abort_fun( err ); +} +static bool signals_set[256] = { false }; +static void term_func() +{ + auto err = rethrow(); + StackTrace::clearSignals(); + abort_fun( err ); +} +static void null_term_func() {} +void StackTrace::clearSignal( int sig ) +{ + if ( signals_set[sig] ) { + signal( sig, SIG_DFL ); + signals_set[sig] = false; + } +} +void StackTrace::clearSignals( const std::vector &signals ) +{ + for ( auto sig : signals ) { + signal( sig, SIG_DFL ); + signals_set[sig] = false; + } +} +void StackTrace::clearSignals() +{ + for ( size_t i = 0; i < sizeof( signals_set ); i++ ) { + if ( signals_set[i] ) { + signal( i, SIG_DFL ); + signals_set[i] = false; + } + } +} +void StackTrace::setSignals( const std::vector &signals, void ( *handler )( int ) ) +{ + for ( auto sig : signals ) { + signal( sig, handler ); + signals_set[sig] = true; + } + std::this_thread::yield(); +} +void StackTrace::raiseSignal( int signal ) { std::raise( signal ); } +void StackTrace::setErrorHandler( std::function abort ) +{ + abort_fun = abort; + std::set_terminate( term_func ); + setSignals( defaultSignalsToCatch(), &term_func_abort ); + 
std::set_unexpected( term_func ); +} +void StackTrace::clearErrorHandler() +{ + abort_fun = []( const StackTrace::abort_error & ) {}; + std::set_terminate( null_term_func ); + clearSignals(); + std::set_unexpected( null_term_func ); +} + + +/**************************************************************************** + * Functions to handle MPI errors * + ****************************************************************************/ +#ifdef USE_MPI +static bool MPI_Initialized() +{ + int initialized = 0, finalized = 0; + MPI_Initialized( &initialized ); + MPI_Finalized( &finalized ); + return initialized != 0 && finalized == 0; +} +static std::shared_ptr mpierr; +static void MPI_error_handler_fun( MPI_Comm *comm, int *err, ... ) +{ + if ( *err == MPI_ERR_COMM && *comm == MPI_COMM_WORLD ) { + // Special error handling for an invalid MPI_COMM_WORLD + std::cerr << "Error invalid MPI_COMM_WORLD"; + exit( -1 ); + } + int msg_len = 0; + char message[1000] = { 0 }; + MPI_Error_string( *err, message, &msg_len ); + StackTrace::abort_error error; + error.message = std::string( message ); + error.type = StackTrace::terminateType::MPI; + error.bytes = StackTrace::Utilities::getMemoryUsage(); + error.stack = StackTrace::backtrace(); + error.stackType = StackTrace::printStackType::global; + throw error; +} +void StackTrace::setMPIErrorHandler( MPI_Comm comm ) +{ + if ( !MPI_Initialized() ) + return; + if ( mpierr.get() == nullptr ) { + mpierr = std::make_shared(); + MPI_Comm_create_errhandler( MPI_error_handler_fun, mpierr.get() ); + } + MPI_Comm_set_errhandler( comm, *mpierr ); +} +void StackTrace::clearMPIErrorHandler( MPI_Comm comm ) +{ + if ( !MPI_Initialized() ) + return; + if ( mpierr.get() != nullptr ) + MPI_Errhandler_free( mpierr.get() ); // Delete the error handler + mpierr.reset(); + MPI_Comm_set_errhandler( comm, MPI_ERRORS_ARE_FATAL ); +} +#else +void StackTrace::setMPIErrorHandler( MPI_Comm ) {} +void StackTrace::clearMPIErrorHandler( MPI_Comm ) {} +#endif + + 
+/**************************************************************************** + * Global call stack functionallity * + ****************************************************************************/ +#ifdef USE_MPI +static MPI_Comm globalCommForGlobalCommStack = MPI_COMM_NULL; +static volatile int globalMonitorThreadStatus = -1; +static void runGlobalMonitorThread() +{ + int rank = 0; + int size = 1; + MPI_Comm_size( globalCommForGlobalCommStack, &size ); + MPI_Comm_rank( globalCommForGlobalCommStack, &rank ); + while ( globalMonitorThreadStatus == 1 ) { + // Check for any messages + int flag = 0; + MPI_Status status; + int err = MPI_Iprobe( MPI_ANY_SOURCE, 1, globalCommForGlobalCommStack, &flag, &status ); + if ( err != MPI_SUCCESS ) { + printf( "Internal error in StackTrace::getGlobalCallStacks::runGlobalMonitorThread\n" ); + break; + } else if ( flag != 0 ) { + // We received a request + int src_rank = status.MPI_SOURCE; + int tag; + MPI_Recv( &tag, 1, MPI_INT, src_rank, 1, globalCommForGlobalCommStack, &status ); + // Get the list of threads (except this) + auto threads = getActiveThreads(); + if ( threads.empty() ) + continue; + // Get the stack info for the threads + auto multistack = generateMultiStack( threads ); + // Pack and send the data + size_t bytes = multistack.size(); + char *data = new char[bytes]; + multistack.pack( data ); + MPI_Send( data, bytes, MPI_CHAR, src_rank, tag, globalCommForGlobalCommStack ); + delete[] data; + } else { + // No requests recieved + std::this_thread::sleep_for( std::chrono::milliseconds( 50 ) ); + } + } +} +void StackTrace::globalCallStackInitialize( MPI_Comm comm ) +{ + globalMonitorThreadStatus = 3; + // Check that we have the necessary MPI thread support + if ( !MPI_Initialized() ) { + printf( "Warning: MPI not initialized before calling globalCallStackInitialize\n" ); + return; + } + int rank = 0; + MPI_Comm_rank( comm, &rank ); + int provided; + MPI_Query_thread( &provided ); + if ( provided != MPI_THREAD_MULTIPLE ) 
{ + if ( rank == 0 ) + printf( "Warning: getGlobalCallStacks requires support for MPI_THREAD_MULTIPLE\n" ); + return; + } + // Check that we have support to get call stacks from threads + int N_threads = 0; + if ( rank == 0 ) { + std::thread thread( StackTrace::Utilities::sleep_ms, 200 ); + std::this_thread::yield(); + auto thread_ids = getActiveThreads(); + N_threads = thread_ids.size(); + thread.join(); + } + MPI_Bcast( &N_threads, 1, MPI_INT, 0, comm ); + if ( N_threads == 1 ) { + if ( rank == 0 ) + printf( "Warning: getAllCallStacks not supported on this OS\n" ); + return; + } + // Create the communicator and initialize the helper thread + globalMonitorThreadStatus = 1; + MPI_Comm_dup( comm, &globalCommForGlobalCommStack ); + globalMonitorThread.reset( new std::thread( runGlobalMonitorThread ) ); + std::this_thread::sleep_for( std::chrono::milliseconds( 50 ) ); +} +void StackTrace::globalCallStackFinalize() +{ + if ( globalMonitorThread ) { + globalMonitorThreadStatus = 2; + globalMonitorThread->join(); + globalMonitorThread.reset(); + } + if ( globalCommForGlobalCommStack != MPI_COMM_NULL ) + MPI_Comm_free( &globalCommForGlobalCommStack ); + globalCommForGlobalCommStack = MPI_COMM_NULL; +} +StackTrace::multi_stack_info getRemoteCallStacks() +{ + if ( globalMonitorThreadStatus == -1 ) { + // User did not call globalCallStackInitialize + printf( "Warning: getGlobalCallStacks called without call to globalCallStackInitialize\n" ); + return StackTrace::multi_stack_info(); + } else if ( globalMonitorThreadStatus != 1 ) { + // globalCallStackInitialize is not supported + return StackTrace::multi_stack_info(); + } + // Signal all processes that we want their stack for all threads + int rank = 0; + int size = 1; + MPI_Comm_size( globalCommForGlobalCommStack, &size ); + MPI_Comm_rank( globalCommForGlobalCommStack, &rank ); + std::random_device rd; + std::mt19937 gen( rd() ); + std::uniform_int_distribution<> dis( 2, 0x7FFF ); + int tag = dis( gen ); + std::vector 
sendRequest( size ); + for ( int i = 0; i < size; i++ ) { + if ( i == rank ) + continue; + MPI_Isend( &tag, 1, MPI_INT, i, 1, globalCommForGlobalCommStack, &sendRequest[i] ); + } + // Recieve the backtrace for all remote processes/threads + int N_finished = 1; + auto start = std::chrono::steady_clock::now(); + double time = 0; + const double max_time = 10.0 + size * 20e-3; + StackTrace::multi_stack_info multistack; + while ( N_finished < size && time < max_time ) { + int flag = 0; + MPI_Status status; + int err = MPI_Iprobe( MPI_ANY_SOURCE, tag, globalCommForGlobalCommStack, &flag, &status ); + if ( err != MPI_SUCCESS ) { + printf( "Internal error in StackTrace::getGlobalCallStacks\n" ); + break; + } else if ( flag != 0 ) { + // We recieved a response + int src_rank = status.MPI_SOURCE; + int count; + MPI_Get_count( &status, MPI_CHAR, &count ); + char *data = new char[count]; + MPI_Recv( data, count, MPI_CHAR, src_rank, tag, globalCommForGlobalCommStack, &status ); + StackTrace::multi_stack_info tmp; + tmp.unpack( data ); + delete[] data; + multistack.add( tmp ); + N_finished++; + } else { + auto stop = std::chrono::steady_clock::now(); + time = std::chrono::duration_cast( stop - start ).count(); + std::this_thread::yield(); + } + } + for ( int i = 0; i < size; i++ ) { + if ( i == rank ) + continue; + MPI_Request_free( &sendRequest[i] ); + } + return multistack; +} +#else +void StackTrace::globalCallStackInitialize( MPI_Comm ) {} +void StackTrace::globalCallStackFinalize() {} +StackTrace::multi_stack_info getRemoteCallStacks() { return StackTrace::multi_stack_info(); } +#endif +StackTrace::multi_stack_info StackTrace::getGlobalCallStacks() +{ + auto threads = getActiveThreads(); + auto multistack = generateMultiStack( threads ); + multistack.add( getRemoteCallStacks() ); + return multistack; +} + + +/**************************************************************************** + * Cleanup the call stack * + 
****************************************************************************/ +static constexpr size_t findMatching( const char *str, size_t N, size_t pos ) noexcept +{ + size_t pos2 = pos + 1; + int count = 1; + while ( count != 0 && pos2 < N ) { + if ( str[pos2] == '<' ) + count++; + if ( str[pos2] == '>' ) + count--; + pos2++; + } + return pos2; +} +template +static constexpr size_t findMatching( const std::array &str, size_t pos ) noexcept +{ + return findMatching( str.data(), N ); +} +static void cleanupFunctionName( char *function ) +{ + constexpr size_t npos = std::string::npos; + // First find the string length + size_t N = strlen( function ); + // Cleanup template space + strrep( function, N, " >", ">" ); + strrep( function, N, "< ", "<" ); + // Remove std::__1:: + strrep( function, N, "std::__1::", "std::" ); + // Replace std::ratio with abbriviated version + auto find = [&function, &N]( const string_view &str, size_t pos = 0 ) { + return string_view( function, N ).find( str, pos ); + }; + if ( find( "std::ratio<" ) != npos ) { + strrep( function, N, "std::ratio<1l, 1000000000000000000000000l>", "std::yocto" ); + strrep( function, N, "std::ratio<1l, 1000000000000000000000l>", "std::zepto" ); + strrep( function, N, "std::ratio<1l, 1000000000000000000l>", "std::atto" ); + strrep( function, N, "std::ratio<1l, 1000000000000000l>", "std::femto" ); + strrep( function, N, "std::ratio<1l, 1000000000000l>", "std::pico" ); + strrep( function, N, "std::ratio<1l, 1000000000l>", "std::nano" ); + strrep( function, N, "std::ratio<1l, 1000000l>", "std::micro" ); + strrep( function, N, "std::ratio<1l, 1000l>", "std::milli" ); + strrep( function, N, "std::ratio<1l, 100l>", "std::centi" ); + strrep( function, N, "std::ratio<1l, 10l>", "std::deci" ); + strrep( function, N, "std::ratio<1l, 1l>", "" ); + strrep( function, N, "std::ratio<10l, 1l>", "std::deca" ); + strrep( function, N, "std::ratio<60l, 1l>", "std::ratio<60>" ); + strrep( function, N, "std::ratio<100l, 1l>", 
"std::hecto" ); + strrep( function, N, "std::ratio<1000l, 1l>", "std::kilo" ); + strrep( function, N, "std::ratio<3600l, 1l>", "std::ratio<3600>" ); + strrep( function, N, "std::ratio<1000000l, 1l>", "std::mega" ); + strrep( function, N, "std::ratio<1000000000l, 1l>", "std::giga" ); + strrep( function, N, "std::ratio<1000000000000l, 1l>", "std::tera" ); + strrep( function, N, "std::ratio<1000000000000000l, 1l>", "std::peta" ); + strrep( function, N, "std::ratio<1000000000000000000l, 1l>", "std::exa" ); + strrep( function, N, "std::ratio<1000000000000000000000l, 1l>", "std::zetta" ); + strrep( function, N, "std::ratio<1000000000000000000000000l, 1l>", "std::yotta" ); + strrep( function, N, " >", ">" ); + strrep( function, N, "< ", "<" ); + } + // Replace std::chrono::duration with abbriviated version + if ( find( "std::chrono::duration<" ) != npos ) { + // clang-format off + strrep( function, N, "std::chrono::duration", "std::chrono::nanoseconds" ); + strrep( function, N, "std::chrono::duration", "std::chrono::microseconds" ); + strrep( function, N, "std::chrono::duration", "std::chrono::milliseconds" ); + strrep( function, N, "std::chrono::duration", "std::chrono::seconds" ); + strrep( function, N, "std::chrono::duration", "std::chrono::seconds" ); + strrep( function, N, "std::chrono::duration>", "std::chrono::minutes" ); + strrep( function, N, "std::chrono::duration>", "std::chrono::hours" ); + strrep( function, N, " >", ">" ); + strrep( function, N, "< ", "<" ); + // clang-format on + } + // Replace std::this_thread::sleep_for with abbriviated version. 
+ if ( find( "::sleep_for<" ) != npos ) { + strrep( function, N, "::sleep_for", "::sleep_for" ); + strrep( function, N, "::sleep_for", "::sleep_for" ); + strrep( function, N, "::sleep_for", "::sleep_for" ); + strrep( function, N, "::sleep_for", "::sleep_for" ); + strrep( function, N, "::sleep_for", "::sleep_for" ); + strrep( function, N, "::sleep_for>", "::sleep_for" ); + strrep( function, N, "::sleep_for>", "::sleep_for" ); + strrep( function, N, "::sleep_for(std::chrono::nanoseconds", + "::sleep_for(std::chrono::nanoseconds" ); + strrep( function, N, "::sleep_for(std::chrono::microseconds", + "::sleep_for(std::chrono::microseconds" ); + strrep( function, N, "::sleep_for(std::chrono::milliseconds", + "::sleep_for(std::chrono::milliseconds" ); + strrep( function, N, "::sleep_for(std::chrono::seconds", + "::sleep_for(std::chrono::seconds" ); + strrep( function, N, "::sleep_for(std::chrono::minutes", + "::sleep_for(std::chrono::milliseconds" ); + strrep( function, N, "::sleep_for(std::chrono::hours", + "::sleep_for(std::chrono::hours" ); + } + // Replace std::basic_string with abbriviated version + strrep( function, N, "std::__cxx11::basic_string<", "std::basic_string<" ); + size_t pos = 0; + while ( pos < N ) { + // Find next instance of std::basic_string + pos = find( "std::basic_string<", pos ); + if ( pos == npos ) + break; + // Find the matching > + size_t pos1 = pos + 17; + size_t pos2 = findMatching( function, N, pos1 ); + if ( pos2 == pos1 ) + break; + if ( strncmp( &function[pos1 + 1], "char", 4 ) == 0 ) + N = replace( function, N, pos, pos2 - pos, "std::string" ); + else if ( strncmp( &function[pos1 + 1], "wchar_t", 7 ) == 0 ) + N = replace( function, N, pos, pos2 - pos, "std::wstring" ); + else if ( strncmp( &function[pos1 + 1], "char16_t", 8 ) == 0 ) + N = replace( function, N, pos, pos2 - pos, "std::u16string" ); + else if ( strncmp( &function[pos1 + 1], "char32_t", 8 ) == 0 ) + N = replace( function, N, pos, pos2 - pos, "std::u32string" ); + pos++; + } 
+ // Replace std::make_shared with abbriviated version + if ( find( "std::make_shared<" ) != npos ) { + size_t pos1 = find( "std::make_shared<" ); + size_t pos2 = find( ",", pos1 ); + size_t pos3 = find( "(", pos1 ); + N = replace( function, N, pos2, pos3 - pos2, ">" ); + } + // Remove std::allocator in std::vector + if ( find( "std::vector<" ) != npos ) { + size_t pos1 = find( "std::vector<" ); + size_t pos2 = find( ", std::allocator", pos1 ); + size_t pos3 = findMatching( function, N, pos1 + 11 ); + N = replace( function, N, pos2, pos3 - pos2, ">" ); + } +} +void StackTrace::cleanupStackTrace( multi_stack_info &stack ) +{ + auto it = stack.children.begin(); + const size_t npos = std::string::npos; + while ( it != stack.children.end() ) { + string_view object( it->stack.object.data() ); + string_view function( it->stack.function.data() ); + string_view filename( it->stack.filename.data() ); + bool remove_entry = false; + // Remove StackTrace functions + if ( filename == "StackTrace.cpp" ) { + // Remove callstack (and all children) for threads that are just contributing + bool test = function.find( "_callstack_signal_handler" ) != npos || + function.find( "getGlobalCallStacks" ) != npos || + function.find( "(" ) == npos; + if ( test ) { + it = stack.children.erase( it ); + continue; + } + // Remove backtrace_thread + if ( function.find( "backtrace_thread" ) != npos ) + remove_entry = true; + } + // Remove libc functions + if ( object.find( "libc.so" ) != npos ) { + // Remove __libc_start_main + if ( function.find( "__libc_start_main" ) != npos ) + remove_entry = true; + // Remove libc fgets children + if ( function.find( "fgets" ) != npos ) + it->children.clear(); + } + // Remove libc++ functions + if ( object.find( "libstdc++" ) != npos ) { + // Remove std::this_thread::__sleep_for + if ( function.find( "std::this_thread::__sleep_for(" ) != npos ) + remove_entry = true; + } + // Remove pthread functions + if ( object.find( "libpthread" ) != npos ) { + // Remove 
__restore_rt + if ( function.find( "__restore_rt" ) != npos && object.find( "libpthread" ) != npos ) + remove_entry = true; + } + // Remove condition_variable functions + if ( filename == "condition_variable" ) { + // Remove std::condition_variable::__wait_until_impl + if ( function.find( "std::condition_variable::__wait_until_impl" ) != npos ) + remove_entry = true; + } + // Remove std::function references + if ( filename == "functional" ) { + remove_entry = remove_entry || function.find( "std::_Function_handler<" ) != npos; + remove_entry = remove_entry || function.find( "std::_Bind_simple<" ) != npos; + remove_entry = remove_entry || function.find( "_M_invoke" ) != npos; + } + // Remove std::thread::_Impl + if ( filename == "thread" ) { + if ( function.find( "std::thread::_Impl<" ) != npos || + function.find( "std::thread::_Invoker<" ) != npos ) + remove_entry = true; + } + if ( filename == "invoke.h" ) { + remove_entry = remove_entry || function.find( "std::__invoke_impl" ) != npos; + remove_entry = remove_entry || function.find( "std::__invoke_result" ) != npos; + } + // Remove pthread internals + if ( function == "__GI___pthread_timedjoin_ex" ) + remove_entry = true; + // Remove MPI internal routines + if ( function == "MPIR_Barrier_impl" || function == "MPIR_Barrier_intra" || + function == "MPIC_Sendrecv" ) + remove_entry = true; + // Remove OpenMPI specific internal routines + if ( function == "opal_libevent2022_event_set_log_callback" || + function == "opal_libevent2022_event_base_loop" ) + remove_entry = true; + // Remove MATLAB internal routines + if ( object == "libmwmcr.so" || object == "libmwm_lxe.so" || object == "libmwbridge.so" || + object == "libmwiqm.so" ) + remove_entry = true; + // Remove std::shared_ptr functions + if ( filename == "shared_ptr.h" ) { + if ( function.find( "> std::allocate_shared<" ) != npos || + function.find( "std::_Sp_make_shared_tag," ) != npos ) + remove_entry = true; + } + if ( filename == "shared_ptr_base.h" ) + 
remove_entry = true; + // Remove new_allocator functions + if ( filename == "new_allocator.h" ) + remove_entry = true; + // Remove alloc_traits functions + if ( filename == "alloc_traits.h" ) + remove_entry = true; + // Remove gthr-default functions + if ( filename == "gthr-default.h" ) + remove_entry = true; + // Remove entries with no useful information + if ( function.empty() && filename.empty() ) + remove_entry = true; + // Remove the desired entry + if ( remove_entry ) { + if ( it->children.empty() ) { + it = stack.children.erase( it ); + continue; + } else if ( it->children.size() == 1 ) { + *it = it->children[0]; + continue; + } + } + // Cleanup the children + cleanupStackTrace( *it ); + // Combine any children with the same address (can occur when we remove items) + bool remove = false; + for ( auto it2 = stack.children.begin(); it2 != it; it2++ ) { + if ( it->stack == it2->stack ) { + remove = true; + it2->N += it->N; + for ( auto &tmp : it->children ) + it2->children.push_back( tmp ); + cleanupStackTrace( *it2 ); + } + } + if ( remove ) { + it = stack.children.erase( it ); + continue; + } + ++it; + } +} + + +/**************************************************************************** + * Generate stack from string * + ****************************************************************************/ +static StackTrace::stack_info parseLine( const char *str ) +{ + char tmp[1000]; + StackTrace::stack_info stack; + // Load the address + const char *p0 = strchr( str, 0 ); + const char *p1 = strchr( str, 'x' ); + const char *p2 = strchr( str, ':' ); + memset( tmp, 0, sizeof( tmp ) ); + memcpy( tmp, p1 + 1, p2 - p1 - 1 ); + uint64_t address = strtol( tmp, nullptr, 16 ); + stack.address = reinterpret_cast( address ); + stack.address2 = stack.address; + // Load object, function, file + const char *p3 = p2 + 1; + while ( *p3 == ' ' ) + p3++; + if ( *p3 == 0 ) + return stack; + const char *p4 = strstr( p3, " " ); + const char *p5 = nullptr; + if ( p4 != nullptr ) { + 
while ( *p4 == ' ' ) + p4++; + p5 = strstr( p4, " " ); + if ( p5 != nullptr ) { + while ( *p5 == ' ' ) + p5++; + } + } + if ( p5 == nullptr ) { + if ( p3 - p2 > 20 ) { + p5 = p4; + p4 = p3; + } + } + if ( p4 == nullptr ) + p4 = p0; + if ( p5 == nullptr ) + p5 = p0; + // Load line + const char *p6 = strchr( p5, ':' ); + if ( p6 == nullptr ) + p6 = p0; + // Store the results + auto copyField = []( const char *p1, const char *p2, auto &field ) { + field.fill( 0 ); + memcpy( field.data(), p1, std::min( p2 - p1, field.size() ) ); + for ( int i = field.size() - 1; i > 0 && ( field[i] == ' ' || field[i] == 0 ); i-- ) + field[i] = 0; + }; + copyField( p3, p4, stack.object ); + copyField( p4, p5, stack.function ); + copyField( p5, p6, stack.filename ); + if ( p6 != p0 ) + stack.line = atoi( p6 + 1 ); + return stack; +} +StackTrace::multi_stack_info StackTrace::generateFromString( const std::string &str ) +{ + // Break the string according to line breaks + std::vector data; + size_t p1 = 0; + size_t p2 = str.find( '\n' ); + while ( p2 != std::string::npos ) { + data.push_back( str.substr( p1, p2 - p1 ) ); + p1 = p2 + 1; + p2 = str.find( '\n', p1 ); + } + data.push_back( str.substr( p1 ) ); + // Generate the stack + return generateFromString( data ); +} +StackTrace::multi_stack_info StackTrace::generateFromString( const std::vector &text ) +{ + // Get the data from the text + std::vector indent; + std::vector stack; + for ( const auto &str : text ) { + auto p1 = str.find( '[' ); + auto p2 = str.find( ']' ); + auto p3 = str.find( 'x' ); + if ( p3 == std::string::npos ) + continue; + multi_stack_info tmp; + tmp.N = 1; + if ( p1 < p2 && p1 < p3 ) + tmp.N = std::stoi( str.substr( p1 + 1, p2 - p1 - 1 ) ); + tmp.stack = parseLine( &str[p3 - 1] ); + indent.push_back( std::min( p1, p3 - 1 ) ); + stack.push_back( tmp ); + } + // Generate the stack hierarchy + multi_stack_info stack2; + std::vector *>> map; + map.emplace_back( 0, &stack2.children ); + for ( size_t i = 0; i < 
stack.size(); i++ ) { + while ( indent[i] < map.back().first ) + map.resize( map.size() - 1 ); + if ( indent[i] == map.back().first ) { + map.back().second->push_back( stack[i] ); + } else { + map.back().second->back().children.push_back( stack[i] ); + map.emplace_back( indent[i], &map.back().second->back().children ); + } + } + return stack2; +} + + +/**************************************************************************** + * abort_error * + ****************************************************************************/ +StackTrace::abort_error::abort_error() + : type( terminateType::unknown ), signal( 0 ), line( -1 ), bytes( 0 ) +{ +} +const char *StackTrace::abort_error::what() const noexcept +{ + d_msg.clear(); + if ( type == terminateType::abort ) { + d_msg += "Program abort called"; + } else if ( type == terminateType::signal ) { + d_msg += "Unhandled signal (" + std::to_string( signal ) + ") caught"; + } else if ( type == terminateType::exception ) { + d_msg += "Unhandled exception caught"; + } else if ( type == terminateType::MPI ) { + d_msg += "Error calling MPI routine"; + } else { + d_msg += "Unknown error called"; + } + if ( !filename.empty() ) { + d_msg += " in file '" + filename + "'"; + if ( line > 0 ) { + d_msg += " at line " + std::to_string( line ); + } + } + d_msg += ":\n"; + d_msg += " " + message + "\n"; + if ( bytes > 0 ) { + d_msg += "Bytes used = " + std::to_string( bytes ) + "\n"; + } + if ( !stack.empty() ) { + d_msg += "Stack Trace:\n"; + if ( stackType == printStackType::local ) { + for ( const auto &item : getStackInfo( stack ) ) { + char txt[1000]; + item.print2( txt ); + d_msg += " \n"; + d_msg += txt; + } + } else if ( stackType == printStackType::threaded || stackType == printStackType::global ) { + // Get the call stack + std::vector> trace; + trace.push_back( stack ); + // Get the call stack for all threads except the current one + auto threads = getActiveThreads(); + threads.erase( thisThread() ); + for ( auto tid : threads ) 
+ trace.push_back( backtrace( tid ) ); + // Generate call stack + auto multistack = generateMultiStack( trace ); + // Add remote call stack info + if ( stackType == printStackType::global ) + multistack.add( getRemoteCallStacks() ); + // Cleanup call stack + cleanupStackTrace( multistack ); + // Print the results + d_msg += multistack.printString( " " ); + } else { + d_msg += "Unknown value for stackType\n"; + } + } + for ( size_t i = 0; i < d_msg.size(); i++ ) + if ( d_msg[i] == 0 ) + d_msg.erase( i, 1 ); + return d_msg.c_str(); +} diff --git a/common/StackTrace.h b/StackTrace/StackTrace.h similarity index 59% rename from common/StackTrace.h rename to StackTrace/StackTrace.h index 8d436bf7..ce315020 100644 --- a/common/StackTrace.h +++ b/StackTrace/StackTrace.h @@ -1,41 +1,30 @@ #ifndef included_StackTrace #define included_StackTrace +#include #include #include #include #include #include - -// Check for and include MPI -// clang-format off -#if defined(USE_MPI) || defined(USE_EXT_MPI) - #include "mpi.h" -#elif defined(__has_include) - #if __has_include("mpi.h") - #include "mpi.h" - #else - typedef int MPI_Comm; - #endif -#else - typedef int MPI_Comm; -#endif -// clang-format on +#include "StackTrace/string_view.h" namespace StackTrace { - +//! Class to contain stack trace info for a single thread/process struct stack_info { + uint32_t line; void *address; void *address2; - std::string object; - std::string function; - std::string filename; - int line; + std::array object; + std::array objectPath; + std::array filename; + std::array filenamePath; + std::array function; //! Default constructor - stack_info() : address( nullptr ), address2( nullptr ), line( 0 ) {} + stack_info(); //! Reset the stack void clear(); //! Operator== @@ -46,19 +35,22 @@ struct stack_info { int getAddressWidth() const; //! Print the stack info std::string print( int widthAddress = 16, int widthObject = 20, int widthFunction = 32 ) const; + //! 
Print the stack info + static void print( std::ostream &out, const std::vector &stack, + const StackTrace::string_view &prefix = "" ); + //! Print the stack info + void print2( + char *txt, int widthAddress = 16, int widthObject = 20, int widthFunction = 32 ) const; //! Compute the number of bytes needed to store the object size_t size() const; //! Pack the data to a byte array, returning a pointer to the end of the data char *pack( char *ptr ) const; //! Unpack the data from a byte array, returning a pointer to the end of the data const char *unpack( const char *ptr ); - //! Pack a vector of data to a memory block - static std::vector packArray( const std::vector &data ); - //! Unpack a vector of data from a memory block - static std::vector unpackArray( const char *data ); }; +//! Class to contain stack trace info for multiple threads/processes struct multi_stack_info { int N; // Number of threads/processes stack_info stack; // Current stack item @@ -71,19 +63,69 @@ struct multi_stack_info { multi_stack_info &operator=( const std::vector & ); //! Reset the stack void clear(); + //! Is the stack empty + bool empty() const { return N == 0; } //! Add the given stack to the multistack void add( size_t len, const stack_info *stack ); + //! Add the given stack to the multistack + void add( const multi_stack_info &stack ); + //! Compute the number of bytes needed to store the object + size_t size() const; + //! Pack the data to a byte array, returning a pointer to the end of the data + char *pack( char *ptr ) const; + //! Unpack the data from a byte array, returning a pointer to the end of the data + const char *unpack( const char *ptr ); //! Print the stack info - std::vector print( const std::string &prefix = std::string() ) const; + std::vector print( const StackTrace::string_view &prefix = "" ) const; + //! Print the stack info + void print( std::ostream &out, const StackTrace::string_view &prefix = "" ) const; + //! 
Print the stack info + std::string printString( const StackTrace::string_view &prefix = "" ) const; private: - void print2( const std::string &prefix, int w[3], std::vector &text ) const; + template + void print2( int Np, char *prefix, int w[3], bool c, FUN &fun ) const; int getAddressWidth() const; int getObjectWidth() const; int getFunctionWidth() const; }; +//!< Terminate type +enum class terminateType : uint8_t { signal, exception, abort, MPI, unknown }; +enum class printStackType : uint8_t { local = 1, threaded = 2, global = 3 }; + +//!< Class to contain exception info from abort +class abort_error : public std::exception +{ +public: + std::string message; //!< Abort message + std::string filename; //!< File where abort was called + terminateType type; //!< What caused the termination + printStackType stackType; //!< Print the local stack, all threads, or global call stack + uint8_t signal; //!< Signal number + int line; //!< Line number where abort was called + size_t bytes; //!< Memory in use during abort + std::vector stack; //!< Local call stack for abort +public: + virtual const char *what() const noexcept override; + abort_error(); + virtual ~abort_error() {} + +private: + mutable std::string d_msg; +}; + + +//!< Class to contain symbol information +struct symbols_struct { + char type; + void *address; + std::array obj; + std::array objPath; +}; + + /*! * @brief Get the current call stack * @details This function returns the current call stack for the current thread @@ -152,16 +194,18 @@ std::vector getStackInfo( const std::vector &address ); //! Function to return the signal name -std::string signalName( int signal ); +const char *signalName( int signal ); /*! * Return the symbols from the current executable (not availible for all platforms) - * @return Returns 0 if sucessful + * @return Returns the symbols loaded */ -int getSymbols( std::vector &address, - std::vector &type, - std::vector &obj ); +std::vector getSymbols(); + + +//! 
Clear internal symbol data +void clearSymbols(); /*! @@ -178,20 +222,10 @@ std::string getExecutable(); std::string getSymPaths(); -//!< Terminate type -enum class terminateType { signal, exception }; - -/*! - * Set the error handlers - * @param[in] abort Function to terminate the program: abort(msg,type) - */ -void setErrorHandlers( std::function abort ); - - /*! * Set the given signals to the handler * @param[in] signals Signals to handle - * @param[in] handler Function to terminate the program: abort(msg,type) + * @param[in] handler Function to terminate the program: abort(signal) */ void setSignals( const std::vector &signals, void ( *handler )( int ) ); @@ -200,10 +234,18 @@ void setSignals( const std::vector &signals, void ( *handler )( int ) ); void clearSignal( int signal ); +//! Clear a signal set by setSignals +void clearSignals( const std::vector &signals ); + + //! Clear all signals set by setSignals void clearSignals(); +//! Raise a signal +void raiseSignal( int signal ); + + //! Return a list of all signals that can be caught std::vector allSignalsToCatch(); @@ -212,19 +254,12 @@ std::vector defaultSignalsToCatch(); //! Get a list of the active threads -std::set activeThreads(); +std::vector activeThreads(); //! Get a handle to this thread std::thread::native_handle_type thisThread(); -//! Initialize globalCallStack functionallity -void globalCallStackInitialize( MPI_Comm comm ); - -//! Clean up globalCallStack functionallity -void globalCallStackFinalize(); - - /*! * @brief Call system command * @details This function calls a system command, waits for the program @@ -233,7 +268,25 @@ void globalCallStackFinalize(); * @param[out] exit_code Exit code returned from child process * @return Returns string containing the output */ -std::string exec( const std::string &cmd, int &exit_code ); +std::string exec( const string_view &cmd, int &exit_code ); + + +/*! 
+ * @brief Create stack from string + * @details This function creates the call stack from the string generated by print + * @param[in] str Vector of strings containing call stack + * @return Returns the call stack + */ +multi_stack_info generateFromString( const std::vector &str ); + + +/*! + * @brief Create stack from string + * @details This function creates the call stack from the string + * @param[in] str String containing call stack + * @return Returns the call stack + */ +multi_stack_info generateFromString( const std::string &str ); } // namespace StackTrace diff --git a/StackTrace/Utilities.cpp b/StackTrace/Utilities.cpp new file mode 100644 index 00000000..734a0056 --- /dev/null +++ b/StackTrace/Utilities.cpp @@ -0,0 +1,296 @@ +#define NOMINMAX +#include "StackTrace/Utilities.h" +#include "StackTrace/ErrorHandlers.h" +#include "StackTrace/StackTrace.h" + +#include +#include +#include +#include +#include +#include +#include + +#ifdef USE_MPI +#include "mpi.h" +#endif + +#ifdef USE_TIMER +#include "MemoryApp.h" +#endif + + +#define perr std::cerr + + +// Detect the OS +// clang-format off +#if defined( WIN32 ) || defined( _WIN32 ) || defined( WIN64 ) || defined( _WIN64 ) || defined( _MSC_VER ) + #define USE_WINDOWS +#elif defined( __APPLE__ ) + #define USE_MAC +#elif defined( __linux ) || defined( __linux__ ) || defined( __unix ) || defined( __posix ) + #define USE_LINUX + #define USE_NM +#else + #error Unknown OS +#endif +// clang-format on + + +// Include system dependent headers +// clang-format off +#ifdef USE_WINDOWS + #include + #include + #include + #include + #include +#else + #include + #include + #include + #include + #include + #include +#endif +#ifdef USE_LINUX + #include +#endif +#ifdef USE_MAC + #include + #include + #include +#endif +// clang-format on + + +namespace StackTrace { + + +/**************************************************************************** + * Function to find an entry * + 
****************************************************************************/ +template +inline size_t findfirst( const std::vector &X, TYPE Y ) +{ + if ( X.empty() ) + return 0; + size_t lower = 0; + size_t upper = X.size() - 1; + if ( X[lower] >= Y ) + return lower; + if ( X[upper] < Y ) + return upper; + while ( ( upper - lower ) != 1 ) { + size_t value = ( upper + lower ) / 2; + if ( X[value] >= Y ) + upper = value; + else + lower = value; + } + return upper; +} + + +/**************************************************************************** + * Function to terminate the program * + ****************************************************************************/ +static bool abort_throwException = false; +static printStackType abort_stackType = printStackType::global; +static int force_exit = 0; +void Utilities::setAbortBehavior( bool throwException, int stackType ) +{ + abort_throwException = throwException; + abort_stackType = static_cast( stackType ); +} +void Utilities::abort( const std::string &message, const std::string &filename, const int line ) +{ + abort_error err; + err.message = message; + err.filename = filename; + err.type = terminateType::abort; + err.line = line; + err.bytes = Utilities::getMemoryUsage(); + err.stackType = abort_stackType; + err.stack = StackTrace::backtrace(); + throw err; +} +static void terminate( const StackTrace::abort_error &err ) +{ + clearErrorHandler(); + // Print the message and abort + if ( force_exit > 1 ) { + std::abort(); + } else if ( !abort_throwException ) { + // Use MPI_abort (will terminate all processes) + force_exit = 2; + perr << err.what(); +#if defined( USE_MPI ) || defined( HAVE_MPI ) + int initialized = 0, finalized = 0; + MPI_Initialized( &initialized ); + MPI_Finalized( &finalized ); + if ( initialized != 0 && finalized == 0 ) { + clearMPIErrorHandler( MPI_COMM_WORLD ); + MPI_Abort( MPI_COMM_WORLD, -1 ); + } +#endif + std::abort(); + } else { + perr << err.what(); + std::abort(); + } +} + + 
+/**************************************************************************** + * Functions to set the error handler * + ****************************************************************************/ +static void setTerminateErrorHandler() +{ + // Set the terminate routine for runtime errors + StackTrace::setErrorHandler( terminate ); +} +void Utilities::setErrorHandlers() +{ +#ifdef USE_MPI + setMPIErrorHandler( MPI_COMM_WORLD ); + setMPIErrorHandler( MPI_COMM_SELF ); +#endif + setTerminateErrorHandler(); +} +void Utilities::clearErrorHandlers() +{ +#ifdef USE_MPI + clearMPIErrorHandler( MPI_COMM_WORLD ); + clearMPIErrorHandler( MPI_COMM_SELF ); +#endif +} + + +/**************************************************************************** + * Function to get the memory usage * + * Note: this function should be thread-safe * + ****************************************************************************/ +// clang-format off +#if defined( USE_MAC ) || defined( USE_LINUX ) + // Get the page size on mac or linux + static size_t page_size = static_cast( sysconf( _SC_PAGESIZE ) ); +#endif +size_t Utilities::getSystemMemory() +{ + #if defined( USE_LINUX ) + static long pages = sysconf( _SC_PHYS_PAGES ); + size_t N_bytes = pages * page_size; + #elif defined( USE_MAC ) + int mib[2] = { CTL_HW, HW_MEMSIZE }; + u_int namelen = sizeof( mib ) / sizeof( mib[0] ); + uint64_t size; + size_t len = sizeof( size ); + size_t N_bytes = 0; + if ( sysctl( mib, namelen, &size, &len, nullptr, 0 ) == 0 ) + N_bytes = size; + #elif defined( USE_WINDOWS ) + MEMORYSTATUSEX status; + status.dwLength = sizeof( status ); + GlobalMemoryStatusEx( &status ); + size_t N_bytes = status.ullTotalPhys; + #else + #error Unknown OS + #endif + return N_bytes; +} +size_t Utilities::getMemoryUsage() +{ + #ifdef USE_TIMER + size_t N_bytes = MemoryApp::getTotalMemoryUsage(); + #else + #if defined( USE_LINUX ) + struct mallinfo meminfo = mallinfo(); + size_t size_hblkhd = static_cast( meminfo.hblkhd ); + size_t 
size_uordblks = static_cast( meminfo.uordblks ); + size_t N_bytes = size_hblkhd + size_uordblks; + #elif defined( USE_MAC ) + struct task_basic_info t_info; + mach_msg_type_number_t t_info_count = TASK_BASIC_INFO_COUNT; + if ( KERN_SUCCESS != + task_info( mach_task_self(), TASK_BASIC_INFO, (task_info_t) &t_info, &t_info_count ) ) { + return 0; + } + size_t N_bytes = t_info.virtual_size; + #elif defined( USE_WINDOWS ) + PROCESS_MEMORY_COUNTERS memCounter; + GetProcessMemoryInfo( GetCurrentProcess(), &memCounter, sizeof( memCounter ) ); + size_t N_bytes = memCounter.WorkingSetSize; + #else + #error Unknown OS + #endif + #endif + return N_bytes; +} +// clang-format on + + +/**************************************************************************** + * Functions to get the time and timer resolution * + ****************************************************************************/ +#if defined( USE_WINDOWS ) +double Utilities::time() +{ + LARGE_INTEGER end, f; + QueryPerformanceFrequency( &f ); + QueryPerformanceCounter( &end ); + double time = ( (double) end.QuadPart ) / ( (double) f.QuadPart ); + return time; +} +double Utilities::tick() +{ + LARGE_INTEGER f; + QueryPerformanceFrequency( &f ); + double resolution = ( (double) 1.0 ) / ( (double) f.QuadPart ); + return resolution; +} +#elif defined( USE_LINUX ) || defined( USE_MAC ) +double Utilities::time() +{ + timeval current_time; + gettimeofday( ¤t_time, nullptr ); + double time = ( (double) current_time.tv_sec ) + 1e-6 * ( (double) current_time.tv_usec ); + return time; +} +double Utilities::tick() +{ + timeval start, end; + gettimeofday( &start, nullptr ); + gettimeofday( &end, nullptr ); + while ( end.tv_sec == start.tv_sec && end.tv_usec == start.tv_usec ) + gettimeofday( &end, nullptr ); + double resolution = ( (double) ( end.tv_sec - start.tv_sec ) ) + + 1e-6 * ( (double) ( end.tv_usec - start.tv_usec ) ); + return resolution; +} +#else +#error Unknown OS +#endif + + 
+/**************************************************************************** + * Cause a segfault * + ****************************************************************************/ +void Utilities::cause_segfault() +{ + int *ptr = nullptr; + ptr[0] = 0; +} + + +/**************************************************************************** + * Call system command * + ****************************************************************************/ +std::string Utilities::exec( const string_view &cmd, int &exit_code ) +{ + return StackTrace::exec( cmd, exit_code ); +} + + +} // namespace StackTrace diff --git a/StackTrace/Utilities.h b/StackTrace/Utilities.h new file mode 100644 index 00000000..10ed9085 --- /dev/null +++ b/StackTrace/Utilities.h @@ -0,0 +1,99 @@ +#ifndef included_StackTrace_Utilities +#define included_StackTrace_Utilities + +#include +#include +#include + +#include "StackTrace/StackTrace.h" +#include "StackTrace/string_view.h" + + +namespace StackTrace { +namespace Utilities { + + +/*! + * Aborts the run after printing an error message with file and + * line number information. + */ +void abort( const std::string &message, const std::string &filename, const int line ); + + +/*! + * Set the behavior of abort + * @param throwException Throw an exception instead of MPI_Abort (default is false) + * @param stackType Type of stack to get (1: thread local stack, 2: all threads, 3: global) + */ +void setAbortBehavior( bool throwException, int stackType = 2 ); + + +//! Function to set the error handlers +void setErrorHandlers(); + +//! Function to clear the error handlers +void clearErrorHandlers(); + + +/*! + * Function to get the memory availible. + * This function will return the total memory availible + * Note: depending on the implimentation, this number may be rounded to + * to a multiple of the page size. + * If this function fails, it will return 0. + */ +size_t getSystemMemory(); + + +/*! + * Function to get the memory usage. 
+ * This function will return the total memory used by the application. + * Note: depending on the implimentation, this number may be rounded to + * to a multiple of the page size. + * If this function fails, it will return 0. + */ +size_t getMemoryUsage(); + + +//! Function to get an arbitrary point in time +double time(); + + +//! Function to get the resolution of time +double tick(); + + +/*! + * Sleep for X ms + * @param N Time to sleep (ms) + */ +inline void sleep_ms( int N ) { std::this_thread::sleep_for( std::chrono::milliseconds( N ) ); } + + +/*! + * Sleep for X s + * @param N Time to sleep (s) + */ +inline void sleep_s( int N ) { std::this_thread::sleep_for( std::chrono::seconds( N ) ); } + + +//! Cause a segfault +void cause_segfault(); + + +/*! + * @brief Call system command + * @details This function calls a system command, waits for the program + * to execute, captures and returns the output and exit code. + * @param[in] cmd Command to execute + * @param[out] exit_code Exit code returned from child process + * @return Returns string containing the output + */ +std::string exec( const StackTrace::string_view &cmd, int &exit_code ); + + +} // namespace Utilities +} // namespace StackTrace + + +#endif diff --git a/StackTrace/string_view.h b/StackTrace/string_view.h new file mode 100644 index 00000000..d83d1f24 --- /dev/null +++ b/StackTrace/string_view.h @@ -0,0 +1,193 @@ +#ifndef included_StackTrace_stringView +#define included_StackTrace_stringView + +#include +#include + +namespace StackTrace { + +// string_view +class string_view +{ +public: + // Constants: + static constexpr size_t npos = size_t( -1 ); + + // Constructions + constexpr string_view() noexcept : d_data( nullptr ), d_size( 0 ) {} + constexpr string_view( string_view&& ) noexcept = default; + constexpr string_view( const string_view& ) noexcept = default; + constexpr string_view( const char* s ) : d_data( s ), d_size( s ? 
// Non-owning view of a character sequence, providing the subset of the C++17
// std::string_view interface used by this library.  The viewed characters are
// not copied; they must outlive the view.
class string_view
{
public:
    // Constants:
    static constexpr size_t npos = size_t( -1 );

    // Constructors
    constexpr string_view() noexcept : d_data( nullptr ), d_size( 0 ) {}
    constexpr string_view( string_view&& ) noexcept      = default;
    constexpr string_view( const string_view& ) noexcept = default;
    // A null pointer produces an empty view
    constexpr string_view( const char* s ) : d_data( s ), d_size( s ? strlen( s ) : 0 ) {}
    constexpr string_view( const char* s, size_t count ) : d_data( s ), d_size( count ) {}
    inline string_view( const std::string& s ) : d_data( s.data() ), d_size( s.size() ) {}

    // Assignment
    constexpr string_view& operator=( string_view&& other ) noexcept = default;
    constexpr string_view& operator=( const string_view& other ) noexcept = default;

    // Iterators
    constexpr const char* begin() const noexcept { return d_data; }
    constexpr const char* end() const noexcept { return d_data + d_size; }
    constexpr const char* cbegin() const noexcept { return begin(); }
    constexpr const char* cend() const noexcept { return end(); }

    // Capacity
    constexpr size_t size() const noexcept { return d_size; }
    constexpr size_t length() const noexcept { return d_size; }
    constexpr bool empty() const noexcept { return d_size == 0; }

    // Element access (all accessors are bounds-checked and throw std::out_of_range)
    constexpr const char& operator[]( size_t pos ) const
    {
        if ( pos >= d_size )
            throw std::out_of_range( "string_view[]" );
        return d_data[pos];
    }
    constexpr const char& at( size_t pos ) const
    {
        if ( pos >= d_size )
            throw std::out_of_range( "string_view::at()" );
        return d_data[pos];
    }
    constexpr const char& front() const
    {
        if ( d_size == 0 )
            throw std::out_of_range( "front()" );
        return d_data[0];
    }
    constexpr const char& back() const
    {
        if ( d_size == 0 )
            throw std::out_of_range( "back()" );
        return d_data[size() - 1];
    }
    constexpr const char* data() const noexcept { return d_data; }

    // Swap data
    void swap( string_view& other ) noexcept
    {
        std::swap( d_data, other.d_data );
        std::swap( d_size, other.d_size );
    }

    // String operations
    // Copy up to n characters starting at pos into dest; returns the number copied
    size_t copy( char* dest, size_t n, size_t pos = 0 ) const
    {
        if ( pos > size() )
            throw std::out_of_range( "string_view::copy()" );
        const size_t rlen = std::min( n, size() - pos );
        memcpy( dest, data() + pos, rlen );
        return rlen;
    }
    // View of at most n characters starting at pos
    constexpr string_view substr( size_t pos = 0, size_t n = npos ) const
    {
        if ( pos > size() )
            throw std::out_of_range( "string_view::substr()" );
        return string_view( data() + pos, std::min( n, size() - pos ) );
    }

    // Find
    // Index of the first occurrence of ch at or after pos, or npos
    constexpr size_t find( char ch, size_t pos = 0 ) const noexcept
    {
        for ( size_t i = pos; i < d_size; i++ )
            if ( d_data[i] == ch )
                return i;
        return npos;
    }
    // Index of the first occurrence of v at or after pos, or npos.
    // As before, an empty needle is reported as not found (std::string_view
    // would return pos).  The pos > size() case is rejected up front: the old
    // "size - pos" test wrapped around there and could scan far past the data.
    constexpr size_t find( string_view v, size_t pos = 0 ) const noexcept
    {
        const size_t N = v.size();
        if ( N == 0 || pos > d_size || N > d_size - pos )
            return npos;
        for ( size_t i = pos; i + N <= d_size; i++ ) {
            size_t j = 0;
            while ( j < N && d_data[i + j] == v.d_data[j] )
                j++;
            if ( j == N )
                return i;
        }
        return npos;
    }

    // compare()
    // Lexicographic comparison: negative, zero or positive when *this orders
    // before, equal to or after other.  The result used to be +/- the index of
    // the first mismatch, which is 0 for a mismatch at index 0 - so strings
    // differing only in their first character compared equal.
    constexpr int compare( const string_view& other ) const noexcept
    {
        const size_t N = std::min( size(), other.size() );
        for ( size_t i = 0; i < N; i++ ) {
            if ( d_data[i] != other.d_data[i] )
                return d_data[i] < other.d_data[i] ? -1 : 1;
        }
        // Equal over the common prefix: the shorter string orders first
        if ( size() == other.size() )
            return 0;
        return size() < other.size() ? -1 : 1;
    }
    constexpr int compare( size_t pos1, size_t n1, string_view other ) const
    {
        return substr( pos1, n1 ).compare( other );
    }
    constexpr int compare( size_t pos1, size_t n1, string_view other, size_t pos2, size_t n2 ) const
    {
        return substr( pos1, n1 ).compare( other.substr( pos2, n2 ) );
    }
    constexpr int compare( char const* s ) const { return compare( string_view( s ) ); }
    constexpr int compare( size_t pos1, size_t n1, char const* s ) const
    {
        return substr( pos1, n1 ).compare( string_view( s ) );
    }
    constexpr int compare( size_t pos1, size_t n1, char const* s, size_t n2 ) const
    {
        return substr( pos1, n1 ).compare( string_view( s, n2 ) );
    }

    // Conversion to an owning string
    explicit operator std::string() const { return std::string( begin(), end() ); }
    std::string to_string() const { return std::string( begin(), end() ); }

private:
    const char* d_data; // first character of the viewed sequence (may be null when empty)
    size_t d_size;      // number of characters in the view
};


// Non-member functions: relational operators defined through compare()
constexpr inline bool operator==( const string_view& lhs, const string_view& rhs ) noexcept
{
    return lhs.compare( rhs ) == 0;
}
constexpr inline bool operator!=( const string_view& lhs, const string_view& rhs ) noexcept
{
    return lhs.compare( rhs ) != 0;
}
constexpr inline bool operator<( const string_view& lhs, const string_view& rhs ) noexcept
{
    return lhs.compare( rhs ) < 0;
}

constexpr inline bool operator<=( const string_view& lhs, const string_view& rhs ) noexcept
{
    return lhs.compare( rhs ) <= 0;
}
constexpr inline bool operator>( const string_view& lhs, const string_view& rhs ) noexcept
{
    return lhs.compare( rhs ) > 0;
}
constexpr inline bool operator>=( const string_view& lhs, const string_view& rhs ) noexcept
{
    return lhs.compare( rhs ) >= 0;
}
// Conversions between string_view and std::string
inline std::string to_string( const string_view& v ) { return std::string( v.begin(), v.end() ); }
inline string_view to_string_view( std::string const& s )
{
    return string_view( s.data(), s.size() );
}
out, const string_view& s ) +{ + out << s.data(); + return out; +} + +} // namespace StackTrace + +#endif diff --git a/analysis/Minkowski.cpp b/analysis/Minkowski.cpp index 650d30dc..743e4751 100644 --- a/analysis/Minkowski.cpp +++ b/analysis/Minkowski.cpp @@ -1,10 +1,8 @@ #include "analysis/Minkowski.h" #include "analysis/pmmc.h" +#include "analysis/analysis.h" #include "common/Domain.h" #include "common/Communication.h" -#include "analysis/analysis.h" - -#include "shared_ptr.h" #include "common/Utilities.h" #include "common/MPI_Helpers.h" #include "IO/MeshDatabase.h" @@ -13,6 +11,8 @@ #include "ProfilerApp.h" +#include + #define PI 3.14159265359 diff --git a/analysis/Minkowski.h b/analysis/Minkowski.h index 8c39b68a..472b4489 100644 --- a/analysis/Minkowski.h +++ b/analysis/Minkowski.h @@ -2,6 +2,7 @@ #ifndef Minkowski_INC #define Minkowski_INC +#include #include #include "analysis/dcel.h" @@ -9,7 +10,6 @@ #include "common/Communication.h" #include "analysis/analysis.h" -#include "shared_ptr.h" #include "common/Utilities.h" #include "common/MPI_Helpers.h" #include "IO/MeshDatabase.h" diff --git a/analysis/TwoPhase.cpp b/analysis/TwoPhase.cpp index cb752f07..a558fea6 100644 --- a/analysis/TwoPhase.cpp +++ b/analysis/TwoPhase.cpp @@ -1,17 +1,18 @@ #include "analysis/TwoPhase.h" #include "analysis/pmmc.h" +#include "analysis/analysis.h" #include "common/Domain.h" #include "common/Communication.h" -#include "analysis/analysis.h" - -#include "shared_ptr.h" #include "common/Utilities.h" #include "common/MPI_Helpers.h" #include "IO/MeshDatabase.h" #include "IO/Reader.h" #include "IO/Writer.h" +#include + + #define BLOB_AVG_COUNT 35 // Array access for averages defined by the following diff --git a/analysis/TwoPhase.h b/analysis/TwoPhase.h index 01df349d..fddd04e8 100644 --- a/analysis/TwoPhase.h +++ b/analysis/TwoPhase.h @@ -2,16 +2,15 @@ #ifndef TwoPhase_INC #define TwoPhase_INC +#include #include #include "analysis/pmmc.h" -#include "common/Domain.h" -#include 
"common/Communication.h" #include "analysis/analysis.h" #include "analysis/distance.h" #include "analysis/Minkowski.h" - -#include "shared_ptr.h" +#include "common/Domain.h" +#include "common/Communication.h" #include "common/Utilities.h" #include "common/MPI_Helpers.h" #include "IO/MeshDatabase.h" diff --git a/cmake/SharedPtr.cmake b/cmake/SharedPtr.cmake deleted file mode 100644 index 9f610a98..00000000 --- a/cmake/SharedPtr.cmake +++ /dev/null @@ -1,170 +0,0 @@ -# Create a shared_ptr.h file in the include directory that contains -# a shared_ptr class (hopefully typedef to a compiler basic) -# Arguements: -# INSTALL_DIR - Directory to install shared_ptr.h -# NAMESPACE - Namespace to contain the shared_ptr class (may be empty) -INCLUDE( CheckCXXSourceCompiles ) -FUNCTION( CONFIGURE_SHARED_PTR INSTALL_DIR NAMESPACE ) - SET( CMAKE_REQUIRED_FLAGS ${CMAKE_CXX_FLAGS} ) - CHECK_CXX_SOURCE_COMPILES( - " #include - namespace ${NAMESPACE} { using std::shared_ptr; } - int main() { - ${NAMESPACE}::shared_ptr ptr; - return 0; - } - " - MEMORY_SHARED_PTR ) - CHECK_CXX_SOURCE_COMPILES( - " #include - namespace ${NAMESPACE} { using std::tr1::shared_ptr; } - int main() { - ${NAMESPACE}::shared_ptr ptr; - return 0; - } - " - MEMORY_TR1_SHARED_PTR ) - CHECK_CXX_SOURCE_COMPILES( - " #include - namespace ${NAMESPACE} { using std::tr1::shared_ptr; } - int main() { - ${NAMESPACE}::shared_ptr ptr; - return 0; - } - " - TR1_MEMORY_TR1_SHARED_PTR ) - GET_DIRECTORY_PROPERTY( dirs INCLUDE_DIRECTORIES ) - SET( CMAKE_REQUIRED_FLAGS "${CMAKE_CXX_FLAGS}" ) - SET( CMAKE_REQUIRED_INCLUDES ${dirs} "${BOOST_INCLUDE}" ) - CHECK_CXX_SOURCE_COMPILES( - " #include \"boost/shared_ptr.hpp\" - namespace ${NAMESPACE} { using boost::shared_ptr; } - int main() { - ${NAMESPACE}::shared_ptr ptr; - return 0; - } - " - BOOST_SHARED_PTR ) - WRITE_DUMMY_SHARED_PTR( "${NAMESPACE}" "${CMAKE_CURRENT_BINARY_DIR}/tmp/dummy_shared_ptr.h" ) - CHECK_CXX_SOURCE_COMPILES( - " #include - #include 
\"${CMAKE_CURRENT_BINARY_DIR}/tmp/dummy_shared_ptr.h\" - int main() { - ${NAMESPACE}::shared_ptr ptr; - return 0; - } - " - DUMMY_SHARED_PTR ) - IF ( NOT NAMESPACE ) - SET( NAMESPACE " " ) - ENDIF() - IF ( BOOST_SHARED_PTR ) - FILE(WRITE "${CMAKE_CURRENT_BINARY_DIR}/tmp/shared_ptr.h" "#include \"boost/shared_ptr.hpp\"\n") - FILE(APPEND "${CMAKE_CURRENT_BINARY_DIR}/tmp/shared_ptr.h" "#include \"boost/weak_ptr.hpp\"\n") - FILE(APPEND "${CMAKE_CURRENT_BINARY_DIR}/tmp/shared_ptr.h" "#include \"boost/enable_shared_from_this.hpp\"\n") - FILE(APPEND "${CMAKE_CURRENT_BINARY_DIR}/tmp/shared_ptr.h" "namespace ${NAMESPACE} {\n") - FILE(APPEND "${CMAKE_CURRENT_BINARY_DIR}/tmp/shared_ptr.h" " using boost::shared_ptr; \n") - FILE(APPEND "${CMAKE_CURRENT_BINARY_DIR}/tmp/shared_ptr.h" " using boost::dynamic_pointer_cast; \n") - FILE(APPEND "${CMAKE_CURRENT_BINARY_DIR}/tmp/shared_ptr.h" " using boost::const_pointer_cast; \n") - FILE(APPEND "${CMAKE_CURRENT_BINARY_DIR}/tmp/shared_ptr.h" " using boost::weak_ptr; \n") - FILE(APPEND "${CMAKE_CURRENT_BINARY_DIR}/tmp/shared_ptr.h" " using boost::enable_shared_from_this; \n") - FILE(APPEND "${CMAKE_CURRENT_BINARY_DIR}/tmp/shared_ptr.h" "}\n") - ELSEIF ( MEMORY_SHARED_PTR ) - IF ( ${NAMESPACE} STREQUAL "std" ) - FILE(WRITE "${CMAKE_CURRENT_BINARY_DIR}/tmp/shared_ptr.h" "#include \n") - ELSE() - FILE(WRITE "${CMAKE_CURRENT_BINARY_DIR}/tmp/shared_ptr.h" "#include \n") - FILE(APPEND "${CMAKE_CURRENT_BINARY_DIR}/tmp/shared_ptr.h" "namespace ${NAMESPACE} {\n") - FILE(APPEND "${CMAKE_CURRENT_BINARY_DIR}/tmp/shared_ptr.h" " using std::shared_ptr; \n") - FILE(APPEND "${CMAKE_CURRENT_BINARY_DIR}/tmp/shared_ptr.h" " using std::dynamic_pointer_cast; \n") - FILE(APPEND "${CMAKE_CURRENT_BINARY_DIR}/tmp/shared_ptr.h" " using std::const_pointer_cast; \n") - FILE(APPEND "${CMAKE_CURRENT_BINARY_DIR}/tmp/shared_ptr.h" " using std::weak_ptr; \n") - FILE(APPEND "${CMAKE_CURRENT_BINARY_DIR}/tmp/shared_ptr.h" " using std::enable_shared_from_this; \n") - 
FILE(APPEND "${CMAKE_CURRENT_BINARY_DIR}/tmp/shared_ptr.h" "}\n") - ENDIF() - ELSEIF ( MEMORY_TR1_SHARED_PTR ) - FILE(WRITE "${CMAKE_CURRENT_BINARY_DIR}/tmp/shared_ptr.h" "#include \n") - FILE(APPEND "${CMAKE_CURRENT_BINARY_DIR}/tmp/shared_ptr.h" "namespace ${NAMESPACE} {\n") - FILE(APPEND "${CMAKE_CURRENT_BINARY_DIR}/tmp/shared_ptr.h" " using std::tr1::shared_ptr; \n") - FILE(APPEND "${CMAKE_CURRENT_BINARY_DIR}/tmp/shared_ptr.h" " using std::tr1::dynamic_pointer_cast; \n") - FILE(APPEND "${CMAKE_CURRENT_BINARY_DIR}/tmp/shared_ptr.h" " using std::tr1::const_pointer_cast; \n") - FILE(APPEND "${CMAKE_CURRENT_BINARY_DIR}/tmp/shared_ptr.h" " using std::tr1::weak_ptr; \n") - FILE(APPEND "${CMAKE_CURRENT_BINARY_DIR}/tmp/shared_ptr.h" " using std::tr1::enable_shared_from_this; \n") - FILE(APPEND "${CMAKE_CURRENT_BINARY_DIR}/tmp/shared_ptr.h" "}\n") - ELSEIF ( TR1_MEMORY_TR1_SHARED_PTR ) - FILE(WRITE "${CMAKE_CURRENT_BINARY_DIR}/tmp/shared_ptr.h" "#include \n") - FILE(APPEND "${CMAKE_CURRENT_BINARY_DIR}/tmp/shared_ptr.h" "namespace ${NAMESPACE} {\n") - FILE(APPEND "${CMAKE_CURRENT_BINARY_DIR}/tmp/shared_ptr.h" " using std::tr1::shared_ptr; \n") - FILE(APPEND "${CMAKE_CURRENT_BINARY_DIR}/tmp/shared_ptr.h" " using std::tr1::dynamic_pointer_cast; \n") - FILE(APPEND "${CMAKE_CURRENT_BINARY_DIR}/tmp/shared_ptr.h" " using std::tr1::const_pointer_cast; \n") - FILE(APPEND "${CMAKE_CURRENT_BINARY_DIR}/tmp/shared_ptr.h" " using std::tr1::weak_ptr; \n") - FILE(APPEND "${CMAKE_CURRENT_BINARY_DIR}/tmp/shared_ptr.h" " using std::tr1::enable_shared_from_this; \n") - FILE(APPEND "${CMAKE_CURRENT_BINARY_DIR}/tmp/shared_ptr.h" "}\n") - ELSEIF ( DUMMY_SHARED_PTR ) - MESSAGE("Warning: No valid shared_ptr found, using dummy shared_ptr" ) - WRITE_DUMMY_SHARED_PTR( "${NAMESPACE}" "${CMAKE_CURRENT_BINARY_DIR}/tmp/shared_ptr.h" ) - ELSE() - MESSAGE(FATAL_ERROR "No shared_ptr availible") - ENDIF() - EXECUTE_PROCESS( COMMAND ${CMAKE_COMMAND} -E copy_if_different - 
"${CMAKE_CURRENT_BINARY_DIR}/tmp/shared_ptr.h" "${INSTALL_DIR}/shared_ptr.h" ) -ENDFUNCTION() - - -FUNCTION( WRITE_DUMMY_SHARED_PTR NAMESPACE FILENAME ) - FILE(WRITE "${FILENAME}" "#ifndef DUMMY_SHARED_PTR_INC\n") - FILE(APPEND "${FILENAME}" "#define DUMMY_SHARED_PTR_INC\n") - FILE(APPEND "${FILENAME}" "namespace dummy {\n\n") - FILE(APPEND "${FILENAME}" "template void DefaultDeleter(T* p) {delete p;}\n\n") - FILE(APPEND "${FILENAME}" "template class shared_ptr {\n") - FILE(APPEND "${FILENAME}" "public:\n") - FILE(APPEND "${FILENAME}" " typedef void (*D)(T*);\n") - FILE(APPEND "${FILENAME}" " shared_ptr( ): obj(NULL), deleter(DefaultDeleter), count(NULL) {}\n") - FILE(APPEND "${FILENAME}" " shared_ptr( T *ptr, void (*D)(T*)=DefaultDeleter):\n") - FILE(APPEND "${FILENAME}" " obj(ptr), deleter(D), count(NULL) { if (ptr) { count = new int; (*count)=1; } } \n") - FILE(APPEND "${FILENAME}" " shared_ptr( const shared_ptr& rhs ): \n") - FILE(APPEND "${FILENAME}" " obj(rhs.get()), deleter(reinterpret_cast(rhs.deleter)), count(rhs.count) { if ( count!=NULL ) { ++(*count); } } \n") - FILE(APPEND "${FILENAME}" " template shared_ptr( const shared_ptr& rhs ): \n") - FILE(APPEND "${FILENAME}" " obj(rhs.get()), deleter(reinterpret_cast(rhs.deleter)), count(rhs.count) { if ( count!=NULL ) { ++(*count); } } \n") - FILE(APPEND "${FILENAME}" " shared_ptr& operator=( const shared_ptr& rhs )\n") - FILE(APPEND "${FILENAME}" " { if (this==&rhs) { return *this;} reset(); obj=rhs.obj; deleter=reinterpret_cast(rhs.deleter); count=rhs.count; ++(*count); return *this; } \n") - FILE(APPEND "${FILENAME}" " ~shared_ptr( ) { reset(); }\n") - FILE(APPEND "${FILENAME}" " void reset( T *ptr ) { reset(); obj=ptr; count=new int; (*count)=1; }\n") - FILE(APPEND "${FILENAME}" " void reset( void ) { \n") - FILE(APPEND "${FILENAME}" " if ( count!=NULL) { int tmp=--(*count); if ( tmp==0 ) { deleter(obj); delete count; } } \n") - FILE(APPEND "${FILENAME}" " obj=NULL; count=NULL; \n") - FILE(APPEND 
"${FILENAME}" " }\n") - FILE(APPEND "${FILENAME}" " T* get( ) const { return obj; } \n") - FILE(APPEND "${FILENAME}" " T* operator->( ) const { return obj; } \n") - FILE(APPEND "${FILENAME}" " const T& operator*( ) const { return *obj; } \n") - FILE(APPEND "${FILENAME}" " bool operator==( const T * rhs ) const { return obj==rhs; } \n") - FILE(APPEND "${FILENAME}" " bool operator!=( const T * rhs ) const { return obj!=rhs; } \n") - FILE(APPEND "${FILENAME}" "protected:\n") - FILE(APPEND "${FILENAME}" " T *obj;\n") - FILE(APPEND "${FILENAME}" " void (*deleter)(T*);\n") - FILE(APPEND "${FILENAME}" " volatile int *count;\n") - FILE(APPEND "${FILENAME}" "template friend shared_ptr dynamic_pointer_cast( shared_ptr const & );\n") - FILE(APPEND "${FILENAME}" "template friend shared_ptr const_pointer_cast( shared_ptr const & );\n") - FILE(APPEND "${FILENAME}" "template friend class shared_ptr;\n") - FILE(APPEND "${FILENAME}" "};\n\n") - FILE(APPEND "${FILENAME}" "template shared_ptr dynamic_pointer_cast( shared_ptr const & rhs ) {\n") - FILE(APPEND "${FILENAME}" " T* obj = dynamic_cast(rhs.obj);\n") - FILE(APPEND "${FILENAME}" " shared_ptr ptr;\n") - FILE(APPEND "${FILENAME}" " if ( obj!=NULL ) { ptr.obj = obj; ptr.count=rhs.count; ++(*ptr.count); }\n") - FILE(APPEND "${FILENAME}" " return ptr;\n}\n") - FILE(APPEND "${FILENAME}" "template shared_ptr const_pointer_cast( shared_ptr const & rhs ) {\n") - FILE(APPEND "${FILENAME}" " T* obj = const_cast(rhs.obj);\n") - FILE(APPEND "${FILENAME}" " shared_ptr ptr;\n") - FILE(APPEND "${FILENAME}" " if ( obj!=NULL ) { ptr.obj = obj; ptr.count=rhs.count; ++(*ptr.count); }\n") - FILE(APPEND "${FILENAME}" " return ptr;\n}\n") - FILE(APPEND "${FILENAME}" "\n} // namespace dummy\n") - FILE(APPEND "${FILENAME}" "\n\n") - FILE(APPEND "${FILENAME}" "namespace ${NAMESPACE} {\n") - FILE(APPEND "${FILENAME}" " using dummy::shared_ptr; \n") - FILE(APPEND "${FILENAME}" " using dummy::dynamic_pointer_cast; \n") - FILE(APPEND "${FILENAME}" " using 
dummy::const_pointer_cast; \n") - FILE(APPEND "${FILENAME}" "}\n\n") - FILE(APPEND "${FILENAME}" "#endif\n") -ENDFUNCTION() - - diff --git a/common/Array.hpp b/common/Array.hpp index 6b2dd16f..fe915ff7 100644 --- a/common/Array.hpp +++ b/common/Array.hpp @@ -1117,9 +1117,8 @@ Array Array::cat( const std::vector< * Interpolate * ********************************************************/ template -struct is_compatible_double : std::integral_constant::type>::value || - std::is_integral::type>::value> { +struct is_compatible_double + : std::integral_constant::value || std::is_integral::value> { }; template inline typename std::enable_if::value, TYPE>::type Array_interp_1D( diff --git a/common/MPI_Helpers.cpp b/common/MPI_Helpers.cpp index 23924f21..736a2f02 100644 --- a/common/MPI_Helpers.cpp +++ b/common/MPI_Helpers.cpp @@ -36,7 +36,7 @@ template<> MPI_Datatype getMPItype() { ********************************************************/ // unsigned char template<> -size_t packsize( const unsigned char& rhs ) +size_t packsize( const unsigned char& ) { return sizeof(unsigned char); } @@ -52,7 +52,7 @@ void unpack( unsigned char& data, const char *buffer ) } // char template<> -size_t packsize( const char& rhs ) +size_t packsize( const char& ) { return sizeof(char); } @@ -68,7 +68,7 @@ void unpack( char& data, const char *buffer ) } // int template<> -size_t packsize( const int& rhs ) +size_t packsize( const int& ) { return sizeof(int); } @@ -84,7 +84,7 @@ void unpack( int& data, const char *buffer ) } // unsigned int template<> -size_t packsize( const unsigned int& rhs ) +size_t packsize( const unsigned int& ) { return sizeof(unsigned int); } @@ -100,7 +100,7 @@ void unpack( unsigned int& data, const char *buffer ) } // size_t template<> -size_t packsize( const size_t& rhs ) +size_t packsize( const size_t& ) { return sizeof(size_t); } diff --git a/common/StackTrace.cpp b/common/StackTrace.cpp deleted file mode 100644 index 8b9e4015..00000000 --- a/common/StackTrace.cpp +++ 
/dev/null @@ -1,1876 +0,0 @@ -#include "common/StackTrace.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - - -#define perr std::cerr - - -// Detect the OS -// clang-format off -#if defined( WIN32 ) || defined( _WIN32 ) || defined( WIN64 ) || defined( _WIN64 ) || defined( _MSC_VER ) - #define USE_WINDOWS - #define NOMINMAX -#elif defined( __APPLE__ ) - #define USE_MAC - #define USE_NM -#elif defined( __linux ) || defined( __linux__ ) || defined( __unix ) || defined( __posix ) - #define USE_LINUX - #define USE_NM -#else - #error Unknown OS -#endif -// clang-format on - - -// Include system dependent headers -// clang-format off -// Detect the OS and include system dependent headers -#ifdef USE_WINDOWS - #include - #include - #include - #include - #include - #include - #include - #include - #pragma comment( lib, "version.lib" ) // for "VerQueryValue" -#else - #include - #include - #include - #include - #include - #include - #include -#endif -#ifdef USE_MAC - #include - #include - #include - #include -#endif -// clang-format on - - -#ifdef __GNUC__ -#define USE_ABI -#include -#endif - - -#ifndef NULL_USE -#define NULL_USE( variable ) \ - do { \ - if ( 0 ) { \ - char *temp = (char *) &variable; \ - temp++; \ - } \ - } while ( 0 ) -#endif - - -// Set the callstack signal -#ifdef SIGRTMIN -#define CALLSTACK_SIG SIGRTMIN + 4 -#else -#define CALLSTACK_SIG SIGUSR1 -#define SIGRTMIN SIGUSR1 -#define SIGRTMAX SIGUSR1 -#endif - - -// Helper thread -static std::shared_ptr globalMonitorThread; - - -// Utility to break a string by a newline -static inline std::vector breakString( const std::string &str ) -{ - std::vector strvec; - size_t i1 = 0; - size_t i2 = std::min( str.find( '\n', i1 ), str.length() ); - while ( i1 < str.length() ) { - strvec.push_back( str.substr( i1, i2 - i1 ) ); - i1 = i2 + 1; - i2 = std::min( str.find( '\n', i1 ), str.length() ); - } - return strvec; -} - - -// Function to 
replace all instances of a string with another -static inline void strrep( std::string &str, const std::string &s, const std::string &r ) -{ - size_t i = 0; - while ( i < str.length() ) { - i = str.find( s, i ); - if ( i == std::string::npos ) { - break; - } - str.replace( i, s.length(), r ); - i += r.length(); - } -} - - -// Utility to strip the path from a filename -static inline std::string stripPath( const std::string &filename ) -{ - if ( filename.empty() ) - return std::string(); - int i = 0; - for ( i = (int) filename.size() - 1; i >= 0 && filename[i] != 47 && filename[i] != 92; i-- ) { - } - i = std::max( 0, i + 1 ); - return filename.substr( i ); -} - - -// Inline function to subtract two addresses returning the absolute difference -static inline void *subtractAddress( void *a, void *b ) -{ - return reinterpret_cast( - std::abs( reinterpret_cast( a ) - reinterpret_cast( b ) ) ); -} - - -#ifdef USE_WINDOWS -static BOOL __stdcall readProcMem( HANDLE hProcess, - DWORD64 qwBaseAddress, - PVOID lpBuffer, - DWORD nSize, - LPDWORD lpNumberOfBytesRead ) -{ - SIZE_T st; - BOOL bRet = ReadProcessMemory( hProcess, (LPVOID) qwBaseAddress, lpBuffer, nSize, &st ); - *lpNumberOfBytesRead = (DWORD) st; - return bRet; -} -static inline std::string getCurrentDirectory() -{ - char temp[1024] = { 0 }; - GetCurrentDirectoryA( sizeof( temp ), temp ); - return temp; -} -namespace StackTrace { -BOOL GetModuleListTH32( HANDLE hProcess, DWORD pid ); -BOOL GetModuleListPSAPI( HANDLE hProcess ); -DWORD LoadModule( HANDLE hProcess, LPCSTR img, LPCSTR mod, DWORD64 baseAddr, DWORD size ); -void LoadModules(); -}; // namespace StackTrace -#endif - - -// Functions to copy data -static inline char *copy_in( size_t N, const void *data, char *ptr ) -{ - memcpy( ptr, data, N ); - return ptr + N; -} -static inline const char *copy_out( size_t N, void *data, const char *ptr ) -{ - memcpy( data, ptr, N ); - return ptr + N; -} - - 
-/**************************************************************************** - * Utility to call system command and return output * - ****************************************************************************/ -#ifdef USE_WINDOWS -#define popen _popen -#define pclose _pclose -#endif -std::string StackTrace::exec( const std::string &cmd, int &code ) -{ - signal( SIGCHLD, SIG_DFL ); // Clear child exited - FILE *pipe = popen( cmd.c_str(), "r" ); - if ( pipe == nullptr ) - return std::string(); - std::string result = ""; - result.reserve( 1024 ); - while ( !feof( pipe ) ) { - char buffer[257]; - buffer[256] = 0; - if ( fgets( buffer, 128, pipe ) != nullptr ) - result += buffer; - } - auto status = pclose( pipe ); - code = WEXITSTATUS( status ); - return result; -} - - -/**************************************************************************** - * stack_info * - ****************************************************************************/ -void StackTrace::stack_info::clear() -{ - address = nullptr; - address2 = nullptr; - object.clear(); - function.clear(); - filename.clear(); - line = -1; -} -bool StackTrace::stack_info::operator==( const StackTrace::stack_info &rhs ) const -{ - if ( address == rhs.address ) - return true; - if ( address2 == rhs.address2 && object == rhs.object ) - return true; - return false; -} -bool StackTrace::stack_info::operator!=( const StackTrace::stack_info &rhs ) const -{ - return !operator==( rhs ); -} -int StackTrace::stack_info::getAddressWidth() const -{ - auto addr = reinterpret_cast( address ); - if ( addr <= 0xFFFF ) - return 4; - if ( addr <= 0xFFFFFFFF ) - return 8; - if ( addr <= 0xFFFFFFFFFFFF ) - return 12; - return 16; -} -std::string -StackTrace::stack_info::print( int widthAddress, int widthObject, int widthFunction ) const -{ - char tmp1[64], tmp2[64]; - sprintf( tmp1, "0x%%0%illx: ", widthAddress ); - sprintf( tmp2, tmp1, reinterpret_cast( address ) ); - std::string stack( tmp2 ); - sprintf( tmp2, "%i", line ); - 
std::string line_str( tmp2 ); - size_t N = stack.length(); - stack += stripPath( object ); - stack.resize( std::max( stack.size(), N + widthObject ), ' ' ); - N = stack.length() + 2; - stack += " " + function; - if ( !filename.empty() && line > 0 ) { - stack.resize( std::max( stack.size(), N + widthFunction ), ' ' ); - stack += " " + stripPath( filename ) + ":" + line_str; - } else if ( !filename.empty() ) { - stack.resize( std::max( stack.size(), N + widthFunction ), ' ' ); - stack += " " + stripPath( filename ); - } else if ( line > 0 ) { - stack += " : " + line_str; - } - return stack; -} -size_t StackTrace::stack_info::size() const -{ - return 2 * sizeof( void * ) + 4 * sizeof( int ) + object.size() + function.size() + - filename.size(); -} -char *StackTrace::stack_info::pack( char *ptr ) const -{ - int Nobj = object.size(); - int Nfun = function.size(); - int Nfile = filename.size(); - ptr = copy_in( sizeof( void * ), &address, ptr ); - ptr = copy_in( sizeof( void * ), &address2, ptr ); - ptr = copy_in( sizeof( int ), &Nobj, ptr ); - ptr = copy_in( sizeof( int ), &Nfun, ptr ); - ptr = copy_in( sizeof( int ), &Nfile, ptr ); - ptr = copy_in( sizeof( int ), &line, ptr ); - ptr = copy_in( Nobj, object.data(), ptr ); - ptr = copy_in( Nfun, function.data(), ptr ); - ptr = copy_in( Nfile, filename.data(), ptr ); - return ptr; -} -const char *StackTrace::stack_info::unpack( const char *ptr ) -{ - int Nobj, Nfun, Nfile; - ptr = copy_out( sizeof( void * ), &address, ptr ); - ptr = copy_out( sizeof( void * ), &address2, ptr ); - ptr = copy_out( sizeof( int ), &Nobj, ptr ); - ptr = copy_out( sizeof( int ), &Nfun, ptr ); - ptr = copy_out( sizeof( int ), &Nfile, ptr ); - ptr = copy_out( sizeof( int ), &line, ptr ); - object.resize( Nobj ); - function.resize( Nfun ); - filename.resize( Nfile ); - ptr = copy_out( Nobj, &object.front(), ptr ); - ptr = copy_out( Nfun, &function.front(), ptr ); - ptr = copy_out( Nfile, &filename.front(), ptr ); - return ptr; -} -std::vector 
StackTrace::stack_info::packArray( const std::vector &data ) -{ - size_t size = sizeof( int ); - for ( const auto &i : data ) - size += i.size(); - std::vector vec( size, 0 ); - char *ptr = vec.data(); - int N = data.size(); - ptr = copy_in( sizeof( int ), &N, ptr ); - for ( const auto &i : data ) - ptr = i.pack( ptr ); - return vec; -} -std::vector StackTrace::stack_info::unpackArray( const char *ptr ) -{ - int N; - ptr = copy_out( sizeof( int ), &N, ptr ); - std::vector data( N ); - for ( auto &i : data ) - ptr = i.unpack( ptr ); - return data; -} -#ifdef USE_MPI -static std::vector pack( const std::vector> &data ) -{ - size_t size = sizeof( int ); - for ( const auto &i : data ) { - size += sizeof( int ); - for ( size_t j = 0; j < i.size(); j++ ) - size += i[j].size(); - } - std::vector out( size, 0 ); - char *ptr = out.data(); - int N = data.size(); - ptr = copy_in( sizeof( int ), &N, ptr ); - for ( int i = 0; i < N; i++ ) { - int M = data[i].size(); - ptr = copy_in( sizeof( int ), &M, ptr ); - for ( int j = 0; j < M; j++ ) - ptr = data[i][j].pack( ptr ); - } - return out; -} -static std::vector> unpack( const std::vector &in ) -{ - const char *ptr = in.data(); - int N; - ptr = copy_out( sizeof( int ), &N, ptr ); - std::vector> data( N ); - for ( int i = 0; i < N; i++ ) { - int M; - ptr = copy_out( sizeof( int ), &M, ptr ); - data[i].resize( M ); - for ( int j = 0; j < M; j++ ) - ptr = data[i][j].unpack( ptr ); - } - return data; -} -#endif - - -/**************************************************************************** - * multi_stack_info * - ****************************************************************************/ -StackTrace::multi_stack_info::multi_stack_info( const std::vector &rhs ) -{ - operator=( rhs ); -} -StackTrace::multi_stack_info &StackTrace::multi_stack_info:: -operator=( const std::vector &rhs ) -{ - clear(); - if ( rhs.empty() ) - return *this; - N = 1; - stack = rhs[0]; - if ( rhs.size() > 1 ) - add( rhs.size() - 1, &rhs[1] ); - return 
*this; -} -void StackTrace::multi_stack_info::clear() -{ - N = 0; - stack.clear(); - children.clear(); -} -void StackTrace::multi_stack_info::print2( const std::string &prefix, - int w[3], - std::vector &text ) const -{ - if ( stack == stack_info() ) { - for ( const auto &child : children ) - child.print2( "", w, text ); - return; - } - std::string line = prefix + "[" + std::to_string( N ) + "] " + stack.print( w[0], w[1], w[2] ); - text.push_back( line ); - std::string prefix2 = prefix + " "; - for ( size_t i = 0; i < children.size(); i++ ) { - const auto &child = children[i]; - std::vector text2; - child.print2( "", w, text2 ); - for ( size_t j = 0; j < text2.size(); j++ ) { - std::string line = prefix2 + text2[j]; - if ( children.size() > 1 && j > 0 && i < children.size() - 1 ) - line[prefix2.size()] = '|'; - text.push_back( line ); - } - } -} -std::vector StackTrace::multi_stack_info::print( const std::string &prefix ) const -{ - std::vector text; - int w[3] = { 0 }; - w[0] = getAddressWidth(); - w[1] = getObjectWidth(); - w[2] = getFunctionWidth(); - print2( prefix, w, text ); - return text; -} -int StackTrace::multi_stack_info::getAddressWidth() const -{ - int w = stack.getAddressWidth(); - for ( const auto &child : children ) - w = std::max( w, child.getAddressWidth() ); - return w; -} -int StackTrace::multi_stack_info::getObjectWidth() const -{ - int w = std::min( stripPath( stack.object ).size() + 1, 20 ); - for ( const auto &child : children ) - w = std::max( w, child.getObjectWidth() ); - return w; -} -int StackTrace::multi_stack_info::getFunctionWidth() const -{ - int w = std::min( stack.function.size() + 1, 40 ); - for ( const auto &child : children ) - w = std::max( w, child.getFunctionWidth() ); - return w; -} -void StackTrace::multi_stack_info::add( size_t len, const stack_info *stack ) -{ - if ( len == 0 ) - return; - const auto &s = stack[len - 1]; - for ( auto &i : children ) { - if ( i.stack == s ) { - i.N++; - if ( len > 1 ) - i.add( len - 1, 
stack ); - return; - } - } - children.resize( children.size() + 1 ); - children.back().N = 1; - children.back().stack = s; - if ( len > 1 ) - children.back().add( len - 1, stack ); -} - - -/**************************************************************************** - * Function to find an entry * - ****************************************************************************/ -template -inline size_t findfirst( const std::vector &X, TYPE Y ) -{ - if ( X.empty() ) - return 0; - size_t lower = 0; - size_t upper = X.size() - 1; - if ( X[lower] >= Y ) - return lower; - if ( X[upper] < Y ) - return upper; - while ( ( upper - lower ) != 1 ) { - size_t value = ( upper + lower ) / 2; - if ( X[value] >= Y ) - upper = value; - else - lower = value; - } - return upper; -} - - -/**************************************************************************** - * Function to get the executable name * - ****************************************************************************/ -static char global_exe_name[1000] = { 0 }; -static bool setGlobalExecutableName( char *exe ) -{ - try { -#ifdef USE_LINUX - auto *buf = new char[0x10000]; - int len = ::readlink( "/proc/self/exe", buf, 0x10000 ); - if ( len != -1 ) { - buf[len] = '\0'; - strcpy( exe, buf ); - } - delete[] buf; -#elif defined( USE_MAC ) - uint32_t size = 0x10000; - char *buf = new char[size]; - memset( buf, 0, size ); - if ( _NSGetExecutablePath( buf, &size ) == 0 ) - strcpy( exe, buf ); - delete[] buf; -#elif defined( USE_WINDOWS ) - DWORD size = 0x10000; - char *buf = new char[size]; - memset( buf, 0, size ); - GetModuleFileName( nullptr, buf, size ); - strcpy( exe, buf ); - delete[] buf; -#endif - } catch ( ... 
) { - } - return true; -} -static bool global_exe_name_set = setGlobalExecutableName( global_exe_name ); -std::string StackTrace::getExecutable() -{ - if ( !global_exe_name_set ) - global_exe_name_set = setGlobalExecutableName( global_exe_name ); - return std::string( global_exe_name ); -} - - -/**************************************************************************** - * Function to get symbols for the executable from nm (if availible) * - * Note: this function maintains an internal cached copy to prevent * - * exccessive calls to nm. This function also uses a lock to ensure * - * thread safety. * - ****************************************************************************/ -std::mutex getSymbols_mutex; -struct global_symbols_struct { - std::vector address; - std::vector type; - std::vector obj; - int error; -} global_symbols; -static const global_symbols_struct &getSymbols2() -{ - static bool loaded = false; - static global_symbols_struct data; - // Load the symbol tables if they have not been loaded - if ( !loaded ) { - getSymbols_mutex.lock(); - if ( !loaded ) { - loaded = true; -#ifdef USE_NM - try { - char cmd[1024]; -#ifdef USE_LINUX - sprintf( cmd, "nm -n --demangle %s", global_exe_name ); -#elif defined( USE_MAC ) - sprintf( cmd, "nm -n %s | c++filt", global_exe_name ); -#else -#error Unknown OS using nm -#endif - int code; - auto output = breakString( StackTrace::exec( cmd, code ) ); - for ( const auto &line : output ) { - if ( line.empty() ) - continue; - if ( line[0] == ' ' ) - continue; - auto *a = const_cast( line.c_str() ); - char *b = strchr( a, ' ' ); - if ( b == nullptr ) - continue; - b[0] = 0; - b++; - char *c = strchr( b, ' ' ); - if ( c == nullptr ) - continue; - c[0] = 0; - c++; - char *d = strchr( c, '\n' ); - if ( d ) - d[0] = 0; - size_t add = strtoul( a, nullptr, 16 ); - data.address.push_back( reinterpret_cast( add ) ); - data.type.push_back( b[0] ); - data.obj.emplace_back( c ); - } - } catch ( ... 
) { - data.error = -3; - } - data.error = 0; -#else - data.error = -1; -#endif - } - getSymbols_mutex.unlock(); - } - return data; -} -int StackTrace::getSymbols( std::vector &address, - std::vector &type, - std::vector &obj ) -{ - const global_symbols_struct &data = getSymbols2(); - address = data.address; - type = data.type; - obj = data.obj; - return data.error; -} - - -/**************************************************************************** - * Function to get call stack info * - ****************************************************************************/ -#ifdef USE_MAC -static void *loadAddress( const std::string &object ) -{ - static std::map obj_map; - if ( obj_map.empty() ) { - uint32_t numImages = _dyld_image_count(); - for ( uint32_t i = 0; i < numImages; i++ ) { - const struct mach_header *header = _dyld_get_image_header( i ); - const char *name = _dyld_get_image_name( i ); - const char *p = strrchr( name, '/' ); - struct mach_header *address = const_cast( header ); - obj_map.insert( std::pair( p + 1, address ) ); - // printf(" module=%s, address=%p\n", p + 1, header); - } - } - auto it = obj_map.find( object ); - void *address = 0; - if ( it != obj_map.end() ) { - address = it->second; - } else { - it = obj_map.find( stripPath( object ) ); - if ( it != obj_map.end() ) - address = it->second; - } - // printf("%s: 0x%016llx\n",object.c_str(),address); - return address; -} -static std::tuple split_atos( const std::string &buf ) -{ - if ( buf.empty() ) - return std::tuple(); - // Get the function - size_t index = buf.find( " (in " ); - if ( index == std::string::npos ) - return std::make_tuple( - buf.substr( 0, buf.length() - 1 ), std::string(), std::string(), 0 ); - std::string fun = buf.substr( 0, index ); - std::string tmp = buf.substr( index + 5 ); - // Get the object - index = tmp.find( ')' ); - std::string obj = tmp.substr( 0, index ); - tmp = tmp.substr( index + 1 ); - // Get the filename and line number - size_t p1 = tmp.find( '(' ); - size_t 
p2 = tmp.find( ')' ); - tmp = tmp.substr( p1 + 1, p2 - p1 - 1 ); - index = tmp.find( ':' ); - std::string file; - int line = 0; - if ( index != std::string::npos ) { - file = tmp.substr( 0, index ); - line = std::stoi( tmp.substr( index + 1 ) ); - } else if ( p1 != std::string::npos ) { - file = tmp; - } - return std::make_tuple( fun, obj, file, line ); -} -#endif -#ifdef USE_LINUX -using uint_p = uint64_t; -#elif defined( USE_MAC ) -typedef unsigned long uint_p; -#endif -#if defined( USE_LINUX ) || defined( USE_MAC ) -static inline std::string generateCmd( const std::string &s1, - const std::string &s2, - const std::string &s3, - std::vector addresses, - const std::string &s4 ) -{ - std::string cmd = s1 + s2 + s3; - for ( auto &addresse : addresses ) { - char tmp[32]; - sprintf( tmp, "%lx ", reinterpret_cast( addresse ) ); - cmd += tmp; - } - cmd += s4; - return cmd; -} -#endif -// clang-format off -static void getFileAndLineObject( std::vector &info ) -{ - if ( info.empty() ) - return; - // This gets the file and line numbers for multiple stack lines in the same object - #if defined( USE_LINUX ) - // Create the call command - std::vector address_list(info.size(),nullptr); - for (size_t i=0; iaddress; - if ( info[i]->object.find( ".so" ) != std::string::npos ) - address_list[i] = info[i]->address2; - if ( info[i]->object.find( ".mexa64" ) != std::string::npos ) - address_list[i] = info[i]->address2; - } - std::string cmd = generateCmd( "addr2line -C -e ", info[0]->object, - " -f -i ", address_list, " 2> /dev/null" ); - // Get the function/line/file - int code; - auto cmd_output = StackTrace::exec( cmd, code ); - auto output = breakString( cmd_output ); - if ( output.size() != 2*info.size() ) - return; - // Add the results to info - for (size_t i=0; ifunction.empty() ) - info[i]->function = output[2*i+0]; - // get file and line - const char *buf = output[2*i+1].c_str(); - if ( buf[0] != '?' 
&& buf[0] != 0 ) { - size_t j = 0; - for ( j = 0; j < 4095 && buf[j] != ':'; j++ ) { - } - info[i]->filename = std::string( buf, j ); - info[i]->line = atoi( &buf[j + 1] ); - } - } - #elif defined( USE_MAC ) - // Create the call command - void* load_address = loadAddress( info[0]->object ); - if ( load_address == nullptr ) - return; - std::vector address_list(info.size(),nullptr); - for (size_t i=0; iaddress; - // Call atos to get the object info - char tmp[64]; - sprintf( tmp, " -l %lx ", (uint_p) load_address ); - std::string cmd = generateCmd( "atos -o ", info[0]->object, - tmp, address_list, " 2> /dev/null" ); - // Get the function/line/file - int code; - auto cmd_output = StackTrace::exec( cmd, code ); - auto output = breakString( cmd_output ); - if ( output.size() != info.size() ) - return; - // Parse the output for function, file and line info - for ( size_t i=0; ifunction.empty() ) - info[i]->function = std::get<0>(data); - if ( info[i]->object.empty() ) - info[i]->object = std::get<1>(data); - if ( info[i]->filename.empty() ) - info[i]->filename = std::get<2>(data); - if ( info[i]->line==0 ) - info[i]->line = std::get<3>(data); - } - #endif -} -static void getFileAndLine( std::vector &info ) -{ - // Build a list of stack elements for each object - std::map> obj_map; - for (auto & i : info) { - auto& list = obj_map[i.object]; - list.emplace_back( &i ); - } - // For each object, get the file/line numbers for all entries - for ( auto& entry : obj_map ) - getFileAndLineObject( entry.second ); -} -// Try to use the global symbols to decode info about the stack -static void getDataFromGlobalSymbols( StackTrace::stack_info &info ) -{ - const global_symbols_struct &data = getSymbols2(); - if ( data.error == 0 ) { - size_t index = findfirst( global_symbols.address, info.address ); - if ( index > 0 ) - info.object = global_symbols.obj[index - 1]; - else - info.object = std::string(global_exe_name); - } -} -static void signal_handler( int sig ) -{ - printf("Signal 
caught acquiring stack (%i)\n",sig); - StackTrace::setErrorHandlers( [](std::string,StackTrace::terminateType) { exit( -1 ); } ); -} -StackTrace::stack_info StackTrace::getStackInfo( void *address ) -{ - return getStackInfo( std::vector(1,address) )[0]; -} -std::vector StackTrace::getStackInfo( const std::vector& address ) -{ - // Temporarily handle signals to prevent recursion on the stack - auto prev_handler = signal( SIGINT, signal_handler ); - // Get the detailed stack info - std::vector info(address.size()); - try { - #ifdef USE_WINDOWS - IMAGEHLP_SYMBOL64 pSym[1024]; - memset( pSym, 0, sizeof( pSym ) ); - pSym->SizeOfStruct = sizeof( IMAGEHLP_SYMBOL64 ); - pSym->MaxNameLength = 1024; - - IMAGEHLP_MODULE64 Module; - memset( &Module, 0, sizeof( Module ) ); - Module.SizeOfStruct = sizeof( Module ); - - HANDLE pid = GetCurrentProcess(); - - for (size_t i=0; i( address[i] ); - DWORD64 offsetFromSymbol; - if ( SymGetSymFromAddr( pid, address2, &offsetFromSymbol, pSym ) != FALSE ) { - char name[8192]={0}; - DWORD rtn = UnDecorateSymbolName( pSym->Name, name, sizeof(name)-1, UNDNAME_COMPLETE ); - if ( rtn == 0 ) - info[i].function = std::string(pSym->Name); - else - info[i].function = std::string(name); - } else { - printf( "ERROR: SymGetSymFromAddr (%d,%p)\n", GetLastError(), address2 ); - } - - // Get line number - IMAGEHLP_LINE64 Line; - memset( &Line, 0, sizeof( Line ) ); - Line.SizeOfStruct = sizeof( Line ); - DWORD offsetFromLine; - if ( SymGetLineFromAddr64( pid, address2, &offsetFromLine, &Line ) != FALSE ) { - info[i].line = Line.LineNumber; - info[i].filename = std::string( Line.FileName ); - } else { - info[i].line = 0; - info[i].filename = std::string(); - } - - // Get the object - if ( SymGetModuleInfo64( pid, address2, &Module ) != FALSE ) { - //info[i].object = std::string( Module.ModuleName ); - info[i].object = std::string( Module.LoadedImageName ); - //info[i].baseOfImage = Module.BaseOfImage; - } - } - #else - for (size_t i=0; i(t2-t1).count()<0.15 
) { - std::this_thread::yield(); - t2 = std::chrono::high_resolution_clock::now(); - } - count = std::max(thread_backtrace_count,0); - memcpy( buffer, thread_backtrace, count*sizeof(void*) ); - thread_backtrace_count = -1; - thread_backtrace_mutex.unlock(); - } - #elif defined( USE_WINDOWS ) - #if defined(DBGHELP) - - // Load the modules for the stack trace - LoadModules(); - - // Initialize stackframe for first call - ::CONTEXT context; - memset( &context, 0, sizeof( context ) ); - context.ContextFlags = CONTEXT_FULL; - RtlCaptureContext( &context ); - STACKFRAME64 frame; // in/out stackframe - memset( &frame, 0, sizeof( frame ) ); - #ifdef _M_IX86 - DWORD imageType = IMAGE_FILE_MACHINE_I386; - frame.AddrPC.Offset = context.Eip; - frame.AddrPC.Mode = AddrModeFlat; - frame.AddrFrame.Offset = context.Ebp; - frame.AddrFrame.Mode = AddrModeFlat; - frame.AddrStack.Offset = context.Esp; - frame.AddrStack.Mode = AddrModeFlat; - #elif _M_X64 - DWORD imageType = IMAGE_FILE_MACHINE_AMD64; - frame.AddrPC.Offset = context.Rip; - frame.AddrPC.Mode = AddrModeFlat; - frame.AddrFrame.Offset = context.Rsp; - frame.AddrFrame.Mode = AddrModeFlat; - frame.AddrStack.Offset = context.Rsp; - frame.AddrStack.Mode = AddrModeFlat; - #elif _M_IA64 - DWORD imageType = IMAGE_FILE_MACHINE_IA64; - frame.AddrPC.Offset = context.StIIP; - frame.AddrPC.Mode = AddrModeFlat; - frame.AddrFrame.Offset = context.IntSp; - frame.AddrFrame.Mode = AddrModeFlat; - frame.AddrBStore.Offset = context.RsBSP; - frame.AddrBStore.Mode = AddrModeFlat; - frame.AddrStack.Offset = context.IntSp; - frame.AddrStack.Mode = AddrModeFlat; - #else - #error "Platform not supported!" 
- #endif - - auto pid = GetCurrentProcess(); - for ( int frameNum = 0; frameNum<1024; ++frameNum ) { - BOOL rtn = StackWalk64( imageType, pid, tid, &frame, &context, readProcMem, - SymFunctionTableAccess, SymGetModuleBase64, NULL ); - if ( !rtn ) { - printf( "ERROR: StackWalk64 (%p)\n", frame.AddrPC.Offset ); - break; - } - if ( frame.AddrPC.Offset != 0 ) { - buffer[count] = reinterpret_cast( frame.AddrPC.Offset ) ); - count++; - } - if ( frame.AddrReturn.Offset == 0 ) - break; - } - SetLastError( ERROR_SUCCESS ); - #endif - #else - #warning Stack trace is not supported on this compiler/OS - #endif - return count; -} -std::vector StackTrace::backtrace( std::thread::native_handle_type tid ) -{ - std::vector trace( 1000, nullptr ); - size_t count = backtrace_thread( tid, trace.data(), trace.size() ); - trace.resize(count); - return trace; -} -std::vector StackTrace::backtrace() -{ - std::vector trace( 1000, nullptr ); - size_t count = backtrace_thread( thisThread(), trace.data(), trace.size() ); - trace.resize(count); - return trace; -} -std::vector> StackTrace::backtraceAll() -{ - // Get the list of threads - auto threads = activeThreads( ); - // Get the backtrace of each thread - std::vector> trace(threads.size()); - size_t i = 0; - for ( auto it=threads.begin(); i StackTrace::activeThreads( ) -{ - std::set threads; - #if defined( USE_LINUX ) - std::set tid; - int pid = getpid(); - char cmd[128]; - sprintf( cmd, "ps -T -p %i", pid ); - signal( SIGCHLD, SIG_DFL ); // Clear child exited - int code; - auto output = breakString( exec( cmd, code ) ); - for ( const auto& line : output ) { - int tid2 = get_tid( pid, line ); - if ( tid2 != -1 ) - tid.insert( tid2 ); - } - tid.erase( syscall(SYS_gettid) ); - signal( CALLSTACK_SIG, _activeThreads_signal_handler ); - for ( auto tid2 : tid ) { - thread_backtrace_mutex.lock(); - thread_id_finished = false; - thread_handle = thisThread(); - syscall( SYS_tgkill, pid, tid2, CALLSTACK_SIG ); - auto t1 = 
std::chrono::high_resolution_clock::now(); - auto t2 = std::chrono::high_resolution_clock::now(); - while ( !thread_id_finished && std::chrono::duration(t2-t1).count()<0.1 ) { - std::this_thread::yield(); - t2 = std::chrono::high_resolution_clock::now(); - } - threads.insert( thread_handle ); - thread_backtrace_mutex.unlock(); - } - #elif defined( USE_MAC ) - printf("activeThreads not finished\n"); - #elif defined( USE_WINDOWS ) - HANDLE hThreadSnap = CreateToolhelp32Snapshot( TH32CS_SNAPTHREAD, 0 ); - if( hThreadSnap != INVALID_HANDLE_VALUE ) { - // Fill in the size of the structure before using it - THREADENTRY32 te32 - te32.dwSize = sizeof(THREADENTRY32 ); - // Retrieve information about the first thread, and exit if unsuccessful - if( !Thread32First( hThreadSnap, &te32 ) ) { - printError( TEXT("Thread32First") ); // Show cause of failure - CloseHandle( hThreadSnap ); // Must clean up the snapshot object! - return( FALSE ); - } - // Now walk the thread list of the system - do { - if ( te32.th32OwnerProcessID == dwOwnerPID ) - threads.insert( te32.th32ThreadID ); - } while( Thread32Next(hThreadSnap, &te32 ) ); - CloseHandle( hThreadSnap ); // Must clean up the snapshot object! 
- } - #else - #warning activeThreads is not yet supported on this compiler/OS - #endif - threads.insert( thisThread() ); - if ( globalMonitorThread ) - threads.erase( globalMonitorThread->native_handle() ); - return threads; -} -// clang-format on - - -/**************************************************************************** - * Function to get the current call stack * - ****************************************************************************/ -std::vector StackTrace::getCallStack() -{ - auto trace = StackTrace::backtrace(); - auto info = getStackInfo( trace ); - return info; -} -std::vector StackTrace::getCallStack( std::thread::native_handle_type id ) -{ - auto trace = StackTrace::backtrace( id ); - auto info = getStackInfo( trace ); - return info; -} -static StackTrace::multi_stack_info -generateMultiStack( const std::vector> &thread_backtrace ) -{ - // Get the stack data for all pointers - std::set addresses_set; - for ( const auto &trace : thread_backtrace ) { - for ( auto ptr : trace ) - addresses_set.insert( ptr ); - } - std::vector addresses( addresses_set.begin(), addresses_set.end() ); - auto stack_data = StackTrace::getStackInfo( addresses ); - std::map map_data; - for ( size_t i = 0; i < addresses.size(); i++ ) - map_data.insert( std::make_pair( addresses[i], stack_data[i] ) ); - // Create the multi-stack trace - StackTrace::multi_stack_info multistack; - for ( const auto &trace : thread_backtrace ) { - if ( trace.empty() ) - continue; - // Create the stack for the given thread trace - std::vector stack( trace.size() ); - for ( size_t i = 0; i < trace.size(); i++ ) - stack[i] = map_data[trace[i]]; - // Add the data to the multistack - multistack.add( stack.size(), stack.data() ); - } - return multistack; -} -StackTrace::multi_stack_info StackTrace::getAllCallStacks() -{ - // Get the backtrace of each thread - auto thread_backtrace = backtraceAll(); - // Create the multi-stack strucutre - auto stack = generateMultiStack( thread_backtrace ); - 
return stack; -} - - -/**************************************************************************** - * Function to get system search paths * - ****************************************************************************/ -std::string StackTrace::getSymPaths() -{ - std::string paths; -#ifdef USE_WINDOWS - // Create the path list (seperated by ';' ) - paths = std::string( ".;" ); - paths.reserve( 1000 ); - // Add the current directory - paths += getCurrentDirectory() + ";"; - // Now add the path for the main-module: - char temp[1024]; - memset( temp, 0, sizeof( temp ) ); - if ( GetModuleFileNameA( nullptr, temp, sizeof( temp ) - 1 ) > 0 ) { - for ( char *p = ( temp + strlen( temp ) - 1 ); p >= temp; --p ) { - // locate the rightmost path separator - if ( ( *p == '\\' ) || ( *p == '/' ) || ( *p == ':' ) ) { - *p = 0; - break; - } - } - if ( strlen( temp ) > 0 ) { - paths += temp; - paths += ";"; - } - } - memset( temp, 0, sizeof( temp ) ); - if ( GetEnvironmentVariableA( "_NT_SYMBOL_PATH", temp, sizeof( temp ) - 1 ) > 0 ) { - paths += temp; - paths += ";"; - } - memset( temp, 0, sizeof( temp ) ); - if ( GetEnvironmentVariableA( "_NT_ALTERNATE_SYMBOL_PATH", temp, sizeof( temp ) - 1 ) > 0 ) { - paths += temp; - paths += ";"; - } - memset( temp, 0, sizeof( temp ) ); - if ( GetEnvironmentVariableA( "SYSTEMROOT", temp, sizeof( temp ) - 1 ) > 0 ) { - paths += temp; - paths += ";"; - // also add the "system32"-directory: - paths += temp; - paths += "\\system32;"; - } - memset( temp, 0, sizeof( temp ) ); - if ( GetEnvironmentVariableA( "SYSTEMDRIVE", temp, sizeof( temp ) - 1 ) > 0 ) { - paths += "SRV*;" + std::string( temp ) + - "\\websymbols*http://msdl.microsoft.com/download/symbols;"; - } else { - paths += "SRV*c:\\websymbols*http://msdl.microsoft.com/download/symbols;"; - } -#endif - return paths; -} - - -/**************************************************************************** - * Load modules for windows * - 
****************************************************************************/ -#ifdef USE_WINDOWS -BOOL StackTrace::GetModuleListTH32( HANDLE hProcess, DWORD pid ) -{ - // CreateToolhelp32Snapshot() - typedef HANDLE( __stdcall * tCT32S )( DWORD dwFlags, DWORD th32ProcessID ); - // Module32First() - typedef BOOL( __stdcall * tM32F )( HANDLE hSnapshot, LPMODULEENTRY32 lpme ); - // Module32Next() - typedef BOOL( __stdcall * tM32N )( HANDLE hSnapshot, LPMODULEENTRY32 lpme ); - - // try both dlls... - const TCHAR *dllname[] = { _T("kernel32.dll"), _T("tlhelp32.dll") }; - HINSTANCE hToolhelp = nullptr; - tCT32S pCT32S = nullptr; - tM32F pM32F = nullptr; - tM32N pM32N = nullptr; - - HANDLE hSnap; - MODULEENTRY32 me; - me.dwSize = sizeof( me ); - - for ( size_t i = 0; i < ( sizeof( dllname ) / sizeof( dllname[0] ) ); i++ ) { - hToolhelp = LoadLibrary( dllname[i] ); - if ( hToolhelp == nullptr ) - continue; - pCT32S = (tCT32S) GetProcAddress( hToolhelp, "CreateToolhelp32Snapshot" ); - pM32F = (tM32F) GetProcAddress( hToolhelp, "Module32First" ); - pM32N = (tM32N) GetProcAddress( hToolhelp, "Module32Next" ); - if ( ( pCT32S != nullptr ) && ( pM32F != nullptr ) && ( pM32N != nullptr ) ) - break; // found the functions! 
- FreeLibrary( hToolhelp ); - hToolhelp = nullptr; - } - - if ( hToolhelp == nullptr ) - return FALSE; - - hSnap = pCT32S( TH32CS_SNAPMODULE, pid ); - if ( hSnap == (HANDLE) -1 ) { - FreeLibrary( hToolhelp ); - return FALSE; - } - - bool keepGoing = !!pM32F( hSnap, &me ); - int cnt = 0; - while ( keepGoing ) { - LoadModule( hProcess, me.szExePath, me.szModule, (DWORD64) me.modBaseAddr, me.modBaseSize ); - cnt++; - keepGoing = !!pM32N( hSnap, &me ); - } - CloseHandle( hSnap ); - FreeLibrary( hToolhelp ); - if ( cnt <= 0 ) - return FALSE; - return TRUE; -} -DWORD StackTrace::LoadModule( - HANDLE hProcess, LPCSTR img, LPCSTR mod, DWORD64 baseAddr, DWORD size ) -{ - CHAR *szImg = _strdup( img ); - CHAR *szMod = _strdup( mod ); - DWORD result = ERROR_SUCCESS; - if ( ( szImg == nullptr ) || ( szMod == nullptr ) ) { - result = ERROR_NOT_ENOUGH_MEMORY; - } else { - if ( SymLoadModule( hProcess, 0, szImg, szMod, baseAddr, size ) == 0 ) - result = GetLastError(); - } - ULONGLONG fileVersion = 0; - if ( szImg != nullptr ) { - // try to retrive the file-version: - VS_FIXEDFILEINFO *fInfo = nullptr; - DWORD dwHandle; - DWORD dwSize = GetFileVersionInfoSizeA( szImg, &dwHandle ); - if ( dwSize > 0 ) { - LPVOID vData = malloc( dwSize ); - if ( vData != nullptr ) { - if ( GetFileVersionInfoA( szImg, dwHandle, dwSize, vData ) != 0 ) { - UINT len; - TCHAR szSubBlock[] = _T("\\"); - if ( VerQueryValue( vData, szSubBlock, (LPVOID *) &fInfo, &len ) == 0 ) { - fInfo = nullptr; - } else { - fileVersion = ( (ULONGLONG) fInfo->dwFileVersionLS ) + - ( (ULONGLONG) fInfo->dwFileVersionMS << 32 ); - } - } - free( vData ); - } - } - - // Retrive some additional-infos about the module - IMAGEHLP_MODULE64 Module; - Module.SizeOfStruct = sizeof( IMAGEHLP_MODULE64 ); - SymGetModuleInfo64( hProcess, baseAddr, &Module ); - LPCSTR pdbName = Module.LoadedImageName; - if ( Module.LoadedPdbName[0] != 0 ) - pdbName = Module.LoadedPdbName; - } - if ( szImg != nullptr ) - free( szImg ); - if ( szMod != 
nullptr ) - free( szMod ); - return result; -} -BOOL StackTrace::GetModuleListPSAPI( HANDLE hProcess ) -{ - DWORD cbNeeded; - HMODULE hMods[1024]; - char tt[8192]; - char tt2[8192]; - if ( !EnumProcessModules( hProcess, hMods, sizeof( hMods ), &cbNeeded ) ) { - return false; - } - if ( cbNeeded > sizeof( hMods ) ) { - printf( "Insufficient memory allocated in GetModuleListPSAPI\n" ); - return false; - } - int cnt = 0; - for ( DWORD i = 0; i < cbNeeded / sizeof( hMods[0] ); i++ ) { - // base address, size - MODULEINFO mi; - GetModuleInformation( hProcess, hMods[i], &mi, sizeof( mi ) ); - // image file name - tt[0] = 0; - GetModuleFileNameExA( hProcess, hMods[i], tt, sizeof( tt ) ); - // module name - tt2[0] = 0; - GetModuleBaseNameA( hProcess, hMods[i], tt2, sizeof( tt2 ) ); - DWORD dwRes = LoadModule( hProcess, tt, tt2, (DWORD64) mi.lpBaseOfDll, mi.SizeOfImage ); - if ( dwRes != ERROR_SUCCESS ) - printf( "ERROR: LoadModule (%d)\n", dwRes ); - cnt++; - } - - return cnt != 0; -} -void StackTrace::LoadModules() -{ - static bool modules_loaded = false; - if ( !modules_loaded ) { - modules_loaded = true; - - // Get the search paths for symbols - std::string paths = StackTrace::getSymPaths(); - - // Initialize the symbols - if ( SymInitialize( GetCurrentProcess(), paths.c_str(), FALSE ) == FALSE ) - printf( "ERROR: SymInitialize (%d)\n", GetLastError() ); - - DWORD symOptions = SymGetOptions(); - symOptions |= SYMOPT_LOAD_LINES | SYMOPT_FAIL_CRITICAL_ERRORS; - symOptions = SymSetOptions( symOptions ); - char buf[1024] = { 0 }; - if ( SymGetSearchPath( GetCurrentProcess(), buf, sizeof( buf ) ) == FALSE ) - printf( "ERROR: SymGetSearchPath (%d)\n", GetLastError() ); - - // First try to load modules from toolhelp32 - BOOL loaded = StackTrace::GetModuleListTH32( GetCurrentProcess(), GetCurrentProcessId() ); - - // Try to load from Psapi - if ( !loaded ) - loaded = StackTrace::GetModuleListPSAPI( GetCurrentProcess() ); - } -} -#endif - - 
-/**************************************************************************** - * Get the signal name * - ****************************************************************************/ -std::string StackTrace::signalName( int sig ) { return std::string( strsignal( sig ) ); } -std::vector StackTrace::allSignalsToCatch() -{ - std::set signals; - for ( int i = 1; i < 32; i++ ) - signals.insert( i ); - for ( int i = SIGRTMIN; i <= SIGRTMAX; i++ ) - signals.insert( i ); - signals.erase( SIGKILL ); - signals.erase( SIGSTOP ); - return std::vector( signals.begin(), signals.end() ); -} -std::vector StackTrace::defaultSignalsToCatch() -{ - auto tmp = allSignalsToCatch(); - std::set signals( tmp.begin(), tmp.end() ); - signals.erase( SIGWINCH ); // Don't catch window changed by default - signals.erase( SIGCONT ); // Don't catch continue by default - return std::vector( signals.begin(), signals.end() ); -} - - -/**************************************************************************** - * Set the signal handlers * - ****************************************************************************/ -static std::function abort_fun; -static std::string rethrow() -{ - std::string last_message; -#ifdef USE_LINUX - try { - static int tried_throw = 0; - if ( tried_throw == 0 ) { - tried_throw = 1; - throw; - } - // No active exception - } catch ( const std::exception &err ) { - // Caught a std::runtime_error - last_message = err.what(); - } catch ( ... 
) { - // Caught an unknown exception - last_message = "unknown exception occurred."; - } -#endif - return last_message; -} -static void term_func_abort( int sig ) -{ - std::string msg( "Caught signal: " ); - msg += StackTrace::signalName( sig ); - abort_fun( msg, StackTrace::terminateType::signal ); -} -static std::set signals_set = std::set(); -static void term_func() -{ - std::string last_message = rethrow(); - StackTrace::clearSignals(); - abort_fun( "Unhandled exception:\n" + last_message, StackTrace::terminateType::exception ); -} -void StackTrace::clearSignal( int sig ) -{ - if ( signals_set.find( sig ) != signals_set.end() ) { - signal( sig, SIG_DFL ); - signals_set.erase( sig ); - } -} -void StackTrace::clearSignals() -{ - for ( auto sig : signals_set ) - signal( sig, SIG_DFL ); - signals_set.clear(); -} -void StackTrace::setSignals( const std::vector &signals, void ( *handler )( int ) ) -{ - for ( auto sig : signals ) { - signal( sig, handler ); - signals_set.insert( sig ); - } -} -void StackTrace::setErrorHandlers( - std::function abort ) -{ - abort_fun = abort; - std::set_terminate( term_func ); - setSignals( defaultSignalsToCatch(), &term_func_abort ); - std::set_unexpected( term_func ); -} - - -/**************************************************************************** - * Global call stack functionallity * - ****************************************************************************/ -#ifdef USE_MPI -static MPI_Comm globalCommForGlobalCommStack = MPI_COMM_NULL; -static bool stopGlobalMonitorThread = false; -static void runGlobalMonitorThread() -{ - int rank = 0; - int size = 1; - MPI_Comm_size( globalCommForGlobalCommStack, &size ); - MPI_Comm_rank( globalCommForGlobalCommStack, &rank ); - while ( !stopGlobalMonitorThread ) { - // Check for any messages - int flag = 0; - MPI_Status status; - int err = MPI_Iprobe( MPI_ANY_SOURCE, 1, globalCommForGlobalCommStack, &flag, &status ); - if ( err != MPI_SUCCESS ) { - printf( "Internal error in 
StackTrace::getGlobalCallStacks::runGlobalMonitorThread\n" ); - break; - } else if ( flag != 0 ) { - // We received a request - int src_rank = status.MPI_SOURCE; - int tag; - MPI_Recv( &tag, 1, MPI_INT, src_rank, 1, globalCommForGlobalCommStack, &status ); - // Get a trace of all threads (except this) - auto threads = StackTrace::activeThreads(); - threads.erase( StackTrace::thisThread() ); - if ( threads.empty() ) - continue; - // Get the stack trace of each thread - std::vector> stack; - for ( auto thread : threads ) - stack.push_back( StackTrace::getCallStack( thread ) ); - // Pack and send the data - auto data = pack( stack ); - int count = data.size(); - MPI_Send( data.data(), count, MPI_CHAR, src_rank, tag, globalCommForGlobalCommStack ); - } else { - // No requests recieved - std::this_thread::sleep_for( std::chrono::milliseconds( 50 ) ); - } - } -} -void StackTrace::globalCallStackInitialize( MPI_Comm comm ) -{ -#ifdef USE_MPI - MPI_Comm_dup( comm, &globalCommForGlobalCommStack ); -#endif - stopGlobalMonitorThread = false; - globalMonitorThread.reset( new std::thread( runGlobalMonitorThread ) ); -} -void StackTrace::globalCallStackFinalize() -{ - stopGlobalMonitorThread = true; - globalMonitorThread->join(); - globalMonitorThread.reset(); -#ifdef USE_MPI - if ( globalCommForGlobalCommStack != MPI_COMM_NULL ) - MPI_Comm_free( &globalCommForGlobalCommStack ); - globalCommForGlobalCommStack = MPI_COMM_NULL; -#endif -} -StackTrace::multi_stack_info StackTrace::getGlobalCallStacks() -{ - // Check if we properly initialized the comm - if ( globalMonitorThread == nullptr ) { - printf( "Warning: getGlobalCallStacks called without call to globalCallStackInitialize\n" ); - return getAllCallStacks(); - } - if ( globalMonitorThread == nullptr ) { - printf( "Warning: getGlobalCallStacks called without call to globalCallStackInitialize\n" ); - return getAllCallStacks(); - } -#ifdef USE_MPI - int provided; - MPI_Query_thread( &provided ); - if ( provided != 
MPI_THREAD_MULTIPLE ) { - printf( "Warning: getGlobalCallStacks requires support for MPI_THREAD_MULTIPLE\n" ); - return getAllCallStacks(); - } -#endif - if ( activeThreads().size() == 1 ) { - printf( "Warning: getAllCallStacks not supported on this OS, defaulting to basic call " - "stack\n" ); - return getAllCallStacks(); - } - // Signal all processes that we want their stack for all threads - int rank = 0; - int size = 1; - MPI_Comm_size( globalCommForGlobalCommStack, &size ); - MPI_Comm_rank( globalCommForGlobalCommStack, &rank ); - std::random_device rd; - std::mt19937 gen( rd() ); - std::uniform_int_distribution<> dis( 2, 0x7FFF ); - int tag = dis( gen ); - std::vector sendRequest( size ); - for ( int i = 0; i < size; i++ ) { - if ( i == rank ) - continue; - MPI_Isend( &tag, 1, MPI_INT, i, 1, globalCommForGlobalCommStack, &sendRequest[i] ); - } - // Get the trace for the current process - auto threads = StackTrace::activeThreads(); - StackTrace::multi_stack_info multistack; - for ( auto thread : threads ) { - auto stack = StackTrace::getCallStack( thread ); - multistack.add( stack.size(), stack.data() ); - } - // Recieve the backtrace for all processes/threads - int N_finished = 1; - auto start = std::chrono::steady_clock::now(); - double time = 0; - const double max_time = 2.0 + size * 20e-3; - while ( N_finished < size && time < max_time ) { - int flag = 0; - MPI_Status status; - int err = MPI_Iprobe( MPI_ANY_SOURCE, tag, globalCommForGlobalCommStack, &flag, &status ); - if ( err != MPI_SUCCESS ) { - printf( "Internal error in StackTrace::getGlobalCallStacks\n" ); - break; - } else if ( flag != 0 ) { - // We recieved a response - int src_rank = status.MPI_SOURCE; - int count; - MPI_Get_count( &status, MPI_CHAR, &count ); - std::vector data( count, 0 ); - MPI_Recv( data.data(), - count, - MPI_CHAR, - src_rank, - tag, - globalCommForGlobalCommStack, - &status ); - auto stack_list = unpack( data ); - for ( const auto &stack : stack_list ) - multistack.add( 
stack.size(), stack.data() ); - N_finished++; - } else { - auto stop = std::chrono::steady_clock::now(); - time = std::chrono::duration_cast( stop - start ).count(); - std::this_thread::yield(); - } - } - for ( int i = 0; i < size; i++ ) { - if ( i == rank ) - continue; - MPI_Request_free( &sendRequest[i] ); - } - return multistack; -} -#else -void StackTrace::globalCallStackInitialize( MPI_Comm ) {} -void StackTrace::globalCallStackFinalize() {} -StackTrace::multi_stack_info StackTrace::getGlobalCallStacks() { return getAllCallStacks(); } -#endif - - -/**************************************************************************** - * Cleanup the call stack * - ****************************************************************************/ -static inline size_t findMatching( const std::string &str, size_t pos ) -{ - if ( str[pos] != '<' ) { - perr << "Internal error string matching\n"; - perr << " " << str << std::endl; - perr << " " << pos << std::endl; - return pos; - } - size_t pos2 = pos + 1; - int count = 1; - while ( count != 0 && pos2 < str.size() ) { - if ( str[pos2] == '<' ) - count++; - if ( str[pos2] == '>' ) - count--; - pos2++; - } - return pos2; -} -void StackTrace::cleanupStackTrace( multi_stack_info &stack ) -{ - auto it = stack.children.begin(); - const size_t npos = std::string::npos; - while ( it != stack.children.end() ) { - auto &object = it->stack.object; - auto &function = it->stack.function; - auto &filename = it->stack.filename; - bool remove_entry = false; - // Cleanup object and filename - object = stripPath( object ); - filename = stripPath( filename ); - // Remove callstack (and all children) for threads that are just contributing - if ( function.find( "_callstack_signal_handler" ) != npos && - filename.find( "StackTrace.cpp" ) != npos ) { - it = stack.children.erase( it ); - continue; - } - // Remove __libc_start_main - if ( function.find( "__libc_start_main" ) != npos && - filename.find( "libc-start.c" ) != npos ) - remove_entry = true; - 
// Remove backtrace_thread - if ( function.find( "backtrace_thread" ) != npos && - filename.find( "StackTrace.cpp" ) != npos ) - remove_entry = true; - // Remove __restore_rt - if ( function.find( "__restore_rt" ) != npos && object.find( "libpthread" ) != npos ) - remove_entry = true; - // Remove std::condition_variable::__wait_until_impl - if ( function.find( "std::condition_variable::__wait_until_impl" ) != npos && - filename == "condition_variable" ) - remove_entry = true; - // Remove std::_Function_handler< - if ( function.find( "std::_Function_handler<" ) != npos && filename == "functional" ) - remove_entry = true; - // Remove std::_Bind_simple< - if ( function.find( "std::_Bind_simple<" ) != npos && filename == "functional" ) { - auto pos = function.find( "std::_Bind_simple<" ); - function = function.substr( 0, pos ) + "std::_Bind_simple<...>(...)"; - remove_entry = true; - } - // Remove std::this_thread::__sleep_for - if ( function.find( "std::this_thread::__sleep_for(" ) != npos && - object.find( "libstdc++" ) != npos ) - remove_entry = true; - // Remove std::thread::_Impl - if ( function.find( "std::thread::_Impl<" ) != npos && filename == "thread" ) - remove_entry = true; - // Remove MATLAB internal routines - if ( object == "libmwmcr.so" || object == "libmwm_lxe.so" || object == "libmwbridge.so" || - object == "libmwiqm.so" ) - remove_entry = true; - // Remove the desired entry - if ( remove_entry ) { - if ( it->children.empty() ) { - it = stack.children.erase( it ); - continue; - } else if ( it->children.size() == 1 ) { - *it = it->children[0]; - continue; - } - } - // Cleanup template space - strrep( function, " >", ">" ); - strrep( function, "< ", "<" ); - // Replace std::chrono::duration with abbriviated version - if ( function.find( "std::chrono::duration<" ) != npos ) { - strrep( function, "std::chrono::duration >", "ticks" ); - strrep( function, - "std::chrono::duration >", - "nanoseconds" ); - } - // Replace std::ratio with abbriviated version. 
- if ( function.find( "std::ratio<" ) != npos ) { - strrep( function, "std::ratio<1l, 1000000000000000000000000l>", "std::yocto" ); - strrep( function, "std::ratio<1l, 1000000000000000000000l>", "std::zepto" ); - strrep( function, "std::ratio<1l, 1000000000000000000l>", "std::atto" ); - strrep( function, "std::ratio<1l, 1000000000000000l>", "std::femto" ); - strrep( function, "std::ratio<1l, 1000000000000l>", "std::pico" ); - strrep( function, "std::ratio<1l, 1000000000l>", "std::nano" ); - strrep( function, "std::ratio<1l, 1000000l>", "std::micro" ); - strrep( function, "std::ratio<1l, 1000l>", "std::milli" ); - strrep( function, "std::ratio<1l, 100l>", "std::centi" ); - strrep( function, "std::ratio<1l, 10l>", "std::deci" ); - strrep( function, "std::ratio<1l, 1l>", "" ); - strrep( function, "std::ratio<10l, 1l>", "std::deca" ); - strrep( function, "std::ratio<60l, 1l>", "std::ratio<60>" ); - strrep( function, "std::ratio<100l, 1l>", "std::hecto" ); - strrep( function, "std::ratio<1000l, 1l>", "std::kilo" ); - strrep( function, "std::ratio<3600l, 1l>", "std::ratio<3600>" ); - strrep( function, "std::ratio<1000000l, 1l>", "std::mega" ); - strrep( function, "std::ratio<1000000000l, 1l>", "std::giga" ); - strrep( function, "std::ratio<1000000000000l, 1l>", "std::tera" ); - strrep( function, "std::ratio<1000000000000000l, 1l>", "std::peta" ); - strrep( function, "std::ratio<1000000000000000000l, 1l>", "std::exa" ); - strrep( function, "std::ratio<1000000000000000000000l, 1l>", "std::zetta" ); - strrep( function, "std::ratio<1000000000000000000000000l, 1l>", "std::yotta" ); - strrep( function, " >", ">" ); - strrep( function, "< ", "<" ); - } - // Replace std::chrono::duration with abbriviated version. 
- if ( function.find( "std::chrono::duration<" ) != npos ) { - // clang-format off - strrep( function, "std::chrono::duration", "std::chrono::nanoseconds" ); - strrep( function, "std::chrono::duration", "std::chrono::microseconds" ); - strrep( function, "std::chrono::duration", "std::chrono::milliseconds" ); - strrep( function, "std::chrono::duration", "std::chrono::seconds" ); - strrep( function, "std::chrono::duration", "std::chrono::seconds" ); - strrep( function, "std::chrono::duration>", "std::chrono::minutes" ); - strrep( function, "std::chrono::duration>", "std::chrono::hours" ); - strrep( function, " >", ">" ); - strrep( function, "< ", "<" ); - // clang-format on - } - // Replace std::this_thread::sleep_for with abbriviated version. - if ( function.find( "::sleep_for<" ) != npos ) { - strrep( function, "::sleep_for", "::sleep_for" ); - strrep( function, "::sleep_for", "::sleep_for" ); - strrep( function, "::sleep_for", "::sleep_for" ); - strrep( function, "::sleep_for", "::sleep_for" ); - strrep( function, "::sleep_for", "::sleep_for" ); - strrep( function, "::sleep_for>", "::sleep_for" ); - strrep( function, "::sleep_for>", "::sleep_for" ); - strrep( function, - "::sleep_for(std::chrono::nanoseconds", - "::sleep_for(std::chrono::nanoseconds" ); - strrep( function, - "::sleep_for(std::chrono::microseconds", - "::sleep_for(std::chrono::microseconds" ); - strrep( function, - "::sleep_for(std::chrono::milliseconds", - "::sleep_for(std::chrono::milliseconds" ); - strrep( function, - "::sleep_for(std::chrono::seconds", - "::sleep_for(std::chrono::seconds" ); - strrep( function, - "::sleep_for(std::chrono::minutes", - "::sleep_for(std::chrono::milliseconds" ); - strrep( function, - "::sleep_for(std::chrono::hours", - "::sleep_for(std::chrono::hours" ); - } - // Replace std::basic_string with abbriviated version - size_t pos = 0; - while ( pos < function.size() ) { - // Find next instance of std::basic_string - const std::string match = "std::basic_string<"; - 
pos = function.find( match, pos ); - if ( pos == npos ) - break; - // Find the matching > - size_t pos1 = pos + match.size() - 1; - size_t pos2 = findMatching( function, pos1 ); - if ( pos2 == pos1 ) - break; - if ( function.substr( pos1 + 1, 4 ) == "char" ) - function.replace( pos, pos2 - pos, "std::string" ); - else if ( function.substr( pos1 + 1, 7 ) == "wchar_t" ) - function.replace( pos, pos2 - pos, "std::wstring" ); - else if ( function.substr( pos1 + 1, 8 ) == "char16_t" ) - function.replace( pos, pos2 - pos, "std::u16string" ); - else if ( function.substr( pos1 + 1, 8 ) == "char32_t" ) - function.replace( pos, pos2 - pos, "std::u32string" ); - pos++; - } - // Cleanup the children - cleanupStackTrace( *it ); - ++it; - } -} diff --git a/common/Utilities.cpp b/common/Utilities.cpp index d34385a2..f6d810af 100644 --- a/common/Utilities.cpp +++ b/common/Utilities.cpp @@ -1,303 +1,8 @@ #include "common/Utilities.h" -#include "common/StackTrace.h" -#include -#include -#include -#include -#include -#include #include #include -#ifdef USE_MPI - #include "mpi.h" -#endif - -// Detect the OS and include system dependent headers -#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64) || defined(_MSC_VER) - // Note: windows has not been testeds - #define USE_WINDOWS - #include - #include - #include - #include - #include - #include - #define mkdir(path, mode) _mkdir(path) - //#pragma comment(lib, psapi.lib) //added - //#pragma comment(linker, /DEFAULTLIB:psapi.lib) -#elif defined(__APPLE__) - #define USE_MAC - #include - #include - #include - #include - #include - #include -#elif defined( __linux ) || defined( __linux__ ) || defined( __unix ) || defined( __posix ) - #define USE_LINUX - #include - #include - #include - #include - #include -#else - #error Unknown OS -#endif - - -#ifdef __GNUC__ - #define USE_ABI - #include -#endif - - -/**************************************************************************** -* Function to terminate the program * 
-****************************************************************************/ -static bool abort_printMemory = true; -static bool abort_printStack = true; -static bool abort_throwException = false; -static int force_exit = 0; -void Utilities::setAbortBehavior( bool printMemory, bool printStack, bool throwException ) -{ - abort_printMemory = printMemory; - abort_printStack = printStack; - abort_throwException = throwException; -} -void Utilities::abort(const std::string &message, const std::string &filename, const int line) -{ - std::stringstream msg; - msg << "Program abort called in file `" << filename << "' at line " << line << std::endl; - // Add the memory usage and call stack to the error message - if ( abort_printMemory ) { - size_t N_bytes = Utilities::getMemoryUsage(); - msg << "Bytes used = " << N_bytes << std::endl; - } - if ( abort_printStack ) { - std::vector stack = StackTrace::getCallStack(); - msg << std::endl; - msg << "Stack Trace:\n"; - for (size_t i=0; i1 ) { - exit(-1); - } else if ( !abort_throwException ) { - // Use MPI_abort (will terminate all processes) - force_exit = 2; - std::cerr << msg.str(); - #if defined(USE_MPI) || defined(HAVE_MPI) - int initialized=0, finalized=0; - MPI_Initialized(&initialized); - MPI_Finalized(&finalized); - if ( initialized!=0 && finalized==0 ) - MPI_Abort(MPI_COMM_WORLD,-1); - #endif - exit(-1); - } else if ( force_exit>0 ) { - exit(-1); - } else { - // Throw and standard exception (allows the use of try, catch) - throw std::logic_error(msg.str()); - } -} - - -/**************************************************************************** -* Function to handle MPI errors * -****************************************************************************/ -/*#if defined(USE_MPI) || defined(HAVE_MPI) -MPI_Errhandler mpierr; -void MPI_error_handler_fun( MPI_Comm *comm, int *err, ... 
) -{ - if ( *err==MPI_ERR_COMM && *comm==MPI_COMM_WORLD ) { - // Special error handling for an invalid MPI_COMM_WORLD - std::cerr << "Error invalid MPI_COMM_WORLD"; - exit(-1); - } - int msg_len=0; - char message[1000]; - MPI_Error_string( *err, message, &msg_len ); - if ( msg_len <= 0 ) - abort("Unkown error in MPI"); - abort( "Error calling MPI routine:\n" + std::string(message) ); -} -#endif*/ - - -/**************************************************************************** -* Function to handle unhandled exceptions * -****************************************************************************/ -bool tried_MPI_Abort=false; -void term_func_abort(int err) -{ - printf("Exiting due to abort (%i)\n",err); - std::vector stack = StackTrace::getCallStack(); - std::string message = "Stack Trace:\n"; - for (size_t i=0; i( new MPI_Errhandler ); - MPI_Comm_create_errhandler( MPI_error_handler_fun, mpierr.get() ); - } - MPI_Comm_set_errhandler( mpi.getCommunicator(), *mpierr ); - MPI_Comm_set_errhandler( MPI_COMM_WORLD, *mpierr ); - #endif -} -void Utilities::clearMPIErrorHandler( ) -{ - #if defined(USE_MPI) || defined(HAVE_MPI) - if ( mpierr.get()!=NULL ) - MPI_Errhandler_free( mpierr.get() ); // Delete the error handler - mpierr.reset(); - MPI_Comm_set_errhandler( MPI_COMM_SELF, MPI_ERRORS_ARE_FATAL ); - MPI_Comm_set_errhandler( MPI_COMM_WORLD, MPI_ERRORS_ARE_FATAL ); - #endif -}*/ - - -/**************************************************************************** -* Function to get the memory usage * -* Note: this function should be thread-safe * -****************************************************************************/ -#if defined(USE_MAC) - // Get the page size on mac - static size_t page_size = static_cast(sysconf(_SC_PAGESIZE)); -#endif -static size_t N_bytes_initialization = Utilities::getMemoryUsage(); -size_t Utilities::getMemoryUsage() -{ - size_t N_bytes = 0; - #if defined(USE_LINUX) - struct mallinfo meminfo = mallinfo(); - size_t size_hblkhd = 
static_cast( meminfo.hblkhd ); - size_t size_uordblks = static_cast( meminfo.uordblks ); - N_bytes = size_hblkhd + size_uordblks; - #elif defined(USE_MAC) - struct task_basic_info t_info; - mach_msg_type_number_t t_info_count = TASK_BASIC_INFO_COUNT; - if (KERN_SUCCESS != task_info(mach_task_self(), - TASK_BASIC_INFO, (task_info_t)&t_info, - &t_info_count)) { - return 0; - } - N_bytes = t_info.virtual_size; - #elif defined(USE_WINDOWS) - PROCESS_MEMORY_COUNTERS memCounter; - GetProcessMemoryInfo( GetCurrentProcess(), &memCounter, sizeof(memCounter) ); - N_bytes = memCounter.WorkingSetSize; - #endif - return N_bytes; -} - - -/**************************************************************************** -* Functions to get the time and timer resolution * -****************************************************************************/ -#if defined(USE_WINDOWS) - double Utilities::time() - { - LARGE_INTEGER end, f; - QueryPerformanceFrequency(&f); - QueryPerformanceCounter(&end); - double time = ((double)end.QuadPart)/((double)f.QuadPart); - return time; - } - double Utilities::tick() - { - LARGE_INTEGER f; - QueryPerformanceFrequency(&f); - double resolution = ((double)1.0)/((double)f.QuadPart); - return resolution; - } -#elif defined(USE_LINUX) || defined(USE_MAC) - double Utilities::time() - { - timeval current_time; - gettimeofday(¤t_time,NULL); - double time = ((double)current_time.tv_sec)+1e-6*((double)current_time.tv_usec); - return time; - } - double Utilities::tick() - { - timeval start, end; - gettimeofday(&start,NULL); - gettimeofday(&end,NULL); - while ( end.tv_sec==start.tv_sec && end.tv_usec==start.tv_usec ) - gettimeofday(&end,NULL); - double resolution = ((double)(end.tv_sec-start.tv_sec))+1e-6*((double)(end.tv_usec-start.tv_usec)); - return resolution; - } -#else - #error Unknown OS -#endif - // Factor a number into it's prime factors std::vector Utilities::factor(size_t number) diff --git a/common/Utilities.h b/common/Utilities.h index e6db4279..90cb4008 
100644 --- a/common/Utilities.h +++ b/common/Utilities.h @@ -1,91 +1,42 @@ #ifndef included_Utilities #define included_Utilities -#include #include -#include -#include -#include -#include -#include -#include #include +#include "StackTrace/Utilities.h" + namespace Utilities { -/*! - * Aborts the run after printing an error message with file and - * linenumber information. - */ -void abort( const std::string &message, const std::string &filename, const int line ); - - -/*! - * Set the behavior of abort - * @param printMemory Print the current memory usage (default is true) - * @param printStack Print the current call stack (default is true) - * @param throwException Throw an exception instead of MPI_Abort (default is false) - */ -void setAbortBehavior( bool printMemory, bool printStack, bool throwException ); - -//! Function to set the error handlers -void setErrorHandlers(); - - -/*! - * Function to get the memory availible. - * This function will return the total memory availible - * Note: depending on the implimentation, this number may be rounded to - * to a multiple of the page size. - * If this function fails, it will return 0. - */ -size_t getSystemMemory(); - - -/*! - * Function to get the memory usage. - * This function will return the total memory used by the application. - * Note: depending on the implimentation, this number may be rounded to - * to a multiple of the page size. - * If this function fails, it will return 0. - */ -size_t getMemoryUsage(); - - -//! Function to get an arbitrary point in time -double time(); - - -//! 
Function to get the resolution of time -double tick(); +// Functions inherited from StackTrace::Utilities +using StackTrace::Utilities::abort; +using StackTrace::Utilities::cause_segfault; +using StackTrace::Utilities::clearErrorHandlers; +using StackTrace::Utilities::exec; +using StackTrace::Utilities::getMemoryUsage; +using StackTrace::Utilities::getSystemMemory; +using StackTrace::Utilities::setAbortBehavior; +using StackTrace::Utilities::setErrorHandlers; +using StackTrace::Utilities::tick; +using StackTrace::Utilities::time; +using StackTrace::Utilities::sleep_ms; +using StackTrace::Utilities::sleep_s; //! std::string version of sprintf inline std::string stringf( const char *format, ... ); -/*! - * Sleep for X ms - * @param N Time to sleep (ms) - */ -inline void sleep_ms( int N ) { std::this_thread::sleep_for( std::chrono::milliseconds( N ) ); } - - -/*! - * Sleep for X s - * @param N Time to sleep (s) - */ -inline void sleep_s( int N ) { std::this_thread::sleep_for( std::chrono::seconds( N ) ); } - - //! Factor a number into it's prime factors std::vector factor(size_t number); -//! Print AMP Banner + +//! 
Null use function void nullUse( void* ); + } // namespace Utilities diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index af648c76..c6775e68 100755 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -64,7 +64,6 @@ ADD_LBPM_TEST_1_2_4( TestBlobIdentify ) ADD_LBPM_TEST_PARALLEL( TestSegDist 8 ) ADD_LBPM_TEST_PARALLEL( TestCommD3Q19 8 ) ADD_LBPM_TEST_1_2_4( testCommunication ) -ADD_LBPM_TEST_1_2_4( testUtilities ) ADD_LBPM_TEST( TestWriter ) IF ( USE_NETCDF ) ADD_LBPM_TEST_PARALLEL( TestNetcdf 8 ) diff --git a/tests/TestWriter.cpp b/tests/TestWriter.cpp index 855f33f6..78dab50b 100644 --- a/tests/TestWriter.cpp +++ b/tests/TestWriter.cpp @@ -4,8 +4,8 @@ #include #include #include +#include -#include "shared_ptr.h" #include "common/UnitTest.h" #include "common/Utilities.h" #include "common/MPI_Helpers.h" diff --git a/tests/testUtilities.cpp b/tests/testUtilities.cpp deleted file mode 100644 index b084a695..00000000 --- a/tests/testUtilities.cpp +++ /dev/null @@ -1,145 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "common/Utilities.h" -#include "common/StackTrace.h" -#include "common/UnitTest.h" -#include "common/MPI_Helpers.h" - - -// Detect the OS (defines which tests we allow to fail) -#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64) || defined(_MSC_VER) - #define USE_WINDOWS -#elif defined(__APPLE__) - #define USE_MAC -#elif defined( __linux ) || defined( __linux__ ) || defined( __unix ) || defined( __posix ) - #define USE_LINUX -#else - #error Unknown OS -#endif - - -// Function to return the call stack -std::vector get_call_stack() -{ - std::vector stack = StackTrace::getCallStack(); - std::vector stack2(stack.size()); - for (size_t i=0; i 10000 ) { stack2 = get_call_stack(); } - return stack2; -} - - -// The main function -int main(int argc, char *argv[]) -{ - int rank = 0; - MPI_Init(&argc,&argv); - MPI_Comm comm = MPI_COMM_WORLD; - 
MPI_Comm_rank(comm,&rank); - UnitTest ut; - Utilities::setAbortBehavior( true, true, true ); - - // Limit the scope of variables - { - // Test the memory usage - double t0 = Utilities::time(); - size_t n_bytes1 = Utilities::getMemoryUsage(); - double time1 = Utilities::time() - t0; - uint64_t *tmp = new uint64_t[0x100000]; - memset(tmp,0xAA,0x100000*sizeof(uint64_t)); - Utilities::nullUse( tmp ); - t0 = Utilities::time(); - size_t n_bytes2 = Utilities::getMemoryUsage(); - double time2 = Utilities::time() - t0; - delete [] tmp; - t0 = Utilities::time(); - size_t n_bytes3 = Utilities::getMemoryUsage(); - double time3 = Utilities::time() - t0; - std::cout << "Number of bytes used for a basic test: " << n_bytes1 << ", " << n_bytes2 << ", " << n_bytes3 << std::endl; - std::cout << " Time to query: " << time1*1e6 << " us, " << time2*1e6 << " us, " << time3*1e6 << " us" << std::endl; - if ( n_bytes1==0 ) { - ut.failure("getMemoryUsage returns 0"); - } else { - ut.passes("getMemoryUsage returns non-zero"); - if ( n_bytes2>n_bytes1 ) { - ut.passes("getMemoryUsage increases size"); - } else { - #if defined(USE_MAC) - ut.expected_failure("getMemoryUsage does not increase size"); - #else - ut.failure("getMemoryUsage increases size"); - #endif - } - if ( n_bytes1==n_bytes3 ) { - ut.passes("getMemoryUsage decreases size properly"); - } else { - #if defined(USE_MAC) || defined(USE_WINDOWS) - ut.expected_failure("getMemoryUsage does not decrease size properly"); - #else - ut.failure("getMemoryUsage does not decrease size properly"); - #endif - } - } - - // Test getting the current call stack - std::vector call_stack = get_call_stack(); - if ( rank==0 ) { - std::cout << "Call stack:" << std::endl; - for (size_t i=0; i( x ); + while ( !swap ) { + a.i = atomic_add( x2, 0 ); + b.d = a.d + y; + swap = atomic_compare_and_swap( x2, a.i, b.i ); + } + return b.d; +} +float atomic_add( float volatile *x, float y ) +{ + static_assert( sizeof( float ) == sizeof( int32_atomic ), "Unexpected 
size" ); + union U { + float d; + int32_atomic i; + }; + U a, b; + bool swap = false; + auto x2 = reinterpret_cast( x ); + while ( !swap ) { + a.i = atomic_add( x2, 0 ); + b.d = a.d + y; + swap = atomic_compare_and_swap( x2, a.i, b.i ); + } + return b.d; +} + + } // AtomicOperations namespace diff --git a/threadpool/atomic_helpers.h b/threadpool/atomic_helpers.h index e1eec545..32b67200 100644 --- a/threadpool/atomic_helpers.h +++ b/threadpool/atomic_helpers.h @@ -2,6 +2,8 @@ // but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. #ifndef included_ThreadPoolAtomicHelpers #define included_ThreadPoolAtomicHelpers + +#include #include #include #include @@ -10,7 +12,6 @@ #if defined( WIN32 ) || defined( _WIN32 ) || defined( WIN64 ) || defined( _WIN64 ) // Using windows #define USE_WINDOWS -#define NOMINMAX #include #include #include @@ -529,6 +530,11 @@ inline void atomic_swap( int64_atomic volatile *x, int64_atomic *y ) } +// Atomic operations for floating types +double atomic_add( double volatile *x, double y ); +float atomic_add( float volatile *x, float y ); + + // Define an atomic counter struct counter_t { public: diff --git a/threadpool/atomic_list.h b/threadpool/atomic_list.h index 5da8cc85..c60c2869 100644 --- a/threadpool/atomic_list.h +++ b/threadpool/atomic_list.h @@ -14,12 +14,16 @@ * \details This class implements a basic sorted list that is thread-safe and lock-free. * Entries are stored smallest to largest according to the compare operator */ -template> +template> class AtomicList final { public: //! Default constructor - AtomicList( const TYPE &default_value = TYPE(), const COMPARE &comp = COMPARE() ); + AtomicList( size_t capacity = 1024, const TYPE &default_value = TYPE(), + const COMPARE &comp = COMPARE() ); + + //! Destructor + ~AtomicList(); /*! * \brief Remove an item from the list @@ -33,8 +37,8 @@ public: * bool cmp( const TYPE& value, ... 
); * @param args Additional arguments for the comparison */ - template - inline TYPE remove( Compare compare, Args... args ); + template + inline TYPE remove( Compare compare, const Args &... args ); //! Remove the first from the list inline TYPE remove_first(); @@ -44,13 +48,13 @@ public: * \details Insert an item into the list * @param x Item to insert */ - inline void insert( TYPE x ); + inline void insert( const TYPE &x ); /*! * \brief Return the size of the list * \details Return the number of items in the list */ - inline int size() const { return AtomicOperations::atomic_get( &d_N ); } + inline size_t size() const { return AtomicOperations::atomic_get( &d_N ); } /*! * \brief Check if the list is empty @@ -58,11 +62,23 @@ public: */ inline bool empty() const { return AtomicOperations::atomic_get( &d_N ) == 0; } + /*! + * \brief Clear the list + * \details Removes all entries from the list + */ + inline void clear() + { + while ( !empty() ) { + remove_first(); + } + } + /*! * \brief Return the capacity of the list * \details Return the maximum number of items the list can hold */ - inline int capacity() const { return MAX_SIZE; } + inline constexpr size_t capacity() const { return d_capacity; } + /*! * \brief Check the list @@ -76,19 +92,20 @@ public: //! Return the total number of inserts since object creation - inline int64_t N_insert() const { return AtomicOperations::atomic_get( &d_N_insert ); } + inline size_t N_insert() const { return AtomicOperations::atomic_get( &d_N_insert ); } //! 
Return the total number of removals since object creation - inline int64_t N_remove() const { return AtomicOperations::atomic_get( &d_N_remove ); } + inline size_t N_remove() const { return AtomicOperations::atomic_get( &d_N_remove ); } private: // Data members COMPARE d_compare; + const size_t d_capacity; volatile TYPE d_default; - volatile TYPE d_objects[MAX_SIZE]; + volatile TYPE *d_objects; volatile AtomicOperations::int32_atomic d_N; - volatile AtomicOperations::int32_atomic d_next[MAX_SIZE + 1]; + volatile AtomicOperations::int32_atomic *d_next; volatile AtomicOperations::int32_atomic d_unused; volatile AtomicOperations::int64_atomic d_N_insert; volatile AtomicOperations::int64_atomic d_N_remove; @@ -99,8 +116,9 @@ private: if ( i == -1 ) return -1; int tmp = 0; - while ( tmp == 0 ) + do { tmp = AtomicOperations::atomic_fetch_and_and( &d_next[i], 0 ); + } while ( tmp == 0 ); return tmp; } inline void unlock( int i, int value ) @@ -111,8 +129,9 @@ private: inline int get_unused() { int i = 0; - while ( i == 0 ) + do { i = AtomicOperations::atomic_fetch_and_and( &d_unused, 0 ); + } while ( i == 0 ); AtomicOperations::atomic_fetch_and_or( &d_unused, -( d_next[i] + 4 ) + 1 ); d_next[i] = -3; return i; @@ -120,16 +139,17 @@ private: inline void put_unused( int i ) { int j = 0; - while ( j == 0 ) + do { AtomicOperations::atomic_swap( &d_unused, &j ); + } while ( j == 0 ); d_next[i] = -3 - j; AtomicOperations::atomic_fetch_and_or( &d_unused, i ); } -private: - AtomicList( const AtomicList & ); - AtomicList &operator=( const AtomicList & ); +public: + AtomicList( const AtomicList & ) = delete; + AtomicList &operator=( const AtomicList & ) = delete; }; diff --git a/threadpool/atomic_list.hpp b/threadpool/atomic_list.hpp index a0850971..3a4df598 100644 --- a/threadpool/atomic_list.hpp +++ b/threadpool/atomic_list.hpp @@ -10,28 +10,39 @@ /****************************************************************** * Constructor * 
******************************************************************/ -template -AtomicList::AtomicList( const TYPE &default_value, const COMPARE &comp ) - : d_compare( comp ), d_default( default_value ) +template +AtomicList::AtomicList( + size_t capacity, const TYPE &default_value, const COMPARE &comp ) + : d_compare( comp ), + d_capacity( capacity ), + d_default( default_value ), + d_objects( new TYPE[capacity] ), + d_N( 0 ), + d_next( new AtomicOperations::int32_atomic[capacity + 1] ), + d_unused( 1 ), + d_N_insert( 0 ), + d_N_remove( 0 ) { - d_N = 0; - d_next[0] = -1; - d_unused = 1; - d_N_insert = 0; - d_N_remove = 0; - for ( int i = 0; i < MAX_SIZE; i++ ) { + d_next[0] = -1; + for ( size_t i = 0; i < d_capacity; i++ ) { d_next[i + 1] = -5 - i; d_objects[i] = d_default; } } +template +AtomicList::~AtomicList() +{ + delete[] d_objects; + delete[] d_next; +} /****************************************************************** * Remove an item * ******************************************************************/ -template +template template -inline TYPE AtomicList::remove( Compare compare, Args... args ) +inline TYPE AtomicList::remove( Compare compare, const Args &... args ) { // Acquiring temporary ownership int pos = 0; @@ -50,8 +61,7 @@ inline TYPE AtomicList::remove( Compare compare, Args.. // Test to see if the object passes compare bool test = compare( const_cast( d_objects[next - 1] ), args... ); if ( test ) { - // We want to return this object, update next to point to another entry and remove the - // entry + // We want to return this object, update next to point to another entry and remove unlock( next, -3 ); unlock( pos, next2 ); pos = next; @@ -71,8 +81,8 @@ inline TYPE AtomicList::remove( Compare compare, Args.. 
} return rtn; } -template -inline TYPE AtomicList::remove_first() +template +inline TYPE AtomicList::remove_first() { TYPE rtn( d_default ); auto next = lock( 0 ); @@ -94,11 +104,11 @@ inline TYPE AtomicList::remove_first() /****************************************************************** * Insert an item * ******************************************************************/ -template -inline void AtomicList::insert( TYPE x ) +template +inline void AtomicList::insert( const TYPE &x ) { - int N_used = AtomicOperations::atomic_increment( &d_N ); - if ( N_used > MAX_SIZE ) { + size_t N_used = AtomicOperations::atomic_increment( &d_N ); + if ( N_used > d_capacity ) { AtomicOperations::atomic_decrement( &d_N ); throw std::logic_error( "No room in list" ); } @@ -141,8 +151,8 @@ inline void AtomicList::insert( TYPE x ) * Check the internal structures of the list * * This is mostly thread-safe, but blocks all threads * ******************************************************************/ -template -inline bool AtomicList::check() +template +inline bool AtomicList::check() { // Get the lock and check for any other threads modifying the list auto start = lock( 0 ); @@ -153,11 +163,11 @@ inline bool AtomicList::check() int N2 = 0; int N_unused = 0; int N_tail = 0; - for ( int i = 0; i < MAX_SIZE; i++ ) { + for ( size_t i = 0; i < d_capacity; i++ ) { if ( d_objects[i] != d_default ) N1++; } - for ( int i = 0; i < MAX_SIZE + 1; i++ ) { + for ( size_t i = 0; i <= d_capacity; i++ ) { int next = i == 0 ? 
start : d_next[i]; if ( next > 0 ) { N2++; @@ -169,7 +179,7 @@ inline bool AtomicList::check() pass = false; } } - pass = pass && N_tail == 1 && N1 == d_N && N2 == d_N && N_unused + d_N == MAX_SIZE; + pass = pass && N_tail == 1 && N1 == d_N && N2 == d_N && N_unused + d_N == (int) d_capacity; int it = 0; int pos = 0; while ( true ) { diff --git a/threadpool/test/CMakeLists.txt b/threadpool/test/CMakeLists.txt deleted file mode 100644 index 90490864..00000000 --- a/threadpool/test/CMakeLists.txt +++ /dev/null @@ -1,16 +0,0 @@ -# Add thread pool tests -ADD_LBPM_TEST( test_atomic ) -ADD_LBPM_TEST( test_atomic_list ) -SET_TESTS_PROPERTIES ( test_atomic PROPERTIES FAIL_REGULAR_EXPRESSION ".*FAILED.*" PROCESSORS 64 ) -ADD_LBPM_TEST_THREAD_MPI( test_thread_pool 1 4 ) -ADD_LBPM_TEST_THREAD_MPI( test_thread_pool 2 4 ) -ADD_LBPM_TEST_THREAD_MPI( test_thread_pool 4 4 ) -SET_PROPERTY( TEST test_thread_pool_1procs_4threads APPEND PROPERTY RUN_SERIAL 1 ) -IF ( USE_MPI ) - SET_PROPERTY( TEST test_thread_pool_2procs_4threads APPEND PROPERTY RUN_SERIAL 1 ) - SET_PROPERTY( TEST test_thread_pool_4procs_4threads APPEND PROPERTY RUN_SERIAL 1 ) -ENDIF() - - - - diff --git a/threadpool/test/test_atomic.cpp b/threadpool/test/test_atomic.cpp deleted file mode 100644 index 27c76ee1..00000000 --- a/threadpool/test/test_atomic.cpp +++ /dev/null @@ -1,154 +0,0 @@ -#include "threadpool/atomic_helpers.h" -#include "common/UnitTest.h" -#include "common/Utilities.h" -#include -#include -#include -#include -#include -#include -#include -#include -#include - - -#define perr std::cerr -#define pout std::cout -#define printp printf - - -// Function to increment/decrement a counter N times -static void modify_counter( int N, AtomicOperations::counter_t &counter ) -{ - if ( N > 0 ) { - for ( int i = 0; i < N; i++ ) - counter.increment(); - } else if ( N < 0 ) { - for ( int i = 0; i < -N; i++ ) - counter.decrement(); - } -} - - -/****************************************************************** - * The 
main program * - ******************************************************************/ -#ifdef USE_WINDOWS -int __cdecl main( int, char ** ) -{ -#elif defined( USE_LINUX ) || defined( USE_MAC ) -int main( int, char *[] ) -{ -#else -#error Unknown OS -#endif - UnitTest ut; - - int N_threads = 64; // Number of threads - int N_count = 1000000; // Number of work items - -// Ensure we are using all processors -#ifdef __USE_GNU - int N_procs = sysconf( _SC_NPROCESSORS_ONLN ); - cpu_set_t mask; - CPU_ZERO( &mask ); - for ( int i = 0; i < N_procs; i++ ) - CPU_SET( i, &mask ); - sched_setaffinity( getpid(), sizeof( cpu_set_t ), &mask ); -#endif - - // Create the counter we want to test - AtomicOperations::counter_t count; - if ( count.increment() == 1 ) - ut.passes( "increment count" ); - else - ut.failure( "increment count" ); - if ( count.decrement() == 0 ) - ut.passes( "decrement count" ); - else - ut.failure( "decrement count" ); - count.setCount( 3 ); - if ( count.getCount() == 3 ) - ut.passes( "set count" ); - else - ut.failure( "set count" ); - count.setCount( 0 ); - - // Increment the counter in serial - auto start = std::chrono::high_resolution_clock::now(); - modify_counter( N_count, count ); - auto stop = std::chrono::high_resolution_clock::now(); - double time_inc_serial = std::chrono::duration( stop - start ).count() / N_count; - int val = count.getCount(); - if ( val != N_count ) { - char tmp[100]; - sprintf( tmp, "Count of %i did not match expected count of %i", val, N_count ); - ut.failure( tmp ); - } - printp( "Time to increment (serial) = %0.1f ns\n", 1e9 * time_inc_serial ); - - // Decrement the counter in serial - start = std::chrono::high_resolution_clock::now(); - modify_counter( -N_count, count ); - stop = std::chrono::high_resolution_clock::now(); - double time_dec_serial = std::chrono::duration( stop - start ).count() / N_count; - val = count.getCount(); - if ( val != 0 ) { - char tmp[100]; - sprintf( tmp, "Count of %i did not match expected count of 
%i", val, 0 ); - ut.failure( tmp ); - } - printp( "Time to decrement (serial) = %0.1f ns\n", 1e9 * time_dec_serial ); - - // Increment the counter in parallel - std::vector threads( N_threads ); - start = std::chrono::high_resolution_clock::now(); - for ( int i = 0; i < N_threads; i++ ) - threads[i] = std::thread( modify_counter, N_count, std::ref( count ) ); - for ( int i = 0; i < N_threads; i++ ) - threads[i].join(); - stop = std::chrono::high_resolution_clock::now(); - double time_inc_parallel = - std::chrono::duration( stop - start ).count() / ( N_count * N_threads ); - val = count.getCount(); - if ( val != N_count * N_threads ) { - char tmp[100]; - sprintf( tmp, "Count of %i did not match expected count of %i", val, N_count * N_threads ); - ut.failure( tmp ); - } - printp( "Time to increment (parallel) = %0.1f ns\n", 1e9 * time_inc_parallel ); - - // Decrement the counter in parallel - start = std::chrono::high_resolution_clock::now(); - for ( int i = 0; i < N_threads; i++ ) - threads[i] = std::thread( modify_counter, -N_count, std::ref( count ) ); - for ( int i = 0; i < N_threads; i++ ) - threads[i].join(); - stop = std::chrono::high_resolution_clock::now(); - double time_dec_parallel = - std::chrono::duration( stop - start ).count() / ( N_count * N_threads ); - val = count.getCount(); - if ( val != 0 ) { - char tmp[100]; - sprintf( tmp, "Count of %i did not match expected count of %i", val, 0 ); - ut.failure( tmp ); - } - printp( "Time to decrement (parallel) = %0.1f ns\n", 1e9 * time_dec_parallel ); - - // Check the time to increment/decrement - if ( time_inc_serial > 100e-9 || time_dec_serial > 100e-9 || time_inc_parallel > 100e-9 || - time_dec_serial > 100e-9 ) { -#if USE_GCOV - ut.expected_failure( "Time to increment/decrement count is too expensive" ); -#else - ut.failure( "Time to increment/decrement count is too expensive" ); -#endif - } else { - ut.passes( "Time to increment/decrement passed" ); - } - - // Finished - ut.report(); - auto N_errors = 
static_cast( ut.NumFailGlobal() ); - return N_errors; -} diff --git a/threadpool/test/test_atomic_list.cpp b/threadpool/test/test_atomic_list.cpp deleted file mode 100644 index 4717dcc3..00000000 --- a/threadpool/test/test_atomic_list.cpp +++ /dev/null @@ -1,221 +0,0 @@ -#include "threadpool/atomic_list.h" -#include "common/UnitTest.h" -#include "common/Utilities.h" -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - - - -static void modify_list( AtomicList &list ) -{ - const int N_count = 50000; - for ( int i = 0; i < N_count; i++ ) { - auto v1 = list.remove_first(); - auto v2 = list.remove( []( int ) { return true; } ); - auto v3 = list.remove( []( int v ) { return v >= ( rand() / 8 ); } ); - auto v4 = list.remove( []( int v ) { return v >= ( rand() / 4 ); } ); - auto v5 = list.remove( []( int v ) { return v >= ( rand() / 2 ); } ); - if ( v1 != -1 ) { - list.insert( v1 ); - } - if ( v2 != -1 ) { - list.insert( v2 ); - } - if ( v3 != -1 ) { - list.insert( v3 ); - } - if ( v4 != -1 ) { - list.insert( v4 ); - } - if ( v5 != -1 ) { - list.insert( v5 ); - } - } -} - - -static bool check_list( const std::vector &x, AtomicList &list ) -{ - bool pass = list.check(); - pass = pass && (int) x.size() == list.size(); - if ( pass ) { - for ( int i : x ) - pass = pass && i == list.remove( []( int ) { return true; } ); - } - // Restore the list - for ( int i = 0; i < list.size(); i++ ) - list.remove_first(); - for ( int i : x ) - list.insert( i ); - return pass; -} - - -static inline void clear_list( AtomicList &list ) -{ - for ( int i = 0; i < list.size(); i++ ) - list.remove_first(); -} - - -/****************************************************************** - * The main program * - ******************************************************************/ -int main( int, char *[] ) -{ - UnitTest ut; - - int N_threads = 8; // Number of threads - - // Create the list - AtomicList list( -1 ); - if ( list.size() == 0 && list.check() ) - 
ut.passes( "Initialize" ); - else - ut.failure( "Initialize" ); - - // Initialize the list with some empty values - for ( int i = 0; i < 80; i++ ) - list.insert( rand() ); - list.insert( 2 ); - list.insert( 1 ); - list.insert( rand() ); - - // Try to pull off a couple of values - int v1 = list.remove( []( int a ) { return a == 1; } ); // Find the entry with 1 - int v2 = list.remove( []( int ) { return true; } ); // Get the first entry - int v3 = list.remove( []( int ) { return false; } ); // Fail to get an entry - if ( v1 == 1 && v2 == 2 && v3 == -1 && list.size() == 81 && list.check() ) - ut.passes( "Basic sanity test" ); - else - ut.failure( "Basic sanity test" ); - - // Clear the list - while ( list.remove( []( int ) { return true; } ) != -1 ) { - } - - // Create a list of known values - // std::vector data0(512); - std::vector data0( 5 * N_threads ); - for ( int &i : data0 ) - i = rand(); - auto data = data0; - std::sort( data.begin(), data.end() ); - - // Test the cost to insert - int N_it = 20; - for ( int i = 0; i < list.size(); i++ ) - list.remove( []( int ) { return true; } ); - std::chrono::duration time; - std::chrono::time_point start, stop; - time = time.zero(); - for ( int it = 0; it < N_it; it++ ) { - clear_list( list ); - start = std::chrono::high_resolution_clock::now(); - for ( int i : data0 ) - list.insert( i ); - stop = std::chrono::high_resolution_clock::now(); - time += ( stop - start ); - } - printf( "insert time/item = %0.0f ns\n", 1e9 * time.count() / ( N_it * data0.size() ) ); - - // Test the cost to remove (first) - time = time.zero(); - for ( int it = 0; it < N_it; it++ ) { - check_list( data, list ); - start = std::chrono::high_resolution_clock::now(); - for ( size_t i = 0; i < data0.size(); i++ ) - list.remove_first(); - stop = std::chrono::high_resolution_clock::now(); - time += ( stop - start ); - } - printf( "remove (first) time/item = %0.0f ns\n", 1e9 * time.count() / ( N_it * data0.size() ) ); - - // Test the cost to remove (in 
order) - time = time.zero(); - for ( int it = 0; it < N_it; it++ ) { - check_list( data, list ); - start = std::chrono::high_resolution_clock::now(); - for ( size_t i = 0; i < data0.size(); i++ ) - list.remove( []( int ) { return true; } ); - stop = std::chrono::high_resolution_clock::now(); - time += ( stop - start ); - } - printf( - "remove (ordered) time/item = %0.0f ns\n", 1e9 * time.count() / ( N_it * data0.size() ) ); - - // Test the cost to remove (out order) - time = time.zero(); - for ( int it = 0; it < N_it; it++ ) { - check_list( data, list ); - start = std::chrono::high_resolution_clock::now(); - for ( int tmp : data0 ) { - list.remove( [tmp]( int v ) { return v == tmp; } ); - } - stop = std::chrono::high_resolution_clock::now(); - time += ( stop - start ); - } - printf( - "remove (unordered) time/item = %0.0f ns\n", 1e9 * time.count() / ( N_it * data0.size() ) ); - - // Read/write to the list and check the results - int64_t N0 = list.N_remove(); - check_list( data, list ); - start = std::chrono::high_resolution_clock::now(); - modify_list( list ); - stop = std::chrono::high_resolution_clock::now(); - double time_serial = std::chrono::duration( stop - start ).count(); - int64_t N1 = list.N_remove(); - bool pass = check_list( data, list ); - if ( pass ) - ut.passes( "Serial get/insert" ); - else - ut.failure( "Serial get/insert" ); - printf( "serial time = %0.5f s\n", time_serial ); - printf( "serial time/item = %0.0f ns\n", 1e9 * time_serial / ( N1 - N0 ) ); - - // Have multiple threads reading/writing to the list simultaneously - std::vector threads( N_threads ); - start = std::chrono::high_resolution_clock::now(); - for ( int i = 0; i < N_threads; i++ ) - threads[i] = std::thread( modify_list, std::ref( list ) ); - for ( int i = 0; i < N_threads; i++ ) - threads[i].join(); - stop = std::chrono::high_resolution_clock::now(); - double time_parallel = std::chrono::duration( stop - start ).count(); - int64_t N2 = list.N_remove(); - pass = check_list( 
data, list ); - if ( pass ) - ut.passes( "Parallel get/insert" ); - else - ut.failure( "Parallel get/insert" ); - printf( "parallel time = %0.5f s\n", time_parallel ); - printf( "parallel time/item = %0.0f ns\n", 1e9 * time_parallel / ( N2 - N1 ) ); - - // Try to over-fill the list - while ( !list.empty() ) - list.remove_first(); - for ( int i = 1; i <= list.capacity(); i++ ) - list.insert( i ); - try { - list.insert( list.capacity() + 1 ); - ut.failure( "List overflow" ); - } catch ( const std::exception &e ) { - ut.passes( "List overflow" ); - } catch ( ... ) { - ut.failure( "List overflow (unknown exception)" ); - } - - // Finished - ut.report(); - auto N_errors = static_cast( ut.NumFailGlobal() ); - return N_errors; -} diff --git a/threadpool/test/test_thread_pool.cpp b/threadpool/test/test_thread_pool.cpp deleted file mode 100644 index b7168f4b..00000000 --- a/threadpool/test/test_thread_pool.cpp +++ /dev/null @@ -1,967 +0,0 @@ -#include "ProfilerApp.h" -#ifdef USE_TIMER -#include "MemoryApp.h" -#endif -#include "threadpool/thread_pool.h" -#include "common/UnitTest.h" -#include "common/Utilities.h" -#include -#include -#include -#include -#include -#include -#include -#include -#include - - -#define MAX( x, y ) ( ( x ) > ( y ) ? 
( x ) : ( y ) ) - - -#define perr std::cerr -#define pout std::cout -#define printp printf - - -#ifdef USE_MPI -#include "mpi.h" -#endif - -#define to_ns( x ) std::chrono::duration_cast( x ).count() -#define to_ms( x ) std::chrono::duration_cast( x ).count() - - -// Wrapper functions for mpi -static inline void barrier() -{ -#ifdef USE_MPI - MPI_Barrier( MPI_COMM_WORLD ); -#endif -} -static inline int getRank() -{ - int rank = 0; -#ifdef USE_MPI - MPI_Comm_rank( MPI_COMM_WORLD, &rank ); -#endif - return rank; -} -static inline int getSize() -{ - int size = 0; -#ifdef USE_MPI - MPI_Comm_size( MPI_COMM_WORLD, &size ); -#endif - return size; -} - - -// Function to waste CPU cycles -void waste_cpu( int N ) -{ - if ( N > 10000 ) { - PROFILE_START( "waste_cpu", 2 ); - } - double pi = 3.141592653589793; - double x = 1.0; - N = std::max( 10, N ); - { - for ( int i = 0; i < N; i++ ) - x = sqrt( x * exp( pi / x ) ); - } // style to limit gcov hits - if ( fabs( x - 2.926064057273157 ) > 1e-12 ) { - abort(); - } - if ( N > 10000 ) { - PROFILE_STOP( "waste_cpu", 2 ); - } -} - - -// Sleep for the given time -// Note: since we may encounter interrupts, we may not sleep for the desired time -// so we need to perform the sleep in a loop -void sleep_ms( int64_t N ) -{ - auto t1 = std::chrono::high_resolution_clock::now(); - auto t2 = std::chrono::high_resolution_clock::now(); - while ( to_ms( t2 - t1 ) < N ) { - int N2 = N - to_ms( t2 - t1 ); - std::this_thread::sleep_for( std::chrono::milliseconds( N2 ) ); - t2 = std::chrono::high_resolution_clock::now(); - } -} -void sleep_s( int N ) { sleep_ms( 1000 * N ); } - - -// Function to sleep for N seconds then increment a global count -static volatile int global_sleep_count = 0; -void sleep_inc( int N ) -{ - PROFILE_START( "sleep_inc" ); - sleep_s( N ); - ++global_sleep_count; - PROFILE_STOP( "sleep_inc" ); -} -void sleep_inc2( double x ) -{ - sleep_ms( static_cast( round( x * 1000 ) ) ); - ++global_sleep_count; -} -void sleep_msg( 
double x, std::string msg ) -{ - PROFILE_START( msg ); - sleep_ms( static_cast( round( x * 1000 ) ) ); - NULL_USE( msg ); - PROFILE_STOP( msg ); -} -bool check_inc( int N ) { return global_sleep_count == N; } - - -// Function to return the processor for the given thread -std::mutex print_processor_mutex; - -void print_processor( ThreadPool *tpool ) -{ - int rank = 0; -#ifdef USE_MPI - MPI_Comm_rank( MPI_COMM_WORLD, &rank ); -#endif - int thread = tpool->getThreadNumber(); - int processor = ThreadPool::getCurrentProcessor(); - char tmp[100]; - sprintf( tmp, "%i: Thread,proc = %i,%i\n", rank, thread, processor ); - sleep_ms( 10 * rank ); - print_processor_mutex.lock(); - pout << tmp; - print_processor_mutex.unlock(); - sleep_ms( 100 ); -} - - -// Function to test how a member thread interacts with the thread pool -int test_member_thread( ThreadPool *tpool ) -{ - int N_errors = 0; - // Member threads are not allowed to wait for the pool to finish - try { - tpool->wait_pool_finished(); - N_errors++; - } catch ( ... ) { - } - // Member threads are not allowed to change the size of the pool - try { - tpool->wait_pool_finished(); - N_errors++; - } catch ( ... 
) { - } - return N_errors; -} - - -/****************************************************************** - * Test the TPOOL_ADD_WORK macro with variable number of arguments * - ******************************************************************/ -static int myfun0() { return 0; } -static int myfun1( int ) { return 1; } -static int myfun2( int, float ) { return 2; } -static int myfun3( int, float, double ) { return 3; } -static int myfun4( int, float, double, char ) { return 4; } -static int myfun5( int, float, double, char, std::string ) { return 5; } -static int myfun6( int, float, double, char, std::string, int ) { return 6; } -static int myfun7( int, float, double, char, std::string, int, int ) { return 7; } -static int test_function_arguements( ThreadPool *tpool ) -{ - int N_errors = 0; - // Test some basic types of instantiations - ThreadPool::thread_id_t id0 = TPOOL_ADD_WORK( tpool, myfun0, ( nullptr ) ); - ThreadPool::thread_id_t id1 = TPOOL_ADD_WORK( tpool, myfun1, ( (int) 1 ) ); - ThreadPool::thread_id_t id2 = TPOOL_ADD_WORK( tpool, myfun2, ( (int) 1, (float) 2 ) ); - ThreadPool::thread_id_t id3 = - TPOOL_ADD_WORK( tpool, myfun3, ( (int) 1, (float) 2, (double) 3 ) ); - ThreadPool::thread_id_t id4 = - TPOOL_ADD_WORK( tpool, myfun4, ( (int) 1, (float) 2, (double) 3, (char) 4 ) ); - ThreadPool::thread_id_t id5 = TPOOL_ADD_WORK( - tpool, myfun5, ( (int) 1, (float) 2, (double) 3, (char) 4, std::string( "test" ) ) ); - ThreadPool::thread_id_t id52 = TPOOL_ADD_WORK( - tpool, myfun5, ( (int) 1, (float) 2, (double) 3, (char) 4, std::string( "test" ) ), -1 ); - ThreadPool::thread_id_t id6 = TPOOL_ADD_WORK( tpool, myfun6, - ( (int) 1, (float) 2, (double) 3, (char) 4, std::string( "test" ), (int) 1 ) ); - ThreadPool::thread_id_t id7 = TPOOL_ADD_WORK( tpool, myfun7, - ( (int) 1, (float) 2, (double) 3, (char) 4, std::string( "test" ), (int) 1, (int) 1 ) ); - tpool->wait_pool_finished(); - if ( !tpool->isFinished( id0 ) ) { - N_errors++; - } - if ( tpool->getFunctionRet( 
id0 ) != 0 ) { - N_errors++; - } - if ( tpool->getFunctionRet( id1 ) != 1 ) { - N_errors++; - } - if ( tpool->getFunctionRet( id2 ) != 2 ) { - N_errors++; - } - if ( tpool->getFunctionRet( id3 ) != 3 ) { - N_errors++; - } - if ( tpool->getFunctionRet( id4 ) != 4 ) { - N_errors++; - } - if ( tpool->getFunctionRet( id5 ) != 5 ) { - N_errors++; - } - if ( tpool->getFunctionRet( id52 ) != 5 ) { - N_errors++; - } - if ( tpool->getFunctionRet( id6 ) != 6 ) { - N_errors++; - } - if ( tpool->getFunctionRet( id7 ) != 7 ) { - N_errors++; - } - return N_errors; -} - - -/****************************************************************** - * Examples to derive a user work item * - ******************************************************************/ -class UserWorkItemVoid : public ThreadPool::WorkItem -{ -public: - // User defined constructor (does not need to match any interfaces) - explicit UserWorkItemVoid( int dummy ) - { - // User initialized variables - NULL_USE( dummy ); - } - // User defined run (can do anything) - void run() override - { - // Perform the tasks - printf( "Hello work from UserWorkItem (void)" ); - } - // Will the routine return a result - bool has_result() const override { return false; } - // User defined destructor - ~UserWorkItemVoid() override = default; -}; -class UserWorkItemInt : public ThreadPool::WorkItemRet -{ -public: - // User defined constructor (does not need to match any interfaces) - explicit UserWorkItemInt( int dummy ) - { - // User initialized variables - NULL_USE( dummy ); - } - // User defined run (can do anything) - void run() override - { - // Perform the tasks - printf( "Hello work from UserWorkItem (int)" ); - // Store the results (it's type will match the template) - ThreadPool::WorkItemRet::d_result = 1; - } - // User defined destructor - ~UserWorkItemInt() override = default; -}; - - -/****************************************************************** - * test the time to run N tasks in parallel * - 
******************************************************************/ -template -inline double launchAndTime( ThreadPool &tpool, int N, Ret ( *routine )( Args... ), Args... args ) -{ - tpool.wait_pool_finished(); - auto start = std::chrono::high_resolution_clock::now(); - for ( int i = 0; i < N; i++ ) - ThreadPool_add_work( &tpool, 0, routine, args... ); - tpool.wait_pool_finished(); - auto stop = std::chrono::high_resolution_clock::now(); - return std::chrono::duration( stop - start ).count(); -} - - -// Move constructor function -volatile ThreadPool::thread_id_t f1( volatile ThreadPool::thread_id_t a ) { return a; } -ThreadPool::thread_id_t f2( ThreadPool::thread_id_t a ) { return a; } - - -/****************************************************************** - * Test the basic functionallity of the atomics * - ******************************************************************/ -int test_atomics() -{ - using namespace AtomicOperations; - int N_errors = 0; - volatile int32_atomic i32; - volatile int64_atomic i64; - i32 = 32; - i64 = 64; - if ( atomic_increment( &i32 ) != 33 || atomic_increment( &i64 ) != 65 ) - N_errors++; - if ( atomic_decrement( &i32 ) != 32 || atomic_decrement( &i64 ) != 64 ) - N_errors++; - if ( atomic_add( &i32, 2 ) != 34 || atomic_add( &i64, 4 ) != 68 ) - N_errors++; - if ( atomic_compare_and_swap( &i32, 0, 0 ) || atomic_compare_and_swap( &i64, 0, 0 ) ) - N_errors++; - if ( !atomic_compare_and_swap( &i32, 34, 32 ) || !atomic_compare_and_swap( &i64, 68, 64 ) ) - N_errors++; - if ( i32 != 32 || i64 != 64 ) - N_errors++; - return N_errors; -} - - -/****************************************************************** - * Test FIFO behavior * - ******************************************************************/ -void test_FIFO( UnitTest &ut, ThreadPool &tpool ) -{ - int rank = getRank(); - int size = getSize(); - const int N = 4000; - for ( int r = 0; r < size; r++ ) { - barrier(); - if ( r != rank ) - continue; - std::vector ids; - ids.reserve( N ); - 
for ( size_t i = 0; i < N; i++ ) - ids.emplace_back( TPOOL_ADD_WORK( &tpool, sleep_inc2, ( 0.001 ) ) ); - bool pass = true; - while ( tpool.N_queued() > 0 ) { - int i1 = -1, i2 = ids.size(); - for ( int i = N - 1; i >= 0; i-- ) { - bool started = ids[i].started(); - if ( started ) - i1 = std::max( i1, i ); // Last index to processing item - else - i2 = std::min( i2, i ); // First index to queued item - } - int diff = i1 == -1 ? 0 : ( i2 - i1 - 1 ); - if ( abs( diff ) > 4 ) { - printf( "%i %i %i\n", i1, i2, diff ); - pass = pass && abs( i2 - i1 - 1 ) <= 2; - } - } - ids.clear(); - tpool.wait_pool_finished(); - if ( pass ) - ut.passes( "Thread pool behaves as FIFO" ); - else - ut.failure( "Thread pool does not behave as FIFO" ); - } -} - - -/****************************************************************** - * The main program * - ******************************************************************/ -#ifdef USE_WINDOWS -int __cdecl main( int argc, char **argv ) -{ -#elif defined( USE_LINUX ) || defined( USE_MAC ) -int main( int argc, char *argv[] ) -{ -#else -#error Unknown OS -#endif - - int N_threads = 4; // Number of threads - int N_work = 2000; // Number of work items - int N_it = 10; // Number of cycles to run - int N_problem = 5; // Problem size - PROFILE_ENABLE( 3 ); - PROFILE_ENABLE_TRACE(); - PROFILE_DISABLE_MEMORY(); - UnitTest ut; - - - // Initialize MPI and set the error handlers -#ifdef USE_MPI - int provided_thread_support = -1; - MPI_Init_thread( &argc, &argv, MPI_THREAD_MULTIPLE, &provided_thread_support ); - Utilities::setErrorHandlers(); - // Disable OS specific warnings for all non-root ranks -#endif - int rank = getRank(); - int size = getSize(); - if ( rank > 0 ) - ThreadPool::set_OS_warnings( 1 ); - NULL_USE( size ); - NULL_USE( argc ); - NULL_USE( argv ); - - - // Test the atomics - if ( test_atomics() == 0 ) - ut.passes( "Atomics passed" ); - else - ut.failure( "Atomics failed" ); - - // Initialize the data - std::vector data1( N_work, 0 ); - 
std::vector priority( N_work, 0 ); - for ( int i = 0; i < N_work; i++ ) { - data1[i] = N_problem; - priority[i] = i % 128; - } - - - // Print the size of the thread pool class - printp( "Size of ThreadPool = %i\n", (int) sizeof( ThreadPool ) ); - - - // Get the number of processors availible - barrier(); - int N_procs = ThreadPool::getNumberOfProcessors(); - if ( N_procs > 0 ) - ut.passes( "getNumberOfProcessors" ); - else - ut.failure( "getNumberOfProcessors" ); - printp( "%i processors availible\n", N_procs ); - - - // Get the processor affinities for the process - barrier(); - std::vector cpus = ThreadPool::getProcessAffinity(); - printp( "%i cpus for current process: ", (int) cpus.size() ); - for ( int cpu : cpus ) - printp( "%i ", cpu ); - printp( "\n" ); - if ( !cpus.empty() ) { - ut.passes( "getProcessAffinity" ); - } else { -#ifdef __APPLE__ - ut.expected_failure( "getProcessAffinity" ); -#else - ut.failure( "getProcessAffinity" ); -#endif - } - - - // Test setting the process affinities - barrier(); - bool pass = false; - if ( !cpus.empty() && N_procs > 0 ) { - if ( cpus.size() == 1 ) { - cpus.resize( N_procs ); - for ( int i = 0; i < N_procs; i++ ) - cpus.push_back( i ); - try { - ThreadPool::setProcessAffinity( cpus ); - } catch ( ... ) { - } - cpus = ThreadPool::getProcessAffinity(); - std::vector cpus = ThreadPool::getProcessAffinity(); - printp( "%i cpus for current process (updated): ", (int) cpus.size() ); - for ( int cpu : cpus ) - printp( "%i ", cpu ); - printp( "\n" ); - pass = cpus.size() > 1; - } else { - std::vector cpus_orig = cpus; - std::vector cpus_tmp( 1, cpus[0] ); - try { - ThreadPool::setProcessAffinity( cpus_tmp ); - } catch ( ... ) { - } - cpus = ThreadPool::getProcessAffinity(); - if ( cpus.size() == 1 ) - pass = true; - try { - ThreadPool::setProcessAffinity( cpus_orig ); - } catch ( ... 
) { - } - cpus = ThreadPool::getProcessAffinity(); - if ( cpus.size() != cpus_orig.size() ) - pass = false; - } - } - if ( pass ) { - ut.passes( "setProcessAffinity" ); - } else { -#ifdef __APPLE__ - ut.expected_failure( "setProcessAffinity" ); -#else - ut.failure( "setProcessAffinity" ); -#endif - } - int N_procs_used = std::min( N_procs, N_threads ); - printp( "%i processors used\n", N_procs_used ); - - - // Create the thread pool - barrier(); - printp( "Creating thread pool\n" ); - ThreadPool tpool0; - ThreadPool tpool; - ThreadPool::thread_id_t id; - id = TPOOL_ADD_WORK( &tpool, waste_cpu, ( data1[0] ) ); - if ( id == ThreadPool::thread_id_t() || !tpool.isValid( id ) ) - ut.failure( "Errors with id" ); - tpool.setNumThreads( N_threads ); - if ( tpool.getNumThreads() == N_threads ) - ut.passes( "Created thread pool" ); - else - ut.failure( "Failed to create tpool with desired number of threads" ); - - - // Test setting the thread affinities - barrier(); - if ( cpus.size() > 1 ) { - sleep_ms( 50 ); - // First make sure we can get the thread affinities - std::vector procs = ThreadPool::getThreadAffinity(); - if ( procs == cpus ) { - ut.passes( "getThreadAffinity() matches procs" ); - } else { - char msg[100]; - sprintf( msg, "getThreadAffinity() does not match procs (%i,%i)", - static_cast( procs.size() ), static_cast( cpus.size() ) ); - ut.failure( msg ); - } - pass = true; - for ( int i = 0; i < N_threads; i++ ) { - std::vector procs_thread = tpool.getThreadAffinity( i ); - if ( procs_thread != procs ) { - printp( "%i: Initial thread affinity: ", rank ); - for ( int i : procs_thread ) - printp( "%i ", i ); - printp( "\n" ); - pass = false; - } - } - if ( pass ) - ut.passes( "getThreadAffinity(thread) matches procs" ); - else - ut.failure( "getThreadAffinity(thread) does not match procs" ); - // Try to set the thread affinities - pass = true; - if ( !procs.empty() ) { - int N_procs_thread = std::max( (int) cpus.size() / N_threads, 1 ); - for ( int i = 0; i < 
N_threads; i++ ) { - std::vector procs_thread( N_procs_thread, -1 ); - for ( int j = 0; j < N_procs_thread; j++ ) - procs_thread[j] = procs[( i * N_procs_thread + j ) % procs.size()]; - tpool.setThreadAffinity( i, procs_thread ); - sleep_ms( 10 ); // Give time for OS to update thread affinities - std::vector procs_thread2 = tpool.getThreadAffinity( i ); - if ( procs_thread2 != procs_thread ) { - printp( "%i: Final thread affinity: ", rank ); - for ( int i : procs_thread ) - printp( "%i ", i ); - printp( "\n" ); - pass = false; - } - } - } - if ( pass ) - ut.passes( "setThreadAffinity passes" ); - else - ut.failure( "setThreadAffinity failed to change affinity" ); - } - - - // Reset the thread affinities - barrier(); - tpool.setNumThreads( tpool.getNumThreads(), "none" ); - // tpool.setNumThreads(tpool.getNumThreads(),"independent"); - for ( int i = 0; i < N_threads; i++ ) { - std::vector procs_thread = tpool.getThreadAffinity( i ); - printp( "Thread affinity: " ); - for ( int i : procs_thread ) - printp( "%i ", i ); - printp( "\n" ); - } - - // Print the current processors by thread id - barrier(); - ThreadPool::set_OS_warnings( 1 ); - print_processor( &tpool ); - launchAndTime( tpool, N_threads, print_processor, &tpool ); - - // Run some basic tests - barrier(); - auto start = std::chrono::high_resolution_clock::now(); - for ( int n = 0; n < N_it; n++ ) { - for ( int i = 0; i < N_work; i++ ) - waste_cpu( data1[i] ); - } - auto stop = std::chrono::high_resolution_clock::now(); - double time = std::chrono::duration( stop - start ).count(); - printp( "Time for serial cycle = %0.0f us\n", 1e6 * time / N_it ); - printp( "Time for serial item = %0.0f ns\n", 1e9 * time / ( N_it * N_work ) ); - id = TPOOL_ADD_WORK( &tpool, waste_cpu, ( data1[0] ) ); - tpool.wait( id ); - std::vector ids2; - ids2.push_back( TPOOL_ADD_WORK( &tpool, waste_cpu, ( data1[0] ) ) ); - tpool.wait( ids2[0] ); - - // Test the move operator for thread_id - ThreadPool::thread_id_t id1 = f1( id ); // 
move-construct from rvalue temporary - ThreadPool::thread_id_t id2 = std::move( id1 ); // move-construct from xvalue - volatile ThreadPool::thread_id_t id3 = f2( id ); // move-construct from rvalue temporary - volatile ThreadPool::thread_id_t id4 = std::move( id3 ); // move-construct from xvalue - id2.reset(); - id4.reset(); - - // Test calling functions with different number of arguments - barrier(); - printp( "Testing arguments:\n" ); - int N_errors_args = test_function_arguements( &tpool ); - if ( N_errors_args == 0 ) - ut.passes( "Calling function with default arguments" ); - else - ut.failure( "Error calling function with default arguments" ); - - - // Check that the threads can sleep in parallel (this does not depend on the number of - // processors) - barrier(); - tpool.wait_pool_finished(); - start = std::chrono::high_resolution_clock::now(); - sleep_inc( 1 ); - stop = std::chrono::high_resolution_clock::now(); - double sleep_serial = std::chrono::duration( stop - start ).count(); - double sleep_parallel = launchAndTime( tpool, N_threads, sleep_inc, 1 ); - double sleep_speedup = N_procs_used * sleep_serial / sleep_parallel; - printf( "%i: Speedup on %i sleeping threads: %0.3f\n", rank, N_procs_used, sleep_speedup ); - printf( "%i: ts = %0.3f, tp = %0.3f\n", rank, sleep_serial, sleep_parallel ); - if ( fabs( sleep_serial - 1.0 ) < 0.05 && fabs( sleep_parallel - 1.0 ) < 0.25 && - sleep_speedup > 3 ) - ut.passes( "Passed thread sleep" ); - else - ut.failure( "Failed thread sleep" ); - - - // Check that the threads are actually working in parallel - barrier(); - if ( N_procs_used > 1 ) { -#ifdef USE_MPI - // Use a non-blocking serialization of the MPI processes - // if we do not have a sufficient number of processors - bool serialize_mpi = N_procs < N_threads * size; - int buf; - MPI_Request request; - MPI_Status status; - if ( serialize_mpi && rank > 0 ) { - MPI_Irecv( &buf, 1, MPI_INT, rank - 1, 0, MPI_COMM_WORLD, &request ); - int flag = false; - while ( 
!flag ) { - MPI_Test( &request, &flag, &status ); - sleep_s( 1 ); - } - } -#endif - int N = 20000000; // Enough work to keep the processor busy for ~ 1 s - // Run in serial - start = std::chrono::high_resolution_clock::now(); - waste_cpu( N ); - stop = std::chrono::high_resolution_clock::now(); - double time_serial = std::chrono::duration( stop - start ).count(); - // Run in parallel - double time_parallel = launchAndTime( tpool, N_procs_used, waste_cpu, N ); - double time_parallel2 = launchAndTime( tpool, N_procs_used, waste_cpu, N / 1000 ); - double speedup = N_procs_used * time_serial / time_parallel; - printf( "%i: Speedup on %i procs: %0.3f\n", rank, N_procs_used, speedup ); - printf( "%i: ts = %0.3f, tp = %0.3f, tp2 = %0.3f\n", rank, time_serial, time_parallel, - time_parallel2 ); - if ( speedup > 1.4 ) { - ut.passes( "Passed speedup test" ); - } else { -#ifdef USE_GCOV - ut.expected_failure( "Times do not indicate tests are running in parallel (gcov)" ); -#else - ut.failure( "Times do not indicate tests are running in parallel" ); -#endif - } -#ifdef USE_MPI - if ( serialize_mpi ) { - if ( rank < size - 1 ) - MPI_Send( &N, 1, MPI_INT, rank + 1, 0, MPI_COMM_WORLD ); - if ( rank == size - 1 ) { - for ( int i = 0; i < size - 1; i++ ) - MPI_Send( &N, 1, MPI_INT, i, 1, MPI_COMM_WORLD ); - } else { - MPI_Irecv( &buf, 1, MPI_INT, size - 1, 1, MPI_COMM_WORLD, &request ); - int flag = false; - MPI_Status status; - while ( !flag ) { - MPI_Test( &request, &flag, &status ); - sleep_s( 1 ); - } - } - } -#endif - } else { - ut.expected_failure( "Testing thread performance with less than 1 processor" ); - } - - - // Test first-in-first-out scheduler (also ensures priorities) - test_FIFO( ut, tpool ); - - - // Test adding a work item with a dependency - barrier(); - { - // Test that we sucessfully wait on the work items - std::vector ids; - ids.reserve( 5 ); - global_sleep_count = 0; // Reset the count before this test - ThreadPool::thread_id_t id0; - auto id1 = 
TPOOL_ADD_WORK( &tpool, sleep_inc, ( 1 ) ); - auto id2 = TPOOL_ADD_WORK( &tpool, sleep_inc, ( 2 ) ); - auto *wait1 = new WorkItemFull( check_inc, 1 ); - auto *wait2 = new WorkItemFull( check_inc, 2 ); - wait1->add_dependency( id0 ); - wait1->add_dependency( id1 ); - wait2->add_dependency( id1 ); - wait2->add_dependency( id2 ); - ids.clear(); - ids.push_back( tpool.add_work( wait1 ) ); - ids.push_back( tpool.add_work( wait2 ) ); - tpool.wait_all( ids.size(), &ids[0] ); - if ( !tpool.getFunctionRet( ids[0] ) || !tpool.getFunctionRet( ids[1] ) ) - ut.failure( "Failed to wait on required dependency" ); - else - ut.passes( "Dependencies" ); - tpool.wait_pool_finished(); - // Test waiting on more dependencies than in the thread pool (changing priorities) - ids.clear(); - for ( size_t i = 0; i < 20; i++ ) - ids.push_back( TPOOL_ADD_WORK( &tpool, sleep_inc2, ( 0.1 ) ) ); - auto *wait3 = new WorkItemFull( sleep_inc2, 0 ); - wait3->add_dependencies( ids ); - id = tpool.add_work( wait3, 50 ); - tpool.wait( id ); - bool pass = true; - for ( auto &id : ids ) - pass = pass && id.finished(); - ids.clear(); - if ( pass ) - ut.passes( "Dependencies2" ); - else - ut.failure( "Dependencies2" ); - // Check that we can handle more complex dependencies - id1 = TPOOL_ADD_WORK( &tpool, sleep_inc2, ( 0.5 ) ); - for ( int i = 0; i < 10; i++ ) { - wait1 = new WorkItemFull( check_inc, 1 ); - wait1->add_dependency( id1 ); - tpool.add_work( wait1 ); - } - tpool.wait_pool_finished(); - ids.clear(); - for ( int i = 0; i < 5; i++ ) - ids.push_back( TPOOL_ADD_WORK( &tpool, sleep_inc2, ( 0.5 ) ) ); - sleep_inc2( 0.002 ); - ThreadPool::WorkItem *work = new WorkItemFull( waste_cpu, 100 ); - work->add_dependencies( ids ); - id = tpool.add_work( work, 10 ); - tpool.wait( id ); - } - - // Test the timing creating and running a work item - barrier(); - { - printp( "Testing timmings (creating/running work item):\n" ); - std::string timer_name = "Create/Run work item"; - PROFILE_START( timer_name ); - 
int64_t time_create = 0; - int64_t time_run = 0; - int64_t time_delete = 0; - std::vector work( N_work ); - start = std::chrono::high_resolution_clock::now(); - for ( int n = 0; n < N_it; n++ ) { - auto t1 = std::chrono::high_resolution_clock::now(); - for ( int i = 0; i < N_work; i++ ) - work[i] = ThreadPool::createWork( waste_cpu, data1[i] ); - auto t2 = std::chrono::high_resolution_clock::now(); - for ( int i = 0; i < N_work; i++ ) - work[i]->run(); - auto t3 = std::chrono::high_resolution_clock::now(); - for ( int i = 0; i < N_work; i++ ) - delete work[i]; - auto t4 = std::chrono::high_resolution_clock::now(); - time_create += to_ns( t2 - t1 ); - time_run += to_ns( t3 - t2 ); - time_delete += to_ns( t4 - t3 ); - if ( ( n + 1 ) % 100 == 0 ) - printp( "Cycle %i of %i finished\n", n + 1, N_it ); - } - stop = std::chrono::high_resolution_clock::now(); - time = std::chrono::duration( stop - start ).count(); - PROFILE_STOP( timer_name ); - printp( " time = %0.0f ms\n", 1e3 * time ); - printp( " time / cycle = %0.0f us\n", 1e6 * time / N_it ); - printp( " average time / item = %0.0f ns\n", 1e9 * time / ( N_it * N_work ) ); - printp( " create = %i ns\n", time_create / ( N_it * N_work ) ); - printp( " run = %i ns\n", time_run / ( N_it * N_work ) ); - printp( " delete = %i us\n", time_delete / ( N_it * N_work ) ); - } - - // Test the timing adding a single item - barrier(); - for ( int it = 0; it < 2; it++ ) { - ThreadPool *tpool_ptr = nullptr; - std::string timer_name; - if ( it == 0 ) { - printp( "Testing timmings (adding a single item to empty tpool):\n" ); - timer_name = "Add single item to empty pool"; - tpool_ptr = &tpool0; - } else if ( it == 1 ) { - printp( "Testing timmings (adding a single item):\n" ); - timer_name = "Add single item to tpool"; - tpool_ptr = &tpool; - } - PROFILE_START( timer_name ); - std::vector ids( N_work ); - int64_t time_add = 0; - int64_t time_wait = 0; - start = std::chrono::high_resolution_clock::now(); - for ( int n = 0; n < N_it; n++ 
) { - auto t1 = std::chrono::high_resolution_clock::now(); - for ( int i = 0; i < N_work; i++ ) - ids[i] = TPOOL_ADD_WORK( tpool_ptr, waste_cpu, ( data1[i] ), priority[i] ); - auto t2 = std::chrono::high_resolution_clock::now(); - tpool_ptr->wait_all( N_work, &ids[0] ); - auto t3 = std::chrono::high_resolution_clock::now(); - time_add += to_ns( t2 - t1 ); - time_wait += to_ns( t3 - t2 ); - if ( ( n + 1 ) % 100 == 0 ) - printp( "Cycle %i of %i finished\n", n + 1, N_it ); - } - stop = std::chrono::high_resolution_clock::now(); - time = std::chrono::duration( stop - start ).count(); - PROFILE_STOP( timer_name ); - printp( " time = %0.0f ms\n", 1e3 * time ); - printp( " time / cycle = %0.0f us\n", 1e6 * time / N_it ); - printp( " average time / item = %0.0f ns\n", 1e9 * time / ( N_it * N_work ) ); - printp( " create and add = %i ns\n", time_add / ( N_it * N_work ) ); - printp( " wait = %i us\n", time_wait / ( N_it * N_work ) ); - } - - // Test the timing pre-creating the work items and adding multiple at a time - barrier(); - for ( int it = 0; it < 2; it++ ) { - ThreadPool *tpool_ptr = nullptr; - std::string timer_name; - if ( it == 0 ) { - printp( "Testing timmings (adding a block of items to empty tpool):\n" ); - timer_name = "Add multiple items to empty pool"; - tpool_ptr = &tpool0; - } else if ( it == 1 ) { - printp( "Testing timmings (adding a block of items):\n" ); - timer_name = "Add multiple items to tpool"; - tpool_ptr = &tpool; - } - PROFILE_START( timer_name ); - int64_t time_create_work = 0; - int64_t time_add_work = 0; - int64_t time_wait_work = 0; - std::vector work( N_work ); - start = std::chrono::high_resolution_clock::now(); - for ( int n = 0; n < N_it; n++ ) { - auto t1 = std::chrono::high_resolution_clock::now(); - for ( int i = 0; i < N_work; i++ ) - work[i] = ThreadPool::createWork( waste_cpu, data1[i] ); - auto t2 = std::chrono::high_resolution_clock::now(); - auto ids = tpool_ptr->add_work( work, priority ); - auto t3 = 
std::chrono::high_resolution_clock::now(); - tpool_ptr->wait_all( ids ); - auto t4 = std::chrono::high_resolution_clock::now(); - time_create_work += to_ns( t2 - t1 ); - time_add_work += to_ns( t3 - t2 ); - time_wait_work += to_ns( t4 - t3 ); - if ( ( n + 1 ) % 100 == 0 ) - printp( "Cycle %i of %i finished\n", n + 1, N_it ); - } - stop = std::chrono::high_resolution_clock::now(); - time = std::chrono::duration( stop - start ).count(); - PROFILE_STOP( timer_name ); - printp( " time = %0.0f ms\n", 1e3 * time ); - printp( " time / cycle = %0.0f us\n", 1e6 * time / N_it ); - printp( " average time / item = %0.0f ns\n", 1e9 * time / ( N_it * N_work ) ); - printp( " create = %i ns\n", time_create_work / ( N_it * N_work ) ); - printp( " add = %i ns\n", time_add_work / ( N_it * N_work ) ); - printp( " wait = %i ns\n", time_wait_work / ( N_it * N_work ) ); - } - - // Run a dependency test that tests a simple case that should keep the thread pool busy - // Note: Checking the results requires looking at the trace data - tpool.wait_pool_finished(); - PROFILE_START( "Dependency test" ); - for ( int i = 0; i < 10; i++ ) { - char msg[3][100]; - sprintf( msg[0], "Item %i-%i", i, 0 ); - sprintf( msg[1], "Item %i-%i", i, 1 ); - sprintf( msg[2], "Item %i-%i", i, 2 ); - ThreadPool::WorkItem *work = - new WorkItemFull( sleep_msg, 0.5, msg[0] ); - ThreadPool::WorkItem *work1 = - new WorkItemFull( sleep_msg, 0.1, msg[1] ); - ThreadPool::WorkItem *work2 = - new WorkItemFull( sleep_msg, 0.1, msg[2] ); - ThreadPool::thread_id_t id = tpool.add_work( work ); - work1->add_dependency( id ); - work2->add_dependency( id ); - tpool.add_work( work1 ); - tpool.add_work( work2 ); - } - tpool.wait_pool_finished(); - PROFILE_STOP( "Dependency test" ); - - // Close the thread pool - tpool.setNumThreads( 0 ); - - // Save the profiling results - PROFILE_SAVE( "test_thread_pool" ); - PROFILE_DISABLE(); - - // Test creating/destroying a thread pool using new - barrier(); - pass = true; - try { - ThreadPool 
*tpool = new ThreadPool( ThreadPool::MAX_NUM_THREADS - 1 ); - if ( tpool->getNumThreads() != ThreadPool::MAX_NUM_THREADS - 1 ) - pass = false; - if ( !ThreadPool::is_valid( tpool ) ) - pass = false; - delete tpool; - // Check that tpool is invalid - // Note: valgrind will report this as an invalid memory read, but we want to keep the test) - if ( ThreadPool::is_valid( tpool ) ) - pass = false; - } catch ( ... ) { - pass = false; - } - if ( pass ) - ut.passes( "Created/destroyed thread pool with new" ); - else - ut.failure( "Created/destroyed thread pool with new" ); - - // Print the test results - barrier(); - ut.report(); - auto N_errors = static_cast( ut.NumFailGlobal() ); - - // Shudown MPI - pout << "Shutting down\n"; - barrier(); -#ifdef USE_TIMER - if ( rank == 0 ) - MemoryApp::print( pout ); -#endif -#ifdef USE_MPI - MPI_Finalize(); - sleep_ms( 10 ); -#endif - return N_errors; -} diff --git a/threadpool/thread_pool.cpp b/threadpool/thread_pool.cpp index 4cf7e222..837909cb 100644 --- a/threadpool/thread_pool.cpp +++ b/threadpool/thread_pool.cpp @@ -1,8 +1,10 @@ -#define _CRT_NONSTDC_NO_DEPRECATE #include "threadpool/thread_pool.h" #include "common/Utilities.h" -#include "common/StackTrace.h" +#include "StackTrace/StackTrace.h" +#include "StackTrace/Utilities.h" + #include "ProfilerApp.h" + #include #include #include @@ -10,11 +12,17 @@ #include #include #include +#include #include #include #include +// Add profile timers or performance counters to the threadpool +#define PROFILE_THREADPOOL_PERFORMANCE 0 +#define MONITOR_THREADPOOL_PERFORMANCE 0 + + #define perr std::cerr #define pout std::cout #define printp printf @@ -34,7 +42,6 @@ #if defined( USE_WINDOWS ) #include #include - #define NOMINMAX // Disable warning: the inline specifier cannot be used when a friend // declaration refers to a specialization of a function template #pragma warning( disable : 4396 ) @@ -62,30 +69,23 @@ // Set some macros -#if PROFILE_THREADPOOL_PERFORMANCE -#define 
PROFILE_THREADPOOL_START( X ) PROFILE_START( X, 3 ) -#define PROFILE_THREADPOOL_START2( X ) PROFILE_START2( X, 3 ) -#define PROFILE_THREADPOOL_STOP( X ) PROFILE_STOP( X, 3 ) -#define PROFILE_THREADPOOL_STOP2( X ) PROFILE_STOP2( X, 3 ) +// clang-format off +#if PROFILE_THREADPOOL_PERFORMANCE == 1 +#define PROFILE_THREADPOOL_START(X) PROFILE_START(X,3) +#define PROFILE_THREADPOOL_START2(X) PROFILE_START2(X,3) +#define PROFILE_THREADPOOL_STOP(X) PROFILE_STOP(X,3) +#define PROFILE_THREADPOOL_STOP2(X) PROFILE_STOP2(X,3) #else -#define PROFILE_THREADPOOL_START( X ) \ - do { \ - } while ( 0 ) -#define PROFILE_THREADPOOL_START2( X ) \ - do { \ - } while ( 0 ) -#define PROFILE_THREADPOOL_STOP( X ) \ - do { \ - } while ( 0 ) -#define PROFILE_THREADPOOL_STOP2( X ) \ - do { \ - } while ( 0 ) +#define PROFILE_THREADPOOL_START(X) do {} while ( 0 ) +#define PROFILE_THREADPOOL_START2(X) do {} while ( 0 ) +#define PROFILE_THREADPOOL_STOP(X) do {} while ( 0 ) +#define PROFILE_THREADPOOL_STOP2(X) do {} while ( 0 ) #endif #if MONITOR_THREADPOOL_PERFORMANCE == 1 -#define accumulate( x, t1, t2 ) \ - AtomicOperations::atomic_add( \ - &x, std::chrono::duration_cast( t2 - t1 ).count() ); +#define accumulate(x,t1,t2) AtomicOperations::atomic_add( \ + &x, std::chrono::duration_cast(t2-t1).count() ); #endif +// clang-format on #if MONITOR_THREADPOOL_PERFORMANCE == 1 @@ -93,37 +93,59 @@ static AtomicOperations::int64_atomic total_add_work_time[5] = { 0, 0, 0, 0, 0 } #endif +// Set env +static std::mutex Utilities_mutex; +void setenv( const std::string &name, const std::string &value ) +{ + Utilities_mutex.lock(); +#if defined( USE_LINUX ) || defined( USE_MAC ) + bool pass = false; + if ( value.empty() ) + pass = ::setenv( name.data(), value.data(), 1 ) == 0; + else + pass = ::unsetenv( name.data() ) == 0; +#elif defined( USE_WINDOWS ) + bool pass = SetEnvironmentVariable( name.data(), value.data() ) != 0; +#else +#error Unknown OS +#endif + Utilities_mutex.unlock(); + if ( !pass ) { + char 
msg[1024]; + if ( !value.empty() ) + sprintf( + msg, "Error setting enviornmental variable: %s=%s\n", name.data(), value.data() ); + else + sprintf( msg, "Error clearing enviornmental variable: %s\n", name.data() ); + ERROR( msg ); + } +} + + // Helper functions template void quicksort( int N, T *data ); template inline void quicksort( std::vector &x ) { - quicksort( (int) x.size(), x.data() ); + quicksort( x.size(), x.data() ); } static inline int find_id( int, const ThreadPool::thread_id_t *, const ThreadPool::thread_id_t & ); -// Function to generate a random size_t number (excluding 0 and ~0) -static size_t rand_size_t() +// Function to generate a random number for checking if tpool is valid +static inline bool validHeadTail( uint32_t key ) { - size_t key = 0; - double tmp = 1; - if ( sizeof( size_t ) == 4 ) { - while ( tmp < 4e9 ) { - key ^= rand() * 0x9E3779B9; // 2^32*0.5*(sqrt(5)-1) - tmp *= RAND_MAX; - } - } else if ( sizeof( size_t ) == 8 ) { - while ( tmp < 1.8e19 ) { - key ^= rand() * 0x9E3779B97F4A7C15; // 2^64*0.5*(sqrt(5)-1) - tmp *= RAND_MAX; - } - } else { - throw std::logic_error( "Unhandled case" ); - } - if ( key == 0 || ( ~key ) == 0 ) - key = rand_size_t(); + return ( key > 10 ) && ( ~key > 10 ) && ( key % 2 != 0 ) && ( key % 3 == 2 ); +} +static inline uint32_t generateHeadTail() +{ + uint32_t key = 0; + std::random_device rd; + std::mt19937 gen( rd() ); + std::uniform_int_distribution<> dis( 1, 0xFFFFFF ); + while ( !validHeadTail( key ) ) + key = static_cast( dis( gen ) ) * 0x9E3779B9; // 2^32*0.5*(sqrt(5)-1) return key; } @@ -131,22 +153,10 @@ static size_t rand_size_t() /****************************************************************** * Run some basic compile-time checks * ******************************************************************/ -#if MAX_NUM_THREADS % 64 != 0 -// We use a bit array for d_active and d_cancel -#error MAX_NUM_THREADS must be a multiple of 64 -#endif -#if MAX_NUM_THREADS >= 65535 -// We store N_threads as a short 
int -#error MAX_NUM_THREADS must < 65535 -#endif -#if MAX_QUEUED >= 65535 -// We store the indicies to the queue list as short ints -#error MAX_QUEUED must < 65535 -#endif -// Check the c++ std -#if CXX_STD == 98 -#error Thread pool class requires c++11 or newer -#endif +static_assert( ThreadPool::MAX_THREADS % 64 == 0, "MAX_THREADS must be a multiple of 64" ); +static_assert( ThreadPool::MAX_THREADS < 65535, "MAX_THREADS must < 65535" ); +static_assert( sizeof( AtomicOperations::int32_atomic ) == 4, "atomic32 must be a 32-bit integer" ); +static_assert( sizeof( AtomicOperations::int64_atomic ) == 8, "atomic64 must be a 64-bit integer" ); /****************************************************************** @@ -181,7 +191,7 @@ static inline bool get_bit( const volatile AtomicOperations::int64_atomic *x, si uint64_t mask = 0x01; mask <<= index % 64; // This is thread-safe since we only care about a single bit - AtomicOperations::int64_atomic y = x[index / 64]; + AtomicOperations::int64_atomic y = x[index / 64]; return ( y & mask ) != 0; } @@ -214,18 +224,15 @@ static inline int count_bits( int_type x ) /****************************************************************** * Set the global constants * ******************************************************************/ -constexpr int ThreadPool::MAX_NUM_THREADS; -constexpr int ThreadPool::MAX_QUEUED; -constexpr int ThreadPool::MAX_WAIT; -constexpr bool ThreadPool::PROFILE_THREADPOOL_PERFORMANCE; -constexpr bool ThreadPool::MONITOR_THREADPOOL_PERFORMANCE; +constexpr uint16_t ThreadPool::MAX_THREADS; +constexpr uint16_t ThreadPool::MAX_WAIT; /****************************************************************** * Set the behavior of OS warnings * ******************************************************************/ static int global_OS_behavior = 0; -std::mutex OS_warning_mutex; +static std::mutex OS_warning_mutex; void ThreadPool::set_OS_warnings( int behavior ) { ASSERT( behavior >= 0 && behavior <= 2 ); @@ -249,18 +256,7 @@ 
void ThreadPool::setErrorHandler( std::function fun /****************************************************************** * Function to return the number of processors availible * ******************************************************************/ -int ThreadPool::getNumberOfProcessors() -{ -#if defined( USE_LINUX ) || defined( USE_MAC ) - return sysconf( _SC_NPROCESSORS_ONLN ); -#elif defined( USE_WINDOWS ) - SYSTEM_INFO sysinfo; - GetSystemInfo( &sysinfo ); - return static_cast( sysinfo.dwNumberOfProcessors ); -#else -#error Unknown OS -#endif -} +int ThreadPool::getNumberOfProcessors() { return std::thread::hardware_concurrency(); } /****************************************************************** @@ -293,19 +289,17 @@ std::vector ThreadPool::getProcessAffinity() int error = sched_getaffinity( getpid(), sizeof( cpu_set_t ), &mask ); if ( error != 0 ) throw std::logic_error( "Error getting process affinity" ); - for ( int i = 0; i < (int) sizeof( cpu_set_t ) * CHAR_BIT; i++ ) { + for ( size_t i = 0; i < sizeof( cpu_set_t ) * CHAR_BIT; i++ ) { if ( CPU_ISSET( i, &mask ) ) procs.push_back( i ); } #else #warning sched_getaffinity is not supported for this compiler/OS OS_warning( "sched_getaffinity is not supported for this compiler/OS" ); - procs.clear(); #endif #elif defined( USE_MAC ) // MAC does not support getting or setting the affinity OS_warning( "MAC does not support getting the process affinity" ); - procs.clear(); #elif defined( USE_WINDOWS ) HANDLE hProc = GetCurrentProcess(); size_t procMask; @@ -313,7 +307,7 @@ std::vector ThreadPool::getProcessAffinity() PDWORD_PTR procMaskPtr = reinterpret_cast( &procMask ); PDWORD_PTR sysMaskPtr = reinterpret_cast( &sysMask ); GetProcessAffinityMask( hProc, procMaskPtr, sysMaskPtr ); - for ( int i = 0; i < (int) sizeof( size_t ) * CHAR_BIT; i++ ) { + for ( size_t i = 0; i < sizeof( size_t ) * CHAR_BIT; i++ ) { if ( ( procMask & 0x1 ) != 0 ) procs.push_back( i ); procMask >>= 1; @@ -323,7 +317,7 @@ std::vector 
ThreadPool::getProcessAffinity() #endif return procs; } -void ThreadPool::setProcessAffinity( std::vector procs ) +void ThreadPool::setProcessAffinity( const std::vector &procs ) { #ifdef USE_LINUX #ifdef _GNU_SOURCE @@ -337,12 +331,10 @@ void ThreadPool::setProcessAffinity( std::vector procs ) #else #warning sched_setaffinity is not supported for this compiler/OS OS_warning( "sched_setaffinity is not supported for this compiler/OS" ); - procs.clear(); #endif #elif defined( USE_MAC ) // MAC does not support getting or setting the affinity OS_warning( "MAC does not support setting the process affinity" ); - procs.clear(); #elif defined( USE_WINDOWS ) DWORD mask = 0; for ( size_t i = 0; i < procs.size(); i++ ) @@ -365,7 +357,7 @@ DWORD GetThreadAffinityMask( HANDLE thread ) DWORD old = 0; // try every CPU one by one until one works or none are left while ( mask ) { - old = static_cast( SetThreadAffinityMask( thread, mask ) ); + old = SetThreadAffinityMask( thread, mask ); if ( old ) { // this one worked SetThreadAffinityMask( thread, old ); // restore original return old; @@ -375,7 +367,6 @@ DWORD GetThreadAffinityMask( HANDLE thread ) } mask <<= 1; } - return 0; } #endif @@ -388,22 +379,20 @@ std::vector ThreadPool::getThreadAffinity() int error = pthread_getaffinity_np( pthread_self(), sizeof( cpu_set_t ), &mask ); if ( error != 0 ) throw std::logic_error( "Error getting thread affinity" ); - for ( int i = 0; i < (int) sizeof( cpu_set_t ) * CHAR_BIT; i++ ) { + for ( size_t i = 0; i < sizeof( cpu_set_t ) * CHAR_BIT; i++ ) { if ( CPU_ISSET( i, &mask ) ) procs.push_back( i ); } #else #warning pthread_getaffinity_np is not supported OS_warning( "pthread does not support pthread_getaffinity_np" ); - procs.clear(); #endif #elif defined( USE_MAC ) // MAC does not support getting or setting the affinity OS_warning( "MAC does not support getting the thread affinity" ); - procs.clear(); #elif defined( USE_WINDOWS ) size_t procMask = GetThreadAffinityMask( GetCurrentThread() 
); - for ( int i = 0; i < (int) sizeof( size_t ) * CHAR_BIT; i++ ) { + for ( size_t i = 0; i < sizeof( size_t ) * CHAR_BIT; i++ ) { if ( ( procMask & 0x1 ) != 0 ) procs.push_back( i ); procMask >>= 1; @@ -418,30 +407,28 @@ std::vector ThreadPool::getThreadAffinity( int thread ) const if ( thread >= getNumThreads() ) std::logic_error( "Invalid thread number" ); std::vector procs; - auto handle = const_cast( d_thread[thread] ).native_handle(); #ifdef USE_LINUX #ifdef _GNU_SOURCE + auto handle = const_cast( d_thread[thread] ).native_handle(); cpu_set_t mask; int error = pthread_getaffinity_np( handle, sizeof( cpu_set_t ), &mask ); if ( error != 0 ) throw std::logic_error( "Error getting thread affinity" ); - for ( int i = 0; i < (int) sizeof( cpu_set_t ) * CHAR_BIT; i++ ) { + for ( size_t i = 0; i < sizeof( cpu_set_t ) * CHAR_BIT; i++ ) { if ( CPU_ISSET( i, &mask ) ) procs.push_back( i ); } #else #warning pthread_getaffinity_np is not supported OS_warning( "pthread does not support pthread_getaffinity_np" ); - procs.clear(); #endif #elif defined( USE_MAC ) // MAC does not support getting or setting the affinity - NULL_USE( handle ); OS_warning( "MAC does not support getting the thread affinity" ); - procs.clear(); #elif defined( USE_WINDOWS ) + auto handle = const_cast( d_thread[thread] ).native_handle(); size_t procMask = GetThreadAffinityMask( handle ); - for ( int i = 0; i < (int) sizeof( size_t ) * CHAR_BIT; i++ ) { + for ( size_t i = 0; i < sizeof( size_t ) * CHAR_BIT; i++ ) { if ( ( procMask & 0x1 ) != 0 ) procs.push_back( i ); procMask >>= 1; @@ -456,7 +443,7 @@ std::vector ThreadPool::getThreadAffinity( int thread ) const /****************************************************************** * Function to set the thread affinity * ******************************************************************/ -void ThreadPool::setThreadAffinity( std::vector procs ) +void ThreadPool::setThreadAffinity( const std::vector &procs ) { #ifdef USE_LINUX #ifdef _GNU_SOURCE @@ -470,7 
+457,6 @@ void ThreadPool::setThreadAffinity( std::vector procs ) #else #warning pthread_getaffinity_np is not supported OS_warning( "pthread does not support pthread_setaffinity_np" ); - procs.clear(); #endif #elif defined( USE_MAC ) // MAC does not support getting or setting the affinity @@ -485,34 +471,33 @@ void ThreadPool::setThreadAffinity( std::vector procs ) #error Unknown OS #endif } -void ThreadPool::setThreadAffinity( int thread, std::vector procs ) const +void ThreadPool::setThreadAffinity( int thread, const std::vector &procs ) const { if ( thread >= getNumThreads() ) std::logic_error( "Invalid thread number" ); - auto handle = const_cast( d_thread[thread] ).native_handle(); #ifdef USE_LINUX #ifdef __USE_GNU cpu_set_t mask; CPU_ZERO( &mask ); for ( size_t i = 0; i < procs.size(); i++ ) CPU_SET( procs[i], &mask ); - int error = pthread_setaffinity_np( handle, sizeof( cpu_set_t ), &mask ); + auto handle = const_cast( d_thread[thread] ).native_handle(); + int error = pthread_setaffinity_np( handle, sizeof( cpu_set_t ), &mask ); if ( error != 0 ) throw std::logic_error( "Error setting thread affinity" ); #else #warning pthread_getaffinity_np is not supported OS_warning( "pthread does not support pthread_setaffinity_np" ); - procs.clear(); #endif #elif defined( USE_MAC ) // MAC does not support getting or setting the affinity - NULL_USE( handle ); NULL_USE( procs ); OS_warning( "MAC does not support getting the process affinity" ); #elif defined( USE_WINDOWS ) DWORD mask = 0; for ( size_t i = 0; i < procs.size(); i++ ) mask |= ( (DWORD) 1 ) << procs[i]; + auto handle = const_cast( d_thread[thread] ).native_handle(); SetThreadAffinityMask( handle, mask ); #else #error Unknown OS @@ -523,22 +508,10 @@ void ThreadPool::setThreadAffinity( int thread, std::vector procs ) const /****************************************************************** * Function to perform some basic checks before we start * 
******************************************************************/ -void ThreadPool::check_startup( size_t size0 ) +void ThreadPool::check_startup() { - // Check the size of the class to make sure that we don't have any - // byte alignment problems between a library implimentation and a calling pacakge - size_t size1 = sizeof( ThreadPool ); - size_t size2 = ( (size_t) &d_NULL_HEAD ) - ( (size_t) this ) + sizeof( size_t ); - size_t size3 = ( (size_t) &d_NULL_TAIL ) - ( (size_t) this ) + sizeof( size_t ); - if ( size0 != size1 || size1 < size2 || size1 < size3 ) - throw std::logic_error( "Internal data format problem" ); - // Check the size of variables - if ( sizeof( AtomicOperations::int32_atomic ) != 4 ) - throw std::logic_error( "AtomicOperations::int32_atomic is not 32 bits" ); - if ( sizeof( AtomicOperations::int64_atomic ) != 8 ) - throw std::logic_error( "AtomicOperations::int32_atomic is not 64 bits" ); // Check getting/setting a bit - atomic_64 x[2] = { 0x0, 0x7 }; + AtomicOperations::int64_atomic x[2] = { 0x0, 0x7 }; set_bit( x, 2 ); unset_bit( x, 66 ); if ( x[0] != 4 || x[1] != 3 || !get_bit( x, 2 ) || get_bit( x, 66 ) ) @@ -578,17 +551,21 @@ void ThreadPool::check_startup( size_t size0 ) if ( isValid( id ) || !isValid( id2 ) ) pass = false; if ( !pass ) - throw std::logic_error( "Thread pool failed to initialize" ); + throw std::logic_error( "thread id test failed" ); } /****************************************************************** - * Function to initialize the thread pool * + * Constructors/destructor * ******************************************************************/ -void ThreadPool::initialize( const int N, const char *affinity, int N_procs, const int *procs ) +ThreadPool::ThreadPool( + const int N, const std::string &affinity, const std::vector &procs, int queueSize ) + : d_queue_list( queueSize ) { + // Run some basic tests on startup + check_startup(); // Initialize the header/tail - d_NULL_HEAD = rand_size_t(); + d_NULL_HEAD = 
generateHeadTail(); d_NULL_TAIL = d_NULL_HEAD; // Initialize the variables to NULL values d_id_assign = 0; @@ -600,31 +577,31 @@ void ThreadPool::initialize( const int N, const char *affinity, int N_procs, con d_N_started = 0; d_N_finished = 0; d_max_wait_time = 600; - memset( (void *) d_active, 0, MAX_NUM_THREADS / 8 ); - memset( (void *) d_cancel, 0, MAX_NUM_THREADS / 8 ); + memset( (void *) d_active, 0, MAX_THREADS / 8 ); + memset( (void *) d_cancel, 0, MAX_THREADS / 8 ); d_wait_last = nullptr; for ( auto &i : d_wait ) i = nullptr; // Initialize the id d_id_assign = thread_id_t::maxThreadID; // Create the threads - setNumThreads( N, affinity, N_procs, procs ); + setNumThreads( N, affinity, procs ); + // Verify that the threadpool is valid + if ( !is_valid( this ) ) + throw std::logic_error( "Thread pool is not valid" ); } - - -/****************************************************************** - * This is the de-constructor * - ******************************************************************/ ThreadPool::~ThreadPool() { DISABLE_WARNINGS - if ( !is_valid( this ) ) - throw std::logic_error( "Thread pool is not valid" ); + if ( !is_valid( this ) ) { + std::cerr << "Thread pool is not valid, error calling destructor\n"; + return; + } ENABLE_WARNINGS // Destroy the threads setNumThreads( 0 ); // Delete all remaining data - d_N_threads = -1; + d_N_threads = ~0; d_NULL_HEAD = 0; d_NULL_TAIL = 0; delete d_wait_last; @@ -645,9 +622,9 @@ bool ThreadPool::is_valid( const ThreadPool *tpool ) { if ( tpool == nullptr ) return false; - if ( tpool->d_N_threads < 0 || tpool->d_N_threads > MAX_NUM_THREADS ) + if ( tpool->d_N_threads > MAX_THREADS ) return false; - if ( tpool->d_NULL_HEAD == 0 || tpool->d_NULL_HEAD != tpool->d_NULL_TAIL ) + if ( !validHeadTail( tpool->d_NULL_HEAD ) || tpool->d_NULL_HEAD != tpool->d_NULL_TAIL ) return false; return true; } @@ -657,17 +634,17 @@ bool ThreadPool::is_valid( const ThreadPool *tpool ) * This function creates the threads in the thread 
pool * ******************************************************************/ void ThreadPool::setNumThreads( - int num_worker_threads, const char *affinity2, int N_procs, const int *procs ) + int num_worker_threads, const std::string &affinity, const std::vector &procs ) { // Check if we are a member thread if ( isMemberThread() ) throw std::logic_error( "Member threads are not allowed to change the number of threads in the pool" ); // Determing the number of threads we need to create or destroy - if ( num_worker_threads > MAX_NUM_THREADS ) { - printp( "Warning: Maximum Number of Threads is %i\n", MAX_NUM_THREADS ); + if ( num_worker_threads > MAX_THREADS ) { + printp( "Warning: Maximum Number of Threads is %i\n", MAX_THREADS ); printp( " Only that number will be created\n" ); - num_worker_threads = MAX_NUM_THREADS; + num_worker_threads = MAX_THREADS; } else if ( num_worker_threads < 0 ) { printp( "Error: cannot have a negitive number of threads\n" ); printp( " Setting the number of threads to 0\n" ); @@ -681,23 +658,10 @@ void ThreadPool::setNumThreads( throw std::logic_error( "Threads are being created and destroyed at the same time" ); } -// Create the thread attributes (linux only) -#if defined( USE_LINUX ) || defined( USE_MAC ) - pthread_attr_t attr; - pthread_attr_init( &attr ); -// int ptmp; -// pthread_attr_setstacksize(&attr,2097152); // Default stack size is 8MB -// pthread_attr_setschedpolicy(&attr,1); -// pthread_attr_getschedpolicy(&attr,&ptmp); -// pout << "getschedpolicy = " << ptmp << std::endl; -#endif // Create the threads - auto tmp = new void *[2 * d_N_threads_diff]; - int j = d_N_threads; + int j = d_N_threads; for ( int i = 0; i < d_N_threads_diff; i++ ) { d_N_threads++; - tmp[0 + 2 * i] = this; - tmp[1 + 2 * i] = reinterpret_cast( static_cast( j ) ); set_bit( d_cancel, j ); d_thread[j] = std::thread( create_new_thread, this, j ); j++; @@ -713,12 +677,7 @@ void ThreadPool::setNumThreads( if ( !wait ) break; } -// Delete the thread attributes 
(linux only) -#if defined( USE_LINUX ) || defined( USE_MAC ) - pthread_attr_destroy( &attr ); -#endif std::this_thread::sleep_for( std::chrono::milliseconds( 25 ) ); - delete[] tmp; } else if ( d_N_threads_diff < 0 ) { // Reduce the number of threads if ( num_worker_threads == 0 ) { @@ -752,15 +711,14 @@ void ThreadPool::setNumThreads( } catch ( ... ) { pout << "Warning: Unable to get default cpus for thread affinities\n"; } - if ( !cpus.empty() && N_procs > 0 ) { - cpus.resize( N_procs ); - for ( int i = 0; i < N_procs; i++ ) + if ( !cpus.empty() && !procs.empty() ) { + cpus.resize( procs.size() ); + for ( size_t i = 0; i < procs.size(); i++ ) cpus[i] = procs[i]; } // Set the affinity model and the associated thread affinities // Note: not all OS's support setting the thread affinities std::vector> t_procs( d_N_threads ); - std::string affinity( affinity2 ); if ( cpus.empty() ) { // We do not have a list of cpus to use, do nothing (OS not supported) } else if ( affinity == "none" ) { @@ -769,13 +727,13 @@ void ThreadPool::setNumThreads( t_procs[i] = cpus; } else if ( affinity == "independent" ) { // We want to use an independent set of processors for each thread - if ( (int) cpus.size() == d_N_threads ) { + if ( cpus.size() == d_N_threads ) { // The number of cpus matches the number of threads for ( int i = 0; i < d_N_threads; i++ ) t_procs[i] = std::vector( 1, cpus[i] ); - } else if ( (int) cpus.size() > d_N_threads ) { + } else if ( cpus.size() > d_N_threads ) { // There are more cpus than threads, threads will use more the one processor - int N_procs_thread = static_cast( cpus.size() + d_N_threads - 1 ) / d_N_threads; + int N_procs_thread = ( cpus.size() + d_N_threads - 1 ) / d_N_threads; size_t k = 0; for ( int i = 0; i < d_N_threads; i++ ) { for ( int j = 0; j < N_procs_thread && k < cpus.size(); j++ ) { @@ -785,8 +743,7 @@ void ThreadPool::setNumThreads( } } else { // There are fewer cpus than threads, threads will share a processor - auto N_threads_proc = - 
static_cast( ( cpus.size() + d_N_threads - 1 ) / cpus.size() ); + auto N_threads_proc = ( cpus.size() + d_N_threads - 1 ) / cpus.size(); for ( int i = 0; i < d_N_threads; i++ ) t_procs[i].push_back( cpus[i / N_threads_proc] ); } @@ -797,7 +754,7 @@ void ThreadPool::setNumThreads( try { for ( int i = 0; i < d_N_threads; i++ ) { ThreadPool::setThreadAffinity( i, t_procs[i] ); - std::vector cpus2 = getThreadAffinity( i ); + auto cpus2 = getThreadAffinity( i ); if ( cpus2 != t_procs[i] ) pout << "Warning: error setting affinities (failed to set)\n"; } @@ -823,12 +780,14 @@ void ThreadPool::tpool_thread( int thread_id ) AtomicOperations::atomic_increment( &d_num_active ); set_bit( d_active, thread_id ); unset_bit( d_cancel, thread_id ); + setenv( "OMP_NUM_THREADS", "1" ); + setenv( "MKL_NUM_THREADS", "1" ); if ( printInfo ) { // Print the pid printp( "pid = %i\n", (int) getpid() ); // Print the processor affinities for the process try { - std::vector cpus = ThreadPool::getProcessAffinity(); + auto cpus = ThreadPool::getProcessAffinity(); printp( "%i cpus for current thread: ", (int) cpus.size() ); for ( int cpu : cpus ) printp( "%i ", cpu ); @@ -842,7 +801,7 @@ void ThreadPool::tpool_thread( int thread_id ) shutdown = false; while ( !shutdown ) { // Check if there is work to do - if ( d_queue_list.size() > 0 ) { + if ( !d_queue_list.empty() ) { // Get next work item to process auto work_id = d_queue_list.remove( []( const thread_id_t &id ) { return id.ready(); } ); @@ -890,6 +849,8 @@ void ThreadPool::tpool_thread( int thread_id ) } else { int N_active = AtomicOperations::atomic_decrement( &d_num_active ); unset_bit( d_active, thread_id ); + // Yield to give the main thread a chance to update + std::this_thread::yield(); // Alert main thread that a thread finished processing if ( ( N_active == 0 ) && d_signal_empty ) { d_wait_finished.notify_all(); @@ -897,7 +858,9 @@ void ThreadPool::tpool_thread( int thread_id ) } // Wait for work PROFILE_THREADPOOL_STOP2( "thread 
active" ); - d_wait_work.wait_for( 1e-3 ); + double wait_time = thread_id <= 2 ? 0.01 : 0.1; + if ( d_queue_list.empty() ) + d_wait_work.wait_for( wait_time ); PROFILE_THREADPOOL_START2( "thread active" ); AtomicOperations::atomic_increment( &d_num_active ); set_bit( d_active, thread_id ); @@ -921,13 +884,13 @@ inline void ThreadPool::add_work( const ThreadPool::thread_id_t &id ) auto work = id.work(); work->d_state = 1; // Check and change priorities of dependency ids - const int priority = id.getPriority(); + int priority = id.getPriority(); + auto compare = []( const thread_id_t &a, const thread_id_t &b ) { return a == b; }; for ( int i = 0; i < work->d_N_ids; i++ ) { const auto &id1 = work->d_ids[i]; if ( !id1.started() && id1 < id ) { // Remove and add the id back with a higher priority - auto id2 = d_queue_list.remove( - []( const thread_id_t &a, const thread_id_t &b ) { return a == b; }, id1 ); + auto id2 = d_queue_list.remove( compare, id1 ); id2.setPriority( std::max( priority, id2.getPriority() ) ); d_queue_list.insert( id2 ); } @@ -939,7 +902,7 @@ void ThreadPool::add_work( size_t N, ThreadPool::WorkItem *work[], const int *priority, ThreadPool::thread_id_t *ids ) { // If we have a very long list, break it up into smaller pieces to keep the threads busy - const size_t block_size = MAX_QUEUED / 8; + constexpr size_t block_size = 256; if ( N > block_size ) { size_t i = 0; while ( i < N ) { @@ -949,13 +912,13 @@ void ThreadPool::add_work( return; } PROFILE_THREADPOOL_START( "add_work" ); -#if MONITOR_THREADPOOL_PERFORMANCE +#if MONITOR_THREADPOOL_PERFORMANCE == 1 auto t1 = std::chrono::high_resolution_clock::now(); #endif // Create the thread ids (can be done without blocking) for ( size_t i = 0; i < N; i++ ) ids[i].reset( priority[i], AtomicOperations::atomic_decrement( &d_id_assign ), work[i] ); -#if MONITOR_THREADPOOL_PERFORMANCE +#if MONITOR_THREADPOOL_PERFORMANCE == 1 auto t2 = std::chrono::high_resolution_clock::now(); accumulate( 
total_add_work_time[0], t1, t2 ); #endif @@ -966,7 +929,7 @@ void ThreadPool::add_work( work[i]->run(); work[i]->d_state = 3; } -#if MONITOR_THREADPOOL_PERFORMANCE +#if MONITOR_THREADPOOL_PERFORMANCE == 1 auto t5 = std::chrono::high_resolution_clock::now(); accumulate( total_add_work_time[4], t2, t5 ); #endif @@ -974,29 +937,29 @@ void ThreadPool::add_work( return; } // Wait for enough room in the queue (doesn't need blocking since it isn't that precise) - if ( N > static_cast( MAX_QUEUED - d_queue_list.size() ) ) { - auto N_wait = static_cast( N - ( MAX_QUEUED - d_queue_list.size() ) ); + if ( N > d_queue_list.capacity() - d_queue_list.size() ) { + int N_wait = N - ( d_queue_list.capacity() - d_queue_list.size() ); while ( N_wait > 0 ) { - d_signal_count = static_cast( std::min( N_wait, 255 ) ); + d_signal_count = std::min( N_wait, 255 ); d_wait_finished.wait_for( 1e-4 ); - N_wait = static_cast( N - ( MAX_QUEUED - d_queue_list.size() ) ); + N_wait = N - ( d_queue_list.capacity() - d_queue_list.size() ); } } -#if MONITOR_THREADPOOL_PERFORMANCE +#if MONITOR_THREADPOOL_PERFORMANCE == 1 auto t3 = std::chrono::high_resolution_clock::now(); accumulate( total_add_work_time[1], t2, t3 ); #endif // Get add the work items to the queue for ( size_t i = 0; i < N; i++ ) add_work( ids[i] ); -#if MONITOR_THREADPOOL_PERFORMANCE +#if MONITOR_THREADPOOL_PERFORMANCE == 1 auto t4 = std::chrono::high_resolution_clock::now(); accumulate( total_add_work_time[2], t3, t4 ); #endif // Activate sleeping threads if ( d_num_active == d_N_threads ) { // All threads are active, no need to wake anybody - } else if ( d_queue_list.size() == 0 ) { + } else if ( d_queue_list.empty() ) { // Queue is empty, no need to activate } else if ( N == 1 ) { // Added 1 item to the queue, wake 1 worker @@ -1005,7 +968,7 @@ void ThreadPool::add_work( // Added multple items in the queue, wake all workers d_wait_work.notify_all(); } -#if MONITOR_THREADPOOL_PERFORMANCE +#if MONITOR_THREADPOOL_PERFORMANCE == 1 auto 
t5 = std::chrono::high_resolution_clock::now(); accumulate( total_add_work_time[3], t4, t5 ); #endif @@ -1026,8 +989,8 @@ static inline void check_finished( } } } -int ThreadPool::wait_some( - size_t N_work, const ThreadPool::thread_id_t *ids, size_t N_wait, bool *finished ) const +int ThreadPool::wait_some( size_t N_work, const ThreadPool::thread_id_t *ids, size_t N_wait, + bool *finished, int max_wait ) const { // Check the inputs if ( N_wait > N_work ) @@ -1056,13 +1019,21 @@ int ThreadPool::wait_some( auto tmp = new wait_ids_struct( N_work, ids, N_wait, d_cond_pool, MAX_WAIT, d_wait ); // Wait for the ids auto t1 = std::chrono::high_resolution_clock::now(); - while ( !tmp->wait_for( 0.01 ) ) { - check_wait_time( t1 ); + auto t2 = t1; + int dt1 = 0; + while ( dt1 < max_wait ) { + if ( tmp->wait_for( std::min( max_wait, d_max_wait_time ), 0.01 ) ) + break; + auto t3 = std::chrono::high_resolution_clock::now(); + dt1 = std::chrono::duration_cast( t3 - t1 ).count(); + int dt2 = std::chrono::duration_cast( t3 - t2 ).count(); + if ( dt2 >= d_max_wait_time ) { + print_wait_warning(); + t2 = t3; + } } // Update the ids that have finished check_finished( N_work, ids, N_finished, finished ); - if ( N_finished < N_wait && N_work != 0 ) - throw std::logic_error( "Internal error: failed to wait" ); // Delete the wait event struct // Note: we want to maintain the reference in case a thread is still using it // Note: technically this should be atomic, but it really isn't necessary here @@ -1075,40 +1046,43 @@ int ThreadPool::wait_some( /****************************************************************** * This function waits for all of the threads to finish their work * ******************************************************************/ -void ThreadPool::check_wait_time( - std::chrono::time_point &t1 ) const +void ThreadPool::print_wait_warning() const { - auto t2 = std::chrono::high_resolution_clock::now(); - if ( std::chrono::duration_cast( t2 - t1 ).count() > 
d_max_wait_time ) { - pout << "Warning: Maximum wait time in ThreadPool exceeded, threads may be hung\n"; - pout << "N_active: " << d_num_active << std::endl; - pout << "N_queued: " << d_queue_list.size() << std::endl; - pout << "N_added: " << d_N_added << std::endl; - pout << "N_started: " << d_N_started << std::endl; - pout << "N_finished: " << d_N_finished << std::endl; - pout << "queue.insert(): " << d_queue_list.N_insert() << std::endl; - pout << "queue.remove(): " << d_queue_list.N_remove() << std::endl; - pout << "Stack Trace:\n"; - auto call_stack = StackTrace::getAllCallStacks(); - StackTrace::cleanupStackTrace( call_stack ); - auto text = call_stack.print( " " ); - for ( auto &line : text ) - pout << line << std::endl; - t1 = std::chrono::high_resolution_clock::now(); - } + pout << "Warning: Maximum wait time in ThreadPool exceeded, threads may be hung\n"; + pout << "N_active: " << d_num_active << std::endl; + pout << "N_queued: " << d_queue_list.size() << std::endl; + pout << "N_added: " << d_N_added << std::endl; + pout << "N_started: " << d_N_started << std::endl; + pout << "N_finished: " << d_N_finished << std::endl; + pout << "queue.insert(): " << d_queue_list.N_insert() << std::endl; + pout << "queue.remove(): " << d_queue_list.N_remove() << std::endl; + pout << "Stack Trace:\n"; + auto call_stack = StackTrace::getAllCallStacks(); + StackTrace::cleanupStackTrace( call_stack ); + auto text = call_stack.print( " " ); + for ( auto &line : text ) + pout << line << std::endl; } void ThreadPool::wait_pool_finished() const { // First check that we are not one of the threads - if ( isMemberThread() ) { + if ( isMemberThread() ) throw std::logic_error( "Member thread attempted to call wait_pool_finished" ); - } // Wait for all threads to finish their work auto t1 = std::chrono::high_resolution_clock::now(); - while ( d_num_active > 0 || d_queue_list.size() > 0 ) { - check_wait_time( t1 ); + while ( d_num_active > 0 || !d_queue_list.empty() ) { + // Wait for 
signal from last thread d_signal_empty = true; - d_wait_finished.wait_for( 10e-6 ); + d_wait_finished.wait_for( 5e-4 ); + if ( d_num_active == 0 && d_queue_list.empty() ) + break; + // Check that we have not exceeded the maximum time + auto t2 = std::chrono::high_resolution_clock::now(); + int seconds = std::chrono::duration_cast( t2 - t1 ).count(); + if ( seconds > d_max_wait_time ) { + print_wait_warning(); + t1 = t2; + } } d_signal_empty = false; } @@ -1162,30 +1136,46 @@ void ThreadPool::wait_ids_struct::id_finished( const ThreadPool::thread_id_t &id } } } -bool ThreadPool::wait_ids_struct::wait_for( double seconds ) +inline bool ThreadPool::wait_ids_struct::check() { - for ( int i = 0; i < d_N; i++ ) { - if ( d_ids[i].finished() ) - d_finished[i] = true; + int N_finished = 0; + for ( int i = 0; i < d_N; i++ ) + N_finished += d_finished[i] ? 1 : 0; + if ( N_finished >= d_wait || d_N == 0 ) { + *d_ptr = nullptr; + d_wait = 0; + d_N = 0; + return true; } - auto t1 = std::chrono::high_resolution_clock::now(); - while ( true ) { - int N_finished = 0; - for ( int i = 0; i < d_N; i++ ) - N_finished += d_finished[i] ? 
1 : 0; - if ( N_finished >= d_wait || d_N == 0 ) { - *d_ptr = nullptr; - d_wait = 0; - d_N = 0; - break; + return false; +} +bool ThreadPool::wait_ids_struct::wait_for( double total_time, double recheck_time ) +{ + int total = 1e6 * total_time; + int recheck = 1e6 * recheck_time; + auto t1 = std::chrono::high_resolution_clock::now(); + auto t2 = t1; + int us1 = 0; + while ( us1 < total ) { + for ( int i = 0; i < d_N; i++ ) { + if ( d_ids[i].finished() ) + d_finished[i] = true; } - auto t2 = std::chrono::high_resolution_clock::now(); - if ( 1e-6 * std::chrono::duration_cast( t2 - t1 ).count() > - seconds ) - return false; - d_wait_event->wait_for( 1e-5 ); + if ( check() ) + return true; + int us2 = 0; + while ( us2 < recheck ) { + double dt = 1e-6 * std::max( 10, recheck - us2 ); + d_wait_event->wait_for( dt ); + if ( check() ) + return true; + auto t3 = std::chrono::high_resolution_clock::now(); + us2 = std::chrono::duration_cast( t3 - t2 ).count(); + t2 = t3; + } + us1 = std::chrono::duration_cast( t2 - t1 ).count(); } - return true; + return false; } @@ -1298,9 +1288,8 @@ inline int find_id( int n, const ThreadPool::thread_id_t *x, const ThreadPool::t // Perform the search size_t lower = 0; size_t upper = n - 1; - size_t index; while ( ( upper - lower ) != 1 ) { - index = ( upper + lower ) / 2; + size_t index = ( upper + lower ) / 2; if ( x[index] == id ) return index; if ( x[index] >= id ) @@ -1325,9 +1314,8 @@ void ThreadPool::WorkItem::add_dependencies( size_t N, const ThreadPool::thread_ throw std::logic_error( "Cannot add dependency to work item once it has been added the the threadpool" ); } - if ( static_cast( d_N_ids ) + N > 0xFFFF ) { + if ( d_N_ids + N > 0xFFFF ) throw std::logic_error( "Cannot add more than 65000 dependencies" ); - } if ( d_N_ids + N + 1 > d_size ) { thread_id_t *tmp = d_ids; unsigned int N2 = d_size; diff --git a/threadpool/thread_pool.h b/threadpool/thread_pool.h index eff12433..9cb5b21a 100644 --- a/threadpool/thread_pool.h +++ 
b/threadpool/thread_pool.h @@ -5,7 +5,7 @@ #define included_AtomicModelThreadPool #include -#include +#include #include #include #include @@ -40,7 +40,7 @@ * thread_id_t ids[2]; * ids[0] = TPOOL_ADD_WORK( tpool, myfun_1, (a,b) ); * ids[1] = TPOOL_ADD_WORK( tpool, myfun_2, (c,d) ); - * int error = wait_all(2,ids); + * wait_all(2,ids); * double x = getFunctionRet(ids[0]); * double y = getFunctionRet(ids[1]);
* @@ -49,11 +49,8 @@ class ThreadPool { public: ///// Set some global properties - constexpr static int MAX_NUM_THREADS = 128; // The maximum number of threads (must be a multiple of 64) - constexpr static int MAX_QUEUED = 1024; // The maximum number of items in the work queue at any moment - constexpr static int MAX_WAIT = 16; // The maximum number of active waits at any given time - constexpr static bool PROFILE_THREADPOOL_PERFORMANCE = false; // Add profile timers to the threadpool - constexpr static bool MONITOR_THREADPOOL_PERFORMANCE = false; // Add detailed performance counters + constexpr static uint16_t MAX_THREADS = 128; // The maximum number of threads (must be a multiple of 64) + constexpr static uint16_t MAX_WAIT = 16; // The maximum number of active waits at any given time public: ///// Member classes @@ -117,6 +114,8 @@ public: } //! Check if thread id is null inline bool isNull( ) const { return d_id==nullThreadID; } + //! Check if thread id is null + inline WorkItem* getWork( ) const { return reinterpret_cast( d_work ); } private: // Reset the internal data to the given values @@ -174,9 +173,8 @@ public: */ inline void add_dependencies( const std::vector &ids ) { - if ( !ids.empty() ) { + if ( !ids.empty() ) add_dependencies( ids.size(), &ids[0] ); - } } /*! 
* \brief Add a list of work item to the list of dependencies @@ -201,8 +199,8 @@ public: WorkItem( const WorkItem & ); // Private copy constructor WorkItem &operator=( const WorkItem & ); // Private assignment operator volatile char d_state; // Current state (0: not added to threadpool, 1: queued, 2: started, 3: finished) - short unsigned int d_N_ids; // Number of dependencies - short unsigned int d_size; // Size of d_ids + uint16_t d_N_ids; // Number of dependencies + uint16_t d_size; // Size of d_ids AtomicOperations::int32_atomic d_count; // Count used by a thread_id thread_id_t *d_ids; // Pointer to id list // Friends @@ -232,7 +230,7 @@ public: protected: return_type d_result; protected: - inline WorkItemRet() { } + inline WorkItemRet() : d_result( return_type() ) { } private: WorkItemRet( const WorkItemRet & ); // Private copy constructor WorkItemRet &operator=( const WorkItemRet & ); // Private assignment operator @@ -242,37 +240,17 @@ public: public: ///// Member functions - //! Empty constructor - ThreadPool() - { - // Note: we need the constructor in the header to ensure that check_startup - // is able to check for changes in the byte alignment - check_startup( sizeof( ThreadPool ) ); - initialize( 0, "none", 0, nullptr ); - if ( !is_valid( this ) ) - throw std::logic_error( "Thread pool is not valid" ); - } - - /*! 
* Constructor that initialize the thread pool with N threads - * @param N The desired number of worker threads + * @param N The desired number of worker threads * @param affinity The affinity scheduler to use: * none - Let the OS handle the affinities (default) * independent - Give each thread an independent set of processors * @param procs The processors to use (defaults to the process affinitiy list) + * @param queueSize The maximum number of items in the queue before forcing a wait */ - ThreadPool( const int N, const std::string &affinity = "none", - const std::vector &procs = std::vector() ) - { - // Note: we need the constructor in the header to ensure that check_startup - // is able to check for changes in the byte alignment - check_startup( sizeof( ThreadPool ) ); - const int *procs2 = procs.empty() ? nullptr : ( &procs[0] ); - initialize( N, affinity.c_str(), (int) procs.size(), procs2 ); - if ( !is_valid( this ) ) - throw std::logic_error( "Thread pool is not valid" ); - } + ThreadPool( const int N = 0, const std::string &affinity = "none", + const std::vector &procs = std::vector(), int queueSize = 1024 ); //! Destructor @@ -292,7 +270,7 @@ public: //! Function to set the affinity of the current process - static void setProcessAffinity( std::vector procs ); + static void setProcessAffinity( const std::vector& procs ); //! Function to return the affinity of the current thread @@ -310,7 +288,7 @@ public: * Function to set the affinity of the current thread * @param procs The processors to use */ - static void setThreadAffinity( std::vector procs ); + static void setThreadAffinity( const std::vector& procs ); /*! @@ -318,11 +296,11 @@ public: * @param thread The index of the thread * @param procs The processors to use */ - void setThreadAffinity( int thread, std::vector procs ) const; + void setThreadAffinity( int thread, const std::vector& procs ) const; //! 
Function to return the number of threads in the thread pool - int getNumThreads() const { return d_N_threads; } + inline int getNumThreads() const { return d_N_threads; } /*! @@ -332,21 +310,15 @@ public: * in the ThreadPool without checking the existing work unless the desired number of * threads is 0. In this case, the function will wait for all work items to finish * before deleting the existing work threads. - * Member threads may not call this function. * @param N The desired number of worker threads * @param affinity The affinity scheduler to use: * none - Let the OS handle the affinities (default) - * independent - Give each thread an independent set of processors * @param procs The processors to use (defaults to the process affinitiy list) */ - inline void setNumThreads( const int N, const std::string &affinity = "none", - const std::vector &procs = std::vector() ) - { - const int *procs2 = procs.empty() ? nullptr : ( &procs[0] ); - setNumThreads( N, affinity.c_str(), (int) procs.size(), procs2 ); - } + void setNumThreads( const int N, const std::string &affinity = "none", + const std::vector &procs = std::vector() ); /*! @@ -394,6 +366,36 @@ public: static inline return_type getFunctionRet( const thread_id_t &id ); + /*! + * \brief Function to create a work item + * \details This function creates a work item that can be added to the queue + * @param routine Function to call from the thread pool + * @param args Function arguments to pass + */ + template + static inline WorkItem* createWork( std::function routine, std::tuple &&args ); + + + /*! + * \brief Function to create a work item + * \details This function creates a work item that can be added to the queue + * @param routine Function to call from the thread pool + * @param args Function arguments to pass + */ + template + static inline WorkItem* createWork( Ret( *routine )( Args... ), std::tuple &&args ); + + + /*! 
+ * \brief Function to create a work item + * \details This function creates a work item that can be added to the queue + * @param routine Function to call from the thread pool + * @param args Function arguments to pass + */ + template + static inline WorkItem* createWork( std::function routine, Args... args ); + + /*! * \brief Function to create a work item * \details This function creates a work item that can be added to the queue @@ -431,61 +433,33 @@ public: /*! * \brief Function to wait until a specific work item has finished - * \details This is the function waits for a specific work item to finished. It returns 0 if - * successful. + * \details This is the function waits for a specific work item to finished. * Note: any thread may call this routine, but they will block until finished. * For worker threads this may eventually lead to a deadlock. * @param id The work item to wait for */ - inline int wait( thread_id_t id ) const; + inline void wait( thread_id_t id ) const; /*! * \brief Function to wait until any of the given work items have finished their work * \details This is the function waits for any of the given work items to finish. * If successful it returns the index of a finished work item (the index in the array ids). - * If unseccessful it will return -1. - * Note: any thread may call this routine, but they will block until finished. - * For worker threads this may eventually lead to a deadlock. - * @param N_work The number of work items - * @param ids Array of work items to wait for - */ - inline int wait_any( size_t N_work, const thread_id_t *ids ); - - - /*! - * \brief Function to wait until any of the given work items have finished their work - * \details This is the function waits for any of the given work items to finish. - * If successful it returns the index of a finished work item (the index in the array ids). - * If unseccessful it will return -1. * Note: any thread may call this routine, but they will block until finished. 
* For worker threads this may eventually lead to a deadlock. * @param ids Vector of work items to wait for */ - inline int wait_any( const std::vector &ids ) const; + inline size_t wait_any( const std::vector &ids ) const; /*! * \brief Function to wait until all of the given work items have finished their work - * \details This is the function waits for all given of the work items to finish. It returns 0 - * if successful. - * Note: any thread may call this routine, but they will block until finished. - * For worker threads this may eventually lead to a deadlock. - * @param N_work The number of work items - * @param ids Array of work items to wait for - */ - inline int wait_all( size_t N_work, const thread_id_t *ids ) const; - - - /*! - * \brief Function to wait until all of the given work items have finished their work - * \details This is the function waits for all given of the work items to finish. It returns 0 - * if successful. + * \details This is the function waits for all given of the work items to finish. * Note: any thread may call this routine, but they will block until finished. * For worker threads this may eventually lead to a deadlock. * @param ids Vector of work items to wait for */ - inline int wait_all( const std::vector &ids ) const; + inline void wait_all( const std::vector &ids ) const; /*! @@ -496,8 +470,9 @@ public: * For worker threads this may eventually lead to a deadlock. * @param N_wait Number of work items to wait for * @param ids Vector of work items to wait for + * @param max_wait Maximum time to wait (seconds) */ - inline std::vector wait_some( int N_wait, const std::vector &ids ) const; + inline std::vector wait_some( int N_wait, const std::vector &ids, int max_wait = 10000000 ) const; /*! @@ -584,14 +559,13 @@ public: // Static interface /*! * \brief Function to wait until all of the given work items have finished their work - * \details This is the function waits for all given of the work items to finish. 
It returns 0 - * if successful. + * \details This is the function waits for all given of the work items to finish. * Note: any thread may call this routine, but they will block until finished. * For worker threads this may eventually lead to a deadlock. * @param tpool Threadpool containing work (must match call to add_work) * @param ids Vector of work items to wait for */ - static inline int wait_all( const ThreadPool* tpool, const std::vector &ids ); + static inline void wait_all( const ThreadPool* tpool, const std::vector &ids ); /*! @@ -604,10 +578,6 @@ public: // Static interface static inline void wait_pool_finished( const ThreadPool* tpool ) { if ( tpool ) { tpool->wait_pool_finished(); } } - -private: - typedef AtomicOperations::int32_atomic int32_atomic; - private: ///// Member data structures @@ -644,11 +614,14 @@ private: // before calling wait class wait_ids_struct { public: + wait_ids_struct() = delete; + wait_ids_struct( const wait_ids_struct& ) = delete; + wait_ids_struct& operator=( const wait_ids_struct & ) = delete; wait_ids_struct( size_t N, const ThreadPool::thread_id_t *ids, size_t N_wait, AtomicOperations::pool& cv_pool, int N_wait_list, volatile wait_ids_struct **list ); ~wait_ids_struct( ); void id_finished( const ThreadPool::thread_id_t& id ) const; - bool wait_for( double seconds ); + bool wait_for( double total_time, double recheck_time ); private: mutable int d_wait; // The number of work items that must finish before we alert the thread mutable int d_N; // The number of ids we are waiting on @@ -657,9 +630,8 @@ private: condition_variable *d_wait_event; // Handle to a wait event volatile mutable bool *d_finished; // Has each id finished volatile mutable wait_ids_struct **d_ptr; - wait_ids_struct(); - wait_ids_struct( const wait_ids_struct& ); - wait_ids_struct& operator=( const wait_ids_struct & ); + private: + inline bool check(); }; @@ -670,10 +642,8 @@ private: ThreadPool( const ThreadPool & ); ThreadPool &operator=( const ThreadPool 
& ); - // Function to initialize the thread pool - void setNumThreads( int N, const char *affinity, int N_procs, const int *procs ); - void initialize( int N, const char *affinity, int N_procs, const int *procs ); - void check_startup( size_t size0 ); + // Function to check the startup + void check_startup( ); // Function to add an array of work items void add_work( @@ -701,39 +671,45 @@ private: inline bool isMemberThread() const { return getThreadNumber()>=0; } // Function to wait for some work items to finish - int wait_some( size_t N_work, const thread_id_t *ids, size_t N_wait, bool *finished ) const; + int wait_some( size_t N_work, const thread_id_t *ids, size_t N_wait, bool *finished, int max_wait ) const; // Check if we are waiting too long and pring debug info - void check_wait_time( std::chrono::time_point& t1 ) const; + void print_wait_warning( ) const; + private: ///// Member data - typedef AtomicOperations::int64_atomic atomic_64; - typedef AtomicList> queue_type; - // Note: We want to store the variables in a certain order to optimize storage - // and ensure consistent packing / object size - size_t d_NULL_HEAD; // Null data buffer to check memory bounds - volatile atomic_64 d_id_assign; // An internal variable used to store the current id to assign - volatile mutable bool d_signal_empty; // Do we want to send a signal when the queue is empty - volatile mutable int32_atomic d_signal_count; // Signal count - short int d_N_threads; // Number of threads - volatile int32_atomic d_num_active; // Number of threads that are currently active - volatile atomic_64 d_active[MAX_NUM_THREADS/64]; // Which threads are currently active - volatile atomic_64 d_cancel[MAX_NUM_THREADS/64]; // Which threads should be deleted - volatile atomic_64 d_N_added; // Number of items added to the work queue - volatile atomic_64 d_N_started; // Number of items started - volatile atomic_64 d_N_finished; // Number of items finished - volatile mutable wait_ids_struct 
*d_wait[MAX_WAIT]; // The wait events to check - mutable wait_ids_struct *d_wait_last; // A cached copy of the last completed wait event (in case a thread still has a reference) - condition_variable d_wait_finished; // Condition variable to signal when all work is finished - condition_variable d_wait_work; // Condition variable to signal when there is new work - mutable AtomicOperations::pool d_cond_pool; - std::thread d_thread[MAX_NUM_THREADS]; // Handles to the threads - std::thread::id d_threadId[MAX_NUM_THREADS]; // Unique id for each thread - queue_type d_queue_list; // The work queue - size_t d_NULL_TAIL; // Null data buffer to check memory bounds - int d_max_wait_time; // The maximum time in a wait command before printing a warning message - std::function d_errorHandler; + + // Typedefs + typedef volatile AtomicOperations::int32_atomic vint32_t; + typedef volatile AtomicOperations::int64_atomic vint64_t; + typedef volatile wait_ids_struct vwait_t; + typedef AtomicOperations::pool cond_t; + typedef AtomicList> queue_type; + + // Internal data + uint32_t d_NULL_HEAD; // Null data buffer to check memory bounds + volatile mutable bool d_signal_empty; // Do we want to send a signal when the queue is empty + uint16_t d_N_threads; // Number of threads + int d_max_wait_time; // The maximum time in a wait command before printing a warning message + vint32_t d_signal_count; // Signal count + vint32_t d_num_active; // Number of threads that are currently active + vint64_t d_id_assign; // An internal variable used to store the current id to assign + vint64_t d_active[MAX_THREADS/64]; // Which threads are currently active + vint64_t d_cancel[MAX_THREADS/64]; // Which threads should be deleted + vint64_t d_N_added; // Number of items added to the work queue + vint64_t d_N_started; // Number of items started + vint64_t d_N_finished; // Number of items finished + mutable vwait_t *d_wait[MAX_WAIT]; // The wait events to check + mutable wait_ids_struct *d_wait_last; // A 
cached copy of the last completed wait event (in case a thread still has a reference) + condition_variable d_wait_finished; // Condition variable to signal when all work is finished + condition_variable d_wait_work; // Condition variable to signal when there is new work + mutable cond_t d_cond_pool; // Condition pool + std::thread d_thread[MAX_THREADS]; // Handles to the threads + std::thread::id d_threadId[MAX_THREADS]; // Unique id for each thread + queue_type d_queue_list; // The work queue + std::function d_errorHandler; // Error handler + uint32_t d_NULL_TAIL; // Null data buffer to check memory bounds }; diff --git a/threadpool/thread_pool.hpp b/threadpool/thread_pool.hpp index a87860b3..394e5619 100644 --- a/threadpool/thread_pool.hpp +++ b/threadpool/thread_pool.hpp @@ -21,19 +21,10 @@ * \param args The arguments to pass to the function in the form (arg1,arg2,...) * \param priority Optional argument specifying the priority of the work item */ -#define TPOOL_TUPLE_TO_SEQ( t ) TPOOL_TUPLE_TO_SEQ_##II t -#define TPOOL_TUPLE_TO_SEQ_II( a, ... ) a, ##__VA_ARGS__ -#if defined( WIN32 ) || defined( _WIN32 ) || defined( WIN64 ) || defined( _WIN64 ) -#define TPOOL_GET_PRIORITY( a, N, c, ... ) N -#define TPOOL_ADD_WORK( TPOOL, FUNCTION, ARGS, ... ) \ - ThreadPool_add_work( TPOOL, TPOOL_GET_PRIORITY( 0, __VA_ARGS__, 0, 0 ) + 0, FUNCTION, \ - TPOOL_TUPLE_TO_SEQ( ARGS ) ) -#else -#define TPOOL_GET_PRIORITY( _0, N, ... ) N -#define TPOOL_ADD_WORK( TPOOL, FUNCTION, ARGS, ... ) \ - ThreadPool_add_work( \ - TPOOL, TPOOL_GET_PRIORITY( _0, ##__VA_ARGS__, 0 ), FUNCTION, TPOOL_TUPLE_TO_SEQ( ARGS ) ) -#endif +#define TPOOL_ADD_WORK2( TPOOL, FUNCTION, ARGS, PRIORITY, ... ) \ + ThreadPool_add_work( TPOOL, PRIORITY, FUNCTION, std::make_tuple ARGS ) +#define TPOOL_ADD_WORK( TPOOL, FUNCTION, ... ) TPOOL_ADD_WORK2( TPOOL, FUNCTION, __VA_ARGS__, 0, 0 ) + /*! 
@} */ @@ -59,17 +50,17 @@ struct make_indexes : make_indexes_impl<0, index_tuple<>, Types...> { }; template inline Ret apply_helper( - Ret ( *pf )( Args... ), index_tuple, std::tuple &&tup ) + std::function &pf, index_tuple, std::tuple &&tup ) { return pf( std::forward( std::get( tup ) )... ); } template -inline Ret apply( Ret ( *pf )( Args... ), const std::tuple &tup ) +inline Ret apply( std::function &pf, const std::tuple &tup ) { return apply_helper( pf, typename make_indexes::type(), std::tuple( tup ) ); } template -inline Ret apply( Ret ( *pf )( Args... ), std::tuple &&tup ) +inline Ret apply( std::function &pf, std::tuple &&tup ) { return apply_helper( pf, typename make_indexes::type(), std::forward>( tup ) ); @@ -92,32 +83,40 @@ public: template class WorkItemFull; template -class WorkItemFull : public ThreadPool::WorkItemRet +class WorkItemFull final : public ThreadPool::WorkItemRet { private: - void ( *routine )( Args... ); + std::function routine; std::tuple args; WorkItemFull(); public: - WorkItemFull( void ( *routine2 )( Args... ), Args... ts ) - : ThreadPool::WorkItemRet(), routine( routine2 ), args( ts... ) + WorkItemFull( std::function &&routine2, Args... ts ) + : ThreadPool::WorkItemRet(), routine( std::move( routine2 ) ), args( ts... ) + { + } + WorkItemFull( std::function &&routine2, std::tuple &&ts ) + : ThreadPool::WorkItemRet(), routine( std::move( routine2 ) ), args( ts ) { } virtual void run() override { apply( routine, args ); } virtual ~WorkItemFull() {} }; template -class WorkItemFull : public ThreadPool::WorkItemRet +class WorkItemFull final : public ThreadPool::WorkItemRet { private: - Ret ( *routine )( Args... ); + std::function routine; std::tuple args; WorkItemFull(); public: - WorkItemFull( Ret ( *routine2 )( Args... ), Args... ts ) - : ThreadPool::WorkItemRet(), routine( routine2 ), args( ts... ) + WorkItemFull( std::function &&routine2, Args... ts ) + : ThreadPool::WorkItemRet(), routine( std::move( routine2 ) ), args( ts... 
) + { + } + WorkItemFull( std::function &&routine2, std::tuple &&ts ) + : ThreadPool::WorkItemRet(), routine( std::move( routine2 ) ), args( ts ) { } virtual void run() override { this->d_result = apply( routine, args ); } @@ -126,11 +125,40 @@ public: // Functions to add work to the thread pool -template +// clang-format off +template inline ThreadPool::thread_id_t ThreadPool_add_work( - ThreadPool *tpool, int priority, Ret ( *routine )( Ts... ), Ts... ts ) + ThreadPool *tpool, int priority, std::function routine, std::tuple &&args ) { - auto work = new WorkItemFull( routine, ts... ); + auto work = new WorkItemFull( routine, std::move( args ) ); + return ThreadPool::add_work( tpool, work, priority ); +} +template +inline ThreadPool::thread_id_t ThreadPool_add_work( + ThreadPool *tpool, int priority, Ret ( *routine )( Args... ), std::tuple &&args ) +{ + auto work = new WorkItemFull( routine, std::move( args ) ); + return ThreadPool::add_work( tpool, work, priority ); +} +template +inline ThreadPool::thread_id_t ThreadPool_add_work( + ThreadPool *tpool, int priority, Ret ( *routine )(), std::tuple&& ) +{ + auto work = new WorkItemFull( routine ); + return ThreadPool::add_work( tpool, work, priority ); +} +template +inline ThreadPool::thread_id_t ThreadPool_add_work( +ThreadPool *tpool, int priority, std::function routine, Args... args ) +{ + auto work = new WorkItemFull( routine, std::forward_as_tuple( args... ) ); + return ThreadPool::add_work( tpool, work, priority ); +} +template +inline ThreadPool::thread_id_t ThreadPool_add_work( + ThreadPool *tpool, int priority, Ret ( *routine )( Args... ), Args... args ) +{ + auto work = new WorkItemFull( routine, std::forward_as_tuple( args... 
) ); return ThreadPool::add_work( tpool, work, priority ); } template @@ -141,10 +169,29 @@ inline ThreadPool::thread_id_t ThreadPool_add_work( return ThreadPool::add_work( tpool, work, priority ); } template +inline ThreadPool::WorkItem *ThreadPool::createWork( + std::function routine, Args... args ) +{ + return new WorkItemFull( routine, std::forward_as_tuple( args... ) ); +} +template inline ThreadPool::WorkItem *ThreadPool::createWork( Ret ( *routine )( Args... ), Args... args ) { - return new WorkItemFull( routine, args... ); + return new WorkItemFull( routine, std::forward_as_tuple( args... ) ); } +template +inline ThreadPool::WorkItem *ThreadPool::createWork( + std::function routine, std::tuple &&args ) +{ + return new WorkItemFull( routine, std::move( args ) ); +} +template +inline ThreadPool::WorkItem *ThreadPool::createWork( + Ret ( *routine )( Args... ), std::tuple &&args ) +{ + return new WorkItemFull( routine, std::move( args ) ); +} +// clang-format on /****************************************************************** @@ -174,71 +221,49 @@ inline Ret ThreadPool::getFunctionRet( const ThreadPool::thread_id_t &id ) /****************************************************************** * Inline functions to wait for the work items to finish * ******************************************************************/ -inline int ThreadPool::wait( ThreadPool::thread_id_t id ) const +inline void ThreadPool::wait( ThreadPool::thread_id_t id ) const { bool finished; - wait_some( 1, &id, 1, &finished ); - return 0; + int N = wait_some( 1, &id, 1, &finished, 10000000 ); + if ( N != 1 ) + throw std::logic_error( "Failed to wait for id" ); } -inline int ThreadPool::wait_any( size_t N_work, const ThreadPool::thread_id_t *ids ) -{ - auto finished = new bool[N_work]; - wait_some( N_work, ids, 1, finished ); - int index = -1; - for ( size_t i = 0; i < N_work; i++ ) { - if ( finished[i] ) { - index = static_cast( i ); - break; - } - } - delete[] finished; - return index; -} 
-inline int ThreadPool::wait_any( const std::vector &ids ) const +inline size_t ThreadPool::wait_any( const std::vector &ids ) const { if ( ids.empty() ) return 0; auto finished = new bool[ids.size()]; - wait_some( ids.size(), &ids[0], 1, finished ); - int index = -1; + int N = wait_some( ids.size(), &ids[0], 1, finished, 10000000 ); + if ( N < 1 ) + throw std::logic_error( "Failed to wait for any id" ); for ( size_t i = 0; i < ids.size(); i++ ) { if ( finished[i] ) { - index = static_cast( i ); - break; + delete[] finished; + return i; } } - delete[] finished; - return index; + throw std::logic_error( "wait_any failed" ); } -inline int ThreadPool::wait_all( size_t N_work, const ThreadPool::thread_id_t *ids ) const -{ - if ( N_work == 0 ) - return 0; - auto finished = new bool[N_work]; - wait_some( N_work, ids, N_work, finished ); - delete[] finished; - return 0; -} -inline int ThreadPool::wait_all( const std::vector &ids ) const +inline void ThreadPool::wait_all( const std::vector &ids ) const { if ( ids.empty() ) - return 0; + return; auto finished = new bool[ids.size()]; - wait_some( ids.size(), ids.data(), ids.size(), finished ); + int N = wait_some( ids.size(), ids.data(), ids.size(), finished, 10000000 ); + if ( N != (int) ids.size() ) + throw std::logic_error( "Failed to wait for all ids" ); delete[] finished; - return 0; } -inline int ThreadPool::wait_all( const ThreadPool *tpool, const std::vector &ids ) +inline void ThreadPool::wait_all( const ThreadPool *tpool, const std::vector &ids ) { if ( tpool ) return tpool->wait_all( ids ); - return ids.size(); } inline std::vector ThreadPool::wait_some( - int N_wait, const std::vector &ids ) const + int N_wait, const std::vector &ids, int max_wait ) const { auto finished = new bool[ids.size()]; - int N_finished = wait_some( ids.size(), ids.data(), N_wait, finished ); + int N_finished = wait_some( ids.size(), ids.data(), N_wait, finished, max_wait ); std::vector index( N_finished, -1 ); for ( size_t i = 0, j = 0; 
i < ids.size(); i++ ) { if ( finished[i] ) { @@ -313,7 +338,7 @@ inline std::vector ThreadPool::add_work( ThreadPool *tp * Class functions to for the thread id * ******************************************************************/ inline ThreadPool::thread_id_t::thread_id_t() - : d_id( nullThreadID ), d_count( NULL ), d_work( NULL ) + : d_id( nullThreadID ), d_count( nullptr ), d_work( nullptr ) { } inline ThreadPool::thread_id_t::~thread_id_t() { reset(); } @@ -350,7 +375,7 @@ inline ThreadPool::thread_id_t &ThreadPool::thread_id_t::operator=( inline ThreadPool::thread_id_t::thread_id_t( const volatile ThreadPool::thread_id_t &rhs ) : d_id( rhs.d_id ), d_count( rhs.d_count ), d_work( rhs.d_work ) { - if ( d_count != NULL ) + if ( d_count != nullptr ) AtomicOperations::atomic_increment( d_count ); } #if !defined( WIN32 ) && !defined( _WIN32 ) && !defined( WIN64 ) && !defined( _WIN64 ) @@ -435,15 +460,9 @@ inline void ThreadPool::thread_id_t::reset() } inline uint64_t ThreadPool::thread_id_t::createId( int priority, uint64_t local_id ) { - if ( priority < -127 || priority > 127 ) - throw std::logic_error( "priority limited to +- 127" ); - if ( local_id > maxThreadID ) - throw std::logic_error( "local id >= 2^56" ); - char tmp1 = static_cast( priority + 128 ); - unsigned char tmp2 = static_cast( tmp1 ); - if ( priority >= 0 ) - tmp2 |= 0x80; - uint64_t id = tmp2; + if ( priority < -127 || priority > 127 || local_id > maxThreadID ) + throw std::logic_error( "Invalid priority or local id" ); + uint64_t id = priority + 128; id = ( id << 56 ) + local_id; return id; } @@ -460,9 +479,8 @@ inline void ThreadPool::thread_id_t::reset( int priority, uint64_t local_id, voi d_id = createId( priority, local_id ); // Create the work and counter d_count = nullptr; - d_work = nullptr; - if ( work != nullptr ) { - d_work = work; + d_work = work; + if ( d_work != nullptr ) { d_count = &( reinterpret_cast( work )->d_count ); *d_count = 1; } @@ -512,7 +530,6 @@ inline bool 
ThreadPool::thread_id_t::ready() const ******************************************************************/ inline bool ThreadPool::isValid( const ThreadPool::thread_id_t &id ) const { - static_assert( sizeof( atomic_64 ) == 8, "atomic_64 must be a 64-bit integer" ); uint64_t local_id = id.getLocalID(); uint64_t next_id = d_id_assign - 1; return local_id != 0 && id.initialized() && local_id <= thread_id_t::maxThreadID &&