From 3c854fd002c02650e1400a44ea705dd1c84d9810 Mon Sep 17 00:00:00 2001 From: Mark Berrill Date: Thu, 2 Jan 2020 13:23:51 -0500 Subject: [PATCH] Updating StackTrace and improving performance converting uCT data --- StackTrace/StackTrace.cpp | 26 ++++++-- StackTrace/StackTrace.h | 11 ++++ StackTrace/Utilities.cpp | 58 ++++++++++++++--- StackTrace/Utilities.h | 18 +++++ StackTrace/string_view.h | 2 +- analysis/runAnalysis.cpp | 11 ++-- common/Communication.hpp | 12 ++-- common/ReadMicroCT.cpp | 37 +++++------ common/Utilities.cpp | 116 ++++++++++++++++++++++++++++++++- common/Utilities.h | 31 +++++++++ tests/lbpm_color_simulator.cpp | 70 ++++++++++---------- 11 files changed, 303 insertions(+), 89 deletions(-) diff --git a/StackTrace/StackTrace.cpp b/StackTrace/StackTrace.cpp index e9292990..55a24352 100644 --- a/StackTrace/StackTrace.cpp +++ b/StackTrace/StackTrace.cpp @@ -7,6 +7,7 @@ #include #include +#include #include #include #include @@ -348,8 +349,11 @@ static inline int exec3( const char *cmd, FUNCTION &fun ) if ( buffer[0] != 0 ) fun( buffer ); } - auto status = pclose( pipe ); - int code = WEXITSTATUS( status ); + int code = pclose( pipe ); + if ( errno == ECHILD ) { + errno = 0; + code = 0; + } std::this_thread::yield(); // Allow any signals to process resetSignal( SIGCHLD ); // Clear child exited return code; @@ -1741,7 +1745,7 @@ std::vector StackTrace::defaultSignalsToCatch() * Set the signal handlers * ****************************************************************************/ static std::function abort_fun; -static StackTrace::abort_error rethrow() +StackTrace::abort_error rethrow() { StackTrace::abort_error error; #ifdef USE_LINUX @@ -1775,14 +1779,14 @@ static StackTrace::abort_error rethrow() } return error; } -static void term_func_abort( int sig ) +void StackTrace::terminateFunctionSignal( int sig ) { StackTrace::abort_error err; err.type = StackTrace::terminateType::signal; err.signal = sig; err.bytes = StackTrace::Utilities::getMemoryUsage(); 
err.stack = StackTrace::backtrace(); - err.stackType = StackTrace::printStackType::global; + err.stackType = StackTrace::getDefaultStackType(); abort_fun( err ); } static bool signals_set[256] = { false }; @@ -1829,7 +1833,7 @@ void StackTrace::setErrorHandler( std::function allSignalsToCatch(); @@ -289,6 +293,13 @@ multi_stack_info generateFromString( const std::vector &str ); multi_stack_info generateFromString( const std::string &str ); +//! Set default stack type +void setDefaultStackType( StackTrace::printStackType ); + +//! Get default stack type +StackTrace::printStackType getDefaultStackType(); + + } // namespace StackTrace diff --git a/StackTrace/Utilities.cpp b/StackTrace/Utilities.cpp index 734a0056..11f05777 100644 --- a/StackTrace/Utilities.cpp +++ b/StackTrace/Utilities.cpp @@ -8,8 +8,10 @@ #include #include #include +#include #include #include +#include #ifdef USE_MPI #include "mpi.h" @@ -19,6 +21,10 @@ #include "MemoryApp.h" #endif +#ifdef USE_GCOV +extern "C" void __gcov_flush( void ); +#endif + #define perr std::cerr @@ -65,6 +71,12 @@ // clang-format on +#ifdef __GNUC__ +#define USE_ABI +#include +#endif + + namespace StackTrace { @@ -96,13 +108,12 @@ inline size_t findfirst( const std::vector &X, TYPE Y ) /**************************************************************************** * Function to terminate the program * ****************************************************************************/ -static bool abort_throwException = false; -static printStackType abort_stackType = printStackType::global; -static int force_exit = 0; +static bool abort_throwException = false; +static int force_exit = 0; void Utilities::setAbortBehavior( bool throwException, int stackType ) { abort_throwException = throwException; - abort_stackType = static_cast( stackType ); + StackTrace::setDefaultStackType( static_cast( stackType ) ); } void Utilities::abort( const std::string &message, const std::string &filename, const int line ) { @@ -112,16 +123,28 @@ void 
Utilities::abort( const std::string &message, const std::string &filename, err.type = terminateType::abort; err.line = line; err.bytes = Utilities::getMemoryUsage(); - err.stackType = abort_stackType; + err.stackType = StackTrace::getDefaultStackType(); err.stack = StackTrace::backtrace(); throw err; } -static void terminate( const StackTrace::abort_error &err ) +static std::mutex terminate_mutex; +static inline void callAbort() { +#ifdef USE_GCOV + __gcov_flush(); +#endif + terminate_mutex.unlock(); + std::abort(); +} +void Utilities::terminate( const StackTrace::abort_error &err ) +{ + // Lock mutex to ensure multiple threads do not try to abort simultaneously + terminate_mutex.lock(); + // Clear the error handlers clearErrorHandler(); // Print the message and abort if ( force_exit > 1 ) { - std::abort(); + callAbort(); } else if ( !abort_throwException ) { // Use MPI_abort (will terminate all processes) force_exit = 2; @@ -135,10 +158,11 @@ static void terminate( const StackTrace::abort_error &err ) MPI_Abort( MPI_COMM_WORLD, -1 ); } #endif - std::abort(); + callAbort(); } else { perr << err.what(); - std::abort(); + perr.flush(); + callAbort(); } } @@ -149,7 +173,7 @@ static void terminate( const StackTrace::abort_error &err ) static void setTerminateErrorHandler() { // Set the terminate routine for runtime errors - StackTrace::setErrorHandler( terminate ); + StackTrace::setErrorHandler( Utilities::terminate ); } void Utilities::setErrorHandlers() { @@ -293,4 +317,18 @@ std::string Utilities::exec( const string_view &cmd, int &exit_code ) } +/**************************************************************************** + * Get the type name * + ****************************************************************************/ +std::string Utilities::getTypeName( const std::type_info &id ) +{ + std::string name = id.name(); +#if defined( USE_ABI ) + int status; + name = abi::__cxa_demangle( name.c_str(), 0, 0, &status ); +#endif + return name; +} + + } // namespace 
StackTrace diff --git a/StackTrace/Utilities.h b/StackTrace/Utilities.h index 10ed9085..83c8d7aa 100644 --- a/StackTrace/Utilities.h +++ b/StackTrace/Utilities.h @@ -4,6 +4,7 @@ #include #include #include +#include #include "StackTrace/StackTrace.h" #include "StackTrace/string_view.h" @@ -28,9 +29,14 @@ void abort( const std::string &message, const std::string &filename, const int l void setAbortBehavior( bool throwException, int stackType = 2 ); +//! Function to terminate the application +void terminate( const StackTrace::abort_error &err ); + + //! Function to set the error handlers void setErrorHandlers(); + //! Function to clear the error handlers void clearErrorHandlers(); @@ -92,6 +98,18 @@ void cause_segfault(); std::string exec( const StackTrace::string_view &cmd, int &exit_code ); +//! Return the hopefully demangled name of the given type +std::string getTypeName( const std::type_info &id ); + + +//! Return the hopefully demangled name of the given type +template +inline std::string getTypeName() +{ + return getTypeName( typeid( TYPE ) ); +} + + } // namespace Utilities } // namespace StackTrace diff --git a/StackTrace/string_view.h b/StackTrace/string_view.h index d83d1f24..ee729f63 100644 --- a/StackTrace/string_view.h +++ b/StackTrace/string_view.h @@ -119,7 +119,7 @@ public: int result = 0; for ( int i = 0; i < N && result == 0; i++ ) if ( d_data[i] != other[i] ) - result = d_data[i] < other[i] ? -i : i; + result = d_data[i] < other[i] ? -( i + 1 ) : ( i + 1 ); if ( result == 0 ) result = size() == other.size() ? 0 : size() < other.size() ? 
-1 : 1; return result; diff --git a/analysis/runAnalysis.cpp b/analysis/runAnalysis.cpp index caa03b1b..6c76f58b 100644 --- a/analysis/runAnalysis.cpp +++ b/analysis/runAnalysis.cpp @@ -767,6 +767,8 @@ void runAnalysis::run(int timestep, std::shared_ptr input_db, TwoPhase double *Pressure, double *Velocity, double *fq, double *Den) { int N = d_N[0]*d_N[1]*d_N[2]; + NULL_USE( N ); + NULL_USE( Phi ); auto db = input_db->getDatabase( "Analysis" ); //int timestep = db->getWithDefault( "timestep", 0 ); @@ -937,8 +939,6 @@ void runAnalysis::run(int timestep, std::shared_ptr input_db, TwoPhase ******************************************************************/ void runAnalysis::basic(int timestep, std::shared_ptr input_db, SubPhase &Averages, const double *Phi, double *Pressure, double *Velocity, double *fq, double *Den) { - int N = d_N[0]*d_N[1]*d_N[2]; - // Check which analysis steps we need to perform auto color_db = input_db->getDatabase( "Color" ); auto vis_db = input_db->getDatabase( "Visualization" ); @@ -954,7 +954,7 @@ void runAnalysis::basic(int timestep, std::shared_ptr input_db, SubPha finish(); } - PROFILE_START("run"); + PROFILE_START("basic"); // Copy the appropriate variables to the host (so we can spawn new threads) ScaLBL_DeviceBarrier(); @@ -983,7 +983,6 @@ void runAnalysis::basic(int timestep, std::shared_ptr input_db, SubPha } PROFILE_STOP("Copy data to host"); - PROFILE_START("run",1); // Spawn threads to do the analysis work //if (timestep%d_restart_interval==0){ // if ( matches(type,AnalysisType::ComputeAverages) ) { @@ -1036,12 +1035,11 @@ void runAnalysis::basic(int timestep, std::shared_ptr input_db, SubPha d_wait_vis = d_tpool.add_work(work); } - PROFILE_STOP("run"); + PROFILE_STOP("basic"); } void runAnalysis::WriteVisData(int timestep, std::shared_ptr input_db, SubPhase &Averages, const double *Phi, double *Pressure, double *Velocity, double *fq, double *Den) { - int N = d_N[0]*d_N[1]*d_N[2]; auto color_db = input_db->getDatabase( "Color" ); 
auto vis_db = input_db->getDatabase( "Visualization" ); //int timestep = color_db->getWithDefault( "timestep", 0 ); @@ -1068,7 +1066,6 @@ void runAnalysis::WriteVisData(int timestep, std::shared_ptr input_db, d_wait_vis = d_tpool.add_work(work2); //Averages.WriteVis = false; - // } PROFILE_STOP("write vis"); } diff --git a/common/Communication.hpp b/common/Communication.hpp index cb9f3f18..33fed3a7 100644 --- a/common/Communication.hpp +++ b/common/Communication.hpp @@ -44,9 +44,9 @@ Array redistribute( const RankInfoStruct& src_rank, const Array& src if ( !src_data.empty() ) { int i1[3] = { src_size[0] * src_rank.ix, src_size[1] * src_rank.jy, src_size[2] * src_rank.kz }; int i2[3] = { i1[0] + src_size[0] - 1, i1[1] + src_size[1] - 1, i1[2] + src_size[2] - 1 }; - for ( size_t i=0; i redistribute( const RankInfoStruct& src_rank, const Array& src Array dst_data( dst_size[0], dst_size[1], dst_size[2] ); int i1[3] = { dst_size[0] * dst_rank.ix, dst_size[1] * dst_rank.jy, dst_size[2] * dst_rank.kz }; int i2[3] = { i1[0] + dst_size[0] - 1, i1[1] + dst_size[1] - 1, i1[2] + dst_size[2] - 1 }; - for ( size_t i=0; i readMicroCT( const Database& domain, MPI_Comm comm ) auto n = domain.getVector( "n" ); int rank = comm_rank(MPI_COMM_WORLD); auto nproc = domain.getVector( "nproc" ); - auto ReadValues = domain.getVector( "ReadValues" ); - auto WriteValues = domain.getVector( "WriteValues" ); RankInfoStruct rankInfo( rank, nproc[0], nproc[1], nproc[2] ); // Determine the largest file number to get @@ -95,29 +93,26 @@ Array readMicroCT( const Database& domain, MPI_Comm comm ) ERROR( "Invalid name for first file" ); } data = readMicroCT( filename ); - - // Relabel the data - for (int k = 0; k<1024; k++){ - for (int j = 0; j<1024; j++){ - for (int i = 0; i<1024; i++){ - //n = k*Nfx*Nfy + j*Nfx + i; - //char locval = loc_id[n]; - char locval = data(i,j,k); - for (int idx=0; idx( "ReadValues" ); + auto WriteValues = domain.getVector( "WriteValues" ); + ASSERT( ReadValues.size() == 
WriteValues.size() ); + int readMaxValue = 0; + for ( auto v : ReadValues ) + readMaxValue = std::max( data.max()+1, v ); + std::vector map( readMaxValue + 1, -1 ); + for ( size_t i=0; i= 0 && t <= readMaxValue ); + data(i) = map[t]; + } + return data; } diff --git a/common/Utilities.cpp b/common/Utilities.cpp index f6d810af..1cf764be 100644 --- a/common/Utilities.cpp +++ b/common/Utilities.cpp @@ -1,10 +1,116 @@ #include "common/Utilities.h" +#include "StackTrace/StackTrace.h" +#include "StackTrace/ErrorHandlers.h" + +#ifdef USE_TIMER +#include "MemoryApp.h" +#include "ProfilerApp.h" +#endif + +#ifdef USE_MPI +#include "mpi.h" +#endif -#include #include +#include +#include -// Factor a number into it's prime factors +// Mutex for Utility functions +static std::mutex Utilities_mutex; + + +/**************************************************************************** + * Function to perform the default startup/shutdown sequences * + ****************************************************************************/ +void Utilities::startup( int argc, char **argv ) +{ + NULL_USE( argc ); + NULL_USE( argv ); + // Disable OpenMP + Utilities::setenv( "OMP_NUM_THREADS", "1" ); + Utilities::setenv( "MKL_NUM_THREADS", "1" ); + // Start MPI +#ifdef USE_MPI + int provided; + MPI_Init_thread( &argc, &argv, MPI_THREAD_MULTIPLE, &provided ); + if ( provided < MPI_THREAD_MULTIPLE ) { + int rank; + MPI_Comm_rank( MPI_COMM_WORLD, &rank ); + if ( rank == 0 ) + std::cerr << "Warning: Failed to start MPI with necessary thread support, thread support will be disabled" << std::endl; + } + StackTrace::globalCallStackInitialize( MPI_COMM_WORLD ); +#endif + // Set the error handlers + Utilities::setAbortBehavior( true, 3 ); + Utilities::setErrorHandlers(); +} +void Utilities::shutdown() +{ + // Clear the error handlers + Utilities::clearErrorHandlers(); + StackTrace::clearSignals(); + StackTrace::clearSymbols(); + int rank = 0; +#ifdef USE_MPI + MPI_Comm_rank( MPI_COMM_WORLD, &rank ); + 
StackTrace::globalCallStackFinalize(); + MPI_Barrier( MPI_COMM_WORLD ); + MPI_Finalize(); +#endif +#ifdef USE_TIMER + PROFILE_DISABLE(); + auto memory = MemoryApp::getMemoryStats(); + if ( rank == 0 && memory.N_new > memory.N_delete ) + MemoryApp::print( std::cout ); +#endif +} + + +/**************************************************************************** + * Function to set an environemental variable * + ****************************************************************************/ +void Utilities::setenv( const std::string &name, const std::string &value ) +{ + Utilities_mutex.lock(); +#if defined( USE_LINUX ) || defined( USE_MAC ) + bool pass = false; + if ( !value.empty() ) + pass = ::setenv( name.data(), value.data(), 1 ) == 0; + else + pass = ::unsetenv( name.data() ) == 0; +#elif defined( USE_WINDOWS ) + bool pass = SetEnvironmentVariable( name.data(), value.data() ) != 0; +#else +#error Unknown OS +#endif + Utilities_mutex.unlock(); + if ( !pass ) { + char msg[1024]; + if ( !value.empty() ) + sprintf( + msg, "Error setting enviornmental variable: %s=%s\n", name.data(), value.data() ); + else + sprintf( msg, "Error clearing enviornmental variable: %s\n", name.data() ); + ERROR( msg ); + } +} +std::string Utilities::getenv( const std::string &name ) +{ + std::string var; + Utilities_mutex.lock(); + auto tmp = std::getenv( name.data() ); + if ( tmp ) + var = std::string( tmp ); + Utilities_mutex.unlock(); + return var; +} + + +/**************************************************************************** + * Factor a number into it's prime factors * + ****************************************************************************/ std::vector Utilities::factor(size_t number) { if ( number<=3 ) @@ -54,9 +160,13 @@ std::vector Utilities::factor(size_t number) } -// Dummy function to prevent compiler from optimizing away variable +/**************************************************************************** + * Dummy function to prevent compiler from optimizing 
away variable * + ****************************************************************************/ void Utilities::nullUse( void* data ) { NULL_USE(data); } + + diff --git a/common/Utilities.h b/common/Utilities.h index 90cb4008..da579966 100644 --- a/common/Utilities.h +++ b/common/Utilities.h @@ -25,6 +25,37 @@ using StackTrace::Utilities::sleep_ms; using StackTrace::Utilities::sleep_s; +/*! + * \brief Start MPI, error handlers + * \details This routine will perform the default startup sequence + * \param argc argc from main + * \param argv argv from main + */ +void startup( int argc, char **argv ); + +/*! + * \brief Stop MPI, error handlers + * \details This routine will perform the default shutdown sequence to match startup + */ +void shutdown(); + + +/*! + * Get an environmental variable + * @param name The name of the environmental variable + * @return The value of the environmental variable + */ +std::string getenv( const std::string &name ); + + +/*! + * Set an environmental variable + * @param name The name of the environmental variable + * @param value The value to set + */ +void setenv( const std::string &name, const std::string &value ); + + //! std::string version of sprintf inline std::string stringf( const char *format, ... ); diff --git a/tests/lbpm_color_simulator.cpp b/tests/lbpm_color_simulator.cpp index e8e675e2..1f63c653 100644 --- a/tests/lbpm_color_simulator.cpp +++ b/tests/lbpm_color_simulator.cpp @@ -7,6 +7,7 @@ #include #include "models/ColorModel.h" +#include "common/Utilities.h" //#define WRE_SURFACES @@ -15,7 +16,6 @@ * James E. 
McClure 2013-2014 */ -using namespace std; //************************************************************************* // Implementation of Two-Phase Immiscible LBM using CUDA @@ -23,27 +23,26 @@ using namespace std; int main(int argc, char **argv) { - // Initialize MPI - int provided_thread_support = -1; - MPI_Init_thread(&argc,&argv,MPI_THREAD_MULTIPLE,&provided_thread_support); - MPI_Comm comm; - MPI_Comm_dup(MPI_COMM_WORLD,&comm); - int rank = comm_rank(comm); - int nprocs = comm_size(comm); - if ( rank==0 && provided_thread_support