combined updates
This commit is contained in:
commit
23e221d84e
|
@ -112,7 +112,7 @@ ENDIF()
|
|||
ADD_CUSTOM_TARGET( build-test )
|
||||
ADD_CUSTOM_TARGET( build-examples )
|
||||
ADD_CUSTOM_TARGET( check COMMAND make test )
|
||||
ADD_DISTCLEAN( analysis null_timer tests liblbpm-wia.* cpu gpu example common IO threadpool )
|
||||
ADD_DISTCLEAN( analysis null_timer tests liblbpm-wia.* cpu gpu example common IO threadpool StackTrace )
|
||||
|
||||
|
||||
# Check for CUDA
|
||||
|
@ -133,8 +133,6 @@ IF ( NOT ONLY_BUILD_DOCS )
|
|||
CONFIGURE_LBPM()
|
||||
CONFIGURE_TIMER( 0 "${${PROJ}_INSTALL_DIR}/null_timer" )
|
||||
CONFIGURE_LINE_COVERAGE()
|
||||
INCLUDE( "${CMAKE_CURRENT_SOURCE_DIR}/cmake/SharedPtr.cmake" )
|
||||
CONFIGURE_SHARED_PTR( "${${PROJ}_INSTALL_DIR}/include" "std" )
|
||||
# Set the external library link list
|
||||
SET( EXTERNAL_LIBS ${EXTERNAL_LIBS} ${TIMER_LIBS} )
|
||||
ENDIF()
|
||||
|
@ -156,6 +154,7 @@ IF ( NOT ONLY_BUILD_DOCS )
|
|||
ADD_PACKAGE_SUBDIRECTORY( analysis )
|
||||
ADD_PACKAGE_SUBDIRECTORY( IO )
|
||||
ADD_PACKAGE_SUBDIRECTORY( threadpool )
|
||||
ADD_PACKAGE_SUBDIRECTORY( StackTrace )
|
||||
ADD_PACKAGE_SUBDIRECTORY( models )
|
||||
IF ( USE_CUDA )
|
||||
ADD_PACKAGE_SUBDIRECTORY( gpu )
|
||||
|
@ -164,7 +163,6 @@ IF ( NOT ONLY_BUILD_DOCS )
|
|||
ENDIF()
|
||||
INSTALL_LBPM_TARGET( lbpm-wia-library )
|
||||
ADD_SUBDIRECTORY( tests )
|
||||
ADD_SUBDIRECTORY( threadpool/test )
|
||||
ADD_SUBDIRECTORY( example )
|
||||
#ADD_SUBDIRECTORY( workflows )
|
||||
INSTALL_PROJ_LIB()
|
||||
|
|
|
@ -30,9 +30,9 @@
|
|||
*/
|
||||
#include "Mesh.h"
|
||||
#include "common/Utilities.h"
|
||||
#include "shared_ptr.h"
|
||||
|
||||
#include <limits>
|
||||
#include <memory>
|
||||
#include <stdint.h>
|
||||
|
||||
namespace IO {
|
||||
|
|
|
@ -17,14 +17,13 @@
|
|||
#define MESH_INC
|
||||
|
||||
#include <iostream>
|
||||
#include <memory>
|
||||
#include <string.h>
|
||||
#include <vector>
|
||||
|
||||
#include "common/Array.h"
|
||||
#include "common/Communication.h"
|
||||
#include "analysis/PointList.h"
|
||||
#include "shared_ptr.h"
|
||||
|
||||
|
||||
|
||||
namespace IO {
|
||||
|
|
|
@ -18,9 +18,9 @@
|
|||
|
||||
#include "IO/Mesh.h"
|
||||
#include "common/MPI_Helpers.h"
|
||||
#include "shared_ptr.h"
|
||||
|
||||
#include <iostream>
|
||||
#include <memory>
|
||||
#include <string.h>
|
||||
#include <vector>
|
||||
#include <map>
|
||||
|
|
|
@ -17,12 +17,12 @@
|
|||
#define READER_INC
|
||||
|
||||
#include <iostream>
|
||||
#include <memory>
|
||||
#include <string.h>
|
||||
#include <vector>
|
||||
|
||||
#include "IO/Mesh.h"
|
||||
#include "IO/MeshDatabase.h"
|
||||
#include "shared_ptr.h"
|
||||
|
||||
|
||||
namespace IO {
|
||||
|
|
|
@ -19,12 +19,12 @@
|
|||
#include "IO/silo.h"
|
||||
#include "common/MPI_Helpers.h"
|
||||
#include "common/Utilities.h"
|
||||
#include "shared_ptr.h"
|
||||
|
||||
#include <sys/stat.h>
|
||||
#include <algorithm>
|
||||
#include <vector>
|
||||
#include <set>
|
||||
#include <memory>
|
||||
|
||||
|
||||
|
||||
|
|
42
StackTrace/ErrorHandlers.h
Normal file
42
StackTrace/ErrorHandlers.h
Normal file
|
@ -0,0 +1,42 @@
|
|||
#ifndef included_StackTraceErrorHandlers
|
||||
#define included_StackTraceErrorHandlers
|
||||
|
||||
|
||||
#include "StackTrace/StackTrace.h"
|
||||
|
||||
#include <functional>
|
||||
|
||||
#include "mpi.h"
|
||||
|
||||
|
||||
namespace StackTrace
|
||||
{
|
||||
|
||||
|
||||
/*!
|
||||
* Set the error handler
|
||||
* @param[in] abort Function to terminate the program: abort(msg,type)
|
||||
*/
|
||||
void setErrorHandler( std::function<void( const StackTrace::abort_error& )> abort );
|
||||
|
||||
//! Clear the error handler
|
||||
void clearErrorHandler();
|
||||
|
||||
|
||||
//! Set an error handler for MPI
|
||||
void setMPIErrorHandler( MPI_Comm comm );
|
||||
|
||||
//! Clear an error handler for MPI
|
||||
void clearMPIErrorHandler( MPI_Comm comm );
|
||||
|
||||
|
||||
//! Initialize globalCallStack functionallity
|
||||
void globalCallStackInitialize( MPI_Comm comm );
|
||||
|
||||
//! Clean up globalCallStack functionallity
|
||||
void globalCallStackFinalize();
|
||||
|
||||
|
||||
} // namespace StackTrace
|
||||
|
||||
#endif
|
4
StackTrace/Readme.txt
Normal file
4
StackTrace/Readme.txt
Normal file
|
@ -0,0 +1,4 @@
|
|||
This directory contains code external code released with permission under the license of this project.
|
||||
|
||||
Original code and license are availible at:
|
||||
https://bitbucket.org/mberrill/StackTrace
|
2517
StackTrace/StackTrace.cpp
Normal file
2517
StackTrace/StackTrace.cpp
Normal file
File diff suppressed because it is too large
Load Diff
|
@ -16,41 +16,30 @@
|
|||
#ifndef included_StackTrace
|
||||
#define included_StackTrace
|
||||
|
||||
#include <array>
|
||||
#include <functional>
|
||||
#include <iostream>
|
||||
#include <set>
|
||||
#include <thread>
|
||||
#include <vector>
|
||||
|
||||
|
||||
// Check for and include MPI
|
||||
// clang-format off
|
||||
#if defined(USE_MPI) || defined(USE_EXT_MPI)
|
||||
#include "mpi.h"
|
||||
#elif defined(__has_include)
|
||||
#if __has_include("mpi.h")
|
||||
#include "mpi.h"
|
||||
#else
|
||||
typedef int MPI_Comm;
|
||||
#endif
|
||||
#else
|
||||
typedef int MPI_Comm;
|
||||
#endif
|
||||
// clang-format on
|
||||
#include "StackTrace/string_view.h"
|
||||
|
||||
|
||||
namespace StackTrace {
|
||||
|
||||
|
||||
//! Class to contain stack trace info for a single thread/process
|
||||
struct stack_info {
|
||||
uint32_t line;
|
||||
void *address;
|
||||
void *address2;
|
||||
std::string object;
|
||||
std::string function;
|
||||
std::string filename;
|
||||
int line;
|
||||
std::array<char, 56> object;
|
||||
std::array<char, 48> objectPath;
|
||||
std::array<char, 64> filename;
|
||||
std::array<char, 64> filenamePath;
|
||||
std::array<char, 256> function;
|
||||
//! Default constructor
|
||||
stack_info() : address( nullptr ), address2( nullptr ), line( 0 ) {}
|
||||
stack_info();
|
||||
//! Reset the stack
|
||||
void clear();
|
||||
//! Operator==
|
||||
|
@ -61,19 +50,22 @@ struct stack_info {
|
|||
int getAddressWidth() const;
|
||||
//! Print the stack info
|
||||
std::string print( int widthAddress = 16, int widthObject = 20, int widthFunction = 32 ) const;
|
||||
//! Print the stack info
|
||||
static void print( std::ostream &out, const std::vector<stack_info> &stack,
|
||||
const StackTrace::string_view &prefix = "" );
|
||||
//! Print the stack info
|
||||
void print2(
|
||||
char *txt, int widthAddress = 16, int widthObject = 20, int widthFunction = 32 ) const;
|
||||
//! Compute the number of bytes needed to store the object
|
||||
size_t size() const;
|
||||
//! Pack the data to a byte array, returning a pointer to the end of the data
|
||||
char *pack( char *ptr ) const;
|
||||
//! Unpack the data from a byte array, returning a pointer to the end of the data
|
||||
const char *unpack( const char *ptr );
|
||||
//! Pack a vector of data to a memory block
|
||||
static std::vector<char> packArray( const std::vector<stack_info> &data );
|
||||
//! Unpack a vector of data from a memory block
|
||||
static std::vector<stack_info> unpackArray( const char *data );
|
||||
};
|
||||
|
||||
|
||||
//! Class to contain stack trace info for multiple threads/processes
|
||||
struct multi_stack_info {
|
||||
int N; // Number of threads/processes
|
||||
stack_info stack; // Current stack item
|
||||
|
@ -86,19 +78,69 @@ struct multi_stack_info {
|
|||
multi_stack_info &operator=( const std::vector<stack_info> & );
|
||||
//! Reset the stack
|
||||
void clear();
|
||||
//! Is the stack empty
|
||||
bool empty() const { return N == 0; }
|
||||
//! Add the given stack to the multistack
|
||||
void add( size_t len, const stack_info *stack );
|
||||
//! Add the given stack to the multistack
|
||||
void add( const multi_stack_info &stack );
|
||||
//! Compute the number of bytes needed to store the object
|
||||
size_t size() const;
|
||||
//! Pack the data to a byte array, returning a pointer to the end of the data
|
||||
char *pack( char *ptr ) const;
|
||||
//! Unpack the data from a byte array, returning a pointer to the end of the data
|
||||
const char *unpack( const char *ptr );
|
||||
//! Print the stack info
|
||||
std::vector<std::string> print( const std::string &prefix = std::string() ) const;
|
||||
std::vector<std::string> print( const StackTrace::string_view &prefix = "" ) const;
|
||||
//! Print the stack info
|
||||
void print( std::ostream &out, const StackTrace::string_view &prefix = "" ) const;
|
||||
//! Print the stack info
|
||||
std::string printString( const StackTrace::string_view &prefix = "" ) const;
|
||||
|
||||
private:
|
||||
void print2( const std::string &prefix, int w[3], std::vector<std::string> &text ) const;
|
||||
template<class FUN>
|
||||
void print2( int Np, char *prefix, int w[3], bool c, FUN &fun ) const;
|
||||
int getAddressWidth() const;
|
||||
int getObjectWidth() const;
|
||||
int getFunctionWidth() const;
|
||||
};
|
||||
|
||||
|
||||
//!< Terminate type
|
||||
enum class terminateType : uint8_t { signal, exception, abort, MPI, unknown };
|
||||
enum class printStackType : uint8_t { local = 1, threaded = 2, global = 3 };
|
||||
|
||||
//!< Class to contain exception info from abort
|
||||
class abort_error : public std::exception
|
||||
{
|
||||
public:
|
||||
std::string message; //!< Abort message
|
||||
std::string filename; //!< File where abort was called
|
||||
terminateType type; //!< What caused the termination
|
||||
printStackType stackType; //!< Print the local stack, all threads, or global call stack
|
||||
uint8_t signal; //!< Signal number
|
||||
int line; //!< Line number where abort was called
|
||||
size_t bytes; //!< Memory in use during abort
|
||||
std::vector<void *> stack; //!< Local call stack for abort
|
||||
public:
|
||||
virtual const char *what() const noexcept override;
|
||||
abort_error();
|
||||
virtual ~abort_error() {}
|
||||
|
||||
private:
|
||||
mutable std::string d_msg;
|
||||
};
|
||||
|
||||
|
||||
//!< Class to contain symbol information
|
||||
struct symbols_struct {
|
||||
char type;
|
||||
void *address;
|
||||
std::array<char, 56> obj;
|
||||
std::array<char, 56> objPath;
|
||||
};
|
||||
|
||||
|
||||
/*!
|
||||
* @brief Get the current call stack
|
||||
* @details This function returns the current call stack for the current thread
|
||||
|
@ -167,16 +209,18 @@ std::vector<stack_info> getStackInfo( const std::vector<void *> &address );
|
|||
|
||||
|
||||
//! Function to return the signal name
|
||||
std::string signalName( int signal );
|
||||
const char *signalName( int signal );
|
||||
|
||||
|
||||
/*!
|
||||
* Return the symbols from the current executable (not availible for all platforms)
|
||||
* @return Returns 0 if sucessful
|
||||
* @return Returns the symbols loaded
|
||||
*/
|
||||
int getSymbols( std::vector<void *> &address,
|
||||
std::vector<char> &type,
|
||||
std::vector<std::string> &obj );
|
||||
std::vector<symbols_struct> getSymbols();
|
||||
|
||||
|
||||
//! Clear internal symbol data
|
||||
void clearSymbols();
|
||||
|
||||
|
||||
/*!
|
||||
|
@ -193,20 +237,10 @@ std::string getExecutable();
|
|||
std::string getSymPaths();
|
||||
|
||||
|
||||
//!< Terminate type
|
||||
enum class terminateType { signal, exception };
|
||||
|
||||
/*!
|
||||
* Set the error handlers
|
||||
* @param[in] abort Function to terminate the program: abort(msg,type)
|
||||
*/
|
||||
void setErrorHandlers( std::function<void( std::string, terminateType )> abort );
|
||||
|
||||
|
||||
/*!
|
||||
* Set the given signals to the handler
|
||||
* @param[in] signals Signals to handle
|
||||
* @param[in] handler Function to terminate the program: abort(msg,type)
|
||||
* @param[in] handler Function to terminate the program: abort(signal)
|
||||
*/
|
||||
void setSignals( const std::vector<int> &signals, void ( *handler )( int ) );
|
||||
|
||||
|
@ -215,10 +249,18 @@ void setSignals( const std::vector<int> &signals, void ( *handler )( int ) );
|
|||
void clearSignal( int signal );
|
||||
|
||||
|
||||
//! Clear a signal set by setSignals
|
||||
void clearSignals( const std::vector<int> &signals );
|
||||
|
||||
|
||||
//! Clear all signals set by setSignals
|
||||
void clearSignals();
|
||||
|
||||
|
||||
//! Raise a signal
|
||||
void raiseSignal( int signal );
|
||||
|
||||
|
||||
//! Return a list of all signals that can be caught
|
||||
std::vector<int> allSignalsToCatch();
|
||||
|
||||
|
@ -227,19 +269,12 @@ std::vector<int> defaultSignalsToCatch();
|
|||
|
||||
|
||||
//! Get a list of the active threads
|
||||
std::set<std::thread::native_handle_type> activeThreads();
|
||||
std::vector<std::thread::native_handle_type> activeThreads();
|
||||
|
||||
//! Get a handle to this thread
|
||||
std::thread::native_handle_type thisThread();
|
||||
|
||||
|
||||
//! Initialize globalCallStack functionallity
|
||||
void globalCallStackInitialize( MPI_Comm comm );
|
||||
|
||||
//! Clean up globalCallStack functionallity
|
||||
void globalCallStackFinalize();
|
||||
|
||||
|
||||
/*!
|
||||
* @brief Call system command
|
||||
* @details This function calls a system command, waits for the program
|
||||
|
@ -248,7 +283,25 @@ void globalCallStackFinalize();
|
|||
* @param[out] exit_code Exit code returned from child process
|
||||
* @return Returns string containing the output
|
||||
*/
|
||||
std::string exec( const std::string &cmd, int &exit_code );
|
||||
std::string exec( const string_view &cmd, int &exit_code );
|
||||
|
||||
|
||||
/*!
|
||||
* @brief Create stack from string
|
||||
* @details This function creates the call stack from the string generated by print
|
||||
* @param[in] str Vector of strings containing call stack
|
||||
* @return Returns the call stack
|
||||
*/
|
||||
multi_stack_info generateFromString( const std::vector<std::string> &str );
|
||||
|
||||
|
||||
/*!
|
||||
* @brief Create stack from string
|
||||
* @details This function creates the call stack from the string
|
||||
* @param[in] str String containing call stack
|
||||
* @return Returns the call stack
|
||||
*/
|
||||
multi_stack_info generateFromString( const std::string &str );
|
||||
|
||||
|
||||
} // namespace StackTrace
|
296
StackTrace/Utilities.cpp
Normal file
296
StackTrace/Utilities.cpp
Normal file
|
@ -0,0 +1,296 @@
|
|||
#define NOMINMAX
|
||||
#include "StackTrace/Utilities.h"
|
||||
#include "StackTrace/ErrorHandlers.h"
|
||||
#include "StackTrace/StackTrace.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <csignal>
|
||||
#include <cstring>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <stdexcept>
|
||||
|
||||
#ifdef USE_MPI
|
||||
#include "mpi.h"
|
||||
#endif
|
||||
|
||||
#ifdef USE_TIMER
|
||||
#include "MemoryApp.h"
|
||||
#endif
|
||||
|
||||
|
||||
#define perr std::cerr
|
||||
|
||||
|
||||
// Detect the OS
|
||||
// clang-format off
|
||||
#if defined( WIN32 ) || defined( _WIN32 ) || defined( WIN64 ) || defined( _WIN64 ) || defined( _MSC_VER )
|
||||
#define USE_WINDOWS
|
||||
#elif defined( __APPLE__ )
|
||||
#define USE_MAC
|
||||
#elif defined( __linux ) || defined( __linux__ ) || defined( __unix ) || defined( __posix )
|
||||
#define USE_LINUX
|
||||
#define USE_NM
|
||||
#else
|
||||
#error Unknown OS
|
||||
#endif
|
||||
// clang-format on
|
||||
|
||||
|
||||
// Include system dependent headers
|
||||
// clang-format off
|
||||
#ifdef USE_WINDOWS
|
||||
#include <process.h>
|
||||
#include <psapi.h>
|
||||
#include <stdio.h>
|
||||
#include <tchar.h>
|
||||
#include <windows.h>
|
||||
#else
|
||||
#include <dlfcn.h>
|
||||
#include <execinfo.h>
|
||||
#include <sched.h>
|
||||
#include <sys/time.h>
|
||||
#include <ctime>
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
#ifdef USE_LINUX
|
||||
#include <malloc.h>
|
||||
#endif
|
||||
#ifdef USE_MAC
|
||||
#include <mach/mach.h>
|
||||
#include <sys/sysctl.h>
|
||||
#include <sys/types.h>
|
||||
#endif
|
||||
// clang-format on
|
||||
|
||||
|
||||
namespace StackTrace {
|
||||
|
||||
|
||||
/****************************************************************************
|
||||
* Function to find an entry *
|
||||
****************************************************************************/
|
||||
template<class TYPE>
|
||||
inline size_t findfirst( const std::vector<TYPE> &X, TYPE Y )
|
||||
{
|
||||
if ( X.empty() )
|
||||
return 0;
|
||||
size_t lower = 0;
|
||||
size_t upper = X.size() - 1;
|
||||
if ( X[lower] >= Y )
|
||||
return lower;
|
||||
if ( X[upper] < Y )
|
||||
return upper;
|
||||
while ( ( upper - lower ) != 1 ) {
|
||||
size_t value = ( upper + lower ) / 2;
|
||||
if ( X[value] >= Y )
|
||||
upper = value;
|
||||
else
|
||||
lower = value;
|
||||
}
|
||||
return upper;
|
||||
}
|
||||
|
||||
|
||||
/****************************************************************************
|
||||
* Function to terminate the program *
|
||||
****************************************************************************/
|
||||
static bool abort_throwException = false;
|
||||
static printStackType abort_stackType = printStackType::global;
|
||||
static int force_exit = 0;
|
||||
void Utilities::setAbortBehavior( bool throwException, int stackType )
|
||||
{
|
||||
abort_throwException = throwException;
|
||||
abort_stackType = static_cast<printStackType>( stackType );
|
||||
}
|
||||
void Utilities::abort( const std::string &message, const std::string &filename, const int line )
|
||||
{
|
||||
abort_error err;
|
||||
err.message = message;
|
||||
err.filename = filename;
|
||||
err.type = terminateType::abort;
|
||||
err.line = line;
|
||||
err.bytes = Utilities::getMemoryUsage();
|
||||
err.stackType = abort_stackType;
|
||||
err.stack = StackTrace::backtrace();
|
||||
throw err;
|
||||
}
|
||||
static void terminate( const StackTrace::abort_error &err )
|
||||
{
|
||||
clearErrorHandler();
|
||||
// Print the message and abort
|
||||
if ( force_exit > 1 ) {
|
||||
std::abort();
|
||||
} else if ( !abort_throwException ) {
|
||||
// Use MPI_abort (will terminate all processes)
|
||||
force_exit = 2;
|
||||
perr << err.what();
|
||||
#if defined( USE_MPI ) || defined( HAVE_MPI )
|
||||
int initialized = 0, finalized = 0;
|
||||
MPI_Initialized( &initialized );
|
||||
MPI_Finalized( &finalized );
|
||||
if ( initialized != 0 && finalized == 0 ) {
|
||||
clearMPIErrorHandler( MPI_COMM_WORLD );
|
||||
MPI_Abort( MPI_COMM_WORLD, -1 );
|
||||
}
|
||||
#endif
|
||||
std::abort();
|
||||
} else {
|
||||
perr << err.what();
|
||||
std::abort();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/****************************************************************************
|
||||
* Functions to set the error handler *
|
||||
****************************************************************************/
|
||||
static void setTerminateErrorHandler()
|
||||
{
|
||||
// Set the terminate routine for runtime errors
|
||||
StackTrace::setErrorHandler( terminate );
|
||||
}
|
||||
void Utilities::setErrorHandlers()
|
||||
{
|
||||
#ifdef USE_MPI
|
||||
setMPIErrorHandler( MPI_COMM_WORLD );
|
||||
setMPIErrorHandler( MPI_COMM_SELF );
|
||||
#endif
|
||||
setTerminateErrorHandler();
|
||||
}
|
||||
void Utilities::clearErrorHandlers()
|
||||
{
|
||||
#ifdef USE_MPI
|
||||
clearMPIErrorHandler( MPI_COMM_WORLD );
|
||||
clearMPIErrorHandler( MPI_COMM_SELF );
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
/****************************************************************************
|
||||
* Function to get the memory usage *
|
||||
* Note: this function should be thread-safe *
|
||||
****************************************************************************/
|
||||
// clang-format off
|
||||
#if defined( USE_MAC ) || defined( USE_LINUX )
|
||||
// Get the page size on mac or linux
|
||||
static size_t page_size = static_cast<size_t>( sysconf( _SC_PAGESIZE ) );
|
||||
#endif
|
||||
size_t Utilities::getSystemMemory()
|
||||
{
|
||||
#if defined( USE_LINUX )
|
||||
static long pages = sysconf( _SC_PHYS_PAGES );
|
||||
size_t N_bytes = pages * page_size;
|
||||
#elif defined( USE_MAC )
|
||||
int mib[2] = { CTL_HW, HW_MEMSIZE };
|
||||
u_int namelen = sizeof( mib ) / sizeof( mib[0] );
|
||||
uint64_t size;
|
||||
size_t len = sizeof( size );
|
||||
size_t N_bytes = 0;
|
||||
if ( sysctl( mib, namelen, &size, &len, nullptr, 0 ) == 0 )
|
||||
N_bytes = size;
|
||||
#elif defined( USE_WINDOWS )
|
||||
MEMORYSTATUSEX status;
|
||||
status.dwLength = sizeof( status );
|
||||
GlobalMemoryStatusEx( &status );
|
||||
size_t N_bytes = status.ullTotalPhys;
|
||||
#else
|
||||
#error Unknown OS
|
||||
#endif
|
||||
return N_bytes;
|
||||
}
|
||||
size_t Utilities::getMemoryUsage()
|
||||
{
|
||||
#ifdef USE_TIMER
|
||||
size_t N_bytes = MemoryApp::getTotalMemoryUsage();
|
||||
#else
|
||||
#if defined( USE_LINUX )
|
||||
struct mallinfo meminfo = mallinfo();
|
||||
size_t size_hblkhd = static_cast<unsigned int>( meminfo.hblkhd );
|
||||
size_t size_uordblks = static_cast<unsigned int>( meminfo.uordblks );
|
||||
size_t N_bytes = size_hblkhd + size_uordblks;
|
||||
#elif defined( USE_MAC )
|
||||
struct task_basic_info t_info;
|
||||
mach_msg_type_number_t t_info_count = TASK_BASIC_INFO_COUNT;
|
||||
if ( KERN_SUCCESS !=
|
||||
task_info( mach_task_self(), TASK_BASIC_INFO, (task_info_t) &t_info, &t_info_count ) ) {
|
||||
return 0;
|
||||
}
|
||||
size_t N_bytes = t_info.virtual_size;
|
||||
#elif defined( USE_WINDOWS )
|
||||
PROCESS_MEMORY_COUNTERS memCounter;
|
||||
GetProcessMemoryInfo( GetCurrentProcess(), &memCounter, sizeof( memCounter ) );
|
||||
size_t N_bytes = memCounter.WorkingSetSize;
|
||||
#else
|
||||
#error Unknown OS
|
||||
#endif
|
||||
#endif
|
||||
return N_bytes;
|
||||
}
|
||||
// clang-format on
|
||||
|
||||
|
||||
/****************************************************************************
|
||||
* Functions to get the time and timer resolution *
|
||||
****************************************************************************/
|
||||
#if defined( USE_WINDOWS )
|
||||
double Utilities::time()
|
||||
{
|
||||
LARGE_INTEGER end, f;
|
||||
QueryPerformanceFrequency( &f );
|
||||
QueryPerformanceCounter( &end );
|
||||
double time = ( (double) end.QuadPart ) / ( (double) f.QuadPart );
|
||||
return time;
|
||||
}
|
||||
double Utilities::tick()
|
||||
{
|
||||
LARGE_INTEGER f;
|
||||
QueryPerformanceFrequency( &f );
|
||||
double resolution = ( (double) 1.0 ) / ( (double) f.QuadPart );
|
||||
return resolution;
|
||||
}
|
||||
#elif defined( USE_LINUX ) || defined( USE_MAC )
|
||||
double Utilities::time()
|
||||
{
|
||||
timeval current_time;
|
||||
gettimeofday( ¤t_time, nullptr );
|
||||
double time = ( (double) current_time.tv_sec ) + 1e-6 * ( (double) current_time.tv_usec );
|
||||
return time;
|
||||
}
|
||||
double Utilities::tick()
|
||||
{
|
||||
timeval start, end;
|
||||
gettimeofday( &start, nullptr );
|
||||
gettimeofday( &end, nullptr );
|
||||
while ( end.tv_sec == start.tv_sec && end.tv_usec == start.tv_usec )
|
||||
gettimeofday( &end, nullptr );
|
||||
double resolution = ( (double) ( end.tv_sec - start.tv_sec ) ) +
|
||||
1e-6 * ( (double) ( end.tv_usec - start.tv_usec ) );
|
||||
return resolution;
|
||||
}
|
||||
#else
|
||||
#error Unknown OS
|
||||
#endif
|
||||
|
||||
|
||||
/****************************************************************************
|
||||
* Cause a segfault *
|
||||
****************************************************************************/
|
||||
void Utilities::cause_segfault()
|
||||
{
|
||||
int *ptr = nullptr;
|
||||
ptr[0] = 0;
|
||||
}
|
||||
|
||||
|
||||
/****************************************************************************
|
||||
* Call system command *
|
||||
****************************************************************************/
|
||||
std::string Utilities::exec( const string_view &cmd, int &exit_code )
|
||||
{
|
||||
return StackTrace::exec( cmd, exit_code );
|
||||
}
|
||||
|
||||
|
||||
} // namespace StackTrace
|
99
StackTrace/Utilities.h
Normal file
99
StackTrace/Utilities.h
Normal file
|
@ -0,0 +1,99 @@
|
|||
#ifndef included_StackTrace_Utilities
|
||||
#define included_StackTrace_Utilities
|
||||
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
#include <thread>
|
||||
|
||||
#include "StackTrace/StackTrace.h"
|
||||
#include "StackTrace/string_view.h"
|
||||
|
||||
|
||||
namespace StackTrace {
|
||||
namespace Utilities {
|
||||
|
||||
|
||||
/*!
|
||||
* Aborts the run after printing an error message with file and
|
||||
* line number information.
|
||||
*/
|
||||
void abort( const std::string &message, const std::string &filename, const int line );
|
||||
|
||||
|
||||
/*!
|
||||
* Set the behavior of abort
|
||||
* @param throwException Throw an exception instead of MPI_Abort (default is false)
|
||||
* @param stackType Type of stack to get (1: thread local stack, 2: all threads, 3: global)
|
||||
*/
|
||||
void setAbortBehavior( bool throwException, int stackType = 2 );
|
||||
|
||||
|
||||
//! Function to set the error handlers
|
||||
void setErrorHandlers();
|
||||
|
||||
//! Function to clear the error handlers
|
||||
void clearErrorHandlers();
|
||||
|
||||
|
||||
/*!
|
||||
* Function to get the memory availible.
|
||||
* This function will return the total memory availible
|
||||
* Note: depending on the implimentation, this number may be rounded to
|
||||
* to a multiple of the page size.
|
||||
* If this function fails, it will return 0.
|
||||
*/
|
||||
size_t getSystemMemory();
|
||||
|
||||
|
||||
/*!
|
||||
* Function to get the memory usage.
|
||||
* This function will return the total memory used by the application.
|
||||
* Note: depending on the implimentation, this number may be rounded to
|
||||
* to a multiple of the page size.
|
||||
* If this function fails, it will return 0.
|
||||
*/
|
||||
size_t getMemoryUsage();
|
||||
|
||||
|
||||
//! Function to get an arbitrary point in time
|
||||
double time();
|
||||
|
||||
|
||||
//! Function to get the resolution of time
|
||||
double tick();
|
||||
|
||||
|
||||
/*!
|
||||
* Sleep for X ms
|
||||
* @param N Time to sleep (ms)
|
||||
*/
|
||||
inline void sleep_ms( int N ) { std::this_thread::sleep_for( std::chrono::milliseconds( N ) ); }
|
||||
|
||||
|
||||
/*!
|
||||
* Sleep for X s
|
||||
* @param N Time to sleep (s)
|
||||
*/
|
||||
inline void sleep_s( int N ) { std::this_thread::sleep_for( std::chrono::seconds( N ) ); }
|
||||
|
||||
|
||||
//! Cause a segfault
|
||||
void cause_segfault();
|
||||
|
||||
|
||||
/*!
|
||||
* @brief Call system command
|
||||
* @details This function calls a system command, waits for the program
|
||||
* to execute, captures and returns the output and exit code.
|
||||
* @param[in] cmd Command to execute
|
||||
* @param[out] exit_code Exit code returned from child process
|
||||
* @return Returns string containing the output
|
||||
*/
|
||||
std::string exec( const StackTrace::string_view &cmd, int &exit_code );
|
||||
|
||||
|
||||
} // namespace Utilities
|
||||
} // namespace StackTrace
|
||||
|
||||
|
||||
#endif
|
193
StackTrace/string_view.h
Normal file
193
StackTrace/string_view.h
Normal file
|
@ -0,0 +1,193 @@
|
|||
#ifndef included_StackTrace_stringView
|
||||
#define included_StackTrace_stringView
|
||||
|
||||
#include <cstring>
|
||||
#include <ostream>
|
||||
|
||||
namespace StackTrace {
|
||||
|
||||
// string_view
|
||||
class string_view
|
||||
{
|
||||
public:
|
||||
// Constants:
|
||||
static constexpr size_t npos = size_t( -1 );
|
||||
|
||||
// Constructions
|
||||
constexpr string_view() noexcept : d_data( nullptr ), d_size( 0 ) {}
|
||||
constexpr string_view( string_view&& ) noexcept = default;
|
||||
constexpr string_view( const string_view& ) noexcept = default;
|
||||
constexpr string_view( const char* s ) : d_data( s ), d_size( s ? strlen( s ) : 0 ) {}
|
||||
constexpr string_view( const char* s, size_t count ) : d_data( s ), d_size( count ) {}
|
||||
inline string_view( const std::string& s ) : d_data( s.data() ), d_size( s.size() ) {}
|
||||
|
||||
// Assignment
|
||||
constexpr string_view& operator=( string_view&& other ) noexcept = default;
|
||||
constexpr string_view& operator=( const string_view& other ) noexcept = default;
|
||||
|
||||
// Iterators
|
||||
constexpr const char* begin() const noexcept { return d_data; }
|
||||
constexpr const char* end() const noexcept { return d_data + d_size; }
|
||||
constexpr const char* cbegin() const noexcept { return begin(); }
|
||||
constexpr const char* cend() const noexcept { return end(); }
|
||||
|
||||
// capacity
|
||||
constexpr size_t size() const noexcept { return d_size; }
|
||||
constexpr size_t length() const noexcept { return d_size; }
|
||||
constexpr bool empty() const noexcept { return d_size == 0; }
|
||||
|
||||
// Element access
|
||||
constexpr const char& operator[]( size_t pos ) const
|
||||
{
|
||||
if ( pos >= d_size )
|
||||
throw std::out_of_range( "string_view[]" );
|
||||
return d_data[pos];
|
||||
}
|
||||
constexpr const char& at( size_t pos ) const
|
||||
{
|
||||
if ( pos >= d_size )
|
||||
throw std::out_of_range( "string_view::at()" );
|
||||
return d_data[pos];
|
||||
}
|
||||
constexpr const char& front() const
|
||||
{
|
||||
if ( d_size == 0 )
|
||||
throw std::out_of_range( "front()" );
|
||||
return d_data[0];
|
||||
}
|
||||
constexpr const char& back() const
|
||||
{
|
||||
if ( d_size == 0 )
|
||||
throw std::out_of_range( "back()" );
|
||||
return d_data[size() - 1];
|
||||
}
|
||||
constexpr const char* data() const noexcept { return d_data; }
|
||||
|
||||
// Swap data
|
||||
void swap( string_view& other ) noexcept
|
||||
{
|
||||
std::swap( d_data, other.d_data );
|
||||
std::swap( d_size, other.d_size );
|
||||
}
|
||||
|
||||
// String operations
|
||||
size_t copy( char* dest, size_t n, size_t pos = 0 ) const
|
||||
{
|
||||
if ( pos > size() )
|
||||
throw std::out_of_range( "string_view::copy()" );
|
||||
const size_t rlen = std::min( n, size() - pos );
|
||||
memcpy( dest, data() + pos, rlen );
|
||||
return rlen;
|
||||
}
|
||||
constexpr string_view substr( size_t pos = 0, size_t n = npos ) const
|
||||
{
|
||||
if ( pos > size() )
|
||||
throw std::out_of_range( "string_view::substr()" );
|
||||
return string_view( data() + pos, std::min( n, size() - pos ) );
|
||||
}
|
||||
|
||||
// Find
|
||||
constexpr size_t find( char ch, size_t pos = 0 ) const noexcept
|
||||
{
|
||||
for ( size_t i = pos; i < d_size; i++ )
|
||||
if ( d_data[i] == ch )
|
||||
return i;
|
||||
return std::string::npos;
|
||||
}
|
||||
constexpr size_t find( string_view v, size_t pos = 0 ) const noexcept
|
||||
{
|
||||
size_t i = pos;
|
||||
size_t N = v.size();
|
||||
if ( N == 0 || N > ( d_size - pos ) )
|
||||
return std::string::npos;
|
||||
while ( i < ( d_size - N + 1 ) ) {
|
||||
size_t j = 0;
|
||||
for ( j = 0; j < N && i + j < d_size; j++ )
|
||||
if ( d_data[i + j] != v[j] )
|
||||
break;
|
||||
if ( j == N )
|
||||
return i;
|
||||
i++;
|
||||
}
|
||||
return std::string::npos;
|
||||
}
|
||||
|
||||
// compare()
|
||||
constexpr int compare( const string_view& other ) const noexcept
|
||||
{
|
||||
int N = std::min( size(), other.size() );
|
||||
int result = 0;
|
||||
for ( int i = 0; i < N && result == 0; i++ )
|
||||
if ( d_data[i] != other[i] )
|
||||
result = d_data[i] < other[i] ? -i : i;
|
||||
if ( result == 0 )
|
||||
result = size() == other.size() ? 0 : size() < other.size() ? -1 : 1;
|
||||
return result;
|
||||
}
|
||||
constexpr int compare( size_t pos1, size_t n1, string_view other ) const
|
||||
{
|
||||
return substr( pos1, n1 ).compare( other );
|
||||
}
|
||||
constexpr int compare( size_t pos1, size_t n1, string_view other, size_t pos2, size_t n2 ) const
|
||||
{
|
||||
return substr( pos1, n1 ).compare( other.substr( pos2, n2 ) );
|
||||
}
|
||||
constexpr int compare( char const* s ) const { return compare( string_view( s ) ); }
|
||||
constexpr int compare( size_t pos1, size_t n1, char const* s ) const
|
||||
{
|
||||
return substr( pos1, n1 ).compare( string_view( s ) );
|
||||
}
|
||||
constexpr int compare( size_t pos1, size_t n1, char const* s, size_t n2 ) const
|
||||
{
|
||||
return substr( pos1, n1 ).compare( string_view( s, n2 ) );
|
||||
}
|
||||
|
||||
explicit operator std::string() const { return std::string( begin(), end() ); }
|
||||
std::string to_string() const { return std::string( begin(), end() ); }
|
||||
|
||||
private:
|
||||
const char* d_data;
|
||||
size_t d_size;
|
||||
};
|
||||
|
||||
|
||||
// Non-member functions:
|
||||
constexpr inline bool operator==( const string_view& lhs, const string_view& rhs ) noexcept
|
||||
{
|
||||
return lhs.compare( rhs ) == 0;
|
||||
}
|
||||
constexpr inline bool operator!=( const string_view& lhs, const string_view& rhs ) noexcept
|
||||
{
|
||||
return lhs.compare( rhs ) != 0;
|
||||
}
|
||||
constexpr inline bool operator<( const string_view& lhs, const string_view& rhs ) noexcept
|
||||
{
|
||||
return lhs.compare( rhs ) < 0;
|
||||
}
|
||||
|
||||
constexpr inline bool operator<=( const string_view& lhs, const string_view& rhs ) noexcept
|
||||
{
|
||||
return lhs.compare( rhs ) <= 0;
|
||||
}
|
||||
constexpr inline bool operator>( const string_view& lhs, const string_view& rhs ) noexcept
|
||||
{
|
||||
return lhs.compare( rhs ) > 0;
|
||||
}
|
||||
constexpr inline bool operator>=( const string_view& lhs, const string_view& rhs ) noexcept
|
||||
{
|
||||
return lhs.compare( rhs ) >= 0;
|
||||
}
|
||||
inline std::string to_string( const string_view& v ) { return std::string( v.begin(), v.end() ); }
|
||||
inline string_view to_string_view( std::string const& s )
|
||||
{
|
||||
return string_view( s.data(), s.size() );
|
||||
}
|
||||
inline std::ostream& operator<<( std::ostream& out, const string_view& s )
|
||||
{
|
||||
out << s.data();
|
||||
return out;
|
||||
}
|
||||
|
||||
} // namespace StackTrace
|
||||
|
||||
#endif
|
|
@ -15,11 +15,9 @@
|
|||
*/
|
||||
#include "analysis/Minkowski.h"
|
||||
#include "analysis/pmmc.h"
|
||||
#include "analysis/analysis.h"
|
||||
#include "common/Domain.h"
|
||||
#include "common/Communication.h"
|
||||
#include "analysis/analysis.h"
|
||||
|
||||
#include "shared_ptr.h"
|
||||
#include "common/Utilities.h"
|
||||
#include "common/MPI_Helpers.h"
|
||||
#include "IO/MeshDatabase.h"
|
||||
|
@ -28,6 +26,8 @@
|
|||
|
||||
#include "ProfilerApp.h"
|
||||
|
||||
#include <memory>
|
||||
|
||||
|
||||
#define PI 3.14159265359
|
||||
|
||||
|
@ -39,6 +39,10 @@ Minkowski::Minkowski(std::shared_ptr <Domain> dm):
|
|||
Nx=dm->Nx; Ny=dm->Ny; Nz=dm->Nz;
|
||||
Volume=double((Nx-2)*(Ny-2)*(Nz-2))*double(Dm->nprocx()*Dm->nprocy()*Dm->nprocz());
|
||||
|
||||
id.resize(Nx,Ny,Nz); id.fill(0);
|
||||
label.resize(Nx,Ny,Nz); label.fill(0);
|
||||
distance.resize(Nx,Ny,Nz); distance.fill(0);
|
||||
|
||||
if (Dm->rank()==0){
|
||||
LOGFILE = fopen("minkowski.csv","a+");
|
||||
if (fseek(LOGFILE,0,SEEK_SET) == fseek(LOGFILE,0,SEEK_CUR))
|
||||
|
@ -57,22 +61,6 @@ Minkowski::~Minkowski()
|
|||
if ( LOGFILE!=NULL ) { fclose(LOGFILE); }
|
||||
}
|
||||
|
||||
double Minkowski::V(){
|
||||
return Vi_global;
|
||||
}
|
||||
|
||||
double Minkowski::A(){
|
||||
return Ai_global;
|
||||
}
|
||||
|
||||
double Minkowski::J(){
|
||||
return Ji_global;
|
||||
}
|
||||
|
||||
double Minkowski::X(){
|
||||
return Xi_global;
|
||||
}
|
||||
|
||||
void Minkowski::ComputeScalar(const DoubleArray& Field, const double isovalue)
|
||||
{
|
||||
PROFILE_START("ComputeScalar");
|
||||
|
@ -133,6 +121,9 @@ void Minkowski::ComputeScalar(const DoubleArray& Field, const double isovalue)
|
|||
}
|
||||
}
|
||||
}
|
||||
// convert X for 2D manifold to 3D object
|
||||
Xi *= 0.5;
|
||||
|
||||
MPI_Barrier(Dm->Comm);
|
||||
// Phase averages
|
||||
MPI_Allreduce(&Vi,&Vi_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm);
|
||||
|
@ -143,6 +134,74 @@ void Minkowski::ComputeScalar(const DoubleArray& Field, const double isovalue)
|
|||
PROFILE_STOP("ComputeScalar");
|
||||
}
|
||||
|
||||
|
||||
void Minkowski::MeasureObject(){
|
||||
/*
|
||||
* compute the distance to an object
|
||||
*
|
||||
* THIS ALGORITHM ASSUMES THAT id() is populated with phase id to distinguish objects
|
||||
* 0 - labels the object
|
||||
* 1 - labels the rest of the
|
||||
*/
|
||||
|
||||
for (int k=0; k<Nz; k++){
|
||||
for (int j=0; j<Ny; j++){
|
||||
for (int i=0; i<Nx; i++){
|
||||
distance(i,j,k) =2.0*double(id(i,j,k))-1.0;
|
||||
}
|
||||
}
|
||||
}
|
||||
CalcDist(distance,id,*Dm);
|
||||
ComputeScalar(distance,0.0);
|
||||
|
||||
}
|
||||
|
||||
|
||||
int Minkowski::MeasureConnectedPathway(){
|
||||
/*
|
||||
* compute the connected pathway for object with LABEL in id field
|
||||
* compute the labels for connected components
|
||||
* compute the distance to the connected pathway
|
||||
*
|
||||
* THIS ALGORITHM ASSUMES THAT id() is populated with phase id to distinguish objects
|
||||
*/
|
||||
|
||||
char LABEL = 0;
|
||||
for (int k=0; k<Nz; k++){
|
||||
for (int j=0; j<Ny; j++){
|
||||
for (int i=0; i<Nx; i++){
|
||||
if (id(i,j,k) == LABEL){
|
||||
distance(i,j,k) = 1.0;
|
||||
}
|
||||
else
|
||||
distance(i,j,k) = -1.0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Extract only the connected part of NWP
|
||||
double vF=0.0;
|
||||
n_connected_components = ComputeGlobalBlobIDs(Nx-2,Ny-2,Nz-2,Dm->rank_info,distance,distance,vF,vF,label,Dm->Comm);
|
||||
// int n_connected_components = ComputeGlobalPhaseComponent(Nx-2,Ny-2,Nz-2,Dm->rank_info,const IntArray &PhaseID, int &VALUE, BlobIDArray &GlobalBlobID, Dm->Comm )
|
||||
MPI_Barrier(Dm->Comm);
|
||||
|
||||
for (int k=0; k<Nz; k++){
|
||||
for (int j=0; j<Ny; j++){
|
||||
for (int i=0; i<Nx; i++){
|
||||
if ( label(i,j,k) == 0){
|
||||
id(i,j,k) = 0;
|
||||
}
|
||||
else{
|
||||
id(i,j,k) = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
MeasureObject();
|
||||
return n_connected_components;
|
||||
}
|
||||
|
||||
|
||||
void Minkowski::PrintAll()
|
||||
{
|
||||
if (Dm->rank()==0){
|
||||
|
|
|
@ -17,14 +17,15 @@
|
|||
#ifndef Minkowski_INC
|
||||
#define Minkowski_INC
|
||||
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#include "analysis/dcel.h"
|
||||
#include "common/Domain.h"
|
||||
#include "common/Communication.h"
|
||||
#include "analysis/analysis.h"
|
||||
#include "analysis/distance.h"
|
||||
|
||||
#include "shared_ptr.h"
|
||||
#include "common/Utilities.h"
|
||||
#include "common/MPI_Helpers.h"
|
||||
#include "IO/MeshDatabase.h"
|
||||
|
@ -45,6 +46,9 @@ class Minkowski{
|
|||
public:
|
||||
//...........................................................................
|
||||
std::shared_ptr <Domain> Dm;
|
||||
Array <char> id;
|
||||
Array <int> label;
|
||||
Array <double> distance;
|
||||
//...........................................................................
|
||||
// Averaging variables
|
||||
//...........................................................................
|
||||
|
@ -52,19 +56,30 @@ public:
|
|||
double Ai,Ji,Xi,Vi;
|
||||
// Global averages (all processes)
|
||||
double Ai_global,Ji_global,Xi_global,Vi_global;
|
||||
|
||||
int n_connected_components;
|
||||
//...........................................................................
|
||||
int Nx,Ny,Nz;
|
||||
double V();
|
||||
double A();
|
||||
double J();
|
||||
double X();
|
||||
|
||||
double V(){
|
||||
return Vi;
|
||||
}
|
||||
double A(){
|
||||
return Ai;
|
||||
}
|
||||
double H(){
|
||||
return Ji;
|
||||
}
|
||||
double X(){
|
||||
return Xi;
|
||||
}
|
||||
|
||||
//..........................................................................
|
||||
Minkowski(){};//NULL CONSTRUCTOR
|
||||
Minkowski(std::shared_ptr <Domain> Dm);
|
||||
~Minkowski();
|
||||
void MeasureObject();
|
||||
int MeasureConnectedPathway();
|
||||
void ComputeScalar(const DoubleArray& Field, const double isovalue);
|
||||
|
||||
void PrintAll();
|
||||
|
||||
};
|
||||
|
|
558
analysis/SubPhase.cpp
Normal file
558
analysis/SubPhase.cpp
Normal file
|
@ -0,0 +1,558 @@
|
|||
#include "analysis/SubPhase.h"
|
||||
|
||||
// Constructor
|
||||
SubPhase::SubPhase(std::shared_ptr <Domain> dm):
|
||||
Dm(dm)
|
||||
{
|
||||
Nx=dm->Nx; Ny=dm->Ny; Nz=dm->Nz;
|
||||
Volume=(Nx-2)*(Ny-2)*(Nz-2)*Dm->nprocx()*Dm->nprocy()*Dm->nprocz()*1.0;
|
||||
|
||||
morph_w = std::shared_ptr<Minkowski>(new Minkowski(Dm));
|
||||
morph_n = std::shared_ptr<Minkowski>(new Minkowski(Dm));
|
||||
morph_i = std::shared_ptr<Minkowski>(new Minkowski(Dm));
|
||||
|
||||
// Global arrays
|
||||
PhaseID.resize(Nx,Ny,Nz); PhaseID.fill(0);
|
||||
Label_WP.resize(Nx,Ny,Nz); Label_WP.fill(0);
|
||||
Label_NWP.resize(Nx,Ny,Nz); Label_NWP.fill(0);
|
||||
Rho_n.resize(Nx,Ny,Nz); Rho_n.fill(0);
|
||||
Rho_w.resize(Nx,Ny,Nz); Rho_w.fill(0);
|
||||
Pressure.resize(Nx,Ny,Nz); Pressure.fill(0);
|
||||
Phi.resize(Nx,Ny,Nz); Phi.fill(0);
|
||||
DelPhi.resize(Nx,Ny,Nz); DelPhi.fill(0);
|
||||
Vel_x.resize(Nx,Ny,Nz); Vel_x.fill(0); // Gradient of the phase indicator field
|
||||
Vel_y.resize(Nx,Ny,Nz); Vel_y.fill(0);
|
||||
Vel_z.resize(Nx,Ny,Nz); Vel_z.fill(0);
|
||||
SDs.resize(Nx,Ny,Nz); SDs.fill(0);
|
||||
//.........................................
|
||||
|
||||
//.........................................
|
||||
if (Dm->rank()==0){
|
||||
TIMELOG = fopen("subphase.csv","a+");
|
||||
if (fseek(TIMELOG,0,SEEK_SET) == fseek(TIMELOG,0,SEEK_CUR))
|
||||
{
|
||||
// If timelog is empty, write a short header to list the averages
|
||||
//fprintf(TIMELOG,"--------------------------------------------------------------------------------------\n");
|
||||
fprintf(TIMELOG,"time rn rw nun nuw Fx Fy Fz iftwn ");
|
||||
fprintf(TIMELOG,"pwc pwd pnc pnd "); // pressures
|
||||
fprintf(TIMELOG,"Mwc Mwd Mwi Mnc Mnd Mni "); // mass
|
||||
fprintf(TIMELOG,"Pwc_x Pwd_x Pwi_x Pnc_x Pnd_x Pni_x "); // momentum
|
||||
fprintf(TIMELOG,"Pwc_y Pwd_y Pwi_y Pnc_y Pnd_y Pni_y ");
|
||||
fprintf(TIMELOG,"Pwc_z Pwd_z Pwi_z Pnc_z Pnd_z Pni_z ");
|
||||
fprintf(TIMELOG,"Kwc Kwd Kwi Knc Knd Kni "); // kinetic energy
|
||||
fprintf(TIMELOG,"Vwc Awc Hwc Xwc "); // wc region
|
||||
fprintf(TIMELOG,"Vwd Awd Hwd Xwd Nwd "); // wd region
|
||||
fprintf(TIMELOG,"Vnc Anc Hnc Xnc "); // nc region
|
||||
fprintf(TIMELOG,"Vnd And Hnd Xnd Nnd "); // nd region
|
||||
fprintf(TIMELOG,"Vi Ai Hi Xi\n"); // interface region
|
||||
// stress tensor?
|
||||
}
|
||||
|
||||
}
|
||||
else{
|
||||
char LocalRankString[8];
|
||||
sprintf(LocalRankString,"%05d",Dm->rank());
|
||||
char LocalRankFilename[40];
|
||||
sprintf(LocalRankFilename,"%s%s","subphase.csv.",LocalRankString);
|
||||
TIMELOG = fopen(LocalRankFilename,"a+");
|
||||
//fprintf(TIMELOG,"--------------------------------------------------------------------------------------\n");
|
||||
fprintf(TIMELOG,"time rn rw nun nuw Fx Fy Fz iftwn ");
|
||||
fprintf(TIMELOG,"pwc pwd pnc pnd "); // pressures
|
||||
fprintf(TIMELOG,"Mwc Mwd Mwi Mnc Mnd Mni "); // mass
|
||||
fprintf(TIMELOG,"Pwc_x Pwd_x Pwi_x Pnc_x Pnd_x Pni_x "); // momentum
|
||||
fprintf(TIMELOG,"Pwc_y Pwd_y Pwi_y Pnc_y Pnd_y Pni_y ");
|
||||
fprintf(TIMELOG,"Pwc_z Pwd_z Pwi_z Pnc_z Pnd_z Pni_z ");
|
||||
fprintf(TIMELOG,"Kwc Kwd Kwi Knc Knd Kni "); // kinetic energy
|
||||
fprintf(TIMELOG,"Vwc Awc Hwc Xwc "); // wc region
|
||||
fprintf(TIMELOG,"Vwd Awd Hwd Xwd Nwd "); // wd region
|
||||
fprintf(TIMELOG,"Vnc Anc Hnc Xnc "); // nc region
|
||||
fprintf(TIMELOG,"Vnd And Hnd Xnd Nnd "); // nd region
|
||||
fprintf(TIMELOG,"Vi Ai Hi Xi\n"); // interface region
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Destructor
|
||||
SubPhase::~SubPhase()
|
||||
{
|
||||
if ( TIMELOG!=NULL ) { fclose(TIMELOG); }
|
||||
|
||||
}
|
||||
|
||||
void SubPhase::Write(int timestep)
|
||||
{
|
||||
if (Dm->rank()==0){
|
||||
fprintf(TIMELOG,"%i %.5g %.5g %.5g %.5g %.5g %.5g %.5g %.5g ",timestep,rho_n,rho_w,nu_n,nu_w,Fx,Fy,Fz,gamma_wn);
|
||||
fprintf(TIMELOG,"%.5g %.5g %.5g %.5g ",gwc.p, gwd.p, gnc.p, gnd.p);
|
||||
fprintf(TIMELOG,"%.5g %.5g %.5g %.5g %.5g %.5g ",gwc.M, gwd.M, giwn.Mw, gnc.M, gnd.M, giwn.Mn);
|
||||
fprintf(TIMELOG,"%.5g %.5g %.5g %.5g %.5g %.5g ",gwc.Px, gwd.Px, giwn.Px, gnc.Px, gnd.Px, giwn.Px);
|
||||
fprintf(TIMELOG,"%.5g %.5g %.5g %.5g %.5g %.5g ",gwc.Py, gwd.Py, giwn.Py, gnc.Py, gnd.Py, giwn.Py);
|
||||
fprintf(TIMELOG,"%.5g %.5g %.5g %.5g %.5g %.5g ",gwc.Pz, gwd.Pz, giwn.Pz, gnc.Pz, gnd.Pz, giwn.Pz);
|
||||
fprintf(TIMELOG,"%.5g %.5g %.5g %.5g %.5g %.5g ",gwc.K, gwd.K, giwn.Kw, gnc.K, gnd.K, giwn.Kn);
|
||||
fprintf(TIMELOG,"%.5g %.5g %.5g %.5g ",gwc.V, gwc.A, gwc.H, gwc.X);
|
||||
fprintf(TIMELOG,"%.5g %.5g %.5g %.5g %i ",gwd.V, gwd.A, gwd.H, gwd.X, gwd.Nc);
|
||||
fprintf(TIMELOG,"%.5g %.5g %.5g %.5g ",gnc.V, gnc.A, gnc.H, gnc.X);
|
||||
fprintf(TIMELOG,"%.5g %.5g %.5g %.5g %i ",gnd.V, gnd.A, gnd.H, gnd.X, gnd.Nc);
|
||||
fprintf(TIMELOG,"%.5g %.5g %.5g %.5g\n",giwn.V, giwn.A, giwn.H, giwn.X);
|
||||
fflush(TIMELOG);
|
||||
}
|
||||
else{
|
||||
fprintf(TIMELOG,"%i %.5g %.5g %.5g %.5g %.5g %.5g %.5g %.5g ",timestep,rho_n,rho_w,nu_n,nu_w,Fx,Fy,Fz,gamma_wn);
|
||||
fprintf(TIMELOG,"%.5g %.5g %.5g %.5g ",wc.p, wd.p, nc.p, nd.p);
|
||||
fprintf(TIMELOG,"%.5g %.5g %.5g %.5g %.5g %.5g ",wc.M, wd.M, iwn.Mw, nc.M, nd.M, iwn.Mn);
|
||||
fprintf(TIMELOG,"%.5g %.5g %.5g %.5g %.5g %.5g ",wc.Px, wd.Px, iwn.Px, nc.Px, nd.Px, iwn.Px);
|
||||
fprintf(TIMELOG,"%.5g %.5g %.5g %.5g %.5g %.5g ",wc.Py, wd.Py, iwn.Py, nc.Py, nd.Py, iwn.Py);
|
||||
fprintf(TIMELOG,"%.5g %.5g %.5g %.5g %.5g %.5g ",wc.Pz, wd.Pz, iwn.Pz, nc.Pz, nd.Pz, iwn.Pz);
|
||||
fprintf(TIMELOG,"%.5g %.5g %.5g %.5g %.5g %.5g ",wc.K, wd.K, iwn.Kw, nc.K, nd.K, iwn.Kn);
|
||||
fprintf(TIMELOG,"%.5g %.5g %.5g %.5g ",wc.V, wc.A, wc.H, wc.X);
|
||||
fprintf(TIMELOG,"%.5g %.5g %.5g %.5g %i ",wd.V, wd.A, wd.H, wd.X, wd.Nc);
|
||||
fprintf(TIMELOG,"%.5g %.5g %.5g %.5g ",nc.V, nc.A, nc.H, nc.X);
|
||||
fprintf(TIMELOG,"%.5g %.5g %.5g %.5g %i ",nd.V, nd.A, nd.H, nd.X, nd.Nc);
|
||||
fprintf(TIMELOG,"%.5g %.5g %.5g %.5g\n",iwn.V, iwn.A, iwn.H, iwn.X);
|
||||
fflush(TIMELOG);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void SubPhase::SetParams(double rhoA, double rhoB, double tauA, double tauB, double force_x, double force_y, double force_z, double alpha, double B)
|
||||
{
|
||||
Fx = force_x;
|
||||
Fy = force_y;
|
||||
Fz = force_z;
|
||||
rho_n = rhoA;
|
||||
rho_w = rhoB;
|
||||
nu_n = (tauA-0.5)/3.f;
|
||||
nu_w = (tauB-0.5)/3.f;
|
||||
gamma_wn = 5.796*alpha;
|
||||
beta = B;
|
||||
}
|
||||
|
||||
void SubPhase::Basic(){
|
||||
int i,j,k,n,imin,jmin,kmin,kmax;
|
||||
|
||||
// If external boundary conditions are set, do not average over the inlet
|
||||
kmin=1; kmax=Nz-1;
|
||||
if (Dm->BoundaryCondition > 0 && Dm->kproc() == 0) kmin=4;
|
||||
if (Dm->BoundaryCondition > 0 && Dm->kproc() == Dm->nprocz()-1) kmax=Nz-4;
|
||||
|
||||
imin=jmin=1;
|
||||
// If inlet layers exist use these as default
|
||||
if (Dm->inlet_layers_x > 0) imin = Dm->inlet_layers_x;
|
||||
if (Dm->inlet_layers_y > 0) jmin = Dm->inlet_layers_y;
|
||||
if (Dm->inlet_layers_z > 0) kmin = Dm->inlet_layers_z;
|
||||
|
||||
nb.reset(); wb.reset();
|
||||
/*
|
||||
//Dm->CommunicateMeshHalo(Phi);
|
||||
for (int k=1; k<Nz-1; k++){
|
||||
for (int j=1; j<Ny-1; j++){
|
||||
for (int i=1; i<Nx-1; i++){
|
||||
// Compute all of the derivatives using finite differences
|
||||
double fx = 0.5*(Phi(i+1,j,k) - Phi(i-1,j,k));
|
||||
double fy = 0.5*(Phi(i,j+1,k) - Phi(i,j-1,k));
|
||||
double fz = 0.5*(Phi(i,j,k+1) - Phi(i,j,k-1));
|
||||
DelPhi(i,j,k) = sqrt(fx*fx+fy*fy+fz*fz);
|
||||
}
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
double nA,nB;
|
||||
for (k=kmin; k<kmax; k++){
|
||||
for (j=jmin; j<Ny-1; j++){
|
||||
for (i=imin; i<Nx-1; i++){
|
||||
n = k*Nx*Ny + j*Nx + i;
|
||||
// Compute volume averages
|
||||
if ( Dm->id[n] > 0 ){
|
||||
// compute density
|
||||
double nA = Rho_n(n);
|
||||
double nB = Rho_w(n);
|
||||
double phi = (nA-nB)/(nA+nB);
|
||||
Phi(n) = phi;
|
||||
|
||||
if ( phi > 0.0 ){
|
||||
nb.V += 1.0;
|
||||
nb.M += nA*rho_n;
|
||||
// velocity
|
||||
nb.Px += rho_n*nA*Vel_x(n);
|
||||
nb.Py += rho_n*nA*Vel_y(n);
|
||||
nb.Pz += rho_n*nA*Vel_z(n);
|
||||
}
|
||||
else{
|
||||
wb.M += nB*rho_w;
|
||||
wb.V += 1.0;
|
||||
|
||||
// velocity
|
||||
wb.Px += rho_w*nB*Vel_x(n);
|
||||
wb.Py += rho_w*nB*Vel_y(n);
|
||||
wb.Pz += rho_w*nB*Vel_z(n);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
gwb.V=sumReduce( Dm->Comm, wb.V);
|
||||
gnb.V=sumReduce( Dm->Comm, nb.V);
|
||||
gwb.M=sumReduce( Dm->Comm, wb.M);
|
||||
gnb.M=sumReduce( Dm->Comm, nb.M);
|
||||
gwb.Px=sumReduce( Dm->Comm, wb.Px);
|
||||
gwb.Py=sumReduce( Dm->Comm, wb.Py);
|
||||
gwb.Pz=sumReduce( Dm->Comm, wb.Pz);
|
||||
gnb.Px=sumReduce( Dm->Comm, nb.Px);
|
||||
gnb.Py=sumReduce( Dm->Comm, nb.Py);
|
||||
gnb.Pz=sumReduce( Dm->Comm, nb.Pz);
|
||||
|
||||
if (Dm->rank() == 0){
|
||||
double saturation=gwb.V/(gwb.V + gnb.V);
|
||||
double water_flow_rate=gwb.V*sqrt(gwb.Px*gwb.Px + gwb.Py*gwb.Py + gwb.Pz*gwb.Pz)/gwb.M;
|
||||
double not_water_flow_rate=gnb.V*sqrt(gnb.Px*gnb.Px + gnb.Py*gnb.Py + gnb.Pz*gnb.Pz)/gnb.M;
|
||||
double total_flow_rate = water_flow_rate + not_water_flow_rate;
|
||||
double fractional_flow= water_flow_rate / total_flow_rate;
|
||||
printf(" water saturation = %f, fractional flow =%f \n",saturation,fractional_flow);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
inline void InterfaceTransportMeasures( double beta, double rA, double rB, double nA, double nB,
|
||||
double nx, double ny, double nz, double ux, double uy, double uz, interface &I){
|
||||
|
||||
double A1,A2,A3,A4,A5,A6;
|
||||
double B1,B2,B3,B4,B5,B6;
|
||||
double nAB,delta;
|
||||
// Instantiate mass transport distributions
|
||||
// Stationary value - distribution 0
|
||||
nAB = 1.0/(nA+nB);
|
||||
//...............................................
|
||||
// q = 0,2,4
|
||||
// Cq = {1,0,0}, {0,1,0}, {0,0,1}
|
||||
delta = beta*nA*nB*nAB*0.1111111111111111*nx;
|
||||
if (!(nA*nB*nAB>0)) delta=0;
|
||||
A1 = nA*(0.1111111111111111*(1+4.5*ux))+delta;
|
||||
B1 = nB*(0.1111111111111111*(1+4.5*ux))-delta;
|
||||
A2 = nA*(0.1111111111111111*(1-4.5*ux))-delta;
|
||||
B2 = nB*(0.1111111111111111*(1-4.5*ux))+delta;
|
||||
|
||||
//...............................................
|
||||
// Cq = {0,1,0}
|
||||
delta = beta*nA*nB*nAB*0.1111111111111111*ny;
|
||||
if (!(nA*nB*nAB>0)) delta=0;
|
||||
A3 = nA*(0.1111111111111111*(1+4.5*uy))+delta;
|
||||
B3 = nB*(0.1111111111111111*(1+4.5*uy))-delta;
|
||||
A4 = nA*(0.1111111111111111*(1-4.5*uy))-delta;
|
||||
B4 = nB*(0.1111111111111111*(1-4.5*uy))+delta;
|
||||
|
||||
//...............................................
|
||||
// q = 4
|
||||
// Cq = {0,0,1}
|
||||
delta = beta*nA*nB*nAB*0.1111111111111111*nz;
|
||||
if (!(nA*nB*nAB>0)) delta=0;
|
||||
A5 = nA*(0.1111111111111111*(1+4.5*uz))+delta;
|
||||
B5 = nB*(0.1111111111111111*(1+4.5*uz))-delta;
|
||||
A6 = nA*(0.1111111111111111*(1-4.5*uz))-delta;
|
||||
B6 = nB*(0.1111111111111111*(1-4.5*uz))+delta;
|
||||
|
||||
double unx = (A1-A2);
|
||||
double uny = (A3-A4);
|
||||
double unz = (A5-A6);
|
||||
double uwx = (B1-B2);
|
||||
double uwy = (B3-B4);
|
||||
double uwz = (B5-B6);
|
||||
|
||||
I.Mn += rA*nA;
|
||||
I.Mw += rB*nB;
|
||||
I.Pnx += rA*nA*unx;
|
||||
I.Pny += rA*nA*uny;
|
||||
I.Pnz += rA*nA*unz;
|
||||
I.Pwx += rB*nB*uwx;
|
||||
I.Pwy += rB*nB*uwy;
|
||||
I.Pwz += rB*nB*uwz;
|
||||
I.Kn += rA*nA*(unx*unx + uny*uny + unz*unz);
|
||||
I.Kw += rB*nB*(uwx*uwx + uwy*uwy + uwz*uwz);
|
||||
|
||||
}
|
||||
|
||||
void SubPhase::Full(){
|
||||
int i,j,k,n,imin,jmin,kmin,kmax;
|
||||
|
||||
// If external boundary conditions are set, do not average over the inlet
|
||||
kmin=1; kmax=Nz-1;
|
||||
if (Dm->BoundaryCondition > 0 && Dm->kproc() == 0) kmin=4;
|
||||
if (Dm->BoundaryCondition > 0 && Dm->kproc() == Dm->nprocz()-1) kmax=Nz-4;
|
||||
|
||||
imin=jmin=1;
|
||||
// If inlet layers exist use these as default
|
||||
if (Dm->inlet_layers_x > 0) imin = Dm->inlet_layers_x;
|
||||
if (Dm->inlet_layers_y > 0) jmin = Dm->inlet_layers_y;
|
||||
if (Dm->inlet_layers_z > 0) kmin = Dm->inlet_layers_z;
|
||||
|
||||
nd.reset(); nc.reset(); wd.reset(); wc.reset(); iwn.reset();
|
||||
|
||||
Dm->CommunicateMeshHalo(Phi);
|
||||
for (int k=1; k<Nz-1; k++){
|
||||
for (int j=1; j<Ny-1; j++){
|
||||
for (int i=1; i<Nx-1; i++){
|
||||
// Compute all of the derivatives using finite differences
|
||||
double fx = 0.5*(Phi(i+1,j,k) - Phi(i-1,j,k));
|
||||
double fy = 0.5*(Phi(i,j+1,k) - Phi(i,j-1,k));
|
||||
double fz = 0.5*(Phi(i,j,k+1) - Phi(i,j,k-1));
|
||||
DelPhi(i,j,k) = sqrt(fx*fx+fy*fy+fz*fz);
|
||||
}
|
||||
}
|
||||
}
|
||||
Dm->CommunicateMeshHalo(DelPhi);
|
||||
|
||||
/* Set up geometric analysis of each region */
|
||||
|
||||
// non-wetting
|
||||
for (k=0; k<Nz; k++){
|
||||
for (j=0; j<Ny; j++){
|
||||
for (i=0; i<Nx; i++){
|
||||
n = k*Nx*Ny+j*Nx+i;
|
||||
if (!(Dm->id[n] > 0)){
|
||||
// Solid phase
|
||||
morph_n->id(i,j,k) = 1;
|
||||
|
||||
}
|
||||
else if (Phi(n) > 0.0){
|
||||
// non-wetting phase
|
||||
morph_n->id(i,j,k) = 0;
|
||||
}
|
||||
else {
|
||||
// wetting phase
|
||||
morph_n->id(i,j,k) = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// measure the whole object
|
||||
morph_n->MeasureObject();
|
||||
nd.V = morph_n->V();
|
||||
nd.A = morph_n->A();
|
||||
nd.H = morph_n->H();
|
||||
nd.X = morph_n->X();
|
||||
// measure only the connected part
|
||||
nd.Nc = morph_n->MeasureConnectedPathway();
|
||||
nc.V = morph_n->V();
|
||||
nc.A = morph_n->A();
|
||||
nc.H = morph_n->H();
|
||||
nc.X = morph_n->X();
|
||||
// update disconnected part
|
||||
nd.V -= nc.V;
|
||||
nd.A -= nc.A;
|
||||
nd.H -= nc.H;
|
||||
nd.X -= nc.X;
|
||||
|
||||
// compute global entities
|
||||
gnc.V=sumReduce( Dm->Comm, nc.V);
|
||||
gnc.A=sumReduce( Dm->Comm, nc.A);
|
||||
gnc.H=sumReduce( Dm->Comm, nc.H);
|
||||
gnc.X=sumReduce( Dm->Comm, nc.X);
|
||||
gnd.V=sumReduce( Dm->Comm, nd.V);
|
||||
gnd.A=sumReduce( Dm->Comm, nd.A);
|
||||
gnd.H=sumReduce( Dm->Comm, nd.H);
|
||||
gnd.X=sumReduce( Dm->Comm, nd.X);
|
||||
gnd.Nc = nd.Nc;
|
||||
// wetting
|
||||
for (k=0; k<Nz; k++){
|
||||
for (j=0; j<Ny; j++){
|
||||
for (i=0; i<Nx; i++){
|
||||
n = k*Nx*Ny+j*Nx+i;
|
||||
if (!(Dm->id[n] > 0)){
|
||||
// Solid phase
|
||||
morph_w->id(i,j,k) = 1;
|
||||
|
||||
}
|
||||
else if (Phi(n) < 0.0){
|
||||
// wetting phase
|
||||
morph_w->id(i,j,k) = 0;
|
||||
}
|
||||
else {
|
||||
// non-wetting phase
|
||||
morph_w->id(i,j,k) = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
morph_w->MeasureObject();
|
||||
wd.V = morph_w->V();
|
||||
wd.A = morph_w->A();
|
||||
wd.H = morph_w->H();
|
||||
wd.X = morph_w->X();
|
||||
// measure only the connected part
|
||||
wd.Nc = morph_w->MeasureConnectedPathway();
|
||||
wc.V = morph_w->V();
|
||||
wc.A = morph_w->A();
|
||||
wc.H = morph_w->H();
|
||||
wc.X = morph_w->X();
|
||||
// update disconnected part
|
||||
wd.V -= wc.V;
|
||||
wd.A -= wc.A;
|
||||
wd.H -= wc.H;
|
||||
wd.X -= wc.X;
|
||||
// compute global entities
|
||||
gwc.V=sumReduce( Dm->Comm, wc.V);
|
||||
gwc.A=sumReduce( Dm->Comm, wc.A);
|
||||
gwc.H=sumReduce( Dm->Comm, wc.H);
|
||||
gwc.X=sumReduce( Dm->Comm, wc.X);
|
||||
gwd.V=sumReduce( Dm->Comm, wd.V);
|
||||
gwd.A=sumReduce( Dm->Comm, wd.A);
|
||||
gwd.H=sumReduce( Dm->Comm, wd.H);
|
||||
gwd.X=sumReduce( Dm->Comm, wd.X);
|
||||
gwd.Nc = wd.Nc;
|
||||
|
||||
/* Set up geometric analysis of interface region */
|
||||
for (k=0; k<Nz; k++){
|
||||
for (j=0; j<Ny; j++){
|
||||
for (i=0; i<Nx; i++){
|
||||
n = k*Nx*Ny+j*Nx+i;
|
||||
if (!(Dm->id[n] > 0)){
|
||||
// Solid phase
|
||||
morph_i->id(i,j,k) = 1;
|
||||
}
|
||||
else if (DelPhi(n) > 1e-4){
|
||||
// interface
|
||||
morph_i->id(i,j,k) = 0;
|
||||
}
|
||||
else {
|
||||
// not interface
|
||||
morph_i->id(i,j,k) = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
morph_i->MeasureObject();
|
||||
iwn.V = morph_i->V();
|
||||
iwn.A = morph_i->A();
|
||||
iwn.H = morph_i->H();
|
||||
iwn.X = morph_i->X();
|
||||
giwn.V=sumReduce( Dm->Comm, iwn.V);
|
||||
giwn.A=sumReduce( Dm->Comm, iwn.A);
|
||||
giwn.H=sumReduce( Dm->Comm, iwn.H);
|
||||
giwn.X=sumReduce( Dm->Comm, iwn.X);
|
||||
|
||||
double vol_nc_bulk = 0.0;
|
||||
double vol_wc_bulk = 0.0;
|
||||
double vol_nd_bulk = 0.0;
|
||||
double vol_wd_bulk = 0.0;
|
||||
for (k=kmin; k<kmax; k++){
|
||||
for (j=jmin; j<Ny-1; j++){
|
||||
for (i=imin; i<Nx-1; i++){
|
||||
n = k*Nx*Ny + j*Nx + i;
|
||||
// Compute volume averages
|
||||
if ( Dm->id[n] > 0 ){
|
||||
// compute density
|
||||
double nA = Rho_n(n);
|
||||
double nB = Rho_w(n);
|
||||
double phi = (nA-nB)/(nA+nB);
|
||||
double ux = Vel_x(n);
|
||||
double uy = Vel_y(n);
|
||||
double uz = Vel_z(n);
|
||||
Phi(n) = phi;
|
||||
|
||||
if (DelPhi(n) > 1e-4){
|
||||
// interface region
|
||||
double nx = 0.5*(Phi(i+1,j,k)-Phi(i-1,j,k));
|
||||
double ny = 0.5*(Phi(i,j+1,k)-Phi(i,j-1,k));
|
||||
double nz = 0.5*(Phi(i,j,k+1)-Phi(i,j,k-1));
|
||||
InterfaceTransportMeasures( beta, rho_w, rho_n, nA, nB, nx, ny, nz, ux, uy, uz, iwn);
|
||||
}
|
||||
else if ( phi > 0.0){
|
||||
if (morph_n->label(i,j,k) > 0 ){
|
||||
vol_nd_bulk += 1.0;
|
||||
nd.M += nA*rho_n;
|
||||
nd.Px += nA*rho_n*ux;
|
||||
nd.Py += nA*rho_n*uy;
|
||||
nd.Pz += nA*rho_n*uz;
|
||||
nd.K += nA*rho_n*(ux*ux + uy*uy + uz*uz);
|
||||
nd.p += Pressure(n);
|
||||
}
|
||||
else{
|
||||
vol_nc_bulk += 1.0;
|
||||
nc.M += nA*rho_n;
|
||||
nc.Px += nA*rho_n*ux;
|
||||
nc.Py += nA*rho_n*uy;
|
||||
nc.Pz += nA*rho_n*uz;
|
||||
nc.K += nA*rho_n*(ux*ux + uy*uy + uz*uz);
|
||||
nc.p += Pressure(n);
|
||||
}
|
||||
}
|
||||
else{
|
||||
// water region
|
||||
if (morph_w->label(i,j,k) > 0 ){
|
||||
vol_wd_bulk += 1.0;
|
||||
wd.M += nB*rho_w;
|
||||
wd.Px += nB*rho_w*ux;
|
||||
wd.Py += nB*rho_w*uy;
|
||||
wd.Pz += nB*rho_w*uz;
|
||||
wd.K += nB*rho_w*(ux*ux + uy*uy + uz*uz);
|
||||
wd.p += Pressure(n);
|
||||
}
|
||||
else{
|
||||
vol_wc_bulk += 1.0;
|
||||
wc.M += nB*rho_w;
|
||||
wc.Px += nB*rho_w*ux;
|
||||
wc.Py += nB*rho_w*uy;
|
||||
wc.Pz += nB*rho_w*uz;
|
||||
wc.K += nB*rho_w*(ux*ux + uy*uy + uz*uz);
|
||||
wc.p += Pressure(n);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
gnd.M=sumReduce( Dm->Comm, nd.M);
|
||||
gnd.Px=sumReduce( Dm->Comm, nd.Px);
|
||||
gnd.Py=sumReduce( Dm->Comm, nd.Py);
|
||||
gnd.Pz=sumReduce( Dm->Comm, nd.Pz);
|
||||
gnd.K=sumReduce( Dm->Comm, nd.K);
|
||||
gnd.p=sumReduce( Dm->Comm, nd.p);
|
||||
|
||||
gwd.M=sumReduce( Dm->Comm, wd.M);
|
||||
gwd.Px=sumReduce( Dm->Comm, wd.Px);
|
||||
gwd.Py=sumReduce( Dm->Comm, wd.Py);
|
||||
gwd.Pz=sumReduce( Dm->Comm, wd.Pz);
|
||||
gwd.K=sumReduce( Dm->Comm, wd.K);
|
||||
gwd.p=sumReduce( Dm->Comm, wd.p);
|
||||
|
||||
gnc.M=sumReduce( Dm->Comm, nc.M);
|
||||
gnc.Px=sumReduce( Dm->Comm, nc.Px);
|
||||
gnc.Py=sumReduce( Dm->Comm, nc.Py);
|
||||
gnc.Pz=sumReduce( Dm->Comm, nc.Pz);
|
||||
gnc.K=sumReduce( Dm->Comm, nc.K);
|
||||
gnc.p=sumReduce( Dm->Comm, nc.p);
|
||||
|
||||
gwc.M=sumReduce( Dm->Comm, wc.M);
|
||||
gwc.Px=sumReduce( Dm->Comm, wc.Px);
|
||||
gwc.Py=sumReduce( Dm->Comm, wc.Py);
|
||||
gwc.Pz=sumReduce( Dm->Comm, wc.Pz);
|
||||
gwc.K=sumReduce( Dm->Comm, wc.K);
|
||||
gwc.p=sumReduce( Dm->Comm, wc.p);
|
||||
|
||||
// pressure averaging
|
||||
if (vol_wc_bulk > 0.0)
|
||||
wc.p = wc.p /vol_wc_bulk;
|
||||
if (vol_nc_bulk > 0.0)
|
||||
nc.p = nc.p /vol_nc_bulk;
|
||||
if (vol_wd_bulk > 0.0)
|
||||
wd.p = wd.p /vol_wd_bulk;
|
||||
if (vol_nd_bulk > 0.0)
|
||||
nd.p = nd.p /vol_nd_bulk;
|
||||
|
||||
vol_wc_bulk=sumReduce( Dm->Comm, vol_wc_bulk);
|
||||
vol_wd_bulk=sumReduce( Dm->Comm, vol_wd_bulk);
|
||||
vol_nc_bulk=sumReduce( Dm->Comm, vol_nc_bulk);
|
||||
vol_nd_bulk=sumReduce( Dm->Comm, vol_nd_bulk);
|
||||
|
||||
if (vol_wc_bulk > 0.0)
|
||||
gwc.p = gwc.p /vol_wc_bulk;
|
||||
if (vol_nc_bulk > 0.0)
|
||||
gnc.p = gnc.p /vol_nc_bulk;
|
||||
if (vol_wd_bulk > 0.0)
|
||||
gwd.p = gwd.p /vol_wd_bulk;
|
||||
if (vol_nd_bulk > 0.0)
|
||||
gnd.p = gnd.p /vol_nd_bulk;
|
||||
}
|
||||
|
||||
|
||||
|
111
analysis/SubPhase.h
Normal file
111
analysis/SubPhase.h
Normal file
|
@ -0,0 +1,111 @@
|
|||
/*
|
||||
* Sub-phase averaging tools
|
||||
*/
|
||||
|
||||
#ifndef SubPhase_INC
|
||||
#define SubPhase_INC
|
||||
|
||||
#include <vector>
|
||||
#include "common/Domain.h"
|
||||
#include "common/Communication.h"
|
||||
#include "analysis/analysis.h"
|
||||
#include "analysis/distance.h"
|
||||
#include "analysis/Minkowski.h"
|
||||
|
||||
#include "shared_ptr.h"
|
||||
#include "common/Utilities.h"
|
||||
#include "common/MPI_Helpers.h"
|
||||
#include "IO/MeshDatabase.h"
|
||||
#include "IO/Reader.h"
|
||||
#include "IO/Writer.h"
|
||||
|
||||
|
||||
class phase{
|
||||
public:
|
||||
int Nc;
|
||||
double p;
|
||||
double M,Px,Py,Pz,K;
|
||||
double V,A,H,X;
|
||||
void reset(){
|
||||
p=M=Px=Py=Pz=K=0.0;
|
||||
V=A=H=X=0.0;
|
||||
Nc=1;
|
||||
}
|
||||
|
||||
private:
|
||||
};
|
||||
|
||||
class interface{
|
||||
public:
|
||||
double M,Px,Py,Pz,K;
|
||||
double Mw,Mn,Pnx,Pny,Pnz,Pwx,Pwy,Pwz,Kw,Kn;
|
||||
double V,A,H,X;
|
||||
void reset(){
|
||||
M=Px=Py=Pz=K=0.0;
|
||||
V=A=H=X=0.0;
|
||||
Mw=Mn=Pnx=Pny=Pnz=Pwx=Pwy=Pwz=Kw=Kn=0.0;
|
||||
}
|
||||
|
||||
private:
|
||||
};
|
||||
|
||||
class SubPhase{
|
||||
public:
|
||||
std::shared_ptr <Domain> Dm;
|
||||
double Volume;
|
||||
// input variables
|
||||
double rho_n, rho_w;
|
||||
double nu_n, nu_w;
|
||||
double gamma_wn, beta;
|
||||
double Fx, Fy, Fz;
|
||||
/*
|
||||
* indices
|
||||
* w - water phase
|
||||
* n - not water phase
|
||||
* c - connected part
|
||||
* d - disconnected part
|
||||
* i - interface region
|
||||
* b - bulk (total)
|
||||
*/
|
||||
// local entities
|
||||
phase wc,wd,wb,nc,nd,nb;
|
||||
interface iwn;
|
||||
|
||||
// global entities
|
||||
phase gwc,gwd,gwb,gnc,gnd,gnb;
|
||||
interface giwn;
|
||||
//...........................................................................
|
||||
int Nx,Ny,Nz;
|
||||
IntArray PhaseID; // Phase ID array (solid=0, non-wetting=1, wetting=2)
|
||||
BlobIDArray Label_WP; // Wetting phase label
|
||||
BlobIDArray Label_NWP; // Non-wetting phase label index (0:nblobs-1)
|
||||
std::vector<BlobIDType> Label_NWP_map; // Non-wetting phase label for each index
|
||||
DoubleArray Rho_n; // density field
|
||||
DoubleArray Rho_w; // density field
|
||||
DoubleArray Phi; // phase indicator field
|
||||
DoubleArray DelPhi; // Magnitude of Gradient of the phase indicator field
|
||||
DoubleArray Pressure; // pressure field
|
||||
DoubleArray Vel_x; // velocity field
|
||||
DoubleArray Vel_y;
|
||||
DoubleArray Vel_z;
|
||||
DoubleArray SDs;
|
||||
|
||||
std::shared_ptr<Minkowski> morph_w;
|
||||
std::shared_ptr<Minkowski> morph_n;
|
||||
std::shared_ptr<Minkowski> morph_i;
|
||||
|
||||
SubPhase(std::shared_ptr <Domain> Dm);
|
||||
~SubPhase();
|
||||
|
||||
void SetParams(double rhoA, double rhoB, double tauA, double tauB, double force_x, double force_y, double force_z, double alpha, double beta);
|
||||
void Basic();
|
||||
void Full();
|
||||
void Write(int time);
|
||||
|
||||
private:
|
||||
FILE *TIMELOG;
|
||||
|
||||
};
|
||||
|
||||
#endif
|
||||
|
|
@ -31,17 +31,18 @@
|
|||
#include "analysis/TwoPhase.h"
|
||||
|
||||
#include "analysis/pmmc.h"
|
||||
#include "analysis/analysis.h"
|
||||
#include "common/Domain.h"
|
||||
#include "common/Communication.h"
|
||||
#include "analysis/analysis.h"
|
||||
|
||||
#include "shared_ptr.h"
|
||||
#include "common/Utilities.h"
|
||||
#include "common/MPI_Helpers.h"
|
||||
#include "IO/MeshDatabase.h"
|
||||
#include "IO/Reader.h"
|
||||
#include "IO/Writer.h"
|
||||
|
||||
#include <memory>
|
||||
|
||||
|
||||
#define BLOB_AVG_COUNT 35
|
||||
|
||||
// Array access for averages defined by the following
|
||||
|
@ -459,11 +460,10 @@ void TwoPhase::UpdateMeshValues()
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
void TwoPhase::ComputeLocal()
|
||||
{
|
||||
int i,j,k,n,kmin,kmax;
|
||||
int i,j,k,n,imin,jmin,kmin,kmax;
|
||||
int cube[8][3] = {{0,0,0},{1,0,0},{0,1,0},{1,1,0},{0,0,1},{1,0,1},{0,1,1},{1,1,1}};
|
||||
|
||||
// If external boundary conditions are set, do not average over the inlet
|
||||
|
@ -471,9 +471,15 @@ void TwoPhase::ComputeLocal()
|
|||
if (Dm->BoundaryCondition > 0 && Dm->kproc() == 0) kmin=4;
|
||||
if (Dm->BoundaryCondition > 0 && Dm->kproc() == Dm->nprocz()-1) kmax=Nz-4;
|
||||
|
||||
imin=jmin=1;
|
||||
// If inlet layers exist use these as default
|
||||
if (Dm->inlet_layers_x > 0) imin = Dm->inlet_layers_x;
|
||||
if (Dm->inlet_layers_y > 0) jmin = Dm->inlet_layers_y;
|
||||
if (Dm->inlet_layers_z > 0) kmin = Dm->inlet_layers_z;
|
||||
|
||||
for (k=kmin; k<kmax; k++){
|
||||
for (j=1; j<Ny-1; j++){
|
||||
for (i=1; i<Nx-1; i++){
|
||||
for (j=jmin; j<Ny-1; j++){
|
||||
for (i=imin; i<Nx-1; i++){
|
||||
//...........................................................................
|
||||
n_nw_pts=n_ns_pts=n_ws_pts=n_nws_pts=n_local_sol_pts=n_local_nws_pts=0;
|
||||
n_nw_tris=n_ns_tris=n_ws_tris=n_nws_seg=n_local_sol_tris=0;
|
||||
|
@ -1224,8 +1230,8 @@ void TwoPhase::PrintAll(int timestep)
|
|||
Gws_global(0),Gws_global(1),Gws_global(2),Gws_global(3),Gws_global(4),Gws_global(5)); // orientation of ws interface
|
||||
fprintf(TIMELOG,"%.5g %.5g %.5g ",trawn_global, trJwn_global, trRwn_global); // Trimmed curvature
|
||||
fprintf(TIMELOG,"%.5g %.5g %.5g ",wwndnw_global, wwnsdnwn_global, Jwnwwndnw_global); // kinematic quantities
|
||||
fprintf(TIMELOG,"%.5g %.5g %.5g %.5g ",wet_morph->V(), wet_morph->A(), wet_morph->J(), wet_morph->X());
|
||||
fprintf(TIMELOG,"%.5g %.5g %.5g %.5g\n",nonwet_morph->V(), nonwet_morph->A(), nonwet_morph->J(), nonwet_morph->X());
|
||||
fprintf(TIMELOG,"%.5g %.5g %.5g %.5g ",wet_morph->V(), wet_morph->A(), wet_morph->H(), wet_morph->X());
|
||||
fprintf(TIMELOG,"%.5g %.5g %.5g %.5g\n",nonwet_morph->V(), nonwet_morph->A(), nonwet_morph->H(), nonwet_morph->X());
|
||||
// fprintf(TIMELOG,"%.5g %.5g %.5g %.5g\n",euler_global, Kn_global, Jn_global, An_global); // minkowski measures
|
||||
fflush(TIMELOG);
|
||||
}
|
||||
|
|
|
@ -17,16 +17,15 @@
|
|||
#ifndef TwoPhase_INC
|
||||
#define TwoPhase_INC
|
||||
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#include "analysis/pmmc.h"
|
||||
#include "common/Domain.h"
|
||||
#include "common/Communication.h"
|
||||
#include "analysis/analysis.h"
|
||||
#include "analysis/distance.h"
|
||||
#include "analysis/Minkowski.h"
|
||||
|
||||
#include "shared_ptr.h"
|
||||
#include "common/Domain.h"
|
||||
#include "common/Communication.h"
|
||||
#include "common/Utilities.h"
|
||||
#include "common/MPI_Helpers.h"
|
||||
#include "IO/MeshDatabase.h"
|
||||
|
|
|
@ -450,8 +450,9 @@ double MorphDrain(DoubleArray &SignDist, char *id, std::shared_ptr<Domain> Dm, d
|
|||
// Rcrit_new = strtod(argv[2],NULL);
|
||||
// if (rank==0) printf("Max. distance =%f, Initial critical radius = %f \n",maxdistGlobal,Rcrit_new);
|
||||
//}
|
||||
|
||||
while (void_fraction_new > VoidFraction)
|
||||
MPI_Barrier(Dm->Comm);
|
||||
|
||||
while (void_fraction_new > VoidFraction && Rcrit_new > 0.5)
|
||||
{
|
||||
void_fraction_diff_old = void_fraction_diff_new;
|
||||
void_fraction_old = void_fraction_new;
|
||||
|
@ -569,9 +570,9 @@ double MorphDrain(DoubleArray &SignDist, char *id, std::shared_ptr<Domain> Dm, d
|
|||
//......................................................................................
|
||||
MPI_Allreduce(&LocalNumber,&GlobalNumber,1,MPI_DOUBLE,MPI_SUM,Dm->Comm);
|
||||
|
||||
for (int k=1; k<nz-1; k++){
|
||||
for (int j=1; j<ny-1; j++){
|
||||
for (int i=1; i<nx-1; i++){
|
||||
for (int k=0; k<nz; k++){
|
||||
for (int j=0; j<ny; j++){
|
||||
for (int i=0; i<nx; i++){
|
||||
n=k*nx*ny+j*nx+i;
|
||||
if (id[n] == 1){
|
||||
phase(i,j,k) = 1.0;
|
||||
|
@ -582,15 +583,15 @@ double MorphDrain(DoubleArray &SignDist, char *id, std::shared_ptr<Domain> Dm, d
|
|||
}
|
||||
}
|
||||
|
||||
// Extract only the connected part
|
||||
// Extract only the connected part of NWP
|
||||
BlobIDstruct new_index;
|
||||
double vF=0.0; double vS=0.0;
|
||||
ComputeGlobalBlobIDs(nx-2,ny-2,nz-2,Dm->rank_info,phase,SignDist,vF,vS,phase_label,Dm->Comm);
|
||||
MPI_Barrier(Dm->Comm);
|
||||
|
||||
for (int k=1; k<nz-1; k++){
|
||||
for (int j=1; j<ny-1; j++){
|
||||
for (int i=1; i<nx-1; i++){
|
||||
for (int k=0; k<nz; k++){
|
||||
for (int j=0; j<ny; j++){
|
||||
for (int i=0; i<nx; i++){
|
||||
n=k*nx*ny+j*nx+i;
|
||||
if (id[n] == 1 && phase_label(i,j,k) > 1){
|
||||
id[n] = 2;
|
||||
|
@ -598,13 +599,51 @@ double MorphDrain(DoubleArray &SignDist, char *id, std::shared_ptr<Domain> Dm, d
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Extract only the connected part of NWP
|
||||
for (int k=1; k<nz-1; k++){
|
||||
for (int j=1; j<ny-1; j++){
|
||||
for (int i=1; i<nx-1; i++){
|
||||
n=k*nx*ny+j*nx+i;
|
||||
if (id[n] == 2){
|
||||
phase(i,j,k) = 1.0;
|
||||
}
|
||||
else if (id[n] == 1){
|
||||
// nwp
|
||||
phase(i,j,k) = -1.0;
|
||||
}
|
||||
else{
|
||||
// treat solid as WP since films can connect
|
||||
phase(i,j,k) = 1.0;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ComputeGlobalBlobIDs(nx-2,ny-2,nz-2,Dm->rank_info,phase,SignDist,vF,vS,phase_label,Dm->Comm);
|
||||
MPI_Barrier(Dm->Comm);
|
||||
|
||||
for (int k=1; k<nz-1; k++){
|
||||
for (int j=1; j<ny-1; j++){
|
||||
for (int i=1; i<nx-1; i++){
|
||||
n=k*nx*ny+j*nx+i;
|
||||
if (id[n] == 2 && phase_label(i,j,k) > 1){
|
||||
id[n] = 20;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// done
|
||||
*/
|
||||
|
||||
count = 0.f;
|
||||
for (int k=1; k<nz-1; k++){
|
||||
for (int j=1; j<ny-1; j++){
|
||||
for (int i=1; i<nx-1; i++){
|
||||
n=k*nx*ny+j*nx+i;
|
||||
if (id[n] == 2){
|
||||
if (id[n] > 1){
|
||||
count+=1.0;
|
||||
}
|
||||
}
|
||||
|
@ -633,6 +672,18 @@ double MorphDrain(DoubleArray &SignDist, char *id, std::shared_ptr<Domain> Dm, d
|
|||
printf("Final critical radius=%f\n",Rcrit_old);
|
||||
}
|
||||
}
|
||||
// label all WP components as 2
|
||||
for (int k=1; k<nz-1; k++){
|
||||
for (int j=1; j<ny-1; j++){
|
||||
for (int i=1; i<nx-1; i++){
|
||||
n=k*nx*ny+j*nx+i;
|
||||
if (id[n] > 1){
|
||||
id[n] = 2;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return final_void_fraction;
|
||||
}
|
||||
|
||||
|
|
|
@ -213,6 +213,52 @@ private:
|
|||
runAnalysis::commWrapper comm;
|
||||
};
|
||||
|
||||
// Helper class to write the vis file from a thread
|
||||
class IOWorkItem: public ThreadPool::WorkItemRet<void>
|
||||
{
|
||||
public:
|
||||
IOWorkItem( int timestep_, std::vector<IO::MeshDataStruct>& visData_,
|
||||
SubPhase& Averages_, fillHalo<double>& fillData_, runAnalysis::commWrapper&& comm_ ):
|
||||
timestep(timestep_), visData(visData_), Averages(Averages_), fillData(fillData_), comm(std::move(comm_))
|
||||
{
|
||||
}
|
||||
~IOWorkItem() { }
|
||||
virtual void run() {
|
||||
PROFILE_START("Save Vis",1);
|
||||
ASSERT(visData[0].vars[0]->name=="phase");
|
||||
ASSERT(visData[0].vars[1]->name=="Pressure");
|
||||
ASSERT(visData[0].vars[2]->name=="Velocity_x");
|
||||
ASSERT(visData[0].vars[3]->name=="Velocity_y");
|
||||
ASSERT(visData[0].vars[4]->name=="Velocity_z");
|
||||
ASSERT(visData[0].vars[5]->name=="SignDist");
|
||||
ASSERT(visData[0].vars[6]->name=="BlobID");
|
||||
Array<double>& PhaseData = visData[0].vars[0]->data;
|
||||
Array<double>& PressData = visData[0].vars[1]->data;
|
||||
Array<double>& VelxData = visData[0].vars[2]->data;
|
||||
Array<double>& VelyData = visData[0].vars[3]->data;
|
||||
Array<double>& VelzData = visData[0].vars[4]->data;
|
||||
Array<double>& SignData = visData[0].vars[5]->data;
|
||||
Array<double>& BlobData = visData[0].vars[6]->data;
|
||||
fillData.copy(Averages.Phi,PhaseData);
|
||||
fillData.copy(Averages.Pressure,PressData);
|
||||
fillData.copy(Averages.SDs,SignData);
|
||||
fillData.copy(Averages.Vel_x,VelxData);
|
||||
fillData.copy(Averages.Vel_y,VelyData);
|
||||
fillData.copy(Averages.Vel_z,VelzData);
|
||||
fillData.copy(Averages.morph_n->label,BlobData);
|
||||
IO::writeData( timestep, visData, comm.comm );
|
||||
PROFILE_STOP("Save Vis",1);
|
||||
};
|
||||
private:
|
||||
IOWorkItem();
|
||||
int timestep;
|
||||
std::vector<IO::MeshDataStruct>& visData;
|
||||
SubPhase& Averages;
|
||||
fillHalo<double>& fillData;
|
||||
runAnalysis::commWrapper comm;
|
||||
};
|
||||
|
||||
|
||||
|
||||
// Helper class to run the analysis from within a thread
|
||||
// Note: Averages will be modified after the constructor is called
|
||||
|
@ -262,6 +308,116 @@ private:
|
|||
};
|
||||
|
||||
|
||||
class TCATWorkItem: public ThreadPool::WorkItemRet<void>
|
||||
{
|
||||
public:
|
||||
TCATWorkItem( AnalysisType type_, int timestep_, TwoPhase& Averages_,
|
||||
BlobIDstruct ids, BlobIDList id_list_, double beta_ ):
|
||||
type(type_), timestep(timestep_), Averages(Averages_),
|
||||
blob_ids(ids), id_list(id_list_), beta(beta_) { }
|
||||
~TCATWorkItem() { }
|
||||
virtual void run() {
|
||||
Averages.NumberComponents_NWP = blob_ids->first;
|
||||
Averages.Label_NWP = blob_ids->second;
|
||||
Averages.Label_NWP_map = *id_list;
|
||||
Averages.NumberComponents_WP = 1;
|
||||
Averages.Label_WP.fill(0.0);
|
||||
if ( matches(type,AnalysisType::CopyPhaseIndicator) ) {
|
||||
// Averages.ColorToSignedDistance(beta,Averages.Phase,Averages.Phase_tplus);
|
||||
}
|
||||
if ( matches(type,AnalysisType::ComputeAverages) ) {
|
||||
PROFILE_START("Compute TCAT",1);
|
||||
Averages.Initialize();
|
||||
Averages.ComputeDelPhi();
|
||||
Averages.ColorToSignedDistance(beta,Averages.Phase,Averages.SDn);
|
||||
Averages.ColorToSignedDistance(beta,Averages.Phase_tminus,Averages.Phase_tminus);
|
||||
Averages.ColorToSignedDistance(beta,Averages.Phase_tplus,Averages.Phase_tplus);
|
||||
Averages.UpdateMeshValues();
|
||||
Averages.ComputeLocal();
|
||||
Averages.Reduce();
|
||||
Averages.PrintAll(timestep);
|
||||
PROFILE_STOP("Compute TCAT",1);
|
||||
}
|
||||
}
|
||||
private:
|
||||
TCATWorkItem();
|
||||
AnalysisType type;
|
||||
int timestep;
|
||||
TwoPhase& Averages;
|
||||
BlobIDstruct blob_ids;
|
||||
BlobIDList id_list;
|
||||
double beta;
|
||||
};
|
||||
|
||||
|
||||
class GanglionTrackingWorkItem: public ThreadPool::WorkItemRet<void>
|
||||
{
|
||||
public:
|
||||
GanglionTrackingWorkItem( AnalysisType type_, int timestep_, TwoPhase& Averages_,
|
||||
BlobIDstruct ids, BlobIDList id_list_, double beta_ ):
|
||||
type(type_), timestep(timestep_), Averages(Averages_),
|
||||
blob_ids(ids), id_list(id_list_), beta(beta_) { }
|
||||
~GanglionTrackingWorkItem() { }
|
||||
virtual void run() {
|
||||
Averages.NumberComponents_NWP = blob_ids->first;
|
||||
Averages.Label_NWP = blob_ids->second;
|
||||
Averages.Label_NWP_map = *id_list;
|
||||
Averages.NumberComponents_WP = 1;
|
||||
Averages.Label_WP.fill(0.0);
|
||||
if ( matches(type,AnalysisType::CopyPhaseIndicator) ) {
|
||||
// Averages.ColorToSignedDistance(beta,Averages.Phase,Averages.Phase_tplus);
|
||||
}
|
||||
if ( matches(type,AnalysisType::ComputeAverages) ) {
|
||||
PROFILE_START("Compute ganglion",1);
|
||||
Averages.Initialize();
|
||||
Averages.ComputeDelPhi();
|
||||
Averages.ColorToSignedDistance(beta,Averages.Phase,Averages.SDn);
|
||||
Averages.ColorToSignedDistance(beta,Averages.Phase_tminus,Averages.Phase_tminus);
|
||||
Averages.ColorToSignedDistance(beta,Averages.Phase_tplus,Averages.Phase_tplus);
|
||||
Averages.UpdateMeshValues();
|
||||
Averages.ComponentAverages();
|
||||
Averages.SortBlobs();
|
||||
Averages.PrintComponents(timestep);
|
||||
PROFILE_STOP("Compute ganglion",1);
|
||||
}
|
||||
}
|
||||
private:
|
||||
GanglionTrackingWorkItem();
|
||||
AnalysisType type;
|
||||
int timestep;
|
||||
TwoPhase& Averages;
|
||||
BlobIDstruct blob_ids;
|
||||
BlobIDList id_list;
|
||||
double beta;
|
||||
};
|
||||
|
||||
|
||||
class BasicWorkItem: public ThreadPool::WorkItemRet<void>
|
||||
{
|
||||
public:
|
||||
BasicWorkItem( AnalysisType type_, int timestep_, SubPhase& Averages_ ):
|
||||
type(type_), timestep(timestep_), Averages(Averages_){ }
|
||||
~BasicWorkItem() { }
|
||||
virtual void run() {
|
||||
|
||||
if ( matches(type,AnalysisType::CopyPhaseIndicator) ) {
|
||||
// Averages.ColorToSignedDistance(beta,Averages.Phase,Averages.Phase_tplus);
|
||||
}
|
||||
if ( matches(type,AnalysisType::ComputeAverages) ) {
|
||||
PROFILE_START("Compute subphase",1);
|
||||
Averages.Basic();
|
||||
PROFILE_STOP("Compute subphase",1);
|
||||
}
|
||||
}
|
||||
private:
|
||||
BasicWorkItem();
|
||||
AnalysisType type;
|
||||
int timestep;
|
||||
SubPhase& Averages;
|
||||
double beta;
|
||||
};
|
||||
|
||||
|
||||
/******************************************************************
|
||||
* MPI comm wrapper for use with analysis *
|
||||
******************************************************************/
|
||||
|
@ -699,4 +855,133 @@ void runAnalysis::run( int timestep, TwoPhase& Averages, const double *Phi,
|
|||
}
|
||||
|
||||
|
||||
/******************************************************************
|
||||
* Run the analysis *
|
||||
******************************************************************/
|
||||
void runAnalysis::basic( int timestep, SubPhase &Averages, const double *Phi, double *Pressure, double *Velocity, double *fq, double *Den)
|
||||
{
|
||||
int N = d_N[0]*d_N[1]*d_N[2];
|
||||
|
||||
// Check which analysis steps we need to perform
|
||||
auto type = computeAnalysisType( timestep );
|
||||
if ( type == AnalysisType::AnalyzeNone )
|
||||
return;
|
||||
|
||||
// Check how may queued items we have
|
||||
if ( d_tpool.N_queued() > 20 ) {
|
||||
std::cerr << "Analysis queue is getting behind, waiting ...\n";
|
||||
finish();
|
||||
}
|
||||
|
||||
PROFILE_START("run");
|
||||
|
||||
// Copy the appropriate variables to the host (so we can spawn new threads)
|
||||
ScaLBL_DeviceBarrier();
|
||||
PROFILE_START("Copy data to host",1);
|
||||
|
||||
//if ( matches(type,AnalysisType::CopySimState) ) {
|
||||
if ( timestep%d_analysis_interval == 0 ) {
|
||||
// Copy the members of Averages to the cpu (phase was copied above)
|
||||
PROFILE_START("Copy-Pressure",1);
|
||||
ScaLBL_D3Q19_Pressure(fq,Pressure,d_Np);
|
||||
//ScaLBL_D3Q19_Momentum(fq,Velocity,d_Np);
|
||||
ScaLBL_DeviceBarrier();
|
||||
PROFILE_STOP("Copy-Pressure",1);
|
||||
PROFILE_START("Copy-Wait",1);
|
||||
PROFILE_STOP("Copy-Wait",1);
|
||||
PROFILE_START("Copy-State",1);
|
||||
// copy other variables
|
||||
d_ScaLBL_Comm->RegularLayout(d_Map,Pressure,Averages.Pressure);
|
||||
d_ScaLBL_Comm->RegularLayout(d_Map,&Den[0],Averages.Rho_n);
|
||||
d_ScaLBL_Comm->RegularLayout(d_Map,&Den[d_Np],Averages.Rho_w);
|
||||
d_ScaLBL_Comm->RegularLayout(d_Map,&Velocity[0],Averages.Vel_x);
|
||||
d_ScaLBL_Comm->RegularLayout(d_Map,&Velocity[d_Np],Averages.Vel_y);
|
||||
d_ScaLBL_Comm->RegularLayout(d_Map,&Velocity[2*d_Np],Averages.Vel_z);
|
||||
PROFILE_STOP("Copy-State",1);
|
||||
}
|
||||
PROFILE_STOP("Copy data to host");
|
||||
|
||||
PROFILE_START("run",1);
|
||||
// Spawn threads to do the analysis work
|
||||
//if (timestep%d_restart_interval==0){
|
||||
// if ( matches(type,AnalysisType::ComputeAverages) ) {
|
||||
if ( timestep%d_analysis_interval == 0 ) {
|
||||
auto work = new BasicWorkItem(type,timestep,Averages);
|
||||
work->add_dependency(d_wait_analysis); // Make sure we are done using analysis before modifying
|
||||
d_wait_analysis = d_tpool.add_work(work);
|
||||
}
|
||||
|
||||
PROFILE_STOP("run");
|
||||
}
|
||||
|
||||
void runAnalysis::WriteVisData( int timestep, SubPhase &Averages, const double *Phi, double *Pressure, double *Velocity, double *fq, double *Den)
|
||||
{
|
||||
int N = d_N[0]*d_N[1]*d_N[2];
|
||||
|
||||
// Check which analysis steps we need to perform
|
||||
auto type = computeAnalysisType( timestep );
|
||||
if ( type == AnalysisType::AnalyzeNone )
|
||||
return;
|
||||
|
||||
// Check how may queued items we have
|
||||
if ( d_tpool.N_queued() > 20 ) {
|
||||
std::cerr << "Analysis queue is getting behind, waiting ...\n";
|
||||
finish();
|
||||
}
|
||||
|
||||
// Copy the appropriate variables to the host (so we can spawn new threads)
|
||||
ScaLBL_DeviceBarrier();
|
||||
/* PROFILE_START("Copy data to host",1);
|
||||
|
||||
//if ( matches(type,AnalysisType::CopySimState) ) {
|
||||
if ( timestep%d_analysis_interval == 0 ) {
|
||||
// Copy the members of Averages to the cpu (phase was copied above)
|
||||
PROFILE_START("Copy-Pressure",1);
|
||||
ScaLBL_D3Q19_Pressure(fq,Pressure,d_Np);
|
||||
//ScaLBL_D3Q19_Momentum(fq,Velocity,d_Np);
|
||||
ScaLBL_DeviceBarrier();
|
||||
PROFILE_STOP("Copy-Pressure",1);
|
||||
PROFILE_START("Copy-Wait",1);
|
||||
PROFILE_STOP("Copy-Wait",1);
|
||||
PROFILE_START("Copy-State",1);
|
||||
// copy other variables
|
||||
d_ScaLBL_Comm->RegularLayout(d_Map,Pressure,Averages.Pressure);
|
||||
d_ScaLBL_Comm->RegularLayout(d_Map,&Den[0],Averages.Rho_n);
|
||||
d_ScaLBL_Comm->RegularLayout(d_Map,&Den[d_Np],Averages.Rho_w);
|
||||
d_ScaLBL_Comm->RegularLayout(d_Map,&Velocity[0],Averages.Vel_x);
|
||||
d_ScaLBL_Comm->RegularLayout(d_Map,&Velocity[d_Np],Averages.Vel_y);
|
||||
d_ScaLBL_Comm->RegularLayout(d_Map,&Velocity[2*d_Np],Averages.Vel_z);
|
||||
PROFILE_STOP("Copy-State",1);
|
||||
}
|
||||
PROFILE_STOP("Copy data to host");
|
||||
*/
|
||||
PROFILE_START("write vis",1);
|
||||
|
||||
// if (Averages.WriteVis == true){
|
||||
std::shared_ptr<double> cfq,cDen;
|
||||
// Copy restart data to the CPU
|
||||
cDen = std::shared_ptr<double>(new double[2*d_Np],DeleteArray<double>);
|
||||
cfq = std::shared_ptr<double>(new double[19*d_Np],DeleteArray<double>);
|
||||
ScaLBL_CopyToHost(cfq.get(),fq,19*d_Np*sizeof(double));
|
||||
ScaLBL_CopyToHost(cDen.get(),Den,2*d_Np*sizeof(double));
|
||||
|
||||
if (d_rank==0) {
|
||||
FILE *Rst = fopen("Restart.txt","w");
|
||||
fprintf(Rst,"%i\n",timestep+4);
|
||||
fclose(Rst);
|
||||
}
|
||||
// Write the restart file (using a seperate thread)
|
||||
auto work1 = new WriteRestartWorkItem(d_restartFile.c_str(),cDen,cfq,d_Np);
|
||||
work1->add_dependency(d_wait_restart);
|
||||
d_wait_restart = d_tpool.add_work(work1);
|
||||
|
||||
auto work2 = new IOWorkItem( timestep, d_meshData, Averages, d_fillData, getComm() );
|
||||
work2->add_dependency(d_wait_vis);
|
||||
d_wait_vis = d_tpool.add_work(work2);
|
||||
|
||||
//Averages.WriteVis = false;
|
||||
// }
|
||||
|
||||
PROFILE_STOP("write vis");
|
||||
}
|
||||
|
||||
|
|
|
@ -18,6 +18,7 @@
|
|||
|
||||
#include "analysis/analysis.h"
|
||||
#include "analysis/TwoPhase.h"
|
||||
#include "analysis/SubPhase.h"
|
||||
#include "common/Communication.h"
|
||||
#include "common/ScaLBL.h"
|
||||
#include "threadpool/thread_pool.h"
|
||||
|
@ -47,6 +48,9 @@ public:
|
|||
//! Run the next analysis
|
||||
void run( int timestep, TwoPhase &Averages, const double *Phi,
|
||||
double *Pressure, double *Velocity, double *fq, double *Den );
|
||||
|
||||
void basic( int timestep, SubPhase &Averages, const double *Phi, double *Pressure, double *Velocity, double *fq, double *Den );
|
||||
void WriteVisData( int timestep, SubPhase &Averages, const double *Phi, double *Pressure, double *Velocity, double *fq, double *Den);
|
||||
|
||||
//! Finish all active analysis
|
||||
void finish();
|
||||
|
|
|
@ -1,170 +0,0 @@
|
|||
# Create a shared_ptr.h file in the include directory that contains
|
||||
# a shared_ptr class (hopefully typedef to a compiler basic)
|
||||
# Arguements:
|
||||
# INSTALL_DIR - Directory to install shared_ptr.h
|
||||
# NAMESPACE - Namespace to contain the shared_ptr class (may be empty)
|
||||
INCLUDE( CheckCXXSourceCompiles )
|
||||
FUNCTION( CONFIGURE_SHARED_PTR INSTALL_DIR NAMESPACE )
|
||||
SET( CMAKE_REQUIRED_FLAGS ${CMAKE_CXX_FLAGS} )
|
||||
CHECK_CXX_SOURCE_COMPILES(
|
||||
" #include <memory>
|
||||
namespace ${NAMESPACE} { using std::shared_ptr; }
|
||||
int main() {
|
||||
${NAMESPACE}::shared_ptr<int> ptr;
|
||||
return 0;
|
||||
}
|
||||
"
|
||||
MEMORY_SHARED_PTR )
|
||||
CHECK_CXX_SOURCE_COMPILES(
|
||||
" #include <memory>
|
||||
namespace ${NAMESPACE} { using std::tr1::shared_ptr; }
|
||||
int main() {
|
||||
${NAMESPACE}::shared_ptr<int> ptr;
|
||||
return 0;
|
||||
}
|
||||
"
|
||||
MEMORY_TR1_SHARED_PTR )
|
||||
CHECK_CXX_SOURCE_COMPILES(
|
||||
" #include <tr1/memory>
|
||||
namespace ${NAMESPACE} { using std::tr1::shared_ptr; }
|
||||
int main() {
|
||||
${NAMESPACE}::shared_ptr<int> ptr;
|
||||
return 0;
|
||||
}
|
||||
"
|
||||
TR1_MEMORY_TR1_SHARED_PTR )
|
||||
GET_DIRECTORY_PROPERTY( dirs INCLUDE_DIRECTORIES )
|
||||
SET( CMAKE_REQUIRED_FLAGS "${CMAKE_CXX_FLAGS}" )
|
||||
SET( CMAKE_REQUIRED_INCLUDES ${dirs} "${BOOST_INCLUDE}" )
|
||||
CHECK_CXX_SOURCE_COMPILES(
|
||||
" #include \"boost/shared_ptr.hpp\"
|
||||
namespace ${NAMESPACE} { using boost::shared_ptr; }
|
||||
int main() {
|
||||
${NAMESPACE}::shared_ptr<int> ptr;
|
||||
return 0;
|
||||
}
|
||||
"
|
||||
BOOST_SHARED_PTR )
|
||||
WRITE_DUMMY_SHARED_PTR( "${NAMESPACE}" "${CMAKE_CURRENT_BINARY_DIR}/tmp/dummy_shared_ptr.h" )
|
||||
CHECK_CXX_SOURCE_COMPILES(
|
||||
" #include <iostream>
|
||||
#include \"${CMAKE_CURRENT_BINARY_DIR}/tmp/dummy_shared_ptr.h\"
|
||||
int main() {
|
||||
${NAMESPACE}::shared_ptr<int> ptr;
|
||||
return 0;
|
||||
}
|
||||
"
|
||||
DUMMY_SHARED_PTR )
|
||||
IF ( NOT NAMESPACE )
|
||||
SET( NAMESPACE " " )
|
||||
ENDIF()
|
||||
IF ( BOOST_SHARED_PTR )
|
||||
FILE(WRITE "${CMAKE_CURRENT_BINARY_DIR}/tmp/shared_ptr.h" "#include \"boost/shared_ptr.hpp\"\n")
|
||||
FILE(APPEND "${CMAKE_CURRENT_BINARY_DIR}/tmp/shared_ptr.h" "#include \"boost/weak_ptr.hpp\"\n")
|
||||
FILE(APPEND "${CMAKE_CURRENT_BINARY_DIR}/tmp/shared_ptr.h" "#include \"boost/enable_shared_from_this.hpp\"\n")
|
||||
FILE(APPEND "${CMAKE_CURRENT_BINARY_DIR}/tmp/shared_ptr.h" "namespace ${NAMESPACE} {\n")
|
||||
FILE(APPEND "${CMAKE_CURRENT_BINARY_DIR}/tmp/shared_ptr.h" " using boost::shared_ptr; \n")
|
||||
FILE(APPEND "${CMAKE_CURRENT_BINARY_DIR}/tmp/shared_ptr.h" " using boost::dynamic_pointer_cast; \n")
|
||||
FILE(APPEND "${CMAKE_CURRENT_BINARY_DIR}/tmp/shared_ptr.h" " using boost::const_pointer_cast; \n")
|
||||
FILE(APPEND "${CMAKE_CURRENT_BINARY_DIR}/tmp/shared_ptr.h" " using boost::weak_ptr; \n")
|
||||
FILE(APPEND "${CMAKE_CURRENT_BINARY_DIR}/tmp/shared_ptr.h" " using boost::enable_shared_from_this; \n")
|
||||
FILE(APPEND "${CMAKE_CURRENT_BINARY_DIR}/tmp/shared_ptr.h" "}\n")
|
||||
ELSEIF ( MEMORY_SHARED_PTR )
|
||||
IF ( ${NAMESPACE} STREQUAL "std" )
|
||||
FILE(WRITE "${CMAKE_CURRENT_BINARY_DIR}/tmp/shared_ptr.h" "#include <memory>\n")
|
||||
ELSE()
|
||||
FILE(WRITE "${CMAKE_CURRENT_BINARY_DIR}/tmp/shared_ptr.h" "#include <memory>\n")
|
||||
FILE(APPEND "${CMAKE_CURRENT_BINARY_DIR}/tmp/shared_ptr.h" "namespace ${NAMESPACE} {\n")
|
||||
FILE(APPEND "${CMAKE_CURRENT_BINARY_DIR}/tmp/shared_ptr.h" " using std::shared_ptr; \n")
|
||||
FILE(APPEND "${CMAKE_CURRENT_BINARY_DIR}/tmp/shared_ptr.h" " using std::dynamic_pointer_cast; \n")
|
||||
FILE(APPEND "${CMAKE_CURRENT_BINARY_DIR}/tmp/shared_ptr.h" " using std::const_pointer_cast; \n")
|
||||
FILE(APPEND "${CMAKE_CURRENT_BINARY_DIR}/tmp/shared_ptr.h" " using std::weak_ptr; \n")
|
||||
FILE(APPEND "${CMAKE_CURRENT_BINARY_DIR}/tmp/shared_ptr.h" " using std::enable_shared_from_this; \n")
|
||||
FILE(APPEND "${CMAKE_CURRENT_BINARY_DIR}/tmp/shared_ptr.h" "}\n")
|
||||
ENDIF()
|
||||
ELSEIF ( MEMORY_TR1_SHARED_PTR )
|
||||
FILE(WRITE "${CMAKE_CURRENT_BINARY_DIR}/tmp/shared_ptr.h" "#include <memory>\n")
|
||||
FILE(APPEND "${CMAKE_CURRENT_BINARY_DIR}/tmp/shared_ptr.h" "namespace ${NAMESPACE} {\n")
|
||||
FILE(APPEND "${CMAKE_CURRENT_BINARY_DIR}/tmp/shared_ptr.h" " using std::tr1::shared_ptr; \n")
|
||||
FILE(APPEND "${CMAKE_CURRENT_BINARY_DIR}/tmp/shared_ptr.h" " using std::tr1::dynamic_pointer_cast; \n")
|
||||
FILE(APPEND "${CMAKE_CURRENT_BINARY_DIR}/tmp/shared_ptr.h" " using std::tr1::const_pointer_cast; \n")
|
||||
FILE(APPEND "${CMAKE_CURRENT_BINARY_DIR}/tmp/shared_ptr.h" " using std::tr1::weak_ptr; \n")
|
||||
FILE(APPEND "${CMAKE_CURRENT_BINARY_DIR}/tmp/shared_ptr.h" " using std::tr1::enable_shared_from_this; \n")
|
||||
FILE(APPEND "${CMAKE_CURRENT_BINARY_DIR}/tmp/shared_ptr.h" "}\n")
|
||||
ELSEIF ( TR1_MEMORY_TR1_SHARED_PTR )
|
||||
FILE(WRITE "${CMAKE_CURRENT_BINARY_DIR}/tmp/shared_ptr.h" "#include <tr1/memory>\n")
|
||||
FILE(APPEND "${CMAKE_CURRENT_BINARY_DIR}/tmp/shared_ptr.h" "namespace ${NAMESPACE} {\n")
|
||||
FILE(APPEND "${CMAKE_CURRENT_BINARY_DIR}/tmp/shared_ptr.h" " using std::tr1::shared_ptr; \n")
|
||||
FILE(APPEND "${CMAKE_CURRENT_BINARY_DIR}/tmp/shared_ptr.h" " using std::tr1::dynamic_pointer_cast; \n")
|
||||
FILE(APPEND "${CMAKE_CURRENT_BINARY_DIR}/tmp/shared_ptr.h" " using std::tr1::const_pointer_cast; \n")
|
||||
FILE(APPEND "${CMAKE_CURRENT_BINARY_DIR}/tmp/shared_ptr.h" " using std::tr1::weak_ptr; \n")
|
||||
FILE(APPEND "${CMAKE_CURRENT_BINARY_DIR}/tmp/shared_ptr.h" " using std::tr1::enable_shared_from_this; \n")
|
||||
FILE(APPEND "${CMAKE_CURRENT_BINARY_DIR}/tmp/shared_ptr.h" "}\n")
|
||||
ELSEIF ( DUMMY_SHARED_PTR )
|
||||
MESSAGE("Warning: No valid shared_ptr found, using dummy shared_ptr" )
|
||||
WRITE_DUMMY_SHARED_PTR( "${NAMESPACE}" "${CMAKE_CURRENT_BINARY_DIR}/tmp/shared_ptr.h" )
|
||||
ELSE()
|
||||
MESSAGE(FATAL_ERROR "No shared_ptr availible")
|
||||
ENDIF()
|
||||
EXECUTE_PROCESS( COMMAND ${CMAKE_COMMAND} -E copy_if_different
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/tmp/shared_ptr.h" "${INSTALL_DIR}/shared_ptr.h" )
|
||||
ENDFUNCTION()
|
||||
|
||||
|
||||
FUNCTION( WRITE_DUMMY_SHARED_PTR NAMESPACE FILENAME )
|
||||
FILE(WRITE "${FILENAME}" "#ifndef DUMMY_SHARED_PTR_INC\n")
|
||||
FILE(APPEND "${FILENAME}" "#define DUMMY_SHARED_PTR_INC\n")
|
||||
FILE(APPEND "${FILENAME}" "namespace dummy {\n\n")
|
||||
FILE(APPEND "${FILENAME}" "template<class T> void DefaultDeleter(T* p) {delete p;}\n\n")
|
||||
FILE(APPEND "${FILENAME}" "template<class T> class shared_ptr {\n")
|
||||
FILE(APPEND "${FILENAME}" "public:\n")
|
||||
FILE(APPEND "${FILENAME}" " typedef void (*D)(T*);\n")
|
||||
FILE(APPEND "${FILENAME}" " shared_ptr( ): obj(NULL), deleter(DefaultDeleter<T>), count(NULL) {}\n")
|
||||
FILE(APPEND "${FILENAME}" " shared_ptr( T *ptr, void (*D)(T*)=DefaultDeleter<T>):\n")
|
||||
FILE(APPEND "${FILENAME}" " obj(ptr), deleter(D), count(NULL) { if (ptr) { count = new int; (*count)=1; } } \n")
|
||||
FILE(APPEND "${FILENAME}" " shared_ptr( const shared_ptr<T>& rhs ): \n")
|
||||
FILE(APPEND "${FILENAME}" " obj(rhs.get()), deleter(reinterpret_cast<D>(rhs.deleter)), count(rhs.count) { if ( count!=NULL ) { ++(*count); } } \n")
|
||||
FILE(APPEND "${FILENAME}" " template<class U> shared_ptr( const shared_ptr<U>& rhs ): \n")
|
||||
FILE(APPEND "${FILENAME}" " obj(rhs.get()), deleter(reinterpret_cast<D>(rhs.deleter)), count(rhs.count) { if ( count!=NULL ) { ++(*count); } } \n")
|
||||
FILE(APPEND "${FILENAME}" " shared_ptr& operator=( const shared_ptr<T>& rhs )\n")
|
||||
FILE(APPEND "${FILENAME}" " { if (this==&rhs) { return *this;} reset(); obj=rhs.obj; deleter=reinterpret_cast<D>(rhs.deleter); count=rhs.count; ++(*count); return *this; } \n")
|
||||
FILE(APPEND "${FILENAME}" " ~shared_ptr( ) { reset(); }\n")
|
||||
FILE(APPEND "${FILENAME}" " void reset( T *ptr ) { reset(); obj=ptr; count=new int; (*count)=1; }\n")
|
||||
FILE(APPEND "${FILENAME}" " void reset( void ) { \n")
|
||||
FILE(APPEND "${FILENAME}" " if ( count!=NULL) { int tmp=--(*count); if ( tmp==0 ) { deleter(obj); delete count; } } \n")
|
||||
FILE(APPEND "${FILENAME}" " obj=NULL; count=NULL; \n")
|
||||
FILE(APPEND "${FILENAME}" " }\n")
|
||||
FILE(APPEND "${FILENAME}" " T* get( ) const { return obj; } \n")
|
||||
FILE(APPEND "${FILENAME}" " T* operator->( ) const { return obj; } \n")
|
||||
FILE(APPEND "${FILENAME}" " const T& operator*( ) const { return *obj; } \n")
|
||||
FILE(APPEND "${FILENAME}" " bool operator==( const T * rhs ) const { return obj==rhs; } \n")
|
||||
FILE(APPEND "${FILENAME}" " bool operator!=( const T * rhs ) const { return obj!=rhs; } \n")
|
||||
FILE(APPEND "${FILENAME}" "protected:\n")
|
||||
FILE(APPEND "${FILENAME}" " T *obj;\n")
|
||||
FILE(APPEND "${FILENAME}" " void (*deleter)(T*);\n")
|
||||
FILE(APPEND "${FILENAME}" " volatile int *count;\n")
|
||||
FILE(APPEND "${FILENAME}" "template<class T1, class U> friend shared_ptr<T1> dynamic_pointer_cast( shared_ptr<U> const & );\n")
|
||||
FILE(APPEND "${FILENAME}" "template<class T1, class U> friend shared_ptr<T1> const_pointer_cast( shared_ptr<U> const & );\n")
|
||||
FILE(APPEND "${FILENAME}" "template<class Y> friend class shared_ptr;\n")
|
||||
FILE(APPEND "${FILENAME}" "};\n\n")
|
||||
FILE(APPEND "${FILENAME}" "template<class T, class U> shared_ptr<T> dynamic_pointer_cast( shared_ptr<U> const & rhs ) {\n")
|
||||
FILE(APPEND "${FILENAME}" " T* obj = dynamic_cast<T*>(rhs.obj);\n")
|
||||
FILE(APPEND "${FILENAME}" " shared_ptr<T> ptr;\n")
|
||||
FILE(APPEND "${FILENAME}" " if ( obj!=NULL ) { ptr.obj = obj; ptr.count=rhs.count; ++(*ptr.count); }\n")
|
||||
FILE(APPEND "${FILENAME}" " return ptr;\n}\n")
|
||||
FILE(APPEND "${FILENAME}" "template<class T, class U> shared_ptr<T> const_pointer_cast( shared_ptr<U> const & rhs ) {\n")
|
||||
FILE(APPEND "${FILENAME}" " T* obj = const_cast<T*>(rhs.obj);\n")
|
||||
FILE(APPEND "${FILENAME}" " shared_ptr<T> ptr;\n")
|
||||
FILE(APPEND "${FILENAME}" " if ( obj!=NULL ) { ptr.obj = obj; ptr.count=rhs.count; ++(*ptr.count); }\n")
|
||||
FILE(APPEND "${FILENAME}" " return ptr;\n}\n")
|
||||
FILE(APPEND "${FILENAME}" "\n} // namespace dummy\n")
|
||||
FILE(APPEND "${FILENAME}" "\n\n")
|
||||
FILE(APPEND "${FILENAME}" "namespace ${NAMESPACE} {\n")
|
||||
FILE(APPEND "${FILENAME}" " using dummy::shared_ptr; \n")
|
||||
FILE(APPEND "${FILENAME}" " using dummy::dynamic_pointer_cast; \n")
|
||||
FILE(APPEND "${FILENAME}" " using dummy::const_pointer_cast; \n")
|
||||
FILE(APPEND "${FILENAME}" "}\n\n")
|
||||
FILE(APPEND "${FILENAME}" "#endif\n")
|
||||
ENDFUNCTION()
|
||||
|
||||
|
|
@ -1147,9 +1147,8 @@ Array<TYPE, FUN, Allocator> Array<TYPE, FUN, Allocator>::cat( const std::vector<
|
|||
* Interpolate *
|
||||
********************************************************/
|
||||
template<class T>
|
||||
struct is_compatible_double : std::integral_constant<bool,
|
||||
std::is_floating_point<typename std::remove_cv<T>::type>::value ||
|
||||
std::is_integral<typename std::remove_cv<T>::type>::value> {
|
||||
struct is_compatible_double
|
||||
: std::integral_constant<bool, std::is_floating_point<T>::value || std::is_integral<T>::value> {
|
||||
};
|
||||
template<class TYPE>
|
||||
inline typename std::enable_if<is_compatible_double<TYPE>::value, TYPE>::type Array_interp_1D(
|
||||
|
|
|
@ -45,6 +45,7 @@ Domain::Domain( int nx, int ny, int nz, int rnk, int npx, int npy, int npz,
|
|||
Nx(0), Ny(0), Nz(0),
|
||||
Lx(0), Ly(0), Lz(0), Volume(0), BoundaryCondition(0),
|
||||
Comm(MPI_COMM_WORLD),
|
||||
inlet_layers_x(0), inlet_layers_y(0), inlet_layers_z(0),
|
||||
sendCount_x(0), sendCount_y(0), sendCount_z(0), sendCount_X(0), sendCount_Y(0), sendCount_Z(0),
|
||||
sendCount_xy(0), sendCount_yz(0), sendCount_xz(0), sendCount_Xy(0), sendCount_Yz(0), sendCount_xZ(0),
|
||||
sendCount_xY(0), sendCount_yZ(0), sendCount_Xz(0), sendCount_XY(0), sendCount_YZ(0), sendCount_XZ(0),
|
||||
|
@ -90,6 +91,7 @@ Domain::Domain( std::shared_ptr<Database> db, MPI_Comm Communicator):
|
|||
Nx(0), Ny(0), Nz(0),
|
||||
Lx(0), Ly(0), Lz(0), Volume(0), BoundaryCondition(0),
|
||||
Comm(MPI_COMM_NULL),
|
||||
inlet_layers_x(0), inlet_layers_y(0), inlet_layers_z(0),
|
||||
sendCount_x(0), sendCount_y(0), sendCount_z(0), sendCount_X(0), sendCount_Y(0), sendCount_Z(0),
|
||||
sendCount_xy(0), sendCount_yz(0), sendCount_xz(0), sendCount_Xy(0), sendCount_Yz(0), sendCount_xZ(0),
|
||||
sendCount_xY(0), sendCount_yZ(0), sendCount_Xz(0), sendCount_XY(0), sendCount_YZ(0), sendCount_XZ(0),
|
||||
|
@ -132,6 +134,14 @@ void Domain::initialize( std::shared_ptr<Database> db )
|
|||
auto n = d_db->getVector<int>("n");
|
||||
auto L = d_db->getVector<double>("L");
|
||||
//nspheres = d_db->getScalar<int>("nspheres");
|
||||
|
||||
if (d_db->keyExists( "InletLayers" )){
|
||||
auto InletCount = d_db->getVector<int>( "InletLayers" );
|
||||
inlet_layers_x = InletCount[0];
|
||||
inlet_layers_y = InletCount[1];
|
||||
inlet_layers_z = InletCount[2];
|
||||
}
|
||||
|
||||
ASSERT( n.size() == 3u );
|
||||
ASSERT( L.size() == 3u );
|
||||
ASSERT( nproc.size() == 3u );
|
||||
|
@ -148,6 +158,10 @@ void Domain::initialize( std::shared_ptr<Database> db )
|
|||
int myrank;
|
||||
MPI_Comm_rank( Comm, &myrank );
|
||||
rank_info = RankInfoStruct(myrank,nproc[0],nproc[1],nproc[2]);
|
||||
// inlet layers only apply to lower part of domain
|
||||
if (nproc[0] > 0) inlet_layers_x = 0;
|
||||
if (nproc[1] > 0) inlet_layers_y = 0;
|
||||
if (nproc[2] > 0) inlet_layers_z = 0;
|
||||
// Fill remaining variables
|
||||
N = Nx*Ny*Nz;
|
||||
Volume = nx*ny*nx*nproc[0]*nproc[1]*nproc[2]*1.0;
|
||||
|
|
|
@ -124,6 +124,7 @@ public: // Public variables (need to create accessors instead)
|
|||
|
||||
double Lx,Ly,Lz,Volume;
|
||||
int Nx,Ny,Nz,N;
|
||||
int inlet_layers_x, inlet_layers_y, inlet_layers_z;
|
||||
RankInfoStruct rank_info;
|
||||
|
||||
MPI_Comm Comm; // MPI Communicator for this domain
|
||||
|
|
|
@ -51,7 +51,7 @@ template<> MPI_Datatype getMPItype<double>() {
|
|||
********************************************************/
|
||||
// unsigned char
|
||||
template<>
|
||||
size_t packsize<unsigned char>( const unsigned char& rhs )
|
||||
size_t packsize<unsigned char>( const unsigned char& )
|
||||
{
|
||||
return sizeof(unsigned char);
|
||||
}
|
||||
|
@ -67,7 +67,7 @@ void unpack<unsigned char>( unsigned char& data, const char *buffer )
|
|||
}
|
||||
// char
|
||||
template<>
|
||||
size_t packsize<char>( const char& rhs )
|
||||
size_t packsize<char>( const char& )
|
||||
{
|
||||
return sizeof(char);
|
||||
}
|
||||
|
@ -83,7 +83,7 @@ void unpack<char>( char& data, const char *buffer )
|
|||
}
|
||||
// int
|
||||
template<>
|
||||
size_t packsize<int>( const int& rhs )
|
||||
size_t packsize<int>( const int& )
|
||||
{
|
||||
return sizeof(int);
|
||||
}
|
||||
|
@ -99,7 +99,7 @@ void unpack<int>( int& data, const char *buffer )
|
|||
}
|
||||
// unsigned int
|
||||
template<>
|
||||
size_t packsize<unsigned int>( const unsigned int& rhs )
|
||||
size_t packsize<unsigned int>( const unsigned int& )
|
||||
{
|
||||
return sizeof(unsigned int);
|
||||
}
|
||||
|
@ -115,7 +115,7 @@ void unpack<unsigned int>( unsigned int& data, const char *buffer )
|
|||
}
|
||||
// size_t
|
||||
template<>
|
||||
size_t packsize<size_t>( const size_t& rhs )
|
||||
size_t packsize<size_t>( const size_t& )
|
||||
{
|
||||
return sizeof(size_t);
|
||||
}
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -14,305 +14,10 @@
|
|||
along with OPM. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
#include "common/Utilities.h"
|
||||
#include "common/StackTrace.h"
|
||||
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <stdexcept>
|
||||
#include <fstream>
|
||||
#include <string.h>
|
||||
#include <signal.h>
|
||||
#include <math.h>
|
||||
#include <algorithm>
|
||||
|
||||
#ifdef USE_MPI
|
||||
#include "mpi.h"
|
||||
#endif
|
||||
|
||||
// Detect the OS and include system dependent headers
|
||||
#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64) || defined(_MSC_VER)
|
||||
// Note: windows has not been testeds
|
||||
#define USE_WINDOWS
|
||||
#include <windows.h>
|
||||
#include <process.h>
|
||||
#include <stdio.h>
|
||||
#include <tchar.h>
|
||||
#include <psapi.h>
|
||||
#include <DbgHelp.h>
|
||||
#define mkdir(path, mode) _mkdir(path)
|
||||
//#pragma comment(lib, psapi.lib) //added
|
||||
//#pragma comment(linker, /DEFAULTLIB:psapi.lib)
|
||||
#elif defined(__APPLE__)
|
||||
#define USE_MAC
|
||||
#include <sys/time.h>
|
||||
#include <signal.h>
|
||||
#include <execinfo.h>
|
||||
#include <dlfcn.h>
|
||||
#include <mach/mach.h>
|
||||
#include <unistd.h>
|
||||
#elif defined( __linux ) || defined( __linux__ ) || defined( __unix ) || defined( __posix )
|
||||
#define USE_LINUX
|
||||
#include <sys/time.h>
|
||||
#include <execinfo.h>
|
||||
#include <dlfcn.h>
|
||||
#include <malloc.h>
|
||||
#include <unistd.h>
|
||||
#else
|
||||
#error Unknown OS
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef __GNUC__
|
||||
#define USE_ABI
|
||||
#include <cxxabi.h>
|
||||
#endif
|
||||
|
||||
|
||||
/****************************************************************************
|
||||
* Function to terminate the program *
|
||||
****************************************************************************/
|
||||
static bool abort_printMemory = true;
|
||||
static bool abort_printStack = true;
|
||||
static bool abort_throwException = false;
|
||||
static int force_exit = 0;
|
||||
void Utilities::setAbortBehavior( bool printMemory, bool printStack, bool throwException )
|
||||
{
|
||||
abort_printMemory = printMemory;
|
||||
abort_printStack = printStack;
|
||||
abort_throwException = throwException;
|
||||
}
|
||||
void Utilities::abort(const std::string &message, const std::string &filename, const int line)
|
||||
{
|
||||
std::stringstream msg;
|
||||
msg << "Program abort called in file `" << filename << "' at line " << line << std::endl;
|
||||
// Add the memory usage and call stack to the error message
|
||||
if ( abort_printMemory ) {
|
||||
size_t N_bytes = Utilities::getMemoryUsage();
|
||||
msg << "Bytes used = " << N_bytes << std::endl;
|
||||
}
|
||||
if ( abort_printStack ) {
|
||||
std::vector<StackTrace::stack_info> stack = StackTrace::getCallStack();
|
||||
msg << std::endl;
|
||||
msg << "Stack Trace:\n";
|
||||
for (size_t i=0; i<stack.size(); i++)
|
||||
msg << " " << stack[i].print() << std::endl;
|
||||
}
|
||||
msg << std::endl << message << std::endl;
|
||||
// Print the message and abort
|
||||
if ( force_exit>1 ) {
|
||||
exit(-1);
|
||||
} else if ( !abort_throwException ) {
|
||||
// Use MPI_abort (will terminate all processes)
|
||||
force_exit = 2;
|
||||
std::cerr << msg.str();
|
||||
#if defined(USE_MPI) || defined(HAVE_MPI)
|
||||
int initialized=0, finalized=0;
|
||||
MPI_Initialized(&initialized);
|
||||
MPI_Finalized(&finalized);
|
||||
if ( initialized!=0 && finalized==0 )
|
||||
MPI_Abort(MPI_COMM_WORLD,-1);
|
||||
#endif
|
||||
exit(-1);
|
||||
} else if ( force_exit>0 ) {
|
||||
exit(-1);
|
||||
} else {
|
||||
// Throw and standard exception (allows the use of try, catch)
|
||||
throw std::logic_error(msg.str());
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/****************************************************************************
|
||||
* Function to handle MPI errors *
|
||||
****************************************************************************/
|
||||
/*#if defined(USE_MPI) || defined(HAVE_MPI)
|
||||
MPI_Errhandler mpierr;
|
||||
void MPI_error_handler_fun( MPI_Comm *comm, int *err, ... )
|
||||
{
|
||||
if ( *err==MPI_ERR_COMM && *comm==MPI_COMM_WORLD ) {
|
||||
// Special error handling for an invalid MPI_COMM_WORLD
|
||||
std::cerr << "Error invalid MPI_COMM_WORLD";
|
||||
exit(-1);
|
||||
}
|
||||
int msg_len=0;
|
||||
char message[1000];
|
||||
MPI_Error_string( *err, message, &msg_len );
|
||||
if ( msg_len <= 0 )
|
||||
abort("Unkown error in MPI");
|
||||
abort( "Error calling MPI routine:\n" + std::string(message) );
|
||||
}
|
||||
#endif*/
|
||||
|
||||
|
||||
/****************************************************************************
|
||||
* Function to handle unhandled exceptions *
|
||||
****************************************************************************/
|
||||
bool tried_MPI_Abort=false;
|
||||
void term_func_abort(int err)
|
||||
{
|
||||
printf("Exiting due to abort (%i)\n",err);
|
||||
std::vector<StackTrace::stack_info> stack = StackTrace::getCallStack();
|
||||
std::string message = "Stack Trace:\n";
|
||||
for (size_t i=0; i<stack.size(); i++)
|
||||
message += " " + stack[i].print() += "\n";
|
||||
message += "\nExiting\n";
|
||||
// Print the message and abort
|
||||
std::cerr << message;
|
||||
#ifdef USE_MPI
|
||||
if ( !abort_throwException && !tried_MPI_Abort ) {
|
||||
tried_MPI_Abort = true;
|
||||
MPI_Abort(MPI_COMM_WORLD,-1);
|
||||
}
|
||||
#endif
|
||||
exit(-1);
|
||||
}
|
||||
#if defined(USE_LINUX) || defined(USE_MAC)
|
||||
static int tried_throw = 0;
|
||||
#endif
|
||||
void term_func()
|
||||
{
|
||||
// Try to re-throw the last error to get the last message
|
||||
std::string last_message;
|
||||
#if defined(USE_LINUX) || defined(USE_MAC)
|
||||
try {
|
||||
if ( tried_throw==0 ) {
|
||||
tried_throw = 1;
|
||||
throw;
|
||||
}
|
||||
// No active exception
|
||||
} catch (const std::exception &err) {
|
||||
// Caught a std::runtime_error
|
||||
last_message = err.what();
|
||||
} catch (...) {
|
||||
// Caught an unknown exception
|
||||
last_message = "unknown exception occurred.";
|
||||
}
|
||||
#endif
|
||||
std::stringstream msg;
|
||||
msg << "Unhandled exception:" << std::endl;
|
||||
msg << " " << last_message << std::endl;
|
||||
Utilities::abort( msg.str(), __FILE__, __LINE__ );
|
||||
}
|
||||
|
||||
|
||||
/****************************************************************************
|
||||
* Functions to set the error handler *
|
||||
****************************************************************************/
|
||||
static void setTerminateErrorHandler()
|
||||
{
|
||||
std::set_terminate( term_func );
|
||||
signal(SIGABRT,&term_func_abort);
|
||||
signal(SIGFPE,&term_func_abort);
|
||||
signal(SIGILL,&term_func_abort);
|
||||
signal(SIGINT,&term_func_abort);
|
||||
signal(SIGSEGV,&term_func_abort);
|
||||
signal(SIGTERM,&term_func_abort);
|
||||
}
|
||||
void Utilities::setErrorHandlers()
|
||||
{
|
||||
//d_use_MPI_Abort = use_MPI_Abort;
|
||||
//setMPIErrorHandler( SAMRAI::tbox::SAMRAI_MPI::getSAMRAIWorld() );
|
||||
setTerminateErrorHandler();
|
||||
}
|
||||
/*void Utilities::setMPIErrorHandler( const SAMRAI::tbox::SAMRAI_MPI& mpi )
|
||||
{
|
||||
#if defined(USE_MPI) || defined(HAVE_MPI)
|
||||
if ( mpierr.get()==NULL ) {
|
||||
mpierr = boost::shared_ptr<MPI_Errhandler>( new MPI_Errhandler );
|
||||
MPI_Comm_create_errhandler( MPI_error_handler_fun, mpierr.get() );
|
||||
}
|
||||
MPI_Comm_set_errhandler( mpi.getCommunicator(), *mpierr );
|
||||
MPI_Comm_set_errhandler( MPI_COMM_WORLD, *mpierr );
|
||||
#endif
|
||||
}
|
||||
void Utilities::clearMPIErrorHandler( )
|
||||
{
|
||||
#if defined(USE_MPI) || defined(HAVE_MPI)
|
||||
if ( mpierr.get()!=NULL )
|
||||
MPI_Errhandler_free( mpierr.get() ); // Delete the error handler
|
||||
mpierr.reset();
|
||||
MPI_Comm_set_errhandler( MPI_COMM_SELF, MPI_ERRORS_ARE_FATAL );
|
||||
MPI_Comm_set_errhandler( MPI_COMM_WORLD, MPI_ERRORS_ARE_FATAL );
|
||||
#endif
|
||||
}*/
|
||||
|
||||
|
||||
/****************************************************************************
|
||||
* Function to get the memory usage *
|
||||
* Note: this function should be thread-safe *
|
||||
****************************************************************************/
|
||||
#if defined(USE_MAC)
|
||||
// Get the page size on mac
|
||||
static size_t page_size = static_cast<size_t>(sysconf(_SC_PAGESIZE));
|
||||
#endif
|
||||
static size_t N_bytes_initialization = Utilities::getMemoryUsage();
|
||||
size_t Utilities::getMemoryUsage()
|
||||
{
|
||||
size_t N_bytes = 0;
|
||||
#if defined(USE_LINUX)
|
||||
struct mallinfo meminfo = mallinfo();
|
||||
size_t size_hblkhd = static_cast<unsigned int>( meminfo.hblkhd );
|
||||
size_t size_uordblks = static_cast<unsigned int>( meminfo.uordblks );
|
||||
N_bytes = size_hblkhd + size_uordblks;
|
||||
#elif defined(USE_MAC)
|
||||
struct task_basic_info t_info;
|
||||
mach_msg_type_number_t t_info_count = TASK_BASIC_INFO_COUNT;
|
||||
if (KERN_SUCCESS != task_info(mach_task_self(),
|
||||
TASK_BASIC_INFO, (task_info_t)&t_info,
|
||||
&t_info_count)) {
|
||||
return 0;
|
||||
}
|
||||
N_bytes = t_info.virtual_size;
|
||||
#elif defined(USE_WINDOWS)
|
||||
PROCESS_MEMORY_COUNTERS memCounter;
|
||||
GetProcessMemoryInfo( GetCurrentProcess(), &memCounter, sizeof(memCounter) );
|
||||
N_bytes = memCounter.WorkingSetSize;
|
||||
#endif
|
||||
return N_bytes;
|
||||
}
|
||||
|
||||
|
||||
/****************************************************************************
|
||||
* Functions to get the time and timer resolution *
|
||||
****************************************************************************/
|
||||
#if defined(USE_WINDOWS)
|
||||
double Utilities::time()
|
||||
{
|
||||
LARGE_INTEGER end, f;
|
||||
QueryPerformanceFrequency(&f);
|
||||
QueryPerformanceCounter(&end);
|
||||
double time = ((double)end.QuadPart)/((double)f.QuadPart);
|
||||
return time;
|
||||
}
|
||||
double Utilities::tick()
|
||||
{
|
||||
LARGE_INTEGER f;
|
||||
QueryPerformanceFrequency(&f);
|
||||
double resolution = ((double)1.0)/((double)f.QuadPart);
|
||||
return resolution;
|
||||
}
|
||||
#elif defined(USE_LINUX) || defined(USE_MAC)
|
||||
double Utilities::time()
|
||||
{
|
||||
timeval current_time;
|
||||
gettimeofday(¤t_time,NULL);
|
||||
double time = ((double)current_time.tv_sec)+1e-6*((double)current_time.tv_usec);
|
||||
return time;
|
||||
}
|
||||
double Utilities::tick()
|
||||
{
|
||||
timeval start, end;
|
||||
gettimeofday(&start,NULL);
|
||||
gettimeofday(&end,NULL);
|
||||
while ( end.tv_sec==start.tv_sec && end.tv_usec==start.tv_usec )
|
||||
gettimeofday(&end,NULL);
|
||||
double resolution = ((double)(end.tv_sec-start.tv_sec))+1e-6*((double)(end.tv_usec-start.tv_usec));
|
||||
return resolution;
|
||||
}
|
||||
#else
|
||||
#error Unknown OS
|
||||
#endif
|
||||
|
||||
|
||||
// Factor a number into it's prime factors
|
||||
std::vector<int> Utilities::factor(size_t number)
|
||||
|
|
|
@ -16,91 +16,42 @@
|
|||
#ifndef included_Utilities
|
||||
#define included_Utilities
|
||||
|
||||
#include <chrono>
|
||||
#include <cstdarg>
|
||||
#include <iostream>
|
||||
#include <mutex>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <sys/stat.h>
|
||||
#include <thread>
|
||||
#include <vector>
|
||||
|
||||
#include "StackTrace/Utilities.h"
|
||||
|
||||
|
||||
namespace Utilities {
|
||||
|
||||
|
||||
/*!
|
||||
* Aborts the run after printing an error message with file and
|
||||
* linenumber information.
|
||||
*/
|
||||
void abort( const std::string &message, const std::string &filename, const int line );
|
||||
|
||||
|
||||
/*!
|
||||
* Set the behavior of abort
|
||||
* @param printMemory Print the current memory usage (default is true)
|
||||
* @param printStack Print the current call stack (default is true)
|
||||
* @param throwException Throw an exception instead of MPI_Abort (default is false)
|
||||
*/
|
||||
void setAbortBehavior( bool printMemory, bool printStack, bool throwException );
|
||||
|
||||
//! Function to set the error handlers
|
||||
void setErrorHandlers();
|
||||
|
||||
|
||||
/*!
|
||||
* Function to get the memory availible.
|
||||
* This function will return the total memory availible
|
||||
* Note: depending on the implimentation, this number may be rounded to
|
||||
* to a multiple of the page size.
|
||||
* If this function fails, it will return 0.
|
||||
*/
|
||||
size_t getSystemMemory();
|
||||
|
||||
|
||||
/*!
|
||||
* Function to get the memory usage.
|
||||
* This function will return the total memory used by the application.
|
||||
* Note: depending on the implimentation, this number may be rounded to
|
||||
* to a multiple of the page size.
|
||||
* If this function fails, it will return 0.
|
||||
*/
|
||||
size_t getMemoryUsage();
|
||||
|
||||
|
||||
//! Function to get an arbitrary point in time
|
||||
double time();
|
||||
|
||||
|
||||
//! Function to get the resolution of time
|
||||
double tick();
|
||||
// Functions inherited from StackTrace::Utilities
|
||||
using StackTrace::Utilities::abort;
|
||||
using StackTrace::Utilities::cause_segfault;
|
||||
using StackTrace::Utilities::clearErrorHandlers;
|
||||
using StackTrace::Utilities::exec;
|
||||
using StackTrace::Utilities::getMemoryUsage;
|
||||
using StackTrace::Utilities::getSystemMemory;
|
||||
using StackTrace::Utilities::setAbortBehavior;
|
||||
using StackTrace::Utilities::setErrorHandlers;
|
||||
using StackTrace::Utilities::tick;
|
||||
using StackTrace::Utilities::time;
|
||||
using StackTrace::Utilities::sleep_ms;
|
||||
using StackTrace::Utilities::sleep_s;
|
||||
|
||||
|
||||
//! std::string version of sprintf
|
||||
inline std::string stringf( const char *format, ... );
|
||||
|
||||
|
||||
/*!
|
||||
* Sleep for X ms
|
||||
* @param N Time to sleep (ms)
|
||||
*/
|
||||
inline void sleep_ms( int N ) { std::this_thread::sleep_for( std::chrono::milliseconds( N ) ); }
|
||||
|
||||
|
||||
/*!
|
||||
* Sleep for X s
|
||||
* @param N Time to sleep (s)
|
||||
*/
|
||||
inline void sleep_s( int N ) { std::this_thread::sleep_for( std::chrono::seconds( N ) ); }
|
||||
|
||||
|
||||
//! Factor a number into it's prime factors
|
||||
std::vector<int> factor(size_t number);
|
||||
|
||||
//! Print AMP Banner
|
||||
|
||||
//! Null use function
|
||||
void nullUse( void* );
|
||||
|
||||
|
||||
} // namespace Utilities
|
||||
|
||||
|
||||
|
|
|
@ -121,7 +121,8 @@ void ScaLBL_ColorModel::SetDomain(){
|
|||
N = Nx*Ny*Nz;
|
||||
id = new char [N];
|
||||
for (int i=0; i<Nx*Ny*Nz; i++) Dm->id[i] = 1; // initialize this way
|
||||
Averages = std::shared_ptr<TwoPhase> ( new TwoPhase(Dm) ); // TwoPhase analysis object
|
||||
//Averages = std::shared_ptr<TwoPhase> ( new TwoPhase(Dm) ); // TwoPhase analysis object
|
||||
Averages = std::shared_ptr<SubPhase> ( new SubPhase(Dm) ); // TwoPhase analysis object
|
||||
MPI_Barrier(comm);
|
||||
Dm->CommInit();
|
||||
MPI_Barrier(comm);
|
||||
|
@ -168,7 +169,7 @@ void ScaLBL_ColorModel::ReadInput(){
|
|||
|
||||
if (rank == 0) cout << "Domain set." << endl;
|
||||
|
||||
Averages->SetParams(rhoA,rhoB,tauA,tauB,Fx,Fy,Fz,alpha);
|
||||
Averages->SetParams(rhoA,rhoB,tauA,tauB,Fx,Fy,Fz,alpha,beta);
|
||||
}
|
||||
|
||||
void ScaLBL_ColorModel::AssignComponentLabels(double *phase)
|
||||
|
@ -427,35 +428,28 @@ void ScaLBL_ColorModel::Run(){
|
|||
bool SET_CAPILLARY_NUMBER = false;
|
||||
bool MORPH_ADAPT = false;
|
||||
bool USE_MORPH = false;
|
||||
int MAX_MORPH_TIMESTEPS = 20000;
|
||||
int CURRENT_MORPH_TIMESTEPS=0;
|
||||
int morph_interval;
|
||||
double morph_delta;
|
||||
int analysis_interval = 1000; // number of timesteps in between in situ analysis
|
||||
int MAX_MORPH_TIMESTEPS = 50000; // maximum number of LBM timesteps to spend in morphological adaptation routine
|
||||
int RAMP_TIMESTEPS = 0;//50000; // number of timesteps to run initially (to get a reasonable velocity field before other pieces kick in)
|
||||
int morph_interval = 1000000;
|
||||
int CURRENT_MORPH_TIMESTEPS=0; // counter for number of timesteps spent in morphological adaptation routine (reset each time)
|
||||
int CURRENT_STEADY_TIMESTEPS=0; // counter for number of timesteps spent in morphological adaptation routine (reset each time)
|
||||
int morph_timesteps = 0;
|
||||
int ramp_timesteps = 50000;
|
||||
double capillary_number;
|
||||
double tolerance = 1.f;
|
||||
double morph_delta = 0.0;
|
||||
double capillary_number = 0.0;
|
||||
double tolerance = 0.01;
|
||||
double Ca_previous = 0.f;
|
||||
|
||||
|
||||
double delta_volume = 0.0;
|
||||
double delta_volume_target = 0.0;
|
||||
double RESIDUAL_ENDPOINT_THRESHOLD = 0.04;
|
||||
|
||||
/* double TARGET_SATURATION = 0.f;
|
||||
int target_saturation_index=0;
|
||||
std::vector<double> target_saturation;
|
||||
if (color_db->keyExists( "target_saturation" )){
|
||||
target_saturation = color_db->getVector<double>( "target_saturation" );
|
||||
TARGET_SATURATION = target_saturation[0];
|
||||
if (color_db->keyExists( "residual_endpoint_threshold" )){
|
||||
RESIDUAL_ENDPOINT_THRESHOLD = color_db->getScalar<double>( "residual_endpoint_threshold" );
|
||||
}
|
||||
*/
|
||||
if (color_db->keyExists( "capillary_number" )){
|
||||
capillary_number = color_db->getScalar<double>( "capillary_number" );
|
||||
SET_CAPILLARY_NUMBER=true;
|
||||
}
|
||||
else{
|
||||
capillary_number=0;
|
||||
}
|
||||
if (BoundaryCondition != 0 && SET_CAPILLARY_NUMBER==true){
|
||||
if (rank == 0) printf("WARINING: capillary number target only supported for BC = 0 \n");
|
||||
SET_CAPILLARY_NUMBER=false;
|
||||
|
@ -463,25 +457,16 @@ void ScaLBL_ColorModel::Run(){
|
|||
if (analysis_db->keyExists( "morph_delta" )){
|
||||
morph_delta = analysis_db->getScalar<double>( "morph_delta" );
|
||||
}
|
||||
else{
|
||||
morph_delta=0.0;
|
||||
USE_MORPH = false;
|
||||
}
|
||||
if (analysis_db->keyExists( "morph_interval" )){
|
||||
morph_interval = analysis_db->getScalar<int>( "morph_interval" );
|
||||
USE_MORPH = true;
|
||||
}
|
||||
else{
|
||||
morph_interval=1000000;
|
||||
USE_MORPH = false;
|
||||
}
|
||||
if (analysis_db->keyExists( "tolerance" )){
|
||||
tolerance = analysis_db->getScalar<double>( "tolerance" );
|
||||
}
|
||||
else{
|
||||
tolerance = 0.02;
|
||||
if (analysis_db->keyExists( "analysis_interval" )){
|
||||
analysis_interval = analysis_db->getScalar<int>( "analysis_interval" );
|
||||
}
|
||||
int analysis_interval = analysis_db->getScalar<int>( "analysis_interval" );
|
||||
|
||||
if (rank==0){
|
||||
printf("********************************************************\n");
|
||||
|
@ -494,7 +479,7 @@ void ScaLBL_ColorModel::Run(){
|
|||
MPI_Barrier(comm);
|
||||
starttime = MPI_Wtime();
|
||||
//.........................................
|
||||
|
||||
|
||||
//************ MAIN ITERATION LOOP ***************************************/
|
||||
PROFILE_START("Loop");
|
||||
//std::shared_ptr<Database> analysis_db;
|
||||
|
@ -581,21 +566,23 @@ void ScaLBL_ColorModel::Run(){
|
|||
PROFILE_STOP("Update");
|
||||
|
||||
// Run the analysis
|
||||
analysis.run( timestep, *Averages, Phi, Pressure, Velocity, fq, Den );
|
||||
|
||||
// allow initial ramp-up to get closer to steady state
|
||||
if (timestep > ramp_timesteps && timestep%analysis_interval == 0 && USE_MORPH){
|
||||
analysis.finish();
|
||||
if ( morph_timesteps > morph_interval ){
|
||||
//analysis.run( timestep, *Averages, Phi, Pressure, Velocity, fq, Den );
|
||||
analysis.basic( timestep, *Averages, Phi, Pressure, Velocity, fq, Den );
|
||||
|
||||
double volB = Averages->Volume_w();
|
||||
double volA = Averages->Volume_n();
|
||||
double vA_x = Averages->van_global(0);
|
||||
double vA_y = Averages->van_global(1);
|
||||
double vA_z = Averages->van_global(2);
|
||||
double vB_x = Averages->vaw_global(0);
|
||||
double vB_y = Averages->vaw_global(1);
|
||||
double vB_z = Averages->vaw_global(2);
|
||||
// allow initial ramp-up to get closer to steady state
|
||||
if (timestep > RAMP_TIMESTEPS && timestep%analysis_interval == 0 && USE_MORPH){
|
||||
analysis.finish();
|
||||
CURRENT_STEADY_TIMESTEPS += analysis_interval;
|
||||
|
||||
if ( morph_timesteps > morph_interval ){
|
||||
double volB = Averages->gwb.V;
|
||||
double volA = Averages->gnb.V;
|
||||
double vA_x = Averages->gnb.Px/Averages->gnb.M;
|
||||
double vA_y = Averages->gnb.Py/Averages->gnb.M;
|
||||
double vA_z = Averages->gnb.Pz/Averages->gnb.M;
|
||||
double vB_x = Averages->gwb.Px/Averages->gnb.M;
|
||||
double vB_y = Averages->gwb.Py/Averages->gnb.M;
|
||||
double vB_z = Averages->gwb.Pz/Averages->gnb.M;
|
||||
double muA = rhoA*(tauA-0.5)/3.f;
|
||||
double muB = rhoB*(tauB-0.5)/3.f;
|
||||
|
||||
|
@ -605,22 +592,34 @@ void ScaLBL_ColorModel::Run(){
|
|||
double Ca = fabs(volA*muA*flow_rate_A + volB*muB*flow_rate_B)/(5.796*alpha*double((Nx-2)*(Ny-2)*(Nz-2)*nprocs));
|
||||
|
||||
double force_magnitude = sqrt(Fx*Fx + Fy*Fy + Fz*Fz);
|
||||
//double krA = muA*volA*flow_rate_A/force_magnitude/double(Nx*Ny*Nz*nprocs);
|
||||
//double krB = muB*volB*flow_rate_B/force_magnitude/double(Nx*Ny*Nz*nprocs);
|
||||
|
||||
if (fabs((Ca - Ca_previous)/Ca) < tolerance ){
|
||||
MORPH_ADAPT = true;
|
||||
CURRENT_MORPH_TIMESTEPS=0;
|
||||
delta_volume_target = (volA + volB)*morph_delta; // set target volume chnage
|
||||
delta_volume_target = (volA + volB)*morph_delta; // set target volume change
|
||||
Averages->Full();
|
||||
Averages->Write(timestep);
|
||||
analysis.WriteVisData( timestep, *Averages, Phi, Pressure, Velocity, fq, Den );
|
||||
analysis.finish();
|
||||
|
||||
if (rank==0){
|
||||
printf("** WRITE STEADY POINT *** ");
|
||||
printf("Ca = %f, (previous = %f) \n",Ca,Ca_previous);
|
||||
volA /= double((Nx-2)*(Ny-2)*(Nz-2)*nprocs);
|
||||
volB /= double((Nx-2)*(Ny-2)*(Nz-2)*nprocs);
|
||||
FILE * kr_log_file = fopen("relperm.csv","a");
|
||||
fprintf(kr_log_file,"%i %.5g %.5g %.5g %.5g %.5g %.5g ",timestep-analysis_interval+20,muA,muB,5.796*alpha,Fx,Fy,Fz);
|
||||
fprintf(kr_log_file,"%i %.5g %.5g %.5g %.5g %.5g %.5g ",CURRENT_STEADY_TIMESTEPS,muA,muB,5.796*alpha,Fx,Fy,Fz);
|
||||
fprintf(kr_log_file,"%.5g %.5g %.5g %.5g %.5g %.5g %.5g %.5g\n",volA,volB,vA_x,vA_y,vA_z,vB_x,vB_y,vB_z);
|
||||
fclose(kr_log_file);
|
||||
// flow reversal criteria based on fractional flow
|
||||
if (delta_volume_target < 0.0 &&
|
||||
volA*sqrt(vA_x*vA_x + vA_y*vA_y + vA_z*vA_z)/(volB*sqrt(vB_x*vB_x + vB_y*vB_y + vB_z*vB_z)) < RESIDUAL_ENDPOINT_THRESHOLD){
|
||||
delta_volume_target *= (-1.0);
|
||||
}
|
||||
else if (delta_volume_target > 0.0 &&
|
||||
(volB*sqrt(vB_x*vB_x + vB_y*vB_y + vB_z*vB_z)) / (volA*sqrt(vA_x*vA_x + vA_y*vA_y + vA_z*vA_z)) < RESIDUAL_ENDPOINT_THRESHOLD){
|
||||
delta_volume_target *= (-1.0);
|
||||
}
|
||||
|
||||
printf(" Measured capillary number %f \n ",Ca);
|
||||
}
|
||||
|
@ -641,23 +640,8 @@ void ScaLBL_ColorModel::Run(){
|
|||
Fz *= 1e-6/force_magnitude;
|
||||
}
|
||||
if (rank == 0) printf(" -- adjust force by factor %f \n ",capillary_number / Ca);
|
||||
Averages->SetParams(rhoA,rhoB,tauA,tauB,Fx,Fy,Fz,alpha);
|
||||
Averages->SetParams(rhoA,rhoB,tauA,tauB,Fx,Fy,Fz,alpha,beta);
|
||||
}
|
||||
|
||||
/*if (morph_delta > 0.f){
|
||||
// wetting phase saturation will decrease
|
||||
while (current_saturation < TARGET_SATURATION && target_saturation_index < target_saturation.size() ){
|
||||
TARGET_SATURATION = target_saturation[target_saturation_index++];
|
||||
}
|
||||
}
|
||||
else{
|
||||
// wetting phase saturation will increase
|
||||
while (current_saturation > TARGET_SATURATION && target_saturation_index < target_saturation.size() ){
|
||||
TARGET_SATURATION = target_saturation[target_saturation_index++];
|
||||
if (rank==0) printf(" Set target saturation as %f (currently %f)\n",TARGET_SATURATION,current_saturation);
|
||||
}
|
||||
}
|
||||
*/
|
||||
}
|
||||
else{
|
||||
if (rank==0){
|
||||
|
@ -676,30 +660,12 @@ void ScaLBL_ColorModel::Run(){
|
|||
if ( (delta_volume - delta_volume_target)/delta_volume_target > 0.0 ){
|
||||
MORPH_ADAPT = false;
|
||||
delta_volume = 0.0;
|
||||
CURRENT_STEADY_TIMESTEPS=0;
|
||||
}
|
||||
else if (CURRENT_MORPH_TIMESTEPS > MAX_MORPH_TIMESTEPS) {
|
||||
MORPH_ADAPT = false;
|
||||
CURRENT_STEADY_TIMESTEPS=0;
|
||||
}
|
||||
/*if ((delta_volume_target - delta_volume) / delta_volume > 0.f){
|
||||
morph_delta *= 1.01*min((delta_volume_target - delta_volume) / delta_volume, 2.0);
|
||||
if (morph_delta > 1.f) morph_delta = 1.f;
|
||||
if (morph_delta < -1.f) morph_delta = -1.f;
|
||||
if (fabs(morph_delta) < 0.05 ) morph_delta = 0.05*(morph_delta)/fabs(morph_delta); // set minimum
|
||||
if (rank==0) printf(" Adjust morph delta: %f \n", morph_delta);
|
||||
}
|
||||
if (delta_volume_target < 0.f){
|
||||
if (volB/(volA + volB) > TARGET_SATURATION){
|
||||
MORPH_ADAPT = false;
|
||||
TARGET_SATURATION = target_saturation[target_saturation_index++];
|
||||
}
|
||||
}
|
||||
else{
|
||||
if (volB/(volA + volB) < TARGET_SATURATION){
|
||||
MORPH_ADAPT = false;
|
||||
TARGET_SATURATION = target_saturation[target_saturation_index++];
|
||||
}
|
||||
}
|
||||
*/
|
||||
MPI_Barrier(comm);
|
||||
}
|
||||
morph_timesteps += analysis_interval;
|
||||
|
@ -809,9 +775,9 @@ double ScaLBL_ColorModel::MorphInit(const double beta, const double target_delta
|
|||
}
|
||||
|
||||
|
||||
if (volume_connected < 0.05*volume_initial){
|
||||
// if connected volume is less than 5% just delete the whole thing
|
||||
if (rank==0) printf("Connected region has shrunk to less than 5% of total fluid volume (remove the whole thing) \n");
|
||||
if (volume_connected < 0.025*volume_initial){
|
||||
// if connected volume is less than 2.5% just delete the whole thing
|
||||
if (rank==0) printf("Connected region has shrunk to less than 2.5% of total fluid volume (remove the whole thing) \n");
|
||||
}
|
||||
else {
|
||||
if (rank==0) printf("MorphGrow with target volume fraction change %f \n", target_delta_volume/volume_initial);
|
||||
|
@ -820,7 +786,6 @@ double ScaLBL_ColorModel::MorphInit(const double beta, const double target_delta
|
|||
target_delta_volume_incremental = 0.01*volume_initial*target_delta_volume/fabs(target_delta_volume);
|
||||
delta_volume = MorphGrow(Averages->SDs,phase_distance,phase_id,Averages->Dm, target_delta_volume_incremental);
|
||||
|
||||
|
||||
for (int k=0; k<Nz; k++){
|
||||
for (int j=0; j<Ny; j++){
|
||||
for (int i=0; i<Nx; i++){
|
||||
|
|
|
@ -61,7 +61,8 @@ public:
|
|||
std::shared_ptr<Domain> Mask; // this domain is for lbm
|
||||
std::shared_ptr<ScaLBL_Communicator> ScaLBL_Comm;
|
||||
std::shared_ptr<ScaLBL_Communicator> ScaLBL_Comm_Regular;
|
||||
std::shared_ptr<TwoPhase> Averages;
|
||||
//std::shared_ptr<TwoPhase> Averages;
|
||||
std::shared_ptr<SubPhase> Averages;
|
||||
|
||||
// input database
|
||||
std::shared_ptr<Database> db;
|
||||
|
|
|
@ -247,13 +247,21 @@ void ScaLBL_MRTModel::Run(){
|
|||
Morphology.ComputeScalar(Distance,0.f);
|
||||
//Morphology.PrintAll();
|
||||
double mu = (tau-0.5)/3.f;
|
||||
double Vs = Morphology.V();
|
||||
double As = Morphology.A();
|
||||
double Hs = Morphology.H();
|
||||
double Xs = Morphology.X();
|
||||
Vs=sumReduce( Dm->Comm, Vs);
|
||||
As=sumReduce( Dm->Comm, As);
|
||||
Hs=sumReduce( Dm->Comm, Hs);
|
||||
Xs=sumReduce( Dm->Comm, Xs);
|
||||
if (rank==0) {
|
||||
double h = Lz/double(Nz);
|
||||
double absperm = h*h*mu*sqrt(vax*vax+vay*vay+vaz*vaz)/sqrt(Fx*Fx+Fy*Fy+Fz*Fz);
|
||||
printf(" %f\n",absperm);
|
||||
FILE * log_file = fopen("Permeability.csv","a");
|
||||
fprintf(log_file,"%i %.8g %.8g %.8g %.8g %.8g %.8g %.8g %.8g %.8g %.8g %.8g %.8g\n",timestep, Fx, Fy, Fz, mu,
|
||||
Morphology.V(),Morphology.A(),Morphology.J(),Morphology.X(),vax,vay,vaz, absperm);
|
||||
Vs,As,Hs,Xs,vax,vay,vaz, absperm);
|
||||
fclose(log_file);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -11,9 +11,7 @@ cmake \
|
|||
-D CMAKE_BUILD_TYPE:STRING=Release \
|
||||
-D CMAKE_C_COMPILER:PATH=mpicc \
|
||||
-D CMAKE_CXX_COMPILER:PATH=mpicxx \
|
||||
-D CMAKE_C_FLAGS="-std=c++11" \
|
||||
-D CMAKE_CXX_FLAGS="-std=c++11" \
|
||||
-D CMAKE_CXX_STANDARD=11 \
|
||||
-D CMAKE_CXX_STANDARD=14 \
|
||||
-D USE_CUDA=1 \
|
||||
-D CMAKE_CUDA_FLAGS="-arch sm_70 -Xptxas=-v -Xptxas -dlcm=cg -lineinfo" \
|
||||
-D CMAKE_CUDA_HOST_COMPILER="/sw/summit/gcc/6.4.0/bin/gcc" \
|
||||
|
@ -21,6 +19,12 @@ cmake \
|
|||
-D MPI_COMPILER:BOOL=TRUE \
|
||||
-D MPIEXEC=mpirun \
|
||||
-D USE_EXT_MPI_FOR_SERIAL_TESTS:BOOL=TRUE \
|
||||
-D USE_HDF5=1 \
|
||||
-D HDF5_DIRECTORY="/ccs/proj/bip176/TPLs/hdf5/" \
|
||||
-D HDF5_LIB="/ccs/proj/bip176/TPLs/hdf5/lib/libhdf5.a" \
|
||||
-D USE_SILO=1 \
|
||||
-D SILO_LIB="/ccs/proj/bip176/TPLs/silo/lib/libsiloh5.a" \
|
||||
-D SILO_DIRECTORY="/ccs/proj/bip176/TPLs/silo/" \
|
||||
-D USE_DOXYGEN:BOOL=false \
|
||||
-D USE_TIMER=0 \
|
||||
~/LBPM-WIA
|
||||
|
|
|
@ -64,7 +64,6 @@ ADD_LBPM_TEST_1_2_4( TestBlobIdentify )
|
|||
ADD_LBPM_TEST_PARALLEL( TestSegDist 8 )
|
||||
ADD_LBPM_TEST_PARALLEL( TestCommD3Q19 8 )
|
||||
ADD_LBPM_TEST_1_2_4( testCommunication )
|
||||
ADD_LBPM_TEST_1_2_4( testUtilities )
|
||||
ADD_LBPM_TEST( TestWriter )
|
||||
IF ( USE_NETCDF )
|
||||
ADD_LBPM_TEST_PARALLEL( TestNetcdf 8 )
|
||||
|
|
|
@ -4,8 +4,8 @@
|
|||
#include <exception>
|
||||
#include <stdexcept>
|
||||
#include <fstream>
|
||||
#include <memory>
|
||||
|
||||
#include "shared_ptr.h"
|
||||
#include "common/UnitTest.h"
|
||||
#include "common/Utilities.h"
|
||||
#include "common/MPI_Helpers.h"
|
||||
|
@ -233,7 +233,7 @@ int main(int argc, char **argv)
|
|||
MPI_Comm comm = MPI_COMM_WORLD;
|
||||
MPI_Comm_rank(comm,&rank);
|
||||
MPI_Comm_size(comm,&nprocs);
|
||||
Utilities::setAbortBehavior(true,true,false);
|
||||
Utilities::setAbortBehavior(true,2);
|
||||
Utilities::setErrorHandlers();
|
||||
UnitTest ut;
|
||||
|
||||
|
|
|
@ -13,7 +13,6 @@
|
|||
#include "common/Domain.h"
|
||||
#include "analysis/distance.h"
|
||||
#include "analysis/morphology.h"
|
||||
#include "analysis/runAnalysis.h"
|
||||
|
||||
//*************************************************************************
|
||||
// Morpohologica pre-processor
|
||||
|
|
|
@ -1,145 +0,0 @@
|
|||
#include <iostream>
|
||||
#include <vector>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <time.h>
|
||||
#include <sys/stat.h>
|
||||
#include <math.h>
|
||||
#include <stdexcept>
|
||||
#include <string.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include "common/Utilities.h"
|
||||
#include "common/StackTrace.h"
|
||||
#include "common/UnitTest.h"
|
||||
#include "common/MPI_Helpers.h"
|
||||
|
||||
|
||||
// Detect the OS (defines which tests we allow to fail)
|
||||
#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64) || defined(_MSC_VER)
|
||||
#define USE_WINDOWS
|
||||
#elif defined(__APPLE__)
|
||||
#define USE_MAC
|
||||
#elif defined( __linux ) || defined( __linux__ ) || defined( __unix ) || defined( __posix )
|
||||
#define USE_LINUX
|
||||
#else
|
||||
#error Unknown OS
|
||||
#endif
|
||||
|
||||
|
||||
// Function to return the call stack
|
||||
std::vector<std::string> get_call_stack()
|
||||
{
|
||||
std::vector<StackTrace::stack_info> stack = StackTrace::getCallStack();
|
||||
std::vector<std::string> stack2(stack.size());
|
||||
for (size_t i=0; i<stack.size(); i++)
|
||||
stack2[i] = stack[i].print();
|
||||
// Trick compiler to skip inline for this function with fake recursion
|
||||
if ( stack.size() > 10000 ) { stack2 = get_call_stack(); }
|
||||
return stack2;
|
||||
}
|
||||
|
||||
|
||||
// The main function
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
int rank = 0;
|
||||
MPI_Init(&argc,&argv);
|
||||
MPI_Comm comm = MPI_COMM_WORLD;
|
||||
MPI_Comm_rank(comm,&rank);
|
||||
UnitTest ut;
|
||||
Utilities::setAbortBehavior( true, true, true );
|
||||
|
||||
// Limit the scope of variables
|
||||
{
|
||||
// Test the memory usage
|
||||
double t0 = Utilities::time();
|
||||
size_t n_bytes1 = Utilities::getMemoryUsage();
|
||||
double time1 = Utilities::time() - t0;
|
||||
uint64_t *tmp = new uint64_t[0x100000];
|
||||
memset(tmp,0xAA,0x100000*sizeof(uint64_t));
|
||||
Utilities::nullUse( tmp );
|
||||
t0 = Utilities::time();
|
||||
size_t n_bytes2 = Utilities::getMemoryUsage();
|
||||
double time2 = Utilities::time() - t0;
|
||||
delete [] tmp;
|
||||
t0 = Utilities::time();
|
||||
size_t n_bytes3 = Utilities::getMemoryUsage();
|
||||
double time3 = Utilities::time() - t0;
|
||||
std::cout << "Number of bytes used for a basic test: " << n_bytes1 << ", " << n_bytes2 << ", " << n_bytes3 << std::endl;
|
||||
std::cout << " Time to query: " << time1*1e6 << " us, " << time2*1e6 << " us, " << time3*1e6 << " us" << std::endl;
|
||||
if ( n_bytes1==0 ) {
|
||||
ut.failure("getMemoryUsage returns 0");
|
||||
} else {
|
||||
ut.passes("getMemoryUsage returns non-zero");
|
||||
if ( n_bytes2>n_bytes1 ) {
|
||||
ut.passes("getMemoryUsage increases size");
|
||||
} else {
|
||||
#if defined(USE_MAC)
|
||||
ut.expected_failure("getMemoryUsage does not increase size");
|
||||
#else
|
||||
ut.failure("getMemoryUsage increases size");
|
||||
#endif
|
||||
}
|
||||
if ( n_bytes1==n_bytes3 ) {
|
||||
ut.passes("getMemoryUsage decreases size properly");
|
||||
} else {
|
||||
#if defined(USE_MAC) || defined(USE_WINDOWS)
|
||||
ut.expected_failure("getMemoryUsage does not decrease size properly");
|
||||
#else
|
||||
ut.failure("getMemoryUsage does not decrease size properly");
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
// Test getting the current call stack
|
||||
std::vector<std::string> call_stack = get_call_stack();
|
||||
if ( rank==0 ) {
|
||||
std::cout << "Call stack:" << std::endl;
|
||||
for (size_t i=0; i<call_stack.size(); i++)
|
||||
std::cout << " " << call_stack[i] << std::endl;
|
||||
}
|
||||
if ( !call_stack.empty() ) {
|
||||
ut.passes("non empty call stack");
|
||||
if ( call_stack[0].find("get_call_stack()") != std::string::npos )
|
||||
ut.passes("call stack decoded function symbols");
|
||||
else
|
||||
ut.expected_failure("call stack was unable to decode function symbols");
|
||||
} else {
|
||||
ut.failure("non empty call stack");
|
||||
}
|
||||
|
||||
// Test catching an error
|
||||
try {
|
||||
ERROR("Test");
|
||||
ut.failure("Failed to catch RAY_ERROR");
|
||||
} catch (...) {
|
||||
ut.passes("Caught RAY_ERROR");
|
||||
}
|
||||
try {
|
||||
throw std::logic_error("test");
|
||||
ut.failure("Failed to catch exception");
|
||||
} catch (...) {
|
||||
ut.passes("Caught exception");
|
||||
}
|
||||
|
||||
// Test time/tick
|
||||
double time = Utilities::time();
|
||||
double res = Utilities::tick();
|
||||
if ( time==0 || res==0 )
|
||||
ut.failure("time/tick");
|
||||
else
|
||||
ut.passes("time/tick");
|
||||
|
||||
}
|
||||
|
||||
// Finished
|
||||
ut.report();
|
||||
size_t N_errors = ut.NumFailGlobal();
|
||||
if ( N_errors==0 )
|
||||
printf("All tests passed\n");
|
||||
MPI_Finalize();
|
||||
return (int) N_errors;
|
||||
}
|
||||
|
||||
|
2
threadpool/Readme.txt
Normal file
2
threadpool/Readme.txt
Normal file
|
@ -0,0 +1,2 @@
|
|||
This directory contains code external code released with permission under the license of this project.
|
||||
|
|
@ -55,5 +55,43 @@ static int create_atomic_pthread_lock()
|
|||
int atomic_pthread_lock_initialized = create_atomic_pthread_lock();
|
||||
#endif
|
||||
|
||||
|
||||
// Atomic operations for floating types
|
||||
double atomic_add( double volatile *x, double y )
|
||||
{
|
||||
static_assert( sizeof( double ) == sizeof( int64_atomic ), "Unexpected size" );
|
||||
union U {
|
||||
double d;
|
||||
int64_atomic i;
|
||||
};
|
||||
U a, b;
|
||||
bool swap = false;
|
||||
auto x2 = reinterpret_cast<int64_atomic volatile *>( x );
|
||||
while ( !swap ) {
|
||||
a.i = atomic_add( x2, 0 );
|
||||
b.d = a.d + y;
|
||||
swap = atomic_compare_and_swap( x2, a.i, b.i );
|
||||
}
|
||||
return b.d;
|
||||
}
|
||||
float atomic_add( float volatile *x, float y )
|
||||
{
|
||||
static_assert( sizeof( float ) == sizeof( int32_atomic ), "Unexpected size" );
|
||||
union U {
|
||||
float d;
|
||||
int32_atomic i;
|
||||
};
|
||||
U a, b;
|
||||
bool swap = false;
|
||||
auto x2 = reinterpret_cast<int32_atomic volatile *>( x );
|
||||
while ( !swap ) {
|
||||
a.i = atomic_add( x2, 0 );
|
||||
b.d = a.d + y;
|
||||
swap = atomic_compare_and_swap( x2, a.i, b.i );
|
||||
}
|
||||
return b.d;
|
||||
}
|
||||
|
||||
|
||||
} // AtomicOperations namespace
|
||||
|
||||
|
|
|
@ -17,6 +17,8 @@
|
|||
// but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
||||
#ifndef included_ThreadPoolAtomicHelpers
|
||||
#define included_ThreadPoolAtomicHelpers
|
||||
|
||||
#include <stdexcept>
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <typeinfo>
|
||||
|
@ -25,7 +27,6 @@
|
|||
#if defined( WIN32 ) || defined( _WIN32 ) || defined( WIN64 ) || defined( _WIN64 )
|
||||
// Using windows
|
||||
#define USE_WINDOWS
|
||||
#define NOMINMAX
|
||||
#include <process.h>
|
||||
#include <stdlib.h>
|
||||
#include <windows.h>
|
||||
|
@ -544,6 +545,11 @@ inline void atomic_swap( int64_atomic volatile *x, int64_atomic *y )
|
|||
}
|
||||
|
||||
|
||||
// Atomic operations for floating types
|
||||
double atomic_add( double volatile *x, double y );
|
||||
float atomic_add( float volatile *x, float y );
|
||||
|
||||
|
||||
// Define an atomic counter
|
||||
struct counter_t {
|
||||
public:
|
||||
|
|
|
@ -29,12 +29,16 @@
|
|||
* \details This class implements a basic sorted list that is thread-safe and lock-free.
|
||||
* Entries are stored smallest to largest according to the compare operator
|
||||
*/
|
||||
template<class TYPE, int MAX_SIZE, class COMPARE = std::less<TYPE>>
|
||||
template<class TYPE, class COMPARE = std::less<TYPE>>
|
||||
class AtomicList final
|
||||
{
|
||||
public:
|
||||
//! Default constructor
|
||||
AtomicList( const TYPE &default_value = TYPE(), const COMPARE &comp = COMPARE() );
|
||||
AtomicList( size_t capacity = 1024, const TYPE &default_value = TYPE(),
|
||||
const COMPARE &comp = COMPARE() );
|
||||
|
||||
//! Destructor
|
||||
~AtomicList();
|
||||
|
||||
/*!
|
||||
* \brief Remove an item from the list
|
||||
|
@ -48,8 +52,8 @@ public:
|
|||
* bool cmp( const TYPE& value, ... );
|
||||
* @param args Additional arguments for the comparison
|
||||
*/
|
||||
template<class Compare, class... Args>
|
||||
inline TYPE remove( Compare compare, Args... args );
|
||||
template<typename Compare, class... Args>
|
||||
inline TYPE remove( Compare compare, const Args &... args );
|
||||
|
||||
//! Remove the first from the list
|
||||
inline TYPE remove_first();
|
||||
|
@ -59,13 +63,13 @@ public:
|
|||
* \details Insert an item into the list
|
||||
* @param x Item to insert
|
||||
*/
|
||||
inline void insert( TYPE x );
|
||||
inline void insert( const TYPE &x );
|
||||
|
||||
/*!
|
||||
* \brief Return the size of the list
|
||||
* \details Return the number of items in the list
|
||||
*/
|
||||
inline int size() const { return AtomicOperations::atomic_get( &d_N ); }
|
||||
inline size_t size() const { return AtomicOperations::atomic_get( &d_N ); }
|
||||
|
||||
/*!
|
||||
* \brief Check if the list is empty
|
||||
|
@ -73,11 +77,23 @@ public:
|
|||
*/
|
||||
inline bool empty() const { return AtomicOperations::atomic_get( &d_N ) == 0; }
|
||||
|
||||
/*!
|
||||
* \brief Clear the list
|
||||
* \details Removes all entries from the list
|
||||
*/
|
||||
inline void clear()
|
||||
{
|
||||
while ( !empty() ) {
|
||||
remove_first();
|
||||
}
|
||||
}
|
||||
|
||||
/*!
|
||||
* \brief Return the capacity of the list
|
||||
* \details Return the maximum number of items the list can hold
|
||||
*/
|
||||
inline int capacity() const { return MAX_SIZE; }
|
||||
inline constexpr size_t capacity() const { return d_capacity; }
|
||||
|
||||
|
||||
/*!
|
||||
* \brief Check the list
|
||||
|
@ -91,19 +107,20 @@ public:
|
|||
|
||||
|
||||
//! Return the total number of inserts since object creation
|
||||
inline int64_t N_insert() const { return AtomicOperations::atomic_get( &d_N_insert ); }
|
||||
inline size_t N_insert() const { return AtomicOperations::atomic_get( &d_N_insert ); }
|
||||
|
||||
|
||||
//! Return the total number of removals since object creation
|
||||
inline int64_t N_remove() const { return AtomicOperations::atomic_get( &d_N_remove ); }
|
||||
inline size_t N_remove() const { return AtomicOperations::atomic_get( &d_N_remove ); }
|
||||
|
||||
private:
|
||||
// Data members
|
||||
COMPARE d_compare;
|
||||
const size_t d_capacity;
|
||||
volatile TYPE d_default;
|
||||
volatile TYPE d_objects[MAX_SIZE];
|
||||
volatile TYPE *d_objects;
|
||||
volatile AtomicOperations::int32_atomic d_N;
|
||||
volatile AtomicOperations::int32_atomic d_next[MAX_SIZE + 1];
|
||||
volatile AtomicOperations::int32_atomic *d_next;
|
||||
volatile AtomicOperations::int32_atomic d_unused;
|
||||
volatile AtomicOperations::int64_atomic d_N_insert;
|
||||
volatile AtomicOperations::int64_atomic d_N_remove;
|
||||
|
@ -114,8 +131,9 @@ private:
|
|||
if ( i == -1 )
|
||||
return -1;
|
||||
int tmp = 0;
|
||||
while ( tmp == 0 )
|
||||
do {
|
||||
tmp = AtomicOperations::atomic_fetch_and_and( &d_next[i], 0 );
|
||||
} while ( tmp == 0 );
|
||||
return tmp;
|
||||
}
|
||||
inline void unlock( int i, int value )
|
||||
|
@ -126,8 +144,9 @@ private:
|
|||
inline int get_unused()
|
||||
{
|
||||
int i = 0;
|
||||
while ( i == 0 )
|
||||
do {
|
||||
i = AtomicOperations::atomic_fetch_and_and( &d_unused, 0 );
|
||||
} while ( i == 0 );
|
||||
AtomicOperations::atomic_fetch_and_or( &d_unused, -( d_next[i] + 4 ) + 1 );
|
||||
d_next[i] = -3;
|
||||
return i;
|
||||
|
@ -135,16 +154,17 @@ private:
|
|||
inline void put_unused( int i )
|
||||
{
|
||||
int j = 0;
|
||||
while ( j == 0 )
|
||||
do {
|
||||
AtomicOperations::atomic_swap( &d_unused, &j );
|
||||
} while ( j == 0 );
|
||||
d_next[i] = -3 - j;
|
||||
AtomicOperations::atomic_fetch_and_or( &d_unused, i );
|
||||
}
|
||||
|
||||
|
||||
private:
|
||||
AtomicList( const AtomicList & );
|
||||
AtomicList &operator=( const AtomicList & );
|
||||
public:
|
||||
AtomicList( const AtomicList & ) = delete;
|
||||
AtomicList &operator=( const AtomicList & ) = delete;
|
||||
};
|
||||
|
||||
|
||||
|
|
|
@ -40,28 +40,39 @@
|
|||
/******************************************************************
|
||||
* Constructor *
|
||||
******************************************************************/
|
||||
template<class TYPE, int MAX_SIZE, class COMPARE>
|
||||
AtomicList<TYPE, MAX_SIZE, COMPARE>::AtomicList( const TYPE &default_value, const COMPARE &comp )
|
||||
: d_compare( comp ), d_default( default_value )
|
||||
template<class TYPE, class COMPARE>
|
||||
AtomicList<TYPE, COMPARE>::AtomicList(
|
||||
size_t capacity, const TYPE &default_value, const COMPARE &comp )
|
||||
: d_compare( comp ),
|
||||
d_capacity( capacity ),
|
||||
d_default( default_value ),
|
||||
d_objects( new TYPE[capacity] ),
|
||||
d_N( 0 ),
|
||||
d_next( new AtomicOperations::int32_atomic[capacity + 1] ),
|
||||
d_unused( 1 ),
|
||||
d_N_insert( 0 ),
|
||||
d_N_remove( 0 )
|
||||
{
|
||||
d_N = 0;
|
||||
d_next[0] = -1;
|
||||
d_unused = 1;
|
||||
d_N_insert = 0;
|
||||
d_N_remove = 0;
|
||||
for ( int i = 0; i < MAX_SIZE; i++ ) {
|
||||
d_next[0] = -1;
|
||||
for ( size_t i = 0; i < d_capacity; i++ ) {
|
||||
d_next[i + 1] = -5 - i;
|
||||
d_objects[i] = d_default;
|
||||
}
|
||||
}
|
||||
template<class TYPE, class COMPARE>
|
||||
AtomicList<TYPE, COMPARE>::~AtomicList()
|
||||
{
|
||||
delete[] d_objects;
|
||||
delete[] d_next;
|
||||
}
|
||||
|
||||
|
||||
/******************************************************************
|
||||
* Remove an item *
|
||||
******************************************************************/
|
||||
template<class TYPE, int MAX_SIZE, class COMPARE>
|
||||
template<class TYPE, class COMPARE>
|
||||
template<class Compare, class... Args>
|
||||
inline TYPE AtomicList<TYPE, MAX_SIZE, COMPARE>::remove( Compare compare, Args... args )
|
||||
inline TYPE AtomicList<TYPE, COMPARE>::remove( Compare compare, const Args &... args )
|
||||
{
|
||||
// Acquiring temporary ownership
|
||||
int pos = 0;
|
||||
|
@ -80,8 +91,7 @@ inline TYPE AtomicList<TYPE, MAX_SIZE, COMPARE>::remove( Compare compare, Args..
|
|||
// Test to see if the object passes compare
|
||||
bool test = compare( const_cast<TYPE &>( d_objects[next - 1] ), args... );
|
||||
if ( test ) {
|
||||
// We want to return this object, update next to point to another entry and remove the
|
||||
// entry
|
||||
// We want to return this object, update next to point to another entry and remove
|
||||
unlock( next, -3 );
|
||||
unlock( pos, next2 );
|
||||
pos = next;
|
||||
|
@ -101,8 +111,8 @@ inline TYPE AtomicList<TYPE, MAX_SIZE, COMPARE>::remove( Compare compare, Args..
|
|||
}
|
||||
return rtn;
|
||||
}
|
||||
template<class TYPE, int MAX_SIZE, class COMPARE>
|
||||
inline TYPE AtomicList<TYPE, MAX_SIZE, COMPARE>::remove_first()
|
||||
template<class TYPE, class COMPARE>
|
||||
inline TYPE AtomicList<TYPE, COMPARE>::remove_first()
|
||||
{
|
||||
TYPE rtn( d_default );
|
||||
auto next = lock( 0 );
|
||||
|
@ -124,11 +134,11 @@ inline TYPE AtomicList<TYPE, MAX_SIZE, COMPARE>::remove_first()
|
|||
/******************************************************************
|
||||
* Insert an item *
|
||||
******************************************************************/
|
||||
template<class TYPE, int MAX_SIZE, class COMPARE>
|
||||
inline void AtomicList<TYPE, MAX_SIZE, COMPARE>::insert( TYPE x )
|
||||
template<class TYPE, class COMPARE>
|
||||
inline void AtomicList<TYPE, COMPARE>::insert( const TYPE &x )
|
||||
{
|
||||
int N_used = AtomicOperations::atomic_increment( &d_N );
|
||||
if ( N_used > MAX_SIZE ) {
|
||||
size_t N_used = AtomicOperations::atomic_increment( &d_N );
|
||||
if ( N_used > d_capacity ) {
|
||||
AtomicOperations::atomic_decrement( &d_N );
|
||||
throw std::logic_error( "No room in list" );
|
||||
}
|
||||
|
@ -171,8 +181,8 @@ inline void AtomicList<TYPE, MAX_SIZE, COMPARE>::insert( TYPE x )
|
|||
* Check the internal structures of the list *
|
||||
* This is mostly thread-safe, but blocks all threads *
|
||||
******************************************************************/
|
||||
template<class TYPE, int MAX_SIZE, class COMPARE>
|
||||
inline bool AtomicList<TYPE, MAX_SIZE, COMPARE>::check()
|
||||
template<class TYPE, class COMPARE>
|
||||
inline bool AtomicList<TYPE, COMPARE>::check()
|
||||
{
|
||||
// Get the lock and check for any other threads modifying the list
|
||||
auto start = lock( 0 );
|
||||
|
@ -183,11 +193,11 @@ inline bool AtomicList<TYPE, MAX_SIZE, COMPARE>::check()
|
|||
int N2 = 0;
|
||||
int N_unused = 0;
|
||||
int N_tail = 0;
|
||||
for ( int i = 0; i < MAX_SIZE; i++ ) {
|
||||
for ( size_t i = 0; i < d_capacity; i++ ) {
|
||||
if ( d_objects[i] != d_default )
|
||||
N1++;
|
||||
}
|
||||
for ( int i = 0; i < MAX_SIZE + 1; i++ ) {
|
||||
for ( size_t i = 0; i <= d_capacity; i++ ) {
|
||||
int next = i == 0 ? start : d_next[i];
|
||||
if ( next > 0 ) {
|
||||
N2++;
|
||||
|
@ -199,7 +209,7 @@ inline bool AtomicList<TYPE, MAX_SIZE, COMPARE>::check()
|
|||
pass = false;
|
||||
}
|
||||
}
|
||||
pass = pass && N_tail == 1 && N1 == d_N && N2 == d_N && N_unused + d_N == MAX_SIZE;
|
||||
pass = pass && N_tail == 1 && N1 == d_N && N2 == d_N && N_unused + d_N == (int) d_capacity;
|
||||
int it = 0;
|
||||
int pos = 0;
|
||||
while ( true ) {
|
||||
|
|
|
@ -1,16 +0,0 @@
|
|||
# Add thread pool tests
|
||||
ADD_LBPM_TEST( test_atomic )
|
||||
ADD_LBPM_TEST( test_atomic_list )
|
||||
SET_TESTS_PROPERTIES ( test_atomic PROPERTIES FAIL_REGULAR_EXPRESSION ".*FAILED.*" PROCESSORS 64 )
|
||||
ADD_LBPM_TEST_THREAD_MPI( test_thread_pool 1 4 )
|
||||
ADD_LBPM_TEST_THREAD_MPI( test_thread_pool 2 4 )
|
||||
ADD_LBPM_TEST_THREAD_MPI( test_thread_pool 4 4 )
|
||||
SET_PROPERTY( TEST test_thread_pool_1procs_4threads APPEND PROPERTY RUN_SERIAL 1 )
|
||||
IF ( USE_MPI )
|
||||
SET_PROPERTY( TEST test_thread_pool_2procs_4threads APPEND PROPERTY RUN_SERIAL 1 )
|
||||
SET_PROPERTY( TEST test_thread_pool_4procs_4threads APPEND PROPERTY RUN_SERIAL 1 )
|
||||
ENDIF()
|
||||
|
||||
|
||||
|
||||
|
|
@ -1,154 +0,0 @@
|
|||
#include "threadpool/atomic_helpers.h"
|
||||
#include "common/UnitTest.h"
|
||||
#include "common/Utilities.h"
|
||||
#include <atomic>
|
||||
#include <chrono>
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <functional>
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
#include <thread>
|
||||
#include <vector>
|
||||
|
||||
|
||||
#define perr std::cerr
|
||||
#define pout std::cout
|
||||
#define printp printf
|
||||
|
||||
|
||||
// Function to increment/decrement a counter N times
|
||||
static void modify_counter( int N, AtomicOperations::counter_t &counter )
|
||||
{
|
||||
if ( N > 0 ) {
|
||||
for ( int i = 0; i < N; i++ )
|
||||
counter.increment();
|
||||
} else if ( N < 0 ) {
|
||||
for ( int i = 0; i < -N; i++ )
|
||||
counter.decrement();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/******************************************************************
|
||||
* The main program *
|
||||
******************************************************************/
|
||||
#ifdef USE_WINDOWS
|
||||
int __cdecl main( int, char ** )
|
||||
{
|
||||
#elif defined( USE_LINUX ) || defined( USE_MAC )
|
||||
int main( int, char *[] )
|
||||
{
|
||||
#else
|
||||
#error Unknown OS
|
||||
#endif
|
||||
UnitTest ut;
|
||||
|
||||
int N_threads = 64; // Number of threads
|
||||
int N_count = 1000000; // Number of work items
|
||||
|
||||
// Ensure we are using all processors
|
||||
#ifdef __USE_GNU
|
||||
int N_procs = sysconf( _SC_NPROCESSORS_ONLN );
|
||||
cpu_set_t mask;
|
||||
CPU_ZERO( &mask );
|
||||
for ( int i = 0; i < N_procs; i++ )
|
||||
CPU_SET( i, &mask );
|
||||
sched_setaffinity( getpid(), sizeof( cpu_set_t ), &mask );
|
||||
#endif
|
||||
|
||||
// Create the counter we want to test
|
||||
AtomicOperations::counter_t count;
|
||||
if ( count.increment() == 1 )
|
||||
ut.passes( "increment count" );
|
||||
else
|
||||
ut.failure( "increment count" );
|
||||
if ( count.decrement() == 0 )
|
||||
ut.passes( "decrement count" );
|
||||
else
|
||||
ut.failure( "decrement count" );
|
||||
count.setCount( 3 );
|
||||
if ( count.getCount() == 3 )
|
||||
ut.passes( "set count" );
|
||||
else
|
||||
ut.failure( "set count" );
|
||||
count.setCount( 0 );
|
||||
|
||||
// Increment the counter in serial
|
||||
auto start = std::chrono::high_resolution_clock::now();
|
||||
modify_counter( N_count, count );
|
||||
auto stop = std::chrono::high_resolution_clock::now();
|
||||
double time_inc_serial = std::chrono::duration<double>( stop - start ).count() / N_count;
|
||||
int val = count.getCount();
|
||||
if ( val != N_count ) {
|
||||
char tmp[100];
|
||||
sprintf( tmp, "Count of %i did not match expected count of %i", val, N_count );
|
||||
ut.failure( tmp );
|
||||
}
|
||||
printp( "Time to increment (serial) = %0.1f ns\n", 1e9 * time_inc_serial );
|
||||
|
||||
// Decrement the counter in serial
|
||||
start = std::chrono::high_resolution_clock::now();
|
||||
modify_counter( -N_count, count );
|
||||
stop = std::chrono::high_resolution_clock::now();
|
||||
double time_dec_serial = std::chrono::duration<double>( stop - start ).count() / N_count;
|
||||
val = count.getCount();
|
||||
if ( val != 0 ) {
|
||||
char tmp[100];
|
||||
sprintf( tmp, "Count of %i did not match expected count of %i", val, 0 );
|
||||
ut.failure( tmp );
|
||||
}
|
||||
printp( "Time to decrement (serial) = %0.1f ns\n", 1e9 * time_dec_serial );
|
||||
|
||||
// Increment the counter in parallel
|
||||
std::vector<std::thread> threads( N_threads );
|
||||
start = std::chrono::high_resolution_clock::now();
|
||||
for ( int i = 0; i < N_threads; i++ )
|
||||
threads[i] = std::thread( modify_counter, N_count, std::ref( count ) );
|
||||
for ( int i = 0; i < N_threads; i++ )
|
||||
threads[i].join();
|
||||
stop = std::chrono::high_resolution_clock::now();
|
||||
double time_inc_parallel =
|
||||
std::chrono::duration<double>( stop - start ).count() / ( N_count * N_threads );
|
||||
val = count.getCount();
|
||||
if ( val != N_count * N_threads ) {
|
||||
char tmp[100];
|
||||
sprintf( tmp, "Count of %i did not match expected count of %i", val, N_count * N_threads );
|
||||
ut.failure( tmp );
|
||||
}
|
||||
printp( "Time to increment (parallel) = %0.1f ns\n", 1e9 * time_inc_parallel );
|
||||
|
||||
// Decrement the counter in parallel
|
||||
start = std::chrono::high_resolution_clock::now();
|
||||
for ( int i = 0; i < N_threads; i++ )
|
||||
threads[i] = std::thread( modify_counter, -N_count, std::ref( count ) );
|
||||
for ( int i = 0; i < N_threads; i++ )
|
||||
threads[i].join();
|
||||
stop = std::chrono::high_resolution_clock::now();
|
||||
double time_dec_parallel =
|
||||
std::chrono::duration<double>( stop - start ).count() / ( N_count * N_threads );
|
||||
val = count.getCount();
|
||||
if ( val != 0 ) {
|
||||
char tmp[100];
|
||||
sprintf( tmp, "Count of %i did not match expected count of %i", val, 0 );
|
||||
ut.failure( tmp );
|
||||
}
|
||||
printp( "Time to decrement (parallel) = %0.1f ns\n", 1e9 * time_dec_parallel );
|
||||
|
||||
// Check the time to increment/decrement
|
||||
if ( time_inc_serial > 100e-9 || time_dec_serial > 100e-9 || time_inc_parallel > 100e-9 ||
|
||||
time_dec_serial > 100e-9 ) {
|
||||
#if USE_GCOV
|
||||
ut.expected_failure( "Time to increment/decrement count is too expensive" );
|
||||
#else
|
||||
ut.failure( "Time to increment/decrement count is too expensive" );
|
||||
#endif
|
||||
} else {
|
||||
ut.passes( "Time to increment/decrement passed" );
|
||||
}
|
||||
|
||||
// Finished
|
||||
ut.report();
|
||||
auto N_errors = static_cast<int>( ut.NumFailGlobal() );
|
||||
return N_errors;
|
||||
}
|
|
@ -1,221 +0,0 @@
|
|||
#include "threadpool/atomic_list.h"
|
||||
#include "common/UnitTest.h"
|
||||
#include "common/Utilities.h"
|
||||
#include <algorithm>
|
||||
#include <atomic>
|
||||
#include <chrono>
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <functional>
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
#include <thread>
|
||||
#include <vector>
|
||||
|
||||
|
||||
|
||||
static void modify_list( AtomicList<int, 1024> &list )
|
||||
{
|
||||
const int N_count = 50000;
|
||||
for ( int i = 0; i < N_count; i++ ) {
|
||||
auto v1 = list.remove_first();
|
||||
auto v2 = list.remove( []( int ) { return true; } );
|
||||
auto v3 = list.remove( []( int v ) { return v >= ( rand() / 8 ); } );
|
||||
auto v4 = list.remove( []( int v ) { return v >= ( rand() / 4 ); } );
|
||||
auto v5 = list.remove( []( int v ) { return v >= ( rand() / 2 ); } );
|
||||
if ( v1 != -1 ) {
|
||||
list.insert( v1 );
|
||||
}
|
||||
if ( v2 != -1 ) {
|
||||
list.insert( v2 );
|
||||
}
|
||||
if ( v3 != -1 ) {
|
||||
list.insert( v3 );
|
||||
}
|
||||
if ( v4 != -1 ) {
|
||||
list.insert( v4 );
|
||||
}
|
||||
if ( v5 != -1 ) {
|
||||
list.insert( v5 );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static bool check_list( const std::vector<int> &x, AtomicList<int, 1024> &list )
|
||||
{
|
||||
bool pass = list.check();
|
||||
pass = pass && (int) x.size() == list.size();
|
||||
if ( pass ) {
|
||||
for ( int i : x )
|
||||
pass = pass && i == list.remove( []( int ) { return true; } );
|
||||
}
|
||||
// Restore the list
|
||||
for ( int i = 0; i < list.size(); i++ )
|
||||
list.remove_first();
|
||||
for ( int i : x )
|
||||
list.insert( i );
|
||||
return pass;
|
||||
}
|
||||
|
||||
|
||||
static inline void clear_list( AtomicList<int, 1024> &list )
|
||||
{
|
||||
for ( int i = 0; i < list.size(); i++ )
|
||||
list.remove_first();
|
||||
}
|
||||
|
||||
|
||||
/******************************************************************
|
||||
* The main program *
|
||||
******************************************************************/
|
||||
int main( int, char *[] )
|
||||
{
|
||||
UnitTest ut;
|
||||
|
||||
int N_threads = 8; // Number of threads
|
||||
|
||||
// Create the list
|
||||
AtomicList<int, 1024> list( -1 );
|
||||
if ( list.size() == 0 && list.check() )
|
||||
ut.passes( "Initialize" );
|
||||
else
|
||||
ut.failure( "Initialize" );
|
||||
|
||||
// Initialize the list with some empty values
|
||||
for ( int i = 0; i < 80; i++ )
|
||||
list.insert( rand() );
|
||||
list.insert( 2 );
|
||||
list.insert( 1 );
|
||||
list.insert( rand() );
|
||||
|
||||
// Try to pull off a couple of values
|
||||
int v1 = list.remove( []( int a ) { return a == 1; } ); // Find the entry with 1
|
||||
int v2 = list.remove( []( int ) { return true; } ); // Get the first entry
|
||||
int v3 = list.remove( []( int ) { return false; } ); // Fail to get an entry
|
||||
if ( v1 == 1 && v2 == 2 && v3 == -1 && list.size() == 81 && list.check() )
|
||||
ut.passes( "Basic sanity test" );
|
||||
else
|
||||
ut.failure( "Basic sanity test" );
|
||||
|
||||
// Clear the list
|
||||
while ( list.remove( []( int ) { return true; } ) != -1 ) {
|
||||
}
|
||||
|
||||
// Create a list of known values
|
||||
// std::vector<int> data0(512);
|
||||
std::vector<int> data0( 5 * N_threads );
|
||||
for ( int &i : data0 )
|
||||
i = rand();
|
||||
auto data = data0;
|
||||
std::sort( data.begin(), data.end() );
|
||||
|
||||
// Test the cost to insert
|
||||
int N_it = 20;
|
||||
for ( int i = 0; i < list.size(); i++ )
|
||||
list.remove( []( int ) { return true; } );
|
||||
std::chrono::duration<double> time;
|
||||
std::chrono::time_point<std::chrono::high_resolution_clock> start, stop;
|
||||
time = time.zero();
|
||||
for ( int it = 0; it < N_it; it++ ) {
|
||||
clear_list( list );
|
||||
start = std::chrono::high_resolution_clock::now();
|
||||
for ( int i : data0 )
|
||||
list.insert( i );
|
||||
stop = std::chrono::high_resolution_clock::now();
|
||||
time += ( stop - start );
|
||||
}
|
||||
printf( "insert time/item = %0.0f ns\n", 1e9 * time.count() / ( N_it * data0.size() ) );
|
||||
|
||||
// Test the cost to remove (first)
|
||||
time = time.zero();
|
||||
for ( int it = 0; it < N_it; it++ ) {
|
||||
check_list( data, list );
|
||||
start = std::chrono::high_resolution_clock::now();
|
||||
for ( size_t i = 0; i < data0.size(); i++ )
|
||||
list.remove_first();
|
||||
stop = std::chrono::high_resolution_clock::now();
|
||||
time += ( stop - start );
|
||||
}
|
||||
printf( "remove (first) time/item = %0.0f ns\n", 1e9 * time.count() / ( N_it * data0.size() ) );
|
||||
|
||||
// Test the cost to remove (in order)
|
||||
time = time.zero();
|
||||
for ( int it = 0; it < N_it; it++ ) {
|
||||
check_list( data, list );
|
||||
start = std::chrono::high_resolution_clock::now();
|
||||
for ( size_t i = 0; i < data0.size(); i++ )
|
||||
list.remove( []( int ) { return true; } );
|
||||
stop = std::chrono::high_resolution_clock::now();
|
||||
time += ( stop - start );
|
||||
}
|
||||
printf(
|
||||
"remove (ordered) time/item = %0.0f ns\n", 1e9 * time.count() / ( N_it * data0.size() ) );
|
||||
|
||||
// Test the cost to remove (out order)
|
||||
time = time.zero();
|
||||
for ( int it = 0; it < N_it; it++ ) {
|
||||
check_list( data, list );
|
||||
start = std::chrono::high_resolution_clock::now();
|
||||
for ( int tmp : data0 ) {
|
||||
list.remove( [tmp]( int v ) { return v == tmp; } );
|
||||
}
|
||||
stop = std::chrono::high_resolution_clock::now();
|
||||
time += ( stop - start );
|
||||
}
|
||||
printf(
|
||||
"remove (unordered) time/item = %0.0f ns\n", 1e9 * time.count() / ( N_it * data0.size() ) );
|
||||
|
||||
// Read/write to the list and check the results
|
||||
int64_t N0 = list.N_remove();
|
||||
check_list( data, list );
|
||||
start = std::chrono::high_resolution_clock::now();
|
||||
modify_list( list );
|
||||
stop = std::chrono::high_resolution_clock::now();
|
||||
double time_serial = std::chrono::duration<double>( stop - start ).count();
|
||||
int64_t N1 = list.N_remove();
|
||||
bool pass = check_list( data, list );
|
||||
if ( pass )
|
||||
ut.passes( "Serial get/insert" );
|
||||
else
|
||||
ut.failure( "Serial get/insert" );
|
||||
printf( "serial time = %0.5f s\n", time_serial );
|
||||
printf( "serial time/item = %0.0f ns\n", 1e9 * time_serial / ( N1 - N0 ) );
|
||||
|
||||
// Have multiple threads reading/writing to the list simultaneously
|
||||
std::vector<std::thread> threads( N_threads );
|
||||
start = std::chrono::high_resolution_clock::now();
|
||||
for ( int i = 0; i < N_threads; i++ )
|
||||
threads[i] = std::thread( modify_list, std::ref( list ) );
|
||||
for ( int i = 0; i < N_threads; i++ )
|
||||
threads[i].join();
|
||||
stop = std::chrono::high_resolution_clock::now();
|
||||
double time_parallel = std::chrono::duration<double>( stop - start ).count();
|
||||
int64_t N2 = list.N_remove();
|
||||
pass = check_list( data, list );
|
||||
if ( pass )
|
||||
ut.passes( "Parallel get/insert" );
|
||||
else
|
||||
ut.failure( "Parallel get/insert" );
|
||||
printf( "parallel time = %0.5f s\n", time_parallel );
|
||||
printf( "parallel time/item = %0.0f ns\n", 1e9 * time_parallel / ( N2 - N1 ) );
|
||||
|
||||
// Try to over-fill the list
|
||||
while ( !list.empty() )
|
||||
list.remove_first();
|
||||
for ( int i = 1; i <= list.capacity(); i++ )
|
||||
list.insert( i );
|
||||
try {
|
||||
list.insert( list.capacity() + 1 );
|
||||
ut.failure( "List overflow" );
|
||||
} catch ( const std::exception &e ) {
|
||||
ut.passes( "List overflow" );
|
||||
} catch ( ... ) {
|
||||
ut.failure( "List overflow (unknown exception)" );
|
||||
}
|
||||
|
||||
// Finished
|
||||
ut.report();
|
||||
auto N_errors = static_cast<int>( ut.NumFailGlobal() );
|
||||
return N_errors;
|
||||
}
|
|
@ -1,967 +0,0 @@
|
|||
#include "ProfilerApp.h"
|
||||
#ifdef USE_TIMER
|
||||
#include "MemoryApp.h"
|
||||
#endif
|
||||
#include "threadpool/thread_pool.h"
|
||||
#include "common/UnitTest.h"
|
||||
#include "common/Utilities.h"
|
||||
#include <algorithm>
|
||||
#include <cmath>
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <iostream>
|
||||
#include <mutex>
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
|
||||
#define MAX( x, y ) ( ( x ) > ( y ) ? ( x ) : ( y ) )
|
||||
|
||||
|
||||
#define perr std::cerr
|
||||
#define pout std::cout
|
||||
#define printp printf
|
||||
|
||||
|
||||
#ifdef USE_MPI
|
||||
#include "mpi.h"
|
||||
#endif
|
||||
|
||||
#define to_ns( x ) std::chrono::duration_cast<std::chrono::nanoseconds>( x ).count()
|
||||
#define to_ms( x ) std::chrono::duration_cast<std::chrono::milliseconds>( x ).count()
|
||||
|
||||
|
||||
// Wrapper functions for mpi
|
||||
static inline void barrier()
|
||||
{
|
||||
#ifdef USE_MPI
|
||||
MPI_Barrier( MPI_COMM_WORLD );
|
||||
#endif
|
||||
}
|
||||
static inline int getRank()
|
||||
{
|
||||
int rank = 0;
|
||||
#ifdef USE_MPI
|
||||
MPI_Comm_rank( MPI_COMM_WORLD, &rank );
|
||||
#endif
|
||||
return rank;
|
||||
}
|
||||
static inline int getSize()
|
||||
{
|
||||
int size = 0;
|
||||
#ifdef USE_MPI
|
||||
MPI_Comm_size( MPI_COMM_WORLD, &size );
|
||||
#endif
|
||||
return size;
|
||||
}
|
||||
|
||||
|
||||
// Function to waste CPU cycles
|
||||
void waste_cpu( int N )
|
||||
{
|
||||
if ( N > 10000 ) {
|
||||
PROFILE_START( "waste_cpu", 2 );
|
||||
}
|
||||
double pi = 3.141592653589793;
|
||||
double x = 1.0;
|
||||
N = std::max( 10, N );
|
||||
{
|
||||
for ( int i = 0; i < N; i++ )
|
||||
x = sqrt( x * exp( pi / x ) );
|
||||
} // style to limit gcov hits
|
||||
if ( fabs( x - 2.926064057273157 ) > 1e-12 ) {
|
||||
abort();
|
||||
}
|
||||
if ( N > 10000 ) {
|
||||
PROFILE_STOP( "waste_cpu", 2 );
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Sleep for the given time
|
||||
// Note: since we may encounter interrupts, we may not sleep for the desired time
|
||||
// so we need to perform the sleep in a loop
|
||||
void sleep_ms( int64_t N )
|
||||
{
|
||||
auto t1 = std::chrono::high_resolution_clock::now();
|
||||
auto t2 = std::chrono::high_resolution_clock::now();
|
||||
while ( to_ms( t2 - t1 ) < N ) {
|
||||
int N2 = N - to_ms( t2 - t1 );
|
||||
std::this_thread::sleep_for( std::chrono::milliseconds( N2 ) );
|
||||
t2 = std::chrono::high_resolution_clock::now();
|
||||
}
|
||||
}
|
||||
void sleep_s( int N ) { sleep_ms( 1000 * N ); }
|
||||
|
||||
|
||||
// Function to sleep for N seconds then increment a global count
|
||||
static volatile int global_sleep_count = 0;
|
||||
void sleep_inc( int N )
|
||||
{
|
||||
PROFILE_START( "sleep_inc" );
|
||||
sleep_s( N );
|
||||
++global_sleep_count;
|
||||
PROFILE_STOP( "sleep_inc" );
|
||||
}
|
||||
void sleep_inc2( double x )
|
||||
{
|
||||
sleep_ms( static_cast<int>( round( x * 1000 ) ) );
|
||||
++global_sleep_count;
|
||||
}
|
||||
void sleep_msg( double x, std::string msg )
|
||||
{
|
||||
PROFILE_START( msg );
|
||||
sleep_ms( static_cast<int>( round( x * 1000 ) ) );
|
||||
NULL_USE( msg );
|
||||
PROFILE_STOP( msg );
|
||||
}
|
||||
bool check_inc( int N ) { return global_sleep_count == N; }
|
||||
|
||||
|
||||
// Function to return the processor for the given thread
|
||||
std::mutex print_processor_mutex;
|
||||
|
||||
void print_processor( ThreadPool *tpool )
|
||||
{
|
||||
int rank = 0;
|
||||
#ifdef USE_MPI
|
||||
MPI_Comm_rank( MPI_COMM_WORLD, &rank );
|
||||
#endif
|
||||
int thread = tpool->getThreadNumber();
|
||||
int processor = ThreadPool::getCurrentProcessor();
|
||||
char tmp[100];
|
||||
sprintf( tmp, "%i: Thread,proc = %i,%i\n", rank, thread, processor );
|
||||
sleep_ms( 10 * rank );
|
||||
print_processor_mutex.lock();
|
||||
pout << tmp;
|
||||
print_processor_mutex.unlock();
|
||||
sleep_ms( 100 );
|
||||
}
|
||||
|
||||
|
||||
// Function to test how a member thread interacts with the thread pool
|
||||
int test_member_thread( ThreadPool *tpool )
|
||||
{
|
||||
int N_errors = 0;
|
||||
// Member threads are not allowed to wait for the pool to finish
|
||||
try {
|
||||
tpool->wait_pool_finished();
|
||||
N_errors++;
|
||||
} catch ( ... ) {
|
||||
}
|
||||
// Member threads are not allowed to change the size of the pool
|
||||
try {
|
||||
tpool->wait_pool_finished();
|
||||
N_errors++;
|
||||
} catch ( ... ) {
|
||||
}
|
||||
return N_errors;
|
||||
}
|
||||
|
||||
|
||||
/******************************************************************
|
||||
* Test the TPOOL_ADD_WORK macro with variable number of arguments *
|
||||
******************************************************************/
|
||||
static int myfun0() { return 0; }
|
||||
static int myfun1( int ) { return 1; }
|
||||
static int myfun2( int, float ) { return 2; }
|
||||
static int myfun3( int, float, double ) { return 3; }
|
||||
static int myfun4( int, float, double, char ) { return 4; }
|
||||
static int myfun5( int, float, double, char, std::string ) { return 5; }
|
||||
static int myfun6( int, float, double, char, std::string, int ) { return 6; }
|
||||
static int myfun7( int, float, double, char, std::string, int, int ) { return 7; }
|
||||
static int test_function_arguements( ThreadPool *tpool )
|
||||
{
|
||||
int N_errors = 0;
|
||||
// Test some basic types of instantiations
|
||||
ThreadPool::thread_id_t id0 = TPOOL_ADD_WORK( tpool, myfun0, ( nullptr ) );
|
||||
ThreadPool::thread_id_t id1 = TPOOL_ADD_WORK( tpool, myfun1, ( (int) 1 ) );
|
||||
ThreadPool::thread_id_t id2 = TPOOL_ADD_WORK( tpool, myfun2, ( (int) 1, (float) 2 ) );
|
||||
ThreadPool::thread_id_t id3 =
|
||||
TPOOL_ADD_WORK( tpool, myfun3, ( (int) 1, (float) 2, (double) 3 ) );
|
||||
ThreadPool::thread_id_t id4 =
|
||||
TPOOL_ADD_WORK( tpool, myfun4, ( (int) 1, (float) 2, (double) 3, (char) 4 ) );
|
||||
ThreadPool::thread_id_t id5 = TPOOL_ADD_WORK(
|
||||
tpool, myfun5, ( (int) 1, (float) 2, (double) 3, (char) 4, std::string( "test" ) ) );
|
||||
ThreadPool::thread_id_t id52 = TPOOL_ADD_WORK(
|
||||
tpool, myfun5, ( (int) 1, (float) 2, (double) 3, (char) 4, std::string( "test" ) ), -1 );
|
||||
ThreadPool::thread_id_t id6 = TPOOL_ADD_WORK( tpool, myfun6,
|
||||
( (int) 1, (float) 2, (double) 3, (char) 4, std::string( "test" ), (int) 1 ) );
|
||||
ThreadPool::thread_id_t id7 = TPOOL_ADD_WORK( tpool, myfun7,
|
||||
( (int) 1, (float) 2, (double) 3, (char) 4, std::string( "test" ), (int) 1, (int) 1 ) );
|
||||
tpool->wait_pool_finished();
|
||||
if ( !tpool->isFinished( id0 ) ) {
|
||||
N_errors++;
|
||||
}
|
||||
if ( tpool->getFunctionRet<int>( id0 ) != 0 ) {
|
||||
N_errors++;
|
||||
}
|
||||
if ( tpool->getFunctionRet<int>( id1 ) != 1 ) {
|
||||
N_errors++;
|
||||
}
|
||||
if ( tpool->getFunctionRet<int>( id2 ) != 2 ) {
|
||||
N_errors++;
|
||||
}
|
||||
if ( tpool->getFunctionRet<int>( id3 ) != 3 ) {
|
||||
N_errors++;
|
||||
}
|
||||
if ( tpool->getFunctionRet<int>( id4 ) != 4 ) {
|
||||
N_errors++;
|
||||
}
|
||||
if ( tpool->getFunctionRet<int>( id5 ) != 5 ) {
|
||||
N_errors++;
|
||||
}
|
||||
if ( tpool->getFunctionRet<int>( id52 ) != 5 ) {
|
||||
N_errors++;
|
||||
}
|
||||
if ( tpool->getFunctionRet<int>( id6 ) != 6 ) {
|
||||
N_errors++;
|
||||
}
|
||||
if ( tpool->getFunctionRet<int>( id7 ) != 7 ) {
|
||||
N_errors++;
|
||||
}
|
||||
return N_errors;
|
||||
}
|
||||
|
||||
|
||||
/******************************************************************
|
||||
* Examples to derive a user work item *
|
||||
******************************************************************/
|
||||
class UserWorkItemVoid : public ThreadPool::WorkItem
|
||||
{
|
||||
public:
|
||||
// User defined constructor (does not need to match any interfaces)
|
||||
explicit UserWorkItemVoid( int dummy )
|
||||
{
|
||||
// User initialized variables
|
||||
NULL_USE( dummy );
|
||||
}
|
||||
// User defined run (can do anything)
|
||||
void run() override
|
||||
{
|
||||
// Perform the tasks
|
||||
printf( "Hello work from UserWorkItem (void)" );
|
||||
}
|
||||
// Will the routine return a result
|
||||
bool has_result() const override { return false; }
|
||||
// User defined destructor
|
||||
~UserWorkItemVoid() override = default;
|
||||
};
|
||||
class UserWorkItemInt : public ThreadPool::WorkItemRet<int>
|
||||
{
|
||||
public:
|
||||
// User defined constructor (does not need to match any interfaces)
|
||||
explicit UserWorkItemInt( int dummy )
|
||||
{
|
||||
// User initialized variables
|
||||
NULL_USE( dummy );
|
||||
}
|
||||
// User defined run (can do anything)
|
||||
void run() override
|
||||
{
|
||||
// Perform the tasks
|
||||
printf( "Hello work from UserWorkItem (int)" );
|
||||
// Store the results (it's type will match the template)
|
||||
ThreadPool::WorkItemRet<int>::d_result = 1;
|
||||
}
|
||||
// User defined destructor
|
||||
~UserWorkItemInt() override = default;
|
||||
};
|
||||
|
||||
|
||||
/******************************************************************
|
||||
* test the time to run N tasks in parallel *
|
||||
******************************************************************/
|
||||
template<class Ret, class... Args>
|
||||
inline double launchAndTime( ThreadPool &tpool, int N, Ret ( *routine )( Args... ), Args... args )
|
||||
{
|
||||
tpool.wait_pool_finished();
|
||||
auto start = std::chrono::high_resolution_clock::now();
|
||||
for ( int i = 0; i < N; i++ )
|
||||
ThreadPool_add_work( &tpool, 0, routine, args... );
|
||||
tpool.wait_pool_finished();
|
||||
auto stop = std::chrono::high_resolution_clock::now();
|
||||
return std::chrono::duration<double>( stop - start ).count();
|
||||
}
|
||||
|
||||
|
||||
// Move constructor function
|
||||
volatile ThreadPool::thread_id_t f1( volatile ThreadPool::thread_id_t a ) { return a; }
|
||||
ThreadPool::thread_id_t f2( ThreadPool::thread_id_t a ) { return a; }
|
||||
|
||||
|
||||
/******************************************************************
|
||||
* Test the basic functionallity of the atomics *
|
||||
******************************************************************/
|
||||
int test_atomics()
|
||||
{
|
||||
using namespace AtomicOperations;
|
||||
int N_errors = 0;
|
||||
volatile int32_atomic i32;
|
||||
volatile int64_atomic i64;
|
||||
i32 = 32;
|
||||
i64 = 64;
|
||||
if ( atomic_increment( &i32 ) != 33 || atomic_increment( &i64 ) != 65 )
|
||||
N_errors++;
|
||||
if ( atomic_decrement( &i32 ) != 32 || atomic_decrement( &i64 ) != 64 )
|
||||
N_errors++;
|
||||
if ( atomic_add( &i32, 2 ) != 34 || atomic_add( &i64, 4 ) != 68 )
|
||||
N_errors++;
|
||||
if ( atomic_compare_and_swap( &i32, 0, 0 ) || atomic_compare_and_swap( &i64, 0, 0 ) )
|
||||
N_errors++;
|
||||
if ( !atomic_compare_and_swap( &i32, 34, 32 ) || !atomic_compare_and_swap( &i64, 68, 64 ) )
|
||||
N_errors++;
|
||||
if ( i32 != 32 || i64 != 64 )
|
||||
N_errors++;
|
||||
return N_errors;
|
||||
}
|
||||
|
||||
|
||||
/******************************************************************
|
||||
* Test FIFO behavior *
|
||||
******************************************************************/
|
||||
void test_FIFO( UnitTest &ut, ThreadPool &tpool )
|
||||
{
|
||||
int rank = getRank();
|
||||
int size = getSize();
|
||||
const int N = 4000;
|
||||
for ( int r = 0; r < size; r++ ) {
|
||||
barrier();
|
||||
if ( r != rank )
|
||||
continue;
|
||||
std::vector<ThreadPool::thread_id_t> ids;
|
||||
ids.reserve( N );
|
||||
for ( size_t i = 0; i < N; i++ )
|
||||
ids.emplace_back( TPOOL_ADD_WORK( &tpool, sleep_inc2, ( 0.001 ) ) );
|
||||
bool pass = true;
|
||||
while ( tpool.N_queued() > 0 ) {
|
||||
int i1 = -1, i2 = ids.size();
|
||||
for ( int i = N - 1; i >= 0; i-- ) {
|
||||
bool started = ids[i].started();
|
||||
if ( started )
|
||||
i1 = std::max<int>( i1, i ); // Last index to processing item
|
||||
else
|
||||
i2 = std::min<int>( i2, i ); // First index to queued item
|
||||
}
|
||||
int diff = i1 == -1 ? 0 : ( i2 - i1 - 1 );
|
||||
if ( abs( diff ) > 4 ) {
|
||||
printf( "%i %i %i\n", i1, i2, diff );
|
||||
pass = pass && abs( i2 - i1 - 1 ) <= 2;
|
||||
}
|
||||
}
|
||||
ids.clear();
|
||||
tpool.wait_pool_finished();
|
||||
if ( pass )
|
||||
ut.passes( "Thread pool behaves as FIFO" );
|
||||
else
|
||||
ut.failure( "Thread pool does not behave as FIFO" );
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/******************************************************************
|
||||
* The main program *
|
||||
******************************************************************/
|
||||
#ifdef USE_WINDOWS
|
||||
int __cdecl main( int argc, char **argv )
|
||||
{
|
||||
#elif defined( USE_LINUX ) || defined( USE_MAC )
|
||||
int main( int argc, char *argv[] )
|
||||
{
|
||||
#else
|
||||
#error Unknown OS
|
||||
#endif
|
||||
|
||||
int N_threads = 4; // Number of threads
|
||||
int N_work = 2000; // Number of work items
|
||||
int N_it = 10; // Number of cycles to run
|
||||
int N_problem = 5; // Problem size
|
||||
PROFILE_ENABLE( 3 );
|
||||
PROFILE_ENABLE_TRACE();
|
||||
PROFILE_DISABLE_MEMORY();
|
||||
UnitTest ut;
|
||||
|
||||
|
||||
// Initialize MPI and set the error handlers
|
||||
#ifdef USE_MPI
|
||||
int provided_thread_support = -1;
|
||||
MPI_Init_thread( &argc, &argv, MPI_THREAD_MULTIPLE, &provided_thread_support );
|
||||
Utilities::setErrorHandlers();
|
||||
// Disable OS specific warnings for all non-root ranks
|
||||
#endif
|
||||
int rank = getRank();
|
||||
int size = getSize();
|
||||
if ( rank > 0 )
|
||||
ThreadPool::set_OS_warnings( 1 );
|
||||
NULL_USE( size );
|
||||
NULL_USE( argc );
|
||||
NULL_USE( argv );
|
||||
|
||||
|
||||
// Test the atomics
|
||||
if ( test_atomics() == 0 )
|
||||
ut.passes( "Atomics passed" );
|
||||
else
|
||||
ut.failure( "Atomics failed" );
|
||||
|
||||
// Initialize the data
|
||||
std::vector<int> data1( N_work, 0 );
|
||||
std::vector<int> priority( N_work, 0 );
|
||||
for ( int i = 0; i < N_work; i++ ) {
|
||||
data1[i] = N_problem;
|
||||
priority[i] = i % 128;
|
||||
}
|
||||
|
||||
|
||||
// Print the size of the thread pool class
|
||||
printp( "Size of ThreadPool = %i\n", (int) sizeof( ThreadPool ) );
|
||||
|
||||
|
||||
// Get the number of processors availible
|
||||
barrier();
|
||||
int N_procs = ThreadPool::getNumberOfProcessors();
|
||||
if ( N_procs > 0 )
|
||||
ut.passes( "getNumberOfProcessors" );
|
||||
else
|
||||
ut.failure( "getNumberOfProcessors" );
|
||||
printp( "%i processors availible\n", N_procs );
|
||||
|
||||
|
||||
// Get the processor affinities for the process
|
||||
barrier();
|
||||
std::vector<int> cpus = ThreadPool::getProcessAffinity();
|
||||
printp( "%i cpus for current process: ", (int) cpus.size() );
|
||||
for ( int cpu : cpus )
|
||||
printp( "%i ", cpu );
|
||||
printp( "\n" );
|
||||
if ( !cpus.empty() ) {
|
||||
ut.passes( "getProcessAffinity" );
|
||||
} else {
|
||||
#ifdef __APPLE__
|
||||
ut.expected_failure( "getProcessAffinity" );
|
||||
#else
|
||||
ut.failure( "getProcessAffinity" );
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
// Test setting the process affinities
|
||||
barrier();
|
||||
bool pass = false;
|
||||
if ( !cpus.empty() && N_procs > 0 ) {
|
||||
if ( cpus.size() == 1 ) {
|
||||
cpus.resize( N_procs );
|
||||
for ( int i = 0; i < N_procs; i++ )
|
||||
cpus.push_back( i );
|
||||
try {
|
||||
ThreadPool::setProcessAffinity( cpus );
|
||||
} catch ( ... ) {
|
||||
}
|
||||
cpus = ThreadPool::getProcessAffinity();
|
||||
std::vector<int> cpus = ThreadPool::getProcessAffinity();
|
||||
printp( "%i cpus for current process (updated): ", (int) cpus.size() );
|
||||
for ( int cpu : cpus )
|
||||
printp( "%i ", cpu );
|
||||
printp( "\n" );
|
||||
pass = cpus.size() > 1;
|
||||
} else {
|
||||
std::vector<int> cpus_orig = cpus;
|
||||
std::vector<int> cpus_tmp( 1, cpus[0] );
|
||||
try {
|
||||
ThreadPool::setProcessAffinity( cpus_tmp );
|
||||
} catch ( ... ) {
|
||||
}
|
||||
cpus = ThreadPool::getProcessAffinity();
|
||||
if ( cpus.size() == 1 )
|
||||
pass = true;
|
||||
try {
|
||||
ThreadPool::setProcessAffinity( cpus_orig );
|
||||
} catch ( ... ) {
|
||||
}
|
||||
cpus = ThreadPool::getProcessAffinity();
|
||||
if ( cpus.size() != cpus_orig.size() )
|
||||
pass = false;
|
||||
}
|
||||
}
|
||||
if ( pass ) {
|
||||
ut.passes( "setProcessAffinity" );
|
||||
} else {
|
||||
#ifdef __APPLE__
|
||||
ut.expected_failure( "setProcessAffinity" );
|
||||
#else
|
||||
ut.failure( "setProcessAffinity" );
|
||||
#endif
|
||||
}
|
||||
int N_procs_used = std::min<int>( N_procs, N_threads );
|
||||
printp( "%i processors used\n", N_procs_used );
|
||||
|
||||
|
||||
// Create the thread pool
|
||||
barrier();
|
||||
printp( "Creating thread pool\n" );
|
||||
ThreadPool tpool0;
|
||||
ThreadPool tpool;
|
||||
ThreadPool::thread_id_t id;
|
||||
id = TPOOL_ADD_WORK( &tpool, waste_cpu, ( data1[0] ) );
|
||||
if ( id == ThreadPool::thread_id_t() || !tpool.isValid( id ) )
|
||||
ut.failure( "Errors with id" );
|
||||
tpool.setNumThreads( N_threads );
|
||||
if ( tpool.getNumThreads() == N_threads )
|
||||
ut.passes( "Created thread pool" );
|
||||
else
|
||||
ut.failure( "Failed to create tpool with desired number of threads" );
|
||||
|
||||
|
||||
// Test setting the thread affinities
|
||||
barrier();
|
||||
if ( cpus.size() > 1 ) {
|
||||
sleep_ms( 50 );
|
||||
// First make sure we can get the thread affinities
|
||||
std::vector<int> procs = ThreadPool::getThreadAffinity();
|
||||
if ( procs == cpus ) {
|
||||
ut.passes( "getThreadAffinity() matches procs" );
|
||||
} else {
|
||||
char msg[100];
|
||||
sprintf( msg, "getThreadAffinity() does not match procs (%i,%i)",
|
||||
static_cast<int>( procs.size() ), static_cast<int>( cpus.size() ) );
|
||||
ut.failure( msg );
|
||||
}
|
||||
pass = true;
|
||||
for ( int i = 0; i < N_threads; i++ ) {
|
||||
std::vector<int> procs_thread = tpool.getThreadAffinity( i );
|
||||
if ( procs_thread != procs ) {
|
||||
printp( "%i: Initial thread affinity: ", rank );
|
||||
for ( int i : procs_thread )
|
||||
printp( "%i ", i );
|
||||
printp( "\n" );
|
||||
pass = false;
|
||||
}
|
||||
}
|
||||
if ( pass )
|
||||
ut.passes( "getThreadAffinity(thread) matches procs" );
|
||||
else
|
||||
ut.failure( "getThreadAffinity(thread) does not match procs" );
|
||||
// Try to set the thread affinities
|
||||
pass = true;
|
||||
if ( !procs.empty() ) {
|
||||
int N_procs_thread = std::max<int>( (int) cpus.size() / N_threads, 1 );
|
||||
for ( int i = 0; i < N_threads; i++ ) {
|
||||
std::vector<int> procs_thread( N_procs_thread, -1 );
|
||||
for ( int j = 0; j < N_procs_thread; j++ )
|
||||
procs_thread[j] = procs[( i * N_procs_thread + j ) % procs.size()];
|
||||
tpool.setThreadAffinity( i, procs_thread );
|
||||
sleep_ms( 10 ); // Give time for OS to update thread affinities
|
||||
std::vector<int> procs_thread2 = tpool.getThreadAffinity( i );
|
||||
if ( procs_thread2 != procs_thread ) {
|
||||
printp( "%i: Final thread affinity: ", rank );
|
||||
for ( int i : procs_thread )
|
||||
printp( "%i ", i );
|
||||
printp( "\n" );
|
||||
pass = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
if ( pass )
|
||||
ut.passes( "setThreadAffinity passes" );
|
||||
else
|
||||
ut.failure( "setThreadAffinity failed to change affinity" );
|
||||
}
|
||||
|
||||
|
||||
// Reset the thread affinities
|
||||
barrier();
|
||||
tpool.setNumThreads( tpool.getNumThreads(), "none" );
|
||||
// tpool.setNumThreads(tpool.getNumThreads(),"independent");
|
||||
for ( int i = 0; i < N_threads; i++ ) {
|
||||
std::vector<int> procs_thread = tpool.getThreadAffinity( i );
|
||||
printp( "Thread affinity: " );
|
||||
for ( int i : procs_thread )
|
||||
printp( "%i ", i );
|
||||
printp( "\n" );
|
||||
}
|
||||
|
||||
// Print the current processors by thread id
|
||||
barrier();
|
||||
ThreadPool::set_OS_warnings( 1 );
|
||||
print_processor( &tpool );
|
||||
launchAndTime( tpool, N_threads, print_processor, &tpool );
|
||||
|
||||
// Run some basic tests
|
||||
barrier();
|
||||
auto start = std::chrono::high_resolution_clock::now();
|
||||
for ( int n = 0; n < N_it; n++ ) {
|
||||
for ( int i = 0; i < N_work; i++ )
|
||||
waste_cpu( data1[i] );
|
||||
}
|
||||
auto stop = std::chrono::high_resolution_clock::now();
|
||||
double time = std::chrono::duration<double>( stop - start ).count();
|
||||
printp( "Time for serial cycle = %0.0f us\n", 1e6 * time / N_it );
|
||||
printp( "Time for serial item = %0.0f ns\n", 1e9 * time / ( N_it * N_work ) );
|
||||
id = TPOOL_ADD_WORK( &tpool, waste_cpu, ( data1[0] ) );
|
||||
tpool.wait( id );
|
||||
std::vector<ThreadPool::thread_id_t> ids2;
|
||||
ids2.push_back( TPOOL_ADD_WORK( &tpool, waste_cpu, ( data1[0] ) ) );
|
||||
tpool.wait( ids2[0] );
|
||||
|
||||
// Test the move operator for thread_id
|
||||
ThreadPool::thread_id_t id1 = f1( id ); // move-construct from rvalue temporary
|
||||
ThreadPool::thread_id_t id2 = std::move( id1 ); // move-construct from xvalue
|
||||
volatile ThreadPool::thread_id_t id3 = f2( id ); // move-construct from rvalue temporary
|
||||
volatile ThreadPool::thread_id_t id4 = std::move( id3 ); // move-construct from xvalue
|
||||
id2.reset();
|
||||
id4.reset();
|
||||
|
||||
// Test calling functions with different number of arguments
|
||||
barrier();
|
||||
printp( "Testing arguments:\n" );
|
||||
int N_errors_args = test_function_arguements( &tpool );
|
||||
if ( N_errors_args == 0 )
|
||||
ut.passes( "Calling function with default arguments" );
|
||||
else
|
||||
ut.failure( "Error calling function with default arguments" );
|
||||
|
||||
|
||||
// Check that the threads can sleep in parallel (this does not depend on the number of
|
||||
// processors)
|
||||
barrier();
|
||||
tpool.wait_pool_finished();
|
||||
start = std::chrono::high_resolution_clock::now();
|
||||
sleep_inc( 1 );
|
||||
stop = std::chrono::high_resolution_clock::now();
|
||||
double sleep_serial = std::chrono::duration<double>( stop - start ).count();
|
||||
double sleep_parallel = launchAndTime( tpool, N_threads, sleep_inc, 1 );
|
||||
double sleep_speedup = N_procs_used * sleep_serial / sleep_parallel;
|
||||
printf( "%i: Speedup on %i sleeping threads: %0.3f\n", rank, N_procs_used, sleep_speedup );
|
||||
printf( "%i: ts = %0.3f, tp = %0.3f\n", rank, sleep_serial, sleep_parallel );
|
||||
if ( fabs( sleep_serial - 1.0 ) < 0.05 && fabs( sleep_parallel - 1.0 ) < 0.25 &&
|
||||
sleep_speedup > 3 )
|
||||
ut.passes( "Passed thread sleep" );
|
||||
else
|
||||
ut.failure( "Failed thread sleep" );
|
||||
|
||||
|
||||
// Check that the threads are actually working in parallel
|
||||
barrier();
|
||||
if ( N_procs_used > 1 ) {
|
||||
#ifdef USE_MPI
|
||||
// Use a non-blocking serialization of the MPI processes
|
||||
// if we do not have a sufficient number of processors
|
||||
bool serialize_mpi = N_procs < N_threads * size;
|
||||
int buf;
|
||||
MPI_Request request;
|
||||
MPI_Status status;
|
||||
if ( serialize_mpi && rank > 0 ) {
|
||||
MPI_Irecv( &buf, 1, MPI_INT, rank - 1, 0, MPI_COMM_WORLD, &request );
|
||||
int flag = false;
|
||||
while ( !flag ) {
|
||||
MPI_Test( &request, &flag, &status );
|
||||
sleep_s( 1 );
|
||||
}
|
||||
}
|
||||
#endif
|
||||
int N = 20000000; // Enough work to keep the processor busy for ~ 1 s
|
||||
// Run in serial
|
||||
start = std::chrono::high_resolution_clock::now();
|
||||
waste_cpu( N );
|
||||
stop = std::chrono::high_resolution_clock::now();
|
||||
double time_serial = std::chrono::duration<double>( stop - start ).count();
|
||||
// Run in parallel
|
||||
double time_parallel = launchAndTime( tpool, N_procs_used, waste_cpu, N );
|
||||
double time_parallel2 = launchAndTime( tpool, N_procs_used, waste_cpu, N / 1000 );
|
||||
double speedup = N_procs_used * time_serial / time_parallel;
|
||||
printf( "%i: Speedup on %i procs: %0.3f\n", rank, N_procs_used, speedup );
|
||||
printf( "%i: ts = %0.3f, tp = %0.3f, tp2 = %0.3f\n", rank, time_serial, time_parallel,
|
||||
time_parallel2 );
|
||||
if ( speedup > 1.4 ) {
|
||||
ut.passes( "Passed speedup test" );
|
||||
} else {
|
||||
#ifdef USE_GCOV
|
||||
ut.expected_failure( "Times do not indicate tests are running in parallel (gcov)" );
|
||||
#else
|
||||
ut.failure( "Times do not indicate tests are running in parallel" );
|
||||
#endif
|
||||
}
|
||||
#ifdef USE_MPI
|
||||
if ( serialize_mpi ) {
|
||||
if ( rank < size - 1 )
|
||||
MPI_Send( &N, 1, MPI_INT, rank + 1, 0, MPI_COMM_WORLD );
|
||||
if ( rank == size - 1 ) {
|
||||
for ( int i = 0; i < size - 1; i++ )
|
||||
MPI_Send( &N, 1, MPI_INT, i, 1, MPI_COMM_WORLD );
|
||||
} else {
|
||||
MPI_Irecv( &buf, 1, MPI_INT, size - 1, 1, MPI_COMM_WORLD, &request );
|
||||
int flag = false;
|
||||
MPI_Status status;
|
||||
while ( !flag ) {
|
||||
MPI_Test( &request, &flag, &status );
|
||||
sleep_s( 1 );
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
} else {
|
||||
ut.expected_failure( "Testing thread performance with less than 1 processor" );
|
||||
}
|
||||
|
||||
|
||||
// Test first-in-first-out scheduler (also ensures priorities)
|
||||
test_FIFO( ut, tpool );
|
||||
|
||||
|
||||
// Test adding a work item with a dependency
|
||||
barrier();
|
||||
{
|
||||
// Test that we sucessfully wait on the work items
|
||||
std::vector<ThreadPool::thread_id_t> ids;
|
||||
ids.reserve( 5 );
|
||||
global_sleep_count = 0; // Reset the count before this test
|
||||
ThreadPool::thread_id_t id0;
|
||||
auto id1 = TPOOL_ADD_WORK( &tpool, sleep_inc, ( 1 ) );
|
||||
auto id2 = TPOOL_ADD_WORK( &tpool, sleep_inc, ( 2 ) );
|
||||
auto *wait1 = new WorkItemFull<bool, int>( check_inc, 1 );
|
||||
auto *wait2 = new WorkItemFull<bool, int>( check_inc, 2 );
|
||||
wait1->add_dependency( id0 );
|
||||
wait1->add_dependency( id1 );
|
||||
wait2->add_dependency( id1 );
|
||||
wait2->add_dependency( id2 );
|
||||
ids.clear();
|
||||
ids.push_back( tpool.add_work( wait1 ) );
|
||||
ids.push_back( tpool.add_work( wait2 ) );
|
||||
tpool.wait_all( ids.size(), &ids[0] );
|
||||
if ( !tpool.getFunctionRet<bool>( ids[0] ) || !tpool.getFunctionRet<bool>( ids[1] ) )
|
||||
ut.failure( "Failed to wait on required dependency" );
|
||||
else
|
||||
ut.passes( "Dependencies" );
|
||||
tpool.wait_pool_finished();
|
||||
// Test waiting on more dependencies than in the thread pool (changing priorities)
|
||||
ids.clear();
|
||||
for ( size_t i = 0; i < 20; i++ )
|
||||
ids.push_back( TPOOL_ADD_WORK( &tpool, sleep_inc2, ( 0.1 ) ) );
|
||||
auto *wait3 = new WorkItemFull<void, double>( sleep_inc2, 0 );
|
||||
wait3->add_dependencies( ids );
|
||||
id = tpool.add_work( wait3, 50 );
|
||||
tpool.wait( id );
|
||||
bool pass = true;
|
||||
for ( auto &id : ids )
|
||||
pass = pass && id.finished();
|
||||
ids.clear();
|
||||
if ( pass )
|
||||
ut.passes( "Dependencies2" );
|
||||
else
|
||||
ut.failure( "Dependencies2" );
|
||||
// Check that we can handle more complex dependencies
|
||||
id1 = TPOOL_ADD_WORK( &tpool, sleep_inc2, ( 0.5 ) );
|
||||
for ( int i = 0; i < 10; i++ ) {
|
||||
wait1 = new WorkItemFull<bool, int>( check_inc, 1 );
|
||||
wait1->add_dependency( id1 );
|
||||
tpool.add_work( wait1 );
|
||||
}
|
||||
tpool.wait_pool_finished();
|
||||
ids.clear();
|
||||
for ( int i = 0; i < 5; i++ )
|
||||
ids.push_back( TPOOL_ADD_WORK( &tpool, sleep_inc2, ( 0.5 ) ) );
|
||||
sleep_inc2( 0.002 );
|
||||
ThreadPool::WorkItem *work = new WorkItemFull<void, int>( waste_cpu, 100 );
|
||||
work->add_dependencies( ids );
|
||||
id = tpool.add_work( work, 10 );
|
||||
tpool.wait( id );
|
||||
}
|
||||
|
||||
// Test the timing creating and running a work item
|
||||
barrier();
|
||||
{
|
||||
printp( "Testing timmings (creating/running work item):\n" );
|
||||
std::string timer_name = "Create/Run work item";
|
||||
PROFILE_START( timer_name );
|
||||
int64_t time_create = 0;
|
||||
int64_t time_run = 0;
|
||||
int64_t time_delete = 0;
|
||||
std::vector<ThreadPool::WorkItem *> work( N_work );
|
||||
start = std::chrono::high_resolution_clock::now();
|
||||
for ( int n = 0; n < N_it; n++ ) {
|
||||
auto t1 = std::chrono::high_resolution_clock::now();
|
||||
for ( int i = 0; i < N_work; i++ )
|
||||
work[i] = ThreadPool::createWork<void, int>( waste_cpu, data1[i] );
|
||||
auto t2 = std::chrono::high_resolution_clock::now();
|
||||
for ( int i = 0; i < N_work; i++ )
|
||||
work[i]->run();
|
||||
auto t3 = std::chrono::high_resolution_clock::now();
|
||||
for ( int i = 0; i < N_work; i++ )
|
||||
delete work[i];
|
||||
auto t4 = std::chrono::high_resolution_clock::now();
|
||||
time_create += to_ns( t2 - t1 );
|
||||
time_run += to_ns( t3 - t2 );
|
||||
time_delete += to_ns( t4 - t3 );
|
||||
if ( ( n + 1 ) % 100 == 0 )
|
||||
printp( "Cycle %i of %i finished\n", n + 1, N_it );
|
||||
}
|
||||
stop = std::chrono::high_resolution_clock::now();
|
||||
time = std::chrono::duration<double>( stop - start ).count();
|
||||
PROFILE_STOP( timer_name );
|
||||
printp( " time = %0.0f ms\n", 1e3 * time );
|
||||
printp( " time / cycle = %0.0f us\n", 1e6 * time / N_it );
|
||||
printp( " average time / item = %0.0f ns\n", 1e9 * time / ( N_it * N_work ) );
|
||||
printp( " create = %i ns\n", time_create / ( N_it * N_work ) );
|
||||
printp( " run = %i ns\n", time_run / ( N_it * N_work ) );
|
||||
printp( " delete = %i us\n", time_delete / ( N_it * N_work ) );
|
||||
}
|
||||
|
||||
// Test the timing adding a single item
|
||||
barrier();
|
||||
for ( int it = 0; it < 2; it++ ) {
|
||||
ThreadPool *tpool_ptr = nullptr;
|
||||
std::string timer_name;
|
||||
if ( it == 0 ) {
|
||||
printp( "Testing timmings (adding a single item to empty tpool):\n" );
|
||||
timer_name = "Add single item to empty pool";
|
||||
tpool_ptr = &tpool0;
|
||||
} else if ( it == 1 ) {
|
||||
printp( "Testing timmings (adding a single item):\n" );
|
||||
timer_name = "Add single item to tpool";
|
||||
tpool_ptr = &tpool;
|
||||
}
|
||||
PROFILE_START( timer_name );
|
||||
std::vector<ThreadPool::thread_id_t> ids( N_work );
|
||||
int64_t time_add = 0;
|
||||
int64_t time_wait = 0;
|
||||
start = std::chrono::high_resolution_clock::now();
|
||||
for ( int n = 0; n < N_it; n++ ) {
|
||||
auto t1 = std::chrono::high_resolution_clock::now();
|
||||
for ( int i = 0; i < N_work; i++ )
|
||||
ids[i] = TPOOL_ADD_WORK( tpool_ptr, waste_cpu, ( data1[i] ), priority[i] );
|
||||
auto t2 = std::chrono::high_resolution_clock::now();
|
||||
tpool_ptr->wait_all( N_work, &ids[0] );
|
||||
auto t3 = std::chrono::high_resolution_clock::now();
|
||||
time_add += to_ns( t2 - t1 );
|
||||
time_wait += to_ns( t3 - t2 );
|
||||
if ( ( n + 1 ) % 100 == 0 )
|
||||
printp( "Cycle %i of %i finished\n", n + 1, N_it );
|
||||
}
|
||||
stop = std::chrono::high_resolution_clock::now();
|
||||
time = std::chrono::duration<double>( stop - start ).count();
|
||||
PROFILE_STOP( timer_name );
|
||||
printp( " time = %0.0f ms\n", 1e3 * time );
|
||||
printp( " time / cycle = %0.0f us\n", 1e6 * time / N_it );
|
||||
printp( " average time / item = %0.0f ns\n", 1e9 * time / ( N_it * N_work ) );
|
||||
printp( " create and add = %i ns\n", time_add / ( N_it * N_work ) );
|
||||
printp( " wait = %i us\n", time_wait / ( N_it * N_work ) );
|
||||
}
|
||||
|
||||
// Test the timing pre-creating the work items and adding multiple at a time
|
||||
barrier();
|
||||
for ( int it = 0; it < 2; it++ ) {
|
||||
ThreadPool *tpool_ptr = nullptr;
|
||||
std::string timer_name;
|
||||
if ( it == 0 ) {
|
||||
printp( "Testing timmings (adding a block of items to empty tpool):\n" );
|
||||
timer_name = "Add multiple items to empty pool";
|
||||
tpool_ptr = &tpool0;
|
||||
} else if ( it == 1 ) {
|
||||
printp( "Testing timmings (adding a block of items):\n" );
|
||||
timer_name = "Add multiple items to tpool";
|
||||
tpool_ptr = &tpool;
|
||||
}
|
||||
PROFILE_START( timer_name );
|
||||
int64_t time_create_work = 0;
|
||||
int64_t time_add_work = 0;
|
||||
int64_t time_wait_work = 0;
|
||||
std::vector<ThreadPool::WorkItem *> work( N_work );
|
||||
start = std::chrono::high_resolution_clock::now();
|
||||
for ( int n = 0; n < N_it; n++ ) {
|
||||
auto t1 = std::chrono::high_resolution_clock::now();
|
||||
for ( int i = 0; i < N_work; i++ )
|
||||
work[i] = ThreadPool::createWork<void, int>( waste_cpu, data1[i] );
|
||||
auto t2 = std::chrono::high_resolution_clock::now();
|
||||
auto ids = tpool_ptr->add_work( work, priority );
|
||||
auto t3 = std::chrono::high_resolution_clock::now();
|
||||
tpool_ptr->wait_all( ids );
|
||||
auto t4 = std::chrono::high_resolution_clock::now();
|
||||
time_create_work += to_ns( t2 - t1 );
|
||||
time_add_work += to_ns( t3 - t2 );
|
||||
time_wait_work += to_ns( t4 - t3 );
|
||||
if ( ( n + 1 ) % 100 == 0 )
|
||||
printp( "Cycle %i of %i finished\n", n + 1, N_it );
|
||||
}
|
||||
stop = std::chrono::high_resolution_clock::now();
|
||||
time = std::chrono::duration<double>( stop - start ).count();
|
||||
PROFILE_STOP( timer_name );
|
||||
printp( " time = %0.0f ms\n", 1e3 * time );
|
||||
printp( " time / cycle = %0.0f us\n", 1e6 * time / N_it );
|
||||
printp( " average time / item = %0.0f ns\n", 1e9 * time / ( N_it * N_work ) );
|
||||
printp( " create = %i ns\n", time_create_work / ( N_it * N_work ) );
|
||||
printp( " add = %i ns\n", time_add_work / ( N_it * N_work ) );
|
||||
printp( " wait = %i ns\n", time_wait_work / ( N_it * N_work ) );
|
||||
}
|
||||
|
||||
// Run a dependency test that tests a simple case that should keep the thread pool busy
|
||||
// Note: Checking the results requires looking at the trace data
|
||||
tpool.wait_pool_finished();
|
||||
PROFILE_START( "Dependency test" );
|
||||
for ( int i = 0; i < 10; i++ ) {
|
||||
char msg[3][100];
|
||||
sprintf( msg[0], "Item %i-%i", i, 0 );
|
||||
sprintf( msg[1], "Item %i-%i", i, 1 );
|
||||
sprintf( msg[2], "Item %i-%i", i, 2 );
|
||||
ThreadPool::WorkItem *work =
|
||||
new WorkItemFull<void, double, std::string>( sleep_msg, 0.5, msg[0] );
|
||||
ThreadPool::WorkItem *work1 =
|
||||
new WorkItemFull<void, double, std::string>( sleep_msg, 0.1, msg[1] );
|
||||
ThreadPool::WorkItem *work2 =
|
||||
new WorkItemFull<void, double, std::string>( sleep_msg, 0.1, msg[2] );
|
||||
ThreadPool::thread_id_t id = tpool.add_work( work );
|
||||
work1->add_dependency( id );
|
||||
work2->add_dependency( id );
|
||||
tpool.add_work( work1 );
|
||||
tpool.add_work( work2 );
|
||||
}
|
||||
tpool.wait_pool_finished();
|
||||
PROFILE_STOP( "Dependency test" );
|
||||
|
||||
// Close the thread pool
|
||||
tpool.setNumThreads( 0 );
|
||||
|
||||
// Save the profiling results
|
||||
PROFILE_SAVE( "test_thread_pool" );
|
||||
PROFILE_DISABLE();
|
||||
|
||||
// Test creating/destroying a thread pool using new
|
||||
barrier();
|
||||
pass = true;
|
||||
try {
|
||||
ThreadPool *tpool = new ThreadPool( ThreadPool::MAX_NUM_THREADS - 1 );
|
||||
if ( tpool->getNumThreads() != ThreadPool::MAX_NUM_THREADS - 1 )
|
||||
pass = false;
|
||||
if ( !ThreadPool::is_valid( tpool ) )
|
||||
pass = false;
|
||||
delete tpool;
|
||||
// Check that tpool is invalid
|
||||
// Note: valgrind will report this as an invalid memory read, but we want to keep the test)
|
||||
if ( ThreadPool::is_valid( tpool ) )
|
||||
pass = false;
|
||||
} catch ( ... ) {
|
||||
pass = false;
|
||||
}
|
||||
if ( pass )
|
||||
ut.passes( "Created/destroyed thread pool with new" );
|
||||
else
|
||||
ut.failure( "Created/destroyed thread pool with new" );
|
||||
|
||||
// Print the test results
|
||||
barrier();
|
||||
ut.report();
|
||||
auto N_errors = static_cast<int>( ut.NumFailGlobal() );
|
||||
|
||||
// Shudown MPI
|
||||
pout << "Shutting down\n";
|
||||
barrier();
|
||||
#ifdef USE_TIMER
|
||||
if ( rank == 0 )
|
||||
MemoryApp::print( pout );
|
||||
#endif
|
||||
#ifdef USE_MPI
|
||||
MPI_Finalize();
|
||||
sleep_ms( 10 );
|
||||
#endif
|
||||
return N_errors;
|
||||
}
|
|
@ -1,38 +1,10 @@
|
|||
/*
|
||||
Copyright 2013--2018 James E. McClure, Virginia Polytechnic & State University
|
||||
|
||||
This file is part of the Open Porous Media project (OPM).
|
||||
OPM is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
OPM is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with OPM. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
/*
|
||||
Copyright 2013--2018 James E. McClure, Virginia Polytechnic & State University
|
||||
|
||||
This file is part of the Open Porous Media project (OPM).
|
||||
OPM is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
OPM is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with OPM. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
#define _CRT_NONSTDC_NO_DEPRECATE
|
||||
#include "threadpool/thread_pool.h"
|
||||
#include "common/Utilities.h"
|
||||
#include "common/StackTrace.h"
|
||||
#include "StackTrace/StackTrace.h"
|
||||
#include "StackTrace/Utilities.h"
|
||||
|
||||
#include "ProfilerApp.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <bitset>
|
||||
#include <chrono>
|
||||
|
@ -40,11 +12,17 @@
|
|||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <iostream>
|
||||
#include <random>
|
||||
#include <stdexcept>
|
||||
#include <thread>
|
||||
#include <typeinfo>
|
||||
|
||||
|
||||
// Add profile timers or performance counters to the threadpool
|
||||
#define PROFILE_THREADPOOL_PERFORMANCE 0
|
||||
#define MONITOR_THREADPOOL_PERFORMANCE 0
|
||||
|
||||
|
||||
#define perr std::cerr
|
||||
#define pout std::cout
|
||||
#define printp printf
|
||||
|
@ -64,7 +42,6 @@
|
|||
#if defined( USE_WINDOWS )
|
||||
#include <process.h>
|
||||
#include <windows.h>
|
||||
#define NOMINMAX
|
||||
// Disable warning: the inline specifier cannot be used when a friend
|
||||
// declaration refers to a specialization of a function template
|
||||
#pragma warning( disable : 4396 )
|
||||
|
@ -92,30 +69,23 @@
|
|||
|
||||
|
||||
// Set some macros
|
||||
#if PROFILE_THREADPOOL_PERFORMANCE
|
||||
#define PROFILE_THREADPOOL_START( X ) PROFILE_START( X, 3 )
|
||||
#define PROFILE_THREADPOOL_START2( X ) PROFILE_START2( X, 3 )
|
||||
#define PROFILE_THREADPOOL_STOP( X ) PROFILE_STOP( X, 3 )
|
||||
#define PROFILE_THREADPOOL_STOP2( X ) PROFILE_STOP2( X, 3 )
|
||||
// clang-format off
|
||||
#if PROFILE_THREADPOOL_PERFORMANCE == 1
|
||||
#define PROFILE_THREADPOOL_START(X) PROFILE_START(X,3)
|
||||
#define PROFILE_THREADPOOL_START2(X) PROFILE_START2(X,3)
|
||||
#define PROFILE_THREADPOOL_STOP(X) PROFILE_STOP(X,3)
|
||||
#define PROFILE_THREADPOOL_STOP2(X) PROFILE_STOP2(X,3)
|
||||
#else
|
||||
#define PROFILE_THREADPOOL_START( X ) \
|
||||
do { \
|
||||
} while ( 0 )
|
||||
#define PROFILE_THREADPOOL_START2( X ) \
|
||||
do { \
|
||||
} while ( 0 )
|
||||
#define PROFILE_THREADPOOL_STOP( X ) \
|
||||
do { \
|
||||
} while ( 0 )
|
||||
#define PROFILE_THREADPOOL_STOP2( X ) \
|
||||
do { \
|
||||
} while ( 0 )
|
||||
#define PROFILE_THREADPOOL_START(X) do {} while ( 0 )
|
||||
#define PROFILE_THREADPOOL_START2(X) do {} while ( 0 )
|
||||
#define PROFILE_THREADPOOL_STOP(X) do {} while ( 0 )
|
||||
#define PROFILE_THREADPOOL_STOP2(X) do {} while ( 0 )
|
||||
#endif
|
||||
#if MONITOR_THREADPOOL_PERFORMANCE == 1
|
||||
#define accumulate( x, t1, t2 ) \
|
||||
AtomicOperations::atomic_add( \
|
||||
&x, std::chrono::duration_cast<std::chrono::nanoseconds>( t2 - t1 ).count() );
|
||||
#define accumulate(x,t1,t2) AtomicOperations::atomic_add( \
|
||||
&x, std::chrono::duration_cast<std::chrono::nanoseconds>(t2-t1).count() );
|
||||
#endif
|
||||
// clang-format on
|
||||
|
||||
|
||||
#if MONITOR_THREADPOOL_PERFORMANCE == 1
|
||||
|
@ -123,37 +93,59 @@ static AtomicOperations::int64_atomic total_add_work_time[5] = { 0, 0, 0, 0, 0 }
|
|||
#endif
|
||||
|
||||
|
||||
// Set env
|
||||
static std::mutex Utilities_mutex;
|
||||
void setenv( const std::string &name, const std::string &value )
|
||||
{
|
||||
Utilities_mutex.lock();
|
||||
#if defined( USE_LINUX ) || defined( USE_MAC )
|
||||
bool pass = false;
|
||||
if ( value.empty() )
|
||||
pass = ::setenv( name.data(), value.data(), 1 ) == 0;
|
||||
else
|
||||
pass = ::unsetenv( name.data() ) == 0;
|
||||
#elif defined( USE_WINDOWS )
|
||||
bool pass = SetEnvironmentVariable( name.data(), value.data() ) != 0;
|
||||
#else
|
||||
#error Unknown OS
|
||||
#endif
|
||||
Utilities_mutex.unlock();
|
||||
if ( !pass ) {
|
||||
char msg[1024];
|
||||
if ( !value.empty() )
|
||||
sprintf(
|
||||
msg, "Error setting enviornmental variable: %s=%s\n", name.data(), value.data() );
|
||||
else
|
||||
sprintf( msg, "Error clearing enviornmental variable: %s\n", name.data() );
|
||||
ERROR( msg );
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Helper functions
|
||||
template<class T>
|
||||
void quicksort( int N, T *data );
|
||||
template<class T>
|
||||
inline void quicksort( std::vector<T> &x )
|
||||
{
|
||||
quicksort( (int) x.size(), x.data() );
|
||||
quicksort( x.size(), x.data() );
|
||||
}
|
||||
static inline int find_id( int, const ThreadPool::thread_id_t *, const ThreadPool::thread_id_t & );
|
||||
|
||||
|
||||
// Function to generate a random size_t number (excluding 0 and ~0)
|
||||
static size_t rand_size_t()
|
||||
// Function to generate a random number for checking if tpool is valid
|
||||
static inline bool validHeadTail( uint32_t key )
|
||||
{
|
||||
size_t key = 0;
|
||||
double tmp = 1;
|
||||
if ( sizeof( size_t ) == 4 ) {
|
||||
while ( tmp < 4e9 ) {
|
||||
key ^= rand() * 0x9E3779B9; // 2^32*0.5*(sqrt(5)-1)
|
||||
tmp *= RAND_MAX;
|
||||
}
|
||||
} else if ( sizeof( size_t ) == 8 ) {
|
||||
while ( tmp < 1.8e19 ) {
|
||||
key ^= rand() * 0x9E3779B97F4A7C15; // 2^64*0.5*(sqrt(5)-1)
|
||||
tmp *= RAND_MAX;
|
||||
}
|
||||
} else {
|
||||
throw std::logic_error( "Unhandled case" );
|
||||
}
|
||||
if ( key == 0 || ( ~key ) == 0 )
|
||||
key = rand_size_t();
|
||||
return ( key > 10 ) && ( ~key > 10 ) && ( key % 2 != 0 ) && ( key % 3 == 2 );
|
||||
}
|
||||
static inline uint32_t generateHeadTail()
|
||||
{
|
||||
uint32_t key = 0;
|
||||
std::random_device rd;
|
||||
std::mt19937 gen( rd() );
|
||||
std::uniform_int_distribution<> dis( 1, 0xFFFFFF );
|
||||
while ( !validHeadTail( key ) )
|
||||
key = static_cast<uint32_t>( dis( gen ) ) * 0x9E3779B9; // 2^32*0.5*(sqrt(5)-1)
|
||||
return key;
|
||||
}
|
||||
|
||||
|
@ -161,22 +153,10 @@ static size_t rand_size_t()
|
|||
/******************************************************************
|
||||
* Run some basic compile-time checks *
|
||||
******************************************************************/
|
||||
#if MAX_NUM_THREADS % 64 != 0
|
||||
// We use a bit array for d_active and d_cancel
|
||||
#error MAX_NUM_THREADS must be a multiple of 64
|
||||
#endif
|
||||
#if MAX_NUM_THREADS >= 65535
|
||||
// We store N_threads as a short int
|
||||
#error MAX_NUM_THREADS must < 65535
|
||||
#endif
|
||||
#if MAX_QUEUED >= 65535
|
||||
// We store the indicies to the queue list as short ints
|
||||
#error MAX_QUEUED must < 65535
|
||||
#endif
|
||||
// Check the c++ std
|
||||
#if CXX_STD == 98
|
||||
#error Thread pool class requires c++11 or newer
|
||||
#endif
|
||||
static_assert( ThreadPool::MAX_THREADS % 64 == 0, "MAX_THREADS must be a multiple of 64" );
|
||||
static_assert( ThreadPool::MAX_THREADS < 65535, "MAX_THREADS must < 65535" );
|
||||
static_assert( sizeof( AtomicOperations::int32_atomic ) == 4, "atomic32 must be a 32-bit integer" );
|
||||
static_assert( sizeof( AtomicOperations::int64_atomic ) == 8, "atomic64 must be a 64-bit integer" );
|
||||
|
||||
|
||||
/******************************************************************
|
||||
|
@ -211,7 +191,7 @@ static inline bool get_bit( const volatile AtomicOperations::int64_atomic *x, si
|
|||
uint64_t mask = 0x01;
|
||||
mask <<= index % 64;
|
||||
// This is thread-safe since we only care about a single bit
|
||||
AtomicOperations::int64_atomic y = x[index / 64];
|
||||
AtomicOperations::int64_atomic y = x[index / 64];
|
||||
return ( y & mask ) != 0;
|
||||
}
|
||||
|
||||
|
@ -244,18 +224,15 @@ static inline int count_bits( int_type x )
|
|||
/******************************************************************
|
||||
* Set the global constants *
|
||||
******************************************************************/
|
||||
constexpr int ThreadPool::MAX_NUM_THREADS;
|
||||
constexpr int ThreadPool::MAX_QUEUED;
|
||||
constexpr int ThreadPool::MAX_WAIT;
|
||||
constexpr bool ThreadPool::PROFILE_THREADPOOL_PERFORMANCE;
|
||||
constexpr bool ThreadPool::MONITOR_THREADPOOL_PERFORMANCE;
|
||||
constexpr uint16_t ThreadPool::MAX_THREADS;
|
||||
constexpr uint16_t ThreadPool::MAX_WAIT;
|
||||
|
||||
|
||||
/******************************************************************
|
||||
* Set the behavior of OS warnings *
|
||||
******************************************************************/
|
||||
static int global_OS_behavior = 0;
|
||||
std::mutex OS_warning_mutex;
|
||||
static std::mutex OS_warning_mutex;
|
||||
void ThreadPool::set_OS_warnings( int behavior )
|
||||
{
|
||||
ASSERT( behavior >= 0 && behavior <= 2 );
|
||||
|
@ -279,18 +256,7 @@ void ThreadPool::setErrorHandler( std::function<void( const std::string & )> fun
|
|||
/******************************************************************
|
||||
* Function to return the number of processors availible *
|
||||
******************************************************************/
|
||||
int ThreadPool::getNumberOfProcessors()
|
||||
{
|
||||
#if defined( USE_LINUX ) || defined( USE_MAC )
|
||||
return sysconf( _SC_NPROCESSORS_ONLN );
|
||||
#elif defined( USE_WINDOWS )
|
||||
SYSTEM_INFO sysinfo;
|
||||
GetSystemInfo( &sysinfo );
|
||||
return static_cast<int>( sysinfo.dwNumberOfProcessors );
|
||||
#else
|
||||
#error Unknown OS
|
||||
#endif
|
||||
}
|
||||
int ThreadPool::getNumberOfProcessors() { return std::thread::hardware_concurrency(); }
|
||||
|
||||
|
||||
/******************************************************************
|
||||
|
@ -323,19 +289,17 @@ std::vector<int> ThreadPool::getProcessAffinity()
|
|||
int error = sched_getaffinity( getpid(), sizeof( cpu_set_t ), &mask );
|
||||
if ( error != 0 )
|
||||
throw std::logic_error( "Error getting process affinity" );
|
||||
for ( int i = 0; i < (int) sizeof( cpu_set_t ) * CHAR_BIT; i++ ) {
|
||||
for ( size_t i = 0; i < sizeof( cpu_set_t ) * CHAR_BIT; i++ ) {
|
||||
if ( CPU_ISSET( i, &mask ) )
|
||||
procs.push_back( i );
|
||||
}
|
||||
#else
|
||||
#warning sched_getaffinity is not supported for this compiler/OS
|
||||
OS_warning( "sched_getaffinity is not supported for this compiler/OS" );
|
||||
procs.clear();
|
||||
#endif
|
||||
#elif defined( USE_MAC )
|
||||
// MAC does not support getting or setting the affinity
|
||||
OS_warning( "MAC does not support getting the process affinity" );
|
||||
procs.clear();
|
||||
#elif defined( USE_WINDOWS )
|
||||
HANDLE hProc = GetCurrentProcess();
|
||||
size_t procMask;
|
||||
|
@ -343,7 +307,7 @@ std::vector<int> ThreadPool::getProcessAffinity()
|
|||
PDWORD_PTR procMaskPtr = reinterpret_cast<PDWORD_PTR>( &procMask );
|
||||
PDWORD_PTR sysMaskPtr = reinterpret_cast<PDWORD_PTR>( &sysMask );
|
||||
GetProcessAffinityMask( hProc, procMaskPtr, sysMaskPtr );
|
||||
for ( int i = 0; i < (int) sizeof( size_t ) * CHAR_BIT; i++ ) {
|
||||
for ( size_t i = 0; i < sizeof( size_t ) * CHAR_BIT; i++ ) {
|
||||
if ( ( procMask & 0x1 ) != 0 )
|
||||
procs.push_back( i );
|
||||
procMask >>= 1;
|
||||
|
@ -353,7 +317,7 @@ std::vector<int> ThreadPool::getProcessAffinity()
|
|||
#endif
|
||||
return procs;
|
||||
}
|
||||
void ThreadPool::setProcessAffinity( std::vector<int> procs )
|
||||
void ThreadPool::setProcessAffinity( const std::vector<int> &procs )
|
||||
{
|
||||
#ifdef USE_LINUX
|
||||
#ifdef _GNU_SOURCE
|
||||
|
@ -367,12 +331,10 @@ void ThreadPool::setProcessAffinity( std::vector<int> procs )
|
|||
#else
|
||||
#warning sched_setaffinity is not supported for this compiler/OS
|
||||
OS_warning( "sched_setaffinity is not supported for this compiler/OS" );
|
||||
procs.clear();
|
||||
#endif
|
||||
#elif defined( USE_MAC )
|
||||
// MAC does not support getting or setting the affinity
|
||||
OS_warning( "MAC does not support setting the process affinity" );
|
||||
procs.clear();
|
||||
#elif defined( USE_WINDOWS )
|
||||
DWORD mask = 0;
|
||||
for ( size_t i = 0; i < procs.size(); i++ )
|
||||
|
@ -395,7 +357,7 @@ DWORD GetThreadAffinityMask( HANDLE thread )
|
|||
DWORD old = 0;
|
||||
// try every CPU one by one until one works or none are left
|
||||
while ( mask ) {
|
||||
old = static_cast<DWORD>( SetThreadAffinityMask( thread, mask ) );
|
||||
old = SetThreadAffinityMask( thread, mask );
|
||||
if ( old ) { // this one worked
|
||||
SetThreadAffinityMask( thread, old ); // restore original
|
||||
return old;
|
||||
|
@ -405,7 +367,6 @@ DWORD GetThreadAffinityMask( HANDLE thread )
|
|||
}
|
||||
mask <<= 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
@ -418,22 +379,20 @@ std::vector<int> ThreadPool::getThreadAffinity()
|
|||
int error = pthread_getaffinity_np( pthread_self(), sizeof( cpu_set_t ), &mask );
|
||||
if ( error != 0 )
|
||||
throw std::logic_error( "Error getting thread affinity" );
|
||||
for ( int i = 0; i < (int) sizeof( cpu_set_t ) * CHAR_BIT; i++ ) {
|
||||
for ( size_t i = 0; i < sizeof( cpu_set_t ) * CHAR_BIT; i++ ) {
|
||||
if ( CPU_ISSET( i, &mask ) )
|
||||
procs.push_back( i );
|
||||
}
|
||||
#else
|
||||
#warning pthread_getaffinity_np is not supported
|
||||
OS_warning( "pthread does not support pthread_getaffinity_np" );
|
||||
procs.clear();
|
||||
#endif
|
||||
#elif defined( USE_MAC )
|
||||
// MAC does not support getting or setting the affinity
|
||||
OS_warning( "MAC does not support getting the thread affinity" );
|
||||
procs.clear();
|
||||
#elif defined( USE_WINDOWS )
|
||||
size_t procMask = GetThreadAffinityMask( GetCurrentThread() );
|
||||
for ( int i = 0; i < (int) sizeof( size_t ) * CHAR_BIT; i++ ) {
|
||||
for ( size_t i = 0; i < sizeof( size_t ) * CHAR_BIT; i++ ) {
|
||||
if ( ( procMask & 0x1 ) != 0 )
|
||||
procs.push_back( i );
|
||||
procMask >>= 1;
|
||||
|
@ -448,30 +407,28 @@ std::vector<int> ThreadPool::getThreadAffinity( int thread ) const
|
|||
if ( thread >= getNumThreads() )
|
||||
std::logic_error( "Invalid thread number" );
|
||||
std::vector<int> procs;
|
||||
auto handle = const_cast<std::thread &>( d_thread[thread] ).native_handle();
|
||||
#ifdef USE_LINUX
|
||||
#ifdef _GNU_SOURCE
|
||||
auto handle = const_cast<std::thread &>( d_thread[thread] ).native_handle();
|
||||
cpu_set_t mask;
|
||||
int error = pthread_getaffinity_np( handle, sizeof( cpu_set_t ), &mask );
|
||||
if ( error != 0 )
|
||||
throw std::logic_error( "Error getting thread affinity" );
|
||||
for ( int i = 0; i < (int) sizeof( cpu_set_t ) * CHAR_BIT; i++ ) {
|
||||
for ( size_t i = 0; i < sizeof( cpu_set_t ) * CHAR_BIT; i++ ) {
|
||||
if ( CPU_ISSET( i, &mask ) )
|
||||
procs.push_back( i );
|
||||
}
|
||||
#else
|
||||
#warning pthread_getaffinity_np is not supported
|
||||
OS_warning( "pthread does not support pthread_getaffinity_np" );
|
||||
procs.clear();
|
||||
#endif
|
||||
#elif defined( USE_MAC )
|
||||
// MAC does not support getting or setting the affinity
|
||||
NULL_USE( handle );
|
||||
OS_warning( "MAC does not support getting the thread affinity" );
|
||||
procs.clear();
|
||||
#elif defined( USE_WINDOWS )
|
||||
auto handle = const_cast<std::thread &>( d_thread[thread] ).native_handle();
|
||||
size_t procMask = GetThreadAffinityMask( handle );
|
||||
for ( int i = 0; i < (int) sizeof( size_t ) * CHAR_BIT; i++ ) {
|
||||
for ( size_t i = 0; i < sizeof( size_t ) * CHAR_BIT; i++ ) {
|
||||
if ( ( procMask & 0x1 ) != 0 )
|
||||
procs.push_back( i );
|
||||
procMask >>= 1;
|
||||
|
@ -486,7 +443,7 @@ std::vector<int> ThreadPool::getThreadAffinity( int thread ) const
|
|||
/******************************************************************
|
||||
* Function to set the thread affinity *
|
||||
******************************************************************/
|
||||
void ThreadPool::setThreadAffinity( std::vector<int> procs )
|
||||
void ThreadPool::setThreadAffinity( const std::vector<int> &procs )
|
||||
{
|
||||
#ifdef USE_LINUX
|
||||
#ifdef _GNU_SOURCE
|
||||
|
@ -500,7 +457,6 @@ void ThreadPool::setThreadAffinity( std::vector<int> procs )
|
|||
#else
|
||||
#warning pthread_getaffinity_np is not supported
|
||||
OS_warning( "pthread does not support pthread_setaffinity_np" );
|
||||
procs.clear();
|
||||
#endif
|
||||
#elif defined( USE_MAC )
|
||||
// MAC does not support getting or setting the affinity
|
||||
|
@ -515,34 +471,33 @@ void ThreadPool::setThreadAffinity( std::vector<int> procs )
|
|||
#error Unknown OS
|
||||
#endif
|
||||
}
|
||||
void ThreadPool::setThreadAffinity( int thread, std::vector<int> procs ) const
|
||||
void ThreadPool::setThreadAffinity( int thread, const std::vector<int> &procs ) const
|
||||
{
|
||||
if ( thread >= getNumThreads() )
|
||||
std::logic_error( "Invalid thread number" );
|
||||
auto handle = const_cast<std::thread &>( d_thread[thread] ).native_handle();
|
||||
#ifdef USE_LINUX
|
||||
#ifdef __USE_GNU
|
||||
cpu_set_t mask;
|
||||
CPU_ZERO( &mask );
|
||||
for ( size_t i = 0; i < procs.size(); i++ )
|
||||
CPU_SET( procs[i], &mask );
|
||||
int error = pthread_setaffinity_np( handle, sizeof( cpu_set_t ), &mask );
|
||||
auto handle = const_cast<std::thread &>( d_thread[thread] ).native_handle();
|
||||
int error = pthread_setaffinity_np( handle, sizeof( cpu_set_t ), &mask );
|
||||
if ( error != 0 )
|
||||
throw std::logic_error( "Error setting thread affinity" );
|
||||
#else
|
||||
#warning pthread_getaffinity_np is not supported
|
||||
OS_warning( "pthread does not support pthread_setaffinity_np" );
|
||||
procs.clear();
|
||||
#endif
|
||||
#elif defined( USE_MAC )
|
||||
// MAC does not support getting or setting the affinity
|
||||
NULL_USE( handle );
|
||||
NULL_USE( procs );
|
||||
OS_warning( "MAC does not support getting the process affinity" );
|
||||
#elif defined( USE_WINDOWS )
|
||||
DWORD mask = 0;
|
||||
for ( size_t i = 0; i < procs.size(); i++ )
|
||||
mask |= ( (DWORD) 1 ) << procs[i];
|
||||
auto handle = const_cast<std::thread &>( d_thread[thread] ).native_handle();
|
||||
SetThreadAffinityMask( handle, mask );
|
||||
#else
|
||||
#error Unknown OS
|
||||
|
@ -553,22 +508,10 @@ void ThreadPool::setThreadAffinity( int thread, std::vector<int> procs ) const
|
|||
/******************************************************************
|
||||
* Function to perform some basic checks before we start *
|
||||
******************************************************************/
|
||||
void ThreadPool::check_startup( size_t size0 )
|
||||
void ThreadPool::check_startup()
|
||||
{
|
||||
// Check the size of the class to make sure that we don't have any
|
||||
// byte alignment problems between a library implimentation and a calling pacakge
|
||||
size_t size1 = sizeof( ThreadPool );
|
||||
size_t size2 = ( (size_t) &d_NULL_HEAD ) - ( (size_t) this ) + sizeof( size_t );
|
||||
size_t size3 = ( (size_t) &d_NULL_TAIL ) - ( (size_t) this ) + sizeof( size_t );
|
||||
if ( size0 != size1 || size1 < size2 || size1 < size3 )
|
||||
throw std::logic_error( "Internal data format problem" );
|
||||
// Check the size of variables
|
||||
if ( sizeof( AtomicOperations::int32_atomic ) != 4 )
|
||||
throw std::logic_error( "AtomicOperations::int32_atomic is not 32 bits" );
|
||||
if ( sizeof( AtomicOperations::int64_atomic ) != 8 )
|
||||
throw std::logic_error( "AtomicOperations::int32_atomic is not 64 bits" );
|
||||
// Check getting/setting a bit
|
||||
atomic_64 x[2] = { 0x0, 0x7 };
|
||||
AtomicOperations::int64_atomic x[2] = { 0x0, 0x7 };
|
||||
set_bit( x, 2 );
|
||||
unset_bit( x, 66 );
|
||||
if ( x[0] != 4 || x[1] != 3 || !get_bit( x, 2 ) || get_bit( x, 66 ) )
|
||||
|
@ -608,17 +551,21 @@ void ThreadPool::check_startup( size_t size0 )
|
|||
if ( isValid( id ) || !isValid( id2 ) )
|
||||
pass = false;
|
||||
if ( !pass )
|
||||
throw std::logic_error( "Thread pool failed to initialize" );
|
||||
throw std::logic_error( "thread id test failed" );
|
||||
}
|
||||
|
||||
|
||||
/******************************************************************
|
||||
* Function to initialize the thread pool *
|
||||
* Constructors/destructor *
|
||||
******************************************************************/
|
||||
void ThreadPool::initialize( const int N, const char *affinity, int N_procs, const int *procs )
|
||||
ThreadPool::ThreadPool(
|
||||
const int N, const std::string &affinity, const std::vector<int> &procs, int queueSize )
|
||||
: d_queue_list( queueSize )
|
||||
{
|
||||
// Run some basic tests on startup
|
||||
check_startup();
|
||||
// Initialize the header/tail
|
||||
d_NULL_HEAD = rand_size_t();
|
||||
d_NULL_HEAD = generateHeadTail();
|
||||
d_NULL_TAIL = d_NULL_HEAD;
|
||||
// Initialize the variables to NULL values
|
||||
d_id_assign = 0;
|
||||
|
@ -630,31 +577,31 @@ void ThreadPool::initialize( const int N, const char *affinity, int N_procs, con
|
|||
d_N_started = 0;
|
||||
d_N_finished = 0;
|
||||
d_max_wait_time = 600;
|
||||
memset( (void *) d_active, 0, MAX_NUM_THREADS / 8 );
|
||||
memset( (void *) d_cancel, 0, MAX_NUM_THREADS / 8 );
|
||||
memset( (void *) d_active, 0, MAX_THREADS / 8 );
|
||||
memset( (void *) d_cancel, 0, MAX_THREADS / 8 );
|
||||
d_wait_last = nullptr;
|
||||
for ( auto &i : d_wait )
|
||||
i = nullptr;
|
||||
// Initialize the id
|
||||
d_id_assign = thread_id_t::maxThreadID;
|
||||
// Create the threads
|
||||
setNumThreads( N, affinity, N_procs, procs );
|
||||
setNumThreads( N, affinity, procs );
|
||||
// Verify that the threadpool is valid
|
||||
if ( !is_valid( this ) )
|
||||
throw std::logic_error( "Thread pool is not valid" );
|
||||
}
|
||||
|
||||
|
||||
/******************************************************************
|
||||
* This is the de-constructor *
|
||||
******************************************************************/
|
||||
ThreadPool::~ThreadPool()
|
||||
{
|
||||
DISABLE_WARNINGS
|
||||
if ( !is_valid( this ) )
|
||||
throw std::logic_error( "Thread pool is not valid" );
|
||||
if ( !is_valid( this ) ) {
|
||||
std::cerr << "Thread pool is not valid, error calling destructor\n";
|
||||
return;
|
||||
}
|
||||
ENABLE_WARNINGS
|
||||
// Destroy the threads
|
||||
setNumThreads( 0 );
|
||||
// Delete all remaining data
|
||||
d_N_threads = -1;
|
||||
d_N_threads = ~0;
|
||||
d_NULL_HEAD = 0;
|
||||
d_NULL_TAIL = 0;
|
||||
delete d_wait_last;
|
||||
|
@ -675,9 +622,9 @@ bool ThreadPool::is_valid( const ThreadPool *tpool )
|
|||
{
|
||||
if ( tpool == nullptr )
|
||||
return false;
|
||||
if ( tpool->d_N_threads < 0 || tpool->d_N_threads > MAX_NUM_THREADS )
|
||||
if ( tpool->d_N_threads > MAX_THREADS )
|
||||
return false;
|
||||
if ( tpool->d_NULL_HEAD == 0 || tpool->d_NULL_HEAD != tpool->d_NULL_TAIL )
|
||||
if ( !validHeadTail( tpool->d_NULL_HEAD ) || tpool->d_NULL_HEAD != tpool->d_NULL_TAIL )
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
@ -687,17 +634,17 @@ bool ThreadPool::is_valid( const ThreadPool *tpool )
|
|||
* This function creates the threads in the thread pool *
|
||||
******************************************************************/
|
||||
void ThreadPool::setNumThreads(
|
||||
int num_worker_threads, const char *affinity2, int N_procs, const int *procs )
|
||||
int num_worker_threads, const std::string &affinity, const std::vector<int> &procs )
|
||||
{
|
||||
// Check if we are a member thread
|
||||
if ( isMemberThread() )
|
||||
throw std::logic_error(
|
||||
"Member threads are not allowed to change the number of threads in the pool" );
|
||||
// Determing the number of threads we need to create or destroy
|
||||
if ( num_worker_threads > MAX_NUM_THREADS ) {
|
||||
printp( "Warning: Maximum Number of Threads is %i\n", MAX_NUM_THREADS );
|
||||
if ( num_worker_threads > MAX_THREADS ) {
|
||||
printp( "Warning: Maximum Number of Threads is %i\n", MAX_THREADS );
|
||||
printp( " Only that number will be created\n" );
|
||||
num_worker_threads = MAX_NUM_THREADS;
|
||||
num_worker_threads = MAX_THREADS;
|
||||
} else if ( num_worker_threads < 0 ) {
|
||||
printp( "Error: cannot have a negitive number of threads\n" );
|
||||
printp( " Setting the number of threads to 0\n" );
|
||||
|
@ -711,23 +658,10 @@ void ThreadPool::setNumThreads(
|
|||
throw std::logic_error(
|
||||
"Threads are being created and destroyed at the same time" );
|
||||
}
|
||||
// Create the thread attributes (linux only)
|
||||
#if defined( USE_LINUX ) || defined( USE_MAC )
|
||||
pthread_attr_t attr;
|
||||
pthread_attr_init( &attr );
|
||||
// int ptmp;
|
||||
// pthread_attr_setstacksize(&attr,2097152); // Default stack size is 8MB
|
||||
// pthread_attr_setschedpolicy(&attr,1);
|
||||
// pthread_attr_getschedpolicy(&attr,&ptmp);
|
||||
// pout << "getschedpolicy = " << ptmp << std::endl;
|
||||
#endif
|
||||
// Create the threads
|
||||
auto tmp = new void *[2 * d_N_threads_diff];
|
||||
int j = d_N_threads;
|
||||
int j = d_N_threads;
|
||||
for ( int i = 0; i < d_N_threads_diff; i++ ) {
|
||||
d_N_threads++;
|
||||
tmp[0 + 2 * i] = this;
|
||||
tmp[1 + 2 * i] = reinterpret_cast<void *>( static_cast<size_t>( j ) );
|
||||
set_bit( d_cancel, j );
|
||||
d_thread[j] = std::thread( create_new_thread, this, j );
|
||||
j++;
|
||||
|
@ -743,12 +677,7 @@ void ThreadPool::setNumThreads(
|
|||
if ( !wait )
|
||||
break;
|
||||
}
|
||||
// Delete the thread attributes (linux only)
|
||||
#if defined( USE_LINUX ) || defined( USE_MAC )
|
||||
pthread_attr_destroy( &attr );
|
||||
#endif
|
||||
std::this_thread::sleep_for( std::chrono::milliseconds( 25 ) );
|
||||
delete[] tmp;
|
||||
} else if ( d_N_threads_diff < 0 ) {
|
||||
// Reduce the number of threads
|
||||
if ( num_worker_threads == 0 ) {
|
||||
|
@ -782,15 +711,14 @@ void ThreadPool::setNumThreads(
|
|||
} catch ( ... ) {
|
||||
pout << "Warning: Unable to get default cpus for thread affinities\n";
|
||||
}
|
||||
if ( !cpus.empty() && N_procs > 0 ) {
|
||||
cpus.resize( N_procs );
|
||||
for ( int i = 0; i < N_procs; i++ )
|
||||
if ( !cpus.empty() && !procs.empty() ) {
|
||||
cpus.resize( procs.size() );
|
||||
for ( size_t i = 0; i < procs.size(); i++ )
|
||||
cpus[i] = procs[i];
|
||||
}
|
||||
// Set the affinity model and the associated thread affinities
|
||||
// Note: not all OS's support setting the thread affinities
|
||||
std::vector<std::vector<int>> t_procs( d_N_threads );
|
||||
std::string affinity( affinity2 );
|
||||
if ( cpus.empty() ) {
|
||||
// We do not have a list of cpus to use, do nothing (OS not supported)
|
||||
} else if ( affinity == "none" ) {
|
||||
|
@ -799,13 +727,13 @@ void ThreadPool::setNumThreads(
|
|||
t_procs[i] = cpus;
|
||||
} else if ( affinity == "independent" ) {
|
||||
// We want to use an independent set of processors for each thread
|
||||
if ( (int) cpus.size() == d_N_threads ) {
|
||||
if ( cpus.size() == d_N_threads ) {
|
||||
// The number of cpus matches the number of threads
|
||||
for ( int i = 0; i < d_N_threads; i++ )
|
||||
t_procs[i] = std::vector<int>( 1, cpus[i] );
|
||||
} else if ( (int) cpus.size() > d_N_threads ) {
|
||||
} else if ( cpus.size() > d_N_threads ) {
|
||||
// There are more cpus than threads, threads will use more the one processor
|
||||
int N_procs_thread = static_cast<int>( cpus.size() + d_N_threads - 1 ) / d_N_threads;
|
||||
int N_procs_thread = ( cpus.size() + d_N_threads - 1 ) / d_N_threads;
|
||||
size_t k = 0;
|
||||
for ( int i = 0; i < d_N_threads; i++ ) {
|
||||
for ( int j = 0; j < N_procs_thread && k < cpus.size(); j++ ) {
|
||||
|
@ -815,8 +743,7 @@ void ThreadPool::setNumThreads(
|
|||
}
|
||||
} else {
|
||||
// There are fewer cpus than threads, threads will share a processor
|
||||
auto N_threads_proc =
|
||||
static_cast<int>( ( cpus.size() + d_N_threads - 1 ) / cpus.size() );
|
||||
auto N_threads_proc = ( cpus.size() + d_N_threads - 1 ) / cpus.size();
|
||||
for ( int i = 0; i < d_N_threads; i++ )
|
||||
t_procs[i].push_back( cpus[i / N_threads_proc] );
|
||||
}
|
||||
|
@ -827,7 +754,7 @@ void ThreadPool::setNumThreads(
|
|||
try {
|
||||
for ( int i = 0; i < d_N_threads; i++ ) {
|
||||
ThreadPool::setThreadAffinity( i, t_procs[i] );
|
||||
std::vector<int> cpus2 = getThreadAffinity( i );
|
||||
auto cpus2 = getThreadAffinity( i );
|
||||
if ( cpus2 != t_procs[i] )
|
||||
pout << "Warning: error setting affinities (failed to set)\n";
|
||||
}
|
||||
|
@ -853,12 +780,14 @@ void ThreadPool::tpool_thread( int thread_id )
|
|||
AtomicOperations::atomic_increment( &d_num_active );
|
||||
set_bit( d_active, thread_id );
|
||||
unset_bit( d_cancel, thread_id );
|
||||
setenv( "OMP_NUM_THREADS", "1" );
|
||||
setenv( "MKL_NUM_THREADS", "1" );
|
||||
if ( printInfo ) {
|
||||
// Print the pid
|
||||
printp( "pid = %i\n", (int) getpid() );
|
||||
// Print the processor affinities for the process
|
||||
try {
|
||||
std::vector<int> cpus = ThreadPool::getProcessAffinity();
|
||||
auto cpus = ThreadPool::getProcessAffinity();
|
||||
printp( "%i cpus for current thread: ", (int) cpus.size() );
|
||||
for ( int cpu : cpus )
|
||||
printp( "%i ", cpu );
|
||||
|
@ -872,7 +801,7 @@ void ThreadPool::tpool_thread( int thread_id )
|
|||
shutdown = false;
|
||||
while ( !shutdown ) {
|
||||
// Check if there is work to do
|
||||
if ( d_queue_list.size() > 0 ) {
|
||||
if ( !d_queue_list.empty() ) {
|
||||
// Get next work item to process
|
||||
auto work_id =
|
||||
d_queue_list.remove( []( const thread_id_t &id ) { return id.ready(); } );
|
||||
|
@ -920,6 +849,8 @@ void ThreadPool::tpool_thread( int thread_id )
|
|||
} else {
|
||||
int N_active = AtomicOperations::atomic_decrement( &d_num_active );
|
||||
unset_bit( d_active, thread_id );
|
||||
// Yield to give the main thread a chance to update
|
||||
std::this_thread::yield();
|
||||
// Alert main thread that a thread finished processing
|
||||
if ( ( N_active == 0 ) && d_signal_empty ) {
|
||||
d_wait_finished.notify_all();
|
||||
|
@ -927,7 +858,9 @@ void ThreadPool::tpool_thread( int thread_id )
|
|||
}
|
||||
// Wait for work
|
||||
PROFILE_THREADPOOL_STOP2( "thread active" );
|
||||
d_wait_work.wait_for( 1e-3 );
|
||||
double wait_time = thread_id <= 2 ? 0.01 : 0.1;
|
||||
if ( d_queue_list.empty() )
|
||||
d_wait_work.wait_for( wait_time );
|
||||
PROFILE_THREADPOOL_START2( "thread active" );
|
||||
AtomicOperations::atomic_increment( &d_num_active );
|
||||
set_bit( d_active, thread_id );
|
||||
|
@ -951,13 +884,13 @@ inline void ThreadPool::add_work( const ThreadPool::thread_id_t &id )
|
|||
auto work = id.work();
|
||||
work->d_state = 1;
|
||||
// Check and change priorities of dependency ids
|
||||
const int priority = id.getPriority();
|
||||
int priority = id.getPriority();
|
||||
auto compare = []( const thread_id_t &a, const thread_id_t &b ) { return a == b; };
|
||||
for ( int i = 0; i < work->d_N_ids; i++ ) {
|
||||
const auto &id1 = work->d_ids[i];
|
||||
if ( !id1.started() && id1 < id ) {
|
||||
// Remove and add the id back with a higher priority
|
||||
auto id2 = d_queue_list.remove(
|
||||
[]( const thread_id_t &a, const thread_id_t &b ) { return a == b; }, id1 );
|
||||
auto id2 = d_queue_list.remove( compare, id1 );
|
||||
id2.setPriority( std::max( priority, id2.getPriority() ) );
|
||||
d_queue_list.insert( id2 );
|
||||
}
|
||||
|
@ -969,7 +902,7 @@ void ThreadPool::add_work(
|
|||
size_t N, ThreadPool::WorkItem *work[], const int *priority, ThreadPool::thread_id_t *ids )
|
||||
{
|
||||
// If we have a very long list, break it up into smaller pieces to keep the threads busy
|
||||
const size_t block_size = MAX_QUEUED / 8;
|
||||
constexpr size_t block_size = 256;
|
||||
if ( N > block_size ) {
|
||||
size_t i = 0;
|
||||
while ( i < N ) {
|
||||
|
@ -979,13 +912,13 @@ void ThreadPool::add_work(
|
|||
return;
|
||||
}
|
||||
PROFILE_THREADPOOL_START( "add_work" );
|
||||
#if MONITOR_THREADPOOL_PERFORMANCE
|
||||
#if MONITOR_THREADPOOL_PERFORMANCE == 1
|
||||
auto t1 = std::chrono::high_resolution_clock::now();
|
||||
#endif
|
||||
// Create the thread ids (can be done without blocking)
|
||||
for ( size_t i = 0; i < N; i++ )
|
||||
ids[i].reset( priority[i], AtomicOperations::atomic_decrement( &d_id_assign ), work[i] );
|
||||
#if MONITOR_THREADPOOL_PERFORMANCE
|
||||
#if MONITOR_THREADPOOL_PERFORMANCE == 1
|
||||
auto t2 = std::chrono::high_resolution_clock::now();
|
||||
accumulate( total_add_work_time[0], t1, t2 );
|
||||
#endif
|
||||
|
@ -996,7 +929,7 @@ void ThreadPool::add_work(
|
|||
work[i]->run();
|
||||
work[i]->d_state = 3;
|
||||
}
|
||||
#if MONITOR_THREADPOOL_PERFORMANCE
|
||||
#if MONITOR_THREADPOOL_PERFORMANCE == 1
|
||||
auto t5 = std::chrono::high_resolution_clock::now();
|
||||
accumulate( total_add_work_time[4], t2, t5 );
|
||||
#endif
|
||||
|
@ -1004,29 +937,29 @@ void ThreadPool::add_work(
|
|||
return;
|
||||
}
|
||||
// Wait for enough room in the queue (doesn't need blocking since it isn't that precise)
|
||||
if ( N > static_cast<size_t>( MAX_QUEUED - d_queue_list.size() ) ) {
|
||||
auto N_wait = static_cast<int>( N - ( MAX_QUEUED - d_queue_list.size() ) );
|
||||
if ( N > d_queue_list.capacity() - d_queue_list.size() ) {
|
||||
int N_wait = N - ( d_queue_list.capacity() - d_queue_list.size() );
|
||||
while ( N_wait > 0 ) {
|
||||
d_signal_count = static_cast<unsigned char>( std::min( N_wait, 255 ) );
|
||||
d_signal_count = std::min<int>( N_wait, 255 );
|
||||
d_wait_finished.wait_for( 1e-4 );
|
||||
N_wait = static_cast<int>( N - ( MAX_QUEUED - d_queue_list.size() ) );
|
||||
N_wait = N - ( d_queue_list.capacity() - d_queue_list.size() );
|
||||
}
|
||||
}
|
||||
#if MONITOR_THREADPOOL_PERFORMANCE
|
||||
#if MONITOR_THREADPOOL_PERFORMANCE == 1
|
||||
auto t3 = std::chrono::high_resolution_clock::now();
|
||||
accumulate( total_add_work_time[1], t2, t3 );
|
||||
#endif
|
||||
// Get add the work items to the queue
|
||||
for ( size_t i = 0; i < N; i++ )
|
||||
add_work( ids[i] );
|
||||
#if MONITOR_THREADPOOL_PERFORMANCE
|
||||
#if MONITOR_THREADPOOL_PERFORMANCE == 1
|
||||
auto t4 = std::chrono::high_resolution_clock::now();
|
||||
accumulate( total_add_work_time[2], t3, t4 );
|
||||
#endif
|
||||
// Activate sleeping threads
|
||||
if ( d_num_active == d_N_threads ) {
|
||||
// All threads are active, no need to wake anybody
|
||||
} else if ( d_queue_list.size() == 0 ) {
|
||||
} else if ( d_queue_list.empty() ) {
|
||||
// Queue is empty, no need to activate
|
||||
} else if ( N == 1 ) {
|
||||
// Added 1 item to the queue, wake 1 worker
|
||||
|
@ -1035,7 +968,7 @@ void ThreadPool::add_work(
|
|||
// Added multple items in the queue, wake all workers
|
||||
d_wait_work.notify_all();
|
||||
}
|
||||
#if MONITOR_THREADPOOL_PERFORMANCE
|
||||
#if MONITOR_THREADPOOL_PERFORMANCE == 1
|
||||
auto t5 = std::chrono::high_resolution_clock::now();
|
||||
accumulate( total_add_work_time[3], t4, t5 );
|
||||
#endif
|
||||
|
@ -1056,8 +989,8 @@ static inline void check_finished(
|
|||
}
|
||||
}
|
||||
}
|
||||
int ThreadPool::wait_some(
|
||||
size_t N_work, const ThreadPool::thread_id_t *ids, size_t N_wait, bool *finished ) const
|
||||
int ThreadPool::wait_some( size_t N_work, const ThreadPool::thread_id_t *ids, size_t N_wait,
|
||||
bool *finished, int max_wait ) const
|
||||
{
|
||||
// Check the inputs
|
||||
if ( N_wait > N_work )
|
||||
|
@ -1086,13 +1019,21 @@ int ThreadPool::wait_some(
|
|||
auto tmp = new wait_ids_struct( N_work, ids, N_wait, d_cond_pool, MAX_WAIT, d_wait );
|
||||
// Wait for the ids
|
||||
auto t1 = std::chrono::high_resolution_clock::now();
|
||||
while ( !tmp->wait_for( 0.01 ) ) {
|
||||
check_wait_time( t1 );
|
||||
auto t2 = t1;
|
||||
int dt1 = 0;
|
||||
while ( dt1 < max_wait ) {
|
||||
if ( tmp->wait_for( std::min( max_wait, d_max_wait_time ), 0.01 ) )
|
||||
break;
|
||||
auto t3 = std::chrono::high_resolution_clock::now();
|
||||
dt1 = std::chrono::duration_cast<std::chrono::seconds>( t3 - t1 ).count();
|
||||
int dt2 = std::chrono::duration_cast<std::chrono::seconds>( t3 - t2 ).count();
|
||||
if ( dt2 >= d_max_wait_time ) {
|
||||
print_wait_warning();
|
||||
t2 = t3;
|
||||
}
|
||||
}
|
||||
// Update the ids that have finished
|
||||
check_finished( N_work, ids, N_finished, finished );
|
||||
if ( N_finished < N_wait && N_work != 0 )
|
||||
throw std::logic_error( "Internal error: failed to wait" );
|
||||
// Delete the wait event struct
|
||||
// Note: we want to maintain the reference in case a thread is still using it
|
||||
// Note: technically this should be atomic, but it really isn't necessary here
|
||||
|
@ -1105,40 +1046,43 @@ int ThreadPool::wait_some(
|
|||
/******************************************************************
|
||||
* This function waits for all of the threads to finish their work *
|
||||
******************************************************************/
|
||||
void ThreadPool::check_wait_time(
|
||||
std::chrono::time_point<std::chrono::high_resolution_clock> &t1 ) const
|
||||
void ThreadPool::print_wait_warning() const
|
||||
{
|
||||
auto t2 = std::chrono::high_resolution_clock::now();
|
||||
if ( std::chrono::duration_cast<std::chrono::seconds>( t2 - t1 ).count() > d_max_wait_time ) {
|
||||
pout << "Warning: Maximum wait time in ThreadPool exceeded, threads may be hung\n";
|
||||
pout << "N_active: " << d_num_active << std::endl;
|
||||
pout << "N_queued: " << d_queue_list.size() << std::endl;
|
||||
pout << "N_added: " << d_N_added << std::endl;
|
||||
pout << "N_started: " << d_N_started << std::endl;
|
||||
pout << "N_finished: " << d_N_finished << std::endl;
|
||||
pout << "queue.insert(): " << d_queue_list.N_insert() << std::endl;
|
||||
pout << "queue.remove(): " << d_queue_list.N_remove() << std::endl;
|
||||
pout << "Stack Trace:\n";
|
||||
auto call_stack = StackTrace::getAllCallStacks();
|
||||
StackTrace::cleanupStackTrace( call_stack );
|
||||
auto text = call_stack.print( " " );
|
||||
for ( auto &line : text )
|
||||
pout << line << std::endl;
|
||||
t1 = std::chrono::high_resolution_clock::now();
|
||||
}
|
||||
pout << "Warning: Maximum wait time in ThreadPool exceeded, threads may be hung\n";
|
||||
pout << "N_active: " << d_num_active << std::endl;
|
||||
pout << "N_queued: " << d_queue_list.size() << std::endl;
|
||||
pout << "N_added: " << d_N_added << std::endl;
|
||||
pout << "N_started: " << d_N_started << std::endl;
|
||||
pout << "N_finished: " << d_N_finished << std::endl;
|
||||
pout << "queue.insert(): " << d_queue_list.N_insert() << std::endl;
|
||||
pout << "queue.remove(): " << d_queue_list.N_remove() << std::endl;
|
||||
pout << "Stack Trace:\n";
|
||||
auto call_stack = StackTrace::getAllCallStacks();
|
||||
StackTrace::cleanupStackTrace( call_stack );
|
||||
auto text = call_stack.print( " " );
|
||||
for ( auto &line : text )
|
||||
pout << line << std::endl;
|
||||
}
|
||||
void ThreadPool::wait_pool_finished() const
|
||||
{
|
||||
// First check that we are not one of the threads
|
||||
if ( isMemberThread() ) {
|
||||
if ( isMemberThread() )
|
||||
throw std::logic_error( "Member thread attempted to call wait_pool_finished" );
|
||||
}
|
||||
// Wait for all threads to finish their work
|
||||
auto t1 = std::chrono::high_resolution_clock::now();
|
||||
while ( d_num_active > 0 || d_queue_list.size() > 0 ) {
|
||||
check_wait_time( t1 );
|
||||
while ( d_num_active > 0 || !d_queue_list.empty() ) {
|
||||
// Wait for signal from last thread
|
||||
d_signal_empty = true;
|
||||
d_wait_finished.wait_for( 10e-6 );
|
||||
d_wait_finished.wait_for( 5e-4 );
|
||||
if ( d_num_active == 0 && d_queue_list.empty() )
|
||||
break;
|
||||
// Check that we have not exceeded the maximum time
|
||||
auto t2 = std::chrono::high_resolution_clock::now();
|
||||
int seconds = std::chrono::duration_cast<std::chrono::seconds>( t2 - t1 ).count();
|
||||
if ( seconds > d_max_wait_time ) {
|
||||
print_wait_warning();
|
||||
t1 = t2;
|
||||
}
|
||||
}
|
||||
d_signal_empty = false;
|
||||
}
|
||||
|
@ -1192,30 +1136,46 @@ void ThreadPool::wait_ids_struct::id_finished( const ThreadPool::thread_id_t &id
|
|||
}
|
||||
}
|
||||
}
|
||||
bool ThreadPool::wait_ids_struct::wait_for( double seconds )
|
||||
inline bool ThreadPool::wait_ids_struct::check()
|
||||
{
|
||||
for ( int i = 0; i < d_N; i++ ) {
|
||||
if ( d_ids[i].finished() )
|
||||
d_finished[i] = true;
|
||||
int N_finished = 0;
|
||||
for ( int i = 0; i < d_N; i++ )
|
||||
N_finished += d_finished[i] ? 1 : 0;
|
||||
if ( N_finished >= d_wait || d_N == 0 ) {
|
||||
*d_ptr = nullptr;
|
||||
d_wait = 0;
|
||||
d_N = 0;
|
||||
return true;
|
||||
}
|
||||
auto t1 = std::chrono::high_resolution_clock::now();
|
||||
while ( true ) {
|
||||
int N_finished = 0;
|
||||
for ( int i = 0; i < d_N; i++ )
|
||||
N_finished += d_finished[i] ? 1 : 0;
|
||||
if ( N_finished >= d_wait || d_N == 0 ) {
|
||||
*d_ptr = nullptr;
|
||||
d_wait = 0;
|
||||
d_N = 0;
|
||||
break;
|
||||
return false;
|
||||
}
|
||||
bool ThreadPool::wait_ids_struct::wait_for( double total_time, double recheck_time )
|
||||
{
|
||||
int total = 1e6 * total_time;
|
||||
int recheck = 1e6 * recheck_time;
|
||||
auto t1 = std::chrono::high_resolution_clock::now();
|
||||
auto t2 = t1;
|
||||
int us1 = 0;
|
||||
while ( us1 < total ) {
|
||||
for ( int i = 0; i < d_N; i++ ) {
|
||||
if ( d_ids[i].finished() )
|
||||
d_finished[i] = true;
|
||||
}
|
||||
auto t2 = std::chrono::high_resolution_clock::now();
|
||||
if ( 1e-6 * std::chrono::duration_cast<std::chrono::microseconds>( t2 - t1 ).count() >
|
||||
seconds )
|
||||
return false;
|
||||
d_wait_event->wait_for( 1e-5 );
|
||||
if ( check() )
|
||||
return true;
|
||||
int us2 = 0;
|
||||
while ( us2 < recheck ) {
|
||||
double dt = 1e-6 * std::max( 10, recheck - us2 );
|
||||
d_wait_event->wait_for( dt );
|
||||
if ( check() )
|
||||
return true;
|
||||
auto t3 = std::chrono::high_resolution_clock::now();
|
||||
us2 = std::chrono::duration_cast<std::chrono::microseconds>( t3 - t2 ).count();
|
||||
t2 = t3;
|
||||
}
|
||||
us1 = std::chrono::duration_cast<std::chrono::microseconds>( t2 - t1 ).count();
|
||||
}
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
|
@ -1328,9 +1288,8 @@ inline int find_id( int n, const ThreadPool::thread_id_t *x, const ThreadPool::t
|
|||
// Perform the search
|
||||
size_t lower = 0;
|
||||
size_t upper = n - 1;
|
||||
size_t index;
|
||||
while ( ( upper - lower ) != 1 ) {
|
||||
index = ( upper + lower ) / 2;
|
||||
size_t index = ( upper + lower ) / 2;
|
||||
if ( x[index] == id )
|
||||
return index;
|
||||
if ( x[index] >= id )
|
||||
|
@ -1355,9 +1314,8 @@ void ThreadPool::WorkItem::add_dependencies( size_t N, const ThreadPool::thread_
|
|||
throw std::logic_error(
|
||||
"Cannot add dependency to work item once it has been added the the threadpool" );
|
||||
}
|
||||
if ( static_cast<size_t>( d_N_ids ) + N > 0xFFFF ) {
|
||||
if ( d_N_ids + N > 0xFFFF )
|
||||
throw std::logic_error( "Cannot add more than 65000 dependencies" );
|
||||
}
|
||||
if ( d_N_ids + N + 1 > d_size ) {
|
||||
thread_id_t *tmp = d_ids;
|
||||
unsigned int N2 = d_size;
|
||||
|
|
|
@ -20,7 +20,7 @@
|
|||
#define included_AtomicModelThreadPool
|
||||
|
||||
#include <condition_variable>
|
||||
#include <iostream>
|
||||
#include <functional>
|
||||
#include <map>
|
||||
#include <mutex>
|
||||
#include <stdarg.h>
|
||||
|
@ -55,7 +55,7 @@
|
|||
* thread_id_t ids[2];
|
||||
* ids[0] = TPOOL_ADD_WORK( tpool, myfun_1, (a,b) );
|
||||
* ids[1] = TPOOL_ADD_WORK( tpool, myfun_2, (c,d) );
|
||||
* int error = wait_all(2,ids);
|
||||
* wait_all(2,ids);
|
||||
* double x = getFunctionRet(ids[0]);
|
||||
* double y = getFunctionRet(ids[1]); <BR>
|
||||
* </pre>
|
||||
|
@ -64,11 +64,8 @@ class ThreadPool
|
|||
{
|
||||
public:
|
||||
///// Set some global properties
|
||||
constexpr static int MAX_NUM_THREADS = 128; // The maximum number of threads (must be a multiple of 64)
|
||||
constexpr static int MAX_QUEUED = 1024; // The maximum number of items in the work queue at any moment
|
||||
constexpr static int MAX_WAIT = 16; // The maximum number of active waits at any given time
|
||||
constexpr static bool PROFILE_THREADPOOL_PERFORMANCE = false; // Add profile timers to the threadpool
|
||||
constexpr static bool MONITOR_THREADPOOL_PERFORMANCE = false; // Add detailed performance counters
|
||||
constexpr static uint16_t MAX_THREADS = 128; // The maximum number of threads (must be a multiple of 64)
|
||||
constexpr static uint16_t MAX_WAIT = 16; // The maximum number of active waits at any given time
|
||||
|
||||
public:
|
||||
///// Member classes
|
||||
|
@ -132,6 +129,8 @@ public:
|
|||
}
|
||||
//! Check if thread id is null
|
||||
inline bool isNull( ) const { return d_id==nullThreadID; }
|
||||
//! Check if thread id is null
|
||||
inline WorkItem* getWork( ) const { return reinterpret_cast<WorkItem *>( d_work ); }
|
||||
|
||||
private:
|
||||
// Reset the internal data to the given values
|
||||
|
@ -189,9 +188,8 @@ public:
|
|||
*/
|
||||
inline void add_dependencies( const std::vector<ThreadPool::thread_id_t> &ids )
|
||||
{
|
||||
if ( !ids.empty() ) {
|
||||
if ( !ids.empty() )
|
||||
add_dependencies( ids.size(), &ids[0] );
|
||||
}
|
||||
}
|
||||
/*!
|
||||
* \brief Add a list of work item to the list of dependencies
|
||||
|
@ -216,8 +214,8 @@ public:
|
|||
WorkItem( const WorkItem & ); // Private copy constructor
|
||||
WorkItem &operator=( const WorkItem & ); // Private assignment operator
|
||||
volatile char d_state; // Current state (0: not added to threadpool, 1: queued, 2: started, 3: finished)
|
||||
short unsigned int d_N_ids; // Number of dependencies
|
||||
short unsigned int d_size; // Size of d_ids
|
||||
uint16_t d_N_ids; // Number of dependencies
|
||||
uint16_t d_size; // Size of d_ids
|
||||
AtomicOperations::int32_atomic d_count; // Count used by a thread_id
|
||||
thread_id_t *d_ids; // Pointer to id list
|
||||
// Friends
|
||||
|
@ -247,7 +245,7 @@ public:
|
|||
protected:
|
||||
return_type d_result;
|
||||
protected:
|
||||
inline WorkItemRet() { }
|
||||
inline WorkItemRet() : d_result( return_type() ) { }
|
||||
private:
|
||||
WorkItemRet( const WorkItemRet & ); // Private copy constructor
|
||||
WorkItemRet &operator=( const WorkItemRet & ); // Private assignment operator
|
||||
|
@ -257,37 +255,17 @@ public:
|
|||
public:
|
||||
///// Member functions
|
||||
|
||||
//! Empty constructor
|
||||
ThreadPool()
|
||||
{
|
||||
// Note: we need the constructor in the header to ensure that check_startup
|
||||
// is able to check for changes in the byte alignment
|
||||
check_startup( sizeof( ThreadPool ) );
|
||||
initialize( 0, "none", 0, nullptr );
|
||||
if ( !is_valid( this ) )
|
||||
throw std::logic_error( "Thread pool is not valid" );
|
||||
}
|
||||
|
||||
|
||||
/*!
|
||||
* Constructor that initialize the thread pool with N threads
|
||||
* @param N The desired number of worker threads
|
||||
* @param N The desired number of worker threads
|
||||
* @param affinity The affinity scheduler to use:
|
||||
* none - Let the OS handle the affinities (default)
|
||||
* independent - Give each thread an independent set of processors
|
||||
* @param procs The processors to use (defaults to the process affinitiy list)
|
||||
* @param queueSize The maximum number of items in the queue before forcing a wait
|
||||
*/
|
||||
ThreadPool( const int N, const std::string &affinity = "none",
|
||||
const std::vector<int> &procs = std::vector<int>() )
|
||||
{
|
||||
// Note: we need the constructor in the header to ensure that check_startup
|
||||
// is able to check for changes in the byte alignment
|
||||
check_startup( sizeof( ThreadPool ) );
|
||||
const int *procs2 = procs.empty() ? nullptr : ( &procs[0] );
|
||||
initialize( N, affinity.c_str(), (int) procs.size(), procs2 );
|
||||
if ( !is_valid( this ) )
|
||||
throw std::logic_error( "Thread pool is not valid" );
|
||||
}
|
||||
ThreadPool( const int N = 0, const std::string &affinity = "none",
|
||||
const std::vector<int> &procs = std::vector<int>(), int queueSize = 1024 );
|
||||
|
||||
|
||||
//! Destructor
|
||||
|
@ -307,7 +285,7 @@ public:
|
|||
|
||||
|
||||
//! Function to set the affinity of the current process
|
||||
static void setProcessAffinity( std::vector<int> procs );
|
||||
static void setProcessAffinity( const std::vector<int>& procs );
|
||||
|
||||
|
||||
//! Function to return the affinity of the current thread
|
||||
|
@ -325,7 +303,7 @@ public:
|
|||
* Function to set the affinity of the current thread
|
||||
* @param procs The processors to use
|
||||
*/
|
||||
static void setThreadAffinity( std::vector<int> procs );
|
||||
static void setThreadAffinity( const std::vector<int>& procs );
|
||||
|
||||
|
||||
/*!
|
||||
|
@ -333,11 +311,11 @@ public:
|
|||
* @param thread The index of the thread
|
||||
* @param procs The processors to use
|
||||
*/
|
||||
void setThreadAffinity( int thread, std::vector<int> procs ) const;
|
||||
void setThreadAffinity( int thread, const std::vector<int>& procs ) const;
|
||||
|
||||
|
||||
//! Function to return the number of threads in the thread pool
|
||||
int getNumThreads() const { return d_N_threads; }
|
||||
inline int getNumThreads() const { return d_N_threads; }
|
||||
|
||||
|
||||
/*!
|
||||
|
@ -347,21 +325,15 @@ public:
|
|||
* in the ThreadPool without checking the existing work unless the desired number of
|
||||
* threads is 0. In this case, the function will wait for all work items to finish
|
||||
* before deleting the existing work threads.
|
||||
|
||||
* Member threads may not call this function.
|
||||
* @param N The desired number of worker threads
|
||||
* @param affinity The affinity scheduler to use:
|
||||
* none - Let the OS handle the affinities (default)
|
||||
|
||||
* independent - Give each thread an independent set of processors
|
||||
* @param procs The processors to use (defaults to the process affinitiy list)
|
||||
*/
|
||||
inline void setNumThreads( const int N, const std::string &affinity = "none",
|
||||
const std::vector<int> &procs = std::vector<int>() )
|
||||
{
|
||||
const int *procs2 = procs.empty() ? nullptr : ( &procs[0] );
|
||||
setNumThreads( N, affinity.c_str(), (int) procs.size(), procs2 );
|
||||
}
|
||||
void setNumThreads( const int N, const std::string &affinity = "none",
|
||||
const std::vector<int> &procs = std::vector<int>() );
|
||||
|
||||
|
||||
/*!
|
||||
|
@ -409,6 +381,36 @@ public:
|
|||
static inline return_type getFunctionRet( const thread_id_t &id );
|
||||
|
||||
|
||||
/*!
|
||||
* \brief Function to create a work item
|
||||
* \details This function creates a work item that can be added to the queue
|
||||
* @param routine Function to call from the thread pool
|
||||
* @param args Function arguments to pass
|
||||
*/
|
||||
template<class Ret, class... Args>
|
||||
static inline WorkItem* createWork( std::function<Ret(Args...)> routine, std::tuple<Args...> &&args );
|
||||
|
||||
|
||||
/*!
|
||||
* \brief Function to create a work item
|
||||
* \details This function creates a work item that can be added to the queue
|
||||
* @param routine Function to call from the thread pool
|
||||
* @param args Function arguments to pass
|
||||
*/
|
||||
template<class Ret, class... Args>
|
||||
static inline WorkItem* createWork( Ret( *routine )( Args... ), std::tuple<Args...> &&args );
|
||||
|
||||
|
||||
/*!
|
||||
* \brief Function to create a work item
|
||||
* \details This function creates a work item that can be added to the queue
|
||||
* @param routine Function to call from the thread pool
|
||||
* @param args Function arguments to pass
|
||||
*/
|
||||
template<class Ret, class... Args>
|
||||
static inline WorkItem* createWork( std::function<Ret(Args...)> routine, Args... args );
|
||||
|
||||
|
||||
/*!
|
||||
* \brief Function to create a work item
|
||||
* \details This function creates a work item that can be added to the queue
|
||||
|
@ -446,61 +448,33 @@ public:
|
|||
|
||||
/*!
|
||||
* \brief Function to wait until a specific work item has finished
|
||||
* \details This is the function waits for a specific work item to finished. It returns 0 if
|
||||
* successful.
|
||||
* \details This is the function waits for a specific work item to finished.
|
||||
* Note: any thread may call this routine, but they will block until finished.
|
||||
* For worker threads this may eventually lead to a deadlock.
|
||||
* @param id The work item to wait for
|
||||
*/
|
||||
inline int wait( thread_id_t id ) const;
|
||||
inline void wait( thread_id_t id ) const;
|
||||
|
||||
|
||||
/*!
|
||||
* \brief Function to wait until any of the given work items have finished their work
|
||||
* \details This is the function waits for any of the given work items to finish.
|
||||
* If successful it returns the index of a finished work item (the index in the array ids).
|
||||
* If unseccessful it will return -1.
|
||||
* Note: any thread may call this routine, but they will block until finished.
|
||||
* For worker threads this may eventually lead to a deadlock.
|
||||
* @param N_work The number of work items
|
||||
* @param ids Array of work items to wait for
|
||||
*/
|
||||
inline int wait_any( size_t N_work, const thread_id_t *ids );
|
||||
|
||||
|
||||
/*!
|
||||
* \brief Function to wait until any of the given work items have finished their work
|
||||
* \details This is the function waits for any of the given work items to finish.
|
||||
* If successful it returns the index of a finished work item (the index in the array ids).
|
||||
* If unseccessful it will return -1.
|
||||
* Note: any thread may call this routine, but they will block until finished.
|
||||
* For worker threads this may eventually lead to a deadlock.
|
||||
* @param ids Vector of work items to wait for
|
||||
*/
|
||||
inline int wait_any( const std::vector<thread_id_t> &ids ) const;
|
||||
inline size_t wait_any( const std::vector<thread_id_t> &ids ) const;
|
||||
|
||||
|
||||
/*!
|
||||
* \brief Function to wait until all of the given work items have finished their work
|
||||
* \details This is the function waits for all given of the work items to finish. It returns 0
|
||||
* if successful.
|
||||
* Note: any thread may call this routine, but they will block until finished.
|
||||
* For worker threads this may eventually lead to a deadlock.
|
||||
* @param N_work The number of work items
|
||||
* @param ids Array of work items to wait for
|
||||
*/
|
||||
inline int wait_all( size_t N_work, const thread_id_t *ids ) const;
|
||||
|
||||
|
||||
/*!
|
||||
* \brief Function to wait until all of the given work items have finished their work
|
||||
* \details This is the function waits for all given of the work items to finish. It returns 0
|
||||
* if successful.
|
||||
* \details This is the function waits for all given of the work items to finish.
|
||||
* Note: any thread may call this routine, but they will block until finished.
|
||||
* For worker threads this may eventually lead to a deadlock.
|
||||
* @param ids Vector of work items to wait for
|
||||
*/
|
||||
inline int wait_all( const std::vector<thread_id_t> &ids ) const;
|
||||
inline void wait_all( const std::vector<thread_id_t> &ids ) const;
|
||||
|
||||
|
||||
/*!
|
||||
|
@ -511,8 +485,9 @@ public:
|
|||
* For worker threads this may eventually lead to a deadlock.
|
||||
* @param N_wait Number of work items to wait for
|
||||
* @param ids Vector of work items to wait for
|
||||
* @param max_wait Maximum time to wait (seconds)
|
||||
*/
|
||||
inline std::vector<int> wait_some( int N_wait, const std::vector<thread_id_t> &ids ) const;
|
||||
inline std::vector<int> wait_some( int N_wait, const std::vector<thread_id_t> &ids, int max_wait = 10000000 ) const;
|
||||
|
||||
|
||||
/*!
|
||||
|
@ -599,14 +574,13 @@ public: // Static interface
|
|||
|
||||
/*!
|
||||
* \brief Function to wait until all of the given work items have finished their work
|
||||
* \details This is the function waits for all given of the work items to finish. It returns 0
|
||||
* if successful.
|
||||
* \details This is the function waits for all given of the work items to finish.
|
||||
* Note: any thread may call this routine, but they will block until finished.
|
||||
* For worker threads this may eventually lead to a deadlock.
|
||||
* @param tpool Threadpool containing work (must match call to add_work)
|
||||
* @param ids Vector of work items to wait for
|
||||
*/
|
||||
static inline int wait_all( const ThreadPool* tpool, const std::vector<thread_id_t> &ids );
|
||||
static inline void wait_all( const ThreadPool* tpool, const std::vector<thread_id_t> &ids );
|
||||
|
||||
|
||||
/*!
|
||||
|
@ -619,10 +593,6 @@ public: // Static interface
|
|||
static inline void wait_pool_finished( const ThreadPool* tpool ) { if ( tpool ) { tpool->wait_pool_finished(); } }
|
||||
|
||||
|
||||
|
||||
private:
|
||||
typedef AtomicOperations::int32_atomic int32_atomic;
|
||||
|
||||
private:
|
||||
///// Member data structures
|
||||
|
||||
|
@ -659,11 +629,14 @@ private:
|
|||
// before calling wait
|
||||
class wait_ids_struct {
|
||||
public:
|
||||
wait_ids_struct() = delete;
|
||||
wait_ids_struct( const wait_ids_struct& ) = delete;
|
||||
wait_ids_struct& operator=( const wait_ids_struct & ) = delete;
|
||||
wait_ids_struct( size_t N, const ThreadPool::thread_id_t *ids, size_t N_wait,
|
||||
AtomicOperations::pool<condition_variable,128>& cv_pool, int N_wait_list, volatile wait_ids_struct **list );
|
||||
~wait_ids_struct( );
|
||||
void id_finished( const ThreadPool::thread_id_t& id ) const;
|
||||
bool wait_for( double seconds );
|
||||
bool wait_for( double total_time, double recheck_time );
|
||||
private:
|
||||
mutable int d_wait; // The number of work items that must finish before we alert the thread
|
||||
mutable int d_N; // The number of ids we are waiting on
|
||||
|
@ -672,9 +645,8 @@ private:
|
|||
condition_variable *d_wait_event; // Handle to a wait event
|
||||
volatile mutable bool *d_finished; // Has each id finished
|
||||
volatile mutable wait_ids_struct **d_ptr;
|
||||
wait_ids_struct();
|
||||
wait_ids_struct( const wait_ids_struct& );
|
||||
wait_ids_struct& operator=( const wait_ids_struct & );
|
||||
private:
|
||||
inline bool check();
|
||||
};
|
||||
|
||||
|
||||
|
@ -685,10 +657,8 @@ private:
|
|||
ThreadPool( const ThreadPool & );
|
||||
ThreadPool &operator=( const ThreadPool & );
|
||||
|
||||
// Function to initialize the thread pool
|
||||
void setNumThreads( int N, const char *affinity, int N_procs, const int *procs );
|
||||
void initialize( int N, const char *affinity, int N_procs, const int *procs );
|
||||
void check_startup( size_t size0 );
|
||||
// Function to check the startup
|
||||
void check_startup( );
|
||||
|
||||
// Function to add an array of work items
|
||||
void add_work(
|
||||
|
@ -716,39 +686,45 @@ private:
|
|||
inline bool isMemberThread() const { return getThreadNumber()>=0; }
|
||||
|
||||
// Function to wait for some work items to finish
|
||||
int wait_some( size_t N_work, const thread_id_t *ids, size_t N_wait, bool *finished ) const;
|
||||
int wait_some( size_t N_work, const thread_id_t *ids, size_t N_wait, bool *finished, int max_wait ) const;
|
||||
|
||||
// Check if we are waiting too long and pring debug info
|
||||
void check_wait_time( std::chrono::time_point<std::chrono::high_resolution_clock>& t1 ) const;
|
||||
void print_wait_warning( ) const;
|
||||
|
||||
|
||||
private:
|
||||
///// Member data
|
||||
typedef AtomicOperations::int64_atomic atomic_64;
|
||||
typedef AtomicList<thread_id_t,MAX_QUEUED,std::greater<thread_id_t>> queue_type;
|
||||
// Note: We want to store the variables in a certain order to optimize storage
|
||||
// and ensure consistent packing / object size
|
||||
size_t d_NULL_HEAD; // Null data buffer to check memory bounds
|
||||
volatile atomic_64 d_id_assign; // An internal variable used to store the current id to assign
|
||||
volatile mutable bool d_signal_empty; // Do we want to send a signal when the queue is empty
|
||||
volatile mutable int32_atomic d_signal_count; // Signal count
|
||||
short int d_N_threads; // Number of threads
|
||||
volatile int32_atomic d_num_active; // Number of threads that are currently active
|
||||
volatile atomic_64 d_active[MAX_NUM_THREADS/64]; // Which threads are currently active
|
||||
volatile atomic_64 d_cancel[MAX_NUM_THREADS/64]; // Which threads should be deleted
|
||||
volatile atomic_64 d_N_added; // Number of items added to the work queue
|
||||
volatile atomic_64 d_N_started; // Number of items started
|
||||
volatile atomic_64 d_N_finished; // Number of items finished
|
||||
volatile mutable wait_ids_struct *d_wait[MAX_WAIT]; // The wait events to check
|
||||
mutable wait_ids_struct *d_wait_last; // A cached copy of the last completed wait event (in case a thread still has a reference)
|
||||
condition_variable d_wait_finished; // Condition variable to signal when all work is finished
|
||||
condition_variable d_wait_work; // Condition variable to signal when there is new work
|
||||
mutable AtomicOperations::pool<condition_variable,128> d_cond_pool;
|
||||
std::thread d_thread[MAX_NUM_THREADS]; // Handles to the threads
|
||||
std::thread::id d_threadId[MAX_NUM_THREADS]; // Unique id for each thread
|
||||
queue_type d_queue_list; // The work queue
|
||||
size_t d_NULL_TAIL; // Null data buffer to check memory bounds
|
||||
int d_max_wait_time; // The maximum time in a wait command before printing a warning message
|
||||
std::function<void(const std::string&)> d_errorHandler;
|
||||
|
||||
// Typedefs
|
||||
typedef volatile AtomicOperations::int32_atomic vint32_t;
|
||||
typedef volatile AtomicOperations::int64_atomic vint64_t;
|
||||
typedef volatile wait_ids_struct vwait_t;
|
||||
typedef AtomicOperations::pool<condition_variable,128> cond_t;
|
||||
typedef AtomicList<thread_id_t,std::greater<thread_id_t>> queue_type;
|
||||
|
||||
// Internal data
|
||||
uint32_t d_NULL_HEAD; // Null data buffer to check memory bounds
|
||||
volatile mutable bool d_signal_empty; // Do we want to send a signal when the queue is empty
|
||||
uint16_t d_N_threads; // Number of threads
|
||||
int d_max_wait_time; // The maximum time in a wait command before printing a warning message
|
||||
vint32_t d_signal_count; // Signal count
|
||||
vint32_t d_num_active; // Number of threads that are currently active
|
||||
vint64_t d_id_assign; // An internal variable used to store the current id to assign
|
||||
vint64_t d_active[MAX_THREADS/64]; // Which threads are currently active
|
||||
vint64_t d_cancel[MAX_THREADS/64]; // Which threads should be deleted
|
||||
vint64_t d_N_added; // Number of items added to the work queue
|
||||
vint64_t d_N_started; // Number of items started
|
||||
vint64_t d_N_finished; // Number of items finished
|
||||
mutable vwait_t *d_wait[MAX_WAIT]; // The wait events to check
|
||||
mutable wait_ids_struct *d_wait_last; // A cached copy of the last completed wait event (in case a thread still has a reference)
|
||||
condition_variable d_wait_finished; // Condition variable to signal when all work is finished
|
||||
condition_variable d_wait_work; // Condition variable to signal when there is new work
|
||||
mutable cond_t d_cond_pool; // Condition pool
|
||||
std::thread d_thread[MAX_THREADS]; // Handles to the threads
|
||||
std::thread::id d_threadId[MAX_THREADS]; // Unique id for each thread
|
||||
queue_type d_queue_list; // The work queue
|
||||
std::function<void(const std::string&)> d_errorHandler; // Error handler
|
||||
uint32_t d_NULL_TAIL; // Null data buffer to check memory bounds
|
||||
};
|
||||
|
||||
|
||||
|
|
|
@ -51,19 +51,10 @@
|
|||
* \param args The arguments to pass to the function in the form (arg1,arg2,...)
|
||||
* \param priority Optional argument specifying the priority of the work item
|
||||
*/
|
||||
#define TPOOL_TUPLE_TO_SEQ( t ) TPOOL_TUPLE_TO_SEQ_##II t
|
||||
#define TPOOL_TUPLE_TO_SEQ_II( a, ... ) a, ##__VA_ARGS__
|
||||
#if defined( WIN32 ) || defined( _WIN32 ) || defined( WIN64 ) || defined( _WIN64 )
|
||||
#define TPOOL_GET_PRIORITY( a, N, c, ... ) N
|
||||
#define TPOOL_ADD_WORK( TPOOL, FUNCTION, ARGS, ... ) \
|
||||
ThreadPool_add_work( TPOOL, TPOOL_GET_PRIORITY( 0, __VA_ARGS__, 0, 0 ) + 0, FUNCTION, \
|
||||
TPOOL_TUPLE_TO_SEQ( ARGS ) )
|
||||
#else
|
||||
#define TPOOL_GET_PRIORITY( _0, N, ... ) N
|
||||
#define TPOOL_ADD_WORK( TPOOL, FUNCTION, ARGS, ... ) \
|
||||
ThreadPool_add_work( \
|
||||
TPOOL, TPOOL_GET_PRIORITY( _0, ##__VA_ARGS__, 0 ), FUNCTION, TPOOL_TUPLE_TO_SEQ( ARGS ) )
|
||||
#endif
|
||||
#define TPOOL_ADD_WORK2( TPOOL, FUNCTION, ARGS, PRIORITY, ... ) \
|
||||
ThreadPool_add_work( TPOOL, PRIORITY, FUNCTION, std::make_tuple ARGS )
|
||||
#define TPOOL_ADD_WORK( TPOOL, FUNCTION, ... ) TPOOL_ADD_WORK2( TPOOL, FUNCTION, __VA_ARGS__, 0, 0 )
|
||||
|
||||
|
||||
/*! @} */
|
||||
|
||||
|
@ -89,17 +80,17 @@ struct make_indexes : make_indexes_impl<0, index_tuple<>, Types...> {
|
|||
};
|
||||
template<class Ret, class... Args, int... Indexes>
|
||||
inline Ret apply_helper(
|
||||
Ret ( *pf )( Args... ), index_tuple<Indexes...>, std::tuple<Args...> &&tup )
|
||||
std::function<Ret( Args... )> &pf, index_tuple<Indexes...>, std::tuple<Args...> &&tup )
|
||||
{
|
||||
return pf( std::forward<Args>( std::get<Indexes>( tup ) )... );
|
||||
}
|
||||
template<class Ret, class... Args>
|
||||
inline Ret apply( Ret ( *pf )( Args... ), const std::tuple<Args...> &tup )
|
||||
inline Ret apply( std::function<Ret( Args... )> &pf, const std::tuple<Args...> &tup )
|
||||
{
|
||||
return apply_helper( pf, typename make_indexes<Args...>::type(), std::tuple<Args...>( tup ) );
|
||||
}
|
||||
template<class Ret, class... Args>
|
||||
inline Ret apply( Ret ( *pf )( Args... ), std::tuple<Args...> &&tup )
|
||||
inline Ret apply( std::function<Ret( Args... )> &pf, std::tuple<Args...> &&tup )
|
||||
{
|
||||
return apply_helper(
|
||||
pf, typename make_indexes<Args...>::type(), std::forward<std::tuple<Args...>>( tup ) );
|
||||
|
@ -122,32 +113,40 @@ public:
|
|||
template<class Ret, class... Args>
|
||||
class WorkItemFull;
|
||||
template<class... Args>
|
||||
class WorkItemFull<void, Args...> : public ThreadPool::WorkItemRet<void>
|
||||
class WorkItemFull<void, Args...> final : public ThreadPool::WorkItemRet<void>
|
||||
{
|
||||
private:
|
||||
void ( *routine )( Args... );
|
||||
std::function<void( Args... )> routine;
|
||||
std::tuple<Args...> args;
|
||||
WorkItemFull();
|
||||
|
||||
public:
|
||||
WorkItemFull( void ( *routine2 )( Args... ), Args... ts )
|
||||
: ThreadPool::WorkItemRet<void>(), routine( routine2 ), args( ts... )
|
||||
WorkItemFull( std::function<void( Args... )> &&routine2, Args... ts )
|
||||
: ThreadPool::WorkItemRet<void>(), routine( std::move( routine2 ) ), args( ts... )
|
||||
{
|
||||
}
|
||||
WorkItemFull( std::function<void( Args... )> &&routine2, std::tuple<Args...> &&ts )
|
||||
: ThreadPool::WorkItemRet<void>(), routine( std::move( routine2 ) ), args( ts )
|
||||
{
|
||||
}
|
||||
virtual void run() override { apply( routine, args ); }
|
||||
virtual ~WorkItemFull() {}
|
||||
};
|
||||
template<class Ret, class... Args>
|
||||
class WorkItemFull : public ThreadPool::WorkItemRet<Ret>
|
||||
class WorkItemFull final : public ThreadPool::WorkItemRet<Ret>
|
||||
{
|
||||
private:
|
||||
Ret ( *routine )( Args... );
|
||||
std::function<Ret( Args... )> routine;
|
||||
std::tuple<Args...> args;
|
||||
WorkItemFull();
|
||||
|
||||
public:
|
||||
WorkItemFull( Ret ( *routine2 )( Args... ), Args... ts )
|
||||
: ThreadPool::WorkItemRet<Ret>(), routine( routine2 ), args( ts... )
|
||||
WorkItemFull( std::function<Ret( Args... )> &&routine2, Args... ts )
|
||||
: ThreadPool::WorkItemRet<Ret>(), routine( std::move( routine2 ) ), args( ts... )
|
||||
{
|
||||
}
|
||||
WorkItemFull( std::function<Ret( Args... )> &&routine2, std::tuple<Args...> &&ts )
|
||||
: ThreadPool::WorkItemRet<Ret>(), routine( std::move( routine2 ) ), args( ts )
|
||||
{
|
||||
}
|
||||
virtual void run() override { this->d_result = apply( routine, args ); }
|
||||
|
@ -156,11 +155,40 @@ public:
|
|||
|
||||
|
||||
// Functions to add work to the thread pool
|
||||
template<class Ret, class... Ts>
|
||||
// clang-format off
|
||||
template<class Ret, class... Args>
|
||||
inline ThreadPool::thread_id_t ThreadPool_add_work(
|
||||
ThreadPool *tpool, int priority, Ret ( *routine )( Ts... ), Ts... ts )
|
||||
ThreadPool *tpool, int priority, std::function<Ret( Args... )> routine, std::tuple<Args...> &&args )
|
||||
{
|
||||
auto work = new WorkItemFull<Ret, Ts...>( routine, ts... );
|
||||
auto work = new WorkItemFull<Ret, Args...>( routine, std::move( args ) );
|
||||
return ThreadPool::add_work( tpool, work, priority );
|
||||
}
|
||||
template<class Ret, class... Args>
|
||||
inline ThreadPool::thread_id_t ThreadPool_add_work(
|
||||
ThreadPool *tpool, int priority, Ret ( *routine )( Args... ), std::tuple<Args...> &&args )
|
||||
{
|
||||
auto work = new WorkItemFull<Ret, Args...>( routine, std::move( args ) );
|
||||
return ThreadPool::add_work( tpool, work, priority );
|
||||
}
|
||||
template<class Ret, class... Args>
|
||||
inline ThreadPool::thread_id_t ThreadPool_add_work(
|
||||
ThreadPool *tpool, int priority, Ret ( *routine )(), std::tuple<std::nullptr_t>&& )
|
||||
{
|
||||
auto work = new WorkItemFull<Ret>( routine );
|
||||
return ThreadPool::add_work( tpool, work, priority );
|
||||
}
|
||||
template<class Ret, class... Args>
|
||||
inline ThreadPool::thread_id_t ThreadPool_add_work(
|
||||
ThreadPool *tpool, int priority, std::function<Ret( Args... )> routine, Args... args )
|
||||
{
|
||||
auto work = new WorkItemFull<Ret, Args...>( routine, std::forward_as_tuple( args... ) );
|
||||
return ThreadPool::add_work( tpool, work, priority );
|
||||
}
|
||||
template<class Ret, class... Args>
|
||||
inline ThreadPool::thread_id_t ThreadPool_add_work(
|
||||
ThreadPool *tpool, int priority, Ret ( *routine )( Args... ), Args... args )
|
||||
{
|
||||
auto work = new WorkItemFull<Ret, Args...>( routine, std::forward_as_tuple( args... ) );
|
||||
return ThreadPool::add_work( tpool, work, priority );
|
||||
}
|
||||
template<class Ret>
|
||||
|
@ -171,10 +199,29 @@ inline ThreadPool::thread_id_t ThreadPool_add_work(
|
|||
return ThreadPool::add_work( tpool, work, priority );
|
||||
}
|
||||
template<class Ret, class... Args>
|
||||
inline ThreadPool::WorkItem *ThreadPool::createWork(
|
||||
std::function<Ret( Args... )> routine, Args... args )
|
||||
{
|
||||
return new WorkItemFull<Ret, Args...>( routine, std::forward_as_tuple( args... ) );
|
||||
}
|
||||
template<class Ret, class... Args>
|
||||
inline ThreadPool::WorkItem *ThreadPool::createWork( Ret ( *routine )( Args... ), Args... args )
|
||||
{
|
||||
return new WorkItemFull<Ret, Args...>( routine, args... );
|
||||
return new WorkItemFull<Ret, Args...>( routine, std::forward_as_tuple( args... ) );
|
||||
}
|
||||
template<class Ret, class... Args>
|
||||
inline ThreadPool::WorkItem *ThreadPool::createWork(
|
||||
std::function<Ret( Args... )> routine, std::tuple<Args...> &&args )
|
||||
{
|
||||
return new WorkItemFull<Ret, Args...>( routine, std::move( args ) );
|
||||
}
|
||||
template<class Ret, class... Args>
|
||||
inline ThreadPool::WorkItem *ThreadPool::createWork(
|
||||
Ret ( *routine )( Args... ), std::tuple<Args...> &&args )
|
||||
{
|
||||
return new WorkItemFull<Ret, Args...>( routine, std::move( args ) );
|
||||
}
|
||||
// clang-format on
|
||||
|
||||
|
||||
/******************************************************************
|
||||
|
@ -204,71 +251,49 @@ inline Ret ThreadPool::getFunctionRet( const ThreadPool::thread_id_t &id )
|
|||
/******************************************************************
|
||||
* Inline functions to wait for the work items to finish *
|
||||
******************************************************************/
|
||||
inline int ThreadPool::wait( ThreadPool::thread_id_t id ) const
|
||||
inline void ThreadPool::wait( ThreadPool::thread_id_t id ) const
|
||||
{
|
||||
bool finished;
|
||||
wait_some( 1, &id, 1, &finished );
|
||||
return 0;
|
||||
int N = wait_some( 1, &id, 1, &finished, 10000000 );
|
||||
if ( N != 1 )
|
||||
throw std::logic_error( "Failed to wait for id" );
|
||||
}
|
||||
inline int ThreadPool::wait_any( size_t N_work, const ThreadPool::thread_id_t *ids )
|
||||
{
|
||||
auto finished = new bool[N_work];
|
||||
wait_some( N_work, ids, 1, finished );
|
||||
int index = -1;
|
||||
for ( size_t i = 0; i < N_work; i++ ) {
|
||||
if ( finished[i] ) {
|
||||
index = static_cast<int>( i );
|
||||
break;
|
||||
}
|
||||
}
|
||||
delete[] finished;
|
||||
return index;
|
||||
}
|
||||
inline int ThreadPool::wait_any( const std::vector<thread_id_t> &ids ) const
|
||||
inline size_t ThreadPool::wait_any( const std::vector<thread_id_t> &ids ) const
|
||||
{
|
||||
if ( ids.empty() )
|
||||
return 0;
|
||||
auto finished = new bool[ids.size()];
|
||||
wait_some( ids.size(), &ids[0], 1, finished );
|
||||
int index = -1;
|
||||
int N = wait_some( ids.size(), &ids[0], 1, finished, 10000000 );
|
||||
if ( N < 1 )
|
||||
throw std::logic_error( "Failed to wait for any id" );
|
||||
for ( size_t i = 0; i < ids.size(); i++ ) {
|
||||
if ( finished[i] ) {
|
||||
index = static_cast<int>( i );
|
||||
break;
|
||||
delete[] finished;
|
||||
return i;
|
||||
}
|
||||
}
|
||||
delete[] finished;
|
||||
return index;
|
||||
throw std::logic_error( "wait_any failed" );
|
||||
}
|
||||
inline int ThreadPool::wait_all( size_t N_work, const ThreadPool::thread_id_t *ids ) const
|
||||
{
|
||||
if ( N_work == 0 )
|
||||
return 0;
|
||||
auto finished = new bool[N_work];
|
||||
wait_some( N_work, ids, N_work, finished );
|
||||
delete[] finished;
|
||||
return 0;
|
||||
}
|
||||
inline int ThreadPool::wait_all( const std::vector<thread_id_t> &ids ) const
|
||||
inline void ThreadPool::wait_all( const std::vector<thread_id_t> &ids ) const
|
||||
{
|
||||
if ( ids.empty() )
|
||||
return 0;
|
||||
return;
|
||||
auto finished = new bool[ids.size()];
|
||||
wait_some( ids.size(), ids.data(), ids.size(), finished );
|
||||
int N = wait_some( ids.size(), ids.data(), ids.size(), finished, 10000000 );
|
||||
if ( N != (int) ids.size() )
|
||||
throw std::logic_error( "Failed to wait for all ids" );
|
||||
delete[] finished;
|
||||
return 0;
|
||||
}
|
||||
inline int ThreadPool::wait_all( const ThreadPool *tpool, const std::vector<thread_id_t> &ids )
|
||||
inline void ThreadPool::wait_all( const ThreadPool *tpool, const std::vector<thread_id_t> &ids )
|
||||
{
|
||||
if ( tpool )
|
||||
return tpool->wait_all( ids );
|
||||
return ids.size();
|
||||
}
|
||||
inline std::vector<int> ThreadPool::wait_some(
|
||||
int N_wait, const std::vector<thread_id_t> &ids ) const
|
||||
int N_wait, const std::vector<thread_id_t> &ids, int max_wait ) const
|
||||
{
|
||||
auto finished = new bool[ids.size()];
|
||||
int N_finished = wait_some( ids.size(), ids.data(), N_wait, finished );
|
||||
int N_finished = wait_some( ids.size(), ids.data(), N_wait, finished, max_wait );
|
||||
std::vector<int> index( N_finished, -1 );
|
||||
for ( size_t i = 0, j = 0; i < ids.size(); i++ ) {
|
||||
if ( finished[i] ) {
|
||||
|
@ -343,7 +368,7 @@ inline std::vector<ThreadPool::thread_id_t> ThreadPool::add_work( ThreadPool *tp
|
|||
* Class functions to for the thread id *
|
||||
******************************************************************/
|
||||
inline ThreadPool::thread_id_t::thread_id_t()
|
||||
: d_id( nullThreadID ), d_count( NULL ), d_work( NULL )
|
||||
: d_id( nullThreadID ), d_count( nullptr ), d_work( nullptr )
|
||||
{
|
||||
}
|
||||
inline ThreadPool::thread_id_t::~thread_id_t() { reset(); }
|
||||
|
@ -380,7 +405,7 @@ inline ThreadPool::thread_id_t &ThreadPool::thread_id_t::operator=(
|
|||
inline ThreadPool::thread_id_t::thread_id_t( const volatile ThreadPool::thread_id_t &rhs )
|
||||
: d_id( rhs.d_id ), d_count( rhs.d_count ), d_work( rhs.d_work )
|
||||
{
|
||||
if ( d_count != NULL )
|
||||
if ( d_count != nullptr )
|
||||
AtomicOperations::atomic_increment( d_count );
|
||||
}
|
||||
#if !defined( WIN32 ) && !defined( _WIN32 ) && !defined( WIN64 ) && !defined( _WIN64 )
|
||||
|
@ -465,15 +490,9 @@ inline void ThreadPool::thread_id_t::reset()
|
|||
}
|
||||
inline uint64_t ThreadPool::thread_id_t::createId( int priority, uint64_t local_id )
|
||||
{
|
||||
if ( priority < -127 || priority > 127 )
|
||||
throw std::logic_error( "priority limited to +- 127" );
|
||||
if ( local_id > maxThreadID )
|
||||
throw std::logic_error( "local id >= 2^56" );
|
||||
char tmp1 = static_cast<char>( priority + 128 );
|
||||
unsigned char tmp2 = static_cast<unsigned char>( tmp1 );
|
||||
if ( priority >= 0 )
|
||||
tmp2 |= 0x80;
|
||||
uint64_t id = tmp2;
|
||||
if ( priority < -127 || priority > 127 || local_id > maxThreadID )
|
||||
throw std::logic_error( "Invalid priority or local id" );
|
||||
uint64_t id = priority + 128;
|
||||
id = ( id << 56 ) + local_id;
|
||||
return id;
|
||||
}
|
||||
|
@ -490,9 +509,8 @@ inline void ThreadPool::thread_id_t::reset( int priority, uint64_t local_id, voi
|
|||
d_id = createId( priority, local_id );
|
||||
// Create the work and counter
|
||||
d_count = nullptr;
|
||||
d_work = nullptr;
|
||||
if ( work != nullptr ) {
|
||||
d_work = work;
|
||||
d_work = work;
|
||||
if ( d_work != nullptr ) {
|
||||
d_count = &( reinterpret_cast<WorkItem *>( work )->d_count );
|
||||
*d_count = 1;
|
||||
}
|
||||
|
@ -542,7 +560,6 @@ inline bool ThreadPool::thread_id_t::ready() const
|
|||
******************************************************************/
|
||||
inline bool ThreadPool::isValid( const ThreadPool::thread_id_t &id ) const
|
||||
{
|
||||
static_assert( sizeof( atomic_64 ) == 8, "atomic_64 must be a 64-bit integer" );
|
||||
uint64_t local_id = id.getLocalID();
|
||||
uint64_t next_id = d_id_assign - 1;
|
||||
return local_id != 0 && id.initialized() && local_id <= thread_id_t::maxThreadID &&
|
||||
|
|
Loading…
Reference in New Issue
Block a user