diff --git a/common/Array.h b/common/Array.h index f926b101..edfa687a 100644 --- a/common/Array.h +++ b/common/Array.h @@ -1,34 +1,15 @@ #ifndef included_ArrayClass #define included_ArrayClass -#include #include +#include #include +#include #include -#include #include -#include +#include - -#define ARRAY_NDIM_MAX 5 // Maximum number of dimensions supported - - -#define GET_ARRAY_INDEX3D( N, i1, i2, i3 ) i1 + N[0] * ( i2 + N[1] * i3 ) -#define GET_ARRAY_INDEX4D( N, i1, i2, i3, i4 ) i1 + N[0] * ( i2 + N[1] * ( i3 + N[2] * i4 ) ) -#define GET_ARRAY_INDEX5D( N, i1, i2, i3, i4, i5 ) i1 + N[0] * ( i2 + N[1] * ( i3 + N[2] * ( i4 + N[3] * i5 ) ) ) - -#if defined( DEBUG ) || defined( _DEBUG ) - #define CHECK_ARRAY_INDEX3D( N, i1, i2, i3 ) \ - if ( GET_ARRAY_INDEX3D( N, i1, i2, i3 ) < 0 || GET_ARRAY_INDEX3D( N, i1, i2, i3 ) >= d_length ) \ - throw std::logic_error( "Index exceeds array bounds" ); - #define CHECK_ARRAY_INDEX4D( N, i1, i2, i3, i4 ) \ - if ( GET_ARRAY_INDEX4D( N, i1, i2, i3, i4 ) < 0 || \ - GET_ARRAY_INDEX4D( N, i1, i2, i3, i4 ) >= d_length ) \ - throw std::logic_error( "Index exceeds array bounds" ); -#else - #define CHECK_ARRAY_INDEX3D( N, i1, i2, i3 ) - #define CHECK_ARRAY_INDEX4D( N, i1, i2, i3, i4 ) -#endif +#include "Utilities.h" #if defined( __CUDA_ARCH__ ) @@ -37,20 +18,244 @@ #else #define HOST_DEVICE #endif +#if defined( USING_GCC ) || defined( USING_CLANG ) +#define ATTRIBUTE_INLINE __attribute__( ( always_inline ) ) +#else +#define ATTRIBUTE_INLINE +#endif + + +#if ( defined( DEBUG ) || defined( _DEBUG ) ) && !defined( NDEBUG ) +#define CHECK_ARRAY_LENGTH( i ) \ + do { \ + if ( i >= d_length ) \ + throw std::length_error( "Index exceeds array bounds" ); \ + } while ( 0 ) +#else +#define CHECK_ARRAY_LENGTH( i ) \ + do { \ + } while ( 0 ) +#endif + + +// Forward decleration +class FunctionTable; + + +//! Simple range class +template +class Range final +{ +public: + //! Empty constructor + Range() : i( 0 ), j( -1 ), k( 1 ) {} + + /*! 
+ * Create a range i:k:j (or i:j) + * @param i_ Starting value + * @param j_ Ending value + * @param k_ Increment value + */ + Range( TYPE i_, TYPE j_, TYPE k_ = 1 ) : i( i_ ), j( j_ ), k( k_ ) {} + + TYPE i, j, k; +}; + + +//! Simple class to store the array dimensions +class ArraySize final +{ +public: + //! Empty constructor + inline ArraySize(); + + /*! + * Create the vector size + * @param N1 Number of elements in the first dimension + */ + inline ArraySize( size_t N1 ); + + /*! + * Create the vector size + * @param N1 Number of elements in the first dimension + * @param N2 Number of elements in the second dimension + */ + inline ArraySize( size_t N1, size_t N2 ); + + /*! + * Create the vector size + * @param N1 Number of elements in the first dimension + * @param N2 Number of elements in the second dimension + * @param N3 Number of elements in the third dimension + */ + inline ArraySize( size_t N1, size_t N2, size_t N3 ); + + /*! + * Create the vector size + * @param N1 Number of elements in the first dimension + * @param N2 Number of elements in the second dimension + * @param N3 Number of elements in the third dimension + * @param N4 Number of elements in the fourth dimension + */ + inline ArraySize( size_t N1, size_t N2, size_t N3, size_t N4 ); + + /*! + * Create the vector size + * @param N1 Number of elements in the first dimension + * @param N2 Number of elements in the second dimension + * @param N3 Number of elements in the third dimension + * @param N4 Number of elements in the fourth dimension + * @param N5 Number of elements in the fifth dimension + */ + inline ArraySize( size_t N1, size_t N2, size_t N3, size_t N4, size_t N5 ); + + /*! + * Create from initializer list + * @param N Size of the array + */ + inline ArraySize( std::initializer_list N ); + + /*! + * Create from raw pointer + * @param ndim Number of dimensions + * @param ndim Dimensions + */ + inline ArraySize( size_t ndim, const size_t *dims ); + + /*! 
+ * Create from std::vector + * @param N Size of the array + */ + inline ArraySize( const std::vector &N ); + + /*! + * Copy constructor + * @param rhs Array to copy + */ + inline ArraySize( const ArraySize &rhs ); + + /*! + * Move constructor + * @param rhs Array to copy + */ + inline ArraySize( ArraySize &&rhs ); + + /*! + * Assignment operator + * @param rhs Array to copy + */ + inline ArraySize &operator=( const ArraySize &rhs ); + + /*! + * Move assignment operator + * @param rhs Array to copy + */ + inline ArraySize &operator=( ArraySize &&rhs ); + + /*! + * Access the ith dimension + * @param i Index to access + */ + inline size_t operator[]( size_t i ) const { return d_N[i]; } + + //! Sum the elements + inline uint8_t ndim() const ATTRIBUTE_INLINE { return d_ndim; } + + //! Sum the elements + inline size_t size() const ATTRIBUTE_INLINE { return d_ndim; } + + //! Sum the elements + inline size_t length() const ATTRIBUTE_INLINE { return d_length; } + + //! Sum the elements + inline void resize( uint8_t dim, size_t N ); + + //! Returns an iterator to the beginning + inline const size_t *begin() const ATTRIBUTE_INLINE { return d_N; } + + //! Returns an iterator to the end + inline const size_t *end() const ATTRIBUTE_INLINE { return d_N + d_ndim; } + + // Check if two matrices are equal + inline bool operator==( const ArraySize &rhs ) const ATTRIBUTE_INLINE + { + return d_ndim == rhs.d_ndim && memcmp( d_N, rhs.d_N, sizeof( d_N ) ) == 0; + } + + //! Check if two matrices are not equal + inline bool operator!=( const ArraySize &rhs ) const ATTRIBUTE_INLINE + { + return d_ndim != rhs.d_ndim || memcmp( d_N, rhs.d_N, sizeof( d_N ) ) != 0; + } + + //! Maximum supported dimension + constexpr static uint8_t maxDim() ATTRIBUTE_INLINE { return 5u; } + + //! Get the index + inline size_t index( size_t i ) const ATTRIBUTE_INLINE + { + CHECK_ARRAY_LENGTH( i ); + return i; + } + + //! 
Get the index + inline size_t index( size_t i1, size_t i2 ) const ATTRIBUTE_INLINE + { + size_t index = i1 + i2 * d_N[0]; + CHECK_ARRAY_LENGTH( index ); + return index; + } + + //! Get the index + inline size_t index( size_t i1, size_t i2, size_t i3 ) const ATTRIBUTE_INLINE + { + size_t index = i1 + d_N[0] * ( i2 + d_N[1] * i3 ); + CHECK_ARRAY_LENGTH( index ); + return index; + } + + //! Get the index + inline size_t index( size_t i1, size_t i2, size_t i3, size_t i4 ) const ATTRIBUTE_INLINE + { + size_t index = i1 + d_N[0] * ( i2 + d_N[1] * ( i3 + d_N[2] * i4 ) ); + CHECK_ARRAY_LENGTH( index ); + return index; + } + + //! Get the index + inline size_t index( + size_t i1, size_t i2, size_t i3, size_t i4, size_t i5 ) const ATTRIBUTE_INLINE + { + size_t index = i1 + d_N[0] * ( i2 + d_N[1] * ( i3 + d_N[2] * ( i4 + d_N[3] * i5 ) ) ); + CHECK_ARRAY_LENGTH( index ); + return index; + } + +private: + uint8_t d_ndim; + size_t d_length; + size_t d_N[5]; +}; /*! * Class Array is a multi-dimensional array class written by Mark Berrill */ -template -class Array +template +class Array final { -public: +public: // Constructors / assignment operators /*! * Create a new empty Array */ Array(); + /*! + * Create an Array with the given size + * @param N Size of the array + */ + explicit Array( const ArraySize &N ); + /*! * Create a new 1D Array with the given number of elements * @param N Number of elements in the array @@ -72,6 +277,25 @@ public: */ explicit Array( size_t N1, size_t N2, size_t N3 ); + /*! + * Create a new 4D Array with the given number of rows and columns + * @param N1 Number of elements in the first dimension + * @param N2 Number of elements in the second dimension + * @param N3 Number of elements in the third dimension + * @param N4 Number of elements in the fourth dimension + */ + explicit Array( size_t N1, size_t N2, size_t N3, size_t N4 ); + + /*! 
+ * Create a new 4D Array with the given number of rows and columns + * @param N1 Number of elements in the first dimension + * @param N2 Number of elements in the second dimension + * @param N3 Number of elements in the third dimension + * @param N4 Number of elements in the fourth dimension + * @param N5 Number of elements in the fifth dimension + */ + explicit Array( size_t N1, size_t N2, size_t N3, size_t N4, size_t N5 ); + /*! * Create a multi-dimensional Array with the given number of elements * @param N Number of elements in each dimension @@ -79,6 +303,19 @@ public: */ explicit Array( const std::vector &N, const TYPE *data = NULL ); + /*! + * Create a 1D Array with the range + * @param range Range of the data + */ + explicit Array( const Range &range ); + + /*! + * Create a 1D Array with the given initializer list + * @param data Input data + */ + Array( std::initializer_list data ); + + /*! * Copy constructor * @param rhs Array to copy @@ -109,7 +346,7 @@ public: */ Array &operator=( const std::vector &rhs ); - +public: // Views/copies/subset /*! * Create a 1D Array view to a raw block of data * @param N Number of elements in the array @@ -141,8 +378,7 @@ public: * @param N Number of elements in each dimension * @param data Pointer to the data */ - static std::shared_ptr view( - const std::vector &N, std::shared_ptr const &data ); + static std::shared_ptr view( const ArraySize &N, std::shared_ptr const &data ); /*! @@ -178,7 +414,7 @@ public: * @param data Pointer to the data */ static std::shared_ptr constView( - const std::vector &N, std::shared_ptr const &data ); + const ArraySize &N, std::shared_ptr const &data ); /*! @@ -192,7 +428,20 @@ public: * @param N Number of elements in each dimension * @param data Pointer to the data */ - void view2( const std::vector &N, std::shared_ptr const &data ); + void view2( const ArraySize &N, std::shared_ptr const &data ); + + /*! + * Make this object a view of the raw data (expert use only). 
+ * Use view2( N, std::shared_ptr(data,[](TYPE*){}) ) instead. + * Note: this interface is not recommended as it does not protect from + * the src data being deleted while still being used by the Array. + * Additionally for maximum performance it does not set the internal shared_ptr + * so functions like getPtr and resize will not work correctly. + * @param ndim Number of dimensions + * @param dims Number of elements in each dimension + * @param data Pointer to the data + */ + void viewRaw( int ndim, const size_t *dims, TYPE *data ); /*! * Make this object a view of the raw data (expert use only). @@ -204,41 +453,30 @@ public: * @param N Number of elements in each dimension * @param data Pointer to the data */ - void viewRaw( const std::initializer_list &N, TYPE *data ); - - /*! - * Make this object a view of the raw data (expert use only). - * Use view2( N, std::shared_ptr(data,[](TYPE*){}) ) instead. - * Note: this interface is not recommended as it does not protect from - * the src data being deleted while still being used by the Array. - * Additionally for maximum performance it does not set the internal shared_ptr - * so functions like getPtr and resize will not work correctly. - * @param N Number of elements in each dimension - * @param data Pointer to the data - */ - void viewRaw( const std::vector &N, TYPE *data ); + void viewRaw( const ArraySize &N, TYPE *data ); /*! * Convert an array of one type to another. This may or may not allocate new memory. * @param array Input array */ - template - static std::shared_ptr> convert( std::shared_ptr> array ); + template + static std::shared_ptr> convert( std::shared_ptr> array ); /*! * Convert an array of one type to another. This may or may not allocate new memory. * @param array Input array */ - template - static std::shared_ptr> convert( std::shared_ptr> array ); + template + static std::shared_ptr> convert( + std::shared_ptr> array ); /*! 
* Copy and convert data from another array to this array * @param array Source array */ - template + template void copy( const Array &array ); /*! @@ -246,16 +484,23 @@ public: * Note: The current array must be allocated to the proper size first. * @param array Source array */ - template + template void copy( const TYPE2 *array ); /*! * Copy and convert data from this array to a raw vector. * @param array Source array */ - template + template void copyTo( TYPE2 *array ) const; + /*! + * Copy and convert data from this array to a raw vector. + * @param array Source array + */ + template + Array cloneTo() const; + /*! * Fill the array with the given value @@ -274,7 +519,7 @@ public: * @param base Base array * @param exp Exponent value */ - void pow( const Array &baseArray, const TYPE &exp ); + void pow( const Array &base, const TYPE &exp ); //! Destructor ~Array(); @@ -285,23 +530,27 @@ public: //! Return the size of the Array - inline int ndim() const { return d_ndim; } + inline int ndim() const { return d_size.ndim(); } //! Return the size of the Array - inline std::vector size() const { return std::vector( d_N, d_N + d_ndim ); } + inline ArraySize &size() { return d_size; } //! Return the size of the Array - inline size_t size( int d ) const { return d_N[d]; } + inline ArraySize size() const { return d_size; } //! Return the size of the Array - inline size_t length() const { return d_length; } + inline size_t size( int d ) const { return d_size[d]; } + + + //! Return the size of the Array + inline size_t length() const { return d_size.length(); } //! Return true if the Array is empty - inline bool empty() const { return d_length == 0; } + inline bool empty() const { return d_size.length() == 0; } /*! @@ -329,7 +578,8 @@ public: * Resize the Array * @param N Number of elements in each dimension */ - void resize( const std::vector &N ); + void resize( const ArraySize &N ); + /*! 
* Resize the given dimension of the array @@ -344,48 +594,73 @@ public: * Reshape the Array (total size of array will not change) * @param N Number of elements in each dimension */ - void reshape( const std::vector &N ); + void reshape( const ArraySize &N ); /*! * Subset the Array (total size of array will not change) * @param index Index to subset (imin,imax,jmin,jmax,kmin,kmax,...) */ - template - Array subset( const std::vector &index ) const; + template + Array subset( const std::vector &index ) const; + + + /*! + * Subset the Array (total size of array will not change) + * @param index Index to subset (ix:kx:jx,iy:ky:jy,...) + */ + template + Array subset( const std::vector> &index ) const; + /*! * Copy data from an array into a subset of this array * @param index Index of the subset (imin,imax,jmin,jmax,kmin,kmax,...) * @param subset The subset array to copy from */ - template - void copySubset( const std::vector &index, const Array &subset ); + template + void copySubset( const std::vector &index, const Array &subset ); + + /*! + * Copy data from an array into a subset of this array + * @param index Index of the subset + * @param subset The subset array to copy from + */ + template + void copySubset( const std::vector> &index, const Array &subset ); /*! * Add data from an array into a subset of this array * @param index Index of the subset (imin,imax,jmin,jmax,kmin,kmax,...) * @param subset The subset array to add from */ - void addSubset( const std::vector &index, const Array &subset ); + void addSubset( const std::vector &index, const Array &subset ); + + /*! + * Add data from an array into a subset of this array + * @param index Index of the subset + * @param subset The subset array to add from + */ + void addSubset( const std::vector> &index, const Array &subset ); +public: // Accessors /*! 
* Access the desired element * @param i The row index */ - HOST_DEVICE inline TYPE &operator()( size_t i ) + HOST_DEVICE inline TYPE &operator()( size_t i ) ATTRIBUTE_INLINE { - CHECK_ARRAY_INDEX3D( d_N, i, 0, 0 ) return d_data[i]; + return d_data[d_size.index( i )]; } /*! * Access the desired element * @param i The row index */ - HOST_DEVICE inline const TYPE &operator()( size_t i ) const + HOST_DEVICE inline const TYPE &operator()( size_t i ) const ATTRIBUTE_INLINE { - CHECK_ARRAY_INDEX3D( d_N, i, 0, 0 ) return d_data[i]; + return d_data[d_size.index( i )]; } /*! @@ -393,9 +668,9 @@ public: * @param i The row index * @param j The column index */ - HOST_DEVICE inline TYPE &operator()( size_t i, size_t j ) + HOST_DEVICE inline TYPE &operator()( size_t i, size_t j ) ATTRIBUTE_INLINE { - CHECK_ARRAY_INDEX3D( d_N, i, j, 0 ) return d_data[i + j * d_N[0]]; + return d_data[d_size.index( i, j )]; } /*! @@ -403,9 +678,9 @@ public: * @param i The row index * @param j The column index */ - HOST_DEVICE inline const TYPE &operator()( size_t i, size_t j ) const + HOST_DEVICE inline const TYPE &operator()( size_t i, size_t j ) const ATTRIBUTE_INLINE { - CHECK_ARRAY_INDEX3D( d_N, i, j, 0 ) return d_data[i + j * d_N[0]]; + return d_data[d_size.index( i, j )]; } /*! @@ -414,9 +689,9 @@ public: * @param j The column index * @param k The third index */ - HOST_DEVICE inline TYPE &operator()( size_t i, size_t j, size_t k ) + HOST_DEVICE inline TYPE &operator()( size_t i, size_t j, size_t k ) ATTRIBUTE_INLINE { - CHECK_ARRAY_INDEX3D( d_N, i, j, k ) return d_data[GET_ARRAY_INDEX3D( d_N, i, j, k )]; + return d_data[d_size.index( i, j, k )]; } /*! 
@@ -425,35 +700,109 @@ public: * @param j The column index * @param k The third index */ - HOST_DEVICE inline const TYPE &operator()( size_t i, size_t j, size_t k ) const + HOST_DEVICE inline const TYPE &operator()( size_t i, size_t j, size_t k ) const ATTRIBUTE_INLINE { - CHECK_ARRAY_INDEX3D( d_N, i, j, k ) return d_data[GET_ARRAY_INDEX3D( d_N, i, j, k )]; + return d_data[d_size.index( i, j, k )]; } /*! * Access the desired element - * @param i The row index - * @param j The column index - * @param k The third index - * @param l The fourth index + * @param i1 The first index + * @param i2 The second index + * @param i3 The third index + * @param i4 The fourth index */ - HOST_DEVICE inline TYPE &operator()( size_t i, size_t j, size_t k, size_t l ) + HOST_DEVICE inline TYPE &operator()( + size_t i1, size_t i2, size_t i3, size_t i4 ) ATTRIBUTE_INLINE { - CHECK_ARRAY_INDEX4D( d_N, i, j, k, l ) return d_data[GET_ARRAY_INDEX4D( d_N, i, j, k, l )]; + return d_data[d_size.index( i1, i2, i3, i4 )]; } /*! * Access the desired element - * @param i The row index - * @param j The column index - * @param k The third index - * @param l The fourth index + * @param i1 The first index + * @param i2 The second index + * @param i3 The third index + * @param i4 The fourth index */ - HOST_DEVICE inline const TYPE &operator()( size_t i, size_t j, size_t k, size_t l ) const + HOST_DEVICE inline const TYPE &operator()( + size_t i1, size_t i2, size_t i3, size_t i4 ) const ATTRIBUTE_INLINE { - CHECK_ARRAY_INDEX4D( d_N, i, j, k, l ) return d_data[GET_ARRAY_INDEX4D( d_N, i, j, k, l )]; + return d_data[d_size.index( i1, i2, i3, i4 )]; } + /*! 
+ * Access the desired element + * @param i1 The first index + * @param i2 The second index + * @param i3 The third index + * @param i4 The fourth index + * @param i5 The fifth index + */ + HOST_DEVICE inline TYPE &operator()( + size_t i1, size_t i2, size_t i3, size_t i4, size_t i5 ) ATTRIBUTE_INLINE + { + return d_data[d_size.index( i1, i2, i3, i4, i5 )]; + } + + /*! + * Access the desired element + * @param i1 The first index + * @param i2 The second index + * @param i3 The third index + * @param i4 The fourth index + * @param i5 The fifth index + */ + HOST_DEVICE inline const TYPE &operator()( + size_t i1, size_t i2, size_t i3, size_t i4, size_t i5 ) const ATTRIBUTE_INLINE + { + return d_data[d_size.index( i1, i2, i3, i4, i5 )]; + } + + /*! + * Access the desired element as a raw pointer + * @param i The global index + */ + HOST_DEVICE inline TYPE *ptr( size_t i ) ATTRIBUTE_INLINE + { + return i >= d_size.length() ? nullptr : &d_data[i]; + } + + /*! + * Access the desired element as a raw pointer + * @param i The global index + */ + HOST_DEVICE inline const TYPE *ptr( size_t i ) const ATTRIBUTE_INLINE + { + return i >= d_size.length() ? nullptr : &d_data[i]; + } + + //! Get iterator to beginning of data + inline TYPE *begin() ATTRIBUTE_INLINE { return d_data; } + + //! Get iterator to beginning of data + inline const TYPE *begin() const ATTRIBUTE_INLINE { return d_data; } + + //! Get iterator to beginning of data + inline TYPE *end() ATTRIBUTE_INLINE { return d_data + d_size.length(); } + + //! Get iterator to beginning of data + inline const TYPE *end() const ATTRIBUTE_INLINE { return d_data + d_size.length(); } + + //! Return the pointer to the raw data + inline std::shared_ptr getPtr() ATTRIBUTE_INLINE { return d_ptr; } + + //! Return the pointer to the raw data + inline std::shared_ptr getPtr() const ATTRIBUTE_INLINE { return d_ptr; } + + //! Return the pointer to the raw data + HOST_DEVICE inline TYPE *data() ATTRIBUTE_INLINE { return d_data; } + + //! 
Return the pointer to the raw data + HOST_DEVICE inline const TYPE *data() const ATTRIBUTE_INLINE { return d_data; } + + +public: // Operator overloading //! Check if two matrices are equal // Equality means the dimensions and data have to be identical bool operator==( const Array &rhs ) const; @@ -461,19 +810,28 @@ public: //! Check if two matrices are not equal inline bool operator!=( const Array &rhs ) const { return !this->operator==( rhs ); } + //! Add another array + Array &operator+=( const Array &rhs ); - //! Return the pointer to the raw data - inline std::shared_ptr getPtr() { return d_ptr; } + //! Subtract another array + Array &operator-=( const Array &rhs ); - //! Return the pointer to the raw data - inline std::shared_ptr getPtr() const { return d_ptr; } + //! Add a scalar + Array &operator+=( const TYPE &rhs ); - //! Return the pointer to the raw data - HOST_DEVICE inline TYPE *data() { return d_data; } + //! Subtract a scalar + Array &operator-=( const TYPE &rhs ); - //! Return the pointer to the raw data - HOST_DEVICE inline const TYPE *data() const { return d_data; } +public: // Math operations + //! Concatenates the arrays along the dimension dim. + static Array cat( const std::vector &x, int dim = 0 ); + + //! Concatenates a given array with the current array + void cat( const Array &x, int dim = 0 ); + + //! Initialize the array with random values (defined from the function table) + void rand(); //! Return true if NaNs are present inline bool NaNs() const; @@ -491,13 +849,13 @@ public: inline TYPE mean() const; //! Return the min of all elements in a given direction - Array min( int dir ) const; + Array min( int dir ) const; //! Return the max of all elements in a given direction - Array max( int dir ) const; + Array max( int dir ) const; //! Return the sum of all elements in a given direction - Array sum( int dir ) const; + Array sum( int dir ) const; //! 
Return the smallest value inline TYPE min( const std::vector &index ) const; @@ -511,52 +869,86 @@ public: //! Return the mean of all elements inline TYPE mean( const std::vector &index ) const; + //! Return the smallest value + inline TYPE min( const std::vector> &index ) const; + + //! Return the largest value + inline TYPE max( const std::vector> &index ) const; + + //! Return the sum of all elements + inline TYPE sum( const std::vector> &index ) const; + + //! Return the mean of all elements + inline TYPE mean( const std::vector> &index ) const; + //! Find all elements that match the operator std::vector find( const TYPE &value, std::function compare ) const; - //! Add another array - Array &operator+=( const Array &rhs ); - - //! Subtract another array - Array &operator-=( const Array &rhs ); - - //! Add a scalar - Array &operator+=( const TYPE &rhs ); - - //! Subtract a scalar - Array &operator-=( const TYPE &rhs ); //! Print an array - void print( std::ostream& os, const std::string& name="A", const std::string& prefix="" ) const; + void print( + std::ostream &os, const std::string &name = "A", const std::string &prefix = "" ) const; //! Multiply two arrays - static Array multiply( const Array& a, const Array& b ); + static Array multiply( const Array &a, const Array &b ); //! Transpose an array - Array reverseDim( ) const; + Array reverseDim() const; + + //! Replicate an array a given number of times in each direction + Array repmat( const std::vector &N ) const; //! Coarsen an array using the given filter - Array coarsen( const Array& filter ) const; + Array coarsen( const Array &filter ) const; //! Coarsen an array using the given filter - Array coarsen( const std::vector& ratio, std::function&)> filter ) const; + Array coarsen( const std::vector &ratio, + std::function & )> filter ) const; + + /*! 
+ * Perform a element-wise operation y = f(x) + * @param[in] fun The function operation + * @param[in] x The input array + */ + static Array transform( std::function fun, const Array &x ); + + /*! + * Perform a element-wise operation z = f(x,y) + * @param[in] fun The function operation + * @param[in] x The first array + * @param[in] y The second array + */ + static Array transform( + std::function fun, const Array &x, const Array &y ); + + /*! + * axpby operation: this = alpha*x + beta*this + * @param[in] alpha alpha + * @param[in] x x + * @param[in] beta beta + */ + void axpby( const TYPE &alpha, const Array &x, const TYPE &beta ); private: - int d_ndim; // Number of dimensions in array - size_t d_N[ARRAY_NDIM_MAX]; // Size of each dimension - size_t d_length; // Total length of array + ArraySize d_size; // Size of each dimension TYPE *d_data; // Raw pointer to data in array std::shared_ptr d_ptr; // Shared pointer to data in array - void allocate( const std::vector &N ); + void allocate( const ArraySize &N ); + +public: + template + inline bool sizeMatch( const Array &rhs ) const + { + return d_size == rhs.d_size; + } private: - template - inline bool sizeMatch( const Array& rhs ) const; - inline void checkSubsetIndex( const std::vector &index ) const; - inline std::array getDimArray() const; - static inline void getSubsetArrays( const std::vector &index, - std::array &first, std::array &last, std::array &N ); + inline void checkSubsetIndex( const std::vector> &range ) const; + inline std::vector> convert( const std::vector &index ) const; + static inline void getSubsetArrays( const std::vector> &range, + std::array &first, std::array &last, std::array &inc, + std::array &N ); }; diff --git a/common/Array.hpp b/common/Array.hpp index aa06cc2c..b91e46b4 100644 --- a/common/Array.hpp +++ b/common/Array.hpp @@ -2,267 +2,396 @@ #define included_ArrayClass_hpp #include "common/Array.h" +#include "common/FunctionTable.h" #include "common/Utilities.h" #include +#include 
#include #include -#include - /******************************************************** -* Constructors * -********************************************************/ -template -Array::Array() + * ArraySize * + ********************************************************/ +inline ArraySize::ArraySize() { d_ndim = 1; + d_N[0] = 0; + d_N[1] = 1; + d_N[2] = 1; + d_N[3] = 1; + d_N[4] = 1; d_length = 0; - for ( size_t i = 0; i < ARRAY_NDIM_MAX; i++ ) - d_N[i] = 1; - d_N[0] = 0; - d_data = nullptr; } -template -Array::Array( size_t N ) +inline ArraySize::ArraySize( size_t N1 ) { - allocate( std::vector( 1, N ) ); + d_ndim = 1; + d_N[0] = N1; + d_N[1] = 1; + d_N[2] = 1; + d_N[3] = 1; + d_N[4] = 1; + d_length = N1; } -template -Array::Array( size_t N_rows, size_t N_columns ) +inline ArraySize::ArraySize( size_t N1, size_t N2 ) { - std::vector N( 2 ); - N[0] = N_rows; - N[1] = N_columns; - allocate( N ); + d_ndim = 2; + d_N[0] = N1; + d_N[1] = N2; + d_N[2] = 1; + d_N[3] = 1; + d_N[4] = 1; + d_length = N1 * N2; } -template -Array::Array( size_t N1, size_t N2, size_t N3 ) +inline ArraySize::ArraySize( size_t N1, size_t N2, size_t N3 ) { - std::vector N( 3 ); - N[0] = N1; - N[1] = N2; - N[2] = N3; - allocate( N ); + d_ndim = 3; + d_N[0] = N1; + d_N[1] = N2; + d_N[2] = N3; + d_N[3] = 1; + d_N[4] = 1; + d_length = N1 * N2 * N3; } -template -Array::Array( const std::vector &N, const TYPE *data ) +inline ArraySize::ArraySize( size_t N1, size_t N2, size_t N3, size_t N4 ) { - allocate( N ); - if ( data != NULL ) { - for ( size_t i = 0; i < d_length; i++ ) - d_data[i] = data[i]; - } + d_ndim = 4; + d_N[0] = N1; + d_N[1] = N2; + d_N[2] = N3; + d_N[3] = N4; + d_N[4] = 1; + d_length = N1 * N2 * N3 * N4; } -template -void Array::allocate( const std::vector &N ) +inline ArraySize::ArraySize( size_t N1, size_t N2, size_t N3, size_t N4, size_t N5 ) { - d_ndim = static_cast( N.size() ); + d_ndim = 5; + d_N[0] = N1; + d_N[1] = N2; + d_N[2] = N3; + d_N[3] = N4; + d_N[4] = N5; + d_length = N1 * N2 * 
N3 * N4 * N5; +} +inline ArraySize::ArraySize( std::initializer_list N ) +{ + d_ndim = N.size(); + d_N[0] = 0; + d_N[1] = 1; + d_N[2] = 1; + d_N[3] = 1; + d_N[4] = 1; + auto it = N.begin(); + for ( size_t i = 0; i < d_ndim; i++, ++it ) + d_N[i] = *it; d_length = 1; - for ( size_t i = 0; i < ARRAY_NDIM_MAX; i++ ) - d_N[i] = 1; - for ( size_t i = 0; i < N.size(); i++ ) { - d_N[i] = N[i]; - d_length *= N[i]; - } - if ( N.empty() ) { - d_N[0] = 0; + for ( size_t i = 0; i < maxDim(); i++ ) + d_length *= d_N[i]; + if ( d_ndim == 0 ) d_length = 0; +} +inline ArraySize::ArraySize( size_t ndim, const size_t *dims ) +{ + d_ndim = ndim; + d_N[0] = 0; + d_N[1] = 1; + d_N[2] = 1; + d_N[3] = 1; + d_N[4] = 1; + for ( size_t i = 0; i < ndim; i++ ) + d_N[i] = dims[i]; + d_length = 1; + for ( size_t i = 0; i < maxDim(); i++ ) + d_length *= d_N[i]; + if ( d_ndim == 0 ) + d_length = 0; +} +inline ArraySize::ArraySize( const std::vector &N ) +{ + d_ndim = N.size(); + d_N[0] = 0; + d_N[1] = 1; + d_N[2] = 1; + d_N[3] = 1; + d_N[4] = 1; + for ( size_t i = 0; i < d_ndim; i++ ) + d_N[i] = N[i]; + d_length = 1; + for ( size_t i = 0; i < maxDim(); i++ ) + d_length *= d_N[i]; + if ( d_ndim == 0 ) + d_length = 0; +} +inline ArraySize::ArraySize( const ArraySize &rhs ) { memcpy( this, &rhs, sizeof( *this ) ); } +inline ArraySize::ArraySize( ArraySize &&rhs ) { memcpy( this, &rhs, sizeof( *this ) ); } +inline ArraySize &ArraySize::operator=( const ArraySize &rhs ) +{ + if ( this != &rhs ) + memcpy( this, &rhs, sizeof( *this ) ); + return *this; +} +inline ArraySize &ArraySize::operator=( ArraySize &&rhs ) +{ + if ( this != &rhs ) + memcpy( this, &rhs, sizeof( *this ) ); + return *this; +} +inline void ArraySize::resize( uint8_t dim, size_t N ) +{ + if ( dim >= d_ndim ) + throw std::out_of_range( "Invalid dimension" ); + d_N[dim] = N; + d_length = 1; + for ( size_t i = 0; i < maxDim(); i++ ) + d_length *= d_N[i]; +} + + +/******************************************************** + * Constructors * 
+ ********************************************************/ +template +Array::Array() +{ + d_data = nullptr; +} +template +Array::Array( const ArraySize &N ) +{ + allocate( N ); +} +template +Array::Array( size_t N ) +{ + allocate( ArraySize( N ) ); +} +template +Array::Array( size_t N_rows, size_t N_cols ) +{ + allocate( ArraySize( N_rows, N_cols ) ); +} +template +Array::Array( size_t N1, size_t N2, size_t N3 ) +{ + allocate( ArraySize( N1, N2, N3 ) ); +} +template +Array::Array( size_t N1, size_t N2, size_t N3, size_t N4 ) +{ + allocate( ArraySize( N1, N2, N3, N4 ) ); +} +template +Array::Array( size_t N1, size_t N2, size_t N3, size_t N4, size_t N5 ) +{ + allocate( ArraySize( N1, N2, N3, N4, N5 ) ); +} +template +Array::Array( const std::vector &N, const TYPE *data ) +{ + allocate( N ); + if ( data ) { + for ( size_t i = 0; i < d_size.length(); i++ ) + d_data[i] = data[i]; } - if ( d_length == 0 ) +} +template +Array::Array( const Range &range ) +{ + double tmp = static_cast( ( range.j - range.i ) ) / static_cast( range.k ); + size_t N = static_cast( floor( tmp + 1e-12 ) + 1 ); + allocate( { N } ); + for ( size_t i = 0; i < N; i++ ) + d_data[i] = range.k * ( range.i / range.k + i ); +} +template +Array::Array( std::initializer_list x ) +{ + allocate( { x.size() } ); + auto it = x.begin(); + for ( size_t i = 0; i < x.size(); ++i, ++it ) + d_data[i] = *it; +} +template +void Array::allocate( const ArraySize &N ) +{ + d_size = N; + auto length = d_size.length(); + if ( length == 0 ) d_ptr.reset(); else - d_ptr.reset( new ( std::nothrow ) TYPE[d_length], []( TYPE *p ) { delete[] p; } ); + d_ptr.reset( new ( std::nothrow ) TYPE[length], []( TYPE *p ) { delete[] p; } ); d_data = d_ptr.get(); - if ( d_length > 0 && d_data == nullptr ) + if ( length > 0 && d_data == nullptr ) throw std::logic_error( "Failed to allocate array" ); } -template -Array::Array( const Array &rhs ) - : d_ndim( rhs.d_ndim ), d_length( rhs.d_length ), d_data( nullptr ) +template +Array::Array( 
const Array &rhs ) : d_size( rhs.d_size ), d_data( nullptr ) { allocate( rhs.size() ); - for ( size_t i = 0; i < d_length; i++ ) - d_data[i] = rhs.d_data[i]; + for ( size_t i = 0; i < d_size.length(); i++ ) + d_data[i] = rhs.d_data[i]; } -template -Array::Array( Array &&rhs ) - : d_ndim( rhs.d_ndim ), d_length( rhs.d_length ), d_data( rhs.d_data ) +template +Array::Array( Array &&rhs ) : d_size( rhs.d_size ), d_data( rhs.d_data ) { - rhs.d_ndim = 0; - memcpy( d_N, rhs.d_N, sizeof( rhs.d_N ) ); - memset( rhs.d_N, 0, sizeof( rhs.d_N ) ); - rhs.d_length = 0; - rhs.d_data = nullptr; - d_ptr = std::move( rhs.d_ptr ); + rhs.d_size = ArraySize(); + rhs.d_data = nullptr; + d_ptr = std::move( rhs.d_ptr ); } -template -Array &Array::operator=( const Array &rhs ) +template +Array &Array::operator=( const Array &rhs ) { if ( this == &rhs ) return *this; this->allocate( rhs.size() ); - for ( size_t i = 0; i < d_length; i++ ) + for ( size_t i = 0; i < d_size.length(); i++ ) this->d_data[i] = rhs.d_data[i]; return *this; } -template -Array &Array::operator=( Array &&rhs ) +template +Array &Array::operator=( Array &&rhs ) { if ( this == &rhs ) return *this; - d_ndim = rhs.d_ndim; - rhs.d_ndim = 0; - memcpy( d_N, rhs.d_N, sizeof( rhs.d_N ) ); - memset( rhs.d_N, 0, sizeof( rhs.d_N ) ); - d_length = rhs.d_length; - rhs.d_length = 0; - d_data = rhs.d_data; - rhs.d_data = nullptr; - d_ptr = std::move( rhs.d_ptr ); + d_size = rhs.d_size; + rhs.d_size = ArraySize(); + d_data = rhs.d_data; + rhs.d_data = nullptr; + d_ptr = std::move( rhs.d_ptr ); return *this; } -template -Array &Array::operator=( const std::vector &rhs ) +template +Array &Array::operator=( const std::vector &rhs ) { - this->allocate( std::vector( 1, rhs.size() ) ); - for ( size_t i = 0; i < rhs.size(); i++ ) + this->allocate( ArraySize( rhs.size() ) ); + for ( size_t i = 0; i < rhs.size(); i++ ) this->d_data[i] = rhs[i]; return *this; } -template -Array::~Array() +template +Array::~Array() { } -template -void 
Array::clear() +template +void Array::clear() { - d_ndim = 0; - d_length = 0; - for ( size_t i = 0; i < ARRAY_NDIM_MAX; i++ ) - d_N[i] = 1; - d_N[0] = 0; + d_size = ArraySize(); d_ptr.reset(); d_data = nullptr; } /******************************************************** -* Check if the size of the array matches rhs * -********************************************************/ -template -template -bool Array::sizeMatch( const Array& rhs ) const -{ - bool test = d_ndim == rhs.d_ndim; - for ( int d = 0; d < d_ndim; d++ ) - test = test && d_N[d] == rhs.d_N[d]; - return test; -} + * Access elements * + ********************************************************/ /******************************************************** -* Resize the array * -********************************************************/ -template -void Array::resize( size_t N ) + * Copy/move values from one array to another (resize) * + ********************************************************/ +template +inline void moveValues( const ArraySize &N1, const ArraySize &N2, TYPE *data1, TYPE *data2 ) { - resize( std::vector{N} ); -} -template -void Array::resize( size_t N1, size_t N2 ) -{ - resize( std::vector{N1,N2} ); -} -template -void Array::resize( size_t N1, size_t N2, size_t N3 ) -{ - resize( std::vector{N1,N2,N3} ); -} -template -void Array::resize( const std::vector &N ) -{ - // Check if the array actually changed size - size_t new_length = 1; - for ( size_t i = 0; i < N.size(); i++ ) - new_length *= N[i]; - if ( N.empty() ) - new_length = 0; - bool changed = new_length != d_length || (int) N.size() != d_ndim; - for ( size_t i = 0; i < N.size(); i++ ) - changed = changed || N[i] != d_N[i]; - if ( !changed ) - return; -// Store the old data -#if ARRAY_NDIM_MAX > 5 -#error Function programmed for more than 5 dimensions -#endif - std::array N1{ { 1, 1, 1, 1, 1 } }; - std::array N2{ { 1, 1, 1, 1, 1 } }; - for ( int d = 0; d < d_ndim; d++ ) - N1[d] = d_N[d]; - for ( size_t d = 0; d < N.size(); d++ ) - N2[d] = 
N[d]; - if ( d_ndim == 0 ) { - N1[0] = 0; - } - if ( N.empty() ) { - N2[0] = 0; - } - std::shared_ptr old_data = d_ptr; - // Allocate new data - allocate( N ); - // Copy the old values - if ( d_length > 0 ) { - TYPE *data1 = old_data.get(); - TYPE *data2 = d_data; - if ( old_data.unique() ) { - // We own the data, use std:move - for ( size_t i5 = 0; i5 < std::min( N1[4], N2[4] ); i5++ ) { - for ( size_t i4 = 0; i4 < std::min( N1[3], N2[3] ); i4++ ) { - for ( size_t i3 = 0; i3 < std::min( N1[2], N2[2] ); i3++ ) { - for ( size_t i2 = 0; i2 < std::min( N1[1], N2[1] ); i2++ ) { - for ( size_t i1 = 0; i1 < std::min( N1[0], N2[0] ); i1++ ) { - size_t index1 = GET_ARRAY_INDEX5D( N1, i1, i2, i3, i4, i5 ); - size_t index2 = GET_ARRAY_INDEX5D( N2, i1, i2, i3, i4, i5 ); - data2[index2] = std::move( data1[index1] ); - } - } - } - } - } - } else { - // We do not own the data, copy - for ( size_t i5 = 0; i5 < std::min( N1[4], N2[4] ); i5++ ) { - for ( size_t i4 = 0; i4 < std::min( N1[3], N2[3] ); i4++ ) { - for ( size_t i3 = 0; i3 < std::min( N1[2], N2[2] ); i3++ ) { - for ( size_t i2 = 0; i2 < std::min( N1[1], N2[1] ); i2++ ) { - for ( size_t i1 = 0; i1 < std::min( N1[0], N2[0] ); i1++ ) { - size_t index1 = GET_ARRAY_INDEX5D( N1, i1, i2, i3, i4, i5 ); - size_t index2 = GET_ARRAY_INDEX5D( N2, i1, i2, i3, i4, i5 ); - data2[index2] = data1[index1]; - } - } + for ( size_t i5 = 0; i5 < std::min( N1[4], N2[4] ); i5++ ) { + for ( size_t i4 = 0; i4 < std::min( N1[3], N2[3] ); i4++ ) { + for ( size_t i3 = 0; i3 < std::min( N1[2], N2[2] ); i3++ ) { + for ( size_t i2 = 0; i2 < std::min( N1[1], N2[1] ); i2++ ) { + for ( size_t i1 = 0; i1 < std::min( N1[0], N2[0] ); i1++ ) { + size_t index1 = N1.index( i1, i2, i3, i4, i5 ); + size_t index2 = N2.index( i1, i2, i3, i4, i5 ); + data2[index2] = std::move( data1[index1] ); } } } } } } -template -void Array::resizeDim( int dim, size_t N, const TYPE &value ) +template +inline typename std::enable_if::type copyValues( + const ArraySize &N1, const 
ArraySize &N2, const TYPE *data1, TYPE *data2 ) { - if ( dim >= d_ndim ) - throw std::logic_error( "Invalid dimension" ); - std::vector N2 = size(); - size_t N0 = N2[dim]; - N2[dim] = N; - resize( N2 ); + for ( size_t i5 = 0; i5 < std::min( N1[4], N2[4] ); i5++ ) { + for ( size_t i4 = 0; i4 < std::min( N1[3], N2[3] ); i4++ ) { + for ( size_t i3 = 0; i3 < std::min( N1[2], N2[2] ); i3++ ) { + for ( size_t i2 = 0; i2 < std::min( N1[1], N2[1] ); i2++ ) { + for ( size_t i1 = 0; i1 < std::min( N1[0], N2[0] ); i1++ ) { + size_t index1 = N1.index( i1, i2, i3, i4, i5 ); + size_t index2 = N2.index( i1, i2, i3, i4, i5 ); + data2[index2] = data1[index1]; + } + } + } + } + } +} +template +inline typename std::enable_if::type copyValues( + const ArraySize &, const ArraySize &, const TYPE *, TYPE * ) +{ + throw std::logic_error( "No copy constructor" ); +} + + +/******************************************************** + * Resize the array * + ********************************************************/ +template +void Array::resize( size_t N ) +{ + resize( ArraySize( N ) ); +} +template +void Array::resize( size_t N1, size_t N2 ) +{ + resize( ArraySize( N1, N2 ) ); +} +template +void Array::resize( size_t N1, size_t N2, size_t N3 ) +{ + resize( ArraySize( N1, N2, N3 ) ); +} + +template +void Array::resize( const ArraySize &N ) +{ + // Check if the array actually changed size + bool equal = true; + for ( size_t i = 0; i < ArraySize::maxDim(); i++ ) + equal = equal && N[i] == d_size[i]; + if ( equal ) { + d_size = N; + return; + } + // Store the old data + auto N0 = d_size; + auto data0 = d_ptr; + // Allocate new data + allocate( N ); + // Copy the old values + if ( N.length() > 0 && d_size.length() > 0 ) { + if ( data0.use_count() <= 1 ) { + // We own the data, use std:move + moveValues( N0, N, data0.get(), d_data ); + } else { + // We do not own the data, copy + copyValues::value, TYPE>( N0, N, data0.get(), d_data ); + } + } +} +template +void Array::resizeDim( int dim, size_t N, 
const TYPE &value ) +{ + if ( dim < 0 || dim > d_size.ndim() ) + throw std::out_of_range( "Invalid dimension" ); + size_t N0 = d_size[dim]; + auto size = d_size; + size.resize( dim, N ); + resize( size ); size_t n1 = 1, n2 = 1; for ( int d = 0; d < dim; d++ ) - n1 *= N2[d]; - for ( size_t d = dim + 1; d < N2.size(); d++ ) - n2 *= N2[d]; + n1 *= size[d]; + for ( size_t d = dim + 1; d < size.ndim(); d++ ) + n2 *= size[d]; for ( size_t k = 0; k < n2; k++ ) { for ( size_t j = N0; j < N; j++ ) { for ( size_t i = 0; i < n1; i++ ) { @@ -274,129 +403,112 @@ void Array::resizeDim( int dim, size_t N, const TYPE &value ) /******************************************************** -* Reshape the array * -********************************************************/ -template -void Array::reshape( const std::vector &N ) + * Reshape the array * + ********************************************************/ +template +void Array::reshape( const ArraySize &N ) { - size_t new_length = 1; - for ( size_t i = 0; i < N.size(); i++ ) - new_length *= N[i]; - if ( new_length != d_length ) + if ( N.length() != d_size.length() ) throw std::logic_error( "reshape is not allowed to change the array size" ); - d_ndim = N.size(); - for ( size_t i = 0; i < ARRAY_NDIM_MAX; i++ ) - d_N[i] = 1; - for ( size_t i = 0; i < N.size(); i++ ) - d_N[i] = N[i]; + d_size = N; } /******************************************************** -* Subset the array * -********************************************************/ -// clang-format off + * Subset the array * + ********************************************************/ // Helper function to check subset indices -template -inline void Array::checkSubsetIndex( const std::vector &index ) const +template +inline void Array::checkSubsetIndex( const std::vector> &range ) const { - bool test = index.size() % 2 == 0 && (int) index.size() / 2 <= d_ndim; - for ( size_t d = 0; d < index.size() / 2; d++ ) - test = test && index[2 * d + 0] < d_N[d] && index[2 * d + 1] < d_N[d]; + 
bool test = (int) range.size() == d_size.ndim(); + for ( size_t d = 0; d < range.size(); d++ ) + test = test && range[d].i >= 0 && range[d].j <= d_size[d]; if ( !test ) throw std::logic_error( "indices for subset are invalid" ); } -// Helper function to return dimensions as a std::array for hard coded loops -template -inline std::array Array::getDimArray() const +template +inline std::vector> Array::convert( + const std::vector &index ) const { - #if ARRAY_NDIM_MAX > 5 - #error Function programmed for more than 5 dimensions - #endif - std::array N{ { 1, 1, 1, 1, 1 } }; - for ( int d = 0; d < d_ndim; d++ ) - N[d] = d_N[d]; - return N; + std::vector> range( d_size.ndim() ); + if ( index.size() % 2 != 0 || static_cast( index.size() / 2 ) < d_size.ndim() ) + throw std::logic_error( "indices for subset are invalid" ); + for ( int d = 0; d < d_size.ndim(); d++ ) + range[d] = Range( index[2 * d + 0], index[2 * d + 1] ); + return range; } // Helper function to return dimensions for the subset array -template -inline void Array::getSubsetArrays( const std::vector &index, - std::array &first, - std::array &last, - std::array &N ) +template +inline void Array::getSubsetArrays( const std::vector> &index, + std::array &first, std::array &last, std::array &inc, + std::array &N ) { - #if ARRAY_NDIM_MAX > 5 - #error Function programmed for more than 5 dimensions - #endif - size_t ndim = index.size() / 2; + first.fill( 0 ); + last.fill( 0 ); + inc.fill( 1 ); + N.fill( 1 ); + size_t ndim = index.size(); for ( size_t d = 0; d < ndim; d++ ) { - first[d] = index[2 * d + 0]; - last[d] = index[2 * d + 1]; - N[d] = last[d] - first[d] + 1; - } - for ( size_t d = ndim; d < 5; d++ ) { - first[d] = 0; - last[d] = 0; - N[d] = 1; + first[d] = index[d].i; + last[d] = index[d].j; + inc[d] = index[d].k; + N[d] = ( last[d] - first[d] + inc[d] ) / inc[d]; } } -template -template -Array Array::subset( const std::vector &index ) const +template +template +Array Array::subset( const std::vector> &index 
) const { // Get the subset indicies checkSubsetIndex( index ); - std::array first, last, N1; - getSubsetArrays( index, first, last, N1 ); - std::array N2 = getDimArray(); + std::array first, last, inc, N1; + getSubsetArrays( index, first, last, inc, N1 ); + ArraySize S1( d_size.ndim(), N1.data() ); // Create the new array - std::vector dim( d_ndim ); - for ( int d = 0; d < d_ndim; d++ ) - dim[d] = last[d] - first[d] + 1; - Array subset( dim ); + Array subset_array( S1 ); // Fill the new array - #if ARRAY_NDIM_MAX > 5 - #error Function programmed for more than 5 dimensions - #endif - TYPE2 *subset_data = subset.data(); - for (size_t i4=first[4]; i4<=last[4]; i4++) { - for (size_t i3=first[3]; i3<=last[3]; i3++) { - for (size_t i2=first[2]; i2<=last[2]; i2++) { - for (size_t i1=first[1]; i1<=last[1]; i1++) { - for (size_t i0=first[0]; i0<=last[0]; i0++) { - size_t k1 = GET_ARRAY_INDEX5D( N1, i0-first[0], - i1-first[1], i2-first[2], i3-first[3], i4-first[4] ); - size_t k2 = GET_ARRAY_INDEX5D( N2, i0, i1, i2, i3, i4 ); + static_assert( ArraySize::maxDim() == 5, "Not programmed for more than 5 dimensions" ); + TYPE2 *subset_data = subset_array.data(); + for ( size_t i4 = first[4], k1 = 0; i4 <= last[4]; i4 += inc[4] ) { + for ( size_t i3 = first[3]; i3 <= last[3]; i3 += inc[3] ) { + for ( size_t i2 = first[2]; i2 <= last[2]; i2 += inc[2] ) { + for ( size_t i1 = first[1]; i1 <= last[1]; i1 += inc[1] ) { + for ( size_t i0 = first[0]; i0 <= last[0]; i0 += inc[0], k1++ ) { + size_t k2 = d_size.index( i0, i1, i2, i3, i4 ); subset_data[k1] = static_cast( d_data[k2] ); } } } } } - return subset; + return subset_array; } -template -template -void Array::copySubset( const std::vector &index, const Array &subset ) +template +template +Array Array::subset( const std::vector &index ) const +{ + auto range = convert( index ); + return subset( range ); +} +template +template +void Array::copySubset( + const std::vector> &index, const Array &subset ) { // Get the subset indices 
checkSubsetIndex( index ); - std::array first, last, N1; - getSubsetArrays( index, first, last, N1 ); - std::array N2 = getDimArray(); + std::array first, last, inc, N1; + getSubsetArrays( index, first, last, inc, N1 ); // Copy the sub-array - #if ARRAY_NDIM_MAX > 5 - #error Function programmed for more than 5 dimensions - #endif + static_assert( ArraySize::maxDim() == 5, "Not programmed for more than 5 dimensions" ); const TYPE2 *src_data = subset.data(); - for (size_t i4=first[4]; i4<=last[4]; i4++) { - for (size_t i3=first[3]; i3<=last[3]; i3++) { - for (size_t i2=first[2]; i2<=last[2]; i2++) { - for (size_t i1=first[1]; i1<=last[1]; i1++) { - for (size_t i0=first[0]; i0<=last[0]; i0++) { - size_t k1 = GET_ARRAY_INDEX5D( N1, i0-first[0], - i1-first[1], i2-first[2], i3-first[3], i4-first[4] ); - size_t k2 = GET_ARRAY_INDEX5D( N2, i0, i1, i2, i3, i4 ); + for ( size_t i4 = first[4], k1 = 0; i4 <= last[4]; i4 += inc[4] ) { + for ( size_t i3 = first[3]; i3 <= last[3]; i3 += inc[3] ) { + for ( size_t i2 = first[2]; i2 <= last[2]; i2 += inc[2] ) { + for ( size_t i1 = first[1]; i1 <= last[1]; i1 += inc[1] ) { + for ( size_t i0 = first[0]; i0 <= last[0]; i0 += inc[0], k1++ ) { + size_t k2 = d_size.index( i0, i1, i2, i3, i4 ); d_data[k2] = static_cast( src_data[k1] ); } } @@ -405,26 +517,22 @@ void Array::copySubset( const std::vector &index, const Array -void Array::addSubset( const std::vector &index, const Array &subset ) +template +void Array::addSubset( + const std::vector> &index, const Array &subset ) { // Get the subset indices checkSubsetIndex( index ); - std::array first, last, N1; - getSubsetArrays( index, first, last, N1 ); - std::array N2 = getDimArray(); + std::array first, last, inc, N1; + getSubsetArrays( index, first, last, inc, N1 ); // add the sub-array - #if ARRAY_NDIM_MAX > 5 - #error Function programmed for more than 5 dimensions - #endif - for (size_t i4=first[4]; i4<=last[4]; i4++) { - for (size_t i3=first[3]; i3<=last[3]; i3++) { - for (size_t 
i2=first[2]; i2<=last[2]; i2++) { - for (size_t i1=first[1]; i1<=last[1]; i1++) { - for (size_t i0=first[0]; i0<=last[0]; i0++) { - size_t k1 = GET_ARRAY_INDEX5D( N1, i0-first[0], - i1-first[1], i2-first[2], i3-first[3], i4-first[4] ); - size_t k2 = GET_ARRAY_INDEX5D( N2, i0, i1, i2, i3, i4 ); + static_assert( ArraySize::maxDim() == 5, "Not programmed for more than 5 dimensions" ); + for ( size_t i4 = first[4], k1 = 0; i4 <= last[4]; i4 += inc[4] ) { + for ( size_t i3 = first[3]; i3 <= last[3]; i3 += inc[3] ) { + for ( size_t i2 = first[2]; i2 <= last[2]; i2 += inc[2] ) { + for ( size_t i1 = first[1]; i1 <= last[1]; i1 += inc[1] ) { + for ( size_t i0 = first[0]; i0 <= last[0]; i0 += inc[0], k1++ ) { + size_t k2 = d_size.index( i0, i1, i2, i3, i4 ); d_data[k2] += subset.d_data[k1]; } } @@ -432,155 +540,134 @@ void Array::addSubset( const std::vector &index, const Array } } } -// clang-format on +template +template +void Array::copySubset( + const std::vector &index, const Array &subset ) +{ + auto range = convert( index ); + copySubset( range, subset ); +} + +template +void Array::addSubset( const std::vector &index, const Array &subset ) +{ + auto range = convert( index ); + addSubset( range, subset ); +} /******************************************************** -* Operator overloading * -********************************************************/ -template -bool Array::operator==( const Array &rhs ) const + * Operator overloading * + ********************************************************/ +template +bool Array::operator==( const Array &rhs ) const { if ( this == &rhs ) return true; - if ( d_length != rhs.d_length ) + if ( d_size != rhs.d_size ) return false; - if ( d_ndim != rhs.d_ndim ) - return false; - for ( int d = 0; d < d_ndim; d++ ) { - if ( d_N[d] != rhs.d_N[d] ) - return false; - } bool match = true; - for ( size_t i = 0; i < d_length; i++ ) + for ( size_t i = 0; i < d_size.length(); i++ ) match = match && d_data[i] == rhs.d_data[i]; return match; } 
/******************************************************** -* Get a view of an C array * -********************************************************/ -template -std::shared_ptr> Array::view( size_t N, std::shared_ptr const &data ) + * Get a view of an C array * + ********************************************************/ +template +std::shared_ptr> Array::view( + size_t N, std::shared_ptr const &data ) { - return view( std::vector{N}, data ); + return view( ArraySize( N ), data ); } -template -std::shared_ptr> Array::view( +template +std::shared_ptr> Array::view( size_t N1, size_t N2, std::shared_ptr const &data ) { - return view( std::vector{N1,N2}, data ); + return view( ArraySize( N1, N2 ), data ); } -template -std::shared_ptr> Array::view( +template +std::shared_ptr> Array::view( size_t N1, size_t N2, size_t N3, std::shared_ptr const &data ) { - return view( std::vector{N1,N2,N3}, data ); + return view( ArraySize( N1, N2, N3 ), data ); } -template -std::shared_ptr> Array::constView( +template +std::shared_ptr> Array::constView( size_t N, std::shared_ptr const &data ) { - return constView( std::vector{N}, data ); + return constView( ArraySize( N ), data ); } -template -std::shared_ptr> Array::constView( +template +std::shared_ptr> Array::constView( size_t N1, size_t N2, std::shared_ptr const &data ) { - return constView( std::vector{N1,N2}, data ); + return constView( ArraySize( N1, N2 ), data ); } -template -std::shared_ptr> Array::constView( +template +std::shared_ptr> Array::constView( size_t N1, size_t N2, size_t N3, std::shared_ptr const &data ) { - return constView( std::vector{N1,N2,N3}, data ); + return constView( ArraySize( N1, N2, N3 ), data ); } -template -std::shared_ptr> Array::view( - const std::vector &N, std::shared_ptr const &data ) +template +std::shared_ptr> Array::view( + const ArraySize &N, std::shared_ptr const &data ) { - std::shared_ptr> array( new Array() ); - array->d_ndim = N.size(); - array->d_length = 1; - for ( size_t i = 0; i < 
N.size(); i++ ) { - array->d_N[i] = N[i]; - array->d_length *= N[i]; - } - if ( array->d_ndim == 0 ) - array->d_length = 0; - array->d_ptr = data; - array->d_data = array->d_ptr.get(); + std::shared_ptr> array( new Array() ); + array->d_size = N; + array->d_ptr = data; + array->d_data = array->d_ptr.get(); return array; } -template -std::shared_ptr> Array::constView( - const std::vector &N, std::shared_ptr const &data ) +template +std::shared_ptr> Array::constView( + const ArraySize &N, std::shared_ptr const &data ) { - return view( N, std::const_pointer_cast( data ) ); + std::shared_ptr> array( new Array() ); + array->d_size = N; + array->d_ptr = data; + array->d_data = array->d_ptr.get(); + return array; } -template -void Array::view2( Array &src ) +template +void Array::view2( Array &src ) { view2( src.size(), src.getPtr() ); d_data = src.d_data; } -template -void Array::view2( const std::vector &N, std::shared_ptr const &data ) +template +void Array::view2( const ArraySize &N, std::shared_ptr const &data ) { - d_ndim = static_cast( N.size() ); - for ( size_t i = 0; i < ARRAY_NDIM_MAX; i++ ) { - d_N[i] = 1; - } - d_length = d_ndim == 0 ? 0 : 1; - for ( size_t i = 0; i < N.size(); i++ ) { - d_N[i] = N[i]; - d_length *= d_N[i]; - } + d_size = N; d_ptr = data; d_data = d_ptr.get(); } - -template -void Array::viewRaw( const std::initializer_list &N, TYPE *data ) +template +void Array::viewRaw( int ndim, const size_t *dims, TYPE *data ) { - d_ndim = static_cast( N.size() ); - for ( size_t i = 0; i < ARRAY_NDIM_MAX; i++ ) { - d_N[i] = 1; - } - d_length = d_ndim == 0 ? 
0 : 1; - size_t i = 0; - for ( auto it = N.begin(); it != N.end(); ++it, ++i ) { - d_N[i] = *it; - d_length *= *it; - } + d_size = ArraySize( ndim, dims ); d_ptr.reset(); d_data = data; } -template -void Array::viewRaw( const std::vector &N, TYPE *data ) +template +void Array::viewRaw( const ArraySize &N, TYPE *data ) { - d_ndim = static_cast( N.size() ); - for ( size_t i = 0; i < ARRAY_NDIM_MAX; i++ ) { - d_N[i] = 1; - } - d_length = d_ndim == 0 ? 0 : 1; - size_t i = 0; - for ( auto it = N.begin(); it != N.end(); ++it, ++i ) { - d_N[i] = *it; - d_length *= *it; - } + d_size = N; d_ptr.reset(); d_data = data; } /******************************************************** -* Convert array types * -********************************************************/ -template -template -std::shared_ptr> Array::convert( std::shared_ptr> array ) + * Convert array types * + ********************************************************/ +template +template +std::shared_ptr> Array::convert( std::shared_ptr> array ) { if ( std::is_same() ) return array; @@ -588,161 +675,200 @@ std::shared_ptr> Array::convert( std::shared_ptr> array2.copy( *array ); return array2; } -template -template -std::shared_ptr> Array::convert( std::shared_ptr> array ) +template +template +std::shared_ptr> Array::convert( + std::shared_ptr> array ) { - return Array::convert( std::const_pointer_cast>( array ) ); + return Array::convert( std::const_pointer_cast>( array ) ); } -template -template -void Array::copy( const Array &array ) +template +template +void Array::copy( const Array &array ) { resize( array.size() ); const TYPE2 *src = array.data(); - for ( size_t i = 0; i < d_length; i++ ) - d_data[i] = static_cast( src[i] ); + for ( size_t i = 0; i < d_size.length(); i++ ) + d_data[i] = static_cast( src[i] ); } -template -template -void Array::copy( const TYPE2 *src ) +template +template +void Array::copy( const TYPE2 *src ) { - for ( size_t i = 0; i < d_length; i++ ) - d_data[i] = static_cast( src[i] ); + for ( 
size_t i = 0; i < d_size.length(); i++ ) + d_data[i] = static_cast( src[i] ); } -template -template -void Array::copyTo( TYPE2 *dst ) const +template +template +void Array::copyTo( TYPE2 *dst ) const { - for ( size_t i = 0; i < d_length; i++ ) - dst[i] = static_cast( d_data[i] ); + for ( size_t i = 0; i < d_size.length(); i++ ) + dst[i] = static_cast( d_data[i] ); } -template -void Array::fill( const TYPE &value ) +template +template +Array Array::cloneTo() const { - for ( size_t i = 0; i < d_length; i++ ) - d_data[i] = value; + Array dst( this->size() ); + auto dst_data = dst.data(); + for ( size_t i = 0; i < d_size.length(); i++ ) + dst_data[i] = static_cast( d_data[i] ); + return dst; } -template -void Array::scale( const TYPE &value ) +template +void Array::fill( const TYPE &value ) { - for ( size_t i = 0; i < d_length; i++ ) + for ( size_t i = 0; i < d_size.length(); i++ ) + d_data[i] = value; +} +template +void Array::scale( const TYPE &value ) +{ + for ( size_t i = 0; i < d_size.length(); i++ ) d_data[i] *= value; } -template - void Array::pow(const Array &baseArray, const TYPE &exp ) +template +void Array::pow( const Array &baseArray, const TYPE &exp ) { // not insisting on the shapes being the same // but insisting on the total size being the same - AMP_ASSERT(d_length==baseArray.length()); + if ( d_size.length() != baseArray.length() ) + throw std::logic_error( "length of arrays do not match" ); const auto base_data = baseArray.data(); - for ( size_t i = 0; i < d_length; i++ ) - d_data[i] = pow(base_data[i], exp); + for ( size_t i = 0; i < d_size.length(); i++ ) + d_data[i] = std::pow( base_data[i], exp ); } + /******************************************************** -* Simple math operations * -********************************************************/ -template -bool Array::NaNs() const + * Replicate the array * + ********************************************************/ +template +Array Array::repmat( const std::vector &N_rep ) const +{ + std::vector 
N2( d_size.begin(), d_size.end() ); + if ( N2.size() < N_rep.size() ) + N2.resize( N_rep.size(), 1 ); + std::array N1, Nr; + N1.fill( 1 ); + Nr.fill( 1 ); + for ( size_t d = 0; d < N_rep.size(); d++ ) { + N1[d] = d_size[d]; + Nr[d] = N_rep[d]; + N2[d] *= N_rep[d]; + } + Array y( N2 ); + static_assert( ArraySize::maxDim() <= 5, "Not programmed for dimensions > 5" ); + TYPE *y2 = y.data(); + for ( size_t i4 = 0, index = 0; i4 < N1[4]; i4++ ) { + for ( size_t j4 = 0; j4 < Nr[4]; j4++ ) { + for ( size_t i3 = 0; i3 < N1[3]; i3++ ) { + for ( size_t j4 = 0; j4 < Nr[3]; j4++ ) { + for ( size_t i2 = 0; i2 < N1[2]; i2++ ) { + for ( size_t j4 = 0; j4 < Nr[2]; j4++ ) { + for ( size_t i1 = 0; i1 < N1[1]; i1++ ) { + for ( size_t j4 = 0; j4 < Nr[1]; j4++ ) { + for ( size_t i0 = 0; i0 < N1[0]; i0++ ) { + size_t k = d_size.index( i0, i1, i2, i3, i4 ); + TYPE x = d_data[k]; + for ( size_t j4 = 0; j4 < Nr[0]; j4++, index++ ) + y2[index] = x; + } + } + } + } + } + } + } + } + } + return y; +} + + +/******************************************************** + * Simple math operations * + ********************************************************/ +template +bool Array::NaNs() const { bool test = false; - for ( size_t i = 0; i < d_length; i++ ) + for ( size_t i = 0; i < d_size.length(); i++ ) test = test || d_data[i] != d_data[i]; return test; } -template -TYPE Array::min() const + +template +TYPE Array::mean( void ) const { - TYPE x = std::numeric_limits::max(); - for ( size_t i = 0; i < d_length; i++ ) - x = std::min( x, d_data[i] ); + TYPE x = this->sum() / d_size.length(); return x; } -template -TYPE Array::max() const +template +Array Array::min( int dir ) const { - TYPE x = std::numeric_limits::min(); - for ( size_t i = 0; i < d_length; i++ ) - x = std::max( x, d_data[i] ); - return x; -} -template -TYPE Array::sum() const -{ - TYPE x = 0; - for ( size_t i = 0; i < d_length; i++ ) - x += d_data[i]; - return x; -} -template -TYPE Array::mean( void ) const -{ - TYPE x = sum() / 
d_length; - return x; -} -template -Array Array::min( int dir ) const -{ - std::vector size_ans = size(); - size_ans[dir] = 1; - Array ans( size_ans ); + auto size_ans = d_size; + size_ans.resize( dir, 1 ); + Array ans( size_ans ); size_t N1 = 1, N2 = 1, N3 = 1; - for ( int d = 0; d < std::min( dir, d_ndim ); d++ ) - N1 *= d_N[d]; - N2 = d_N[dir]; - for ( int d = dir + 1; d < std::min( d_ndim, ARRAY_NDIM_MAX ); d++ ) - N3 *= d_N[d]; + for ( int d = 0; d < std::min( dir, d_size.ndim() ); d++ ) + N1 *= d_size[d]; + N2 = d_size[dir]; + for ( size_t d = dir + 1; d < d_size.ndim(); d++ ) + N3 *= d_size[d]; TYPE *data2 = ans.d_data; for ( size_t i3 = 0; i3 < N3; i3++ ) { for ( size_t i1 = 0; i1 < N1; i1++ ) { TYPE x = d_data[i1 + i3 * N1 * N2]; for ( size_t i2 = 0; i2 < N2; i2++ ) - x = std::min( x, d_data[i1 + i2 * N1 + i3 * N1 * N2] ); + x = std::min( x, d_data[i1 + i2 * N1 + i3 * N1 * N2] ); data2[i1 + i3 * N1] = x; } } return ans; } -template -Array Array::max( int dir ) const +template +Array Array::max( int dir ) const { - std::vector size_ans = size(); - size_ans[dir] = 1; - Array ans( size_ans ); + auto size_ans = d_size; + size_ans.resize( dir, 1 ); + Array ans( size_ans ); size_t N1 = 1, N2 = 1, N3 = 1; - for ( int d = 0; d < std::min( dir, d_ndim ); d++ ) - N1 *= d_N[d]; - N2 = d_N[dir]; - for ( int d = dir + 1; d < std::min( d_ndim, ARRAY_NDIM_MAX ); d++ ) - N3 *= d_N[d]; - TYPE *data2 = ans.d_data; + for ( int d = 0; d < std::min( dir, d_size.ndim() ); d++ ) + N1 *= d_size[d]; + N2 = d_size[dir]; + DISABLE_WARNINGS // Suppress false array subscript is above array bounds + for ( size_t d = dir + 1; d < d_size.ndim(); d++ ) N3 *= d_size[d]; + ENABLE_WARNINGS // Enable warnings + TYPE *data2 = ans.d_data; for ( size_t i3 = 0; i3 < N3; i3++ ) { for ( size_t i1 = 0; i1 < N1; i1++ ) { TYPE x = d_data[i1 + i3 * N1 * N2]; for ( size_t i2 = 0; i2 < N2; i2++ ) - x = std::max( x, d_data[i1 + i2 * N1 + i3 * N1 * N2] ); + x = std::max( x, d_data[i1 + i2 * N1 + i3 * N1 * 
N2] ); data2[i1 + i3 * N1] = x; } } return ans; } -template -Array Array::sum( int dir ) const +template +Array Array::sum( int dir ) const { - std::vector size_ans = size(); - size_ans[dir] = 1; - Array ans( size_ans ); + auto size_ans = d_size; + size_ans.resize( dir, 1 ); + Array ans( size_ans ); size_t N1 = 1, N2 = 1, N3 = 1; - for ( int d = 0; d < std::min( dir, d_ndim ); d++ ) - N1 *= d_N[d]; - N2 = d_N[dir]; - for ( int d = dir + 1; d < std::min( d_ndim, ARRAY_NDIM_MAX ); d++ ) - N3 *= d_N[d]; + for ( int d = 0; d < std::min( dir, d_size.ndim() ); d++ ) + N1 *= d_size[d]; + N2 = d_size[dir]; + DISABLE_WARNINGS + for ( size_t d = dir + 1; d < d_size.ndim(); d++ ) + N3 *= d_size[d]; + ENABLE_WARNINGS TYPE *data2 = ans.d_data; - for ( int i3 = 0; i3 < N3; i3++ ) { - for ( int i1 = 0; i1 < N1; i1++ ) { + for ( size_t i3 = 0; i3 < N3; i3++ ) { + for ( size_t i1 = 0; i1 < N1; i1++ ) { TYPE x = 0; for ( size_t i2 = 0; i2 < N2; i2++ ) x += d_data[i1 + i2 * N1 + i3 * N1 * N2]; @@ -751,51 +877,44 @@ Array Array::sum( int dir ) const } return ans; } -template -TYPE Array::min( const std::vector &index ) const +template +TYPE Array::min( const std::vector> &range ) const { // Get the subset indicies - checkSubsetIndex( index ); - std::array first, last, N1; - getSubsetArrays( index, first, last, N1 ); - std::array N2 = getDimArray(); -#if ARRAY_NDIM_MAX > 5 -#error Function programmed for more than 5 dimensions -#endif + checkSubsetIndex( range ); + std::array first, last, inc, N1; + getSubsetArrays( range, first, last, inc, N1 ); + static_assert( ArraySize::maxDim() <= 5, "Function programmed for more than 5 dimensions" ); TYPE x = std::numeric_limits::max(); - for ( size_t i4 = first[4]; i4 <= last[4]; i4++ ) { - for ( size_t i3 = first[3]; i3 <= last[3]; i3++ ) { - for ( size_t i2 = first[2]; i2 <= last[2]; i2++ ) { - for ( size_t i1 = first[1]; i1 <= last[1]; i1++ ) { - for ( size_t i0 = first[0]; i0 <= last[0]; i0++ ) { - size_t k1 = GET_ARRAY_INDEX5D( N2, i0, i1, 
i2, i3, i4 ); + for ( size_t i4 = first[4]; i4 <= last[4]; i4 += inc[4] ) { + for ( size_t i3 = first[3]; i3 <= last[3]; i3 += inc[3] ) { + for ( size_t i2 = first[2]; i2 <= last[2]; i2 += inc[2] ) { + for ( size_t i1 = first[1]; i1 <= last[1]; i1 += inc[1] ) { + for ( size_t i0 = first[0]; i0 <= last[0]; i0 += inc[0] ) { + size_t k1 = d_size.index( i0, i1, i2, i3, i4 ); x = std::min( x, d_data[k1] ); } } } } } - return x; } -template -TYPE Array::max( const std::vector &index ) const +template +TYPE Array::max( const std::vector> &range ) const { // Get the subset indicies - checkSubsetIndex( index ); - std::array first, last, N1; - getSubsetArrays( index, first, last, N1 ); - std::array N2 = getDimArray(); -#if ARRAY_NDIM_MAX > 5 -#error Function programmed for more than 5 dimensions -#endif + checkSubsetIndex( range ); + std::array first, last, inc, N1; + getSubsetArrays( range, first, last, inc, N1 ); + static_assert( ArraySize::maxDim() <= 5, "Function programmed for more than 5 dimensions" ); TYPE x = std::numeric_limits::min(); - for ( size_t i4 = first[4]; i4 <= last[4]; i4++ ) { - for ( size_t i3 = first[3]; i3 <= last[3]; i3++ ) { - for ( size_t i2 = first[2]; i2 <= last[2]; i2++ ) { - for ( size_t i1 = first[1]; i1 <= last[1]; i1++ ) { - for ( size_t i0 = first[0]; i0 <= last[0]; i0++ ) { - size_t k1 = GET_ARRAY_INDEX5D( N2, i0, i1, i2, i3, i4 ); + for ( size_t i4 = first[4]; i4 <= last[4]; i4 += inc[4] ) { + for ( size_t i3 = first[3]; i3 <= last[3]; i3 += inc[3] ) { + for ( size_t i2 = first[2]; i2 <= last[2]; i2 += inc[2] ) { + for ( size_t i1 = first[1]; i1 <= last[1]; i1 += inc[1] ) { + for ( size_t i0 = first[0]; i0 <= last[0]; i0 += inc[0] ) { + size_t k1 = d_size.index( i0, i1, i2, i3, i4 ); x = std::max( x, d_data[k1] ); } } @@ -804,24 +923,21 @@ TYPE Array::max( const std::vector &index ) const } return x; } -template -TYPE Array::sum( const std::vector &index ) const +template +TYPE Array::sum( const std::vector> &range ) const { // Get the 
subset indicies - checkSubsetIndex( index ); - std::array first, last, N1; - getSubsetArrays( index, first, last, N1 ); - std::array N2 = getDimArray(); -#if ARRAY_NDIM_MAX > 5 -#error Function programmed for more than 5 dimensions -#endif + checkSubsetIndex( range ); + std::array first, last, inc, N1; + getSubsetArrays( range, first, last, inc, N1 ); + static_assert( ArraySize::maxDim() <= 5, "Function programmed for more than 5 dimensions" ); TYPE x = 0; - for ( size_t i4 = first[4]; i4 <= last[4]; i4++ ) { - for ( size_t i3 = first[3]; i3 <= last[3]; i3++ ) { - for ( size_t i2 = first[2]; i2 <= last[2]; i2++ ) { - for ( size_t i1 = first[1]; i1 <= last[1]; i1++ ) { - for ( size_t i0 = first[0]; i0 <= last[0]; i0++ ) { - size_t k1 = GET_ARRAY_INDEX5D( N2, i0, i1, i2, i3, i4 ); + for ( size_t i4 = first[4]; i4 <= last[4]; i4 += inc[4] ) { + for ( size_t i3 = first[3]; i3 <= last[3]; i3 += inc[3] ) { + for ( size_t i2 = first[2]; i2 <= last[2]; i2 += inc[2] ) { + for ( size_t i1 = first[1]; i1 <= last[1]; i1 += inc[1] ) { + for ( size_t i0 = first[0]; i0 <= last[0]; i0 += inc[0] ) { + size_t k1 = d_size.index( i0, i1, i2, i3, i4 ); x += d_data[k1]; } } @@ -830,105 +946,56 @@ TYPE Array::sum( const std::vector &index ) const } return x; } -template -TYPE Array::mean( const std::vector &index ) const +template +TYPE Array::mean( const std::vector> &range ) const { // Get the subset indicies - checkSubsetIndex( index ); - std::array first, last, N1; - getSubsetArrays( index, first, last, N1 ); -#if ARRAY_NDIM_MAX > 5 -#error Function programmed for more than 5 dimensions -#endif + checkSubsetIndex( range ); + std::array first, last, inc, N1; + getSubsetArrays( range, first, last, inc, N1 ); + static_assert( ArraySize::maxDim() <= 5, "Function programmed for more than 5 dimensions" ); size_t n = 1; for ( auto &d : N1 ) n *= d; - TYPE x = sum( index ) / n; + TYPE x = sum( range ) / n; return x; } - -template -Array &Array::operator+=( const Array &rhs ) +template +TYPE 
Array::min( const std::vector &index ) const { - if ( !sizeMatch(rhs) ) - throw std::logic_error( "Array don't match" ); - for ( size_t i = 0; i < d_length; i++ ) - d_data[i] += rhs.d_data[i]; - return *this; + auto range = convert( index ); + return min( range ); } -template -Array &Array::operator-=( const Array &rhs ) +template +TYPE Array::max( const std::vector &index ) const { - if ( !sizeMatch(rhs) ) - throw std::logic_error( "Array don't match" ); - for ( size_t i = 0; i < d_length; i++ ) - d_data[i] -= rhs.d_data[i]; - return *this; + auto range = convert( index ); + return max( range ); } -template -Array &Array::operator+=( const TYPE &rhs ) +template +TYPE Array::sum( const std::vector &index ) const { - for ( size_t i = 0; i < d_length; i++ ) - d_data[i] += rhs; - return *this; + auto range = convert( index ); + return sum( range ); } -template -Array &Array::operator-=( const TYPE &rhs ) +template +TYPE Array::mean( const std::vector &index ) const { - for ( size_t i = 0; i < d_length; i++ ) - d_data[i] -= rhs; - return *this; -} -template -Array operator+( const Array& a, const Array& b ) -{ - Array c = a; - c += b; - return c; -} -template -Array operator-( const Array& a, const Array& b ) -{ - Array c = a; - c -= b; - return c; -} -template -Array operator*( const Array& a, const Array& b ) -{ - return Array::multiply(a,b); -} -template -Array Array::multiply( const Array& a, const Array& b ) -{ - Array c; - if ( a.d_ndim==2 && b.d_ndim==2 ) { - c.resize( a.size(0), b.size(1) ); - c.fill(0); - for (size_t k=0; k -std::vector Array::find( + * Find all elements that match the given operation * + ********************************************************/ +template +std::vector Array::find( const TYPE &value, std::function compare ) const { std::vector result; - result.reserve( d_length ); - for ( size_t i = 0; i < d_length; i++ ) { + result.reserve( d_size.length() ); + for ( size_t i = 0; i < d_size.length(); i++ ) { if ( compare( d_data[i], value ) ) 
result.push_back( i ); } @@ -937,115 +1004,122 @@ std::vector Array::find( /******************************************************** -* Print an array to an output stream * -********************************************************/ -template -void Array::print( std::ostream& os, const std::string& name, const std::string& prefix ) const + * Print an array to an output stream * + ********************************************************/ +template +void Array::print( + std::ostream &os, const std::string &name, const std::string &prefix ) const { - if ( d_ndim==1 ) { - for (size_t i=0; i -Array Array::reverseDim( ) const + * Reverse dimensions (transpose) * + ********************************************************/ +template +Array Array::reverseDim() const { - std::vector N2(ARRAY_NDIM_MAX); - for ( int d=0; d y( N2 ); -#if ARRAY_NDIM_MAX != 5 - #error Function programmed for dimensions other than 5 -#endif - TYPE* y2 = y.data(); - for (size_t i0=0; i0 y( S2 ); + static_assert( ArraySize::maxDim() == 5, "Not programmed for dimensions other than 5" ); + TYPE *y2 = y.data(); + for ( size_t i0 = 0; i0 < d_size[0]; i0++ ) { + for ( size_t i1 = 0; i1 < d_size[1]; i1++ ) { + for ( size_t i2 = 0; i2 < d_size[2]; i2++ ) { + for ( size_t i3 = 0; i3 < d_size[3]; i3++ ) { + for ( size_t i4 = 0; i4 < d_size[4]; i4++ ) { + y2[S2.index( i4, i3, i2, i1, i0 )] = + d_data[d_size.index( i0, i1, i2, i3, i4 )]; } } } } } - auto S2 = size(); - for ( int d=0; d -Array Array::coarsen( const Array& filter ) const + * Coarsen the array * + ********************************************************/ +template +Array Array::coarsen( const Array &filter ) const { auto S2 = size(); - for (size_t i=0; i y( S2 ); - INSIST(d_ndim<=3,"Function programmed for more than 5 dimensions"); - const size_t *Nh = filter.d_N; - for (size_t k1=0; k1 y( S2 ); + if ( d_size.ndim() <= 3 ) + throw std::logic_error( "Function programmed for more than 3 dimensions" ); + const auto& Nh = filter.d_size; + for ( size_t 
k1 = 0; k1 < y.d_size[2]; k1++ ) { + for ( size_t j1 = 0; j1 < y.d_size[1]; j1++ ) { + for ( size_t i1 = 0; i1 < y.d_size[0]; i1++ ) { TYPE tmp = 0; - for (size_t k2=0; k2operator()(i1*Nh[0]+i2,j1*Nh[1]+j2,k1*Nh[2]+k2); + for ( size_t k2 = 0; k2 < Nh[2]; k2++ ) { + for ( size_t j2 = 0; j2 < Nh[1]; j2++ ) { + for ( size_t i2 = 0; i2 < Nh[0]; i2++ ) { + tmp += filter( i2, j2, k2 ) * this->operator()( i1 *Nh[0] + i2, + j1 * Nh[1] + j2, k1 * Nh[2] + k2 ); } } } - y(i1,j1,k1) = tmp; + y( i1, j1, k1 ) = tmp; } } } return y; } -template -Array Array::coarsen( const std::vector& ratio, std::function&)> filter ) const +template +Array Array::coarsen( + const std::vector &ratio, std::function & )> filter ) const { - ASSERT((int)ratio.size()==d_ndim); + if ( ratio.size() != d_size.ndim() ) + throw std::logic_error( "ratio size does not match ndim" ); auto S2 = size(); - for (size_t i=0; i tmp(ratio); - TYPE* tmp2 = tmp.data(); - Array y( S2 ); - INSIST(d_ndim<=3,"Function programmed for more than 3 dimensions"); - for (size_t k1=0; k1operator()(i1*ratio[0]+i2,j1*ratio[1]+j2,k1*ratio[2]+k2); + Array tmp( ratio ); + Array y( S2 ); + if ( d_size.ndim() <= 3 ) + throw std::logic_error( "Function programmed for more than 3 dimensions" ); + for ( size_t k1 = 0; k1 < y.d_size[2]; k1++ ) { + for ( size_t j1 = 0; j1 < y.d_size[1]; j1++ ) { + for ( size_t i1 = 0; i1 < y.d_size[0]; i1++ ) { + for ( size_t k2 = 0; k2 < ratio[2]; k2++ ) { + for ( size_t j2 = 0; j2 < ratio[1]; j2++ ) { + for ( size_t i2 = 0; i2 < ratio[0]; i2++ ) { + tmp( i2, j2, k2 ) = this->operator()( + i1 *ratio[0] + i2, j1 * ratio[1] + j2, k1 * ratio[2] + k2 ); } } } - y(i1,j1,k1) = filter(tmp); + y( i1, j1, k1 ) = filter( tmp ); } } } @@ -1053,4 +1127,172 @@ Array Array::coarsen( const std::vector& ratio, std::functio } +/******************************************************** + * Concatenates the arrays * + ********************************************************/ +template +void Array::cat( const Array &x, int dim 
) +{ + std::vector> tmp( 2 ); + tmp[0].view2( *this ); + tmp[1].view2( const_cast &>( x ) ); + *this = cat( tmp, dim ); +} +template +Array Array::cat( const std::vector &x, int dim ) +{ + if ( x.empty() ) + return Array(); + // Check that the dimensions match + bool check = true; + for ( size_t i = 1; i < x.size(); i++ ) { + check = check && x[i].ndim() == x[0].ndim(); + for ( int d = 0; d < x[0].ndim(); d++ ) + check = check && d == dim; + } + if ( !check ) + throw std::logic_error( "Array dimensions do not match for concatenation" ); + // Create the output array + auto size = x[0].d_size; + for ( size_t i = 1; i < x.size(); i++ ) + size.resize( dim, size[dim] + x[i].size( dim ) ); + Array out( size ); + size_t N1 = 1; + size_t N2 = size[dim]; + size_t N3 = 1; + for ( int d = 0; d < dim; d++ ) + N1 *= size[d]; + for ( size_t d = dim + 1; d < size.ndim(); d++ ) + N3 *= size[d]; + TYPE *data = out.data(); + for ( size_t i = 0, i0 = 0; i < x.size(); i++ ) { + const TYPE *src = x[i].data(); + size_t N22 = x[i].size( dim ); + for ( size_t j2 = 0; j2 < N3; j2++ ) { + for ( size_t i1 = 0; i1 < N22; i1++ ) { + for ( size_t j1 = 0; j1 < N1; j1++ ) { + data[j1 + ( i1 + i0 ) * N1 + j2 * N1 * N2] = src[j1 + i1 * N1 + j2 * N1 * N22]; + } + } + } + i0 += N22; + } + return out; +} + + +/******************************************************** + * Math operations (should call the Math class) * + ********************************************************/ +template +void Array::rand() +{ + FUN::rand( *this ); +} +template +Array &Array::operator+=( const Array &rhs ) +{ + const auto &fun = []( const TYPE &a, const TYPE &b ) { return a + b; }; + FUN::transform( fun, *this, rhs, *this ); + return *this; +} +template +Array &Array::operator-=( const Array &rhs ) +{ + const auto &fun = []( const TYPE &a, const TYPE &b ) { return a - b; }; + FUN::transform( fun, *this, rhs, *this ); + return *this; +} +template +Array &Array::operator+=( const TYPE &rhs ) +{ + const auto &fun = [rhs]( 
const TYPE &x ) { return x + rhs; }; + FUN::transform( fun, *this, *this ); + return *this; +} +template +Array &Array::operator-=( const TYPE &rhs ) +{ + const auto &fun = [rhs]( const TYPE &x ) { return x - rhs; }; + FUN::transform( fun, *this, *this ); + return *this; +} +template +Array operator+( const Array &a, const Array &b ) +{ + Array c; + const auto &fun = []( const TYPE &a, const TYPE &b ) { return a + b; }; + FUN::transform( fun, a, b, c ); + return c; +} +template +Array operator-( const Array &a, const Array &b ) +{ + Array c; + const auto &fun = []( const TYPE &a, const TYPE &b ) { return a - b; }; + FUN::transform( fun, a, b, c ); + return c; +} +template +Array operator*( const Array &a, const Array &b ) +{ + return Array::multiply( a, b ); +} +template +inline Array operator*( const Array &a, const std::vector &b ) +{ + Array b2; + b2.viewRaw( { b.size() }, const_cast( b.data() ) ); + return Array::multiply( a, b2 ); +} +template +TYPE Array::min() const +{ + const auto &fun = []( const TYPE &a, const TYPE &b ) { return a < b ? a : b; }; + return FUN::reduce( fun, *this ); +} +template +TYPE Array::max() const +{ + const auto &fun = []( const TYPE &a, const TYPE &b ) { return a > b ? 
a : b; }; + return FUN::reduce( fun, *this ); +} +template +TYPE Array::sum() const +{ + const auto &fun = []( const TYPE &a, const TYPE &b ) { return a + b; }; + return FUN::reduce( fun, *this ); +} +template +Array Array::multiply( const Array &a, const Array &b ) +{ + Array c; + FUN::multiply( a, b, c ); + return c; +} +template +void Array::axpby( const TYPE &alpha, const Array &x, const TYPE &beta ) +{ + const auto &fun = [alpha, beta]( + const TYPE &x, const TYPE &y ) { return alpha * x + beta * y; }; + return FUN::transform( fun, x, *this ); +} +template +Array Array::transform( + std::function fun, const Array &x ) +{ + Array y; + FUN::transform( fun, x, y ); + return y; +} +template +Array Array::transform( std::function fun, + const Array &x, const Array &y ) +{ + Array z; + FUN::transform( fun, x, y, z ); + return z; +} + + #endif diff --git a/common/FunctionTable.h b/common/FunctionTable.h new file mode 100644 index 00000000..e2bdcb67 --- /dev/null +++ b/common/FunctionTable.h @@ -0,0 +1,81 @@ +#ifndef included_FunctionTable +#define included_FunctionTable + + +#include "common/Array.h" + +#include + + +/*! + * Class FunctionTable is a serial function table class that defines + * a series of operations that can be performed on the Array class. + * Users can impliment additional versions of the function table that match + * the interface to change the behavior of the array class. + */ +class FunctionTable final +{ +public: + /*! + * Initialize the array with random values + * @param[in] x The array to operate on + */ + template + static void rand( Array &x ); + + /*! + * Perform a reduce operator y = f(x) + * @param[in] op The function operation + * Note: the operator is a template parameter + * (compared to a std::function to improve performance) + * @param[in] A The array to operate on + * @return The reduction + */ + template + static inline TYPE reduce( LAMBDA &op, const Array &A ); + + /*! 
+ * Perform a element-wise operation y = f(x) + * @param[in] fun The function operation + * Note: the operator is a template parameter + * (compared to a std::function to improve performance) + * @param[in] x The input array to operate on + * @param[out] y The output array + */ + template + static inline void transform( LAMBDA &fun, const Array &x, Array &y ); + + /*! + * Perform a element-wise operation z = f(x,y) + * @param[in] fun The function operation + * Note: the operator is a template parameter + * (compared to a std::function to improve performance) + * @param[in] x The first array + * @param[in] y The second array + * @param[out] z The result + */ + template + static inline void transform( + LAMBDA &fun, const Array &x, const Array &y, Array &z ); + + /*! + * Multiply two arrays + * @param[in] a The first array + * @param[in] b The second array + * @param[out] c The output array + */ + template + static void multiply( + const Array &a, const Array &b, Array &c ); + + +private: + FunctionTable(); + + template + static inline void rand( size_t N, T *x ); +}; + +#include "common/FunctionTable.hpp" + +#endif diff --git a/common/FunctionTable.hpp b/common/FunctionTable.hpp new file mode 100644 index 00000000..52897d5c --- /dev/null +++ b/common/FunctionTable.hpp @@ -0,0 +1,116 @@ +#ifndef included_FunctionTable_hpp +#define included_FunctionTable_hpp + +#include "common/FunctionTable.h" +#include "common/Utilities.h" + +#include +#include +#include +#include + + +/******************************************************** + * Random number initialization * + ********************************************************/ +template +void FunctionTable::rand( Array &x ) +{ + FunctionTable::rand( x.length(), x.data() ); +} +template<> +inline void FunctionTable::rand( size_t N, double *x ) +{ + std::random_device rd; + std::mt19937 gen( rd() ); + std::uniform_real_distribution<> dis( 0, 1 ); + for ( size_t i = 0; i < N; i++ ) + x[i] = dis( gen ); +} +template<> +inline 
void FunctionTable::rand( size_t N, float *x ) +{ + std::random_device rd; + std::mt19937 gen( rd() ); + std::uniform_real_distribution<> dis( 0, 1 ); + for ( size_t i = 0; i < N; i++ ) + x[i] = dis( gen ); +} +template<> +inline void FunctionTable::rand( size_t N, int *x ) +{ + std::random_device rd; + std::mt19937 gen( rd() ); + std::uniform_int_distribution<> dis; + for ( size_t i = 0; i < N; i++ ) + x[i] = dis( gen ); +} + + +/******************************************************** + * Reduction * + ********************************************************/ +template +inline TYPE FunctionTable::reduce( LAMBDA &op, const Array &A ) +{ + if ( A.length() == 0 ) + return TYPE(); + const TYPE *x = A.data(); + TYPE y = x[0]; + const size_t N = A.length(); + for ( size_t i = 1; i < N; i++ ) + y = op( x[i], y ); + return y; +} + + +/******************************************************** + * Unary transformation * + ********************************************************/ +template +inline void FunctionTable::transform( LAMBDA &fun, const Array &x, Array &y ) +{ + y.resize( x.size() ); + const size_t N = x.length(); + for ( size_t i = 0; i < N; i++ ) + y( i ) = fun( x( i ) ); +} +template +inline void FunctionTable::transform( + LAMBDA &fun, const Array &x, const Array &y, Array &z ) +{ + if ( !x.sizeMatch( y ) ) + throw std::logic_error( "Sizes of x and y do not match" ); + z.resize( x.size() ); + const size_t N = x.length(); + for ( size_t i = 0; i < N; i++ ) + z( i ) = fun( x( i ), y( i ) ); +} + + +/******************************************************** + * Multiply two arrays * + ********************************************************/ +template +void FunctionTable::multiply( + const Array &a, const Array &b, Array &c ) +{ + if ( a.ndim() <= 2 && b.ndim() <= 2 ) { + if ( a.size( 1 ) != b.size( 0 ) ) + throw std::logic_error( "Inner dimensions must match" ); + c.resize( a.size( 0 ), b.size( 1 ) ); + c.fill( 0 ); + for ( size_t k = 0; k < b.size( 1 ); k++ ) { + 
for ( size_t j = 0; j < a.size( 1 ); j++ ) { + for ( size_t i = 0; i < a.size( 0 ); i++ ) { + c( i, k ) += a( i, j ) * b( j, k ); + } + } + } + } else { + throw std::logic_error( "Not finished yet" ); + } +} + + +#endif diff --git a/common/StackTrace.cpp b/common/StackTrace.cpp index 9786644e..8b9e4015 100644 --- a/common/StackTrace.cpp +++ b/common/StackTrace.cpp @@ -4,14 +4,17 @@ #include #include #include -#include #include +#include #include +#include +#include #include #include #include -#include -#include + + +#define perr std::cerr // Detect the OS @@ -22,7 +25,7 @@ #elif defined( __APPLE__ ) #define USE_MAC #define USE_NM -#elif defined( __linux ) || defined( __unix ) || defined( __posix ) +#elif defined( __linux ) || defined( __linux__ ) || defined( __unix ) || defined( __posix ) #define USE_LINUX #define USE_NM #else @@ -31,23 +34,6 @@ // clang-format on -// Include/detect MPI -// clang-format off -#ifndef USE_MPI - #ifdef USE_EXT_MPI - #define USE_MPI - #elif defined(__has_include) - #if __has_include("mpi.h") - #define USE_MPI - #endif - #endif -#endif -#ifdef USE_MPI - #include "mpi.h" -#endif -// clang-format on - - // Include system dependent headers // clang-format off // Detect the OS and include system dependent headers @@ -66,7 +52,7 @@ #include #include #include - #include + #include #include #include #endif @@ -98,22 +84,26 @@ // Set the callstack signal #ifdef SIGRTMIN - #define CALLSTACK_SIG SIGRTMIN+4 +#define CALLSTACK_SIG SIGRTMIN + 4 #else - #define CALLSTACK_SIG SIGUSR1 - #define SIGRTMIN SIGUSR1 - #define SIGRTMAX SIGUSR1 +#define CALLSTACK_SIG SIGUSR1 +#define SIGRTMIN SIGUSR1 +#define SIGRTMAX SIGUSR1 #endif +// Helper thread +static std::shared_ptr globalMonitorThread; + + // Utility to break a string by a newline -static inline std::vector breakString( const std::string& str ) +static inline std::vector breakString( const std::string &str ) { std::vector strvec; size_t i1 = 0; size_t i2 = std::min( str.find( '\n', i1 ), str.length() 
); while ( i1 < str.length() ) { - strvec.push_back( str.substr( i1, i2-i1 ) ); + strvec.push_back( str.substr( i1, i2 - i1 ) ); i1 = i2 + 1; i2 = std::min( str.find( '\n', i1 ), str.length() ); } @@ -121,12 +111,26 @@ static inline std::vector breakString( const std::string& str ) } +// Function to replace all instances of a string with another +static inline void strrep( std::string &str, const std::string &s, const std::string &r ) +{ + size_t i = 0; + while ( i < str.length() ) { + i = str.find( s, i ); + if ( i == std::string::npos ) { + break; + } + str.replace( i, s.length(), r ); + i += r.length(); + } +} + + // Utility to strip the path from a filename static inline std::string stripPath( const std::string &filename ) { - if ( filename.empty() ) { + if ( filename.empty() ) return std::string(); - } int i = 0; for ( i = (int) filename.size() - 1; i >= 0 && filename[i] != 47 && filename[i] != 92; i-- ) { } @@ -166,17 +170,17 @@ BOOL GetModuleListTH32( HANDLE hProcess, DWORD pid ); BOOL GetModuleListPSAPI( HANDLE hProcess ); DWORD LoadModule( HANDLE hProcess, LPCSTR img, LPCSTR mod, DWORD64 baseAddr, DWORD size ); void LoadModules(); -}; +}; // namespace StackTrace #endif // Functions to copy data -static inline char* copy_in( size_t N, const void* data, char *ptr ) +static inline char *copy_in( size_t N, const void *data, char *ptr ) { memcpy( ptr, data, N ); return ptr + N; } -static inline const char* copy_out( size_t N, void* data, const char *ptr ) +static inline const char *copy_out( size_t N, void *data, const char *ptr ) { memcpy( data, ptr, N ); return ptr + N; @@ -184,62 +188,86 @@ static inline const char* copy_out( size_t N, void* data, const char *ptr ) /**************************************************************************** -* Utility to call system command and return output * -****************************************************************************/ + * Utility to call system command and return output * + 
****************************************************************************/ #ifdef USE_WINDOWS #define popen _popen #define pclose _pclose #endif -std::string StackTrace::exec( const std::string& cmd, int& code ) +std::string StackTrace::exec( const std::string &cmd, int &code ) { - signal( SIGCHLD, SIG_DFL ); // Clear child exited - FILE* pipe = popen(cmd.c_str(), "r"); + signal( SIGCHLD, SIG_DFL ); // Clear child exited + FILE *pipe = popen( cmd.c_str(), "r" ); if ( pipe == nullptr ) return std::string(); std::string result = ""; - result.reserve(1024); - while ( !feof(pipe) ) { + result.reserve( 1024 ); + while ( !feof( pipe ) ) { char buffer[257]; buffer[256] = 0; - if ( fgets(buffer, 128, pipe) != NULL ) + if ( fgets( buffer, 128, pipe ) != nullptr ) result += buffer; } auto status = pclose( pipe ); - code = WEXITSTATUS(status); + code = WEXITSTATUS( status ); return result; } /**************************************************************************** -* stack_info * -****************************************************************************/ -bool StackTrace::stack_info::operator==( const StackTrace::stack_info& rhs ) const + * stack_info * + ****************************************************************************/ +void StackTrace::stack_info::clear() +{ + address = nullptr; + address2 = nullptr; + object.clear(); + function.clear(); + filename.clear(); + line = -1; +} +bool StackTrace::stack_info::operator==( const StackTrace::stack_info &rhs ) const { if ( address == rhs.address ) return true; - if ( address2==rhs.address2 && object==rhs.object ) + if ( address2 == rhs.address2 && object == rhs.object ) return true; return false; } -bool StackTrace::stack_info::operator!=( const StackTrace::stack_info& rhs ) const +bool StackTrace::stack_info::operator!=( const StackTrace::stack_info &rhs ) const { return !operator==( rhs ); } -std::string StackTrace::stack_info::print() const +int StackTrace::stack_info::getAddressWidth() const { - char tmp[32]; 
- sprintf( tmp, "0x%016llx: ", reinterpret_cast( address ) ); - std::string stack( tmp ); - sprintf( tmp, "%i", line ); - std::string line_str( tmp ); + auto addr = reinterpret_cast( address ); + if ( addr <= 0xFFFF ) + return 4; + if ( addr <= 0xFFFFFFFF ) + return 8; + if ( addr <= 0xFFFFFFFFFFFF ) + return 12; + return 16; +} +std::string +StackTrace::stack_info::print( int widthAddress, int widthObject, int widthFunction ) const +{ + char tmp1[64], tmp2[64]; + sprintf( tmp1, "0x%%0%illx: ", widthAddress ); + sprintf( tmp2, tmp1, reinterpret_cast( address ) ); + std::string stack( tmp2 ); + sprintf( tmp2, "%i", line ); + std::string line_str( tmp2 ); + size_t N = stack.length(); stack += stripPath( object ); - stack.resize( std::max( stack.size(), 38 ), ' ' ); + stack.resize( std::max( stack.size(), N + widthObject ), ' ' ); + N = stack.length() + 2; stack += " " + function; if ( !filename.empty() && line > 0 ) { - stack.resize( std::max( stack.size(), 72 ), ' ' ); + stack.resize( std::max( stack.size(), N + widthFunction ), ' ' ); stack += " " + stripPath( filename ) + ":" + line_str; } else if ( !filename.empty() ) { - stack.resize( std::max( stack.size(), 72 ), ' ' ); + stack.resize( std::max( stack.size(), N + widthFunction ), ' ' ); stack += " " + stripPath( filename ); } else if ( line > 0 ) { stack += " : " + line_str; @@ -248,164 +276,207 @@ std::string StackTrace::stack_info::print() const } size_t StackTrace::stack_info::size() const { - return 2*sizeof(void*) + 4*sizeof(int) + object.size() + function.size() + filename.size(); + return 2 * sizeof( void * ) + 4 * sizeof( int ) + object.size() + function.size() + + filename.size(); } -char* StackTrace::stack_info::pack( char* ptr ) const +char *StackTrace::stack_info::pack( char *ptr ) const { - int Nobj = object.size(); - int Nfun = function.size(); + int Nobj = object.size(); + int Nfun = function.size(); int Nfile = filename.size(); - ptr = copy_in( sizeof(void*), &address, ptr ); - ptr = copy_in( 
sizeof(void*), &address2, ptr ); - ptr = copy_in( sizeof(int), &Nobj, ptr ); - ptr = copy_in( sizeof(int), &Nfun, ptr ); - ptr = copy_in( sizeof(int), &Nfile, ptr ); - ptr = copy_in( sizeof(int), &line, ptr ); - ptr = copy_in( Nobj, object.data(), ptr ); - ptr = copy_in( Nfun, function.data(), ptr ); - ptr = copy_in( Nfile, filename.data(), ptr ); - return ptr; + ptr = copy_in( sizeof( void * ), &address, ptr ); + ptr = copy_in( sizeof( void * ), &address2, ptr ); + ptr = copy_in( sizeof( int ), &Nobj, ptr ); + ptr = copy_in( sizeof( int ), &Nfun, ptr ); + ptr = copy_in( sizeof( int ), &Nfile, ptr ); + ptr = copy_in( sizeof( int ), &line, ptr ); + ptr = copy_in( Nobj, object.data(), ptr ); + ptr = copy_in( Nfun, function.data(), ptr ); + ptr = copy_in( Nfile, filename.data(), ptr ); + return ptr; } -const char* StackTrace::stack_info::unpack( const char* ptr ) +const char *StackTrace::stack_info::unpack( const char *ptr ) { int Nobj, Nfun, Nfile; - ptr = copy_out( sizeof(void*), &address, ptr ); - ptr = copy_out( sizeof(void*), &address2, ptr ); - ptr = copy_out( sizeof(int), &Nobj, ptr ); - ptr = copy_out( sizeof(int), &Nfun, ptr ); - ptr = copy_out( sizeof(int), &Nfile, ptr ); - ptr = copy_out( sizeof(int), &line, ptr ); + ptr = copy_out( sizeof( void * ), &address, ptr ); + ptr = copy_out( sizeof( void * ), &address2, ptr ); + ptr = copy_out( sizeof( int ), &Nobj, ptr ); + ptr = copy_out( sizeof( int ), &Nfun, ptr ); + ptr = copy_out( sizeof( int ), &Nfile, ptr ); + ptr = copy_out( sizeof( int ), &line, ptr ); object.resize( Nobj ); function.resize( Nfun ); filename.resize( Nfile ); - ptr = copy_out( Nobj, &object.front(), ptr ); - ptr = copy_out( Nfun, &function.front(), ptr ); + ptr = copy_out( Nobj, &object.front(), ptr ); + ptr = copy_out( Nfun, &function.front(), ptr ); ptr = copy_out( Nfile, &filename.front(), ptr ); - return ptr; + return ptr; } -std::vector StackTrace::stack_info::packArray( const std::vector& data ) +std::vector 
StackTrace::stack_info::packArray( const std::vector &data ) { - size_t size = sizeof(int); - for (size_t i=0; i vec(size,0); - char* ptr = vec.data(); - int N = data.size(); - ptr = copy_in( sizeof(int), &N, ptr ); - for (size_t i=0; i vec( size, 0 ); + char *ptr = vec.data(); + int N = data.size(); + ptr = copy_in( sizeof( int ), &N, ptr ); + for ( const auto &i : data ) + ptr = i.pack( ptr ); return vec; } -std::vector StackTrace::stack_info::unpackArray( const char* ptr ) +std::vector StackTrace::stack_info::unpackArray( const char *ptr ) { int N; - ptr = copy_out( sizeof(int), &N, ptr ); - std::vector data(N); - for (size_t i=0; i data( N ); + for ( auto &i : data ) + ptr = i.unpack( ptr ); return data; } -static std::vector pack( const std::vector>& data ) +#ifdef USE_MPI +static std::vector pack( const std::vector> &data ) { - size_t size = sizeof(int); - for (size_t i=0; i out( size, 0 ); - char* ptr = out.data(); - int N = data.size(); - ptr = copy_in( sizeof(int), &N, ptr ); - for (int i=0; i> unpack( const std::vector& in ) +static std::vector> unpack( const std::vector &in ) { - const char* ptr = in.data(); + const char *ptr = in.data(); int N; - ptr = copy_out( sizeof(int), &N, ptr ); + ptr = copy_out( sizeof( int ), &N, ptr ); std::vector> data( N ); - for (int i=0; i &rhs ) { - int depth = 0; - for ( auto child : stack.children ) - depth = std::max( depth, maxDepth( child ) ); - return depth+1; -}*/ -std::vector StackTrace::multi_stack_info::print( const std::string& prefix ) const + operator=( rhs ); +} +StackTrace::multi_stack_info &StackTrace::multi_stack_info:: +operator=( const std::vector &rhs ) +{ + clear(); + if ( rhs.empty() ) + return *this; + N = 1; + stack = rhs[0]; + if ( rhs.size() > 1 ) + add( rhs.size() - 1, &rhs[1] ); + return *this; +} +void StackTrace::multi_stack_info::clear() +{ + N = 0; + stack.clear(); + children.clear(); +} +void StackTrace::multi_stack_info::print2( const std::string &prefix, + int w[3], + std::vector &text ) 
const { - std::vector text; if ( stack == stack_info() ) { - for ( const auto& child : children ) { - auto tmp = child.print( ); - text.insert( text.end(), tmp.begin(), tmp.end() ); - } - return text; - } - //auto depth = maxDepth( *this ); - //std::string line = prefix + "[" + std::to_string( N ) + "] "; - //for (auto i=1; i1 && j>0 && i text2; + child.print2( "", w, text2 ); + for ( size_t j = 0; j < text2.size(); j++ ) { + std::string line = prefix2 + text2[j]; + if ( children.size() > 1 && j > 0 && i < children.size() - 1 ) line[prefix2.size()] = '|'; text.push_back( line ); } } +} +std::vector StackTrace::multi_stack_info::print( const std::string &prefix ) const +{ + std::vector text; + int w[3] = { 0 }; + w[0] = getAddressWidth(); + w[1] = getObjectWidth(); + w[2] = getFunctionWidth(); + print2( prefix, w, text ); return text; } +int StackTrace::multi_stack_info::getAddressWidth() const +{ + int w = stack.getAddressWidth(); + for ( const auto &child : children ) + w = std::max( w, child.getAddressWidth() ); + return w; +} +int StackTrace::multi_stack_info::getObjectWidth() const +{ + int w = std::min( stripPath( stack.object ).size() + 1, 20 ); + for ( const auto &child : children ) + w = std::max( w, child.getObjectWidth() ); + return w; +} +int StackTrace::multi_stack_info::getFunctionWidth() const +{ + int w = std::min( stack.function.size() + 1, 40 ); + for ( const auto &child : children ) + w = std::max( w, child.getFunctionWidth() ); + return w; +} void StackTrace::multi_stack_info::add( size_t len, const stack_info *stack ) { if ( len == 0 ) return; - const auto& s = stack[len-1]; - for ( size_t i=0; i 1 ) - children[i].add( len-1, stack ); + i.add( len - 1, stack ); return; } } - children.resize( children.size()+1 ); - children.back().N = 1; + children.resize( children.size() + 1 ); + children.back().N = 1; children.back().stack = s; if ( len > 1 ) - children.back().add( len-1, stack ); + children.back().add( len - 1, stack ); } 
/**************************************************************************** -* Function to find an entry * -****************************************************************************/ + * Function to find an entry * + ****************************************************************************/ template inline size_t findfirst( const std::vector &X, TYPE Y ) { @@ -429,28 +500,18 @@ inline size_t findfirst( const std::vector &X, TYPE Y ) /**************************************************************************** -* Function to get symbols for the executable from nm (if availible) * -* Note: this function maintains an internal cached copy to prevent * -* exccessive calls to nm. This function also uses a lock to ensure * -* thread safety. * -****************************************************************************/ -std::mutex getSymbols_mutex; -struct global_symbols_struct { - std::vector address; - std::vector type; - std::vector obj; - int error; -} global_symbols; -std::string StackTrace::getExecutable() + * Function to get the executable name * + ****************************************************************************/ +static char global_exe_name[1000] = { 0 }; +static bool setGlobalExecutableName( char *exe ) { - std::string exe; try { #ifdef USE_LINUX - char *buf = new char[0x10000]; + auto *buf = new char[0x10000]; int len = ::readlink( "/proc/self/exe", buf, 0x10000 ); if ( len != -1 ) { buf[len] = '\0'; - exe = std::string( buf ); + strcpy( exe, buf ); } delete[] buf; #elif defined( USE_MAC ) @@ -458,21 +519,42 @@ std::string StackTrace::getExecutable() char *buf = new char[size]; memset( buf, 0, size ); if ( _NSGetExecutablePath( buf, &size ) == 0 ) - exe = std::string( buf ); + strcpy( exe, buf ); delete[] buf; #elif defined( USE_WINDOWS ) DWORD size = 0x10000; char *buf = new char[size]; memset( buf, 0, size ); GetModuleFileName( nullptr, buf, size ); - exe = std::string( buf ); + strcpy( exe, buf ); delete[] buf; #endif } catch ( ... 
) { } - return exe; + return true; } -std::string global_exe_name = StackTrace::getExecutable(); +static bool global_exe_name_set = setGlobalExecutableName( global_exe_name ); +std::string StackTrace::getExecutable() +{ + if ( !global_exe_name_set ) + global_exe_name_set = setGlobalExecutableName( global_exe_name ); + return std::string( global_exe_name ); +} + + +/**************************************************************************** + * Function to get symbols for the executable from nm (if availible) * + * Note: this function maintains an internal cached copy to prevent * + * exccessive calls to nm. This function also uses a lock to ensure * + * thread safety. * + ****************************************************************************/ +std::mutex getSymbols_mutex; +struct global_symbols_struct { + std::vector address; + std::vector type; + std::vector obj; + int error; +} global_symbols; static const global_symbols_struct &getSymbols2() { static bool loaded = false; @@ -486,20 +568,20 @@ static const global_symbols_struct &getSymbols2() try { char cmd[1024]; #ifdef USE_LINUX - sprintf( cmd, "nm -n --demangle %s", global_exe_name.c_str() ); + sprintf( cmd, "nm -n --demangle %s", global_exe_name ); #elif defined( USE_MAC ) - sprintf( cmd, "nm -n %s | c++filt", global_exe_name.c_str() ); + sprintf( cmd, "nm -n %s | c++filt", global_exe_name ); #else #error Unknown OS using nm #endif int code; auto output = breakString( StackTrace::exec( cmd, code ) ); - for ( const auto& line : output ) { + for ( const auto &line : output ) { if ( line.empty() ) continue; if ( line[0] == ' ' ) continue; - char *a = const_cast(line.c_str()); + auto *a = const_cast( line.c_str() ); char *b = strchr( a, ' ' ); if ( b == nullptr ) continue; @@ -512,11 +594,11 @@ static const global_symbols_struct &getSymbols2() c++; char *d = strchr( c, '\n' ); if ( d ) - d[0] = 0; + d[0] = 0; size_t add = strtoul( a, nullptr, 16 ); data.address.push_back( reinterpret_cast( add ) ); 
data.type.push_back( b[0] ); - data.obj.push_back( std::string( c ) ); + data.obj.emplace_back( c ); } } catch ( ... ) { data.error = -3; @@ -530,8 +612,9 @@ static const global_symbols_struct &getSymbols2() } return data; } -int StackTrace::getSymbols( - std::vector &address, std::vector &type, std::vector &obj ) +int StackTrace::getSymbols( std::vector &address, + std::vector &type, + std::vector &obj ) { const global_symbols_struct &data = getSymbols2(); address = data.address; @@ -542,12 +625,12 @@ int StackTrace::getSymbols( /**************************************************************************** -* Function to get call stack info * -****************************************************************************/ + * Function to get call stack info * + ****************************************************************************/ #ifdef USE_MAC -static void *loadAddress( const std::string& object ) +static void *loadAddress( const std::string &object ) { - static std::map obj_map; + static std::map obj_map; if ( obj_map.empty() ) { uint32_t numImages = _dyld_image_count(); for ( uint32_t i = 0; i < numImages; i++ ) { @@ -603,19 +686,21 @@ static std::tuple split_atos( const } #endif #ifdef USE_LINUX - typedef uint64_t uint_p; -#elif defined(USE_MAC) - typedef unsigned long uint_p; +using uint_p = uint64_t; +#elif defined( USE_MAC ) +typedef unsigned long uint_p; #endif #if defined( USE_LINUX ) || defined( USE_MAC ) -static inline std::string generateCmd( const std::string& s1, - const std::string& s2, const std::string& s3, - std::vector addresses, const std::string& s4 ) +static inline std::string generateCmd( const std::string &s1, + const std::string &s2, + const std::string &s3, + std::vector addresses, + const std::string &s4 ) { std::string cmd = s1 + s2 + s3; - for (size_t i=0; i( addresses[i] ) ); + sprintf( tmp, "%lx ", reinterpret_cast( addresse ) ); cmd += tmp; } cmd += s4; @@ -635,6 +720,8 @@ static void getFileAndLineObject( std::vector &info ) 
address_list[i] = info[i]->address; if ( info[i]->object.find( ".so" ) != std::string::npos ) address_list[i] = info[i]->address2; + if ( info[i]->object.find( ".mexa64" ) != std::string::npos ) + address_list[i] = info[i]->address2; } std::string cmd = generateCmd( "addr2line -C -e ", info[0]->object, " -f -i ", address_list, " 2> /dev/null" ); @@ -696,9 +783,9 @@ static void getFileAndLine( std::vector &info ) { // Build a list of stack elements for each object std::map> obj_map; - for (size_t i=0; i 0 ) info.object = global_symbols.obj[index - 1]; else - info.object = global_exe_name; + info.object = std::string(global_exe_name); } } static void signal_handler( int sig ) @@ -799,10 +886,9 @@ std::vector StackTrace::getStackInfo( const std::vector< info[i].function = std::string( dlinfo.dli_sname ); } free( demangled ); - #else - if ( dlinfo.dli_sname != NULL ) - info[i].function = std::string( dlinfo.dli_sname ); #endif + if ( dlinfo.dli_sname != nullptr && info[i].function.empty() ) + info[i].function = std::string( dlinfo.dli_sname ); #else getDataFromGlobalSymbols( info[i] ); #endif @@ -820,25 +906,23 @@ std::vector StackTrace::getStackInfo( const std::vector< /**************************************************************************** * Function to get the backtrace * ****************************************************************************/ +static int backtrace_thread( const std::thread::native_handle_type&, void**, size_t ); #if defined( USE_LINUX ) || defined( USE_MAC ) -static std::vector thread_backtrace; -static bool thread_backtrace_finished; +static int thread_backtrace_count; +static void* thread_backtrace[1000]; static std::mutex thread_backtrace_mutex; static void _callstack_signal_handler( int, siginfo_t*, void* ) { - thread_backtrace = StackTrace::backtrace( ); - thread_backtrace_finished = true; + thread_backtrace_count = backtrace_thread( StackTrace::thisThread(), thread_backtrace, 1000 ); } #endif -std::vector StackTrace::backtrace( 
std::thread::native_handle_type tid ) +static int backtrace_thread( const std::thread::native_handle_type& tid, void **buffer, size_t size ) { - std::vector trace; + int count = 0; #if defined( USE_LINUX ) || defined( USE_MAC ) // Get the trace if ( tid == pthread_self() ) { - trace.resize(1000,nullptr); - int trace_size = ::backtrace( trace.data(), trace.size() ); - trace.resize (trace_size ); + count = ::backtrace( buffer, size ); } else { // Note: this will get the backtrace, but terminates the thread in the process!!! thread_backtrace_mutex.lock(); @@ -846,17 +930,18 @@ std::vector StackTrace::backtrace( std::thread::native_handle_type tid ) sigfillset(&sa.sa_mask); sa.sa_flags = SA_SIGINFO; sa.sa_sigaction = _callstack_signal_handler; - sigaction(CALLSTACK_SIG, &sa, NULL); - thread_backtrace_finished = false; + sigaction(CALLSTACK_SIG, &sa, nullptr); + thread_backtrace_count = -1; pthread_kill( tid, CALLSTACK_SIG ); auto t1 = std::chrono::high_resolution_clock::now(); auto t2 = std::chrono::high_resolution_clock::now(); - while ( !thread_backtrace_finished && std::chrono::duration(t2-t1).count()<0.1 ) { + while ( thread_backtrace_count==-1 && std::chrono::duration(t2-t1).count()<0.15 ) { std::this_thread::yield(); t2 = std::chrono::high_resolution_clock::now(); } - std::swap( trace, thread_backtrace ); - thread_backtrace_finished = false; + count = std::max(thread_backtrace_count,0); + memcpy( buffer, thread_backtrace, count*sizeof(void*) ); + thread_backtrace_count = -1; thread_backtrace_mutex.unlock(); } #elif defined( USE_WINDOWS ) @@ -902,7 +987,6 @@ std::vector StackTrace::backtrace( std::thread::native_handle_type tid ) #error "Platform not supported!" 
#endif - trace.reserve( 1000 ); auto pid = GetCurrentProcess(); for ( int frameNum = 0; frameNum<1024; ++frameNum ) { BOOL rtn = StackWalk64( imageType, pid, tid, &frame, &context, readProcMem, @@ -911,10 +995,10 @@ std::vector StackTrace::backtrace( std::thread::native_handle_type tid ) printf( "ERROR: StackWalk64 (%p)\n", frame.AddrPC.Offset ); break; } - - if ( frame.AddrPC.Offset != 0 ) - trace.push_back( reinterpret_cast( frame.AddrPC.Offset ) ); - + if ( frame.AddrPC.Offset != 0 ) { + buffer[count] = reinterpret_cast( frame.AddrPC.Offset ) ); + count++; + } if ( frame.AddrReturn.Offset == 0 ) break; } @@ -923,11 +1007,20 @@ std::vector StackTrace::backtrace( std::thread::native_handle_type tid ) #else #warning Stack trace is not supported on this compiler/OS #endif + return count; +} +std::vector StackTrace::backtrace( std::thread::native_handle_type tid ) +{ + std::vector trace( 1000, nullptr ); + size_t count = backtrace_thread( tid, trace.data(), trace.size() ); + trace.resize(count); return trace; } std::vector StackTrace::backtrace() { - std::vector trace = backtrace( thisThread() ); + std::vector trace( 1000, nullptr ); + size_t count = backtrace_thread( thisThread(), trace.data(), trace.size() ); + trace.resize(count); return trace; } std::vector> StackTrace::backtraceAll() @@ -935,10 +1028,14 @@ std::vector> StackTrace::backtraceAll() // Get the list of threads auto threads = activeThreads( ); // Get the backtrace of each thread - std::vector> thread_backtrace; - for ( auto thread : threads ) - thread_backtrace.push_back( backtrace( thread ) ); - return thread_backtrace; + std::vector> trace(threads.size()); + size_t i = 0; + for ( auto it=threads.begin(); i> StackTrace::backtraceAll() ****************************************************************************/ #if defined( USE_LINUX ) static std::thread::native_handle_type thread_handle; +static bool thread_id_finished; static void _activeThreads_signal_handler( int ) { auto handle = 
StackTrace::thisThread( ); thread_handle = handle; - thread_backtrace_finished = true; + thread_id_finished = true; } static inline int get_tid( int pid, const std::string& line ) { - char buf2[128]; + char buf2[128]={0}; int i1 = 0; while ( line[i1]==' ' && line[i1]!=0 ) { i1++; } int i2 = i1; @@ -1006,12 +1104,12 @@ std::set StackTrace::activeThreads( ) signal( CALLSTACK_SIG, _activeThreads_signal_handler ); for ( auto tid2 : tid ) { thread_backtrace_mutex.lock(); - thread_backtrace_finished = false; + thread_id_finished = false; thread_handle = thisThread(); syscall( SYS_tgkill, pid, tid2, CALLSTACK_SIG ); auto t1 = std::chrono::high_resolution_clock::now(); auto t2 = std::chrono::high_resolution_clock::now(); - while ( !thread_backtrace_finished && std::chrono::duration(t2-t1).count()<0.1 ) { + while ( !thread_id_finished && std::chrono::duration(t2-t1).count()<0.1 ) { std::this_thread::yield(); t2 = std::chrono::high_resolution_clock::now(); } @@ -1043,54 +1141,57 @@ std::set StackTrace::activeThreads( ) #warning activeThreads is not yet supported on this compiler/OS #endif threads.insert( thisThread() ); + if ( globalMonitorThread ) + threads.erase( globalMonitorThread->native_handle() ); return threads; } // clang-format on /**************************************************************************** -* Function to get the current call stack * -****************************************************************************/ + * Function to get the current call stack * + ****************************************************************************/ std::vector StackTrace::getCallStack() { auto trace = StackTrace::backtrace(); - auto info = getStackInfo(trace); + auto info = getStackInfo( trace ); return info; } std::vector StackTrace::getCallStack( std::thread::native_handle_type id ) { auto trace = StackTrace::backtrace( id ); - auto info = getStackInfo(trace); + auto info = getStackInfo( trace ); return info; } -static StackTrace::multi_stack_info 
generateMultiStack( const std::vector>& thread_backtrace ) +static StackTrace::multi_stack_info +generateMultiStack( const std::vector> &thread_backtrace ) { // Get the stack data for all pointers - std::set addresses_set; - for (const auto& trace : thread_backtrace ) { - for (auto ptr : trace ) + std::set addresses_set; + for ( const auto &trace : thread_backtrace ) { + for ( auto ptr : trace ) addresses_set.insert( ptr ); } - std::vector addresses( addresses_set.begin(), addresses_set.end() ); + std::vector addresses( addresses_set.begin(), addresses_set.end() ); auto stack_data = StackTrace::getStackInfo( addresses ); - std::map map_data; - for ( size_t i=0; i map_data; + for ( size_t i = 0; i < addresses.size(); i++ ) map_data.insert( std::make_pair( addresses[i], stack_data[i] ) ); // Create the multi-stack trace StackTrace::multi_stack_info multistack; - for ( const auto& trace : thread_backtrace ) { + for ( const auto &trace : thread_backtrace ) { if ( trace.empty() ) continue; // Create the stack for the given thread trace std::vector stack( trace.size() ); - for (size_t i=0; i StackTrace::allSignalsToCatch() { std::set signals; - for (int i=1; i<32; i++) + for ( int i = 1; i < 32; i++ ) signals.insert( i ); - for (int i=SIGRTMIN; i<=SIGRTMAX; i++) + for ( int i = SIGRTMIN; i <= SIGRTMAX; i++ ) signals.insert( i ); signals.erase( SIGKILL ); signals.erase( SIGSTOP ); @@ -1352,15 +1449,15 @@ std::vector StackTrace::defaultSignalsToCatch() { auto tmp = allSignalsToCatch(); std::set signals( tmp.begin(), tmp.end() ); - signals.erase( SIGWINCH ); // Don't catch window changed by default - signals.erase( SIGCONT ); // Don't catch continue by default + signals.erase( SIGWINCH ); // Don't catch window changed by default + signals.erase( SIGCONT ); // Don't catch continue by default return std::vector( signals.begin(), signals.end() ); } /**************************************************************************** -* Set the signal handlers * 
-****************************************************************************/ + * Set the signal handlers * + ****************************************************************************/ static std::function abort_fun; static std::string rethrow() { @@ -1398,7 +1495,7 @@ static void term_func() } void StackTrace::clearSignal( int sig ) { - if ( signals_set.find(sig) != signals_set.end() ) { + if ( signals_set.find( sig ) != signals_set.end() ) { signal( sig, SIG_DFL ); signals_set.erase( sig ); } @@ -1409,7 +1506,7 @@ void StackTrace::clearSignals() signal( sig, SIG_DFL ); signals_set.clear(); } -void StackTrace::setSignals( const std::vector& signals, void (*handler) (int) ) +void StackTrace::setSignals( const std::vector &signals, void ( *handler )( int ) ) { for ( auto sig : signals ) { signal( sig, handler ); @@ -1427,12 +1524,11 @@ void StackTrace::setErrorHandlers( /**************************************************************************** -* Global call stack functionallity * -****************************************************************************/ + * Global call stack functionallity * + ****************************************************************************/ #ifdef USE_MPI static MPI_Comm globalCommForGlobalCommStack = MPI_COMM_NULL; -static std::shared_ptr globalMonitorThread; -static bool stopGlobalMonitorThread = false; +static bool stopGlobalMonitorThread = false; static void runGlobalMonitorThread() { int rank = 0; @@ -1445,7 +1541,7 @@ static void runGlobalMonitorThread() MPI_Status status; int err = MPI_Iprobe( MPI_ANY_SOURCE, 1, globalCommForGlobalCommStack, &flag, &status ); if ( err != MPI_SUCCESS ) { - printf("Internal error in StackTrace::getGlobalCallStacks::runGlobalMonitorThread\n"); + printf( "Internal error in StackTrace::getGlobalCallStacks::runGlobalMonitorThread\n" ); break; } else if ( flag != 0 ) { // We received a request @@ -1453,8 +1549,8 @@ static void runGlobalMonitorThread() int tag; MPI_Recv( &tag, 1, MPI_INT, 
src_rank, 1, globalCommForGlobalCommStack, &status ); // Get a trace of all threads (except this) - auto threads = StackTrace::activeThreads( ); - threads.erase( StackTrace::thisThread( ) ); + auto threads = StackTrace::activeThreads(); + threads.erase( StackTrace::thisThread() ); if ( threads.empty() ) continue; // Get the stack trace of each thread @@ -1467,38 +1563,52 @@ static void runGlobalMonitorThread() MPI_Send( data.data(), count, MPI_CHAR, src_rank, tag, globalCommForGlobalCommStack ); } else { // No requests recieved - std::this_thread::sleep_for( std::chrono::milliseconds(50) ); + std::this_thread::sleep_for( std::chrono::milliseconds( 50 ) ); } } } void StackTrace::globalCallStackInitialize( MPI_Comm comm ) { - #ifdef USE_MPI - MPI_Comm_dup( comm, &globalCommForGlobalCommStack ); - #endif +#ifdef USE_MPI + MPI_Comm_dup( comm, &globalCommForGlobalCommStack ); +#endif stopGlobalMonitorThread = false; globalMonitorThread.reset( new std::thread( runGlobalMonitorThread ) ); } -void StackTrace::globalCallStackFinalize( ) +void StackTrace::globalCallStackFinalize() { stopGlobalMonitorThread = true; globalMonitorThread->join(); globalMonitorThread.reset(); - #ifdef USE_MPI - if ( globalCommForGlobalCommStack ) - MPI_Comm_free( &globalCommForGlobalCommStack ); - #endif +#ifdef USE_MPI + if ( globalCommForGlobalCommStack != MPI_COMM_NULL ) + MPI_Comm_free( &globalCommForGlobalCommStack ); + globalCommForGlobalCommStack = MPI_COMM_NULL; +#endif } -StackTrace::multi_stack_info StackTrace::getGlobalCallStacks( ) +StackTrace::multi_stack_info StackTrace::getGlobalCallStacks() { // Check if we properly initialized the comm if ( globalMonitorThread == nullptr ) { - printf("Warning: getGlobalCallStacks called without call to globalCallStackInitialize\n"); - return getAllCallStacks( ); + printf( "Warning: getGlobalCallStacks called without call to globalCallStackInitialize\n" ); + return getAllCallStacks(); } - if ( activeThreads().size()==1 ) { - printf("Warning: 
getAllCallStacks not supported on this OS, defaulting to basic call stack\n"); - return getAllCallStacks( ); + if ( globalMonitorThread == nullptr ) { + printf( "Warning: getGlobalCallStacks called without call to globalCallStackInitialize\n" ); + return getAllCallStacks(); + } +#ifdef USE_MPI + int provided; + MPI_Query_thread( &provided ); + if ( provided != MPI_THREAD_MULTIPLE ) { + printf( "Warning: getGlobalCallStacks requires support for MPI_THREAD_MULTIPLE\n" ); + return getAllCallStacks(); + } +#endif + if ( activeThreads().size() == 1 ) { + printf( "Warning: getAllCallStacks not supported on this OS, defaulting to basic call " + "stack\n" ); + return getAllCallStacks(); } // Signal all processes that we want their stack for all threads int rank = 0; @@ -1506,34 +1616,33 @@ StackTrace::multi_stack_info StackTrace::getGlobalCallStacks( ) MPI_Comm_size( globalCommForGlobalCommStack, &size ); MPI_Comm_rank( globalCommForGlobalCommStack, &rank ); std::random_device rd; - std::mt19937 gen(rd()); - std::uniform_int_distribution<> dis(2,0x7FFF); - int tag = dis(gen); + std::mt19937 gen( rd() ); + std::uniform_int_distribution<> dis( 2, 0x7FFF ); + int tag = dis( gen ); std::vector sendRequest( size ); - for (int i=0; inative_handle() ); + auto threads = StackTrace::activeThreads(); StackTrace::multi_stack_info multistack; for ( auto thread : threads ) { auto stack = StackTrace::getCallStack( thread ); multistack.add( stack.size(), stack.data() ); } // Recieve the backtrace for all processes/threads - int N_finished = 1; - auto start = std::chrono::steady_clock::now(); - double time = 0; - const double max_time = 2.0 + size*20e-3; - while ( N_finished data( count, 0 ); - MPI_Recv( data.data(), count, MPI_CHAR, src_rank, tag, globalCommForGlobalCommStack, &status ); + MPI_Recv( data.data(), + count, + MPI_CHAR, + src_rank, + tag, + globalCommForGlobalCommStack, + &status ); auto stack_list = unpack( data ); - for ( const auto& stack : stack_list ) + for ( const auto 
&stack : stack_list ) multistack.add( stack.size(), stack.data() ); N_finished++; } else { auto stop = std::chrono::steady_clock::now(); - time = std::chrono::duration_cast(stop-start).count(); + time = std::chrono::duration_cast( stop - start ).count(); std::this_thread::yield(); } } + for ( int i = 0; i < size; i++ ) { + if ( i == rank ) + continue; + MPI_Request_free( &sendRequest[i] ); + } return multistack; } #else -void StackTrace::globalCallStackInitialize( MPI_Comm ) -{ -} -void StackTrace::globalCallStackFinalize( ) -{ -} -StackTrace::multi_stack_info StackTrace::getGlobalCallStacks( ) -{ - return getAllCallStacks( ); -} +void StackTrace::globalCallStackInitialize( MPI_Comm ) {} +void StackTrace::globalCallStackFinalize() {} +StackTrace::multi_stack_info StackTrace::getGlobalCallStacks() { return getAllCallStacks(); } #endif + +/**************************************************************************** + * Cleanup the call stack * + ****************************************************************************/ +static inline size_t findMatching( const std::string &str, size_t pos ) +{ + if ( str[pos] != '<' ) { + perr << "Internal error string matching\n"; + perr << " " << str << std::endl; + perr << " " << pos << std::endl; + return pos; + } + size_t pos2 = pos + 1; + int count = 1; + while ( count != 0 && pos2 < str.size() ) { + if ( str[pos2] == '<' ) + count++; + if ( str[pos2] == '>' ) + count--; + pos2++; + } + return pos2; +} +void StackTrace::cleanupStackTrace( multi_stack_info &stack ) +{ + auto it = stack.children.begin(); + const size_t npos = std::string::npos; + while ( it != stack.children.end() ) { + auto &object = it->stack.object; + auto &function = it->stack.function; + auto &filename = it->stack.filename; + bool remove_entry = false; + // Cleanup object and filename + object = stripPath( object ); + filename = stripPath( filename ); + // Remove callstack (and all children) for threads that are just contributing + if ( function.find( 
"_callstack_signal_handler" ) != npos && + filename.find( "StackTrace.cpp" ) != npos ) { + it = stack.children.erase( it ); + continue; + } + // Remove __libc_start_main + if ( function.find( "__libc_start_main" ) != npos && + filename.find( "libc-start.c" ) != npos ) + remove_entry = true; + // Remove backtrace_thread + if ( function.find( "backtrace_thread" ) != npos && + filename.find( "StackTrace.cpp" ) != npos ) + remove_entry = true; + // Remove __restore_rt + if ( function.find( "__restore_rt" ) != npos && object.find( "libpthread" ) != npos ) + remove_entry = true; + // Remove std::condition_variable::__wait_until_impl + if ( function.find( "std::condition_variable::__wait_until_impl" ) != npos && + filename == "condition_variable" ) + remove_entry = true; + // Remove std::_Function_handler< + if ( function.find( "std::_Function_handler<" ) != npos && filename == "functional" ) + remove_entry = true; + // Remove std::_Bind_simple< + if ( function.find( "std::_Bind_simple<" ) != npos && filename == "functional" ) { + auto pos = function.find( "std::_Bind_simple<" ); + function = function.substr( 0, pos ) + "std::_Bind_simple<...>(...)"; + remove_entry = true; + } + // Remove std::this_thread::__sleep_for + if ( function.find( "std::this_thread::__sleep_for(" ) != npos && + object.find( "libstdc++" ) != npos ) + remove_entry = true; + // Remove std::thread::_Impl + if ( function.find( "std::thread::_Impl<" ) != npos && filename == "thread" ) + remove_entry = true; + // Remove MATLAB internal routines + if ( object == "libmwmcr.so" || object == "libmwm_lxe.so" || object == "libmwbridge.so" || + object == "libmwiqm.so" ) + remove_entry = true; + // Remove the desired entry + if ( remove_entry ) { + if ( it->children.empty() ) { + it = stack.children.erase( it ); + continue; + } else if ( it->children.size() == 1 ) { + *it = it->children[0]; + continue; + } + } + // Cleanup template space + strrep( function, " >", ">" ); + strrep( function, "< ", "<" ); + // 
Replace std::chrono::duration with abbriviated version + if ( function.find( "std::chrono::duration<" ) != npos ) { + strrep( function, "std::chrono::duration >", "ticks" ); + strrep( function, + "std::chrono::duration >", + "nanoseconds" ); + } + // Replace std::ratio with abbriviated version. + if ( function.find( "std::ratio<" ) != npos ) { + strrep( function, "std::ratio<1l, 1000000000000000000000000l>", "std::yocto" ); + strrep( function, "std::ratio<1l, 1000000000000000000000l>", "std::zepto" ); + strrep( function, "std::ratio<1l, 1000000000000000000l>", "std::atto" ); + strrep( function, "std::ratio<1l, 1000000000000000l>", "std::femto" ); + strrep( function, "std::ratio<1l, 1000000000000l>", "std::pico" ); + strrep( function, "std::ratio<1l, 1000000000l>", "std::nano" ); + strrep( function, "std::ratio<1l, 1000000l>", "std::micro" ); + strrep( function, "std::ratio<1l, 1000l>", "std::milli" ); + strrep( function, "std::ratio<1l, 100l>", "std::centi" ); + strrep( function, "std::ratio<1l, 10l>", "std::deci" ); + strrep( function, "std::ratio<1l, 1l>", "" ); + strrep( function, "std::ratio<10l, 1l>", "std::deca" ); + strrep( function, "std::ratio<60l, 1l>", "std::ratio<60>" ); + strrep( function, "std::ratio<100l, 1l>", "std::hecto" ); + strrep( function, "std::ratio<1000l, 1l>", "std::kilo" ); + strrep( function, "std::ratio<3600l, 1l>", "std::ratio<3600>" ); + strrep( function, "std::ratio<1000000l, 1l>", "std::mega" ); + strrep( function, "std::ratio<1000000000l, 1l>", "std::giga" ); + strrep( function, "std::ratio<1000000000000l, 1l>", "std::tera" ); + strrep( function, "std::ratio<1000000000000000l, 1l>", "std::peta" ); + strrep( function, "std::ratio<1000000000000000000l, 1l>", "std::exa" ); + strrep( function, "std::ratio<1000000000000000000000l, 1l>", "std::zetta" ); + strrep( function, "std::ratio<1000000000000000000000000l, 1l>", "std::yotta" ); + strrep( function, " >", ">" ); + strrep( function, "< ", "<" ); + } + // Replace std::chrono::duration 
with abbriviated version. + if ( function.find( "std::chrono::duration<" ) != npos ) { + // clang-format off + strrep( function, "std::chrono::duration", "std::chrono::nanoseconds" ); + strrep( function, "std::chrono::duration", "std::chrono::microseconds" ); + strrep( function, "std::chrono::duration", "std::chrono::milliseconds" ); + strrep( function, "std::chrono::duration", "std::chrono::seconds" ); + strrep( function, "std::chrono::duration", "std::chrono::seconds" ); + strrep( function, "std::chrono::duration>", "std::chrono::minutes" ); + strrep( function, "std::chrono::duration>", "std::chrono::hours" ); + strrep( function, " >", ">" ); + strrep( function, "< ", "<" ); + // clang-format on + } + // Replace std::this_thread::sleep_for with abbriviated version. + if ( function.find( "::sleep_for<" ) != npos ) { + strrep( function, "::sleep_for", "::sleep_for" ); + strrep( function, "::sleep_for", "::sleep_for" ); + strrep( function, "::sleep_for", "::sleep_for" ); + strrep( function, "::sleep_for", "::sleep_for" ); + strrep( function, "::sleep_for", "::sleep_for" ); + strrep( function, "::sleep_for>", "::sleep_for" ); + strrep( function, "::sleep_for>", "::sleep_for" ); + strrep( function, + "::sleep_for(std::chrono::nanoseconds", + "::sleep_for(std::chrono::nanoseconds" ); + strrep( function, + "::sleep_for(std::chrono::microseconds", + "::sleep_for(std::chrono::microseconds" ); + strrep( function, + "::sleep_for(std::chrono::milliseconds", + "::sleep_for(std::chrono::milliseconds" ); + strrep( function, + "::sleep_for(std::chrono::seconds", + "::sleep_for(std::chrono::seconds" ); + strrep( function, + "::sleep_for(std::chrono::minutes", + "::sleep_for(std::chrono::milliseconds" ); + strrep( function, + "::sleep_for(std::chrono::hours", + "::sleep_for(std::chrono::hours" ); + } + // Replace std::basic_string with abbriviated version + size_t pos = 0; + while ( pos < function.size() ) { + // Find next instance of std::basic_string + const std::string match = 
"std::basic_string<"; + pos = function.find( match, pos ); + if ( pos == npos ) + break; + // Find the matching > + size_t pos1 = pos + match.size() - 1; + size_t pos2 = findMatching( function, pos1 ); + if ( pos2 == pos1 ) + break; + if ( function.substr( pos1 + 1, 4 ) == "char" ) + function.replace( pos, pos2 - pos, "std::string" ); + else if ( function.substr( pos1 + 1, 7 ) == "wchar_t" ) + function.replace( pos, pos2 - pos, "std::wstring" ); + else if ( function.substr( pos1 + 1, 8 ) == "char16_t" ) + function.replace( pos, pos2 - pos, "std::u16string" ); + else if ( function.substr( pos1 + 1, 8 ) == "char32_t" ) + function.replace( pos, pos2 - pos, "std::u32string" ); + pos++; + } + // Cleanup the children + cleanupStackTrace( *it ); + ++it; + } +} diff --git a/common/StackTrace.h b/common/StackTrace.h index f3ca5698..8d436bf7 100644 --- a/common/StackTrace.h +++ b/common/StackTrace.h @@ -1,14 +1,11 @@ -#ifndef included_AtomicStackTrace -#define included_AtomicStackTrace +#ifndef included_StackTrace +#define included_StackTrace #include #include -#include -#include -#include -#include -#include #include +#include +#include // Check for and include MPI @@ -39,35 +36,51 @@ struct stack_info { int line; //! Default constructor stack_info() : address( nullptr ), address2( nullptr ), line( 0 ) {} + //! Reset the stack + void clear(); //! Operator== - bool operator==( const stack_info& rhs ) const; + bool operator==( const stack_info &rhs ) const; //! Operator!= - bool operator!=( const stack_info& rhs ) const; + bool operator!=( const stack_info &rhs ) const; + //! Get the minimum width to print the addresses + int getAddressWidth() const; //! Print the stack info - std::string print() const; + std::string print( int widthAddress = 16, int widthObject = 20, int widthFunction = 32 ) const; //! Compute the number of bytes needed to store the object size_t size() const; //! 
Pack the data to a byte array, returning a pointer to the end of the data - char* pack( char* ptr ) const; + char *pack( char *ptr ) const; //! Unpack the data from a byte array, returning a pointer to the end of the data - const char* unpack( const char* ptr ); + const char *unpack( const char *ptr ); //! Pack a vector of data to a memory block - static std::vector packArray( const std::vector& data ); + static std::vector packArray( const std::vector &data ); //! Unpack a vector of data from a memory block - static std::vector unpackArray( const char* data ); + static std::vector unpackArray( const char *data ); }; struct multi_stack_info { - int N; - stack_info stack; - std::vector children; + int N; // Number of threads/processes + stack_info stack; // Current stack item + std::vector children; // Children //! Default constructor multi_stack_info() : N( 0 ) {} + //! Construct from a simple call stack + explicit multi_stack_info( const std::vector & ); + //! Copy constructor from a simple call stack + multi_stack_info &operator=( const std::vector & ); + //! Reset the stack + void clear(); //! Add the given stack to the multistack - void add( size_t N, const stack_info *stack ); + void add( size_t len, const stack_info *stack ); //! Print the stack info - std::vector print( const std::string& prefix=std::string() ) const; + std::vector print( const std::string &prefix = std::string() ) const; + +private: + void print2( const std::string &prefix, int w[3], std::vector &text ) const; + int getAddressWidth() const; + int getObjectWidth() const; + int getFunctionWidth() const; }; @@ -95,7 +108,7 @@ std::vector getCallStack( std::thread::native_handle_type id ); * Note: This functionality may not be availible on all platforms * @return Returns vector containing the stack */ -multi_stack_info getAllCallStacks( ); +multi_stack_info getAllCallStacks(); /*! 
@@ -107,7 +120,17 @@ multi_stack_info getAllCallStacks( ); * Note: This functionality may not be availible on all platforms * @return Returns vector containing the stack */ -multi_stack_info getGlobalCallStacks( ); +multi_stack_info getGlobalCallStacks(); + + +/*! + * @brief Clean up the stack trace + * @details This function modifies the stack trace to remove entries + * related to acquiring the stack trace in an attempt to make it + * more useful for display/users. + * @param[in,out] stack The stack trace to modify + */ +void cleanupStackTrace( multi_stack_info &stack ); //! Function to return the current call stack for the current thread @@ -136,8 +159,9 @@ std::string signalName( int signal ); * Return the symbols from the current executable (not availible for all platforms) * @return Returns 0 if sucessful */ -int getSymbols( - std::vector &address, std::vector &type, std::vector &obj ); +int getSymbols( std::vector &address, + std::vector &type, + std::vector &obj ); /*! @@ -159,16 +183,17 @@ enum class terminateType { signal, exception }; /*! * Set the error handlers - * @param[in] Function to terminate the program: abort(msg,type) + * @param[in] abort Function to terminate the program: abort(msg,type) */ void setErrorHandlers( std::function abort ); /*! * Set the given signals to the handler - * @param[in] Function to terminate the program: abort(msg,type) + * @param[in] signals Signals to handle + * @param[in] handler Function to terminate the program: abort(msg,type) */ -void setSignals( const std::vector& signals, void (*handler) (int) ); +void setSignals( const std::vector &signals, void ( *handler )( int ) ); //! Clear a signal set by setSignals @@ -176,28 +201,28 @@ void clearSignal( int signal ); //! Clear all signals set by setSignals -void clearSignals( ); +void clearSignals(); //! Return a list of all signals that can be caught -std::vector allSignalsToCatch( ); +std::vector allSignalsToCatch(); //! 
Return a default list of signals to catch -std::vector defaultSignalsToCatch( ); +std::vector defaultSignalsToCatch(); //! Get a list of the active threads -std::set activeThreads( ); +std::set activeThreads(); //! Get a handle to this thread -std::thread::native_handle_type thisThread( ); +std::thread::native_handle_type thisThread(); //! Initialize globalCallStack functionallity void globalCallStackInitialize( MPI_Comm comm ); //! Clean up globalCallStack functionallity -void globalCallStackFinalize( ); +void globalCallStackFinalize(); /*! @@ -208,9 +233,10 @@ void globalCallStackFinalize( ); * @param[out] exit_code Exit code returned from child process * @return Returns string containing the output */ -std::string exec( const std::string& cmd, int& exit_code ); +std::string exec( const std::string &cmd, int &exit_code ); } // namespace StackTrace + #endif diff --git a/common/UnitTest.cpp b/common/UnitTest.cpp index febc535c..b995fa68 100755 --- a/common/UnitTest.cpp +++ b/common/UnitTest.cpp @@ -1,345 +1,379 @@ -#include -#include -#include -#include #include "common/UnitTest.h" #include "common/Utilities.h" +#include +#include +#include +#include +#include -#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64) - // Windows - // Sleep is defined in milliseconds -#else - // Linux - // usleep is defined in microseconds, create a Sleep command - #define Sleep(x) usleep(x*1000) -#endif - +#define pout std::cout +#define printp printf /******************************************************************** -* Empty Constructor * -********************************************************************/ -UnitTest::UnitTest() { - #ifdef USE_MPI - comm = MPI_COMM_WORLD; - #endif + * Constructor/Destructor * + ********************************************************************/ +UnitTest::UnitTest() +{ +#ifdef USE_MPI + comm = MPI_COMM_WORLD; +#endif +} +UnitTest::~UnitTest() { reset(); } +void UnitTest::reset() +{ + mutex.lock(); + // Clear the data 
forcing a reallocation + std::vector().swap( pass_messages ); + std::vector().swap( fail_messages ); + std::vector().swap( expected_fail_messages ); + mutex.unlock(); } /******************************************************************** -* Print a global report * -* Note: only rank 0 will print, all messages will be aggregated * -********************************************************************/ -void UnitTest::report(const int level0) { + * Add a pass, fail, expected failure message in a thread-safe way * + ********************************************************************/ +void UnitTest::passes( const std::string &in ) +{ + mutex.lock(); + pass_messages.push_back( in ); + mutex.unlock(); +} +void UnitTest::failure( const std::string &in ) +{ + mutex.lock(); + fail_messages.push_back( in ); + mutex.unlock(); +} +void UnitTest::expected_failure( const std::string &in ) +{ + mutex.lock(); + expected_fail_messages.push_back( in ); + mutex.unlock(); +} + + +/******************************************************************** + * Print a global report * + * Note: only rank 0 will print, all messages will be aggregated * + ********************************************************************/ +inline std::vector UnitTest::allGather( int value ) const +{ + int size = getSize(); + std::vector data( size, value ); +#ifdef USE_MPI + if ( size > 1 ) + MPI_Allgather( &value, 1, MPI_INT, data.data(), 1, MPI_INT, comm ); +#endif + return data; +} +inline void UnitTest::barrier() const +{ +#ifdef USE_MPI + if ( getSize() > 1 ) + MPI_Barrier( comm ); +#endif +} +static inline void print_messages( const std::vector> &messages ) +{ + if ( messages.size() > 1 ) { + for ( size_t i = 0; i < messages.size(); i++ ) { + if ( !messages[i].empty() ) { + printp( " Proccessor %i:\n", static_cast( i ) ); + for ( const auto &j : messages[i] ) + pout << " " << j << std::endl; + } + } + } else { + for ( const auto &j : messages[0] ) + pout << " " << j << std::endl; + } +} +void 
UnitTest::report( const int level0 ) const +{ + mutex.lock(); int size = getSize(); int rank = getRank(); // Broadcast the print level from rank 0 int level = level0; - #ifdef USE_MPI - if ( getSize() > 1 ) - MPI_Bcast( &level, 1, MPI_INT, 0, comm ); - #endif - if ( level<0 || level > 2 ) - ERROR("Invalid print level"); +#ifdef USE_MPI + if ( getSize() > 1 ) + MPI_Bcast( &level, 1, MPI_INT, 0, comm ); +#endif + if ( level < 0 || level > 2 ) + ERROR( "Invalid print level" ); // Perform a global all gather to get the number of failures per processor - std::vector N_pass(size,0); - std::vector N_fail(size,0); - std::vector N_expected_fail(size,0); - int local_pass_size = (int) pass_messages.size(); - int local_fail_size = (int) fail_messages.size(); - int local_expected_fail_size = (int) expected_fail_messages.size(); - if ( getSize() > 1 ) { - #ifdef USE_MPI - MPI_Allgather( &local_pass_size, 1, MPI_INT, &N_pass[0], 1, MPI_INT, comm); - MPI_Allgather( &local_fail_size, 1, MPI_INT, &N_fail[0], 1, MPI_INT, comm); - MPI_Allgather( &local_expected_fail_size, 1, MPI_INT, &N_expected_fail[0], 1, MPI_INT, comm); - #endif - } else { - N_pass[0] = local_pass_size; - N_fail[0] = local_fail_size; - N_expected_fail[0] = local_expected_fail_size; - } - int N_pass_tot = 0; + auto N_pass = allGather( pass_messages.size() ); + auto N_fail = allGather( fail_messages.size() ); + auto N_expected_fail = allGather( expected_fail_messages.size() ); + int N_pass_tot = 0; + int N_fail_tot = 0; int N_expected_fail_tot = 0; - for (int i=0; i > pass_messages_rank(size); - std::vector< std::vector > fail_messages_rank(size); - std::vector< std::vector > expected_fail_rank(size); + std::vector> pass_messages_rank( size ); + std::vector> fail_messages_rank( size ); + std::vector> expected_fail_rank( size ); // Get the pass messages - if ( ( level==1 && N_pass_tot<=20 ) || level==2 ) { - if ( rank==0 ) { - // Rank 0 should receive all messages - for (int i=0; i0 ) - pass_messages_rank[i] = 
unpack_message_stream(i,1); - } - } else if ( pass_messages.size() ) { - // All other ranks send their message (use non-blocking communication) - pack_message_stream(pass_messages,0,1); - } - } + if ( ( level == 1 && N_pass_tot <= 20 ) || level == 2 ) + pass_messages_rank = UnitTest::gatherMessages( pass_messages, 1 ); // Get the fail messages - if ( level==1 || level==2 ) { - if ( rank==0 ) { - // Rank 0 should receive all messages - for (int i=0; i0 ) - fail_messages_rank[i] = unpack_message_stream(i,2); - } - } else if ( !fail_messages.empty() ){ - // All other ranks send their message (use non-blocking communication) - pack_message_stream(fail_messages,0,2); - } - } + if ( level == 1 || level == 2 ) + fail_messages_rank = UnitTest::gatherMessages( fail_messages, 2 ); // Get the expected_fail messages - if ( ( level==1 && N_expected_fail_tot<=50 ) || level==2 ) { - if ( rank==0 ) { - // Rank 0 should receive all messages - for (int i=0; i0 ) - expected_fail_rank[i] = unpack_message_stream(i,3); - } - } else if ( !expected_fail_messages.empty() ){ - // All other ranks send their message (use non-blocking communication) - pack_message_stream(expected_fail_messages,0,3); - } - } + if ( ( level == 1 && N_expected_fail_tot <= 50 ) || level == 2 ) + expected_fail_rank = UnitTest::gatherMessages( expected_fail_messages, 2 ); // Print the results of all messages (only rank 0 will print) - if ( rank==0 ) { - std::cout << std::endl; + if ( rank == 0 ) { + pout << std::endl; // Print the passed tests - std::cout << "Tests passed" << std::endl; - if ( level==0 || ( level==1 && N_pass_tot>20 ) ) { + pout << "Tests passed" << std::endl; + if ( level == 0 || ( level == 1 && N_pass_tot > 20 ) ) { // We want to print a summary - if ( size>8 ) { + if ( size > 8 ) { // Print 1 summary for all processors - std::cout << " " << N_pass_tot << " tests passed (use report level 2 for more detail)" << std::endl; + printp( " %i tests passed (use report level 2 for more detail)\n", 
N_pass_tot ); } else { // Print a summary for each processor - for (int i=0; i 0 ) { - std::cout << " Proccessor " << i << ":" << std::endl; - for (unsigned int j=0; j8 ) { + if ( size > 8 ) { // Print 1 summary for all processors - std::cout << " " << N_pass_tot << " tests failed (use report level 2 for more detail)" << std::endl; + printp( " %i tests failed (use report level 2 for more detail)\n", N_fail_tot ); } else { // Print a summary for each processor - for (int i=0; i 0 ) { - std::cout << " Processor " << i << ":" << std::endl; - for (unsigned int j=0; j50 ) ) { + pout << "Tests expected failed" << std::endl; + if ( level == 0 || ( level == 1 && N_expected_fail_tot > 50 ) ) { // We want to print a summary - if ( size>8 ) { + if ( size > 8 ) { // Print 1 summary for all processors - std::cout << " " << N_expected_fail_tot << " tests expected failed (use report level 2 for more detail)" << std::endl; + printp( " %i tests expected failed (use report level 2 for more detail)\n", + N_expected_fail_tot ); } else { // Print a summary for each processor - for (int i=0; i 0 ) { - std::cout << " Processor " << i << ":" << std::endl; - for (unsigned int j=0; j 1 ) - MPI_Barrier(comm); - #endif -} - - - -/******************************************************************** -* Pack and send the given messages * -********************************************************************/ -void UnitTest::pack_message_stream(const std::vector& messages, const int rank, const int tag) -{ - #ifdef USE_MPI - // Get the size of the messages - int N_messages = (int) messages.size(); - int *msg_size = new int[N_messages]; - int msg_size_tot = 0; - for (int i=0; i UnitTest::unpack_message_stream(const int rank, const int tag) + * Gather the messages to rank 0 * + ********************************************************************/ +std::vector> UnitTest::gatherMessages( + const std::vector &local_messages, int tag ) const { - #ifdef USE_MPI - // Probe the message to get the message 
size - MPI_Status status; - MPI_Probe(rank,tag,comm,&status); - int size_data=-1; - MPI_Get_count(&status,MPI_BYTE,&size_data); - ASSERT(size_data>=0); - // Allocate memory to receive the data - char *data = new char[size_data]; - // receive the data (using a non-blocking receive) - MPI_Request request; - MPI_Irecv( data, size_data, MPI_CHAR, rank, tag, comm, &request ); - // Wait for the communication to be received - MPI_Wait( &request, &status ); - // Unpack the message stream - int *tmp = (int*) data; - int N_messages = tmp[0]; - int *msg_size = &tmp[1]; - std::vector messages(N_messages); - int k = (N_messages+1)*sizeof(int); - for (int i=0; i> messages( size ); + if ( rank == 0 ) { + // Rank 0 should receive all messages + for ( int i = 0; i < size; i++ ) { + if ( i == 0 ) + messages[i] = local_messages; + else + messages[i] = unpack_message_stream( i, tag ); } - // Delete the temporary memory - delete [] data; - return messages; - #else + } else { + // All other ranks send their message (use non-blocking communication) + pack_message_stream( local_messages, 0, tag ); + } + return messages; +} + + +/******************************************************************** + * Pack and send the given messages * + ********************************************************************/ +void UnitTest::pack_message_stream( + const std::vector &messages, const int rank, const int tag ) const +{ +#ifdef USE_MPI + // Get the size of the messages + auto N_messages = (int) messages.size(); + auto *msg_size = new int[N_messages]; + int msg_size_tot = 0; + for ( int i = 0; i < N_messages; i++ ) { + msg_size[i] = (int) messages[i].size(); + msg_size_tot += msg_size[i]; + } + // Allocate space for the message stream + size_t size_data = ( N_messages + 1 ) * sizeof( int ) + msg_size_tot; + auto *data = new char[size_data]; + // Pack the message stream + memcpy( data, &N_messages, sizeof( int ) ); + memcpy( &data[sizeof( int )], msg_size, N_messages * sizeof( int ) ); + size_t k = 
( N_messages + 1 ) * sizeof( int ); + for ( int i = 0; i < N_messages; i++ ) { + messages[i].copy( &data[k], msg_size[i] ); + k += msg_size[i]; + } + // Send the message stream (using a non-blocking send) + MPI_Request request; + MPI_Isend( data, size_data, MPI_CHAR, rank, tag, comm, &request ); + // Wait for the communication to send and free the temporary memory + MPI_Status status; + MPI_Wait( &request, &status ); + delete[] data; + delete[] msg_size; +#else + NULL_USE( messages ); + NULL_USE( rank ); + NULL_USE( tag ); +#endif +} + + +/******************************************************************** + * Receive and unpack a message stream * + ********************************************************************/ +std::vector UnitTest::unpack_message_stream( const int rank, const int tag ) const +{ +#ifdef USE_MPI + // Probe the message to get the message size + MPI_Status status; + MPI_Probe( rank, tag, comm, &status ); + int size_data = -1; + MPI_Get_count( &status, MPI_BYTE, &size_data ); + ASSERT( size_data >= 0 ); + // Allocate memory to receive the data + auto *data = new char[size_data]; + // receive the data (using a non-blocking receive) + MPI_Request request; + MPI_Irecv( data, size_data, MPI_CHAR, rank, tag, comm, &request ); + // Wait for the communication to be received + MPI_Wait( &request, &status ); + // Unpack the message stream + int N_messages = 0; + memcpy( &N_messages, data, sizeof( int ) ); + if ( N_messages == 0 ) { + delete[] data; return std::vector(); - #endif + } + std::vector msg_size( N_messages ); + std::vector messages( N_messages ); + memcpy( msg_size.data(), &data[sizeof( int )], N_messages * sizeof( int ) ); + int k = ( N_messages + 1 ) * sizeof( int ); + for ( int i = 0; i < N_messages; i++ ) { + messages[i] = std::string( &data[k], msg_size[i] ); + k += msg_size[i]; + } + delete[] data; + return messages; +#else + NULL_USE( rank ); + NULL_USE( tag ); + return std::vector(); +#endif } 
/******************************************************************** -* Other functions * -********************************************************************/ -int UnitTest::getRank() + * Other functions * + ********************************************************************/ +int UnitTest::getRank() const { int rank = 0; - #ifdef USE_MPI - int flag=0; - MPI_Initialized(&flag); - if ( flag ) - MPI_Comm_rank( comm, &rank ); - #endif +#ifdef USE_MPI + int flag = 0; + MPI_Initialized( &flag ); + if ( flag ) + MPI_Comm_rank( comm, &rank ); +#endif return rank; } -int UnitTest::getSize() +int UnitTest::getSize() const { int size = 1; - #ifdef USE_MPI - int flag=0; - MPI_Initialized(&flag); - if ( flag ) - MPI_Comm_size( comm, &size ); - #endif +#ifdef USE_MPI + int flag = 0; + MPI_Initialized( &flag ); + if ( flag ) + MPI_Comm_size( comm, &size ); +#endif return size; } -size_t UnitTest::NumPassGlobal() +size_t UnitTest::NumPassGlobal() const { size_t num = pass_messages.size(); - #ifdef USE_MPI - if ( getSize() > 1 ) { - int send = static_cast(num); - int sum = 0; - MPI_Allreduce( &send, &sum, 1, MPI_INT, MPI_SUM, comm ); - num = static_cast(sum); - } - #endif +#ifdef USE_MPI + if ( getSize() > 1 ) { + auto send = static_cast( num ); + int sum = 0; + MPI_Allreduce( &send, &sum, 1, MPI_INT, MPI_SUM, comm ); + num = static_cast( sum ); + } +#endif return num; } -size_t UnitTest::NumFailGlobal() +size_t UnitTest::NumFailGlobal() const { size_t num = fail_messages.size(); - #ifdef USE_MPI - if ( getSize() > 1 ) { - int send = static_cast(num); - int sum = 0; - MPI_Allreduce( &send, &sum, 1, MPI_INT, MPI_SUM, comm ); - num = static_cast(sum); - } - #endif +#ifdef USE_MPI + if ( getSize() > 1 ) { + auto send = static_cast( num ); + int sum = 0; + MPI_Allreduce( &send, &sum, 1, MPI_INT, MPI_SUM, comm ); + num = static_cast( sum ); + } +#endif return num; } -size_t UnitTest::NumExpectedFailGlobal() +size_t UnitTest::NumExpectedFailGlobal() const { size_t num = 
expected_fail_messages.size(); - #ifdef USE_MPI - if ( getSize() > 1 ) { - int send = static_cast(num); - int sum = 0; - MPI_Allreduce( &send, &sum, 1, MPI_INT, MPI_SUM, comm ); - num = static_cast(sum); - } - #endif +#ifdef USE_MPI + if ( getSize() > 1 ) { + auto send = static_cast( num ); + int sum = 0; + MPI_Allreduce( &send, &sum, 1, MPI_INT, MPI_SUM, comm ); + num = static_cast( sum ); + } +#endif return num; } - - diff --git a/common/UnitTest.h b/common/UnitTest.h index 365bac35..80503d19 100755 --- a/common/UnitTest.h +++ b/common/UnitTest.h @@ -1,11 +1,12 @@ #ifndef included_UnitTest #define included_UnitTest +#include #include -#include #include +#include #ifdef USE_MPI - #include "mpi.h" +#include "mpi.h" #endif @@ -27,78 +28,92 @@ * \endcode */ -class UnitTest { +class UnitTest +{ public: - //! Constructor UnitTest(); - //! Indicate a passed test - virtual void passes (const std::string &in) { pass_messages.push_back(in); } + //! Destructor + virtual ~UnitTest(); - //! Indicate a failed test - virtual void failure (const std::string &in) { fail_messages.push_back(in); } + //! Indicate a passed test (thread-safe) + virtual void passes( const std::string &in ); - //! Indicate an expected failed test - virtual void expected_failure (const std::string &in) { expected_fail_messages.push_back(in); } + //! Indicate a failed test (thread-safe) + virtual void failure( const std::string &in ); + + //! Indicate an expected failed test (thread-safe) + virtual void expected_failure( const std::string &in ); //! Return the number of passed tests locally - virtual size_t NumPassLocal () { return pass_messages.size(); } + virtual size_t NumPassLocal() const { return pass_messages.size(); } //! Return the number of failed tests locally - virtual size_t NumFailLocal () { return fail_messages.size(); } + virtual size_t NumFailLocal() const { return fail_messages.size(); } //! 
Return the number of expected failed tests locally - virtual size_t NumExpectedFailLocal () { return expected_fail_messages.size(); } + virtual size_t NumExpectedFailLocal() const { return expected_fail_messages.size(); } //! Return the number of passed tests locally - virtual size_t NumPassGlobal (); + virtual size_t NumPassGlobal() const; //! Return the number of failed tests locally - virtual size_t NumFailGlobal (); + virtual size_t NumFailGlobal() const; //! Return the number of expected failed tests locally - virtual size_t NumExpectedFailGlobal (); + virtual size_t NumExpectedFailGlobal() const; //! Return the rank of the current processor - int getRank (); + int getRank() const; //! Return the number of processors - int getSize (); + int getSize() const; /*! * Print a report of the passed and failed tests. * Note: This is a blocking call that all processors must execute together. - * Note: Only rank 0 will print the messages (this is necessary as other ranks may not be able to print correctly). + * Note: Only rank 0 will print the messages (this is necessary as other ranks may not be able + * to print correctly). * @param level Optional integer specifying the level of reporting (default: 1) * 0: Report the number of tests passed, failed, and expected failures. - * 1: Report the number of passed tests (if <=20) or the number passed otherwise, - * report all failures, - * report the number of expected failed tests (if <=50) or the number passed otherwise. + * 1: Report the number of passed tests (if <=20) or the number passed + * otherwise, report all failures, report the number of expected + * failed tests (if <=50) or the number passed otherwise. * 2: Report all passed, failed, and expected failed tests. */ - virtual void report(const int level=1); + virtual void report( const int level = 1 ) const; + + //! 
Clear the messages + void reset(); protected: std::vector pass_messages; std::vector fail_messages; std::vector expected_fail_messages; - #ifdef USE_MPI - MPI_Comm comm; - #endif + mutable std::mutex mutex; +#ifdef USE_MPI + MPI_Comm comm; +#endif private: // Make the copy constructor private - UnitTest(const UnitTest& p) {} + UnitTest( const UnitTest & ) {} // Function to pack the messages into a single data stream and send to the given processor // Note: This function does not return until the message stream has been sent - void pack_message_stream(const std::vector& messages, const int rank, const int tag); + void pack_message_stream( + const std::vector &messages, const int rank, const int tag ) const; // Function to unpack the messages from a single data stream // Note: This function does not return until the message stream has been received - std::vector unpack_message_stream(const int rank, const int tag); + std::vector unpack_message_stream( const int rank, const int tag ) const; + // Helper functions + inline void barrier() const; + inline std::vector allGather( int value ) const; + inline std::vector> gatherMessages( + const std::vector &local_messages, int tag ) const; }; diff --git a/common/Utilities.h b/common/Utilities.h index e1f1713d..e6db4279 100644 --- a/common/Utilities.h +++ b/common/Utilities.h @@ -1,74 +1,107 @@ #ifndef included_Utilities #define included_Utilities +#include +#include +#include +#include #include #include -#include +#include +#include #include +namespace Utilities { + /*! - * Utilities is a Singleton class containing basic routines for error - * reporting, file manipulations, etc. Included are a set of \ref Macros "macros" that are commonly used. + * Aborts the run after printing an error message with file and + * linenumber information. */ -namespace Utilities -{ - - /*! - * Aborts the run after printing an error message with file and - * linenumber information. 
- */ - void abort(const std::string &message, const std::string &filename, const int line); +void abort( const std::string &message, const std::string &filename, const int line ); - /*! - * Set the behavior of abort - * @param printMemory Print the current memory usage (default is true) - * @param printStack Print the current call stack (default is true) - * @param throwException Throw an exception instead of MPI_Abort (default is false) - */ - void setAbortBehavior( bool printMemory, bool printStack, bool throwException ); +/*! + * Set the behavior of abort + * @param printMemory Print the current memory usage (default is true) + * @param printStack Print the current call stack (default is true) + * @param throwException Throw an exception instead of MPI_Abort (default is false) + */ +void setAbortBehavior( bool printMemory, bool printStack, bool throwException ); - //! Function to set the error handlers - void setErrorHandlers(); - - /*! - * Function to get the memory availible. - * This function will return the total memory availible - * Note: depending on the implimentation, this number may be rounded to - * to a multiple of the page size. - * If this function fails, it will return 0. - */ - size_t getSystemMemory(); - - /*! - * Function to get the memory usage. - * This function will return the total memory used by the application. - * Note: depending on the implimentation, this number may be rounded to - * to a multiple of the page size. - * If this function fails, it will return 0. - */ - size_t getMemoryUsage(); +//! Function to set the error handlers +void setErrorHandlers(); - //! Function to get an arbitrary point in time - double time(); +/*! + * Function to get the memory availible. + * This function will return the total memory availible + * Note: depending on the implimentation, this number may be rounded to + * to a multiple of the page size. + * If this function fails, it will return 0. + */ +size_t getSystemMemory(); - //! 
Function to get the resolution of time - double tick(); - //! Factor a number into it's prime factors - std::vector factor(size_t number); +/*! + * Function to get the memory usage. + * This function will return the total memory used by the application. + * Note: depending on the implimentation, this number may be rounded to + * to a multiple of the page size. + * If this function fails, it will return 0. + */ +size_t getMemoryUsage(); - //! Print AMP Banner - void nullUse( void* ); + +//! Function to get an arbitrary point in time +double time(); + + +//! Function to get the resolution of time +double tick(); + + +//! std::string version of sprintf +inline std::string stringf( const char *format, ... ); + + +/*! + * Sleep for X ms + * @param N Time to sleep (ms) + */ +inline void sleep_ms( int N ) { std::this_thread::sleep_for( std::chrono::milliseconds( N ) ); } + + +/*! + * Sleep for X s + * @param N Time to sleep (s) + */ +inline void sleep_s( int N ) { std::this_thread::sleep_for( std::chrono::seconds( N ) ); } + + +//! Factor a number into it's prime factors +std::vector factor(size_t number); + +//! Print AMP Banner +void nullUse( void* ); } // namespace Utilities #include "common/UtilityMacros.h" + +// stringf +inline std::string Utilities::stringf( const char *format, ... ) +{ + va_list ap; + va_start( ap, format ); + char tmp[4096]; + vsprintf( tmp, format, ap ); + va_end( ap ); + return std::string( tmp ); +} + + #endif - - diff --git a/common/UtilityMacros.h b/common/UtilityMacros.h index 2165b1d5..bfac172f 100644 --- a/common/UtilityMacros.h +++ b/common/UtilityMacros.h @@ -9,8 +9,8 @@ #include -/*! \defgroup Macros Set of utility macro functions - * \details These functions are a list of C++ macros that are used +/*! \defgroup Macros Set of utility macro functions + * \details These functions are a list of C++ macros that are used * for common operations, including checking for errors. * \addtogroup Macros * @{ @@ -19,13 +19,19 @@ /*! 
\def NULL_STATEMENT * \brief A null statement - * \details A statement that does nothing, for insure++ make it something + * \details A statement that does nothing, for insure++ make it something * more complex than a simple C null statement to avoid a warning. */ +#ifndef NULL_STATEMENT #ifdef __INSURE__ - #define NULL_STATEMENT do{if(0) int nullstatement=0 }}while(0) +#define NULL_STATEMENT \ + do { \ + if ( 0 ) \ + int nullstatement = 0 \ + } while ( 0 ) #else - #define NULL_STATEMENT +#define NULL_STATEMENT +#endif #endif @@ -34,9 +40,15 @@ * \details A null use of a variable, use to avoid GNU compiler warnings about unused variables. * \param variable Variable to pretend to use */ -#define NULL_USE(variable) do { \ - if(0) {char *temp = (char *)&variable; temp++;} \ -}while(0) +#ifndef NULL_USE +#define NULL_USE( variable ) \ + do { \ + if ( 0 ) { \ + auto temp = (char *) &variable; \ + temp++; \ + } \ + } while ( 0 ) +#endif /*! \def ERROR(MSG) @@ -46,9 +58,10 @@ * line number of the abort are also printed. * \param MSG Error message to print */ -#define ERROR(MSG) do { \ - ::Utilities::abort(MSG,__FILE__,__LINE__); \ -}while(0) +#define ERROR(MSG) \ + do { \ + ::Utilities::abort( MSG, __FILE__, __LINE__ ); \ + } while ( 0 ) /*! \def WARNING(MSG) @@ -56,11 +69,13 @@ * \details Print a warning without exit. Print file and line number of the warning. * \param MSG Warning message to print */ -#define WARNING(MSG) do { \ - std::stringstream tboxos; \ - tboxos << MSG << std::ends; \ - printf("WARNING: %s\n Warning called in %s on line %i\n",tboxos.str().c_str(),__FILE__,__LINE__); \ -}while(0) +#define WARNING(MSG) \ + do { \ + std::stringstream tboxos; \ + tboxos << MSG << std::ends; \ + printf("WARNING: %s\n Warning called in %s on line %i\n", \ + tboxos.str().c_str(),__FILE__,__LINE__); \ + }while(0) /*! \def ASSERT(EXP) @@ -71,13 +86,14 @@ * The file and line number of the abort are printed along with the stack trace (if availible). 
* \param EXP Expression to evaluate */ -#define ASSERT(EXP) do { \ - if ( !(EXP) ) { \ - std::stringstream tboxos; \ - tboxos << "Failed assertion: " << #EXP << std::ends; \ - ::Utilities::abort(tboxos.str(), __FILE__, __LINE__); \ - } \ -}while(0) +#define ASSERT(EXP) \ + do { \ + if ( !(EXP) ) { \ + std::stringstream tboxos; \ + tboxos << "Failed assertion: " << #EXP << std::ends; \ + ::Utilities::abort(tboxos.str(), __FILE__, __LINE__); \ + } \ + }while(0) /*! \def INSIST(EXP,MSG) @@ -99,7 +115,6 @@ }while(0) - /** * Macro for use when assertions are to be included * only when debugging. @@ -118,6 +133,49 @@ #endif +/*! \def DISABLE_WARNINGS + * \brief Reenable warnings + * \details This will re-enable warnings after a call to DIASABLE_WARNINGS + */ +/*! \def ENABLE_WARNINGS + * \brief Supress all warnings + * \details This will start to supress all compile warnings. + * Be sure to follow with ENABLE_WARNINGS + */ +// clang-format off +#ifdef DISABLE_WARNINGS + // Macros previously defined +#elif defined( USING_MSVC ) + #define DISABLE_WARNINGS __pragma( warning( push, 0 ) ) + #define ENABLE_WARNINGS __pragma( warning( pop ) ) +#elif defined( USING_CLANG ) + #define DISABLE_WARNINGS \ + _Pragma( "clang diagnostic push" ) _Pragma( "clang diagnostic ignored \"-Wall\"" ) \ + _Pragma( "clang diagnostic ignored \"-Wextra\"" ) \ + _Pragma( "clang diagnostic ignored \"-Wunused-private-field\"" ) \ + _Pragma( "clang diagnostic ignored \"-Wmismatched-new-delete\"" ) + #define ENABLE_WARNINGS _Pragma( "clang diagnostic pop" ) +#elif defined( USING_GCC ) + // Note: We cannot disable the -Wliteral-suffix message with this macro because the + // pragma command cannot suppress warnings from the C++ preprocessor. See gcc bug #53431. 
+ #define DISABLE_WARNINGS \ + _Pragma( "GCC diagnostic push" ) _Pragma( "GCC diagnostic ignored \"-Wall\"" ) \ + _Pragma( "GCC diagnostic ignored \"-Wextra\"" ) \ + _Pragma( "GCC diagnostic ignored \"-Wpragmas\"" ) \ + _Pragma( "GCC diagnostic ignored \"-Wunused-local-typedefs\"" ) \ + _Pragma( "GCC diagnostic ignored \"-Woverloaded-virtual\"" ) \ + _Pragma( "GCC diagnostic ignored \"-Wunused-parameter\"" ) \ + _Pragma( "GCC diagnostic ignored \"-Warray-bounds\"" ) \ + _Pragma( "GCC diagnostic ignored \"-Wterminate\"" ) + #define ENABLE_WARNINGS _Pragma( "GCC diagnostic pop" ) +#else + #define DISABLE_WARNINGS + #define ENABLE_WARNINGS +#endif +// clang-format on + + + /*! @} */ diff --git a/tests/lbpm_color_simulator.h b/tests/lbpm_color_simulator.h index 626ef757..3d48655e 100644 --- a/tests/lbpm_color_simulator.h +++ b/tests/lbpm_color_simulator.h @@ -9,9 +9,24 @@ #define ANALYSIS_INTERVAL 1000 #define BLOBID_INTERVAL 1000 -enum AnalysisType{ AnalyzeNone=0, IdentifyBlobs=0x01, CopyPhaseIndicator=0x02, + +enum class AnalysisType : uint64_t { AnalyzeNone=0, IdentifyBlobs=0x01, CopyPhaseIndicator=0x02, CopySimState=0x04, ComputeAverages=0x08, CreateRestart=0x10, WriteVis=0x20 }; +AnalysisType& operator |=(AnalysisType &lhs, AnalysisType rhs) +{ + lhs = static_cast ( + static_cast::type>(lhs) | + static_cast::type>(rhs) + ); + return lhs; +} +bool matches( AnalysisType x, AnalysisType y ) +{ + return static_cast::type>(x) & + static_cast::type>(y) != 0; +} + template void DeleteArray( const TYPE *p ) @@ -30,7 +45,7 @@ struct AnalysisWaitIdStruct { // Helper class to write the restart file from a seperate thread -class WriteRestartWorkItem: public ThreadPool::WorkItem +class WriteRestartWorkItem: public ThreadPool::WorkItemRet { public: WriteRestartWorkItem( const char* filename_, std::shared_ptr cDen_, @@ -41,7 +56,6 @@ public: WriteCheckpoint(filename,cDen.get(),cfq.get(),N); PROFILE_STOP("Save Checkpoint",1); }; - virtual bool has_result() const { return false; } 
private: WriteRestartWorkItem(); const char* filename; @@ -54,7 +68,7 @@ private: static const std::string id_map_filename = "lbpm_id_map.txt"; typedef std::shared_ptr > BlobIDstruct; typedef std::shared_ptr > BlobIDList; -class BlobIdentificationWorkItem1: public ThreadPool::WorkItem +class BlobIdentificationWorkItem1: public ThreadPool::WorkItemRet { public: BlobIdentificationWorkItem1( int timestep_, int Nx_, int Ny_, int Nz_, const RankInfoStruct& rank_info_, @@ -75,7 +89,6 @@ public: new_index->first = ComputeGlobalBlobIDs(Nx-2,Ny-2,Nz-2,rank_info,*phase,dist,vF,vS,ids,newcomm); PROFILE_STOP("Identify blobs",1); } - virtual bool has_result() const { return false; } private: BlobIdentificationWorkItem1(); int timestep; @@ -87,7 +100,7 @@ private: BlobIDList new_list; MPI_Comm newcomm; }; -class BlobIdentificationWorkItem2: public ThreadPool::WorkItem +class BlobIdentificationWorkItem2: public ThreadPool::WorkItemRet { public: BlobIdentificationWorkItem2( int timestep_, int Nx_, int Ny_, int Nz_, const RankInfoStruct& rank_info_, @@ -122,7 +135,6 @@ public: } PROFILE_STOP("Identify blobs maps",1); } - virtual bool has_result() const { return false; } private: BlobIdentificationWorkItem2(); int timestep; @@ -137,7 +149,7 @@ private: // Helper class to write the vis file from a thread -class WriteVisWorkItem: public ThreadPool::WorkItem +class WriteVisWorkItem: public ThreadPool::WorkItemRet { public: WriteVisWorkItem( int timestep_, std::vector& visData_, @@ -164,7 +176,6 @@ public: IO::writeData( timestep, visData, newcomm ); PROFILE_STOP("Save Vis",1); }; - virtual bool has_result() const { return false; } private: WriteVisWorkItem(); int timestep; @@ -177,7 +188,7 @@ private: // Helper class to run the analysis from within a thread // Note: Averages will be modified after the constructor is called -class AnalysisWorkItem: public ThreadPool::WorkItem +class AnalysisWorkItem: public ThreadPool::WorkItemRet { public: AnalysisWorkItem( AnalysisType type_, int 
timestep_, TwoPhase& Averages_, @@ -191,10 +202,10 @@ public: Averages.Label_NWP_map = *id_list; Averages.NumberComponents_WP = 1; Averages.Label_WP.fill(0.0); - if ( (type&CopyPhaseIndicator) != 0 ) { + if ( matches(type,AnalysisType::CopyPhaseIndicator) ) { // Averages.ColorToSignedDistance(beta,Averages.Phase,Averages.Phase_tplus); } - if ( (type&ComputeAverages) != 0 ) { + if ( matches(type,AnalysisType::ComputeAverages) ) { PROFILE_START("Compute dist",1); Averages.Initialize(); Averages.ComputeDelPhi(); @@ -212,7 +223,6 @@ public: PROFILE_STOP("Compute dist",1); } } - virtual bool has_result() const { return false; } private: AnalysisWorkItem(); AnalysisType type; @@ -223,6 +233,7 @@ private: double beta; }; + // Function to start the analysis void run_analysis( int timestep, int restart_interval, const RankInfoStruct& rank_info, ScaLBL_Communicator &ScaLBL_Comm, TwoPhase& Averages, @@ -236,46 +247,45 @@ void run_analysis( int timestep, int restart_interval, int N = Nx*Ny*Nz; // Determin the analysis we want to perform - AnalysisType type = AnalyzeNone; + AnalysisType type = AnalysisType::AnalyzeNone; if ( timestep%ANALYSIS_INTERVAL + 5 == ANALYSIS_INTERVAL ) { // Copy the phase indicator field for the earlier timestep - type = static_cast( type | CopyPhaseIndicator ); + type |= AnalysisType::CopyPhaseIndicator; } if ( timestep%BLOBID_INTERVAL == 0 ) { // Identify blobs and update global ids in time - type = static_cast( type | IdentifyBlobs ); + type |= AnalysisType::IdentifyBlobs; } - /* #ifdef USE_CUDA + /*#ifdef USE_CUDA if ( tpool.getQueueSize()<=3 && tpool.getNumThreads()>0 && timestep%50==0 ) { // Keep a few blob identifications queued up to keep the processors busy, // allowing us to track the blobs as fast as possible // Add more detailed estimates of the update frequency required to track blobs - type = static_cast( type | IdentifyBlobs ); + type |= AnalysisType::IdentifyBlobs; } - #endif - */ + #endif */ if ( timestep%ANALYSIS_INTERVAL == 0 ) { // 
Copy the averages to the CPU (and identify blobs) - type = static_cast( type | CopySimState ); - type = static_cast( type | IdentifyBlobs ); + type |= AnalysisType::CopySimState; + type |= AnalysisType::IdentifyBlobs; } if ( timestep%ANALYSIS_INTERVAL == 5 ) { // Run the analysis - type = static_cast( type | ComputeAverages ); + type |= AnalysisType::ComputeAverages; } if (timestep%restart_interval == 0) { // Write the restart file - type = static_cast( type | CreateRestart ); + type |= AnalysisType::CreateRestart; } if (timestep%restart_interval == 0) { // Write the visualization data - type = static_cast( type | WriteVis ); - type = static_cast( type | CopySimState ); - type = static_cast( type | IdentifyBlobs ); + type |= AnalysisType::WriteVis; + type |= AnalysisType::CopySimState; + type |= AnalysisType::IdentifyBlobs; } // Return if we are not doing anything - if ( type == AnalyzeNone ) + if ( type == AnalysisType::AnalyzeNone ) return; PROFILE_START("start_analysis"); @@ -284,26 +294,28 @@ void run_analysis( int timestep, int restart_interval, ScaLBL_DeviceBarrier(); PROFILE_START("Copy data to host",1); std::shared_ptr phase; - if ( (type&CopyPhaseIndicator)!=0 || (type&ComputeAverages)!=0 || - (type&CopySimState)!=0 || (type&IdentifyBlobs)!=0 ) + if ( matches(type,AnalysisType::CopyPhaseIndicator) || + matches(type,AnalysisType::ComputeAverages) || + matches(type,AnalysisType::CopySimState) || + matches(type,AnalysisType::IdentifyBlobs) ) { phase = std::shared_ptr(new DoubleArray(Nx,Ny,Nz)); ScaLBL_CopyToHost(phase->data(),Phi,N*sizeof(double)); } - if ( (type&CopyPhaseIndicator)!=0 ) { + if ( matches(type,AnalysisType::CopyPhaseIndicator) ) { memcpy(Averages.Phase_tplus.data(),phase->data(),N*sizeof(double)); //Averages.ColorToSignedDistance(beta,Averages.Phase,Averages.Phase_tplus); } - if ( (type&ComputeAverages)!=0 ) { + if ( matches(type,AnalysisType::ComputeAverages) ) { memcpy(Averages.Phase_tminus.data(),phase->data(),N*sizeof(double)); 
//Averages.ColorToSignedDistance(beta,Averages.Phase,Averages.Phase_tminus); } - if ( (type&CopySimState) != 0 ) { + if ( matches(type,AnalysisType::CopySimState) ) { // Copy the members of Averages to the cpu (phase was copied above) // Wait PROFILE_START("Copy-Pressure",1); - ScaLBL_D3Q19_Pressure(fq,Pressure,Np); - ScaLBL_D3Q19_Momentum(fq,Velocity,Np); + ScaLBL_D3Q19_Pressure(fq,Pressure,Np); + ScaLBL_D3Q19_Momentum(fq,Velocity,Np); ScaLBL_DeviceBarrier(); PROFILE_STOP("Copy-Pressure",1); PROFILE_START("Copy-Wait",1); @@ -312,14 +324,14 @@ void run_analysis( int timestep, int restart_interval, PROFILE_STOP("Copy-Wait",1); PROFILE_START("Copy-State",1); memcpy(Averages.Phase.data(),phase->data(),N*sizeof(double)); - ScaLBL_Comm.RegularLayout(Map,Pressure,Averages.Press); - ScaLBL_Comm.RegularLayout(Map,&Velocity[0],Averages.Vel_x); - ScaLBL_Comm.RegularLayout(Map,&Velocity[Np],Averages.Vel_y); - ScaLBL_Comm.RegularLayout(Map,&Velocity[2*Np],Averages.Vel_z); + ScaLBL_Comm.RegularLayout(Map,Pressure,Averages.Press); + ScaLBL_Comm.RegularLayout(Map,&Velocity[0],Averages.Vel_x); + ScaLBL_Comm.RegularLayout(Map,&Velocity[Np],Averages.Vel_y); + ScaLBL_Comm.RegularLayout(Map,&Velocity[2*Np],Averages.Vel_z); PROFILE_STOP("Copy-State",1); } std::shared_ptr cDen, cfq; - if ( (type&CreateRestart) != 0 ) { + if ( matches(type,AnalysisType::CreateRestart) ) { // Copy restart data to the CPU cDen = std::shared_ptr(new double[2*Np],DeleteArray); cfq = std::shared_ptr(new double[19*Np],DeleteArray); @@ -329,14 +341,14 @@ void run_analysis( int timestep, int restart_interval, PROFILE_STOP("Copy data to host",1); // Spawn threads to do blob identification work - if ( (type&IdentifyBlobs)!=0 ) { + if ( matches(type,AnalysisType::IdentifyBlobs) ) { BlobIDstruct new_index(new std::pair(0,IntArray())); BlobIDstruct new_ids(new std::pair(0,IntArray())); BlobIDList new_list(new std::vector()); - ThreadPool::WorkItem *work1 = new BlobIdentificationWorkItem1(timestep, - 
Nx,Ny,Nz,rank_info,phase,Averages.SDs,last_ids,new_index,new_ids,new_list); - ThreadPool::WorkItem *work2 = new BlobIdentificationWorkItem2(timestep, - Nx,Ny,Nz,rank_info,phase,Averages.SDs,last_ids,new_index,new_ids,new_list); + auto work1 = new BlobIdentificationWorkItem1(timestep,Nx,Ny,Nz,rank_info, + phase,Averages.SDs,last_ids,new_index,new_ids,new_list); + auto work2 = new BlobIdentificationWorkItem2(timestep,Nx,Ny,Nz,rank_info, + phase,Averages.SDs,last_ids,new_index,new_ids,new_list); work1->add_dependency(wait.blobID); work2->add_dependency(tpool.add_work(work1)); wait.blobID = tpool.add_work(work2); @@ -346,9 +358,8 @@ void run_analysis( int timestep, int restart_interval, } // Spawn threads to do the analysis work - if ( (type&ComputeAverages) != 0 ) { - ThreadPool::WorkItem *work = new AnalysisWorkItem( - type,timestep,Averages,last_index,last_id_map,beta); + if ( matches(type,AnalysisType::ComputeAverages) ) { + auto work = new AnalysisWorkItem(type,timestep,Averages,last_index,last_id_map,beta); work->add_dependency(wait.blobID); work->add_dependency(wait.analysis); work->add_dependency(wait.vis); // Make sure we are done using analysis before modifying @@ -356,35 +367,35 @@ void run_analysis( int timestep, int restart_interval, } // Spawn a thread to write the restart file - if ( (type&CreateRestart) != 0 ) { + if ( matches(type,AnalysisType::CreateRestart) ) { int rank = MPI_WORLD_RANK(); - //if (pBC) { - //err = fabs(sat_w - sat_w_previous); - //sat_w_previous = sat_w; - //if (rank==0){ - // printf("Timestep %i: change in saturation since last checkpoint is %f \n",timestep,err); - // } - // } + /* if (pBC) { + err = fabs(sat_w - sat_w_previous); + sat_w_previous = sat_w; + if (rank==0){ + printf("Timestep %i: change in saturation since last checkpoint is %f \n",timestep,err); + } + } */ // Wait for previous restart files to finish writing (not necessary, but helps to ensure memory usage is limited) tpool.wait(wait.restart); - // Retain the timestep 
associated with the restart files - if (rank==0){ - FILE *Rst = fopen("Restart.txt","w"); - fprintf(Rst,"%i\n",timestep+5); - fclose(Rst); - } + // Retain the timestep associated with the restart files + if (rank==0) { + FILE *Rst = fopen("Restart.txt","w"); + fprintf(Rst,"%i\n",timestep+5); + fclose(Rst); + } // Write the restart file (using a seperate thread) - WriteRestartWorkItem *work = new WriteRestartWorkItem(LocalRestartFile,cDen,cfq,Np); + auto work = new WriteRestartWorkItem(LocalRestartFile,cDen,cfq,Np); work->add_dependency(wait.restart); wait.restart = tpool.add_work(work); } // Save the results for visualization - if ( (type&CreateRestart) != 0 ) { + if ( matches(type,AnalysisType::WriteVis) ) { // Wait for previous restart files to finish writing (not necessary, but helps to ensure memory usage is limited) tpool.wait(wait.vis); // Write the vis files - ThreadPool::WorkItem *work = new WriteVisWorkItem( timestep, visData, Averages, fillData ); + auto work = new WriteVisWorkItem( timestep, visData, Averages, fillData ); work->add_dependency(wait.blobID); work->add_dependency(wait.analysis); work->add_dependency(wait.vis); diff --git a/threadpool/atomic_helpers.cpp b/threadpool/atomic_helpers.cpp index 1cac8e83..574cd30e 100644 --- a/threadpool/atomic_helpers.cpp +++ b/threadpool/atomic_helpers.cpp @@ -27,4 +27,3 @@ int atomic_pthread_lock_initialized = create_atomic_pthread_lock(); } // AtomicOperations namespace - diff --git a/threadpool/atomic_helpers.h b/threadpool/atomic_helpers.h index 5e8c4cfb..178c1af1 100644 --- a/threadpool/atomic_helpers.h +++ b/threadpool/atomic_helpers.h @@ -5,7 +5,6 @@ #include #include #include -#include // Choose the OS #if defined( WIN32 ) || defined( _WIN32 ) || defined( WIN64 ) || defined( _WIN64 ) @@ -89,6 +88,16 @@ inline int32_atomic atomic_get( const int32_atomic volatile *x ); */ inline int64_atomic atomic_get( const int64_atomic volatile *x ); + +/** + * \brief Get the value + * \details Read the data in x 
+ * \param[in] x The pointer to the value to get + */ +template +inline TYPE *atomic_get( volatile TYPE **x ); + + /** * \brief Set the value * \details Set the data in x to y (*x=y) @@ -185,9 +194,8 @@ inline bool atomic_compare_and_swap( void *volatile *v, void *x, void *y ); * \brief Fetch the current value and "and" with given value * \details Perform *v = (*v) & x, returning the previous value * \return Returns the previous value before the "and" operation - * \param[in] v The pointer to the value to check and swap - * \param[in] x The value to compare - * \param[in] y The value to swap iff *v==x + * \param[in] v The pointer to the value to check and and + * \param[in] x The value to and */ inline int32_atomic atomic_fetch_and_and( int32_atomic volatile *v, int32_atomic x ); @@ -195,9 +203,8 @@ inline int32_atomic atomic_fetch_and_and( int32_atomic volatile *v, int32_atomic * \brief Fetch the current value and "and" with given value * \details Perform *v = (*v) & x, returning the previous value * \return Returns the previous value before the "and" operation - * \param[in] v The pointer to the value to check and swap - * \param[in] x The value to compare - * \param[in] y The value to swap iff *v==x + * \param[in] v The pointer to the value to check and and + * \param[in] x The value to and */ inline int64_atomic atomic_fetch_and_and( int64_atomic volatile *v, int64_atomic x ); @@ -205,9 +212,8 @@ inline int64_atomic atomic_fetch_and_and( int64_atomic volatile *v, int64_atomic * \brief Fetch the current value and "or" with given value * \details Perform *v = (*v) | x, returning the previous value * \return Returns the previous value before the "and" operation - * \param[in] v The pointer to the value to check and swap - * \param[in] x The value to compare - * \param[in] y The value to swap iff *v==x + * \param[in] v The pointer to the value to check and or + * \param[in] x The value to or */ inline int32_atomic atomic_fetch_and_or( int32_atomic volatile *v, 
int32_atomic x ); @@ -216,52 +222,52 @@ inline int32_atomic atomic_fetch_and_or( int32_atomic volatile *v, int32_atomic * \details Perform *v = (*v) | x, returning the previous value * \return Returns the previous value before the "and" operation * \param[in] v The pointer to the value to check and swap - * \param[in] x The value to compare - * \param[in] y The value to swap iff *v==x + * \param[in] v The pointer to the value to check and or + * \param[in] x The value to or */ inline int64_atomic atomic_fetch_and_or( int64_atomic volatile *v, int64_atomic x ); - /** * \brief Class to store a pool of objects * \details This class stores a pool of objects that can be added/removed in a thread-safe way */ -template +template class pool { - public: - pool( ) +public: + pool() { - d_data = new volatile TYPE*[N_MAX]; - for (int i=0; i( d_data[i] ); - bool swapped = atomic_compare_and_swap( (void* volatile*) &d_data[i], tmp, nullptr ); - if ( swapped && ( tmp != nullptr ) ) - return tmp; - i = (i+1)%N_MAX; - } - } - inline void put( TYPE* ptr ) + inline TYPE *get() { int i = 0; - while ( !atomic_compare_and_swap( (void* volatile*) &d_data[i], nullptr, ptr ) ) - i = (i+1)%N_MAX; + while ( true ) { + TYPE *tmp = const_cast( d_data[i] ); + bool swapped = atomic_compare_and_swap( (void *volatile *) &d_data[i], tmp, nullptr ); + if ( swapped && ( tmp != nullptr ) ) + return tmp; + i = ( i + 1 ) % N_MAX; + } } - private: + inline void put( TYPE *ptr ) + { + int i = 0; + while ( !atomic_compare_and_swap( (void *volatile *) &d_data[i], nullptr, ptr ) ) + i = ( i + 1 ) % N_MAX; + } + +private: volatile TYPE **d_data; pool( const pool &rhs ); pool &operator=( const pool &rhs ); @@ -323,10 +329,24 @@ inline int64_atomic atomic_decrement( int64_atomic volatile *x ) { return OSAtomicDecrement64Barrier( x ); } -int32_atomic atomic_fetch_and_or( int32_atomic volatile *v, int32_atomic x ) { return OSAtomicOr32Orig( x, (volatile uint32_t *) v ); } -int32_atomic atomic_fetch_and_and( 
int32_atomic volatile *v, int32_atomic x ) { return OSAtomicAnd32Orig( x, (volatile uint32_t *) v); } -int64_atomic atomic_fetch_and_or( int64_atomic volatile *v, int64_atomic x ) { throw std::logic_error("Not availible for this OS"); return 0; } -int64_atomic atomic_fetch_and_and( int64_atomic volatile *v, int64_atomic x ) { throw std::logic_error("Not availible for this OS"); return 0; } +int32_atomic atomic_fetch_and_or( int32_atomic volatile *v, int32_atomic x ) +{ + return OSAtomicOr32Orig( x, (volatile uint32_t *) v ); +} +int32_atomic atomic_fetch_and_and( int32_atomic volatile *v, int32_atomic x ) +{ + return OSAtomicAnd32Orig( x, (volatile uint32_t *) v ); +} +int64_atomic atomic_fetch_and_or( int64_atomic volatile *v, int64_atomic x ) +{ + throw std::logic_error( "Not availible for this OS" ); + return 0; +} +int64_atomic atomic_fetch_and_and( int64_atomic volatile *v, int64_atomic x ) +{ + throw std::logic_error( "Not availible for this OS" ); + return 0; +} inline int32_atomic atomic_add( int32_atomic volatile *x, int32_atomic y ) { return OSAtomicAdd32Barrier( y, x ); @@ -352,10 +372,22 @@ int32_atomic atomic_increment( int32_atomic volatile *x ) { return __sync_add_an int64_atomic atomic_increment( int64_atomic volatile *x ) { return __sync_add_and_fetch( x, 1 ); } int32_atomic atomic_decrement( int32_atomic volatile *x ) { return __sync_sub_and_fetch( x, 1 ); } int64_atomic atomic_decrement( int64_atomic volatile *x ) { return __sync_sub_and_fetch( x, 1 ); } -int32_atomic atomic_fetch_and_or( int32_atomic volatile *v, int32_atomic x ) { return __sync_fetch_and_or( v, x ); } -int64_atomic atomic_fetch_and_or( int64_atomic volatile *v, int64_atomic x ) { return __sync_fetch_and_or( v, x ); } -int32_atomic atomic_fetch_and_and( int32_atomic volatile *v, int32_atomic x ) { return __sync_fetch_and_and( v, x ); } -int64_atomic atomic_fetch_and_and( int64_atomic volatile *v, int64_atomic x ) { return __sync_fetch_and_and( v, x ); } +int32_atomic 
atomic_fetch_and_or( int32_atomic volatile *v, int32_atomic x ) +{ + return __sync_fetch_and_or( v, x ); +} +int64_atomic atomic_fetch_and_or( int64_atomic volatile *v, int64_atomic x ) +{ + return __sync_fetch_and_or( v, x ); +} +int32_atomic atomic_fetch_and_and( int32_atomic volatile *v, int32_atomic x ) +{ + return __sync_fetch_and_and( v, x ); +} +int64_atomic atomic_fetch_and_and( int64_atomic volatile *v, int64_atomic x ) +{ + return __sync_fetch_and_and( v, x ); +} inline int32_atomic atomic_add( int32_atomic volatile *x, int32_atomic y ) { return __sync_add_and_fetch( x, y ); @@ -459,31 +491,44 @@ inline int64_atomic atomic_get( const int64_atomic volatile *x ) { return atomic_add( const_cast( x ), 0 ); } +template +inline TYPE *atomic_get( volatile TYPE **x ) +{ + return reinterpret_cast( + atomic_add( reinterpret_cast( x ), 0 ) ); +} inline void atomic_set( int32_atomic volatile *x, int32_atomic y ) { int32_atomic tmp = *x; - while ( !atomic_compare_and_swap( x, tmp, y ) ) { tmp = *x; } + while ( !atomic_compare_and_swap( x, tmp, y ) ) { + tmp = *x; + } } inline void atomic_set( int64_atomic volatile *x, int64_atomic y ) { int64_atomic tmp = *x; - while ( !atomic_compare_and_swap( x, tmp, y ) ) { tmp = *x; } + while ( !atomic_compare_and_swap( x, tmp, y ) ) { + tmp = *x; + } } inline void atomic_swap( int32_atomic volatile *x, int32_atomic *y ) { int32_atomic tmp = *x; - while ( !atomic_compare_and_swap( x, tmp, *y ) ) { tmp = *x; } + while ( !atomic_compare_and_swap( x, tmp, *y ) ) { + tmp = *x; + } *y = tmp; } inline void atomic_swap( int64_atomic volatile *x, int64_atomic *y ) { int64_atomic tmp = *x; - while ( !atomic_compare_and_swap( x, tmp, *y ) ) { tmp = *x; } + while ( !atomic_compare_and_swap( x, tmp, *y ) ) { + tmp = *x; + } *y = tmp; } - // Define an atomic counter struct counter_t { public: @@ -499,6 +544,7 @@ public: inline void setCount( int val ) { count = val; } // Get the current value of the count inline int getCount() const { return 
count; } + private: counter_t( const counter_t & ); counter_t &operator=( const counter_t & ); diff --git a/threadpool/atomic_list.h b/threadpool/atomic_list.h index d3c73f2e..5da8cc85 100644 --- a/threadpool/atomic_list.h +++ b/threadpool/atomic_list.h @@ -1,52 +1,48 @@ #ifndef included_AtomicModelAtomicList #define included_AtomicModelAtomicList -#include -#include #include +#include +#include #include "threadpool/atomic_helpers.h" - /** \class AtomicList * - * \brief Maintain a sorted list of entries + * \brief Maintain a sorted list of entries * \details This class implements a basic sorted list that is thread-safe and lock-free. * Entries are stored smallest to largest according to the compare operator */ -template< class TYPE, int MAX_SIZE, class COMPARE = std::less > +template> class AtomicList final { public: //! Default constructor - AtomicList( const TYPE& default_value=TYPE(), const COMPARE& comp=COMPARE() ); + AtomicList( const TYPE &default_value = TYPE(), const COMPARE &comp = COMPARE() ); /*! * \brief Remove an item from the list * \details Find and remove first entry that meets the given criteria - * @return Return the item that matches the criteria, or the default item if no item matches - * @param comp Comparison function object (i.e. an object that satisfies + * @return Return the item that matches the criteria, + * or the default item if no item matches + * @param compare Comparison function object (i.e. an object that satisfies * the requirements of Compare) which returns ​true if the * given value meets the selection criteria. * The signature of the comparison function should be equivalent to: * bool cmp( const TYPE& value, ... ); + * @param args Additional arguments for the comparison */ - template + template inline TYPE remove( Compare compare, Args... args ); //! Remove the first from the list - inline TYPE remove_first( ); + inline TYPE remove_first(); /*! 
* \brief Insert an item * \details Insert an item into the list * @param x Item to insert - * @param comp Comparison function object (i.e. an object that satisfies - * the requirements of Compare) which returns ​true if the - * first argument is less than (i.e. is ordered before) the second. - * The signature of the comparison function should be equivalent to: - * bool cmp(const TYPE &a, const TYPE &b); */ inline void insert( TYPE x ); @@ -54,19 +50,19 @@ public: * \brief Return the size of the list * \details Return the number of items in the list */ - inline int size( ) const { return AtomicOperations::atomic_get(&d_N); } + inline int size() const { return AtomicOperations::atomic_get( &d_N ); } /*! * \brief Check if the list is empty * \details Return true if the list is empty */ - inline bool empty( ) const { return AtomicOperations::atomic_get(&d_N)==0; } + inline bool empty() const { return AtomicOperations::atomic_get( &d_N ) == 0; } /*! * \brief Return the capacity of the list * \details Return the maximum number of items the list can hold */ - inline int capacity( ) const { return MAX_SIZE; } + inline int capacity() const { return MAX_SIZE; } /*! * \brief Check the list @@ -76,15 +72,15 @@ public: * It is intended for debugging purposes only! * @return This function returns true if the list is in a good working state */ - inline bool check( ); + inline bool check(); //! Return the total number of inserts since object creation - inline int64_t N_insert() const { return AtomicOperations::atomic_get(&d_N_insert); } + inline int64_t N_insert() const { return AtomicOperations::atomic_get( &d_N_insert ); } //! 
Return the total number of removals since object creation - inline int64_t N_remove() const { return AtomicOperations::atomic_get(&d_N_remove); } + inline int64_t N_remove() const { return AtomicOperations::atomic_get( &d_N_remove ); } private: // Data members @@ -92,7 +88,7 @@ private: volatile TYPE d_default; volatile TYPE d_objects[MAX_SIZE]; volatile AtomicOperations::int32_atomic d_N; - volatile AtomicOperations::int32_atomic d_next[MAX_SIZE+1]; + volatile AtomicOperations::int32_atomic d_next[MAX_SIZE + 1]; volatile AtomicOperations::int32_atomic d_unused; volatile AtomicOperations::int64_atomic d_N_insert; volatile AtomicOperations::int64_atomic d_N_remove; @@ -112,12 +108,12 @@ private: if ( i != -1 ) AtomicOperations::atomic_fetch_and_or( &d_next[i], value ); } - inline int get_unused( ) + inline int get_unused() { int i = 0; while ( i == 0 ) i = AtomicOperations::atomic_fetch_and_and( &d_unused, 0 ); - AtomicOperations::atomic_fetch_and_or( &d_unused, -(d_next[i]+4)+1 ); + AtomicOperations::atomic_fetch_and_or( &d_unused, -( d_next[i] + 4 ) + 1 ); d_next[i] = -3; return i; } @@ -126,14 +122,14 @@ private: int j = 0; while ( j == 0 ) AtomicOperations::atomic_swap( &d_unused, &j ); - d_next[i] = -3-j; + d_next[i] = -3 - j; AtomicOperations::atomic_fetch_and_or( &d_unused, i ); } private: - AtomicList( const AtomicList& ); - AtomicList& operator=( const AtomicList& ); + AtomicList( const AtomicList & ); + AtomicList &operator=( const AtomicList & ); }; @@ -142,7 +138,7 @@ private: * \brief Pool allocator * \details This class implements a basic fast pool allocator that is thread-safe. */ -template< class TYPE, class INT_TYPE=int > +template class MemoryPool final { public: @@ -150,21 +146,21 @@ public: explicit MemoryPool( size_t size ); //! destructor - ~MemoryPool( ); + ~MemoryPool(); /*! 
* \brief Allocate an object * \details Allocates a new object from the pool * @return Return the new pointer, or nullptr if there is no more room in the pool */ - inline TYPE* allocate( ); + inline TYPE *allocate(); /*! * \brief Insert an item * \details Insert an item into the list * @param ptr The pointer to free */ - inline void free( TYPE* ptr ); + inline void free( TYPE *ptr ); private: // Data members @@ -172,13 +168,11 @@ private: volatile AtomicOperations::int32_atomic d_next; private: - MemoryPool( const MemoryPool& ); - MemoryPool& operator=( const MemoryPool& ); + MemoryPool( const MemoryPool & ); + MemoryPool &operator=( const MemoryPool & ); }; - - #include "threadpool/atomic_list.hpp" #endif diff --git a/threadpool/atomic_list.hpp b/threadpool/atomic_list.hpp index 877d953f..a0850971 100644 --- a/threadpool/atomic_list.hpp +++ b/threadpool/atomic_list.hpp @@ -2,41 +2,39 @@ #define included_AtomicList_hpp -#include #include +#include #include - /****************************************************************** -* Constructor * -******************************************************************/ -template -AtomicList::AtomicList( const TYPE& default_value, const COMPARE& comp ): - d_compare(comp), - d_default(default_value) + * Constructor * + ******************************************************************/ +template +AtomicList::AtomicList( const TYPE &default_value, const COMPARE &comp ) + : d_compare( comp ), d_default( default_value ) { - d_N = 0; - d_next[0] = -1; - d_unused = 1; + d_N = 0; + d_next[0] = -1; + d_unused = 1; d_N_insert = 0; d_N_remove = 0; - for (int i=0; i -template -inline TYPE AtomicList::remove( Compare compare, Args... args ) + * Remove an item * + ******************************************************************/ +template +template +inline TYPE AtomicList::remove( Compare compare, Args... 
args ) { - // Acquiring temporary ownership - int pos = 0; + // Acquiring temporary ownership + int pos = 0; auto next = lock( 0 ); while ( true ) { if ( next == -1 ) { @@ -50,9 +48,10 @@ inline TYPE AtomicList::remove( Compare compare, Args... // Acquire ownership of the next item int next2 = lock( next ); // Test to see if the object passes compare - bool test = compare( const_cast(d_objects[next-1]), args... ); + bool test = compare( const_cast( d_objects[next - 1] ), args... ); if ( test ) { - // We want to return this object, update next to point to another entry and remove the entry + // We want to return this object, update next to point to another entry and remove the + // entry unlock( next, -3 ); unlock( pos, next2 ); pos = next; @@ -60,28 +59,28 @@ inline TYPE AtomicList::remove( Compare compare, Args... } // Release the ownership and move on unlock( pos, next ); - pos = next; + pos = next; next = next2; } - TYPE rtn(d_default); + TYPE rtn( d_default ); if ( pos != -1 ) { - std::swap( rtn, const_cast( d_objects[pos-1] ) ); + std::swap( rtn, const_cast( d_objects[pos - 1] ) ); put_unused( pos ); AtomicOperations::atomic_decrement( &d_N ); AtomicOperations::atomic_increment( &d_N_remove ); } return rtn; } -template -inline TYPE AtomicList::remove_first( ) +template +inline TYPE AtomicList::remove_first() { - TYPE rtn(d_default); + TYPE rtn( d_default ); auto next = lock( 0 ); if ( next != -1 ) { int next2 = lock( next ); unlock( next, -3 ); unlock( 0, next2 ); - std::swap( rtn, const_cast( d_objects[next-1] ) ); + std::swap( rtn, const_cast( d_objects[next - 1] ) ); put_unused( next ); AtomicOperations::atomic_decrement( &d_N ); AtomicOperations::atomic_increment( &d_N_remove ); @@ -93,10 +92,10 @@ inline TYPE AtomicList::remove_first( ) /****************************************************************** -* Insert an item * -******************************************************************/ -template -inline void AtomicList::insert( TYPE x ) + * Insert an 
item * + ******************************************************************/ +template +inline void AtomicList::insert( TYPE x ) { int N_used = AtomicOperations::atomic_increment( &d_N ); if ( N_used > MAX_SIZE ) { @@ -105,14 +104,14 @@ inline void AtomicList::insert( TYPE x ) } // Get an index to store the entry auto index = get_unused(); - if ( index<1 ) + if ( index < 1 ) throw std::logic_error( "Internal error" ); // Store the object in d_objects AtomicOperations::atomic_increment( &d_N_insert ); - d_objects[index-1] = x; - d_next[index] = -1; + d_objects[index - 1] = x; + d_next[index] = -1; // Find the position to store and update the next entires - int pos = 0; + int pos = 0; auto next = lock( pos ); while ( true ) { // Get the next item in the list (acquiring temporary ownership) @@ -122,7 +121,7 @@ inline void AtomicList::insert( TYPE x ) break; } // Test to see if the object is < the value being compared - bool test = d_compare.operator()( x, const_cast(d_objects[next-1]) ); + bool test = d_compare.operator()( x, const_cast( d_objects[next - 1] ) ); if ( test ) { // We want to store this object before next d_next[index] = next; @@ -131,35 +130,35 @@ inline void AtomicList::insert( TYPE x ) } // Release the ownership and move on int last = pos; - pos = next; - next = lock( next ); + pos = next; + next = lock( next ); unlock( last, pos ); } } /****************************************************************** -* Check the internal structures of the list * -* This is mostly thread-safe, but blocks all threads * -******************************************************************/ -template -inline bool AtomicList::check( ) + * Check the internal structures of the list * + * This is mostly thread-safe, but blocks all threads * + ******************************************************************/ +template +inline bool AtomicList::check() { // Get the lock and check for any other threads modifying the list auto start = lock( 0 ); - std::this_thread::sleep_for( 
std::chrono::microseconds(100) ); + std::this_thread::sleep_for( std::chrono::microseconds( 100 ) ); // Perform the checks on the list - bool pass = true; - int N1 = 0; - int N2 = 0; + bool pass = true; + int N1 = 0; + int N2 = 0; int N_unused = 0; - int N_tail = 0; - for (int i=0; i 0 ) { N2++; } else if ( next < -3 ) { @@ -170,71 +169,70 @@ inline bool AtomicList::check( ) pass = false; } } - pass = pass && N_tail==1 && N1==d_N && N2==d_N && N_unused+d_N==MAX_SIZE; - int it = 0; + pass = pass && N_tail == 1 && N1 == d_N && N2 == d_N && N_unused + d_N == MAX_SIZE; + int it = 0; int pos = 0; while ( true ) { - int next = pos==0 ? start:d_next[pos]; + int next = pos == 0 ? start : d_next[pos]; if ( next == -1 ) break; pos = next; it++; } - pass = pass && it==d_N; + pass = pass && it == d_N; // Unlock the list and return the results unlock( 0, start ); return pass; } - /****************************************************************** -* MemoryPool * -******************************************************************/ -template -MemoryPool::MemoryPool( size_t size ) + * MemoryPool * + ******************************************************************/ +template +MemoryPool::MemoryPool( size_t size ) { - static_assert( sizeof(TYPE) >= sizeof(int), + static_assert( sizeof( TYPE ) >= sizeof( int ), "sizeof(TYPE) must be >= sizeof(int) to ensure proper operation" ); - static_assert( sizeof(TYPE) >= sizeof(INT_TYPE), + static_assert( sizeof( TYPE ) >= sizeof( INT_TYPE ), "sizeof(TYPE) must be >= sizeof(INT_TYPE) to ensure proper operation" ); - d_objects = reinterpret_cast( malloc(sizeof(TYPE)*size) ); - d_next = 1; - for (size_t i=0; i(d_objects[i]) = i+1; - reinterpret_cast(d_objects[size-1]) = -1; + d_objects = reinterpret_cast( malloc( sizeof( TYPE ) * size ) ); + d_next = 1; + for ( size_t i = 0; i < size; i++ ) + reinterpret_cast( d_objects[i] ) = i + 1; + reinterpret_cast( d_objects[size - 1] ) = -1; } -template -MemoryPool::~MemoryPool() +template 
+MemoryPool::~MemoryPool() { - free( const_cast( d_objects ) ); + free( const_cast( d_objects ) ); d_objects = nullptr; } -template -inline TYPE* MemoryPool::allocate() +template +inline TYPE *MemoryPool::allocate() { AtomicOperations::int32_atomic i = 0; while ( i == 0 ) AtomicOperations::atomic_swap( &d_next, &i ); TYPE *ptr = nullptr; - if ( i!=-1 ) { - INT_TYPE j = reinterpret_cast(d_objects[i-1]); - ptr = const_cast( &d_objects[i-1] ); - new(ptr) TYPE(); - i = j+1; + if ( i != -1 ) { + INT_TYPE j = reinterpret_cast( d_objects[i - 1] ); + ptr = const_cast( &d_objects[i - 1] ); + new ( ptr ) TYPE(); + i = j + 1; } AtomicOperations::atomic_fetch_and_or( &d_next, i ); return ptr; } -template -inline void MemoryPool::free( TYPE* ptr ) +template +inline void MemoryPool::free( TYPE *ptr ) { ptr->~TYPE(); AtomicOperations::int32_atomic i = 0; while ( i == 0 ) AtomicOperations::atomic_swap( &d_next, &i ); - reinterpret_cast(*ptr) = i-1; - i = ptr - d_objects + 1; + reinterpret_cast( *ptr ) = i - 1; + i = ptr - d_objects + 1; AtomicOperations::atomic_fetch_and_or( &d_next, i ); } diff --git a/threadpool/test/test_atomic.cpp b/threadpool/test/test_atomic.cpp index c3e0c5b0..27c76ee1 100644 --- a/threadpool/test/test_atomic.cpp +++ b/threadpool/test/test_atomic.cpp @@ -1,15 +1,15 @@ #include "threadpool/atomic_helpers.h" #include "common/UnitTest.h" #include "common/Utilities.h" -#include -#include -#include -#include -#include -#include -#include -#include #include +#include +#include +#include +#include +#include +#include +#include +#include #define perr std::cerr @@ -21,18 +21,18 @@ static void modify_counter( int N, AtomicOperations::counter_t &counter ) { if ( N > 0 ) { - for (int i=0; i(stop-start).count() / N_count; + auto stop = std::chrono::high_resolution_clock::now(); + double time_inc_serial = std::chrono::duration( stop - start ).count() / N_count; int val = count.getCount(); if ( val != N_count ) { char tmp[100]; @@ -90,8 +90,8 @@ int main( int, char *[] ) 
// Decrement the counter in serial start = std::chrono::high_resolution_clock::now(); modify_counter( -N_count, count ); - stop = std::chrono::high_resolution_clock::now(); - double time_dec_serial = std::chrono::duration(stop-start).count() / N_count; + stop = std::chrono::high_resolution_clock::now(); + double time_dec_serial = std::chrono::duration( stop - start ).count() / N_count; val = count.getCount(); if ( val != 0 ) { char tmp[100]; @@ -104,12 +104,13 @@ int main( int, char *[] ) std::vector threads( N_threads ); start = std::chrono::high_resolution_clock::now(); for ( int i = 0; i < N_threads; i++ ) - threads[i] = std::thread( modify_counter, N_count, std::ref(count) ); + threads[i] = std::thread( modify_counter, N_count, std::ref( count ) ); for ( int i = 0; i < N_threads; i++ ) threads[i].join(); stop = std::chrono::high_resolution_clock::now(); - double time_inc_parallel = std::chrono::duration(stop-start).count() / ( N_count * N_threads ); - val = count.getCount(); + double time_inc_parallel = + std::chrono::duration( stop - start ).count() / ( N_count * N_threads ); + val = count.getCount(); if ( val != N_count * N_threads ) { char tmp[100]; sprintf( tmp, "Count of %i did not match expected count of %i", val, N_count * N_threads ); @@ -120,12 +121,13 @@ int main( int, char *[] ) // Decrement the counter in parallel start = std::chrono::high_resolution_clock::now(); for ( int i = 0; i < N_threads; i++ ) - threads[i] = std::thread( modify_counter, -N_count, std::ref(count) ); + threads[i] = std::thread( modify_counter, -N_count, std::ref( count ) ); for ( int i = 0; i < N_threads; i++ ) threads[i].join(); stop = std::chrono::high_resolution_clock::now(); - double time_dec_parallel = std::chrono::duration(stop-start).count() / ( N_count * N_threads ); - val = count.getCount(); + double time_dec_parallel = + std::chrono::duration( stop - start ).count() / ( N_count * N_threads ); + val = count.getCount(); if ( val != 0 ) { char tmp[100]; sprintf( tmp, 
"Count of %i did not match expected count of %i", val, 0 ); @@ -147,6 +149,6 @@ int main( int, char *[] ) // Finished ut.report(); - int N_errors = static_cast( ut.NumFailGlobal() ); + auto N_errors = static_cast( ut.NumFailGlobal() ); return N_errors; } diff --git a/threadpool/test/test_atomic_list.cpp b/threadpool/test/test_atomic_list.cpp index 7d4aee16..4717dcc3 100644 --- a/threadpool/test/test_atomic_list.cpp +++ b/threadpool/test/test_atomic_list.cpp @@ -1,210 +1,221 @@ #include "threadpool/atomic_list.h" #include "common/UnitTest.h" #include "common/Utilities.h" -#include -#include -#include -#include -#include -#include -#include -#include -#include #include +#include +#include +#include +#include +#include +#include +#include +#include +#include -static void modify_list( AtomicList& list ) +static void modify_list( AtomicList &list ) { const int N_count = 50000; - for (int i=0; i=(rand()/8); } ); - auto v4 = list.remove( [](int v) { return v>=(rand()/4); } ); - auto v5 = list.remove( [](int v) { return v>=(rand()/2); } ); - if ( v1 !=-1 ) { list.insert( v1 ); } - if ( v2 !=-1 ) { list.insert( v2 ); } - if ( v3 !=-1 ) { list.insert( v3 ); } - if ( v4 !=-1 ) { list.insert( v4 ); } - if ( v5 !=-1 ) { list.insert( v5 ); } + for ( int i = 0; i < N_count; i++ ) { + auto v1 = list.remove_first(); + auto v2 = list.remove( []( int ) { return true; } ); + auto v3 = list.remove( []( int v ) { return v >= ( rand() / 8 ); } ); + auto v4 = list.remove( []( int v ) { return v >= ( rand() / 4 ); } ); + auto v5 = list.remove( []( int v ) { return v >= ( rand() / 2 ); } ); + if ( v1 != -1 ) { + list.insert( v1 ); + } + if ( v2 != -1 ) { + list.insert( v2 ); + } + if ( v3 != -1 ) { + list.insert( v3 ); + } + if ( v4 != -1 ) { + list.insert( v4 ); + } + if ( v5 != -1 ) { + list.insert( v5 ); + } } } -static bool check_list( const std::vector& x, AtomicList& list ) +static bool check_list( const std::vector &x, AtomicList &list ) { bool pass = list.check(); - pass = pass && 
(int) x.size() == list.size(); + pass = pass && (int) x.size() == list.size(); if ( pass ) { - for (size_t i=0; i& list ) +static inline void clear_list( AtomicList &list ) { - for (int i=0; i list(-1); - if ( list.size()==0 && list.check() ) + AtomicList list( -1 ); + if ( list.size() == 0 && list.check() ) ut.passes( "Initialize" ); else ut.failure( "Initialize" ); // Initialize the list with some empty values - for (int i=0; i<80; i++) + for ( int i = 0; i < 80; i++ ) list.insert( rand() ); list.insert( 2 ); list.insert( 1 ); list.insert( rand() ); // Try to pull off a couple of values - int v1 = list.remove( [](int a) { return a==1; } ); // Find the entry with 1 - int v2 = list.remove( [](int) { return true; } ); // Get the first entry - int v3 = list.remove( [](int) { return false; } ); // Fail to get an entry - if ( v1==1 && v2==2 && v3==-1 && list.size()==81 && list.check() ) + int v1 = list.remove( []( int a ) { return a == 1; } ); // Find the entry with 1 + int v2 = list.remove( []( int ) { return true; } ); // Get the first entry + int v3 = list.remove( []( int ) { return false; } ); // Fail to get an entry + if ( v1 == 1 && v2 == 2 && v3 == -1 && list.size() == 81 && list.check() ) ut.passes( "Basic sanity test" ); else ut.failure( "Basic sanity test" ); // Clear the list - while ( list.remove( [](int) { return true; } ) != -1 ) {} + while ( list.remove( []( int ) { return true; } ) != -1 ) { + } // Create a list of known values - //std::vector data0(512); - std::vector data0(5*N_threads); - for (size_t i=0; i data0(512); + std::vector data0( 5 * N_threads ); + for ( int &i : data0 ) + i = rand(); auto data = data0; std::sort( data.begin(), data.end() ); // Test the cost to insert int N_it = 20; - for (int i=0; i time; std::chrono::time_point start, stop; time = time.zero(); - for (int it=0; it(stop-start).count(); - int64_t N1 = list.N_remove(); - bool pass = check_list( data, list ); + stop = std::chrono::high_resolution_clock::now(); + double 
time_serial = std::chrono::duration( stop - start ).count(); + int64_t N1 = list.N_remove(); + bool pass = check_list( data, list ); if ( pass ) ut.passes( "Serial get/insert" ); else ut.failure( "Serial get/insert" ); - printf("serial time = %0.5f s\n",time_serial); - printf("serial time/item = %0.0f ns\n",1e9*time_serial/(N1-N0)); + printf( "serial time = %0.5f s\n", time_serial ); + printf( "serial time/item = %0.0f ns\n", 1e9 * time_serial / ( N1 - N0 ) ); // Have multiple threads reading/writing to the list simultaneously std::vector threads( N_threads ); start = std::chrono::high_resolution_clock::now(); for ( int i = 0; i < N_threads; i++ ) - threads[i] = std::thread( modify_list, std::ref(list) ); + threads[i] = std::thread( modify_list, std::ref( list ) ); for ( int i = 0; i < N_threads; i++ ) threads[i].join(); - stop = std::chrono::high_resolution_clock::now(); - double time_parallel = std::chrono::duration(stop-start).count(); - int64_t N2 = list.N_remove(); - pass = check_list( data, list ); + stop = std::chrono::high_resolution_clock::now(); + double time_parallel = std::chrono::duration( stop - start ).count(); + int64_t N2 = list.N_remove(); + pass = check_list( data, list ); if ( pass ) ut.passes( "Parallel get/insert" ); else ut.failure( "Parallel get/insert" ); - printf("parallel time = %0.5f s\n",time_parallel); - printf("parallel time/item = %0.0f ns\n",1e9*time_parallel/(N2-N1)); + printf( "parallel time = %0.5f s\n", time_parallel ); + printf( "parallel time/item = %0.0f ns\n", 1e9 * time_parallel / ( N2 - N1 ) ); // Try to over-fill the list while ( !list.empty() ) list.remove_first(); - for (int i=1; i<=list.capacity(); i++) + for ( int i = 1; i <= list.capacity(); i++ ) list.insert( i ); try { - list.insert( list.capacity()+1 ); + list.insert( list.capacity() + 1 ); ut.failure( "List overflow" ); - } catch (const std::exception& e) { + } catch ( const std::exception &e ) { ut.passes( "List overflow" ); - } catch(...) { + } catch ( ... 
) { ut.failure( "List overflow (unknown exception)" ); } // Finished ut.report(); - int N_errors = static_cast( ut.NumFailGlobal() ); + auto N_errors = static_cast( ut.NumFailGlobal() ); return N_errors; } diff --git a/threadpool/test/test_thread_pool.cpp b/threadpool/test/test_thread_pool.cpp index 1fd0ae63..b7168f4b 100644 --- a/threadpool/test/test_thread_pool.cpp +++ b/threadpool/test/test_thread_pool.cpp @@ -5,15 +5,15 @@ #include "threadpool/thread_pool.h" #include "common/UnitTest.h" #include "common/Utilities.h" -#include #include +#include +#include +#include #include +#include #include -#include -#include #include #include -#include #define MAX( x, y ) ( ( x ) > ( y ) ? ( x ) : ( y ) ) @@ -28,8 +28,8 @@ #include "mpi.h" #endif -#define to_ns(x) std::chrono::duration_cast(x).count() -#define to_ms(x) std::chrono::duration_cast(x).count() +#define to_ns( x ) std::chrono::duration_cast( x ).count() +#define to_ms( x ) std::chrono::duration_cast( x ).count() // Wrapper functions for mpi @@ -82,18 +82,17 @@ void waste_cpu( int N ) // Sleep for the given time // Note: since we may encounter interrupts, we may not sleep for the desired time // so we need to perform the sleep in a loop -void sleep_ms( int64_t N ) { +void sleep_ms( int64_t N ) +{ auto t1 = std::chrono::high_resolution_clock::now(); auto t2 = std::chrono::high_resolution_clock::now(); - while ( to_ms(t2-t1) < N ) { - int N2 = N - to_ms(t2-t1); - std::this_thread::sleep_for( std::chrono::milliseconds(N2) ); + while ( to_ms( t2 - t1 ) < N ) { + int N2 = N - to_ms( t2 - t1 ); + std::this_thread::sleep_for( std::chrono::milliseconds( N2 ) ); t2 = std::chrono::high_resolution_clock::now(); } } -void sleep_s( int N ) { - sleep_ms(1000*N); -} +void sleep_s( int N ) { sleep_ms( 1000 * N ); } // Function to sleep for N seconds then increment a global count @@ -133,9 +132,9 @@ void print_processor( ThreadPool *tpool ) int processor = ThreadPool::getCurrentProcessor(); char tmp[100]; sprintf( tmp, "%i: 
Thread,proc = %i,%i\n", rank, thread, processor ); - sleep_ms( 10*rank ); + sleep_ms( 10 * rank ); print_processor_mutex.lock(); - std::cout << tmp; + pout << tmp; print_processor_mutex.unlock(); sleep_ms( 100 ); } @@ -161,7 +160,9 @@ int test_member_thread( ThreadPool *tpool ) } -// Functions to test the templates +/****************************************************************** + * Test the TPOOL_ADD_WORK macro with variable number of arguments * + ******************************************************************/ static int myfun0() { return 0; } static int myfun1( int ) { return 1; } static int myfun2( int, float ) { return 2; } @@ -170,60 +171,6 @@ static int myfun4( int, float, double, char ) { return 4; } static int myfun5( int, float, double, char, std::string ) { return 5; } static int myfun6( int, float, double, char, std::string, int ) { return 6; } static int myfun7( int, float, double, char, std::string, int, int ) { return 7; } - - -// Function to test instantiation of functions with different number of arguments -// clang-format off -static void vfunarg00() {} -static void vfunarg01( int ) {} -static void vfunarg02( int, char ) {} -static void vfunarg03( int, char, double ) {} -static void vfunarg04( int, char, double, int ) {} -static void vfunarg05( int, char, double, int, char ) {} -static void vfunarg06( int, char, double, int, char, double ) {} -static void vfunarg07( int, char, double, int, char, double, int ) {} -static void vfunarg08( int, char, double, int, char, double, int, char ) {} -static void vfunarg09( int, char, double, int, char, double, int, char, double ) {} -static void vfunarg10( int, char, double, int, char, double, int, char, double, int ) {} -static void vfunarg11( int, char, double, int, char, double, int, char, double, int, char ) {} -static void vfunarg12( int, char, double, int, char, double, int, char, double, int, char, double ) {} -static void vfunarg13( int, char, double, int, char, double, int, char, double, int, 
char, double, int ) {} -static void vfunarg14( int, char, double, int, char, double, int, char, double, int, char, double, int, char ) {} -static void vfunarg15( int, char, double, int, char, double, int, char, double, int, char, double, int, char, double ) {} -static void vfunarg16( int, char, double, int, char, double, int, char, double, int, char, double, int, char, double, int ) {} -static void vfunarg17( int, char, double, int, char, double, int, char, double, int, char, double, int, char, double, int, char ) {} -static void vfunarg18( int, char, double, int, char, double, int, char, double, int, char, double, int, char, double, int, char, double ) {} -static void vfunarg19( int, char, double, int, char, double, int, char, double, int, char, double, int, char, double, int, char, double, int ) {} -static void vfunarg20( int, char, double, int, char, double, int, char, double, int, char, double, int, char, double, int, char, double, int, char ) {} -static void vfunarg21( int, char, double, int, char, double, int, char, double, int, char, double, int, char, double, int, char, double, int, char, double ) {} -static void vfunarg22( int, char, double, int, char, double, int, char, double, int, char, double, int, char, double, int, char, double, int, char, double, int ) {} -static void vfunarg23( int, char, double, int, char, double, int, char, double, int, char, double, int, char, double, int, char, double, int, char, double, int, char ) {} -static void vfunarg24( int, char, double, int, char, double, int, char, double, int, char, double, int, char, double, int, char, double, int, char, double, int, char, double ) {} -static int funarg00() { return 0; } -static int funarg01( int ) { return 1; } -static int funarg02( int, char ) { return 2; } -static int funarg03( int, char, double ) { return 3; } -static int funarg04( int, char, double, int ) { return 4; } -static int funarg05( int, char, double, int, char ) { return 5; } -static int funarg06( int, char, double, 
int, char, double ) { return 6; } -static int funarg07( int, char, double, int, char, double, int ) { return 7; } -static int funarg08( int, char, double, int, char, double, int, char ) { return 8; } -static int funarg09( int, char, double, int, char, double, int, char, double ) { return 9; } -static int funarg10( int, char, double, int, char, double, int, char, double, int ) { return 10; } -static int funarg11( int, char, double, int, char, double, int, char, double, int, char ) { return 11; } -static int funarg12( int, char, double, int, char, double, int, char, double, int, char, double ) { return 12; } -static int funarg13( int, char, double, int, char, double, int, char, double, int, char, double, int ) { return 13; } -static int funarg14( int, char, double, int, char, double, int, char, double, int, char, double, int, char ) { return 14; } -static int funarg15( int, char, double, int, char, double, int, char, double, int, char, double, int, char, double ) { return 15; } -static int funarg16( int, char, double, int, char, double, int, char, double, int, char, double, int, char, double, int ) { return 16; } -static int funarg17( int, char, double, int, char, double, int, char, double, int, char, double, int, char, double, int, char ) { return 17; } -static int funarg18( int, char, double, int, char, double, int, char, double, int, char, double, int, char, double, int, char, double ) { return 18; } -static int funarg19( int, char, double, int, char, double, int, char, double, int, char, double, int, char, double, int, char, double, int ) { return 19; } -static int funarg20( int, char, double, int, char, double, int, char, double, int, char, double, int, char, double, int, char, double, int, char ) { return 20; } -static int funarg21( int, char, double, int, char, double, int, char, double, int, char, double, int, char, double, int, char, double, int, char, double ) { return 21; } -static int funarg22( int, char, double, int, char, double, int, char, double, int, 
char, double, int, char, double, int, char, double, int, char, double, int ) { return 22; } -static int funarg23( int, char, double, int, char, double, int, char, double, int, char, double, int, char, double, int, char, double, int, char, double, int, char ) { return 23; } -static int funarg24( int, char, double, int, char, double, int, char, double, int, char, double, int, char, double, int, char, double, int, char, double, int, char, double ) { return 24; } static int test_function_arguements( ThreadPool *tpool ) { int N_errors = 0; @@ -231,88 +178,56 @@ static int test_function_arguements( ThreadPool *tpool ) ThreadPool::thread_id_t id0 = TPOOL_ADD_WORK( tpool, myfun0, ( nullptr ) ); ThreadPool::thread_id_t id1 = TPOOL_ADD_WORK( tpool, myfun1, ( (int) 1 ) ); ThreadPool::thread_id_t id2 = TPOOL_ADD_WORK( tpool, myfun2, ( (int) 1, (float) 2 ) ); - ThreadPool::thread_id_t id3 = TPOOL_ADD_WORK( tpool, myfun3, ( (int) 1, (float) 2, (double) 3 ) ); - ThreadPool::thread_id_t id4 = TPOOL_ADD_WORK( tpool, myfun4, ( (int) 1, (float) 2, (double) 3, (char) 4 ) ); - ThreadPool::thread_id_t id5 = TPOOL_ADD_WORK( tpool, myfun5, ( (int) 1, (float) 2, (double) 3, (char) 4, std::string( "test" ) ) ); - ThreadPool::thread_id_t id52= TPOOL_ADD_WORK( tpool, myfun5, ( (int) 1, (float) 2, (double) 3, (char) 4, std::string( "test" ) ), -1 ); - ThreadPool::thread_id_t id6 = TPOOL_ADD_WORK( tpool, myfun6, ( (int) 1, (float) 2, (double) 3, (char) 4, std::string( "test" ), (int) 1 ) ); - ThreadPool::thread_id_t id7 = TPOOL_ADD_WORK( tpool, myfun7, ( (int) 1, (float) 2, (double) 3, (char) 4, std::string( "test" ), (int) 1, (int) 1 ) ); + ThreadPool::thread_id_t id3 = + TPOOL_ADD_WORK( tpool, myfun3, ( (int) 1, (float) 2, (double) 3 ) ); + ThreadPool::thread_id_t id4 = + TPOOL_ADD_WORK( tpool, myfun4, ( (int) 1, (float) 2, (double) 3, (char) 4 ) ); + ThreadPool::thread_id_t id5 = TPOOL_ADD_WORK( + tpool, myfun5, ( (int) 1, (float) 2, (double) 3, (char) 4, std::string( "test" ) ) ); + 
ThreadPool::thread_id_t id52 = TPOOL_ADD_WORK( + tpool, myfun5, ( (int) 1, (float) 2, (double) 3, (char) 4, std::string( "test" ) ), -1 ); + ThreadPool::thread_id_t id6 = TPOOL_ADD_WORK( tpool, myfun6, + ( (int) 1, (float) 2, (double) 3, (char) 4, std::string( "test" ), (int) 1 ) ); + ThreadPool::thread_id_t id7 = TPOOL_ADD_WORK( tpool, myfun7, + ( (int) 1, (float) 2, (double) 3, (char) 4, std::string( "test" ), (int) 1, (int) 1 ) ); tpool->wait_pool_finished(); - if ( !tpool->isFinished( id0 ) ) { N_errors++; } - if ( tpool->getFunctionRet( id0 ) != 0 ) { N_errors++; } - if ( tpool->getFunctionRet( id1 ) != 1 ) { N_errors++; } - if ( tpool->getFunctionRet( id2 ) != 2 ) { N_errors++; } - if ( tpool->getFunctionRet( id3 ) != 3 ) { N_errors++; } - if ( tpool->getFunctionRet( id4 ) != 4 ) { N_errors++; } - if ( tpool->getFunctionRet( id5 ) != 5 ) { N_errors++; } - if ( tpool->getFunctionRet( id52 ) != 5 ){ N_errors++; } - if ( tpool->getFunctionRet( id6 ) != 6 ) { N_errors++; } - if ( tpool->getFunctionRet( id7 ) != 7 ) { N_errors++; } - // Test all the different numbers of arguments allowed - TPOOL_ADD_WORK( tpool, vfunarg00, ( nullptr ) ); - TPOOL_ADD_WORK( tpool, vfunarg01, ( 1 ) ); - TPOOL_ADD_WORK( tpool, vfunarg02, ( 1, 'a' ) ); - TPOOL_ADD_WORK( tpool, vfunarg03, ( 1, 'a', 3.0 ) ); - TPOOL_ADD_WORK( tpool, vfunarg04, ( 1, 'a', 3.0, 4 ) ); - TPOOL_ADD_WORK( tpool, vfunarg05, ( 1, 'a', 3.0, 4, 'e' ) ); - TPOOL_ADD_WORK( tpool, vfunarg06, ( 1, 'a', 3.0, 4, 'e', 6.0 ) ); - TPOOL_ADD_WORK( tpool, vfunarg07, ( 1, 'a', 3.0, 4, 'e', 6.0, 7 ) ); - TPOOL_ADD_WORK( tpool, vfunarg08, ( 1, 'a', 3.0, 4, 'e', 6.0, 7, 'h' ) ); - TPOOL_ADD_WORK( tpool, vfunarg09, ( 1, 'a', 3.0, 4, 'e', 6.0, 7, 'h', 9.0 ) ); - TPOOL_ADD_WORK( tpool, vfunarg10, ( 1, 'a', 3.0, 4, 'e', 6.0, 7, 'h', 9.0, 10 ) ); - TPOOL_ADD_WORK( tpool, vfunarg11, ( 1, 'a', 3.0, 4, 'e', 6.0, 7, 'h', 9.0, 10, 'k' ) ); - TPOOL_ADD_WORK( tpool, vfunarg12, ( 1, 'a', 3.0, 4, 'e', 6.0, 7, 'h', 9.0, 10, 'k', 12.0 ) ); - 
TPOOL_ADD_WORK( tpool, vfunarg13, ( 1, 'a', 3.0, 4, 'e', 6.0, 7, 'h', 9.0, 10, 'k', 12.0, 13 ) ); - TPOOL_ADD_WORK( tpool, vfunarg14, ( 1, 'a', 3.0, 4, 'e', 6.0, 7, 'h', 9.0, 10, 'k', 12.0, 13, 'n' ) ); - TPOOL_ADD_WORK( tpool, vfunarg15, ( 1, 'a', 3.0, 4, 'e', 6.0, 7, 'h', 9.0, 10, 'k', 12.0, 13, 'n', 15.0 ) ); - TPOOL_ADD_WORK( tpool, vfunarg16, ( 1, 'a', 3.0, 4, 'e', 6.0, 7, 'h', 9.0, 10, 'k', 12.0, 13, 'n', 15.0, 16 ) ); - TPOOL_ADD_WORK( tpool, vfunarg17, ( 1, 'a', 3.0, 4, 'e', 6.0, 7, 'h', 9.0, 10, 'k', 12.0, 13, 'n', 15.0, 16, 'q' ) ); - TPOOL_ADD_WORK( tpool, vfunarg18, ( 1, 'a', 3.0, 4, 'e', 6.0, 7, 'h', 9.0, 10, 'k', 12.0, 13, 'n', 15.0, 16, 'q', 18.0 ) ); - TPOOL_ADD_WORK( tpool, vfunarg19, ( 1, 'a', 3.0, 4, 'e', 6.0, 7, 'h', 9.0, 10, 'k', 12.0, 13, 'n', 15.0, 16, 'q', 18.0, 19 ) ); - TPOOL_ADD_WORK( tpool, vfunarg20, ( 1, 'a', 3.0, 4, 'e', 6.0, 7, 'h', 9.0, 10, 'k', 12.0, 13, 'n', 15.0, 16, 'q', 18.0, 19, 't' ) ); - TPOOL_ADD_WORK( tpool, vfunarg21, ( 1, 'a', 3.0, 4, 'e', 6.0, 7, 'h', 9.0, 10, 'k', 12.0, 13, 'n', 15.0, 16, 'q', 18.0, 19, 't', 21.0 ) ); - TPOOL_ADD_WORK( tpool, vfunarg22, ( 1, 'a', 3.0, 4, 'e', 6.0, 7, 'h', 9.0, 10, 'k', 12.0, 13, 'n', 15.0, 16, 'q', 18.0, 19, 't', 21.0, 22 ) ); - TPOOL_ADD_WORK( tpool, vfunarg23, ( 1, 'a', 3.0, 4, 'e', 6.0, 7, 'h', 9.0, 10, 'k', 12.0, 13, 'n', 15.0, 16, 'q', 18.0, 19, 't', 21.0, 22, 'w' ) ); - TPOOL_ADD_WORK( tpool, vfunarg24, ( 1, 'a', 3.0, 4, 'e', 6.0, 7, 'h', 9.0, 10, 'k', 12.0, 13, 'n', 15.0, 16, 'q', 18.0, 19, 't', 21.0, 22, 'w', 24.0 ) ); - std::vector ids( 25 ); - ids[0] = TPOOL_ADD_WORK( tpool, funarg00, ( nullptr ) ); - ids[1] = TPOOL_ADD_WORK( tpool, funarg01, ( 1 ) ); - ids[2] = TPOOL_ADD_WORK( tpool, funarg02, ( 1, 'a' ) ); - ids[3] = TPOOL_ADD_WORK( tpool, funarg03, ( 1, 'a', 3.0 ) ); - ids[4] = TPOOL_ADD_WORK( tpool, funarg04, ( 1, 'a', 3.0, 4 ) ); - ids[5] = TPOOL_ADD_WORK( tpool, funarg05, ( 1, 'a', 3.0, 4, 'e' ) ); - ids[6] = TPOOL_ADD_WORK( tpool, funarg06, ( 1, 'a', 3.0, 4, 'e', 6.0 ) 
); - ids[7] = TPOOL_ADD_WORK( tpool, funarg07, ( 1, 'a', 3.0, 4, 'e', 6.0, 7 ) ); - ids[8] = TPOOL_ADD_WORK( tpool, funarg08, ( 1, 'a', 3.0, 4, 'e', 6.0, 7, 'h' ) ); - ids[9] = TPOOL_ADD_WORK( tpool, funarg09, ( 1, 'a', 3.0, 4, 'e', 6.0, 7, 'h', 9.0 ) ); - ids[10] = TPOOL_ADD_WORK( tpool, funarg10, ( 1, 'a', 3.0, 4, 'e', 6.0, 7, 'h', 9.0, 10 ) ); - ids[11] = TPOOL_ADD_WORK( tpool, funarg11, ( 1, 'a', 3.0, 4, 'e', 6.0, 7, 'h', 9.0, 10, 'k' ) ); - ids[12] = TPOOL_ADD_WORK( tpool, funarg12, ( 1, 'a', 3.0, 4, 'e', 6.0, 7, 'h', 9.0, 10, 'k', 12.0 ) ); - ids[13] = TPOOL_ADD_WORK( tpool, funarg13, ( 1, 'a', 3.0, 4, 'e', 6.0, 7, 'h', 9.0, 10, 'k', 12.0, 13 ) ); - ids[14] = TPOOL_ADD_WORK( tpool, funarg14, ( 1, 'a', 3.0, 4, 'e', 6.0, 7, 'h', 9.0, 10, 'k', 12.0, 13, 'h' ) ); - ids[15] = TPOOL_ADD_WORK( tpool, funarg15, ( 1, 'a', 3.0, 4, 'e', 6.0, 7, 'h', 9.0, 10, 'k', 12.0, 13, 'h', 15.0 ) ); - ids[16] = TPOOL_ADD_WORK( tpool, funarg16, ( 1, 'a', 3.0, 4, 'e', 6.0, 7, 'h', 9.0, 10, 'k', 12.0, 13, 'n', 15.0, 16 ) ); - ids[17] = TPOOL_ADD_WORK( tpool, funarg17, ( 1, 'a', 3.0, 4, 'e', 6.0, 7, 'h', 9.0, 10, 'k', 12.0, 13, 'n', 15.0, 16, 'q' ) ); - ids[18] = TPOOL_ADD_WORK( tpool, funarg18, ( 1, 'a', 3.0, 4, 'e', 6.0, 7, 'h', 9.0, 10, 'k', 12.0, 13, 'n', 15.0, 16, 'q', 18.0 ) ); - ids[19] = TPOOL_ADD_WORK( tpool, funarg19, ( 1, 'a', 3.0, 4, 'e', 6.0, 7, 'h', 9.0, 10, 'k', 12.0, 13, 'n', 15.0, 16, 'q', 18.0, 19 ) ); - ids[20] = TPOOL_ADD_WORK( tpool, funarg20, ( 1, 'a', 3.0, 4, 'e', 6.0, 7, 'h', 9.0, 10, 'k', 12.0, 13, 'n', 15.0, 16, 'q', 18.0, 19, 't' ) ); - ids[21] = TPOOL_ADD_WORK( tpool, funarg21, ( 1, 'a', 3.0, 4, 'e', 6.0, 7, 'h', 9.0, 10, 'k', 12.0, 13, 'n', 15.0, 16, 'q', 18.0, 19, 't', 21.0 ) ); - ids[22] = TPOOL_ADD_WORK( tpool, funarg22, ( 1, 'a', 3.0, 4, 'e', 6.0, 7, 'h', 9.0, 10, 'k', 12.0, 13, 'n', 15.0, 16, 'q', 18.0, 19, 't', 21.0, 22 ) ); - ids[23] = TPOOL_ADD_WORK( tpool, funarg23, ( 1, 'a', 3.0, 4, 'e', 6.0, 7, 'h', 9.0, 10, 'k', 12.0, 13, 'n', 15.0, 16, 'q', 
18.0, 19, 't', 21.0, 22, 'w' ) ); - ids[24] = TPOOL_ADD_WORK( tpool, funarg24, ( 1, 'a', 3.0, 4, 'e', 6.0, 7, 'h', 9.0, 10, 'k', 12.0, 13, 'n', 15.0, 16, 'q', 18.0, 19, 't', 21.0, 22, 'w', 24.0 ) ); - tpool->wait_all( ids ); - for ( size_t i = 0; i < ids.size(); i++ ) { - if ( tpool->getFunctionRet( ids[i] ) != static_cast( i ) ) - N_errors++; + if ( !tpool->isFinished( id0 ) ) { + N_errors++; + } + if ( tpool->getFunctionRet( id0 ) != 0 ) { + N_errors++; + } + if ( tpool->getFunctionRet( id1 ) != 1 ) { + N_errors++; + } + if ( tpool->getFunctionRet( id2 ) != 2 ) { + N_errors++; + } + if ( tpool->getFunctionRet( id3 ) != 3 ) { + N_errors++; + } + if ( tpool->getFunctionRet( id4 ) != 4 ) { + N_errors++; + } + if ( tpool->getFunctionRet( id5 ) != 5 ) { + N_errors++; + } + if ( tpool->getFunctionRet( id52 ) != 5 ) { + N_errors++; + } + if ( tpool->getFunctionRet( id6 ) != 6 ) { + N_errors++; + } + if ( tpool->getFunctionRet( id7 ) != 7 ) { + N_errors++; } return N_errors; } -// clang-format on /****************************************************************** -* Examples to derive a user work item * -******************************************************************/ + * Examples to derive a user work item * + ******************************************************************/ class UserWorkItemVoid : public ThreadPool::WorkItem { public: @@ -323,15 +238,15 @@ public: NULL_USE( dummy ); } // User defined run (can do anything) - virtual void run() override + void run() override { // Perform the tasks printf( "Hello work from UserWorkItem (void)" ); } // Will the routine return a result - virtual bool has_result() const override { return false; } + bool has_result() const override { return false; } // User defined destructor - virtual ~UserWorkItemVoid() {} + ~UserWorkItemVoid() override = default; }; class UserWorkItemInt : public ThreadPool::WorkItemRet { @@ -343,38 +258,31 @@ public: NULL_USE( dummy ); } // User defined run (can do anything) - virtual void run() 
override + void run() override { // Perform the tasks printf( "Hello work from UserWorkItem (int)" ); // Store the results (it's type will match the template) ThreadPool::WorkItemRet::d_result = 1; } - // Will the routine return a result - virtual bool has_result() const override { return false; } // User defined destructor - virtual ~UserWorkItemInt() {} + ~UserWorkItemInt() override = default; }; /****************************************************************** -* test the time to run N tasks in parallel * -******************************************************************/ -inline double run_parallel( ThreadPool *tpool, int N_tasks, int N_work ) + * test the time to run N tasks in parallel * + ******************************************************************/ +template +inline double launchAndTime( ThreadPool &tpool, int N, Ret ( *routine )( Args... ), Args... args ) { - // Make sure the thread pool is empty - tpool->wait_pool_finished(); - // Add the work - std::vector ids; - ids.reserve( N_tasks ); + tpool.wait_pool_finished(); auto start = std::chrono::high_resolution_clock::now(); - for ( int i = 0; i < N_tasks; i++ ) - ids.push_back( TPOOL_ADD_WORK( tpool, waste_cpu, ( N_work ) ) ); - // Wait for the thread pool to finish - tpool->wait_pool_finished(); - // Compute the time spent running the tasks + for ( int i = 0; i < N; i++ ) + ThreadPool_add_work( &tpool, 0, routine, args... 
); + tpool.wait_pool_finished(); auto stop = std::chrono::high_resolution_clock::now(); - return std::chrono::duration(stop-start).count(); + return std::chrono::duration( stop - start ).count(); } @@ -384,8 +292,8 @@ ThreadPool::thread_id_t f2( ThreadPool::thread_id_t a ) { return a; } /****************************************************************** -* Test the basic functionallity of the atomics * -******************************************************************/ + * Test the basic functionallity of the atomics * + ******************************************************************/ int test_atomics() { using namespace AtomicOperations; @@ -411,33 +319,35 @@ int test_atomics() /****************************************************************** -* Test FIFO behavior * -******************************************************************/ -void test_FIFO( UnitTest& ut, ThreadPool& tpool ) + * Test FIFO behavior * + ******************************************************************/ +void test_FIFO( UnitTest &ut, ThreadPool &tpool ) { - int rank = getRank(); - int size = getSize(); - for (int r=0; r ids; - for (size_t i=0; i<4000; i++) - ids.push_back( TPOOL_ADD_WORK( &tpool, sleep_inc2, ( 0.001 ) ) ); + ids.reserve( N ); + for ( size_t i = 0; i < N; i++ ) + ids.emplace_back( TPOOL_ADD_WORK( &tpool, sleep_inc2, ( 0.001 ) ) ); bool pass = true; while ( tpool.N_queued() > 0 ) { - int i1=-1, i2=ids.size(); - for (size_t i=0; i= 0; i-- ) { bool started = ids[i].started(); if ( started ) - i1 = std::max(i1,i); // Last index to processing item + i1 = std::max( i1, i ); // Last index to processing item else - i2 = std::min(i2,i); // First index to queued item + i2 = std::min( i2, i ); // First index to queued item } - int diff = i1==-1 ? 0:(i2-i1-1); - if ( abs(diff)>4 ) { - printf("%i %i %i\n",i1,i2,diff); - pass = pass && abs(i2-i1-1)<=2; + int diff = i1 == -1 ? 
0 : ( i2 - i1 - 1 ); + if ( abs( diff ) > 4 ) { + printf( "%i %i %i\n", i1, i2, diff ); + pass = pass && abs( i2 - i1 - 1 ) <= 2; } } ids.clear(); @@ -451,8 +361,8 @@ void test_FIFO( UnitTest& ut, ThreadPool& tpool ) /****************************************************************** -* The main program * -******************************************************************/ + * The main program * + ******************************************************************/ #ifdef USE_WINDOWS int __cdecl main( int argc, char **argv ) { @@ -510,11 +420,7 @@ int main( int argc, char *argv[] ) // Get the number of processors availible barrier(); - int N_procs = 0; - try { - N_procs = ThreadPool::getNumberOfProcessors(); - } catch ( ... ) { - } + int N_procs = ThreadPool::getNumberOfProcessors(); if ( N_procs > 0 ) ut.passes( "getNumberOfProcessors" ); else @@ -524,15 +430,11 @@ int main( int argc, char *argv[] ) // Get the processor affinities for the process barrier(); - std::vector cpus; - try { - cpus = ThreadPool::getProcessAffinity(); - printp( "%i cpus for current process: ", (int) cpus.size() ); - for ( size_t i = 0; i < cpus.size(); i++ ) - printp( "%i ", cpus[i] ); - printp( "\n" ); - } catch ( ... 
) { - } + std::vector cpus = ThreadPool::getProcessAffinity(); + printp( "%i cpus for current process: ", (int) cpus.size() ); + for ( int cpu : cpus ) + printp( "%i ", cpu ); + printp( "\n" ); if ( !cpus.empty() ) { ut.passes( "getProcessAffinity" ); } else { @@ -559,8 +461,8 @@ int main( int argc, char *argv[] ) cpus = ThreadPool::getProcessAffinity(); std::vector cpus = ThreadPool::getProcessAffinity(); printp( "%i cpus for current process (updated): ", (int) cpus.size() ); - for ( size_t i = 0; i < cpus.size(); i++ ) - printp( "%i ", cpus[i] ); + for ( int cpu : cpus ) + printp( "%i ", cpu ); printp( "\n" ); pass = cpus.size() > 1; } else { @@ -630,8 +532,8 @@ int main( int argc, char *argv[] ) std::vector procs_thread = tpool.getThreadAffinity( i ); if ( procs_thread != procs ) { printp( "%i: Initial thread affinity: ", rank ); - for ( size_t i = 0; i < procs_thread.size(); i++ ) - printp( "%i ", procs_thread[i] ); + for ( int i : procs_thread ) + printp( "%i ", i ); printp( "\n" ); pass = false; } @@ -646,15 +548,15 @@ int main( int argc, char *argv[] ) int N_procs_thread = std::max( (int) cpus.size() / N_threads, 1 ); for ( int i = 0; i < N_threads; i++ ) { std::vector procs_thread( N_procs_thread, -1 ); - for ( int j = 0; j < N_procs_thread; j++ ) + for ( int j = 0; j < N_procs_thread; j++ ) procs_thread[j] = procs[( i * N_procs_thread + j ) % procs.size()]; tpool.setThreadAffinity( i, procs_thread ); sleep_ms( 10 ); // Give time for OS to update thread affinities std::vector procs_thread2 = tpool.getThreadAffinity( i ); if ( procs_thread2 != procs_thread ) { printp( "%i: Final thread affinity: ", rank ); - for ( size_t i = 0; i < procs_thread.size(); i++ ) - printp( "%i ", procs_thread[i] ); + for ( int i : procs_thread ) + printp( "%i ", i ); printp( "\n" ); pass = false; } @@ -674,8 +576,8 @@ int main( int argc, char *argv[] ) for ( int i = 0; i < N_threads; i++ ) { std::vector procs_thread = tpool.getThreadAffinity( i ); printp( "Thread affinity: " ); - 
for ( size_t i = 0; i < procs_thread.size(); i++ ) - printp( "%i ", procs_thread[i] ); + for ( int i : procs_thread ) + printp( "%i ", i ); printp( "\n" ); } @@ -683,9 +585,7 @@ int main( int argc, char *argv[] ) barrier(); ThreadPool::set_OS_warnings( 1 ); print_processor( &tpool ); - for ( int i = 0; i < N_threads; i++ ) - TPOOL_ADD_WORK( &tpool, print_processor, ( &tpool ) ); - tpool.wait_pool_finished(); + launchAndTime( tpool, N_threads, print_processor, &tpool ); // Run some basic tests barrier(); @@ -694,8 +594,8 @@ int main( int argc, char *argv[] ) for ( int i = 0; i < N_work; i++ ) waste_cpu( data1[i] ); } - auto stop = std::chrono::high_resolution_clock::now(); - double time = std::chrono::duration(stop-start).count(); + auto stop = std::chrono::high_resolution_clock::now(); + double time = std::chrono::duration( stop - start ).count(); printp( "Time for serial cycle = %0.0f us\n", 1e6 * time / N_it ); printp( "Time for serial item = %0.0f ns\n", 1e9 * time / ( N_it * N_work ) ); id = TPOOL_ADD_WORK( &tpool, waste_cpu, ( data1[0] ) ); @@ -728,20 +628,14 @@ int main( int argc, char *argv[] ) tpool.wait_pool_finished(); start = std::chrono::high_resolution_clock::now(); sleep_inc( 1 ); - stop = std::chrono::high_resolution_clock::now(); - double sleep_serial = std::chrono::duration(stop-start).count(); - ids2.clear(); - start = std::chrono::high_resolution_clock::now(); - for ( int i = 0; i < N_threads; i++ ) - ids2.push_back( TPOOL_ADD_WORK( &tpool, sleep_inc, ( 1 ) ) ); - tpool.wait_all( N_procs_used, &ids2[0] ); - stop = std::chrono::high_resolution_clock::now(); - ids2.clear(); - double sleep_parallel = std::chrono::duration(stop-start).count(); + stop = std::chrono::high_resolution_clock::now(); + double sleep_serial = std::chrono::duration( stop - start ).count(); + double sleep_parallel = launchAndTime( tpool, N_threads, sleep_inc, 1 ); double sleep_speedup = N_procs_used * sleep_serial / sleep_parallel; printf( "%i: Speedup on %i sleeping threads: 
%0.3f\n", rank, N_procs_used, sleep_speedup ); printf( "%i: ts = %0.3f, tp = %0.3f\n", rank, sleep_serial, sleep_parallel ); - if ( fabs( sleep_serial - 1.0 ) < 0.05 && fabs( sleep_parallel - 1.0 ) < 0.25 && sleep_speedup>3 ) + if ( fabs( sleep_serial - 1.0 ) < 0.05 && fabs( sleep_parallel - 1.0 ) < 0.25 && + sleep_speedup > 3 ) ut.passes( "Passed thread sleep" ); else ut.failure( "Failed thread sleep" ); @@ -770,11 +664,11 @@ int main( int argc, char *argv[] ) // Run in serial start = std::chrono::high_resolution_clock::now(); waste_cpu( N ); - stop = std::chrono::high_resolution_clock::now(); - double time_serial = std::chrono::duration(stop-start).count(); + stop = std::chrono::high_resolution_clock::now(); + double time_serial = std::chrono::duration( stop - start ).count(); // Run in parallel - double time_parallel2 = run_parallel( &tpool, N_procs_used, N / 1000 ); - double time_parallel = run_parallel( &tpool, N_procs_used, N ); + double time_parallel = launchAndTime( tpool, N_procs_used, waste_cpu, N ); + double time_parallel2 = launchAndTime( tpool, N_procs_used, waste_cpu, N / 1000 ); double speedup = N_procs_used * time_serial / time_parallel; printf( "%i: Speedup on %i procs: %0.3f\n", rank, N_procs_used, speedup ); printf( "%i: ts = %0.3f, tp = %0.3f, tp2 = %0.3f\n", rank, time_serial, time_parallel, @@ -823,8 +717,8 @@ int main( int argc, char *argv[] ) ids.reserve( 5 ); global_sleep_count = 0; // Reset the count before this test ThreadPool::thread_id_t id0; - auto id1 = TPOOL_ADD_WORK( &tpool, sleep_inc, ( 1 ) ); - auto id2 = TPOOL_ADD_WORK( &tpool, sleep_inc, ( 2 ) ); + auto id1 = TPOOL_ADD_WORK( &tpool, sleep_inc, ( 1 ) ); + auto id2 = TPOOL_ADD_WORK( &tpool, sleep_inc, ( 2 ) ); auto *wait1 = new WorkItemFull( check_inc, 1 ); auto *wait2 = new WorkItemFull( check_inc, 2 ); wait1->add_dependency( id0 ); @@ -842,15 +736,15 @@ int main( int argc, char *argv[] ) tpool.wait_pool_finished(); // Test waiting on more dependencies than in the thread pool 
(changing priorities) ids.clear(); - for (size_t i=0; i<20; i++) + for ( size_t i = 0; i < 20; i++ ) ids.push_back( TPOOL_ADD_WORK( &tpool, sleep_inc2, ( 0.1 ) ) ); - auto *wait3 = new WorkItemFull( sleep_inc2, 0 ); + auto *wait3 = new WorkItemFull( sleep_inc2, 0 ); wait3->add_dependencies( ids ); id = tpool.add_work( wait3, 50 ); tpool.wait( id ); bool pass = true; - for (size_t i=0; i(stop-start).count(); + time = std::chrono::duration( stop - start ).count(); PROFILE_STOP( timer_name ); printp( " time = %0.0f ms\n", 1e3 * time ); printp( " time / cycle = %0.0f us\n", 1e6 * time / N_it ); printp( " average time / item = %0.0f ns\n", 1e9 * time / ( N_it * N_work ) ); - printp( " create = %i ns\n", static_cast( time_create / ( N_it * N_work ) ) ); - printp( " run = %i ns\n", static_cast( time_run / ( N_it * N_work ) ) ); - printp( " delete = %i us\n", static_cast( time_delete / ( N_it * N_work ) ) ); + printp( " create = %i ns\n", time_create / ( N_it * N_work ) ); + printp( " run = %i ns\n", time_run / ( N_it * N_work ) ); + printp( " delete = %i us\n", time_delete / ( N_it * N_work ) ); } // Test the timing adding a single item @@ -921,17 +815,17 @@ int main( int argc, char *argv[] ) if ( it == 0 ) { printp( "Testing timmings (adding a single item to empty tpool):\n" ); timer_name = "Add single item to empty pool"; - tpool_ptr = &tpool0; + tpool_ptr = &tpool0; } else if ( it == 1 ) { printp( "Testing timmings (adding a single item):\n" ); timer_name = "Add single item to tpool"; - tpool_ptr = &tpool; + tpool_ptr = &tpool; } PROFILE_START( timer_name ); std::vector ids( N_work ); int64_t time_add = 0; int64_t time_wait = 0; - start = std::chrono::high_resolution_clock::now(); + start = std::chrono::high_resolution_clock::now(); for ( int n = 0; n < N_it; n++ ) { auto t1 = std::chrono::high_resolution_clock::now(); for ( int i = 0; i < N_work; i++ ) @@ -939,19 +833,19 @@ int main( int argc, char *argv[] ) auto t2 = std::chrono::high_resolution_clock::now(); 
tpool_ptr->wait_all( N_work, &ids[0] ); auto t3 = std::chrono::high_resolution_clock::now(); - time_add += to_ns(t2-t1); - time_wait += to_ns(t3-t2); + time_add += to_ns( t2 - t1 ); + time_wait += to_ns( t3 - t2 ); if ( ( n + 1 ) % 100 == 0 ) printp( "Cycle %i of %i finished\n", n + 1, N_it ); } stop = std::chrono::high_resolution_clock::now(); - time = std::chrono::duration(stop-start).count(); + time = std::chrono::duration( stop - start ).count(); PROFILE_STOP( timer_name ); printp( " time = %0.0f ms\n", 1e3 * time ); printp( " time / cycle = %0.0f us\n", 1e6 * time / N_it ); printp( " average time / item = %0.0f ns\n", 1e9 * time / ( N_it * N_work ) ); - printp( " create and add = %i ns\n", static_cast( time_add / ( N_it * N_work ) ) ); - printp( " wait = %i us\n", static_cast( time_wait / ( N_it * N_work ) ) ); + printp( " create and add = %i ns\n", time_add / ( N_it * N_work ) ); + printp( " wait = %i us\n", time_wait / ( N_it * N_work ) ); } // Test the timing pre-creating the work items and adding multiple at a time @@ -962,11 +856,11 @@ int main( int argc, char *argv[] ) if ( it == 0 ) { printp( "Testing timmings (adding a block of items to empty tpool):\n" ); timer_name = "Add multiple items to empty pool"; - tpool_ptr = &tpool0; + tpool_ptr = &tpool0; } else if ( it == 1 ) { printp( "Testing timmings (adding a block of items):\n" ); timer_name = "Add multiple items to tpool"; - tpool_ptr = &tpool; + tpool_ptr = &tpool; } PROFILE_START( timer_name ); int64_t time_create_work = 0; @@ -978,26 +872,26 @@ int main( int argc, char *argv[] ) auto t1 = std::chrono::high_resolution_clock::now(); for ( int i = 0; i < N_work; i++ ) work[i] = ThreadPool::createWork( waste_cpu, data1[i] ); - auto t2 = std::chrono::high_resolution_clock::now(); + auto t2 = std::chrono::high_resolution_clock::now(); auto ids = tpool_ptr->add_work( work, priority ); - auto t3 = std::chrono::high_resolution_clock::now(); + auto t3 = std::chrono::high_resolution_clock::now(); 
tpool_ptr->wait_all( ids ); auto t4 = std::chrono::high_resolution_clock::now(); - time_create_work += to_ns(t2-t1); - time_add_work += to_ns(t3-t2); - time_wait_work += to_ns(t4-t3); + time_create_work += to_ns( t2 - t1 ); + time_add_work += to_ns( t3 - t2 ); + time_wait_work += to_ns( t4 - t3 ); if ( ( n + 1 ) % 100 == 0 ) printp( "Cycle %i of %i finished\n", n + 1, N_it ); } stop = std::chrono::high_resolution_clock::now(); - time = std::chrono::duration(stop-start).count(); + time = std::chrono::duration( stop - start ).count(); PROFILE_STOP( timer_name ); printp( " time = %0.0f ms\n", 1e3 * time ); printp( " time / cycle = %0.0f us\n", 1e6 * time / N_it ); printp( " average time / item = %0.0f ns\n", 1e9 * time / ( N_it * N_work ) ); - printp( " create = %i ns\n", static_cast( time_create_work / ( N_it * N_work ) ) ); - printp( " add = %i ns\n", static_cast( time_add_work / ( N_it * N_work ) ) ); - printp( " wait = %i ns\n", static_cast( time_wait_work / ( N_it * N_work ) ) ); + printp( " create = %i ns\n", time_create_work / ( N_it * N_work ) ); + printp( " add = %i ns\n", time_add_work / ( N_it * N_work ) ); + printp( " wait = %i ns\n", time_wait_work / ( N_it * N_work ) ); } // Run a dependency test that tests a simple case that should keep the thread pool busy @@ -1035,8 +929,8 @@ int main( int argc, char *argv[] ) barrier(); pass = true; try { - ThreadPool *tpool = new ThreadPool( MAX_NUM_THREADS - 1 ); - if ( tpool->getNumThreads() != MAX_NUM_THREADS - 1 ) + ThreadPool *tpool = new ThreadPool( ThreadPool::MAX_NUM_THREADS - 1 ); + if ( tpool->getNumThreads() != ThreadPool::MAX_NUM_THREADS - 1 ) pass = false; if ( !ThreadPool::is_valid( tpool ) ) pass = false; @@ -1056,14 +950,14 @@ int main( int argc, char *argv[] ) // Print the test results barrier(); ut.report(); - int N_errors = static_cast( ut.NumFailGlobal() ); + auto N_errors = static_cast( ut.NumFailGlobal() ); // Shudown MPI pout << "Shutting down\n"; barrier(); #ifdef USE_TIMER if ( rank == 0 ) - 
MemoryApp::print( std::cout ); + MemoryApp::print( pout ); #endif #ifdef USE_MPI MPI_Finalize(); diff --git a/threadpool/thread_pool.cpp b/threadpool/thread_pool.cpp index 30281727..9b0ff4fd 100644 --- a/threadpool/thread_pool.cpp +++ b/threadpool/thread_pool.cpp @@ -5,14 +5,14 @@ #include "ProfilerApp.h" #include #include +#include #include +#include +#include #include #include -#include -#include -#include #include -#include +#include #define perr std::cerr @@ -22,6 +22,15 @@ // OS specific includes / definitions // clang-format off +#if defined( WIN32 ) || defined( _WIN32 ) || defined( WIN64 ) || defined( _WIN64 ) + #define USE_WINDOWS +#elif defined( __APPLE__ ) + #define USE_MAC +#elif defined( __linux ) || defined( __unix ) || defined( __posix ) + #define USE_LINUX +#else + #error Unknown OS +#endif #if defined( USE_WINDOWS ) #include #include @@ -54,41 +63,45 @@ // Set some macros #if PROFILE_THREADPOOL_PERFORMANCE - #define PROFILE_THREADPOOL_START( X ) PROFILE_START( X, 3 ) - #define PROFILE_THREADPOOL_START2( X ) PROFILE_START2( X, 3 ) - #define PROFILE_THREADPOOL_STOP( X ) PROFILE_STOP( X, 3 ) - #define PROFILE_THREADPOOL_STOP2( X ) PROFILE_STOP2( X, 3 ) +#define PROFILE_THREADPOOL_START( X ) PROFILE_START( X, 3 ) +#define PROFILE_THREADPOOL_START2( X ) PROFILE_START2( X, 3 ) +#define PROFILE_THREADPOOL_STOP( X ) PROFILE_STOP( X, 3 ) +#define PROFILE_THREADPOOL_STOP2( X ) PROFILE_STOP2( X, 3 ) #else - #define PROFILE_THREADPOOL_START( X ) \ - do { \ - } while ( 0 ) - #define PROFILE_THREADPOOL_START2( X ) \ - do { \ - } while ( 0 ) - #define PROFILE_THREADPOOL_STOP( X ) \ - do { \ - } while ( 0 ) - #define PROFILE_THREADPOOL_STOP2( X ) \ - do { \ - } while ( 0 ) +#define PROFILE_THREADPOOL_START( X ) \ + do { \ + } while ( 0 ) +#define PROFILE_THREADPOOL_START2( X ) \ + do { \ + } while ( 0 ) +#define PROFILE_THREADPOOL_STOP( X ) \ + do { \ + } while ( 0 ) +#define PROFILE_THREADPOOL_STOP2( X ) \ + do { \ + } while ( 0 ) #endif #if 
MONITOR_THREADPOOL_PERFORMANCE == 1 - #define accumulate( x, t1, t2 ) AtomicOperations::atomic_add( &x, \ - std::chrono::duration_cast(t2-t1).count() ); +#define accumulate( x, t1, t2 ) \ + AtomicOperations::atomic_add( \ + &x, std::chrono::duration_cast( t2 - t1 ).count() ); #endif #if MONITOR_THREADPOOL_PERFORMANCE == 1 - static AtomicOperations::int64_atomic total_add_work_time[5] = {0,0,0,0,0}; +static AtomicOperations::int64_atomic total_add_work_time[5] = { 0, 0, 0, 0, 0 }; #endif // Helper functions -template -void quicksort( int N, T* data ); -template -inline void quicksort( std::vector &x ) { quicksort((int)x.size(),x.data()); } -static inline int find_id( int, const ThreadPool::thread_id_t*, const ThreadPool::thread_id_t& ); +template +void quicksort( int N, T *data ); +template +inline void quicksort( std::vector &x ) +{ + quicksort( (int) x.size(), x.data() ); +} +static inline int find_id( int, const ThreadPool::thread_id_t *, const ThreadPool::thread_id_t & ); // Function to generate a random size_t number (excluding 0 and ~0) @@ -116,8 +129,8 @@ static size_t rand_size_t() /****************************************************************** -* Run some basic compile-time checks * -******************************************************************/ + * Run some basic compile-time checks * + ******************************************************************/ #if MAX_NUM_THREADS % 64 != 0 // We use a bit array for d_active and d_cancel #error MAX_NUM_THREADS must be a multiple of 64 @@ -130,47 +143,52 @@ static size_t rand_size_t() // We store the indicies to the queue list as short ints #error MAX_QUEUED must < 65535 #endif +// Check the c++ std +#if CXX_STD == 98 +#error Thread pool class requires c++11 or newer +#endif /****************************************************************** -* Get/Set a bit * -* Note: these functions are thread-safe * -******************************************************************/ + * Get/Set a bit * + * Note: these 
functions are thread-safe * + ******************************************************************/ static inline void set_bit( volatile AtomicOperations::int64_atomic *x, size_t index ) { uint64_t mask = 0x01; mask <<= index % 64; - size_t i = index / 64; + size_t i = index / 64; bool test = false; while ( !test ) { AtomicOperations::int64_atomic y = x[i]; - test = AtomicOperations::atomic_compare_and_swap( &x[i], y, (y|mask) ); + test = AtomicOperations::atomic_compare_and_swap( &x[i], y, ( y | mask ) ); } } static inline void unset_bit( volatile AtomicOperations::int64_atomic *x, size_t index ) { uint64_t mask = 0x01; mask <<= index % 64; - mask = ~mask; - size_t i = index / 64; + mask = ~mask; + size_t i = index / 64; bool test = false; while ( !test ) { AtomicOperations::int64_atomic y = x[i]; - test = AtomicOperations::atomic_compare_and_swap( &x[i], y, (y&mask) ); + test = AtomicOperations::atomic_compare_and_swap( &x[i], y, ( y & mask ) ); } } static inline bool get_bit( const volatile AtomicOperations::int64_atomic *x, size_t index ) { uint64_t mask = 0x01; mask <<= index % 64; - AtomicOperations::int64_atomic y = x[index / 64]; // This is thread-safe since we only care about a single bit + // This is thread-safe since we only care about a single bit + AtomicOperations::int64_atomic y = x[index / 64]; return ( y & mask ) != 0; } /****************************************************************** -* Simple function to check if the parity is odd (true) or even * -******************************************************************/ + * Simple function to check if the parity is odd (true) or even * + ******************************************************************/ static inline bool is_odd8( size_t x ) { // This only works for 64-bit integers x ^= ( x >> 1 ); @@ -181,7 +199,7 @@ static inline bool is_odd8( size_t x ) x ^= ( x >> 32 ); return ( x & 0x01 ) > 0; } -template +template static inline int count_bits( int_type x ) { int count = 0; @@ -194,8 +212,18 @@ 
static inline int count_bits( int_type x ) /****************************************************************** -* Set the bahvior of OS warnings * -******************************************************************/ + * Set the global constants * + ******************************************************************/ +constexpr int ThreadPool::MAX_NUM_THREADS; +constexpr int ThreadPool::MAX_QUEUED; +constexpr int ThreadPool::MAX_WAIT; +constexpr bool ThreadPool::PROFILE_THREADPOOL_PERFORMANCE; +constexpr bool ThreadPool::MONITOR_THREADPOOL_PERFORMANCE; + + +/****************************************************************** + * Set the behavior of OS warnings * + ******************************************************************/ static int global_OS_behavior = 0; std::mutex OS_warning_mutex; void ThreadPool::set_OS_warnings( int behavior ) @@ -213,11 +241,14 @@ static void OS_warning( const std::string &message ) } OS_warning_mutex.unlock(); } - +void ThreadPool::setErrorHandler( std::function fun ) +{ + d_errorHandler = fun; +} /****************************************************************** -* Function to return the number of processors availible * -******************************************************************/ + * Function to return the number of processors availible * + ******************************************************************/ int ThreadPool::getNumberOfProcessors() { #if defined( USE_LINUX ) || defined( USE_MAC ) @@ -233,8 +264,8 @@ int ThreadPool::getNumberOfProcessors() /****************************************************************** -* Function to return the processor number of the current thread * -******************************************************************/ + * Function to return the processor number of the current thread * + ******************************************************************/ int ThreadPool::getCurrentProcessor() { #if defined( USE_LINUX ) @@ -251,8 +282,8 @@ int ThreadPool::getCurrentProcessor() 
/****************************************************************** -* Function to get/set the affinity of the current process * -******************************************************************/ + * Function to get/set the affinity of the current process * + ******************************************************************/ std::vector ThreadPool::getProcessAffinity() { std::vector procs; @@ -325,8 +356,8 @@ void ThreadPool::setProcessAffinity( std::vector procs ) /****************************************************************** -* Function to get the thread affinities * -******************************************************************/ + * Function to get the thread affinities * + ******************************************************************/ #ifdef USE_WINDOWS DWORD GetThreadAffinityMask( HANDLE thread ) { @@ -387,7 +418,7 @@ std::vector ThreadPool::getThreadAffinity( int thread ) const if ( thread >= getNumThreads() ) std::logic_error( "Invalid thread number" ); std::vector procs; - auto handle = const_cast( d_thread[thread] ).native_handle(); + auto handle = const_cast( d_thread[thread] ).native_handle(); #ifdef USE_LINUX #ifdef _GNU_SOURCE cpu_set_t mask; @@ -423,8 +454,8 @@ std::vector ThreadPool::getThreadAffinity( int thread ) const /****************************************************************** -* Function to set the thread affinity * -******************************************************************/ + * Function to set the thread affinity * + ******************************************************************/ void ThreadPool::setThreadAffinity( std::vector procs ) { #ifdef USE_LINUX @@ -458,7 +489,7 @@ void ThreadPool::setThreadAffinity( int thread, std::vector procs ) const { if ( thread >= getNumThreads() ) std::logic_error( "Invalid thread number" ); - auto handle = const_cast( d_thread[thread] ).native_handle(); + auto handle = const_cast( d_thread[thread] ).native_handle(); #ifdef USE_LINUX #ifdef __USE_GNU cpu_set_t mask; @@ 
-490,15 +521,15 @@ void ThreadPool::setThreadAffinity( int thread, std::vector procs ) const /****************************************************************** -* Function to perform some basic checks before we start * -******************************************************************/ + * Function to perform some basic checks before we start * + ******************************************************************/ void ThreadPool::check_startup( size_t size0 ) { // Check the size of the class to make sure that we don't have any // byte alignment problems between a library implimentation and a calling pacakge size_t size1 = sizeof( ThreadPool ); - size_t size2 = ( (size_t) &d_NULL_HEAD ) - ( ( size_t ) this ) + sizeof( size_t ); - size_t size3 = ( (size_t) &d_NULL_TAIL ) - ( ( size_t ) this ) + sizeof( size_t ); + size_t size2 = ( (size_t) &d_NULL_HEAD ) - ( (size_t) this ) + sizeof( size_t ); + size_t size3 = ( (size_t) &d_NULL_TAIL ) - ( (size_t) this ) + sizeof( size_t ); if ( size0 != size1 || size1 < size2 || size1 < size3 ) throw std::logic_error( "Internal data format problem" ); // Check the size of variables @@ -517,7 +548,7 @@ void ThreadPool::check_startup( size_t size0 ) ThreadPool::thread_id_t id; if ( id.getPriority() != -128 ) pass = false; - id.reset( 3, 564, NULL ); + id.reset( 3, 564, nullptr ); if ( id.getPriority() != 3 || id.getLocalID() != 564 ) pass = false; if ( count_bits( 0x0 ) != 0 || count_bits( 0x03 ) != 2 ) @@ -530,8 +561,10 @@ void ThreadPool::check_startup( size_t size0 ) if ( is_odd8( ~( (size_t) 0 ) ) || !is_odd8( thread_id_t::maxThreadID ) ) pass = false; for ( size_t i = 0; i < 1024; i++ ) { - if ( ( count_bits( thread_id_t::maxThreadID - i ) % 2 == 1 ) != is_odd8( thread_id_t::maxThreadID - i ) ) { - printp( "%i %i %s\n", count_bits( thread_id_t::maxThreadID - i ), is_odd8( thread_id_t::maxThreadID - i ) ? 
1 : 0, + if ( ( count_bits( thread_id_t::maxThreadID - i ) % 2 == 1 ) != + is_odd8( thread_id_t::maxThreadID - i ) ) { + printp( "%i %i %s\n", count_bits( thread_id_t::maxThreadID - i ), + is_odd8( thread_id_t::maxThreadID - i ) ? 1 : 0, std::bitset<64>( thread_id_t::maxThreadID - i ).to_string().c_str() ); pass = false; } @@ -550,27 +583,28 @@ void ThreadPool::check_startup( size_t size0 ) /****************************************************************** -* Function to initialize the thread pool * -******************************************************************/ + * Function to initialize the thread pool * + ******************************************************************/ void ThreadPool::initialize( const int N, const char *affinity, int N_procs, const int *procs ) { // Initialize the header/tail d_NULL_HEAD = rand_size_t(); d_NULL_TAIL = d_NULL_HEAD; // Initialize the variables to NULL values - d_id_assign = 0; - d_signal_empty = false; - d_signal_count = 0; - d_N_threads = 0; - d_num_active = 0; - d_N_added = 0; - d_N_started = 0; - d_N_finished = 0; + d_id_assign = 0; + d_signal_empty = false; + d_signal_count = 0; + d_N_threads = 0; + d_num_active = 0; + d_N_added = 0; + d_N_started = 0; + d_N_finished = 0; + d_max_wait_time = 600; memset( (void *) d_active, 0, MAX_NUM_THREADS / 8 ); memset( (void *) d_cancel, 0, MAX_NUM_THREADS / 8 ); d_wait_last = nullptr; - for ( int i = 0; i < MAX_WAIT; i++ ) - d_wait[i] = nullptr; + for ( auto &i : d_wait ) + i = nullptr; // Initialize the id d_id_assign = thread_id_t::maxThreadID; // Create the threads @@ -579,14 +613,14 @@ void ThreadPool::initialize( const int N, const char *affinity, int N_procs, con /****************************************************************** -* This is the de-constructor * -******************************************************************/ + * This is the de-constructor * + ******************************************************************/ ThreadPool::~ThreadPool() { - if ( 
!is_valid( this ) ) { - std::cerr << "Thread pool is not valid\n"; - std::terminate(); - } + DISABLE_WARNINGS + if ( !is_valid( this ) ) + throw std::logic_error( "Thread pool is not valid" ); + ENABLE_WARNINGS // Destroy the threads setNumThreads( 0 ); // Delete all remaining data @@ -598,16 +632,15 @@ ThreadPool::~ThreadPool() // Print the performance metrics printp( "ThreadPool Performance:\n" ); printp( "add_work: %lu us, %lu us, %lu us, %lu us, %lu us\n", - total_add_work_time[0]/1000, total_add_work_time[1]/1000, - total_add_work_time[2]/1000, total_add_work_time[3]/1000, - total_add_work_time[4]/1000 ); + total_add_work_time[0] / 1000, total_add_work_time[1] / 1000, total_add_work_time[2] / 1000, + total_add_work_time[3] / 1000, total_add_work_time[4] / 1000 ); #endif } /****************************************************************** -* Check if the pointer points to a valid thread pool object * -******************************************************************/ + * Check if the pointer points to a valid thread pool object * + ******************************************************************/ bool ThreadPool::is_valid( const ThreadPool *tpool ) { if ( tpool == nullptr ) @@ -621,8 +654,8 @@ bool ThreadPool::is_valid( const ThreadPool *tpool ) /****************************************************************** -* This function creates the threads in the thread pool * -******************************************************************/ + * This function creates the threads in the thread pool * + ******************************************************************/ void ThreadPool::setNumThreads( int num_worker_threads, const char *affinity2, int N_procs, const int *procs ) { @@ -643,8 +676,8 @@ void ThreadPool::setNumThreads( int d_N_threads_diff = num_worker_threads - d_N_threads; if ( d_N_threads_diff > 0 ) { // Check that no threads are in the process of being deleted - for ( int i = 0; i < MAX_NUM_THREADS / 64; i++ ) { - if ( d_cancel[i] != 0 ) + for ( 
long i : d_cancel ) { + if ( i != 0 ) throw std::logic_error( "Threads are being created and destroyed at the same time" ); } @@ -670,11 +703,11 @@ void ThreadPool::setNumThreads( j++; } // Wait for all of the threads to finish initialization - while ( 1 ) { - std::this_thread::sleep_for( std::chrono::milliseconds(25) ); + while ( true ) { + std::this_thread::sleep_for( std::chrono::milliseconds( 25 ) ); bool wait = false; - for ( int i = 0; i < MAX_NUM_THREADS / 64; i++ ) { - if ( d_cancel[i] != 0 ) + for ( long i : d_cancel ) { + if ( i != 0 ) wait = true; } if ( !wait ) @@ -684,7 +717,7 @@ void ThreadPool::setNumThreads( #if defined( USE_LINUX ) || defined( USE_MAC ) pthread_attr_destroy( &attr ); #endif - std::this_thread::sleep_for( std::chrono::milliseconds(25) ); + std::this_thread::sleep_for( std::chrono::milliseconds( 25 ) ); delete[] tmp; } else if ( d_N_threads_diff < 0 ) { // Reduce the number of threads @@ -697,7 +730,7 @@ void ThreadPool::setNumThreads( set_bit( d_cancel, d_N_threads - 1 + i ); // Wake all threads to process the shutdown d_wait_work.notify_all(); - std::this_thread::sleep_for( std::chrono::milliseconds(25) ); + std::this_thread::sleep_for( std::chrono::milliseconds( 25 ) ); // Wait for the threads to close for ( int i = 0; i > d_N_threads_diff; i-- ) { d_thread[d_N_threads - 1 + i].join(); @@ -732,13 +765,13 @@ void ThreadPool::setNumThreads( // We do not have a list of cpus to use, do nothing (OS not supported) } else if ( affinity == "none" ) { // We are using the default thread affinities (all threads get all procs of the program) - for ( int i = 0; i < d_N_threads; i++ ) + for ( int i = 0; i < d_N_threads; i++ ) t_procs[i] = cpus; } else if ( affinity == "independent" ) { // We want to use an independent set of processors for each thread if ( (int) cpus.size() == d_N_threads ) { // The number of cpus matches the number of threads - for ( int i = 0; i < d_N_threads; i++ ) + for ( int i = 0; i < d_N_threads; i++ ) t_procs[i] = 
std::vector( 1, cpus[i] ); } else if ( (int) cpus.size() > d_N_threads ) { // There are more cpus than threads, threads will use more the one processor @@ -752,7 +785,7 @@ void ThreadPool::setNumThreads( } } else { // There are fewer cpus than threads, threads will share a processor - int N_threads_proc = + auto N_threads_proc = static_cast( ( cpus.size() + d_N_threads - 1 ) / cpus.size() ); for ( int i = 0; i < d_N_threads; i++ ) t_procs[i].push_back( cpus[i / N_threads_proc] ); @@ -776,10 +809,10 @@ void ThreadPool::setNumThreads( /****************************************************************** -* This is the function that controls the individual thread and * -* allows it to do work. * -* Note: this function is lock free * -******************************************************************/ + * This is the function that controls the individual thread and * + * allows it to do work. * + * Note: this function is lock free * + ******************************************************************/ void ThreadPool::tpool_thread( int thread_id ) { bool shutdown = false; @@ -797,8 +830,8 @@ void ThreadPool::tpool_thread( int thread_id ) try { std::vector cpus = ThreadPool::getProcessAffinity(); printp( "%i cpus for current thread: ", (int) cpus.size() ); - for ( size_t i = 0; i < cpus.size(); i++ ) - printp( "%i ", cpus[i] ); + for ( int cpu : cpus ) + printp( "%i ", cpu ); printp( "\n" ); } catch ( ... 
) { printp( "Unable to get process affinity\n" ); @@ -811,24 +844,39 @@ void ThreadPool::tpool_thread( int thread_id ) // Check if there is work to do if ( d_queue_list.size() > 0 ) { // Get next work item to process - auto work_id = d_queue_list.remove( []( const thread_id_t& id ) { return id.ready(); } ); + auto work_id = + d_queue_list.remove( []( const thread_id_t &id ) { return id.ready(); } ); if ( work_id.isNull() ) { std::this_thread::yield(); continue; } - WorkItem *work = work_id.work( ); + WorkItem *work = work_id.work(); AtomicOperations::atomic_increment( &d_N_started ); // Start work here PROFILE_THREADPOOL_START( "thread working" ); - work->d_state = 2; - work->run(); - work->d_state = 3; + work->d_state = 2; + if ( d_errorHandler ) { + try { + work->run(); + } catch ( std::exception &e ) { + auto msg = Utilities::stringf( + "Error, caught exception in thread %i:\n %s\n", thread_id, e.what() ); + d_errorHandler( msg ); + } catch ( ... ) { + auto msg = Utilities::stringf( + "Error, caught unknown exception in thread %i\n", thread_id ); + d_errorHandler( msg ); + } + } else { + work->run(); + } + work->d_state = 3; PROFILE_THREADPOOL_STOP( "thread working" ); AtomicOperations::atomic_increment( &d_N_finished ); // Check if any threads are waiting on the current work item // This can be done without blocking - for ( int i = 0; i < MAX_WAIT; i++ ) { - const wait_ids_struct *wait = const_cast(d_wait[i]); + for ( auto &i : d_wait ) { + auto wait = AtomicOperations::atomic_get( &i ); if ( wait != nullptr ) wait->id_finished( work_id ); } @@ -849,7 +897,7 @@ void ThreadPool::tpool_thread( int thread_id ) } // Wait for work PROFILE_THREADPOOL_STOP2( "thread active" ); - d_wait_work.wait_for(1e-3); + d_wait_work.wait_for( 1e-3 ); PROFILE_THREADPOOL_START2( "thread active" ); AtomicOperations::atomic_increment( &d_num_active ); set_bit( d_active, thread_id ); @@ -865,21 +913,22 @@ void ThreadPool::tpool_thread( int thread_id ) 
/****************************************************************** -* This is the function that adds work to the thread pool * -* Note: this version uses a last in - first out work scheduling. * -******************************************************************/ -inline void ThreadPool::add_work( const ThreadPool::thread_id_t& id ) + * This is the function that adds work to the thread pool * + * Note: this version uses a last in - first out work scheduling. * + ******************************************************************/ +inline void ThreadPool::add_work( const ThreadPool::thread_id_t &id ) { - auto work = id.work(); + auto work = id.work(); work->d_state = 1; // Check and change priorities of dependency ids const int priority = id.getPriority(); - for (int i=0; id_N_ids; i++) { - const auto& id1 = work->d_ids[i]; - if ( !id1.started() && id1d_N_ids; i++ ) { + const auto &id1 = work->d_ids[i]; + if ( !id1.started() && id1 < id ) { // Remove and add the id back with a higher priority - auto id2 = d_queue_list.remove( []( const thread_id_t& a, const thread_id_t& b ) { return a==b; }, id1 ); - id2.setPriority( std::max(priority,id2.getPriority()) ); + auto id2 = d_queue_list.remove( + []( const thread_id_t &a, const thread_id_t &b ) { return a == b; }, id1 ); + id2.setPriority( std::max( priority, id2.getPriority() ) ); d_queue_list.insert( id2 ); } } @@ -894,7 +943,7 @@ void ThreadPool::add_work( if ( N > block_size ) { size_t i = 0; while ( i < N ) { - add_work( std::min(N-i,block_size), &work[i], &priority[i], &ids[i] ); + add_work( std::min( N - i, block_size ), &work[i], &priority[i], &ids[i] ); i += block_size; } return; @@ -905,7 +954,7 @@ void ThreadPool::add_work( #endif // Create the thread ids (can be done without blocking) for ( size_t i = 0; i < N; i++ ) - ids[i].reset( priority[i], AtomicOperations::atomic_decrement(&d_id_assign), work[i] ); + ids[i].reset( priority[i], AtomicOperations::atomic_decrement( &d_id_assign ), work[i] ); #if 
MONITOR_THREADPOOL_PERFORMANCE auto t2 = std::chrono::high_resolution_clock::now(); accumulate( total_add_work_time[0], t1, t2 ); @@ -913,23 +962,23 @@ void ThreadPool::add_work( // If there are no threads, perform the work immediately if ( d_N_threads < 1 ) { for ( size_t i = 0; i < N; i++ ) { - work[i]->d_state = 2; + work[i]->d_state = 2; work[i]->run(); - work[i]->d_state = 3; + work[i]->d_state = 3; } - #if MONITOR_THREADPOOL_PERFORMANCE - auto t5 = std::chrono::high_resolution_clock::now(); - accumulate( total_add_work_time[4], t2, t5 ); - #endif +#if MONITOR_THREADPOOL_PERFORMANCE + auto t5 = std::chrono::high_resolution_clock::now(); + accumulate( total_add_work_time[4], t2, t5 ); +#endif PROFILE_THREADPOOL_STOP2( "add_work" ); return; } // Wait for enough room in the queue (doesn't need blocking since it isn't that precise) if ( N > static_cast( MAX_QUEUED - d_queue_list.size() ) ) { - int N_wait = static_cast( N - ( MAX_QUEUED - d_queue_list.size() ) ); + auto N_wait = static_cast( N - ( MAX_QUEUED - d_queue_list.size() ) ); while ( N_wait > 0 ) { d_signal_count = static_cast( std::min( N_wait, 255 ) ); - d_wait_finished.wait_for(1e-4); + d_wait_finished.wait_for( 1e-4 ); N_wait = static_cast( N - ( MAX_QUEUED - d_queue_list.size() ) ); } } @@ -965,19 +1014,8 @@ void ThreadPool::add_work( /****************************************************************** -* This function removes a finished work item * -******************************************************************/ -ThreadPool::WorkItem *ThreadPool::getFinishedWorkItem( ThreadPool::thread_id_t id ) const -{ - if ( id.finished() ) - return id.work(); - return nullptr; -} - - -/****************************************************************** -* This function waits for a some of the work items to finish * -******************************************************************/ + * This function waits for a some of the work items to finish * + 
******************************************************************/ static inline void check_finished( size_t N_work, const ThreadPool::thread_id_t *ids, size_t &N_finished, bool *finished ) { @@ -1004,8 +1042,8 @@ int ThreadPool::wait_some( N_finished++; } size_t local_id = ids[k].getLocalID(); - bool test = local_id == 0 || local_id > thread_id_t::maxThreadID || local_id <= next_id; - test = test && !finished[k]; + bool test = local_id == 0 || local_id > thread_id_t::maxThreadID || local_id <= next_id; + test = test && !finished[k]; if ( test ) throw std::logic_error( "Invalid ids for wait" ); } @@ -1018,7 +1056,7 @@ int ThreadPool::wait_some( auto tmp = new wait_ids_struct( N_work, ids, N_wait, d_cond_pool, MAX_WAIT, d_wait ); // Wait for the ids auto t1 = std::chrono::high_resolution_clock::now(); - while ( !tmp->wait_for(0.01) ) { + while ( !tmp->wait_for( 0.01 ) ) { check_wait_time( t1 ); } // Update the ids that have finished @@ -1027,33 +1065,35 @@ int ThreadPool::wait_some( throw std::logic_error( "Internal error: failed to wait" ); // Delete the wait event struct // Note: we want to maintain the reference in case a thread is still using it - // Note: technically this should be atomic - std::swap(d_wait_last,tmp); + // Note: technically this should be atomic, but it really isn't necessary here + std::swap( d_wait_last, tmp ); delete tmp; return N_finished; } /****************************************************************** -* This function waits for all of the threads to finish their work * -******************************************************************/ -void ThreadPool::check_wait_time( std::chrono::time_point& t1 ) const + * This function waits for all of the threads to finish their work * + ******************************************************************/ +void ThreadPool::check_wait_time( + std::chrono::time_point &t1 ) const { auto t2 = std::chrono::high_resolution_clock::now(); - if ( std::chrono::duration_cast(t2-t1).count() > 
MAX_WAIT_TIME_DEBUG ) { - std::cout << "Warning: Maximum wait time in ThreadPool exceeded, threads may be hung\n"; - std::cout << "N_active: " << d_num_active << std::endl; - std::cout << "N_queued: " << d_queue_list.size() << std::endl; - std::cout << "N_added: " << d_N_added << std::endl; - std::cout << "N_started: " << d_N_started << std::endl; - std::cout << "N_finished: " << d_N_finished << std::endl; - std::cout << "queue.insert(): " << d_queue_list.N_insert() << std::endl; - std::cout << "queue.remove(): " << d_queue_list.N_remove() << std::endl; - std::cout << "Stack Trace:\n"; - auto call_stack = StackTrace::getAllCallStacks( ); + if ( std::chrono::duration_cast( t2 - t1 ).count() > d_max_wait_time ) { + pout << "Warning: Maximum wait time in ThreadPool exceeded, threads may be hung\n"; + pout << "N_active: " << d_num_active << std::endl; + pout << "N_queued: " << d_queue_list.size() << std::endl; + pout << "N_added: " << d_N_added << std::endl; + pout << "N_started: " << d_N_started << std::endl; + pout << "N_finished: " << d_N_finished << std::endl; + pout << "queue.insert(): " << d_queue_list.N_insert() << std::endl; + pout << "queue.remove(): " << d_queue_list.N_remove() << std::endl; + pout << "Stack Trace:\n"; + auto call_stack = StackTrace::getAllCallStacks(); + StackTrace::cleanupStackTrace( call_stack ); auto text = call_stack.print( " " ); - for ( auto& line : text ) - std::cout << line << std::endl; + for ( auto &line : text ) + pout << line << std::endl; t1 = std::chrono::high_resolution_clock::now(); } } @@ -1068,82 +1108,91 @@ void ThreadPool::wait_pool_finished() const while ( d_num_active > 0 || d_queue_list.size() > 0 ) { check_wait_time( t1 ); d_signal_empty = true; - d_wait_finished.wait_for(10e-6); + d_wait_finished.wait_for( 10e-6 ); } d_signal_empty = false; } /****************************************************************** -* Member functions of wait_ids_struct * -******************************************************************/ 
-ThreadPool::wait_ids_struct::wait_ids_struct( size_t N, const ThreadPool::thread_id_t *ids, size_t N_wait, - AtomicOperations::pool& cv_pool, int N_wait_list, volatile wait_ids_struct **list ): - d_wait( N_wait ), - d_N(0), - d_cv_pool( cv_pool ), - d_wait_event( cv_pool.get() ) + * Member functions of wait_ids_struct * + ******************************************************************/ +ThreadPool::wait_ids_struct::wait_ids_struct( size_t N, const ThreadPool::thread_id_t *ids, + size_t N_wait, AtomicOperations::pool &cv_pool, int N_wait_list, + volatile wait_ids_struct **list ) + : d_wait( N_wait ), d_N( 0 ), d_cv_pool( cv_pool ), d_wait_event( cv_pool.get() ) { d_ids = new ThreadPool::thread_id_t[N]; for ( size_t i = 0; i < N; i++ ) { if ( ids[i].finished() ) - d_wait = std::max(d_wait-1,0); + d_wait = std::max( d_wait - 1, 0 ); else d_ids[d_N++] = ids[i]; } quicksort( d_N, d_ids ); d_finished = new bool[d_N]; - memset((void*)d_finished,0,d_N); + memset( (void *) d_finished, 0, d_N ); int i = 0; - while ( !AtomicOperations::atomic_compare_and_swap( (void *volatile *) &list[i], nullptr, this ) ) { i = (i+1)%N_wait_list; } + while ( + !AtomicOperations::atomic_compare_and_swap( (void *volatile *) &list[i], nullptr, this ) ) { + i = ( i + 1 ) % N_wait_list; + } d_ptr = &list[i]; } -void ThreadPool::wait_ids_struct::id_finished( const ThreadPool::thread_id_t& id ) const +ThreadPool::wait_ids_struct::~wait_ids_struct() +{ + d_cv_pool.put( d_wait_event ); + delete[] d_finished; + delete[] d_ids; +} +void ThreadPool::wait_ids_struct::id_finished( const ThreadPool::thread_id_t &id ) const { int index = find_id( d_N, d_ids, id ); if ( index >= 0 ) { d_finished[index] = true; - int N_finished = 0; - for (int i=0; i= d_wait ) { - *d_ptr = nullptr; + d_N = 0; d_wait = 0; - d_N = 0; + AtomicOperations::atomic_compare_and_swap( + (void *volatile *) d_ptr, (void *) *d_ptr, nullptr ); d_wait_event->notify_all(); } } } bool ThreadPool::wait_ids_struct::wait_for( double seconds 
) { - for (int i=0; i=d_wait || d_N==0 ) { + for ( int i = 0; i < d_N; i++ ) + N_finished += d_finished[i] ? 1 : 0; + if ( N_finished >= d_wait || d_N == 0 ) { *d_ptr = nullptr; d_wait = 0; - d_N = 0; + d_N = 0; break; } auto t2 = std::chrono::high_resolution_clock::now(); - if ( 1e-6*std::chrono::duration_cast(t2-t1).count() > seconds ) + if ( 1e-6 * std::chrono::duration_cast( t2 - t1 ).count() > + seconds ) return false; - d_wait_event->wait_for(1e-5); + d_wait_event->wait_for( 1e-5 ); } return true; } /****************************************************************** -* templated quicksort routine * -******************************************************************/ -template + * templated quicksort routine * + ******************************************************************/ +template void quicksort( int n, T *arr ) { if ( n <= 1 ) @@ -1154,7 +1203,7 @@ void quicksort( int n, T *arr ) jstack = 0; l = 0; ir = n - 1; - while ( 1 ) { + while ( true ) { if ( ir - l < 7 ) { // Insertion sort when subarray small enough. 
for ( j = l + 1; j <= ir; j++ ) { a = arr[j]; @@ -1231,8 +1280,8 @@ void quicksort( int n, T *arr ) /************************************************************************ -* Function to find the id in a sorted vector * -************************************************************************/ + * Function to find the id in a sorted vector * + ************************************************************************/ inline int find_id( int n, const ThreadPool::thread_id_t *x, const ThreadPool::thread_id_t &id ) { if ( n == 0 ) @@ -1243,7 +1292,7 @@ inline int find_id( int n, const ThreadPool::thread_id_t *x, const ThreadPool::t if ( id < x[0] ) return -1; if ( id == x[n - 1] ) - return n-1; + return n - 1; if ( id > x[n - 1] ) return -1; // Perform the search @@ -1264,13 +1313,13 @@ inline int find_id( int n, const ThreadPool::thread_id_t *x, const ThreadPool::t /************************************************************************ -* Function to add dependencies to the work item * -* Note: when expanding the size of d_ids, we need to allocate space for * -* one extra entry for a spinlock. * -************************************************************************/ + * Function to add dependencies to the work item * + * Note: when expanding the size of d_ids, we need to allocate space for * + * one extra entry for a spinlock. 
* + ************************************************************************/ void ThreadPool::WorkItem::add_dependencies( size_t N, const ThreadPool::thread_id_t *ids ) { - if ( d_state!=0 ) { + if ( d_state != 0 ) { // The item has already been added to the threadpool, // we are not allowed to add dependencies throw std::logic_error( @@ -1291,9 +1340,9 @@ void ThreadPool::WorkItem::add_dependencies( size_t N, const ThreadPool::thread_ for ( size_t i = 0; i < d_N_ids; i++ ) const_cast( ids[i] ).swap( tmp[i] ); delete[] tmp; - d_size = N2; - int* lock = reinterpret_cast(&d_ids[d_size-1]); - *lock = 0; + d_size = N2; + auto *lock = reinterpret_cast( &d_ids[d_size - 1] ); + *lock = 0; } const ThreadPool::thread_id_t id0; for ( size_t i = 0; i < N; i++ ) { diff --git a/threadpool/thread_pool.h b/threadpool/thread_pool.h index db3eec9d..eff12433 100644 --- a/threadpool/thread_pool.h +++ b/threadpool/thread_pool.h @@ -3,53 +3,25 @@ // PARTICULAR PURPOSE. #ifndef included_AtomicModelThreadPool #define included_AtomicModelThreadPool + +#include #include #include +#include #include #include #include #include +#include #include #include -#include -#include -#include #include "threadpool/atomic_helpers.h" #include "threadpool/atomic_list.h" -// Choose the OS -#if defined( WIN32 ) || defined( _WIN32 ) || defined( WIN64 ) || defined( _WIN64 ) - // Using windows - #define USE_WINDOWS -#elif defined( __APPLE__ ) - // Using MAC - #define USE_MAC -#elif defined( __linux ) || defined( __unix ) || defined( __posix ) - // Using linux - #define USE_LINUX -#else - #error Unknown OS -#endif - - -// Set some definitions -#define MAX_NUM_THREADS 128 // The maximum number of threads (must be a multiple of 64) -#define MAX_QUEUED 1024 // The maximum number of items in the work queue at any moment -#define MAX_WAIT 16 // The maximum number of active waits at any given time -#define MAX_WAIT_TIME_DEBUG 600 // The maximum time in a wait command before printing a warning message - -#define 
PROFILE_THREADPOOL_PERFORMANCE 0 // Add profile timers to the threadpool (default is 0) -#define MONITOR_THREADPOOL_PERFORMANCE 0 // Add detailed performance counters (default is 0) - - -// Check the c++ std -#if CXX_STD==98 -#error Thread pool class requires c++11 or newer -#endif - +// clang-format off /** \class ThreadPool @@ -75,6 +47,13 @@ */ class ThreadPool { +public: + ///// Set some global properties + constexpr static int MAX_NUM_THREADS = 128; // The maximum number of threads (must be a multiple of 64) + constexpr static int MAX_QUEUED = 1024; // The maximum number of items in the work queue at any moment + constexpr static int MAX_WAIT = 16; // The maximum number of active waits at any given time + constexpr static bool PROFILE_THREADPOOL_PERFORMANCE = false; // Add profile timers to the threadpool + constexpr static bool MONITOR_THREADPOOL_PERFORMANCE = false; // Add detailed performance counters public: ///// Member classes @@ -102,7 +81,7 @@ public: inline thread_id_t( volatile thread_id_t &&rhs ); inline thread_id_t &operator=( const thread_id_t &rhs ) volatile; inline thread_id_t &operator=( volatile thread_id_t &&rhs ) volatile; -#ifndef USE_WINDOWS +#if !defined( WIN32 ) && !defined( _WIN32 ) && !defined( WIN64 ) && !defined( _WIN64 ) inline thread_id_t( const thread_id_t &rhs ); inline thread_id_t &operator=( thread_id_t &&rhs ); inline thread_id_t &operator=( const thread_id_t &rhs ); @@ -245,7 +224,7 @@ public: //! Run the work item virtual void run() override = 0; //! Will the routine return a result - virtual bool has_result() const override = 0; + virtual bool has_result() const override final { return !std::is_same::value; } //! Return the results return_type get_results() const { return d_result; } //! Virtual destructor @@ -353,10 +332,12 @@ public: * in the ThreadPool without checking the existing work unless the desired number of * threads is 0. 
In this case, the function will wait for all work items to finish * before deleting the existing work threads. + * Member threads may not call this function. * @param N The desired number of worker threads * @param affinity The affinity scheduler to use: * none - Let the OS handle the affinities (default) + * independent - Give each thread an independent set of processors * @param procs The processors to use (defaults to the process affinitiy list) */ @@ -368,6 +349,16 @@ public: } + /*! + * \brief Function to set the maximum wait time + * \details This function sets the maximum time the thread pool will + * wait before warning about a possible hung thread. + * Default is to wait 10 minutes. + * @param time The number of seconds to wait (seconds) + */ + inline void setMaxWaitTimeDebug( const int time ) { d_max_wait_time = time; } + + /*! * \brief Function to return the current thread number * \details This function will return the thread number of current active thread. @@ -400,16 +391,14 @@ public: * @param id The id of the work item */ template - inline return_type getFunctionRet( const thread_id_t &id ) const; + static inline return_type getFunctionRet( const thread_id_t &id ); /*! * \brief Function to create a work item * \details This function creates a work item that can be added to the queue - * @param work Pointer to the work item to add - * Note that the threadpool will automatically destroy the item when - * finished - * @param priority A value indicating the priority of the work item (0-default) + * @param routine Function to call from the thread pool + * @param args Function arguments to pass */ template static inline WorkItem* createWork( Ret( *routine )( Args... ), Args... args ); @@ -505,6 +494,7 @@ public: * If successful it returns the indicies of the finished work items (the index in the array ids). * Note: any thread may call this routine, but they will block until finished. * For worker threads this may eventually lead to a deadlock. 
+ * @param N_wait Number of work items to wait for * @param ids Vector of work items to wait for */ inline std::vector wait_some( int N_wait, const std::vector &ids ) const; @@ -552,6 +542,69 @@ public: //! Return the number of items queued int N_queued( ) const { return d_queue_list.size(); } + + //! Set the error handler for threads + void setErrorHandler( std::function fun ); + + +public: // Static interface + + /*! + * \brief Function to return the number of work threads + * \details This function returns the number of threads in the thread pool, + * or 0 if the thread pool is empty or does not exist + * @param tpool Threadpool to add work to (may be null) + */ + static inline int numThreads( const ThreadPool* tpool ) { return tpool ? tpool->getNumThreads() : 0; } + + /*! + * \brief Function to add a work item + * \details This function adds a work item to the queue + * Note: any thread may call this routine. + * @param tpool Threadpool to add work to (may be null) + * @param work Pointer to the work item to add + * Note that the threadpool will automatically destroy the item when finished + * @param priority A value indicating the priority of the work item (0-default) + */ + static inline thread_id_t add_work( ThreadPool* tpool, ThreadPool::WorkItem *work, int priority = 0 ); + + + /*! + * \brief Function to add multiple work items + * \details This function adds multiple work item to the queue + * Note: any thread may call this routine. + * @param tpool Threadpool to add work to (may be null) + * @param work Vector of pointers to the work items to add + * Note that the threadpool will automatically destroy the item when finished + * @param priority Vector of values indicating the priority of the work items + */ + static inline std::vector add_work( ThreadPool* tpool, const std::vector &work, + const std::vector &priority = std::vector() ); + + + /*! 
+ * \brief Function to wait until all of the given work items have finished their work + * \details This is the function waits for all given of the work items to finish. It returns 0 + * if successful. + * Note: any thread may call this routine, but they will block until finished. + * For worker threads this may eventually lead to a deadlock. + * @param tpool Threadpool containing work (must match call to add_work) + * @param ids Vector of work items to wait for + */ + static inline int wait_all( const ThreadPool* tpool, const std::vector &ids ); + + + /*! + * \brief Function to wait until all work items in the thread pool have finished their work + * \details This function will wait until all work has finished. + * Note: member threads may not call this function. + * Only one non-member thread should call this routine at a time. + * @param tpool Threadpool containing work (must match call to add_work) + */ + static inline void wait_pool_finished( const ThreadPool* tpool ) { if ( tpool ) { tpool->wait_pool_finished(); } } + + + private: typedef AtomicOperations::int32_atomic int32_atomic; @@ -593,7 +646,7 @@ private: public: wait_ids_struct( size_t N, const ThreadPool::thread_id_t *ids, size_t N_wait, AtomicOperations::pool& cv_pool, int N_wait_list, volatile wait_ids_struct **list ); - ~wait_ids_struct( ) { d_cv_pool.put( d_wait_event ); delete [] d_finished; delete [] d_ids; } + ~wait_ids_struct( ); void id_finished( const ThreadPool::thread_id_t& id ) const; bool wait_for( double seconds ); private: @@ -628,7 +681,10 @@ private: inline void add_work( const ThreadPool::thread_id_t& id ); // Function to get a work item that has finished - WorkItem *getFinishedWorkItem( ThreadPool::thread_id_t id ) const; + static inline WorkItem *getFinishedWorkItem( const ThreadPool::thread_id_t& id ) + { + return id.finished() ? 
id.work():nullptr; + } // This function provides a wrapper (needed for the threads) static inline void create_new_thread( ThreadPool *tpool, int id ) @@ -676,10 +732,13 @@ private: std::thread::id d_threadId[MAX_NUM_THREADS]; // Unique id for each thread queue_type d_queue_list; // The work queue size_t d_NULL_TAIL; // Null data buffer to check memory bounds + int d_max_wait_time; // The maximum time in a wait command before printing a warning message + std::function d_errorHandler; }; #include "threadpool/thread_pool.hpp" +// clang-format on #endif diff --git a/threadpool/thread_pool.hpp b/threadpool/thread_pool.hpp index 60840ec1..a87860b3 100644 --- a/threadpool/thread_pool.hpp +++ b/threadpool/thread_pool.hpp @@ -23,7 +23,7 @@ */ #define TPOOL_TUPLE_TO_SEQ( t ) TPOOL_TUPLE_TO_SEQ_##II t #define TPOOL_TUPLE_TO_SEQ_II( a, ... ) a, ##__VA_ARGS__ -#ifdef USE_WINDOWS +#if defined( WIN32 ) || defined( _WIN32 ) || defined( WIN64 ) || defined( _WIN64 ) #define TPOOL_GET_PRIORITY( a, N, c, ... ) N #define TPOOL_ADD_WORK( TPOOL, FUNCTION, ARGS, ... ) \ ThreadPool_add_work( TPOOL, TPOOL_GET_PRIORITY( 0, __VA_ARGS__, 0, 0 ) + 0, FUNCTION, \ @@ -40,35 +40,35 @@ // \cond HIDDEN_SYMBOLS - // Unpack a tuple and call a function -template +template struct index_tuple { }; -template +template struct make_indexes_impl; -template +template struct make_indexes_impl, T, Types...> { typedef typename make_indexes_impl, Types...>::type type; }; -template +template struct make_indexes_impl> { typedef index_tuple type; }; -template +template struct make_indexes : make_indexes_impl<0, index_tuple<>, Types...> { }; -template -inline Ret apply_helper( Ret ( *pf )( Args... ), index_tuple, std::tuple &&tup ) +template +inline Ret apply_helper( + Ret ( *pf )( Args... ), index_tuple, std::tuple &&tup ) { return pf( std::forward( std::get( tup ) )... ); } -template +template inline Ret apply( Ret ( *pf )( Args... 
), const std::tuple &tup ) { return apply_helper( pf, typename make_indexes::type(), std::tuple( tup ) ); } -template +template inline Ret apply( Ret ( *pf )( Args... ), std::tuple &&tup ) { return apply_helper( @@ -77,21 +77,21 @@ inline Ret apply( Ret ( *pf )( Args... ), std::tuple &&tup ) // Specialization for no return argument -template <> +template<> class ThreadPool::WorkItemRet : public ThreadPool::WorkItem { public: virtual void run() override = 0; - virtual bool has_result() const override { return false; } void get_results() {} virtual ~WorkItemRet() {} + virtual bool has_result() const override final { return false; } }; // Final class for the work item -template +template class WorkItemFull; -template +template class WorkItemFull : public ThreadPool::WorkItemRet { private: @@ -104,14 +104,10 @@ public: : ThreadPool::WorkItemRet(), routine( routine2 ), args( ts... ) { } - virtual void run() override - { - apply( routine, args ); - } - virtual bool has_result() const override { return false; } + virtual void run() override { apply( routine, args ); } virtual ~WorkItemFull() {} }; -template +template class WorkItemFull : public ThreadPool::WorkItemRet { private: @@ -124,62 +120,60 @@ public: : ThreadPool::WorkItemRet(), routine( routine2 ), args( ts... ) { } - virtual void run() override - { - this->d_result = apply( routine, args ); - } - virtual bool has_result() const override { return true; } + virtual void run() override { this->d_result = apply( routine, args ); } virtual ~WorkItemFull() {} }; // Functions to add work to the thread pool -template +template inline ThreadPool::thread_id_t ThreadPool_add_work( ThreadPool *tpool, int priority, Ret ( *routine )( Ts... ), Ts... ts ) { - ThreadPool::WorkItem *work = new WorkItemFull( routine, ts... ); - return tpool->add_work( work, priority ); + auto work = new WorkItemFull( routine, ts... 
); + return ThreadPool::add_work( tpool, work, priority ); } -template +template inline ThreadPool::thread_id_t ThreadPool_add_work( ThreadPool *tpool, int priority, Ret ( *routine )(), void * ) { - ThreadPool::WorkItem *work = new WorkItemFull( routine ); - return tpool->add_work( work, priority ); + auto work = new WorkItemFull( routine ); + return ThreadPool::add_work( tpool, work, priority ); } -template -inline ThreadPool::WorkItem* ThreadPool::createWork( Ret( *routine )( Args... ), Args... args ) +template +inline ThreadPool::WorkItem *ThreadPool::createWork( Ret ( *routine )( Args... ), Args... args ) { return new WorkItemFull( routine, args... ); } /****************************************************************** -* Function to get the returned function value * -******************************************************************/ -template inline constexpr T zeroConstructor(); -template<> inline constexpr bool zeroConstructor( ) { return false; } -template<> inline constexpr char zeroConstructor( ) { return 0; } -template<> inline constexpr unsigned char zeroConstructor( ) { return 0; } -template<> inline constexpr int zeroConstructor( ) { return 0; } -template<> inline constexpr unsigned int zeroConstructor( ) { return 0; } -template<> inline constexpr long zeroConstructor( ) { return 0; } -template<> inline constexpr unsigned long zeroConstructor( ) { return 0; } -template<> inline constexpr float zeroConstructor( ) { return 0; } -template<> inline constexpr double zeroConstructor( ) { return 0; } -template inline constexpr T zeroConstructor() { return T(); } -template -inline Ret ThreadPool::getFunctionRet( const ThreadPool::thread_id_t &id ) const + * Function to get the returned function value * + ******************************************************************/ +// clang-format off +template inline constexpr T zeroConstructor(); +template<> inline constexpr bool zeroConstructor() { return false; } +template<> inline constexpr char 
zeroConstructor() { return 0; } +template<> inline constexpr unsigned char zeroConstructor() { return 0; } +template<> inline constexpr int zeroConstructor() { return 0; } +template<> inline constexpr unsigned int zeroConstructor() { return 0; } +template<> inline constexpr long zeroConstructor() { return 0; } +template<> inline constexpr unsigned long zeroConstructor() { return 0; } +template<> inline constexpr float zeroConstructor() { return 0; } +template<> inline constexpr double zeroConstructor() { return 0; } +template inline constexpr T zeroConstructor() { return T(); } +template +inline Ret ThreadPool::getFunctionRet( const ThreadPool::thread_id_t &id ) { - WorkItemRet *work = dynamic_cast*>( getFinishedWorkItem( id ) ); + auto work = dynamic_cast *>( getFinishedWorkItem( id ) ); return work == nullptr ? zeroConstructor() : work->get_results(); } +// clang-format on /****************************************************************** -* Inline functions to wait for the work items to finish * -******************************************************************/ + * Inline functions to wait for the work items to finish * + ******************************************************************/ inline int ThreadPool::wait( ThreadPool::thread_id_t id ) const { bool finished; @@ -218,7 +212,7 @@ inline int ThreadPool::wait_any( const std::vector &ids ) const } inline int ThreadPool::wait_all( size_t N_work, const ThreadPool::thread_id_t *ids ) const { - if ( N_work==0 ) + if ( N_work == 0 ) return 0; auto finished = new bool[N_work]; wait_some( N_work, ids, N_work, finished ); @@ -234,25 +228,32 @@ inline int ThreadPool::wait_all( const std::vector &ids ) const delete[] finished; return 0; } -inline std::vector ThreadPool::wait_some( int N_wait, const std::vector &ids ) const +inline int ThreadPool::wait_all( const ThreadPool *tpool, const std::vector &ids ) { - auto finished = new bool[ids.size()]; + if ( tpool ) + return tpool->wait_all( ids ); + return ids.size(); 
+} +inline std::vector ThreadPool::wait_some( + int N_wait, const std::vector &ids ) const +{ + auto finished = new bool[ids.size()]; int N_finished = wait_some( ids.size(), ids.data(), N_wait, finished ); - std::vector index(N_finished,-1); - for ( size_t i=0, j=0; i < ids.size(); i++ ) { + std::vector index( N_finished, -1 ); + for ( size_t i = 0, j = 0; i < ids.size(); i++ ) { if ( finished[i] ) { index[j] = i; j++; } } - delete [] finished; + delete[] finished; return index; } /****************************************************************** -* Functions to add work items. * -******************************************************************/ + * Functions to add work items. * + ******************************************************************/ inline ThreadPool::thread_id_t ThreadPool::add_work( WorkItem *work, int priority ) { ThreadPool::thread_id_t id; @@ -280,11 +281,37 @@ inline std::vector ThreadPool::add_work( delete[] priority2; return ids; } +inline ThreadPool::thread_id_t ThreadPool::add_work( + ThreadPool *tpool, ThreadPool::WorkItem *work, int priority ) +{ + ThreadPool::thread_id_t id; + if ( tpool ) { + id = tpool->add_work( work, priority ); + } else { + id.reset( priority, std::rand(), work ); + work->d_state = 2; + work->run(); + work->d_state = 3; + } + return id; +} +inline std::vector ThreadPool::add_work( ThreadPool *tpool, + const std::vector &work, const std::vector &priority ) +{ + if ( tpool ) { + return tpool->add_work( work, priority ); + } else { + std::vector ids( work.size() ); + for ( size_t i = 0; i < work.size(); i++ ) + ids[i] = add_work( tpool, work[i], priority[i] ); + return ids; + } +} /****************************************************************** -* Class functions to for the thread id * -******************************************************************/ + * Class functions to for the thread id * + ******************************************************************/ inline ThreadPool::thread_id_t::thread_id_t() : 
d_id( nullThreadID ), d_count( NULL ), d_work( NULL ) { @@ -326,7 +353,7 @@ inline ThreadPool::thread_id_t::thread_id_t( const volatile ThreadPool::thread_i if ( d_count != NULL ) AtomicOperations::atomic_increment( d_count ); } -#ifndef USE_WINDOWS +#if !defined( WIN32 ) && !defined( _WIN32 ) && !defined( WIN64 ) && !defined( _WIN64 ) inline ThreadPool::thread_id_t::thread_id_t( const thread_id_t &rhs ) : d_id( rhs.d_id ), d_count( rhs.d_count ), d_work( rhs.d_work ) { @@ -417,8 +444,8 @@ inline uint64_t ThreadPool::thread_id_t::createId( int priority, uint64_t local_ if ( priority >= 0 ) tmp2 |= 0x80; uint64_t id = tmp2; - id = ( id << 56 ) + local_id; - return id; + id = ( id << 56 ) + local_id; + return id; } inline void ThreadPool::thread_id_t::reset( int priority, uint64_t local_id, void *work ) { @@ -435,8 +462,8 @@ inline void ThreadPool::thread_id_t::reset( int priority, uint64_t local_id, voi d_count = nullptr; d_work = nullptr; if ( work != nullptr ) { - d_work = work; - d_count = &(reinterpret_cast( work )->d_count); + d_work = work; + d_count = &( reinterpret_cast( work )->d_count ); *d_count = 1; } } @@ -473,7 +500,7 @@ inline bool ThreadPool::thread_id_t::ready() const bool ready = true; if ( !isNull() ) { auto tmp = work(); - for (size_t i=0; id_N_ids; i++) + for ( size_t i = 0; i < tmp->d_N_ids; i++ ) ready = ready && tmp->d_ids[i].finished(); } return ready; @@ -481,21 +508,22 @@ inline bool ThreadPool::thread_id_t::ready() const /****************************************************************** -* This function checks if the id is valid * -******************************************************************/ + * This function checks if the id is valid * + ******************************************************************/ inline bool ThreadPool::isValid( const ThreadPool::thread_id_t &id ) const { - static_assert( sizeof(atomic_64)==8, "atomic_64 must be a 64-bit integer" ); + static_assert( sizeof( atomic_64 ) == 8, "atomic_64 must be a 64-bit 
integer" ); uint64_t local_id = id.getLocalID(); uint64_t next_id = d_id_assign - 1; - return local_id!=0 && id.initialized() && local_id<=thread_id_t::maxThreadID && local_id>next_id; + return local_id != 0 && id.initialized() && local_id <= thread_id_t::maxThreadID && + local_id > next_id; } /****************************************************************** -* Function to get the thread number * -* (-1 if it is not a member thread) * -******************************************************************/ + * Function to get the thread number * + * (-1 if it is not a member thread) * + ******************************************************************/ inline int ThreadPool::getThreadNumber() const { std::thread::id id = std::this_thread::get_id();