Updating threadpool, Array, StackTrace, ... classes

This commit is contained in:
Mark Berrill
2018-02-06 10:50:43 -05:00
parent 396bb07b26
commit 98d86d2f94
21 changed files with 3839 additions and 2444 deletions

View File

@@ -1,34 +1,15 @@
#ifndef included_ArrayClass
#define included_ArrayClass
#include <vector>
#include <array>
#include <cstring>
#include <functional>
#include <initializer_list>
#include <iostream>
#include <stdexcept>
#include <memory>
#include <iostream>
#include <vector>
#define ARRAY_NDIM_MAX 5 // Maximum number of dimensions supported
#define GET_ARRAY_INDEX3D( N, i1, i2, i3 ) i1 + N[0] * ( i2 + N[1] * i3 )
#define GET_ARRAY_INDEX4D( N, i1, i2, i3, i4 ) i1 + N[0] * ( i2 + N[1] * ( i3 + N[2] * i4 ) )
#define GET_ARRAY_INDEX5D( N, i1, i2, i3, i4, i5 ) i1 + N[0] * ( i2 + N[1] * ( i3 + N[2] * ( i4 + N[3] * i5 ) ) )
#if defined( DEBUG ) || defined( _DEBUG )
#define CHECK_ARRAY_INDEX3D( N, i1, i2, i3 ) \
if ( GET_ARRAY_INDEX3D( N, i1, i2, i3 ) < 0 || GET_ARRAY_INDEX3D( N, i1, i2, i3 ) >= d_length ) \
throw std::logic_error( "Index exceeds array bounds" );
#define CHECK_ARRAY_INDEX4D( N, i1, i2, i3, i4 ) \
if ( GET_ARRAY_INDEX4D( N, i1, i2, i3, i4 ) < 0 || \
GET_ARRAY_INDEX4D( N, i1, i2, i3, i4 ) >= d_length ) \
throw std::logic_error( "Index exceeds array bounds" );
#else
#define CHECK_ARRAY_INDEX3D( N, i1, i2, i3 )
#define CHECK_ARRAY_INDEX4D( N, i1, i2, i3, i4 )
#endif
#include "Utilities.h"
#if defined( __CUDA_ARCH__ )
@@ -37,20 +18,244 @@
#else
#define HOST_DEVICE
#endif
// Attribute appended to small accessors to request forced inlining.
// Expands to the GNU always_inline attribute when USING_GCC or USING_CLANG is
// defined and to nothing otherwise.
// NOTE(review): USING_GCC / USING_CLANG are presumably provided by the build
// system -- they are not defined anywhere in the visible sources; confirm.
#if defined( USING_GCC ) || defined( USING_CLANG )
#define ATTRIBUTE_INLINE __attribute__( ( always_inline ) )
#else
#define ATTRIBUTE_INLINE
#endif
// Debug-only bounds check used by the index helpers.
// Throws std::length_error when the (flattened) index i is not smaller than
// the d_length that is in scope at the point of use.  The check is only
// compiled in when DEBUG or _DEBUG is defined and NDEBUG is not; otherwise the
// macro expands to an empty statement and i is not evaluated.
// The argument is parenthesized so that expression arguments (e.g. a
// conditional expression) are compared as a whole.
#if ( defined( DEBUG ) || defined( _DEBUG ) ) && !defined( NDEBUG )
#define CHECK_ARRAY_LENGTH( i )                                      \
    do {                                                             \
        if ( ( i ) >= d_length )                                     \
            throw std::length_error( "Index exceeds array bounds" ); \
    } while ( 0 )
#else
#define CHECK_ARRAY_LENGTH( i ) \
    do {                        \
    } while ( 0 )
#endif
// Forward declaration
class FunctionTable;
/*!
 * Simple range class holding the three values of a range i:k:j
 * (starting value i, increment k, ending value j).
 */
template<class TYPE = size_t>
class Range final
{
public:
    TYPE i; //!< Starting value
    TYPE j; //!< Ending value
    TYPE k; //!< Increment value

    //! Empty constructor: i = 0, j = -1 (converted to TYPE), k = 1
    Range() : i( 0 ), j( -1 ), k( 1 ) {}

    /*!
     * Create a range i:k:j (or i:j when the increment is omitted)
     * @param i_    Starting value
     * @param j_    Ending value
     * @param k_    Increment value (defaults to 1)
     */
    Range( TYPE i_, TYPE j_, TYPE k_ = 1 ) : i( i_ ), j( j_ ), k( k_ ) {}
};
/*!
 * Simple class to store the array dimensions.
 * Stores the number of dimensions, the size of each dimension (up to
 * maxDim() entries) and the total length used for index checking.
 * Constructors, assignment operators and resize() are defined out of line.
 */
class ArraySize final
{
public:
    //! Empty constructor
    inline ArraySize();

    /*!
     * Create the vector size
     * @param N1    Number of elements in the first dimension
     */
    inline ArraySize( size_t N1 );

    /*!
     * Create the vector size
     * @param N1    Number of elements in the first dimension
     * @param N2    Number of elements in the second dimension
     */
    inline ArraySize( size_t N1, size_t N2 );

    /*!
     * Create the vector size
     * @param N1    Number of elements in the first dimension
     * @param N2    Number of elements in the second dimension
     * @param N3    Number of elements in the third dimension
     */
    inline ArraySize( size_t N1, size_t N2, size_t N3 );

    /*!
     * Create the vector size
     * @param N1    Number of elements in the first dimension
     * @param N2    Number of elements in the second dimension
     * @param N3    Number of elements in the third dimension
     * @param N4    Number of elements in the fourth dimension
     */
    inline ArraySize( size_t N1, size_t N2, size_t N3, size_t N4 );

    /*!
     * Create the vector size
     * @param N1    Number of elements in the first dimension
     * @param N2    Number of elements in the second dimension
     * @param N3    Number of elements in the third dimension
     * @param N4    Number of elements in the fourth dimension
     * @param N5    Number of elements in the fifth dimension
     */
    inline ArraySize( size_t N1, size_t N2, size_t N3, size_t N4, size_t N5 );

    /*!
     * Create from initializer list
     * @param N     Size of the array
     */
    inline ArraySize( std::initializer_list<size_t> N );

    /*!
     * Create from raw pointer
     * @param ndim  Number of dimensions
     * @param dims  Dimensions
     */
    inline ArraySize( size_t ndim, const size_t *dims );

    /*!
     * Create from std::vector
     * @param N     Size of the array
     */
    inline ArraySize( const std::vector<size_t> &N );

    /*!
     * Copy constructor
     * @param rhs   Size to copy
     */
    inline ArraySize( const ArraySize &rhs );

    /*!
     * Move constructor
     * @param rhs   Size to move from
     */
    inline ArraySize( ArraySize &&rhs );

    /*!
     * Assignment operator
     * @param rhs   Size to copy
     */
    inline ArraySize &operator=( const ArraySize &rhs );

    /*!
     * Move assignment operator
     * @param rhs   Size to move from
     */
    inline ArraySize &operator=( ArraySize &&rhs );

    /*!
     * Access the ith dimension (no bounds checking is performed)
     * @param i     Index to access
     */
    inline size_t operator[]( size_t i ) const { return d_N[i]; }

    //! Return the number of dimensions
    inline uint8_t ndim() const ATTRIBUTE_INLINE { return d_ndim; }

    //! Return the number of dimensions (same value as ndim(), as a size_t)
    inline size_t size() const ATTRIBUTE_INLINE { return d_ndim; }

    //! Return the total length (the bound flattened indices are checked against)
    inline size_t length() const ATTRIBUTE_INLINE { return d_length; }

    /*!
     * Resize a single dimension
     * NOTE(review): defined out of line; presumably sets dimension dim to N
     * elements and updates the length -- confirm against the definition.
     * @param dim   Dimension to resize
     * @param N     New number of elements in that dimension
     */
    inline void resize( uint8_t dim, size_t N );

    //! Returns an iterator to the beginning (first dimension size)
    inline const size_t *begin() const ATTRIBUTE_INLINE { return d_N; }

    //! Returns an iterator to the end (one past the last of the ndim() sizes)
    inline const size_t *end() const ATTRIBUTE_INLINE { return d_N + d_ndim; }

    /*!
     * Check if two sizes are equal
     * The comparison covers the number of dimensions and all stored entries
     * of the dimension table (including entries past ndim()).
     * NOTE(review): this assumes unused entries are initialized consistently
     * by the out-of-line constructors -- confirm.
     */
    inline bool operator==( const ArraySize &rhs ) const ATTRIBUTE_INLINE
    {
        return d_ndim == rhs.d_ndim && memcmp( d_N, rhs.d_N, sizeof( d_N ) ) == 0;
    }

    //! Check if two sizes are not equal (exact negation of operator==)
    inline bool operator!=( const ArraySize &rhs ) const ATTRIBUTE_INLINE
    {
        return !( *this == rhs );
    }

    //! Maximum supported dimension
    constexpr static uint8_t maxDim() ATTRIBUTE_INLINE { return 5u; }

    /*!
     * Get the flattened index of a 1D index
     * In debug builds CHECK_ARRAY_LENGTH throws std::length_error if the
     * index is not smaller than length().
     */
    inline size_t index( size_t i ) const ATTRIBUTE_INLINE
    {
        CHECK_ARRAY_LENGTH( i );
        return i;
    }

    //! Get the flattened index of a 2D index (first index varies fastest)
    inline size_t index( size_t i1, size_t i2 ) const ATTRIBUTE_INLINE
    {
        size_t index = i1 + i2 * d_N[0];
        CHECK_ARRAY_LENGTH( index );
        return index;
    }

    //! Get the flattened index of a 3D index (first index varies fastest)
    inline size_t index( size_t i1, size_t i2, size_t i3 ) const ATTRIBUTE_INLINE
    {
        size_t index = i1 + d_N[0] * ( i2 + d_N[1] * i3 );
        CHECK_ARRAY_LENGTH( index );
        return index;
    }

    //! Get the flattened index of a 4D index (first index varies fastest)
    inline size_t index( size_t i1, size_t i2, size_t i3, size_t i4 ) const ATTRIBUTE_INLINE
    {
        size_t index = i1 + d_N[0] * ( i2 + d_N[1] * ( i3 + d_N[2] * i4 ) );
        CHECK_ARRAY_LENGTH( index );
        return index;
    }

    //! Get the flattened index of a 5D index (first index varies fastest)
    inline size_t index(
        size_t i1, size_t i2, size_t i3, size_t i4, size_t i5 ) const ATTRIBUTE_INLINE
    {
        size_t index = i1 + d_N[0] * ( i2 + d_N[1] * ( i3 + d_N[2] * ( i4 + d_N[3] * i5 ) ) );
        CHECK_ARRAY_LENGTH( index );
        return index;
    }

private:
    uint8_t d_ndim;  // Number of dimensions
    size_t d_length; // Total length (upper bound for flattened indices)
    size_t d_N[5];   // Size of each dimension (array bound matches maxDim())
};
/*!
* Class Array is a multi-dimensional array class written by Mark Berrill
*/
template <class TYPE>
class Array
template<class TYPE, class FUN = FunctionTable>
class Array final
{
public:
public: // Constructors / assignment operators
/*!
* Create a new empty Array
*/
Array();
/*!
* Create an Array with the given size
* @param N Size of the array
*/
explicit Array( const ArraySize &N );
/*!
* Create a new 1D Array with the given number of elements
* @param N Number of elements in the array
@@ -72,6 +277,25 @@ public:
*/
explicit Array( size_t N1, size_t N2, size_t N3 );
/*!
* Create a new 4D Array with the given number of rows and columns
* @param N1 Number of elements in the first dimension
* @param N2 Number of elements in the second dimension
* @param N3 Number of elements in the third dimension
* @param N4 Number of elements in the fourth dimension
*/
explicit Array( size_t N1, size_t N2, size_t N3, size_t N4 );
/*!
* Create a new 5D Array with the given number of elements in each dimension
* @param N1 Number of elements in the first dimension
* @param N2 Number of elements in the second dimension
* @param N3 Number of elements in the third dimension
* @param N4 Number of elements in the fourth dimension
* @param N5 Number of elements in the fifth dimension
*/
explicit Array( size_t N1, size_t N2, size_t N3, size_t N4, size_t N5 );
/*!
* Create a multi-dimensional Array with the given number of elements
* @param N Number of elements in each dimension
@@ -79,6 +303,19 @@ public:
*/
explicit Array( const std::vector<size_t> &N, const TYPE *data = NULL );
/*!
* Create a 1D Array with the range
* @param range Range of the data
*/
explicit Array( const Range<TYPE> &range );
/*!
* Create a 1D Array with the given initializer list
* @param data Input data
*/
Array( std::initializer_list<TYPE> data );
/*!
* Copy constructor
* @param rhs Array to copy
@@ -109,7 +346,7 @@ public:
*/
Array &operator=( const std::vector<TYPE> &rhs );
public: // Views/copies/subset
/*!
* Create a 1D Array view to a raw block of data
* @param N Number of elements in the array
@@ -141,8 +378,7 @@ public:
* @param N Number of elements in each dimension
* @param data Pointer to the data
*/
static std::shared_ptr<Array> view(
const std::vector<size_t> &N, std::shared_ptr<TYPE> const &data );
static std::shared_ptr<Array> view( const ArraySize &N, std::shared_ptr<TYPE> const &data );
/*!
@@ -178,7 +414,7 @@ public:
* @param data Pointer to the data
*/
static std::shared_ptr<const Array> constView(
const std::vector<size_t> &N, std::shared_ptr<const TYPE> const &data );
const ArraySize &N, std::shared_ptr<const TYPE> const &data );
/*!
@@ -192,7 +428,20 @@ public:
* @param N Number of elements in each dimension
* @param data Pointer to the data
*/
void view2( const std::vector<size_t> &N, std::shared_ptr<TYPE> const &data );
void view2( const ArraySize &N, std::shared_ptr<TYPE> const &data );
/*!
* Make this object a view of the raw data (expert use only).
* Use view2( N, std::shared_ptr(data,[](TYPE*){}) ) instead.
* Note: this interface is not recommended as it does not protect from
* the src data being deleted while still being used by the Array.
* Additionally for maximum performance it does not set the internal shared_ptr
* so functions like getPtr and resize will not work correctly.
* @param ndim Number of dimensions
* @param dims Number of elements in each dimension
* @param data Pointer to the data
*/
void viewRaw( int ndim, const size_t *dims, TYPE *data );
/*!
* Make this object a view of the raw data (expert use only).
@@ -204,41 +453,30 @@ public:
* @param N Number of elements in each dimension
* @param data Pointer to the data
*/
void viewRaw( const std::initializer_list<size_t> &N, TYPE *data );
/*!
* Make this object a view of the raw data (expert use only).
* Use view2( N, std::shared_ptr(data,[](TYPE*){}) ) instead.
* Note: this interface is not recommended as it does not protect from
* the src data being deleted while still being used by the Array.
* Additionally for maximum performance it does not set the internal shared_ptr
* so functions like getPtr and resize will not work correctly.
* @param N Number of elements in each dimension
* @param data Pointer to the data
*/
void viewRaw( const std::vector<size_t> &N, TYPE *data );
void viewRaw( const ArraySize &N, TYPE *data );
/*!
* Convert an array of one type to another. This may or may not allocate new memory.
* @param array Input array
*/
template <class TYPE2>
static std::shared_ptr<Array<TYPE2>> convert( std::shared_ptr<Array<TYPE>> array );
template<class TYPE2>
static std::shared_ptr<Array<TYPE2>> convert( std::shared_ptr<Array<TYPE, FUN>> array );
/*!
* Convert an array of one type to another. This may or may not allocate new memory.
* @param array Input array
*/
template <class TYPE2>
static std::shared_ptr<const Array<TYPE2>> convert( std::shared_ptr<const Array<TYPE>> array );
template<class TYPE2>
static std::shared_ptr<const Array<TYPE2>> convert(
std::shared_ptr<const Array<TYPE, FUN>> array );
/*!
* Copy and convert data from another array to this array
* @param array Source array
*/
template <class TYPE2>
template<class TYPE2>
void copy( const Array<TYPE2> &array );
/*!
@@ -246,16 +484,23 @@ public:
* Note: The current array must be allocated to the proper size first.
* @param array Source array
*/
template <class TYPE2>
template<class TYPE2>
void copy( const TYPE2 *array );
/*!
* Copy and convert data from this array to a raw vector.
* @param array Destination array
*/
template <class TYPE2>
template<class TYPE2>
void copyTo( TYPE2 *array ) const;
/*!
* Copy and convert data from this array to a new Array with element type TYPE2.
* @return The converted copy
*/
template<class TYPE2>
Array<TYPE2, FUN> cloneTo() const;
/*!
* Fill the array with the given value
@@ -274,7 +519,7 @@ public:
* @param base Base array
* @param exp Exponent value
*/
void pow( const Array<TYPE> &baseArray, const TYPE &exp );
void pow( const Array<TYPE, FUN> &base, const TYPE &exp );
//! Destructor
~Array();
@@ -285,23 +530,27 @@ public:
//! Return the size of the Array
inline int ndim() const { return d_ndim; }
inline int ndim() const { return d_size.ndim(); }
//! Return the size of the Array
inline std::vector<size_t> size() const { return std::vector<size_t>( d_N, d_N + d_ndim ); }
inline ArraySize &size() { return d_size; }
//! Return the size of the Array
inline size_t size( int d ) const { return d_N[d]; }
inline ArraySize size() const { return d_size; }
//! Return the size of the Array
inline size_t length() const { return d_length; }
inline size_t size( int d ) const { return d_size[d]; }
//! Return the size of the Array
inline size_t length() const { return d_size.length(); }
//! Return true if the Array is empty
inline bool empty() const { return d_length == 0; }
inline bool empty() const { return d_size.length() == 0; }
/*!
@@ -329,7 +578,8 @@ public:
* Resize the Array
* @param N Number of elements in each dimension
*/
void resize( const std::vector<size_t> &N );
void resize( const ArraySize &N );
/*!
* Resize the given dimension of the array
@@ -344,48 +594,73 @@ public:
* Reshape the Array (total size of array will not change)
* @param N Number of elements in each dimension
*/
void reshape( const std::vector<size_t> &N );
void reshape( const ArraySize &N );
/*!
* Subset the Array (total size of array will not change)
* @param index Index to subset (imin,imax,jmin,jmax,kmin,kmax,...)
*/
template<class TYPE2=TYPE>
Array<TYPE2> subset( const std::vector<size_t> &index ) const;
template<class TYPE2 = TYPE>
Array<TYPE2, FUN> subset( const std::vector<size_t> &index ) const;
/*!
* Subset the Array (total size of array will not change)
* @param index Index to subset (ix:kx:jx,iy:ky:jy,...)
*/
template<class TYPE2 = TYPE>
Array<TYPE2, FUN> subset( const std::vector<Range<size_t>> &index ) const;
/*!
* Copy data from an array into a subset of this array
* @param index Index of the subset (imin,imax,jmin,jmax,kmin,kmax,...)
* @param subset The subset array to copy from
*/
template <class TYPE2>
void copySubset( const std::vector<size_t> &index, const Array<TYPE2> &subset );
template<class TYPE2>
void copySubset( const std::vector<size_t> &index, const Array<TYPE2, FUN> &subset );
/*!
* Copy data from an array into a subset of this array
* @param index Index of the subset
* @param subset The subset array to copy from
*/
template<class TYPE2>
void copySubset( const std::vector<Range<size_t>> &index, const Array<TYPE2, FUN> &subset );
/*!
* Add data from an array into a subset of this array
* @param index Index of the subset (imin,imax,jmin,jmax,kmin,kmax,...)
* @param subset The subset array to add from
*/
void addSubset( const std::vector<size_t> &index, const Array<TYPE> &subset );
void addSubset( const std::vector<size_t> &index, const Array<TYPE, FUN> &subset );
/*!
* Add data from an array into a subset of this array
* @param index Index of the subset
* @param subset The subset array to add from
*/
void addSubset( const std::vector<Range<size_t>> &index, const Array<TYPE, FUN> &subset );
public: // Accessors
/*!
* Access the desired element
* @param i The row index
*/
HOST_DEVICE inline TYPE &operator()( size_t i )
HOST_DEVICE inline TYPE &operator()( size_t i ) ATTRIBUTE_INLINE
{
CHECK_ARRAY_INDEX3D( d_N, i, 0, 0 ) return d_data[i];
return d_data[d_size.index( i )];
}
/*!
* Access the desired element
* @param i The row index
*/
HOST_DEVICE inline const TYPE &operator()( size_t i ) const
HOST_DEVICE inline const TYPE &operator()( size_t i ) const ATTRIBUTE_INLINE
{
CHECK_ARRAY_INDEX3D( d_N, i, 0, 0 ) return d_data[i];
return d_data[d_size.index( i )];
}
/*!
@@ -393,9 +668,9 @@ public:
* @param i The row index
* @param j The column index
*/
HOST_DEVICE inline TYPE &operator()( size_t i, size_t j )
HOST_DEVICE inline TYPE &operator()( size_t i, size_t j ) ATTRIBUTE_INLINE
{
CHECK_ARRAY_INDEX3D( d_N, i, j, 0 ) return d_data[i + j * d_N[0]];
return d_data[d_size.index( i, j )];
}
/*!
@@ -403,9 +678,9 @@ public:
* @param i The row index
* @param j The column index
*/
HOST_DEVICE inline const TYPE &operator()( size_t i, size_t j ) const
HOST_DEVICE inline const TYPE &operator()( size_t i, size_t j ) const ATTRIBUTE_INLINE
{
CHECK_ARRAY_INDEX3D( d_N, i, j, 0 ) return d_data[i + j * d_N[0]];
return d_data[d_size.index( i, j )];
}
/*!
@@ -414,9 +689,9 @@ public:
* @param j The column index
* @param k The third index
*/
HOST_DEVICE inline TYPE &operator()( size_t i, size_t j, size_t k )
HOST_DEVICE inline TYPE &operator()( size_t i, size_t j, size_t k ) ATTRIBUTE_INLINE
{
CHECK_ARRAY_INDEX3D( d_N, i, j, k ) return d_data[GET_ARRAY_INDEX3D( d_N, i, j, k )];
return d_data[d_size.index( i, j, k )];
}
/*!
@@ -425,35 +700,109 @@ public:
* @param j The column index
* @param k The third index
*/
HOST_DEVICE inline const TYPE &operator()( size_t i, size_t j, size_t k ) const
HOST_DEVICE inline const TYPE &operator()( size_t i, size_t j, size_t k ) const ATTRIBUTE_INLINE
{
CHECK_ARRAY_INDEX3D( d_N, i, j, k ) return d_data[GET_ARRAY_INDEX3D( d_N, i, j, k )];
return d_data[d_size.index( i, j, k )];
}
/*!
* Access the desired element
* @param i The row index
* @param j The column index
* @param k The third index
* @param l The fourth index
* @param i1 The first index
* @param i2 The second index
* @param i3 The third index
* @param i4 The fourth index
*/
HOST_DEVICE inline TYPE &operator()( size_t i, size_t j, size_t k, size_t l )
HOST_DEVICE inline TYPE &operator()(
size_t i1, size_t i2, size_t i3, size_t i4 ) ATTRIBUTE_INLINE
{
CHECK_ARRAY_INDEX4D( d_N, i, j, k, l ) return d_data[GET_ARRAY_INDEX4D( d_N, i, j, k, l )];
return d_data[d_size.index( i1, i2, i3, i4 )];
}
/*!
* Access the desired element
* @param i The row index
* @param j The column index
* @param k The third index
* @param l The fourth index
* @param i1 The first index
* @param i2 The second index
* @param i3 The third index
* @param i4 The fourth index
*/
HOST_DEVICE inline const TYPE &operator()( size_t i, size_t j, size_t k, size_t l ) const
HOST_DEVICE inline const TYPE &operator()(
size_t i1, size_t i2, size_t i3, size_t i4 ) const ATTRIBUTE_INLINE
{
CHECK_ARRAY_INDEX4D( d_N, i, j, k, l ) return d_data[GET_ARRAY_INDEX4D( d_N, i, j, k, l )];
return d_data[d_size.index( i1, i2, i3, i4 )];
}
/*!
* Access the desired element
* @param i1 The first index
* @param i2 The second index
* @param i3 The third index
* @param i4 The fourth index
* @param i5 The fifth index
*/
HOST_DEVICE inline TYPE &operator()(
size_t i1, size_t i2, size_t i3, size_t i4, size_t i5 ) ATTRIBUTE_INLINE
{
return d_data[d_size.index( i1, i2, i3, i4, i5 )];
}
/*!
* Access the desired element
* @param i1 The first index
* @param i2 The second index
* @param i3 The third index
* @param i4 The fourth index
* @param i5 The fifth index
*/
HOST_DEVICE inline const TYPE &operator()(
size_t i1, size_t i2, size_t i3, size_t i4, size_t i5 ) const ATTRIBUTE_INLINE
{
return d_data[d_size.index( i1, i2, i3, i4, i5 )];
}
/*!
* Access the desired element as a raw pointer
* @param i The global index
*/
HOST_DEVICE inline TYPE *ptr( size_t i ) ATTRIBUTE_INLINE
{
    // A global index at or past the array length yields a null pointer
    if ( i < d_size.length() )
        return &d_data[i];
    return nullptr;
}
/*!
* Access the desired element as a raw pointer
* @param i The global index
*/
HOST_DEVICE inline const TYPE *ptr( size_t i ) const ATTRIBUTE_INLINE
{
    // A global index at or past the array length yields a null pointer
    if ( i < d_size.length() )
        return &d_data[i];
    return nullptr;
}
//! Get iterator to beginning of data
inline TYPE *begin() ATTRIBUTE_INLINE { return d_data; }
//! Get iterator to beginning of data
inline const TYPE *begin() const ATTRIBUTE_INLINE { return d_data; }
//! Get iterator to end of data
inline TYPE *end() ATTRIBUTE_INLINE { return d_data + d_size.length(); }
//! Get iterator to end of data
inline const TYPE *end() const ATTRIBUTE_INLINE { return d_data + d_size.length(); }
//! Return the pointer to the raw data
inline std::shared_ptr<TYPE> getPtr() ATTRIBUTE_INLINE { return d_ptr; }
//! Return the pointer to the raw data
inline std::shared_ptr<const TYPE> getPtr() const ATTRIBUTE_INLINE { return d_ptr; }
//! Return the pointer to the raw data
HOST_DEVICE inline TYPE *data() ATTRIBUTE_INLINE { return d_data; }
//! Return the pointer to the raw data
HOST_DEVICE inline const TYPE *data() const ATTRIBUTE_INLINE { return d_data; }
public: // Operator overloading
//! Check if two matrices are equal
// Equality means the dimensions and data have to be identical
bool operator==( const Array &rhs ) const;
@@ -461,19 +810,28 @@ public:
//! Check if two matrices are not equal
inline bool operator!=( const Array &rhs ) const { return !this->operator==( rhs ); }
//! Add another array
Array &operator+=( const Array &rhs );
//! Return the pointer to the raw data
inline std::shared_ptr<TYPE> getPtr() { return d_ptr; }
//! Subtract another array
Array &operator-=( const Array &rhs );
//! Return the pointer to the raw data
inline std::shared_ptr<const TYPE> getPtr() const { return d_ptr; }
//! Add a scalar
Array &operator+=( const TYPE &rhs );
//! Return the pointer to the raw data
HOST_DEVICE inline TYPE *data() { return d_data; }
//! Subtract a scalar
Array &operator-=( const TYPE &rhs );
//! Return the pointer to the raw data
HOST_DEVICE inline const TYPE *data() const { return d_data; }
public: // Math operations
//! Concatenates the arrays along the dimension dim.
static Array cat( const std::vector<Array> &x, int dim = 0 );
//! Concatenates a given array with the current array
void cat( const Array &x, int dim = 0 );
//! Initialize the array with random values (defined from the function table)
void rand();
//! Return true if NaNs are present
inline bool NaNs() const;
@@ -491,13 +849,13 @@ public:
inline TYPE mean() const;
//! Return the min of all elements in a given direction
Array<TYPE> min( int dir ) const;
Array<TYPE, FUN> min( int dir ) const;
//! Return the max of all elements in a given direction
Array<TYPE> max( int dir ) const;
Array<TYPE, FUN> max( int dir ) const;
//! Return the sum of all elements in a given direction
Array<TYPE> sum( int dir ) const;
Array<TYPE, FUN> sum( int dir ) const;
//! Return the smallest value
inline TYPE min( const std::vector<size_t> &index ) const;
@@ -511,52 +869,86 @@ public:
//! Return the mean of all elements
inline TYPE mean( const std::vector<size_t> &index ) const;
//! Return the smallest value
inline TYPE min( const std::vector<Range<size_t>> &index ) const;
//! Return the largest value
inline TYPE max( const std::vector<Range<size_t>> &index ) const;
//! Return the sum of all elements
inline TYPE sum( const std::vector<Range<size_t>> &index ) const;
//! Return the mean of all elements
inline TYPE mean( const std::vector<Range<size_t>> &index ) const;
//! Find all elements that match the operator
std::vector<size_t> find(
const TYPE &value, std::function<bool( const TYPE &, const TYPE & )> compare ) const;
//! Add another array
Array &operator+=( const Array &rhs );
//! Subtract another array
Array &operator-=( const Array &rhs );
//! Add a scalar
Array &operator+=( const TYPE &rhs );
//! Subtract a scalar
Array &operator-=( const TYPE &rhs );
//! Print an array
void print( std::ostream& os, const std::string& name="A", const std::string& prefix="" ) const;
void print(
std::ostream &os, const std::string &name = "A", const std::string &prefix = "" ) const;
//! Multiply two arrays
static Array multiply( const Array& a, const Array& b );
static Array multiply( const Array &a, const Array &b );
//! Transpose an array
Array<TYPE> reverseDim( ) const;
Array<TYPE, FUN> reverseDim() const;
//! Replicate an array a given number of times in each direction
Array<TYPE, FUN> repmat( const std::vector<size_t> &N ) const;
//! Coarsen an array using the given filter
Array<TYPE> coarsen( const Array<TYPE>& filter ) const;
Array<TYPE, FUN> coarsen( const Array<TYPE, FUN> &filter ) const;
//! Coarsen an array using the given filter
Array<TYPE> coarsen( const std::vector<size_t>& ratio, std::function<TYPE(const Array<TYPE>&)> filter ) const;
Array<TYPE, FUN> coarsen( const std::vector<size_t> &ratio,
std::function<TYPE( const Array<TYPE, FUN> & )> filter ) const;
/*!
* Perform a element-wise operation y = f(x)
* @param[in] fun The function operation
* @param[in] x The input array
*/
static Array transform( std::function<TYPE( const TYPE & )> fun, const Array &x );
/*!
* Perform a element-wise operation z = f(x,y)
* @param[in] fun The function operation
* @param[in] x The first array
* @param[in] y The second array
*/
static Array transform(
std::function<TYPE( const TYPE &, const TYPE & )> fun, const Array &x, const Array &y );
/*!
* axpby operation: this = alpha*x + beta*this
* @param[in] alpha alpha
* @param[in] x x
* @param[in] beta beta
*/
void axpby( const TYPE &alpha, const Array<TYPE, FUN> &x, const TYPE &beta );
private:
int d_ndim; // Number of dimensions in array
size_t d_N[ARRAY_NDIM_MAX]; // Size of each dimension
size_t d_length; // Total length of array
ArraySize d_size; // Size of each dimension
TYPE *d_data; // Raw pointer to data in array
std::shared_ptr<TYPE> d_ptr; // Shared pointer to data in array
void allocate( const std::vector<size_t> &N );
void allocate( const ArraySize &N );
public:
/*!
 * Check if another array has the same size as this array
 * @param rhs   Array to compare against (may use a different element type
 *              and/or function table)
 * @return      True if the ArraySize of both arrays compares equal
 */
template<class TYPE2, class FUN2>
inline bool sizeMatch( const Array<TYPE2, FUN2> &rhs ) const
{
    // Use the public accessor: d_size is a private member, and the private
    // members of a different specialization Array<TYPE2, FUN2> are not
    // accessible from here unless that specialization is a friend
    return d_size == rhs.size();
}
private:
template<class TYPE2>
inline bool sizeMatch( const Array<TYPE2>& rhs ) const;
inline void checkSubsetIndex( const std::vector<size_t> &index ) const;
inline std::array<size_t, 5> getDimArray() const;
static inline void getSubsetArrays( const std::vector<size_t> &index,
std::array<size_t, 5> &first, std::array<size_t, 5> &last, std::array<size_t, 5> &N );
inline void checkSubsetIndex( const std::vector<Range<size_t>> &range ) const;
inline std::vector<Range<size_t>> convert( const std::vector<size_t> &index ) const;
static inline void getSubsetArrays( const std::vector<Range<size_t>> &range,
std::array<size_t, 5> &first, std::array<size_t, 5> &last, std::array<size_t, 5> &inc,
std::array<size_t, 5> &N );
};

File diff suppressed because it is too large Load Diff

81
common/FunctionTable.h Normal file
View File

@@ -0,0 +1,81 @@
#ifndef included_FunctionTable
#define included_FunctionTable
#include "common/Array.h"
#include <functional>
/*!
* Class FunctionTable is a serial function table class that defines
* a series of operations that can be performed on the Array class.
* Users can implement additional versions of the function table that match
* the interface to change the behavior of the array class.
*/
class FunctionTable final
{
public:
/*!
* Initialize the array with random values
* The values are generated by the private rand( N, x ) overload for the
* element type of the array.
* @param[in,out] x The array to fill (its data is overwritten)
*/
template<class TYPE, class FUN>
static void rand( Array<TYPE, FUN> &x );
/*!
* Perform a reduce operator y = f(x)
* The reduction starts from the first element and is updated as
* y = op( x[i], y ) for each following element.
* @param[in] op The function operation
* Note: the operator is a template parameter
* (compared to a std::function to improve performance)
* @param[in] A The array to operate on
* @return The reduction (a value-initialized TYPE if the array is empty)
*/
template<class TYPE, class FUN, typename LAMBDA>
static inline TYPE reduce( LAMBDA &op, const Array<TYPE, FUN> &A );
/*!
* Perform a element-wise operation y = f(x)
* @param[in] fun The function operation
* Note: the operator is a template parameter
* (compared to a std::function to improve performance)
* @param[in] x The input array to operate on
* @param[out] y The output array (resized to the size of x)
*/
template<class TYPE, class FUN, typename LAMBDA>
static inline void transform( LAMBDA &fun, const Array<TYPE, FUN> &x, Array<TYPE, FUN> &y );
/*!
* Perform a element-wise operation z = f(x,y)
* Throws std::logic_error if the sizes of x and y do not match.
* @param[in] fun The function operation
* Note: the operator is a template parameter
* (compared to a std::function to improve performance)
* @param[in] x The first array
* @param[in] y The second array
* @param[out] z The result (resized to the size of x)
*/
template<class TYPE, class FUN, typename LAMBDA>
static inline void transform(
LAMBDA &fun, const Array<TYPE, FUN> &x, const Array<TYPE, FUN> &y, Array<TYPE, FUN> &z );
/*!
* Multiply two arrays
* Only arrays with at most two dimensions are supported; std::logic_error
* is thrown for higher dimensions or if the inner dimensions do not match.
* @param[in] a The first array
* @param[in] b The second array
* @param[out] c The output array
*/
template<class TYPE, class FUN>
static void multiply(
const Array<TYPE, FUN> &a, const Array<TYPE, FUN> &b, Array<TYPE, FUN> &c );
private:
// Private constructor: the class only exposes static functions
// NOTE(review): no definition is visible in FunctionTable.hpp -- confirm it
// is intentionally left undefined
FunctionTable();
// Fill a raw buffer of N values with random numbers; FunctionTable.hpp
// provides explicit specializations for double, float and int
template<class T>
static inline void rand( size_t N, T *x );
};
#include "common/FunctionTable.hpp"
#endif

116
common/FunctionTable.hpp Normal file
View File

@@ -0,0 +1,116 @@
#ifndef included_FunctionTable_hpp
#define included_FunctionTable_hpp
#include "common/FunctionTable.h"
#include "common/Utilities.h"
#include <algorithm>
#include <cstring>
#include <limits>
#include <random>
/********************************************************
* Random number initialization *
********************************************************/
/*!
 * Fill an array with random values by forwarding its raw storage to the
 * type-specific rand( N, x ) overload.
 * @param x     Array whose data is overwritten
 */
template<class TYPE, class FUN>
void FunctionTable::rand( Array<TYPE, FUN> &x )
{
    const size_t count = x.length();
    TYPE *values       = x.data();
    FunctionTable::rand<TYPE>( count, values );
}
/*!
 * Fill a raw buffer with doubles drawn from a uniform real distribution
 * constructed with the bounds 0 and 1.
 * @param N     Number of values to generate
 * @param x     Destination buffer (must hold at least N values)
 */
template<>
inline void FunctionTable::rand<double>( size_t N, double *x )
{
    // A new engine is seeded from std::random_device on every call
    std::random_device seedSource;
    std::mt19937 engine( seedSource() );
    std::uniform_real_distribution<double> unit( 0, 1 );
    std::generate_n( x, N, [&engine, &unit]() { return unit( engine ); } );
}
/*!
 * Fill a raw buffer with floats drawn from a uniform real distribution
 * constructed with the bounds 0 and 1.
 * @param N     Number of values to generate
 * @param x     Destination buffer (must hold at least N values)
 */
template<>
inline void FunctionTable::rand<float>( size_t N, float *x )
{
    // A new engine is seeded from std::random_device on every call
    std::random_device rd;
    std::mt19937 gen( rd() );
    // Sample directly in single precision: drawing doubles and narrowing them
    // to float is an implicit conversion that can round values just below 1
    // up to exactly 1.0f
    std::uniform_real_distribution<float> dis( 0.0f, 1.0f );
    for ( size_t i = 0; i < N; i++ )
        x[i] = dis( gen );
}
/*!
 * Fill a raw buffer with ints drawn from a default-constructed
 * std::uniform_int_distribution<int>.
 * @param N     Number of values to generate
 * @param x     Destination buffer (must hold at least N values)
 */
template<>
inline void FunctionTable::rand<int>( size_t N, int *x )
{
    // A new engine is seeded from std::random_device on every call
    std::random_device seedSource;
    std::mt19937 engine( seedSource() );
    std::uniform_int_distribution<int> anyInt;
    for ( int *out = x, *stop = x + N; out != stop; ++out )
        *out = anyInt( engine );
}
/********************************************************
* Reduction *
********************************************************/
/*!
 * Reduce all elements of an array to a single value.
 * The result starts as the first element and is updated with
 * result = op( element, result ) for every following element in storage order.
 * @param op    Binary reduction operator
 * @param A     Array to reduce
 * @return      The reduced value, or TYPE() if the array is empty
 */
template<class TYPE, class FUN, typename LAMBDA>
inline TYPE FunctionTable::reduce( LAMBDA &op, const Array<TYPE, FUN> &A )
{
    const size_t count = A.length();
    if ( count == 0 )
        return TYPE();
    const TYPE *cursor     = A.data();
    const TYPE *const stop = cursor + count;
    TYPE result            = *cursor;
    for ( ++cursor; cursor != stop; ++cursor )
        result = op( *cursor, result );
    return result;
}
/********************************************************
* Unary transformation *
********************************************************/
/*!
 * Element-wise operation y = fun(x).
 * @param fun   Unary operation applied to each element
 * @param x     Input array
 * @param y     Output array; resized to the size of x before being filled
 */
template<class TYPE, class FUN, typename LAMBDA>
inline void FunctionTable::transform( LAMBDA &fun, const Array<TYPE, FUN> &x, Array<TYPE, FUN> &y )
{
    y.resize( x.size() );
    // Walk both arrays by their global (flattened) index
    size_t idx         = 0;
    const size_t count = x.length();
    while ( idx < count ) {
        y( idx ) = fun( x( idx ) );
        ++idx;
    }
}
/*!
 * Element-wise operation z = fun(x,y).
 * @param fun   Binary operation applied to each pair of elements
 * @param x     First input array
 * @param y     Second input array (must have the same size as x)
 * @param z     Output array; resized to the size of x before being filled
 * @throws std::logic_error if the sizes of x and y do not match
 */
template<class TYPE, class FUN, typename LAMBDA>
inline void FunctionTable::transform(
    LAMBDA &fun, const Array<TYPE, FUN> &x, const Array<TYPE, FUN> &y, Array<TYPE, FUN> &z )
{
    const bool sameShape = x.sizeMatch( y );
    if ( !sameShape )
        throw std::logic_error( "Sizes of x and y do not match" );
    z.resize( x.size() );
    // Walk all three arrays by their global (flattened) index
    size_t idx         = 0;
    const size_t count = x.length();
    while ( idx < count ) {
        z( idx ) = fun( x( idx ), y( idx ) );
        ++idx;
    }
}
/********************************************************
* Multiply two arrays *
********************************************************/
/*!
 * Multiply two arrays: c(i,k) = sum_j a(i,j) * b(j,k).
 * @param a     First array (at most two dimensions)
 * @param b     Second array (at most two dimensions)
 * @param c     Output array; resized to ( a.size(0), b.size(1) ) and zeroed
 * @throws std::logic_error if either input has more than two dimensions or
 *         if a.size(1) != b.size(0)
 */
template<class TYPE, class FUN>
void FunctionTable::multiply(
    const Array<TYPE, FUN> &a, const Array<TYPE, FUN> &b, Array<TYPE, FUN> &c )
{
    // Only the vector/matrix case is implemented
    if ( a.ndim() > 2 || b.ndim() > 2 )
        throw std::logic_error( "Not finished yet" );
    if ( a.size( 1 ) != b.size( 0 ) )
        throw std::logic_error( "Inner dimensions must match" );
    c.resize( a.size( 0 ), b.size( 1 ) );
    c.fill( 0 );
    // Extents are read after c has been resized and filled
    const size_t nRows  = a.size( 0 );
    const size_t nInner = a.size( 1 );
    const size_t nCols  = b.size( 1 );
    // The row index is innermost, so the first index of a and c varies fastest
    for ( size_t col = 0; col < nCols; ++col ) {
        for ( size_t inner = 0; inner < nInner; ++inner ) {
            for ( size_t row = 0; row < nRows; ++row )
                c( row, col ) += a( row, inner ) * b( inner, col );
        }
    }
}
#endif

File diff suppressed because it is too large Load Diff

View File

@@ -1,14 +1,11 @@
#ifndef included_AtomicStackTrace
#define included_AtomicStackTrace
#ifndef included_StackTrace
#define included_StackTrace
#include <functional>
#include <iostream>
#include <stdio.h>
#include <stdlib.h>
#include <vector>
#include <thread>
#include <memory>
#include <set>
#include <thread>
#include <vector>
// Check for and include MPI
@@ -39,35 +36,51 @@ struct stack_info {
int line;
//! Default constructor
stack_info() : address( nullptr ), address2( nullptr ), line( 0 ) {}
//! Reset the stack
void clear();
//! Operator==
bool operator==( const stack_info& rhs ) const;
bool operator==( const stack_info &rhs ) const;
//! Operator!=
bool operator!=( const stack_info& rhs ) const;
bool operator!=( const stack_info &rhs ) const;
//! Get the minimum width to print the addresses
int getAddressWidth() const;
//! Print the stack info
std::string print() const;
std::string print( int widthAddress = 16, int widthObject = 20, int widthFunction = 32 ) const;
//! Compute the number of bytes needed to store the object
size_t size() const;
//! Pack the data to a byte array, returning a pointer to the end of the data
char* pack( char* ptr ) const;
char *pack( char *ptr ) const;
//! Unpack the data from a byte array, returning a pointer to the end of the data
const char* unpack( const char* ptr );
const char *unpack( const char *ptr );
//! Pack a vector of data to a memory block
static std::vector<char> packArray( const std::vector<stack_info>& data );
static std::vector<char> packArray( const std::vector<stack_info> &data );
//! Unpack a vector of data from a memory block
static std::vector<stack_info> unpackArray( const char* data );
static std::vector<stack_info> unpackArray( const char *data );
};
struct multi_stack_info {
int N;
stack_info stack;
std::vector<multi_stack_info> children;
int N; // Number of threads/processes
stack_info stack; // Current stack item
std::vector<multi_stack_info> children; // Children
//! Default constructor
multi_stack_info() : N( 0 ) {}
//! Construct from a simple call stack
explicit multi_stack_info( const std::vector<stack_info> & );
//! Assignment operator from a simple call stack
multi_stack_info &operator=( const std::vector<stack_info> & );
//! Reset the stack
void clear();
//! Add the given stack to the multistack
void add( size_t N, const stack_info *stack );
void add( size_t len, const stack_info *stack );
//! Print the stack info
std::vector<std::string> print( const std::string& prefix=std::string() ) const;
std::vector<std::string> print( const std::string &prefix = std::string() ) const;
private:
void print2( const std::string &prefix, int w[3], std::vector<std::string> &text ) const;
int getAddressWidth() const;
int getObjectWidth() const;
int getFunctionWidth() const;
};
@@ -95,7 +108,7 @@ std::vector<stack_info> getCallStack( std::thread::native_handle_type id );
* Note: This functionality may not be available on all platforms
* @return Returns vector containing the stack
*/
multi_stack_info getAllCallStacks( );
multi_stack_info getAllCallStacks();
/*!
@@ -107,7 +120,17 @@ multi_stack_info getAllCallStacks( );
* Note: This functionality may not be available on all platforms
* @return Returns vector containing the stack
*/
multi_stack_info getGlobalCallStacks( );
multi_stack_info getGlobalCallStacks();
/*!
* @brief Clean up the stack trace
* @details This function modifies the stack trace to remove entries
* related to acquiring the stack trace in an attempt to make it
* more useful for display/users.
* @param[in,out] stack The stack trace to modify
*/
void cleanupStackTrace( multi_stack_info &stack );
//! Function to return the current call stack for the current thread
@@ -136,8 +159,9 @@ std::string signalName( int signal );
* Return the symbols from the current executable (not available for all platforms)
* @return Returns 0 if successful
*/
int getSymbols(
std::vector<void *> &address, std::vector<char> &type, std::vector<std::string> &obj );
int getSymbols( std::vector<void *> &address,
std::vector<char> &type,
std::vector<std::string> &obj );
/*!
@@ -159,16 +183,17 @@ enum class terminateType { signal, exception };
/*!
* Set the error handlers
* @param[in] Function to terminate the program: abort(msg,type)
* @param[in] abort Function to terminate the program: abort(msg,type)
*/
void setErrorHandlers( std::function<void( std::string, terminateType )> abort );
/*!
* Set the given signals to the handler
* @param[in] Function to terminate the program: abort(msg,type)
* @param[in] signals Signals to handle
* @param[in] handler Function to terminate the program: abort(msg,type)
*/
void setSignals( const std::vector<int>& signals, void (*handler) (int) );
void setSignals( const std::vector<int> &signals, void ( *handler )( int ) );
//! Clear a signal set by setSignals
@@ -176,28 +201,28 @@ void clearSignal( int signal );
//! Clear all signals set by setSignals
void clearSignals( );
void clearSignals();
//! Return a list of all signals that can be caught
std::vector<int> allSignalsToCatch( );
std::vector<int> allSignalsToCatch();
//! Return a default list of signals to catch
std::vector<int> defaultSignalsToCatch( );
std::vector<int> defaultSignalsToCatch();
//! Get a list of the active threads
std::set<std::thread::native_handle_type> activeThreads( );
std::set<std::thread::native_handle_type> activeThreads();
//! Get a handle to this thread
std::thread::native_handle_type thisThread( );
std::thread::native_handle_type thisThread();
//! Initialize globalCallStack functionality
void globalCallStackInitialize( MPI_Comm comm );
//! Clean up globalCallStack functionality
void globalCallStackFinalize( );
void globalCallStackFinalize();
/*!
@@ -208,9 +233,10 @@ void globalCallStackFinalize( );
* @param[out] exit_code Exit code returned from child process
* @return Returns string containing the output
*/
std::string exec( const std::string& cmd, int& exit_code );
std::string exec( const std::string &cmd, int &exit_code );
} // namespace StackTrace
#endif

View File

@@ -1,345 +1,379 @@
#include <iostream>
#include <sstream>
#include <vector>
#include <string>
#include "common/UnitTest.h"
#include "common/Utilities.h"
#include <cstring>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>
#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
// Windows
// Sleep is defined in milliseconds
#else
// Linux
// usleep is defined in microseconds, create a Sleep command
#define Sleep(x) usleep(x*1000)
#endif
#define pout std::cout
#define printp printf
/********************************************************************
* Empty Constructor *
********************************************************************/
UnitTest::UnitTest() {
#ifdef USE_MPI
comm = MPI_COMM_WORLD;
#endif
* Constructor/Destructor *
********************************************************************/
UnitTest::UnitTest()
{
#ifdef USE_MPI
comm = MPI_COMM_WORLD;
#endif
}
// Destructor: discard all stored messages
UnitTest::~UnitTest()
{
    reset();
}
// Remove all pass / fail / expected-fail messages while holding the mutex
void UnitTest::reset()
{
    std::lock_guard<std::mutex> guard( mutex );
    // Swapping with an empty temporary drops the old storage instead of just clearing it
    std::vector<std::string> noPass, noFail, noExpected;
    pass_messages.swap( noPass );
    fail_messages.swap( noFail );
    expected_fail_messages.swap( noExpected );
}
/********************************************************************
* Print a global report *
* Note: only rank 0 will print, all messages will be aggregated *
********************************************************************/
void UnitTest::report(const int level0) {
* Add a pass, fail, expected failure message in a thread-safe way *
********************************************************************/
// Record a passed test.  The message list is protected by the mutex.
void UnitTest::passes( const std::string &in )
{
    // Scoped lock: the mutex is released even if push_back throws (e.g. std::bad_alloc),
    // which a manual lock()/unlock() pair would not guarantee
    std::lock_guard<std::mutex> lock( mutex );
    pass_messages.push_back( in );
}
// Record a failed test.  The message list is protected by the mutex.
void UnitTest::failure( const std::string &in )
{
    // Scoped lock: the mutex is released even if push_back throws (e.g. std::bad_alloc),
    // which a manual lock()/unlock() pair would not guarantee
    std::lock_guard<std::mutex> lock( mutex );
    fail_messages.push_back( in );
}
// Record an expected failure.  The message list is protected by the mutex.
void UnitTest::expected_failure( const std::string &in )
{
    // Scoped lock: the mutex is released even if push_back throws (e.g. std::bad_alloc),
    // which a manual lock()/unlock() pair would not guarantee
    std::lock_guard<std::mutex> lock( mutex );
    expected_fail_messages.push_back( in );
}
/********************************************************************
* Print a global report *
* Note: only rank 0 will print, all messages will be aggregated *
********************************************************************/
// Collect one integer from every rank.
// Returns a vector with getSize() entries; without MPI (or with a single rank)
// every entry is simply the local value.
inline std::vector<int> UnitTest::allGather( int value ) const
{
    const int nprocs = getSize();
    // Pre-filled with the local value so the single-rank case needs no communication
    std::vector<int> gathered( nprocs, value );
#ifdef USE_MPI
    if ( nprocs > 1 )
        MPI_Allgather( &value, 1, MPI_INT, gathered.data(), 1, MPI_INT, comm );
#endif
    return gathered;
}
// Synchronize all ranks of the communicator (no-op without MPI or with a single rank)
inline void UnitTest::barrier() const
{
#ifdef USE_MPI
    const bool multipleRanks = getSize() > 1;
    if ( multipleRanks )
        MPI_Barrier( comm );
#endif
}
// Print the messages gathered from each rank.
// messages[i] holds the messages of rank i.  With more than one rank each
// non-empty list is printed under a per-rank heading; with a single rank the
// messages are printed directly.  An empty outer vector prints nothing.
static inline void print_messages( const std::vector<std::vector<std::string>> &messages )
{
    // Nothing gathered: avoid reading messages[0] of an empty vector
    if ( messages.empty() )
        return;
    if ( messages.size() > 1 ) {
        for ( size_t i = 0; i < messages.size(); i++ ) {
            if ( !messages[i].empty() ) {
                printp( " Proccessor %i:\n", static_cast<int>( i ) );
                for ( const auto &j : messages[i] )
                    pout << " " << j << std::endl;
            }
        }
    } else {
        for ( const auto &j : messages[0] )
            pout << " " << j << std::endl;
    }
}
void UnitTest::report( const int level0 ) const
{
mutex.lock();
int size = getSize();
int rank = getRank();
// Broadcast the print level from rank 0
int level = level0;
#ifdef USE_MPI
if ( getSize() > 1 )
MPI_Bcast( &level, 1, MPI_INT, 0, comm );
#endif
if ( level<0 || level > 2 )
ERROR("Invalid print level");
#ifdef USE_MPI
if ( getSize() > 1 )
MPI_Bcast( &level, 1, MPI_INT, 0, comm );
#endif
if ( level < 0 || level > 2 )
ERROR( "Invalid print level" );
// Perform a global all gather to get the number of failures per processor
std::vector<int> N_pass(size,0);
std::vector<int> N_fail(size,0);
std::vector<int> N_expected_fail(size,0);
int local_pass_size = (int) pass_messages.size();
int local_fail_size = (int) fail_messages.size();
int local_expected_fail_size = (int) expected_fail_messages.size();
if ( getSize() > 1 ) {
#ifdef USE_MPI
MPI_Allgather( &local_pass_size, 1, MPI_INT, &N_pass[0], 1, MPI_INT, comm);
MPI_Allgather( &local_fail_size, 1, MPI_INT, &N_fail[0], 1, MPI_INT, comm);
MPI_Allgather( &local_expected_fail_size, 1, MPI_INT, &N_expected_fail[0], 1, MPI_INT, comm);
#endif
} else {
N_pass[0] = local_pass_size;
N_fail[0] = local_fail_size;
N_expected_fail[0] = local_expected_fail_size;
}
int N_pass_tot = 0;
auto N_pass = allGather( pass_messages.size() );
auto N_fail = allGather( fail_messages.size() );
auto N_expected_fail = allGather( expected_fail_messages.size() );
int N_pass_tot = 0;
int N_fail_tot = 0;
int N_expected_fail_tot = 0;
for (int i=0; i<size; i++) {
for ( int i = 0; i < size; i++ ) {
N_pass_tot += N_pass[i];
N_fail_tot += N_fail[i];
N_expected_fail_tot += N_expected_fail[i];
}
// Send all messages to rank 0 (if needed)
std::vector< std::vector<std::string> > pass_messages_rank(size);
std::vector< std::vector<std::string> > fail_messages_rank(size);
std::vector< std::vector<std::string> > expected_fail_rank(size);
std::vector<std::vector<std::string>> pass_messages_rank( size );
std::vector<std::vector<std::string>> fail_messages_rank( size );
std::vector<std::vector<std::string>> expected_fail_rank( size );
// Get the pass messages
if ( ( level==1 && N_pass_tot<=20 ) || level==2 ) {
if ( rank==0 ) {
// Rank 0 should receive all messages
for (int i=0; i<size; i++) {
if ( i==0 )
pass_messages_rank[i] = pass_messages;
else if ( N_pass[i]>0 )
pass_messages_rank[i] = unpack_message_stream(i,1);
}
} else if ( pass_messages.size() ) {
// All other ranks send their message (use non-blocking communication)
pack_message_stream(pass_messages,0,1);
}
}
if ( ( level == 1 && N_pass_tot <= 20 ) || level == 2 )
pass_messages_rank = UnitTest::gatherMessages( pass_messages, 1 );
// Get the fail messages
if ( level==1 || level==2 ) {
if ( rank==0 ) {
// Rank 0 should receive all messages
for (int i=0; i<size; i++) {
if ( i==0 )
fail_messages_rank[i] = fail_messages;
else if ( N_fail[i]>0 )
fail_messages_rank[i] = unpack_message_stream(i,2);
}
} else if ( !fail_messages.empty() ){
// All other ranks send their message (use non-blocking communication)
pack_message_stream(fail_messages,0,2);
}
}
if ( level == 1 || level == 2 )
fail_messages_rank = UnitTest::gatherMessages( fail_messages, 2 );
// Get the expected_fail messages
if ( ( level==1 && N_expected_fail_tot<=50 ) || level==2 ) {
if ( rank==0 ) {
// Rank 0 should receive all messages
for (int i=0; i<size; i++) {
if ( i==0 )
expected_fail_rank[i] = expected_fail_messages;
else if ( N_expected_fail[i]>0 )
expected_fail_rank[i] = unpack_message_stream(i,3);
}
} else if ( !expected_fail_messages.empty() ){
// All other ranks send their message (use non-blocking communication)
pack_message_stream(expected_fail_messages,0,3);
}
}
if ( ( level == 1 && N_expected_fail_tot <= 50 ) || level == 2 )
expected_fail_rank = UnitTest::gatherMessages( expected_fail_messages, 2 );
// Print the results of all messages (only rank 0 will print)
if ( rank==0 ) {
std::cout << std::endl;
if ( rank == 0 ) {
pout << std::endl;
// Print the passed tests
std::cout << "Tests passed" << std::endl;
if ( level==0 || ( level==1 && N_pass_tot>20 ) ) {
pout << "Tests passed" << std::endl;
if ( level == 0 || ( level == 1 && N_pass_tot > 20 ) ) {
// We want to print a summary
if ( size>8 ) {
if ( size > 8 ) {
// Print 1 summary for all processors
std::cout << " " << N_pass_tot << " tests passed (use report level 2 for more detail)" << std::endl;
printp( " %i tests passed (use report level 2 for more detail)\n", N_pass_tot );
} else {
// Print a summary for each processor
for (int i=0; i<size; i++)
std::cout << " " << N_pass[i] << " tests passed (proc " << i << ") (use report level 2 for more detail)" << std::endl;
for ( int i = 0; i < size; i++ )
printp( " %i tests passed (proc %i) (use report level 2 for more detail)\n",
N_pass[i], i );
}
} else {
// We want to print all messages
for (int i=0; i<size; i++) {
ASSERT( (int)pass_messages_rank[i].size() == N_pass[i] );
if ( N_pass[i] > 0 ) {
std::cout << " Proccessor " << i << ":" << std::endl;
for (unsigned int j=0; j<pass_messages_rank[i].size(); j++)
std::cout << " " << pass_messages_rank[i][j] << std::endl;
}
}
for ( int i = 0; i < size; i++ )
ASSERT( (int) pass_messages_rank[i].size() == N_pass[i] );
print_messages( pass_messages_rank );
}
std::cout << std::endl;
pout << std::endl;
// Print the tests that failed
std::cout << "Tests failed" << std::endl;
if ( level==0 ) {
pout << "Tests failed" << std::endl;
if ( level == 0 ) {
// We want to print a summary
if ( size>8 ) {
if ( size > 8 ) {
// Print 1 summary for all processors
std::cout << " " << N_pass_tot << " tests failed (use report level 2 for more detail)" << std::endl;
printp( " %i tests failed (use report level 2 for more detail)\n", N_fail_tot );
} else {
// Print a summary for each processor
for (int i=0; i<size; i++)
std::cout << " " << N_fail[i] << " tests failed (proc " << i << ") (use report level 1 or 2 for more detail)" << std::endl;
for ( int i = 0; i < size; i++ )
printp( " %i tests failed (proc %i) (use report level 2 for more detail)\n",
N_fail[i], i );
}
} else {
// We want to print all messages
for (int i=0; i<size; i++) {
ASSERT( (int)fail_messages_rank[i].size() == N_fail[i] );
if ( N_fail[i] > 0 ) {
std::cout << " Processor " << i << ":" << std::endl;
for (unsigned int j=0; j<fail_messages_rank[i].size(); j++)
std::cout << " " << fail_messages_rank[i][j] << std::endl;
}
}
for ( int i = 0; i < size; i++ )
ASSERT( (int) fail_messages_rank[i].size() == N_fail[i] );
print_messages( fail_messages_rank );
}
std::cout << std::endl;
pout << std::endl;
// Print the tests that expected failed
std::cout << "Tests expected failed" << std::endl;
if ( level==0 || ( level==1 && N_expected_fail_tot>50 ) ) {
pout << "Tests expected failed" << std::endl;
if ( level == 0 || ( level == 1 && N_expected_fail_tot > 50 ) ) {
// We want to print a summary
if ( size>8 ) {
if ( size > 8 ) {
// Print 1 summary for all processors
std::cout << " " << N_expected_fail_tot << " tests expected failed (use report level 2 for more detail)" << std::endl;
printp( " %i tests expected failed (use report level 2 for more detail)\n",
N_expected_fail_tot );
} else {
// Print a summary for each processor
for (int i=0; i<size; i++)
std::cout << " " << N_expected_fail[i] << " tests expected failed (proc " << i << ") (use report level 1 or 2 for more detail)" << std::endl;
for ( int i = 0; i < size; i++ )
printp( " %i tests expected failed (proc %i) (use report level 2 for more "
"detail)\n",
N_expected_fail[i], i );
}
} else {
// We want to print all messages
for (int i=0; i<size; i++) {
ASSERT( (int)expected_fail_rank[i].size() == N_expected_fail[i] );
if ( N_expected_fail[i] > 0 ) {
std::cout << " Processor " << i << ":" << std::endl;
for (unsigned int j=0; j<expected_fail_rank[i].size(); j++)
std::cout << " " << expected_fail_rank[i][j] << std::endl;
}
}
for ( int i = 0; i < size; i++ )
ASSERT( (int) expected_fail_rank[i].size() == N_expected_fail[i] );
print_messages( expected_fail_rank );
}
std::cout << std::endl;
pout << std::endl;
}
// Add a barrier to synchronize all processors (rank 0 is much slower)
#ifdef USE_MPI
if ( getSize() > 1 )
MPI_Barrier(comm);
#endif
}
/********************************************************************
* Pack and send the given messages *
********************************************************************/
void UnitTest::pack_message_stream(const std::vector<std::string>& messages, const int rank, const int tag)
{
#ifdef USE_MPI
// Get the size of the messages
int N_messages = (int) messages.size();
int *msg_size = new int[N_messages];
int msg_size_tot = 0;
for (int i=0; i<N_messages; i++) {
msg_size[i] = (int) messages[i].size();
msg_size_tot += msg_size[i];
}
// Allocate space for the message stream
int size_data = (N_messages+1)*sizeof(int)+msg_size_tot;
char *data = new char[size_data];
// Pack the message stream
int *tmp = (int*) data;
tmp[0] = N_messages;
for (int i=0; i<N_messages; i++)
tmp[i+1] = msg_size[i];
int k = (N_messages+1)*sizeof(int);
for (int i=0; i<N_messages; i++) {
messages[i].copy(&data[k],msg_size[i]);
k += msg_size[i];
}
// Send the message stream (using a non-blocking send)
MPI_Request request;
MPI_Isend( data, size_data, MPI_CHAR, rank, tag, comm, &request );
// Wait for the communication to send and free the temporary memory
MPI_Status status;
MPI_Wait( &request, &status );
delete [] data;
delete [] msg_size;
#endif
barrier();
Utilities::sleep_ms( 10 ); // Need a brief pause to allow any printing to finish
mutex.unlock();
}
/********************************************************************
* receive and unpack a message stream *
********************************************************************/
std::vector<std::string> UnitTest::unpack_message_stream(const int rank, const int tag)
* Gather the messages to rank 0 *
********************************************************************/
std::vector<std::vector<std::string>> UnitTest::gatherMessages(
const std::vector<std::string> &local_messages, int tag ) const
{
#ifdef USE_MPI
// Probe the message to get the message size
MPI_Status status;
MPI_Probe(rank,tag,comm,&status);
int size_data=-1;
MPI_Get_count(&status,MPI_BYTE,&size_data);
ASSERT(size_data>=0);
// Allocate memory to receive the data
char *data = new char[size_data];
// receive the data (using a non-blocking receive)
MPI_Request request;
MPI_Irecv( data, size_data, MPI_CHAR, rank, tag, comm, &request );
// Wait for the communication to be received
MPI_Wait( &request, &status );
// Unpack the message stream
int *tmp = (int*) data;
int N_messages = tmp[0];
int *msg_size = &tmp[1];
std::vector<std::string> messages(N_messages);
int k = (N_messages+1)*sizeof(int);
for (int i=0; i<N_messages; i++) {
messages[i] = std::string(&data[k],msg_size[i]);
k += msg_size[i];
const int rank = getRank();
const int size = getSize();
std::vector<std::vector<std::string>> messages( size );
if ( rank == 0 ) {
// Rank 0 should receive all messages
for ( int i = 0; i < size; i++ ) {
if ( i == 0 )
messages[i] = local_messages;
else
messages[i] = unpack_message_stream( i, tag );
}
// Delete the temporary memory
delete [] data;
return messages;
#else
} else {
// All other ranks send their message (use non-blocking communication)
pack_message_stream( local_messages, 0, tag );
}
return messages;
}
/********************************************************************
* Pack and send the given messages *
********************************************************************/
// Pack the given messages into one byte stream and send it to the given rank.
// Stream layout: [int N_messages][int size of each message][message bytes back to back].
// The send is waited on before returning, so the buffers may be released on exit.
// Without MPI this function does nothing.
void UnitTest::pack_message_stream(
    const std::vector<std::string> &messages, const int rank, const int tag ) const
{
#ifdef USE_MPI
    // Get the size of the messages
    const auto N_messages = static_cast<int>( messages.size() );
    std::vector<int> msg_size( messages.size() );
    size_t msg_size_tot = 0;
    for ( size_t i = 0; i < messages.size(); i++ ) {
        msg_size[i] = static_cast<int>( messages[i].size() );
        msg_size_tot += messages[i].size();
    }
    // Allocate space for the message stream (owned by the vector, freed on any exit path)
    const size_t header_bytes = ( messages.size() + 1 ) * sizeof( int );
    std::vector<char> data( header_bytes + msg_size_tot );
    // Pack the message stream
    memcpy( data.data(), &N_messages, sizeof( int ) );
    if ( !msg_size.empty() )
        memcpy( data.data() + sizeof( int ), msg_size.data(), msg_size.size() * sizeof( int ) );
    size_t k = header_bytes;
    for ( const auto &msg : messages ) {
        msg.copy( data.data() + k, msg.size() );
        k += msg.size();
    }
    // Send the message stream (using a non-blocking send)
    MPI_Request request;
    MPI_Isend( data.data(), static_cast<int>( data.size() ), MPI_CHAR, rank, tag, comm, &request );
    // Wait for the communication to send before the buffers go out of scope
    MPI_Status status;
    MPI_Wait( &request, &status );
#else
    NULL_USE( messages );
    NULL_USE( rank );
    NULL_USE( tag );
#endif
}
/********************************************************************
* Receive and unpack a message stream *
********************************************************************/
std::vector<std::string> UnitTest::unpack_message_stream( const int rank, const int tag ) const
{
#ifdef USE_MPI
// Probe the message to get the message size
MPI_Status status;
MPI_Probe( rank, tag, comm, &status );
int size_data = -1;
MPI_Get_count( &status, MPI_BYTE, &size_data );
ASSERT( size_data >= 0 );
// Allocate memory to receive the data
auto *data = new char[size_data];
// receive the data (using a non-blocking receive)
MPI_Request request;
MPI_Irecv( data, size_data, MPI_CHAR, rank, tag, comm, &request );
// Wait for the communication to be received
MPI_Wait( &request, &status );
// Unpack the message stream
int N_messages = 0;
memcpy( &N_messages, data, sizeof( int ) );
if ( N_messages == 0 ) {
delete[] data;
return std::vector<std::string>();
#endif
}
std::vector<int> msg_size( N_messages );
std::vector<std::string> messages( N_messages );
memcpy( msg_size.data(), &data[sizeof( int )], N_messages * sizeof( int ) );
int k = ( N_messages + 1 ) * sizeof( int );
for ( int i = 0; i < N_messages; i++ ) {
messages[i] = std::string( &data[k], msg_size[i] );
k += msg_size[i];
}
delete[] data;
return messages;
#else
NULL_USE( rank );
NULL_USE( tag );
return std::vector<std::string>();
#endif
}
/********************************************************************
* Other functions *
********************************************************************/
int UnitTest::getRank()
* Other functions *
********************************************************************/
int UnitTest::getRank() const
{
int rank = 0;
#ifdef USE_MPI
int flag=0;
MPI_Initialized(&flag);
if ( flag )
MPI_Comm_rank( comm, &rank );
#endif
#ifdef USE_MPI
int flag = 0;
MPI_Initialized( &flag );
if ( flag )
MPI_Comm_rank( comm, &rank );
#endif
return rank;
}
int UnitTest::getSize()
int UnitTest::getSize() const
{
int size = 1;
#ifdef USE_MPI
int flag=0;
MPI_Initialized(&flag);
if ( flag )
MPI_Comm_size( comm, &size );
#endif
#ifdef USE_MPI
int flag = 0;
MPI_Initialized( &flag );
if ( flag )
MPI_Comm_size( comm, &size );
#endif
return size;
}
size_t UnitTest::NumPassGlobal()
size_t UnitTest::NumPassGlobal() const
{
size_t num = pass_messages.size();
#ifdef USE_MPI
if ( getSize() > 1 ) {
int send = static_cast<int>(num);
int sum = 0;
MPI_Allreduce( &send, &sum, 1, MPI_INT, MPI_SUM, comm );
num = static_cast<size_t>(sum);
}
#endif
#ifdef USE_MPI
if ( getSize() > 1 ) {
auto send = static_cast<int>( num );
int sum = 0;
MPI_Allreduce( &send, &sum, 1, MPI_INT, MPI_SUM, comm );
num = static_cast<size_t>( sum );
}
#endif
return num;
}
size_t UnitTest::NumFailGlobal()
size_t UnitTest::NumFailGlobal() const
{
size_t num = fail_messages.size();
#ifdef USE_MPI
if ( getSize() > 1 ) {
int send = static_cast<int>(num);
int sum = 0;
MPI_Allreduce( &send, &sum, 1, MPI_INT, MPI_SUM, comm );
num = static_cast<size_t>(sum);
}
#endif
#ifdef USE_MPI
if ( getSize() > 1 ) {
auto send = static_cast<int>( num );
int sum = 0;
MPI_Allreduce( &send, &sum, 1, MPI_INT, MPI_SUM, comm );
num = static_cast<size_t>( sum );
}
#endif
return num;
}
size_t UnitTest::NumExpectedFailGlobal()
size_t UnitTest::NumExpectedFailGlobal() const
{
size_t num = expected_fail_messages.size();
#ifdef USE_MPI
if ( getSize() > 1 ) {
int send = static_cast<int>(num);
int sum = 0;
MPI_Allreduce( &send, &sum, 1, MPI_INT, MPI_SUM, comm );
num = static_cast<size_t>(sum);
}
#endif
#ifdef USE_MPI
if ( getSize() > 1 ) {
auto send = static_cast<int>( num );
int sum = 0;
MPI_Allreduce( &send, &sum, 1, MPI_INT, MPI_SUM, comm );
num = static_cast<size_t>( sum );
}
#endif
return num;
}

View File

@@ -1,11 +1,12 @@
#ifndef included_UnitTest
#define included_UnitTest
#include <mutex>
#include <sstream>
#include <vector>
#include <string>
#include <vector>
#ifdef USE_MPI
#include "mpi.h"
#include "mpi.h"
#endif
@@ -27,78 +28,92 @@
* \endcode
*/
class UnitTest {
class UnitTest
{
public:
//! Constructor
UnitTest();
//! Indicate a passed test
virtual void passes (const std::string &in) { pass_messages.push_back(in); }
//! Destructor
virtual ~UnitTest();
//! Indicate a failed test
virtual void failure (const std::string &in) { fail_messages.push_back(in); }
//! Indicate a passed test (thread-safe)
virtual void passes( const std::string &in );
//! Indicate an expected failed test
virtual void expected_failure (const std::string &in) { expected_fail_messages.push_back(in); }
//! Indicate a failed test (thread-safe)
virtual void failure( const std::string &in );
//! Indicate an expected failed test (thread-safe)
virtual void expected_failure( const std::string &in );
//! Return the number of passed tests locally
virtual size_t NumPassLocal () { return pass_messages.size(); }
virtual size_t NumPassLocal() const { return pass_messages.size(); }
//! Return the number of failed tests locally
virtual size_t NumFailLocal () { return fail_messages.size(); }
virtual size_t NumFailLocal() const { return fail_messages.size(); }
//! Return the number of expected failed tests locally
virtual size_t NumExpectedFailLocal () { return expected_fail_messages.size(); }
virtual size_t NumExpectedFailLocal() const { return expected_fail_messages.size(); }
//! Return the number of passed tests locally
virtual size_t NumPassGlobal ();
virtual size_t NumPassGlobal() const;
//! Return the number of failed tests locally
virtual size_t NumFailGlobal ();
virtual size_t NumFailGlobal() const;
//! Return the number of expected failed tests locally
virtual size_t NumExpectedFailGlobal ();
virtual size_t NumExpectedFailGlobal() const;
//! Return the rank of the current processor
int getRank ();
int getRank() const;
//! Return the number of processors
int getSize ();
int getSize() const;
/*!
* Print a report of the passed and failed tests.
* Note: This is a blocking call that all processors must execute together.
* Note: Only rank 0 will print the messages (this is necessary as other ranks may not be able to print correctly).
* Note: Only rank 0 will print the messages (this is necessary as other ranks may not be able
* to print correctly).
* @param level Optional integer specifying the level of reporting (default: 1)
* 0: Report the number of tests passed, failed, and expected failures.
* 1: Report the number of passed tests (if <=20) or the number passed otherwise,
* report all failures,
* report the number of expected failed tests (if <=50) or the number passed otherwise.
* 1: Report the number of passed tests (if <=20) or the number passed
* otherwise, report all failures, report the number of expected
* failed tests (if <=50) or the number passed otherwise.
* 2: Report all passed, failed, and expected failed tests.
*/
virtual void report(const int level=1);
virtual void report( const int level = 1 ) const;
//! Clear the messages
void reset();
protected:
std::vector<std::string> pass_messages;
std::vector<std::string> fail_messages;
std::vector<std::string> expected_fail_messages;
#ifdef USE_MPI
MPI_Comm comm;
#endif
mutable std::mutex mutex;
#ifdef USE_MPI
MPI_Comm comm;
#endif
private:
// Make the copy constructor private
UnitTest(const UnitTest& p) {}
UnitTest( const UnitTest & ) {}
// Function to pack the messages into a single data stream and send to the given processor
// Note: This function does not return until the message stream has been sent
void pack_message_stream(const std::vector<std::string>& messages, const int rank, const int tag);
void pack_message_stream(
const std::vector<std::string> &messages, const int rank, const int tag ) const;
// Function to unpack the messages from a single data stream
// Note: This function does not return until the message stream has been received
std::vector<std::string> unpack_message_stream(const int rank, const int tag);
std::vector<std::string> unpack_message_stream( const int rank, const int tag ) const;
// Helper functions
inline void barrier() const;
inline std::vector<int> allGather( int value ) const;
inline std::vector<std::vector<std::string>> gatherMessages(
const std::vector<std::string> &local_messages, int tag ) const;
};

View File

@@ -1,74 +1,107 @@
#ifndef included_Utilities
#define included_Utilities
#include <chrono>
#include <cstdarg>
#include <iostream>
#include <mutex>
#include <stdio.h>
#include <stdlib.h>
#include <iostream>
#include <sys/stat.h>
#include <thread>
#include <vector>
namespace Utilities {
/*!
* Utilities is a Singleton class containing basic routines for error
* reporting, file manipulations, etc. Included are a set of \ref Macros "macros" that are commonly used.
* Aborts the run after printing an error message with file and
* linenumber information.
*/
namespace Utilities
{
/*!
* Aborts the run after printing an error message with file and
* linenumber information.
*/
void abort(const std::string &message, const std::string &filename, const int line);
void abort( const std::string &message, const std::string &filename, const int line );
/*!
* Set the behavior of abort
* @param printMemory Print the current memory usage (default is true)
* @param printStack Print the current call stack (default is true)
* @param throwException Throw an exception instead of MPI_Abort (default is false)
*/
void setAbortBehavior( bool printMemory, bool printStack, bool throwException );
/*!
* Set the behavior of abort
* @param printMemory Print the current memory usage (default is true)
* @param printStack Print the current call stack (default is true)
* @param throwException Throw an exception instead of MPI_Abort (default is false)
*/
void setAbortBehavior( bool printMemory, bool printStack, bool throwException );
//! Function to set the error handlers
void setErrorHandlers();
/*!
* Function to get the memory available.
* This function will return the total memory available
* Note: depending on the implementation, this number may be rounded
* to a multiple of the page size.
* If this function fails, it will return 0.
*/
size_t getSystemMemory();
/*!
* Function to get the memory usage.
* This function will return the total memory used by the application.
* Note: depending on the implementation, this number may be rounded
* to a multiple of the page size.
* If this function fails, it will return 0.
*/
size_t getMemoryUsage();
//! Function to set the error handlers
void setErrorHandlers();
//! Function to get an arbitrary point in time
double time();
/*!
* Function to get the memory available.
* This function will return the total memory available
* Note: depending on the implementation, this number may be rounded
* to a multiple of the page size.
* If this function fails, it will return 0.
*/
size_t getSystemMemory();
//! Function to get the resolution of time
double tick();
//! Factor a number into its prime factors
std::vector<int> factor(size_t number);
/*!
* Function to get the memory usage.
* This function will return the total memory used by the application.
* Note: depending on the implementation, this number may be rounded
* to a multiple of the page size.
* If this function fails, it will return 0.
*/
size_t getMemoryUsage();
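//! Null use of a pointer (presumably marks a variable as used without acting on it; confirm against the implementation)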
void nullUse( void* );
//! Function to get an arbitrary point in time
double time();
//! Function to get the resolution of time
double tick();
//! std::string version of sprintf
inline std::string stringf( const char *format, ... );
/*!
 * Pause the calling thread for the requested number of milliseconds
 * @param N         Length of the pause (ms)
 */
inline void sleep_ms( int N )
{
    const std::chrono::milliseconds pause( N );
    std::this_thread::sleep_for( pause );
}
/*!
 * Pause the calling thread for the requested number of seconds
 * @param N         Length of the pause (s)
 */
inline void sleep_s( int N )
{
    const std::chrono::seconds pause( N );
    std::this_thread::sleep_for( pause );
}
//! Factor a number into its prime factors
std::vector<int> factor(size_t number);
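//! Null use of a pointer (presumably marks a variable as used without acting on it; confirm against the implementation)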
void nullUse( void* );
} // namespace Utilities
#include "common/UtilityMacros.h"
/*!
 * std::string version of sprintf
 * Formats the arguments according to the printf-style format string and returns
 * the result as a std::string.  The output is first written into a fixed-size
 * stack buffer using the bounded vsnprintf (never writing past the buffer);
 * if the formatted text does not fit, a buffer of the exact required size is
 * allocated and the text is formatted again, so long messages are neither
 * truncated nor able to overflow the stack.
 * @param format    printf-style format string
 * @param ...       Arguments consumed by the format string
 * @return          The formatted string (empty if vsnprintf reports an encoding error)
 */
inline std::string Utilities::stringf( const char *format, ... )
{
    va_list ap;
    va_start( ap, format );
    // A va_list may only be traversed once, keep a copy for the (rare) second pass
    va_list ap2;
    va_copy( ap2, ap );
    char tmp[4096];
    const int n = vsnprintf( tmp, sizeof( tmp ), format, ap );
    va_end( ap );
    std::string result;
    if ( n >= 0 && static_cast<size_t>( n ) < sizeof( tmp ) ) {
        // Common case: everything fit in the stack buffer
        result.assign( tmp, static_cast<size_t>( n ) );
    } else if ( n >= 0 ) {
        // Output was truncated: n is the required length (excluding the terminating NUL)
        result.resize( static_cast<size_t>( n ) + 1 );
        vsnprintf( &result[0], result.size(), format, ap2 );
        result.resize( static_cast<size_t>( n ) );
    }
    va_end( ap2 );
    return result;
}
#endif

View File

@@ -9,8 +9,8 @@
#include <stdexcept>
/*! \defgroup Macros Set of utility macro functions
* \details These functions are a list of C++ macros that are used
/*! \defgroup Macros Set of utility macro functions
* \details These functions are a list of C++ macros that are used
* for common operations, including checking for errors.
* \addtogroup Macros
* @{
@@ -19,13 +19,19 @@
/*! \def NULL_STATEMENT
* \brief A null statement
* \details A statement that does nothing, for insure++ make it something
* \details A statement that does nothing, for insure++ make it something
* more complex than a simple C null statement to avoid a warning.
*/
#ifndef NULL_STATEMENT
#ifdef __INSURE__
#define NULL_STATEMENT do{if(0) int nullstatement=0 }}while(0)
// Insure++ variant of the null statement: a dead branch containing a declaration,
// i.e. something more complex than a bare ';'.  The declaration must be terminated
// with ';' (and is wrapped in braces) so that the expansion is a valid statement.
#define NULL_STATEMENT             \
    do {                           \
        if ( 0 ) {                 \
            int nullstatement = 0; \
        }                          \
    } while ( 0 )
#else
#define NULL_STATEMENT
#define NULL_STATEMENT
#endif
#endif
@@ -34,9 +40,15 @@
* \details A null use of a variable, use to avoid GNU compiler warnings about unused variables.
* \param variable Variable to pretend to use
*/
#define NULL_USE(variable) do { \
if(0) {char *temp = (char *)&variable; temp++;} \
}while(0)
#ifndef NULL_USE
// Pretend to use `variable`: its address is taken inside a branch that never
// executes, so the variable counts as referenced without any runtime effect.
#define NULL_USE( variable )                      \
    do {                                          \
        if ( false ) {                            \
            auto unused_ptr = (char *) &variable; \
            ++unused_ptr;                         \
        }                                         \
    } while ( false )
#endif
/*! \def ERROR(MSG)
@@ -46,9 +58,10 @@
* line number of the abort are also printed.
* \param MSG Error message to print
*/
#define ERROR(MSG) do { \
::Utilities::abort(MSG,__FILE__,__LINE__); \
}while(0)
// Abort helper macro: forwards MSG together with the current __FILE__ and __LINE__
// to ::Utilities::abort.  The do { } while ( 0 ) wrapper makes the expansion a single
// statement, so "ERROR( msg );" can be used safely inside an unbraced if/else.
#define ERROR(MSG) \
do { \
::Utilities::abort( MSG, __FILE__, __LINE__ ); \
} while ( 0 )
/*! \def WARNING(MSG)
@@ -56,11 +69,13 @@
* \details Print a warning without exit. Print file and line number of the warning.
* \param MSG Warning message to print
*/
#define WARNING(MSG) do { \
std::stringstream tboxos; \
tboxos << MSG << std::ends; \
printf("WARNING: %s\n Warning called in %s on line %i\n",tboxos.str().c_str(),__FILE__,__LINE__); \
}while(0)
// Warning helper macro: MSG is streamed into a std::stringstream, so it may be any
// chain of "<<" operands (e.g. WARNING( "n = " << n )).  The text is printed with
// printf together with the file and line of the call; nothing is aborted here.
// std::ends appends a '\0' to the stream, which is where the "%s" output stops.
#define WARNING(MSG) \
do { \
std::stringstream tboxos; \
tboxos << MSG << std::ends; \
printf("WARNING: %s\n Warning called in %s on line %i\n", \
tboxos.str().c_str(),__FILE__,__LINE__); \
}while(0)
/*! \def ASSERT(EXP)
@@ -71,13 +86,14 @@
* The file and line number of the abort are printed along with the stack trace (if available).
* \param EXP Expression to evaluate
*/
#define ASSERT(EXP) do { \
if ( !(EXP) ) { \
std::stringstream tboxos; \
tboxos << "Failed assertion: " << #EXP << std::ends; \
::Utilities::abort(tboxos.str(), __FILE__, __LINE__); \
} \
}while(0)
// Assertion macro: when EXP evaluates to false, builds the message
// "Failed assertion: <text of EXP>" (#EXP stringizes the expression) and passes it,
// with the current file and line, to ::Utilities::abort.  EXP is evaluated once.
// NOTE(review): std::ends appends a '\0', so the std::string handed to abort ends
// with an embedded NUL character -- confirm this is intended.
#define ASSERT(EXP) \
do { \
if ( !(EXP) ) { \
std::stringstream tboxos; \
tboxos << "Failed assertion: " << #EXP << std::ends; \
::Utilities::abort(tboxos.str(), __FILE__, __LINE__); \
} \
}while(0)
/*! \def INSIST(EXP,MSG)
@@ -99,7 +115,6 @@
}while(0)
/**
* Macro for use when assertions are to be included
* only when debugging.
@@ -118,6 +133,49 @@
#endif
/*! \def DISABLE_WARNINGS
* \brief Suppress all warnings
* \details This will start to suppress all compile warnings.
* Be sure to follow with ENABLE_WARNINGS
*/
/*! \def ENABLE_WARNINGS
* \brief Re-enable warnings
* \details This will re-enable warnings after a call to DISABLE_WARNINGS
*/
// clang-format off
#ifdef DISABLE_WARNINGS
// Macros previously defined
#elif defined( USING_MSVC )
#define DISABLE_WARNINGS __pragma( warning( push, 0 ) )
#define ENABLE_WARNINGS __pragma( warning( pop ) )
#elif defined( USING_CLANG )
#define DISABLE_WARNINGS \
_Pragma( "clang diagnostic push" ) _Pragma( "clang diagnostic ignored \"-Wall\"" ) \
_Pragma( "clang diagnostic ignored \"-Wextra\"" ) \
_Pragma( "clang diagnostic ignored \"-Wunused-private-field\"" ) \
_Pragma( "clang diagnostic ignored \"-Wmismatched-new-delete\"" )
#define ENABLE_WARNINGS _Pragma( "clang diagnostic pop" )
#elif defined( USING_GCC )
// Note: We cannot disable the -Wliteral-suffix message with this macro because the
// pragma command cannot suppress warnings from the C++ preprocessor. See gcc bug #53431.
#define DISABLE_WARNINGS \
_Pragma( "GCC diagnostic push" ) _Pragma( "GCC diagnostic ignored \"-Wall\"" ) \
_Pragma( "GCC diagnostic ignored \"-Wextra\"" ) \
_Pragma( "GCC diagnostic ignored \"-Wpragmas\"" ) \
_Pragma( "GCC diagnostic ignored \"-Wunused-local-typedefs\"" ) \
_Pragma( "GCC diagnostic ignored \"-Woverloaded-virtual\"" ) \
_Pragma( "GCC diagnostic ignored \"-Wunused-parameter\"" ) \
_Pragma( "GCC diagnostic ignored \"-Warray-bounds\"" ) \
_Pragma( "GCC diagnostic ignored \"-Wterminate\"" )
#define ENABLE_WARNINGS _Pragma( "GCC diagnostic pop" )
#else
#define DISABLE_WARNINGS
#define ENABLE_WARNINGS
#endif
// clang-format on
/*! @} */

View File

@@ -9,9 +9,24 @@
#define ANALYSIS_INTERVAL 1000
#define BLOBID_INTERVAL 1000
enum AnalysisType{ AnalyzeNone=0, IdentifyBlobs=0x01, CopyPhaseIndicator=0x02,
//! Bit flags selecting the analysis steps to perform (combine with operator|=, query with matches)
enum class AnalysisType : uint64_t {
    AnalyzeNone        = 0,
    IdentifyBlobs      = 0x01,
    CopyPhaseIndicator = 0x02,
    CopySimState       = 0x04,
    ComputeAverages    = 0x08,
    CreateRestart      = 0x10,
    WriteVis           = 0x20
};


/*!
 * Add flags to an AnalysisType
 * Sets in lhs every bit that is set in rhs (bitwise or of the underlying integers).
 * Declared inline so the definition may appear in more than one translation unit.
 * @param lhs       Flags to modify
 * @param rhs       Flags to add
 * @return          Reference to lhs
 */
inline AnalysisType &operator|=( AnalysisType &lhs, AnalysisType rhs )
{
    using int_type = std::underlying_type<AnalysisType>::type;
    lhs = static_cast<AnalysisType>( static_cast<int_type>( lhs ) | static_cast<int_type>( rhs ) );
    return lhs;
}


/*!
 * Check if two sets of flags overlap
 * @param x         First set of flags
 * @param y         Second set of flags
 * @return          True if at least one bit is set in both x and y
 */
inline bool matches( AnalysisType x, AnalysisType y )
{
    using int_type = std::underlying_type<AnalysisType>::type;
    // The parentheses are required: != binds tighter than &, so "a & b != 0"
    // would evaluate "a & ( b != 0 )" and only ever test the lowest bit.
    return ( static_cast<int_type>( x ) & static_cast<int_type>( y ) ) != 0;
}
template<class TYPE>
void DeleteArray( const TYPE *p )
@@ -30,7 +45,7 @@ struct AnalysisWaitIdStruct {
// Helper class to write the restart file from a separate thread
class WriteRestartWorkItem: public ThreadPool::WorkItem
class WriteRestartWorkItem: public ThreadPool::WorkItemRet<void>
{
public:
WriteRestartWorkItem( const char* filename_, std::shared_ptr<double> cDen_,
@@ -41,7 +56,6 @@ public:
WriteCheckpoint(filename,cDen.get(),cfq.get(),N);
PROFILE_STOP("Save Checkpoint",1);
};
virtual bool has_result() const { return false; }
private:
WriteRestartWorkItem();
const char* filename;
@@ -54,7 +68,7 @@ private:
static const std::string id_map_filename = "lbpm_id_map.txt";
typedef std::shared_ptr<std::pair<int,IntArray> > BlobIDstruct;
typedef std::shared_ptr<std::vector<BlobIDType> > BlobIDList;
class BlobIdentificationWorkItem1: public ThreadPool::WorkItem
class BlobIdentificationWorkItem1: public ThreadPool::WorkItemRet<void>
{
public:
BlobIdentificationWorkItem1( int timestep_, int Nx_, int Ny_, int Nz_, const RankInfoStruct& rank_info_,
@@ -75,7 +89,6 @@ public:
new_index->first = ComputeGlobalBlobIDs(Nx-2,Ny-2,Nz-2,rank_info,*phase,dist,vF,vS,ids,newcomm);
PROFILE_STOP("Identify blobs",1);
}
virtual bool has_result() const { return false; }
private:
BlobIdentificationWorkItem1();
int timestep;
@@ -87,7 +100,7 @@ private:
BlobIDList new_list;
MPI_Comm newcomm;
};
class BlobIdentificationWorkItem2: public ThreadPool::WorkItem
class BlobIdentificationWorkItem2: public ThreadPool::WorkItemRet<void>
{
public:
BlobIdentificationWorkItem2( int timestep_, int Nx_, int Ny_, int Nz_, const RankInfoStruct& rank_info_,
@@ -122,7 +135,6 @@ public:
}
PROFILE_STOP("Identify blobs maps",1);
}
virtual bool has_result() const { return false; }
private:
BlobIdentificationWorkItem2();
int timestep;
@@ -137,7 +149,7 @@ private:
// Helper class to write the vis file from a thread
class WriteVisWorkItem: public ThreadPool::WorkItem
class WriteVisWorkItem: public ThreadPool::WorkItemRet<void>
{
public:
WriteVisWorkItem( int timestep_, std::vector<IO::MeshDataStruct>& visData_,
@@ -164,7 +176,6 @@ public:
IO::writeData( timestep, visData, newcomm );
PROFILE_STOP("Save Vis",1);
};
virtual bool has_result() const { return false; }
private:
WriteVisWorkItem();
int timestep;
@@ -177,7 +188,7 @@ private:
// Helper class to run the analysis from within a thread
// Note: Averages will be modified after the constructor is called
class AnalysisWorkItem: public ThreadPool::WorkItem
class AnalysisWorkItem: public ThreadPool::WorkItemRet<void>
{
public:
AnalysisWorkItem( AnalysisType type_, int timestep_, TwoPhase& Averages_,
@@ -191,10 +202,10 @@ public:
Averages.Label_NWP_map = *id_list;
Averages.NumberComponents_WP = 1;
Averages.Label_WP.fill(0.0);
if ( (type&CopyPhaseIndicator) != 0 ) {
if ( matches(type,AnalysisType::CopyPhaseIndicator) ) {
// Averages.ColorToSignedDistance(beta,Averages.Phase,Averages.Phase_tplus);
}
if ( (type&ComputeAverages) != 0 ) {
if ( matches(type,AnalysisType::ComputeAverages) ) {
PROFILE_START("Compute dist",1);
Averages.Initialize();
Averages.ComputeDelPhi();
@@ -212,7 +223,6 @@ public:
PROFILE_STOP("Compute dist",1);
}
}
virtual bool has_result() const { return false; }
private:
AnalysisWorkItem();
AnalysisType type;
@@ -223,6 +233,7 @@ private:
double beta;
};
// Function to start the analysis
void run_analysis( int timestep, int restart_interval,
const RankInfoStruct& rank_info, ScaLBL_Communicator &ScaLBL_Comm, TwoPhase& Averages,
@@ -236,46 +247,45 @@ void run_analysis( int timestep, int restart_interval,
int N = Nx*Ny*Nz;
// Determine the analysis we want to perform
AnalysisType type = AnalyzeNone;
AnalysisType type = AnalysisType::AnalyzeNone;
if ( timestep%ANALYSIS_INTERVAL + 5 == ANALYSIS_INTERVAL ) {
// Copy the phase indicator field for the earlier timestep
type = static_cast<AnalysisType>( type | CopyPhaseIndicator );
type |= AnalysisType::CopyPhaseIndicator;
}
if ( timestep%BLOBID_INTERVAL == 0 ) {
// Identify blobs and update global ids in time
type = static_cast<AnalysisType>( type | IdentifyBlobs );
type |= AnalysisType::IdentifyBlobs;
}
/* #ifdef USE_CUDA
/*#ifdef USE_CUDA
if ( tpool.getQueueSize()<=3 && tpool.getNumThreads()>0 && timestep%50==0 ) {
// Keep a few blob identifications queued up to keep the processors busy,
// allowing us to track the blobs as fast as possible
// Add more detailed estimates of the update frequency required to track blobs
type = static_cast<AnalysisType>( type | IdentifyBlobs );
type |= AnalysisType::IdentifyBlobs;
}
#endif
*/
#endif */
if ( timestep%ANALYSIS_INTERVAL == 0 ) {
// Copy the averages to the CPU (and identify blobs)
type = static_cast<AnalysisType>( type | CopySimState );
type = static_cast<AnalysisType>( type | IdentifyBlobs );
type |= AnalysisType::CopySimState;
type |= AnalysisType::IdentifyBlobs;
}
if ( timestep%ANALYSIS_INTERVAL == 5 ) {
// Run the analysis
type = static_cast<AnalysisType>( type | ComputeAverages );
type |= AnalysisType::ComputeAverages;
}
if (timestep%restart_interval == 0) {
// Write the restart file
type = static_cast<AnalysisType>( type | CreateRestart );
type |= AnalysisType::CreateRestart;
}
if (timestep%restart_interval == 0) {
// Write the visualization data
type = static_cast<AnalysisType>( type | WriteVis );
type = static_cast<AnalysisType>( type | CopySimState );
type = static_cast<AnalysisType>( type | IdentifyBlobs );
type |= AnalysisType::WriteVis;
type |= AnalysisType::CopySimState;
type |= AnalysisType::IdentifyBlobs;
}
// Return if we are not doing anything
if ( type == AnalyzeNone )
if ( type == AnalysisType::AnalyzeNone )
return;
PROFILE_START("start_analysis");
@@ -284,26 +294,28 @@ void run_analysis( int timestep, int restart_interval,
ScaLBL_DeviceBarrier();
PROFILE_START("Copy data to host",1);
std::shared_ptr<DoubleArray> phase;
if ( (type&CopyPhaseIndicator)!=0 || (type&ComputeAverages)!=0 ||
(type&CopySimState)!=0 || (type&IdentifyBlobs)!=0 )
if ( matches(type,AnalysisType::CopyPhaseIndicator) ||
matches(type,AnalysisType::ComputeAverages) ||
matches(type,AnalysisType::CopySimState) ||
matches(type,AnalysisType::IdentifyBlobs) )
{
phase = std::shared_ptr<DoubleArray>(new DoubleArray(Nx,Ny,Nz));
ScaLBL_CopyToHost(phase->data(),Phi,N*sizeof(double));
}
if ( (type&CopyPhaseIndicator)!=0 ) {
if ( matches(type,AnalysisType::CopyPhaseIndicator) ) {
memcpy(Averages.Phase_tplus.data(),phase->data(),N*sizeof(double));
//Averages.ColorToSignedDistance(beta,Averages.Phase,Averages.Phase_tplus);
}
if ( (type&ComputeAverages)!=0 ) {
if ( matches(type,AnalysisType::ComputeAverages) ) {
memcpy(Averages.Phase_tminus.data(),phase->data(),N*sizeof(double));
//Averages.ColorToSignedDistance(beta,Averages.Phase,Averages.Phase_tminus);
}
if ( (type&CopySimState) != 0 ) {
if ( matches(type,AnalysisType::CopySimState) ) {
// Copy the members of Averages to the cpu (phase was copied above)
// Wait
PROFILE_START("Copy-Pressure",1);
ScaLBL_D3Q19_Pressure(fq,Pressure,Np);
ScaLBL_D3Q19_Momentum(fq,Velocity,Np);
ScaLBL_D3Q19_Pressure(fq,Pressure,Np);
ScaLBL_D3Q19_Momentum(fq,Velocity,Np);
ScaLBL_DeviceBarrier();
PROFILE_STOP("Copy-Pressure",1);
PROFILE_START("Copy-Wait",1);
@@ -312,14 +324,14 @@ void run_analysis( int timestep, int restart_interval,
PROFILE_STOP("Copy-Wait",1);
PROFILE_START("Copy-State",1);
memcpy(Averages.Phase.data(),phase->data(),N*sizeof(double));
ScaLBL_Comm.RegularLayout(Map,Pressure,Averages.Press);
ScaLBL_Comm.RegularLayout(Map,&Velocity[0],Averages.Vel_x);
ScaLBL_Comm.RegularLayout(Map,&Velocity[Np],Averages.Vel_y);
ScaLBL_Comm.RegularLayout(Map,&Velocity[2*Np],Averages.Vel_z);
ScaLBL_Comm.RegularLayout(Map,Pressure,Averages.Press);
ScaLBL_Comm.RegularLayout(Map,&Velocity[0],Averages.Vel_x);
ScaLBL_Comm.RegularLayout(Map,&Velocity[Np],Averages.Vel_y);
ScaLBL_Comm.RegularLayout(Map,&Velocity[2*Np],Averages.Vel_z);
PROFILE_STOP("Copy-State",1);
}
std::shared_ptr<double> cDen, cfq;
if ( (type&CreateRestart) != 0 ) {
if ( matches(type,AnalysisType::CreateRestart) ) {
// Copy restart data to the CPU
cDen = std::shared_ptr<double>(new double[2*Np],DeleteArray<double>);
cfq = std::shared_ptr<double>(new double[19*Np],DeleteArray<double>);
@@ -329,14 +341,14 @@ void run_analysis( int timestep, int restart_interval,
PROFILE_STOP("Copy data to host",1);
// Spawn threads to do blob identification work
if ( (type&IdentifyBlobs)!=0 ) {
if ( matches(type,AnalysisType::IdentifyBlobs) ) {
BlobIDstruct new_index(new std::pair<int,IntArray>(0,IntArray()));
BlobIDstruct new_ids(new std::pair<int,IntArray>(0,IntArray()));
BlobIDList new_list(new std::vector<BlobIDType>());
ThreadPool::WorkItem *work1 = new BlobIdentificationWorkItem1(timestep,
Nx,Ny,Nz,rank_info,phase,Averages.SDs,last_ids,new_index,new_ids,new_list);
ThreadPool::WorkItem *work2 = new BlobIdentificationWorkItem2(timestep,
Nx,Ny,Nz,rank_info,phase,Averages.SDs,last_ids,new_index,new_ids,new_list);
auto work1 = new BlobIdentificationWorkItem1(timestep,Nx,Ny,Nz,rank_info,
phase,Averages.SDs,last_ids,new_index,new_ids,new_list);
auto work2 = new BlobIdentificationWorkItem2(timestep,Nx,Ny,Nz,rank_info,
phase,Averages.SDs,last_ids,new_index,new_ids,new_list);
work1->add_dependency(wait.blobID);
work2->add_dependency(tpool.add_work(work1));
wait.blobID = tpool.add_work(work2);
@@ -346,9 +358,8 @@ void run_analysis( int timestep, int restart_interval,
}
// Spawn threads to do the analysis work
if ( (type&ComputeAverages) != 0 ) {
ThreadPool::WorkItem *work = new AnalysisWorkItem(
type,timestep,Averages,last_index,last_id_map,beta);
if ( matches(type,AnalysisType::ComputeAverages) ) {
auto work = new AnalysisWorkItem(type,timestep,Averages,last_index,last_id_map,beta);
work->add_dependency(wait.blobID);
work->add_dependency(wait.analysis);
work->add_dependency(wait.vis); // Make sure we are done using analysis before modifying
@@ -356,35 +367,35 @@ void run_analysis( int timestep, int restart_interval,
}
// Spawn a thread to write the restart file
if ( (type&CreateRestart) != 0 ) {
if ( matches(type,AnalysisType::CreateRestart) ) {
int rank = MPI_WORLD_RANK();
//if (pBC) {
//err = fabs(sat_w - sat_w_previous);
//sat_w_previous = sat_w;
//if (rank==0){
// printf("Timestep %i: change in saturation since last checkpoint is %f \n",timestep,err);
// }
// }
/* if (pBC) {
err = fabs(sat_w - sat_w_previous);
sat_w_previous = sat_w;
if (rank==0){
printf("Timestep %i: change in saturation since last checkpoint is %f \n",timestep,err);
}
} */
// Wait for previous restart files to finish writing (not necessary, but helps to ensure memory usage is limited)
tpool.wait(wait.restart);
// Retain the timestep associated with the restart files
if (rank==0){
FILE *Rst = fopen("Restart.txt","w");
fprintf(Rst,"%i\n",timestep+5);
fclose(Rst);
}
// Retain the timestep associated with the restart files
if (rank==0) {
FILE *Rst = fopen("Restart.txt","w");
fprintf(Rst,"%i\n",timestep+5);
fclose(Rst);
}
// Write the restart file (using a separate thread)
WriteRestartWorkItem *work = new WriteRestartWorkItem(LocalRestartFile,cDen,cfq,Np);
auto work = new WriteRestartWorkItem(LocalRestartFile,cDen,cfq,Np);
work->add_dependency(wait.restart);
wait.restart = tpool.add_work(work);
}
// Save the results for visualization
if ( (type&CreateRestart) != 0 ) {
if ( matches(type,AnalysisType::CreateRestart) ) {
// Wait for previous vis files to finish writing (not necessary, but helps to ensure memory usage is limited)
tpool.wait(wait.vis);
// Write the vis files
ThreadPool::WorkItem *work = new WriteVisWorkItem( timestep, visData, Averages, fillData );
auto work = new WriteVisWorkItem( timestep, visData, Averages, fillData );
work->add_dependency(wait.blobID);
work->add_dependency(wait.analysis);
work->add_dependency(wait.vis);

View File

@@ -27,4 +27,3 @@ int atomic_pthread_lock_initialized = create_atomic_pthread_lock();
} // AtomicOperations namespace

View File

@@ -5,7 +5,6 @@
#include <stdint.h>
#include <stdio.h>
#include <typeinfo>
#include <stdexcept>
// Choose the OS
#if defined( WIN32 ) || defined( _WIN32 ) || defined( WIN64 ) || defined( _WIN64 )
@@ -89,6 +88,16 @@ inline int32_atomic atomic_get( const int32_atomic volatile *x );
*/
inline int64_atomic atomic_get( const int64_atomic volatile *x );
/**
* \brief Get the value
* \details Read the data in x
* \param[in] x The pointer to the value to get
*/
template<class TYPE>
inline TYPE *atomic_get( volatile TYPE **x );
/**
* \brief Set the value
* \details Set the data in x to y (*x=y)
@@ -185,9 +194,8 @@ inline bool atomic_compare_and_swap( void *volatile *v, void *x, void *y );
* \brief Fetch the current value and "and" with given value
* \details Perform *v = (*v) & x, returning the previous value
* \return Returns the previous value before the "and" operation
* \param[in] v The pointer to the value to check and swap
* \param[in] x The value to compare
* \param[in] y The value to swap iff *v==x
* \param[in] v The pointer to the value to check and and
* \param[in] x The value to and
*/
inline int32_atomic atomic_fetch_and_and( int32_atomic volatile *v, int32_atomic x );
@@ -195,9 +203,8 @@ inline int32_atomic atomic_fetch_and_and( int32_atomic volatile *v, int32_atomic
* \brief Fetch the current value and "and" with given value
* \details Perform *v = (*v) & x, returning the previous value
* \return Returns the previous value before the "and" operation
* \param[in] v The pointer to the value to check and swap
* \param[in] x The value to compare
* \param[in] y The value to swap iff *v==x
* \param[in] v The pointer to the value to check and and
* \param[in] x The value to and
*/
inline int64_atomic atomic_fetch_and_and( int64_atomic volatile *v, int64_atomic x );
@@ -205,9 +212,8 @@ inline int64_atomic atomic_fetch_and_and( int64_atomic volatile *v, int64_atomic
* \brief Fetch the current value and "or" with given value
* \details Perform *v = (*v) | x, returning the previous value
* \return Returns the previous value before the "or" operation
* \param[in] v The pointer to the value to check and swap
* \param[in] x The value to compare
* \param[in] y The value to swap iff *v==x
* \param[in] v The pointer to the value to check and or
* \param[in] x The value to or
*/
inline int32_atomic atomic_fetch_and_or( int32_atomic volatile *v, int32_atomic x );
@@ -216,52 +222,52 @@ inline int32_atomic atomic_fetch_and_or( int32_atomic volatile *v, int32_atomic
* \details Perform *v = (*v) | x, returning the previous value
* \return Returns the previous value before the "or" operation
* \param[in] v The pointer to the value to check and swap
* \param[in] x The value to compare
* \param[in] y The value to swap iff *v==x
* \param[in] v The pointer to the value to check and or
* \param[in] x The value to or
*/
inline int64_atomic atomic_fetch_and_or( int64_atomic volatile *v, int64_atomic x );
/**
* \brief Class to store a pool of objects
* \details This class stores a pool of objects that can be added/removed in a thread-safe way
*/
template<class TYPE,int N_MAX>
template<class TYPE, int N_MAX>
class pool
{
public:
pool( )
public:
pool()
{
d_data = new volatile TYPE*[N_MAX];
for (int i=0; i<N_MAX; i++)
d_data = new volatile TYPE *[N_MAX];
for ( int i = 0; i < N_MAX; i++ )
d_data[i] = new TYPE;
}
~pool( )
~pool()
{
for (int i=0; i<N_MAX; i++)
for ( int i = 0; i < N_MAX; i++ )
if ( d_data[i] != nullptr )
delete d_data[i];
delete [] d_data;
delete[] d_data;
}
inline TYPE* get()
{
int i=0;
while ( true ) {
TYPE* tmp = const_cast<TYPE*>( d_data[i] );
bool swapped = atomic_compare_and_swap( (void* volatile*) &d_data[i], tmp, nullptr );
if ( swapped && ( tmp != nullptr ) )
return tmp;
i = (i+1)%N_MAX;
}
}
inline void put( TYPE* ptr )
inline TYPE *get()
{
int i = 0;
while ( !atomic_compare_and_swap( (void* volatile*) &d_data[i], nullptr, ptr ) )
i = (i+1)%N_MAX;
while ( true ) {
TYPE *tmp = const_cast<TYPE *>( d_data[i] );
bool swapped = atomic_compare_and_swap( (void *volatile *) &d_data[i], tmp, nullptr );
if ( swapped && ( tmp != nullptr ) )
return tmp;
i = ( i + 1 ) % N_MAX;
}
}
private:
inline void put( TYPE *ptr )
{
int i = 0;
while ( !atomic_compare_and_swap( (void *volatile *) &d_data[i], nullptr, ptr ) )
i = ( i + 1 ) % N_MAX;
}
private:
volatile TYPE **d_data;
pool( const pool &rhs );
pool &operator=( const pool &rhs );
@@ -323,10 +329,24 @@ inline int64_atomic atomic_decrement( int64_atomic volatile *x )
{
return OSAtomicDecrement64Barrier( x );
}
int32_atomic atomic_fetch_and_or( int32_atomic volatile *v, int32_atomic x ) { return OSAtomicOr32Orig( x, (volatile uint32_t *) v ); }
int32_atomic atomic_fetch_and_and( int32_atomic volatile *v, int32_atomic x ) { return OSAtomicAnd32Orig( x, (volatile uint32_t *) v); }
int64_atomic atomic_fetch_and_or( int64_atomic volatile *v, int64_atomic x ) { throw std::logic_error("Not availible for this OS"); return 0; }
int64_atomic atomic_fetch_and_and( int64_atomic volatile *v, int64_atomic x ) { throw std::logic_error("Not availible for this OS"); return 0; }
// 32-bit fetch-and-or implemented with OSAtomicOr32Orig; the call's result is returned unchanged
int32_atomic atomic_fetch_and_or( int32_atomic volatile *v, int32_atomic x )
{
    // The OSAtomic routine takes an unsigned 32-bit target
    volatile uint32_t *target = (volatile uint32_t *) v;
    return OSAtomicOr32Orig( x, target );
}
// 32-bit fetch-and-and implemented with OSAtomicAnd32Orig; the call's result is returned unchanged
int32_atomic atomic_fetch_and_and( int32_atomic volatile *v, int32_atomic x )
{
    // The OSAtomic routine takes an unsigned 32-bit target
    volatile uint32_t *target = (volatile uint32_t *) v;
    return OSAtomicAnd32Orig( x, target );
}
// 64-bit fetch-and-or is not provided in this branch: the call always throws std::logic_error
int64_atomic atomic_fetch_and_or( int64_atomic volatile *v, int64_atomic x )
{
    // Arguments are intentionally unused
    static_cast<void>( v );
    static_cast<void>( x );
    throw std::logic_error( "Not availible for this OS" );
}
// 64-bit fetch-and-and is not provided in this branch: the call always throws std::logic_error
int64_atomic atomic_fetch_and_and( int64_atomic volatile *v, int64_atomic x )
{
    // Arguments are intentionally unused
    static_cast<void>( v );
    static_cast<void>( x );
    throw std::logic_error( "Not availible for this OS" );
}
inline int32_atomic atomic_add( int32_atomic volatile *x, int32_atomic y )
{
return OSAtomicAdd32Barrier( y, x );
@@ -352,10 +372,22 @@ int32_atomic atomic_increment( int32_atomic volatile *x ) { return __sync_add_an
int64_atomic atomic_increment( int64_atomic volatile *x ) { return __sync_add_and_fetch( x, 1 ); }
int32_atomic atomic_decrement( int32_atomic volatile *x ) { return __sync_sub_and_fetch( x, 1 ); }
int64_atomic atomic_decrement( int64_atomic volatile *x ) { return __sync_sub_and_fetch( x, 1 ); }
int32_atomic atomic_fetch_and_or( int32_atomic volatile *v, int32_atomic x ) { return __sync_fetch_and_or( v, x ); }
int64_atomic atomic_fetch_and_or( int64_atomic volatile *v, int64_atomic x ) { return __sync_fetch_and_or( v, x ); }
int32_atomic atomic_fetch_and_and( int32_atomic volatile *v, int32_atomic x ) { return __sync_fetch_and_and( v, x ); }
int64_atomic atomic_fetch_and_and( int64_atomic volatile *v, int64_atomic x ) { return __sync_fetch_and_and( v, x ); }
// 32-bit fetch-and-or: performs *v |= x through the __sync builtin and returns the prior value
int32_atomic atomic_fetch_and_or( int32_atomic volatile *v, int32_atomic x )
{
    const int32_atomic previous = __sync_fetch_and_or( v, x );
    return previous;
}
// 64-bit fetch-and-or: performs *v |= x through the __sync builtin and returns the prior value
int64_atomic atomic_fetch_and_or( int64_atomic volatile *v, int64_atomic x )
{
    const int64_atomic previous = __sync_fetch_and_or( v, x );
    return previous;
}
// 32-bit fetch-and-and: performs *v &= x through the __sync builtin and returns the prior value
int32_atomic atomic_fetch_and_and( int32_atomic volatile *v, int32_atomic x )
{
    const int32_atomic previous = __sync_fetch_and_and( v, x );
    return previous;
}
// 64-bit fetch-and-and: performs *v &= x through the __sync builtin and returns the prior value
int64_atomic atomic_fetch_and_and( int64_atomic volatile *v, int64_atomic x )
{
    const int64_atomic previous = __sync_fetch_and_and( v, x );
    return previous;
}
inline int32_atomic atomic_add( int32_atomic volatile *x, int32_atomic y )
{
return __sync_add_and_fetch( x, y );
@@ -459,31 +491,44 @@ inline int64_atomic atomic_get( const int64_atomic volatile *x )
{
return atomic_add( const_cast<int64_atomic volatile *>( x ), 0 );
}
// Read a pointer value: the storage holding the pointer is viewed as an int64_atomic
// and read by atomically adding zero; the resulting integer is converted back to TYPE*.
// NOTE(review): this presumably assumes pointers are 64 bits wide -- confirm for all
// supported targets.
template<class TYPE>
inline TYPE *atomic_get( volatile TYPE **x )
{
    auto raw        = reinterpret_cast<int64_atomic volatile *>( x );
    const auto bits = atomic_add( raw, 0 );
    return reinterpret_cast<TYPE *>( bits );
}
inline void atomic_set( int32_atomic volatile *x, int32_atomic y )
{
int32_atomic tmp = *x;
while ( !atomic_compare_and_swap( x, tmp, y ) ) { tmp = *x; }
while ( !atomic_compare_and_swap( x, tmp, y ) ) {
tmp = *x;
}
}
inline void atomic_set( int64_atomic volatile *x, int64_atomic y )
{
int64_atomic tmp = *x;
while ( !atomic_compare_and_swap( x, tmp, y ) ) { tmp = *x; }
while ( !atomic_compare_and_swap( x, tmp, y ) ) {
tmp = *x;
}
}
inline void atomic_swap( int32_atomic volatile *x, int32_atomic *y )
{
int32_atomic tmp = *x;
while ( !atomic_compare_and_swap( x, tmp, *y ) ) { tmp = *x; }
while ( !atomic_compare_and_swap( x, tmp, *y ) ) {
tmp = *x;
}
*y = tmp;
}
inline void atomic_swap( int64_atomic volatile *x, int64_atomic *y )
{
int64_atomic tmp = *x;
while ( !atomic_compare_and_swap( x, tmp, *y ) ) { tmp = *x; }
while ( !atomic_compare_and_swap( x, tmp, *y ) ) {
tmp = *x;
}
*y = tmp;
}
// Define an atomic counter
struct counter_t {
public:
@@ -499,6 +544,7 @@ public:
inline void setCount( int val ) { count = val; }
// Get the current value of the count
inline int getCount() const { return count; }
private:
counter_t( const counter_t & );
counter_t &operator=( const counter_t & );

View File

@@ -1,52 +1,48 @@
#ifndef included_AtomicModelAtomicList
#define included_AtomicModelAtomicList
#include <functional>
#include <csignal>
#include <atomic>
#include <csignal>
#include <functional>
#include "threadpool/atomic_helpers.h"
/** \class AtomicList
*
* \brief Maintain a sorted list of entries
* \brief Maintain a sorted list of entries
* \details This class implements a basic sorted list that is thread-safe and lock-free.
* Entries are stored smallest to largest according to the compare operator
*/
template< class TYPE, int MAX_SIZE, class COMPARE = std::less<TYPE> >
template<class TYPE, int MAX_SIZE, class COMPARE = std::less<TYPE>>
class AtomicList final
{
public:
//! Default constructor
AtomicList( const TYPE& default_value=TYPE(), const COMPARE& comp=COMPARE() );
AtomicList( const TYPE &default_value = TYPE(), const COMPARE &comp = COMPARE() );
/*!
* \brief Remove an item from the list
* \details Find and remove first entry that meets the given criteria
* @return Return the item that matches the criteria, or the default item if no item matches
* @param comp Comparison function object (i.e. an object that satisfies
* @return Return the item that matches the criteria,
* or the default item if no item matches
* @param compare Comparison function object (i.e. an object that satisfies
* the requirements of Compare) which returns true if the
* given value meets the selection criteria.
* The signature of the comparison function should be equivalent to:
* bool cmp( const TYPE& value, ... );
* @param args Additional arguments for the comparison
*/
template<class Compare, class ... Args>
template<class Compare, class... Args>
inline TYPE remove( Compare compare, Args... args );
//! Remove the first from the list
inline TYPE remove_first( );
inline TYPE remove_first();
/*!
* \brief Insert an item
* \details Insert an item into the list
* @param x Item to insert
* @param comp Comparison function object (i.e. an object that satisfies
* the requirements of Compare) which returns true if the
* first argument is less than (i.e. is ordered before) the second.
* The signature of the comparison function should be equivalent to:
* bool cmp(const TYPE &a, const TYPE &b);
*/
inline void insert( TYPE x );
@@ -54,19 +50,19 @@ public:
* \brief Return the size of the list
* \details Return the number of items in the list
*/
inline int size( ) const { return AtomicOperations::atomic_get(&d_N); }
inline int size() const { return AtomicOperations::atomic_get( &d_N ); }
/*!
* \brief Check if the list is empty
* \details Return true if the list is empty
*/
inline bool empty( ) const { return AtomicOperations::atomic_get(&d_N)==0; }
inline bool empty() const { return AtomicOperations::atomic_get( &d_N ) == 0; }
/*!
* \brief Return the capacity of the list
* \details Return the maximum number of items the list can hold
*/
inline int capacity( ) const { return MAX_SIZE; }
inline int capacity() const { return MAX_SIZE; }
/*!
* \brief Check the list
@@ -76,15 +72,15 @@ public:
* It is intended for debugging purposes only!
* @return This function returns true if the list is in a good working state
*/
inline bool check( );
inline bool check();
//! Return the total number of inserts since object creation
inline int64_t N_insert() const { return AtomicOperations::atomic_get(&d_N_insert); }
inline int64_t N_insert() const { return AtomicOperations::atomic_get( &d_N_insert ); }
//! Return the total number of removals since object creation
inline int64_t N_remove() const { return AtomicOperations::atomic_get(&d_N_remove); }
inline int64_t N_remove() const { return AtomicOperations::atomic_get( &d_N_remove ); }
private:
// Data members
@@ -92,7 +88,7 @@ private:
volatile TYPE d_default;
volatile TYPE d_objects[MAX_SIZE];
volatile AtomicOperations::int32_atomic d_N;
volatile AtomicOperations::int32_atomic d_next[MAX_SIZE+1];
volatile AtomicOperations::int32_atomic d_next[MAX_SIZE + 1];
volatile AtomicOperations::int32_atomic d_unused;
volatile AtomicOperations::int64_atomic d_N_insert;
volatile AtomicOperations::int64_atomic d_N_remove;
@@ -112,12 +108,12 @@ private:
if ( i != -1 )
AtomicOperations::atomic_fetch_and_or( &d_next[i], value );
}
inline int get_unused( )
inline int get_unused()
{
int i = 0;
while ( i == 0 )
i = AtomicOperations::atomic_fetch_and_and( &d_unused, 0 );
AtomicOperations::atomic_fetch_and_or( &d_unused, -(d_next[i]+4)+1 );
AtomicOperations::atomic_fetch_and_or( &d_unused, -( d_next[i] + 4 ) + 1 );
d_next[i] = -3;
return i;
}
@@ -126,14 +122,14 @@ private:
int j = 0;
while ( j == 0 )
AtomicOperations::atomic_swap( &d_unused, &j );
d_next[i] = -3-j;
d_next[i] = -3 - j;
AtomicOperations::atomic_fetch_and_or( &d_unused, i );
}
private:
AtomicList( const AtomicList& );
AtomicList& operator=( const AtomicList& );
AtomicList( const AtomicList & );
AtomicList &operator=( const AtomicList & );
};
@@ -142,7 +138,7 @@ private:
* \brief Pool allocator
* \details This class implements a basic fast pool allocator that is thread-safe.
*/
template< class TYPE, class INT_TYPE=int >
template<class TYPE, class INT_TYPE = int>
class MemoryPool final
{
public:
@@ -150,21 +146,21 @@ public:
explicit MemoryPool( size_t size );
//! destructor
~MemoryPool( );
~MemoryPool();
/*!
* \brief Allocate an object
* \details Allocates a new object from the pool
* @return Return the new pointer, or nullptr if there is no more room in the pool
*/
inline TYPE* allocate( );
inline TYPE *allocate();
/*!
* \brief Free an object
* \details Destroys the object and returns its storage to the pool
* @param ptr The pointer to free
*/
inline void free( TYPE* ptr );
inline void free( TYPE *ptr );
private:
// Data members
@@ -172,13 +168,11 @@ private:
volatile AtomicOperations::int32_atomic d_next;
private:
MemoryPool( const MemoryPool& );
MemoryPool& operator=( const MemoryPool& );
MemoryPool( const MemoryPool & );
MemoryPool &operator=( const MemoryPool & );
};
#include "threadpool/atomic_list.hpp"
#endif

View File

@@ -2,41 +2,39 @@
#define included_AtomicList_hpp
#include <stdexcept>
#include <iostream>
#include <stdexcept>
#include <thread>
/******************************************************************
 * Constructor                                                     *
 ******************************************************************/
template<class TYPE, int MAX_SIZE, class COMPARE>
AtomicList<TYPE, MAX_SIZE, COMPARE>::AtomicList( const TYPE &default_value, const COMPARE &comp )
    : d_compare( comp ), d_default( default_value )
{
    // Start with an empty list: no items, and the head entry (index 0) holding the
    // end-of-list marker (-1)
    d_N       = 0;
    d_next[0] = -1;
    // Slot 1 is the first entry of the unused chain
    d_unused = 1;
    // Reset the lifetime statistics
    d_N_insert = 0;
    d_N_remove = 0;
    // Chain every slot to its successor using the "unused" encoding ( -4 - slot ) and
    // fill the storage with the default value
    for ( int slot = 1; slot <= MAX_SIZE; ++slot ) {
        d_next[slot]        = -4 - slot;
        d_objects[slot - 1] = d_default;
    }
}
/******************************************************************
* Remove an item *
******************************************************************/
template<class TYPE,int MAX_SIZE,class COMPARE>
template<class Compare, class ... Args>
inline TYPE AtomicList<TYPE,MAX_SIZE,COMPARE>::remove( Compare compare, Args... args )
* Remove an item *
******************************************************************/
template<class TYPE, int MAX_SIZE, class COMPARE>
template<class Compare, class... Args>
inline TYPE AtomicList<TYPE, MAX_SIZE, COMPARE>::remove( Compare compare, Args... args )
{
// Acquiring temporary ownership
int pos = 0;
// Acquiring temporary ownership
int pos = 0;
auto next = lock( 0 );
while ( true ) {
if ( next == -1 ) {
@@ -50,9 +48,10 @@ inline TYPE AtomicList<TYPE,MAX_SIZE,COMPARE>::remove( Compare compare, Args...
// Acquire ownership of the next item
int next2 = lock( next );
// Test to see if the object passes compare
bool test = compare( const_cast<TYPE&>(d_objects[next-1]), args... );
bool test = compare( const_cast<TYPE &>( d_objects[next - 1] ), args... );
if ( test ) {
// We want to return this object, update next to point to another entry and remove the entry
// We want to return this object, update next to point to another entry and remove the
// entry
unlock( next, -3 );
unlock( pos, next2 );
pos = next;
@@ -60,28 +59,28 @@ inline TYPE AtomicList<TYPE,MAX_SIZE,COMPARE>::remove( Compare compare, Args...
}
// Release the ownership and move on
unlock( pos, next );
pos = next;
pos = next;
next = next2;
}
TYPE rtn(d_default);
TYPE rtn( d_default );
if ( pos != -1 ) {
std::swap( rtn, const_cast<TYPE&>( d_objects[pos-1] ) );
std::swap( rtn, const_cast<TYPE &>( d_objects[pos - 1] ) );
put_unused( pos );
AtomicOperations::atomic_decrement( &d_N );
AtomicOperations::atomic_increment( &d_N_remove );
}
return rtn;
}
template<class TYPE,int MAX_SIZE,class COMPARE>
inline TYPE AtomicList<TYPE,MAX_SIZE,COMPARE>::remove_first( )
template<class TYPE, int MAX_SIZE, class COMPARE>
inline TYPE AtomicList<TYPE, MAX_SIZE, COMPARE>::remove_first()
{
TYPE rtn(d_default);
TYPE rtn( d_default );
auto next = lock( 0 );
if ( next != -1 ) {
int next2 = lock( next );
unlock( next, -3 );
unlock( 0, next2 );
std::swap( rtn, const_cast<TYPE&>( d_objects[next-1] ) );
std::swap( rtn, const_cast<TYPE &>( d_objects[next - 1] ) );
put_unused( next );
AtomicOperations::atomic_decrement( &d_N );
AtomicOperations::atomic_increment( &d_N_remove );
@@ -93,10 +92,10 @@ inline TYPE AtomicList<TYPE,MAX_SIZE,COMPARE>::remove_first( )
/******************************************************************
* Insert an item *
******************************************************************/
template<class TYPE,int MAX_SIZE,class COMPARE>
inline void AtomicList<TYPE,MAX_SIZE,COMPARE>::insert( TYPE x )
* Insert an item *
******************************************************************/
template<class TYPE, int MAX_SIZE, class COMPARE>
inline void AtomicList<TYPE, MAX_SIZE, COMPARE>::insert( TYPE x )
{
int N_used = AtomicOperations::atomic_increment( &d_N );
if ( N_used > MAX_SIZE ) {
@@ -105,14 +104,14 @@ inline void AtomicList<TYPE,MAX_SIZE,COMPARE>::insert( TYPE x )
}
// Get an index to store the entry
auto index = get_unused();
if ( index<1 )
if ( index < 1 )
throw std::logic_error( "Internal error" );
// Store the object in d_objects
AtomicOperations::atomic_increment( &d_N_insert );
d_objects[index-1] = x;
d_next[index] = -1;
d_objects[index - 1] = x;
d_next[index] = -1;
// Find the position to store and update the next entries
int pos = 0;
int pos = 0;
auto next = lock( pos );
while ( true ) {
// Get the next item in the list (acquiring temporary ownership)
@@ -122,7 +121,7 @@ inline void AtomicList<TYPE,MAX_SIZE,COMPARE>::insert( TYPE x )
break;
}
// Test to see if the object is < the value being compared
bool test = d_compare.operator()( x, const_cast<TYPE&>(d_objects[next-1]) );
bool test = d_compare.operator()( x, const_cast<TYPE &>( d_objects[next - 1] ) );
if ( test ) {
// We want to store this object before next
d_next[index] = next;
@@ -131,35 +130,35 @@ inline void AtomicList<TYPE,MAX_SIZE,COMPARE>::insert( TYPE x )
}
// Release the ownership and move on
int last = pos;
pos = next;
next = lock( next );
pos = next;
next = lock( next );
unlock( last, pos );
}
}
/******************************************************************
* Check the internal structures of the list *
* This is mostly thread-safe, but blocks all threads *
******************************************************************/
template<class TYPE,int MAX_SIZE,class COMPARE>
inline bool AtomicList<TYPE,MAX_SIZE,COMPARE>::check( )
* Check the internal structures of the list *
* This is mostly thread-safe, but blocks all threads *
******************************************************************/
template<class TYPE, int MAX_SIZE, class COMPARE>
inline bool AtomicList<TYPE, MAX_SIZE, COMPARE>::check()
{
// Get the lock and check for any other threads modifying the list
auto start = lock( 0 );
std::this_thread::sleep_for( std::chrono::microseconds(100) );
std::this_thread::sleep_for( std::chrono::microseconds( 100 ) );
// Perform the checks on the list
bool pass = true;
int N1 = 0;
int N2 = 0;
bool pass = true;
int N1 = 0;
int N2 = 0;
int N_unused = 0;
int N_tail = 0;
for (int i=0; i<MAX_SIZE; i++) {
int N_tail = 0;
for ( int i = 0; i < MAX_SIZE; i++ ) {
if ( d_objects[i] != d_default )
N1++;
}
for (int i=0; i<MAX_SIZE+1; i++) {
int next = i==0 ? start:d_next[i];
for ( int i = 0; i < MAX_SIZE + 1; i++ ) {
int next = i == 0 ? start : d_next[i];
if ( next > 0 ) {
N2++;
} else if ( next < -3 ) {
@@ -170,71 +169,70 @@ inline bool AtomicList<TYPE,MAX_SIZE,COMPARE>::check( )
pass = false;
}
}
pass = pass && N_tail==1 && N1==d_N && N2==d_N && N_unused+d_N==MAX_SIZE;
int it = 0;
pass = pass && N_tail == 1 && N1 == d_N && N2 == d_N && N_unused + d_N == MAX_SIZE;
int it = 0;
int pos = 0;
while ( true ) {
int next = pos==0 ? start:d_next[pos];
int next = pos == 0 ? start : d_next[pos];
if ( next == -1 )
break;
pos = next;
it++;
}
pass = pass && it==d_N;
pass = pass && it == d_N;
// Unlock the list and return the results
unlock( 0, start );
return pass;
}
/******************************************************************
 * MemoryPool                                                      *
 ******************************************************************/
/*!
 * Create a pool with room for \p size objects.
 * The free slots form an intrusive chain: each free slot stores, in its first
 * sizeof(INT_TYPE) bytes, a link value L such that ( L + 1 ) becomes the next value
 * of d_next when the slot is handed out by allocate().  d_next uses 1-based slot
 * indices, with -1 meaning "pool exhausted" (0 is the transient value allocate()/free()
 * leave in d_next while they operate on the chain).
 * @param size      Number of objects the pool can hold (may be 0 for an empty pool)
 * @throws std::runtime_error if the backing storage cannot be allocated
 */
template<class TYPE, class INT_TYPE>
MemoryPool<TYPE, INT_TYPE>::MemoryPool( size_t size )
{
    static_assert( sizeof( TYPE ) >= sizeof( int ),
        "sizeof(TYPE) must be >= sizeof(int) to ensure proper operation" );
    static_assert( sizeof( TYPE ) >= sizeof( INT_TYPE ),
        "sizeof(TYPE) must be >= sizeof(INT_TYPE) to ensure proper operation" );
    // Always request at least one slot so d_objects is a valid pointer even for size == 0
    const size_t N_alloc = size > 0 ? size : 1;
    d_objects = reinterpret_cast<TYPE *>( malloc( sizeof( TYPE ) * N_alloc ) );
    if ( d_objects == nullptr )
        throw std::runtime_error( "MemoryPool: unable to allocate storage" );
    if ( size == 0 ) {
        // Nothing to chain: the pool starts out exhausted
        d_next = -1;
        return;
    }
    d_next = 1;
    // Slot i links to slot i+1 (stored 0-based, so allocate() yields the 1-based i+2)
    for ( size_t i = 0; i + 1 < size; i++ )
        reinterpret_cast<volatile INT_TYPE &>( d_objects[i] ) = static_cast<INT_TYPE>( i + 1 );
    // Terminate the chain with -2 so that allocate() computes ( -2 + 1 ) == -1, the
    // "exhausted" marker.  Storing -1 here would leave d_next at 0 after the last slot
    // is handed out, and every later allocate()/free() would spin forever on it.
    reinterpret_cast<volatile INT_TYPE &>( d_objects[size - 1] ) = static_cast<INT_TYPE>( -2 );
}
/*!
 * Release the storage owned by the pool.
 * Objects that are still allocated are not destroyed here.
 */
template<class TYPE, class INT_TYPE>
MemoryPool<TYPE, INT_TYPE>::~MemoryPool()
{
    // The global-scope qualifier is required: inside the class an unqualified free()
    // resolves to the member MemoryPool::free( TYPE* ), which would run a destructor on
    // the first slot and push it back on the chain instead of releasing the buffer
    // obtained from malloc()
    ::free( const_cast<TYPE *>( d_objects ) );
    d_objects = nullptr;
}
/*!
 * Hand out one default-constructed object from the pool.
 * @return  Pointer to the new object, or nullptr when d_next holds -1 (no free slot)
 */
template<class TYPE, class INT_TYPE>
inline TYPE *MemoryPool<TYPE, INT_TYPE>::allocate()
{
    // Take the chain head, leaving 0 in d_next; retry while another caller holds it
    // NOTE(review): assumes atomic_swap exchanges *d_next with the local value - confirm
    AtomicOperations::int32_atomic head = 0;
    do {
        AtomicOperations::atomic_swap( &d_next, &head );
    } while ( head == 0 );
    TYPE *result = nullptr;
    if ( head != -1 ) {
        // head is a 1-based slot index; read the link stored in the slot before the
        // object is constructed over it
        auto &slot          = d_objects[head - 1];
        const INT_TYPE link = reinterpret_cast<volatile INT_TYPE &>( slot );
        result              = const_cast<TYPE *>( &slot );
        new ( result ) TYPE();
        head = link + 1;
    }
    // Publish the new head (or put back -1), which also releases the chain
    AtomicOperations::atomic_fetch_and_or( &d_next, head );
    return result;
}
/*!
 * Destroy an object and return its slot to the pool.
 * @param ptr   Object to release; its position is computed relative to d_objects
 */
template<class TYPE, class INT_TYPE>
inline void MemoryPool<TYPE, INT_TYPE>::free( TYPE *ptr )
{
    // Run the destructor first; the storage is reused for the chain link below
    ptr->~TYPE();
    // Take the chain head, leaving 0 in d_next; retry while another caller holds it
    // NOTE(review): assumes atomic_swap exchanges *d_next with the local value - confirm
    AtomicOperations::int32_atomic head = 0;
    do {
        AtomicOperations::atomic_swap( &d_next, &head );
    } while ( head == 0 );
    // Store the previous head in the slot (as head - 1, the form allocate() reads back)
    reinterpret_cast<INT_TYPE &>( *ptr ) = head - 1;
    // The released slot becomes the new head (1-based index)
    head = ptr - d_objects + 1;
    AtomicOperations::atomic_fetch_and_or( &d_next, head );
}

View File

@@ -1,15 +1,15 @@
#include "threadpool/atomic_helpers.h"
#include "common/UnitTest.h"
#include "common/Utilities.h"
#include <iostream>
#include <stdio.h>
#include <stdlib.h>
#include <string>
#include <vector>
#include <thread>
#include <chrono>
#include <functional>
#include <atomic>
#include <chrono>
#include <cstdio>
#include <cstdlib>
#include <functional>
#include <iostream>
#include <string>
#include <thread>
#include <vector>
#define perr std::cerr
@@ -21,18 +21,18 @@
// Apply N unit steps to the counter: N > 0 performs N increments, N < 0 performs
// -N decrements, and N == 0 leaves the counter untouched
static void modify_counter( int N, AtomicOperations::counter_t &counter )
{
    for ( int remaining = N; remaining > 0; --remaining )
        counter.increment();
    for ( int remaining = N; remaining < 0; ++remaining )
        counter.decrement();
}
/******************************************************************
* The main program *
******************************************************************/
* The main program *
******************************************************************/
#ifdef USE_WINDOWS
int __cdecl main( int, char ** )
{
@@ -60,25 +60,25 @@ int main( int, char *[] )
// Create the counter we want to test
AtomicOperations::counter_t count;
if ( count.increment() == 1 )
ut.passes("increment count");
ut.passes( "increment count" );
else
ut.failure("increment count");
ut.failure( "increment count" );
if ( count.decrement() == 0 )
ut.passes("decrement count");
ut.passes( "decrement count" );
else
ut.failure("decrement count");
count.setCount(3);
ut.failure( "decrement count" );
count.setCount( 3 );
if ( count.getCount() == 3 )
ut.passes("set count");
ut.passes( "set count" );
else
ut.failure("set count");
count.setCount(0);
ut.failure( "set count" );
count.setCount( 0 );
// Increment the counter in serial
auto start = std::chrono::high_resolution_clock::now();
modify_counter( N_count, count );
auto stop = std::chrono::high_resolution_clock::now();
double time_inc_serial = std::chrono::duration<double>(stop-start).count() / N_count;
auto stop = std::chrono::high_resolution_clock::now();
double time_inc_serial = std::chrono::duration<double>( stop - start ).count() / N_count;
int val = count.getCount();
if ( val != N_count ) {
char tmp[100];
@@ -90,8 +90,8 @@ int main( int, char *[] )
// Decrement the counter in serial
start = std::chrono::high_resolution_clock::now();
modify_counter( -N_count, count );
stop = std::chrono::high_resolution_clock::now();
double time_dec_serial = std::chrono::duration<double>(stop-start).count() / N_count;
stop = std::chrono::high_resolution_clock::now();
double time_dec_serial = std::chrono::duration<double>( stop - start ).count() / N_count;
val = count.getCount();
if ( val != 0 ) {
char tmp[100];
@@ -104,12 +104,13 @@ int main( int, char *[] )
std::vector<std::thread> threads( N_threads );
start = std::chrono::high_resolution_clock::now();
for ( int i = 0; i < N_threads; i++ )
threads[i] = std::thread( modify_counter, N_count, std::ref(count) );
threads[i] = std::thread( modify_counter, N_count, std::ref( count ) );
for ( int i = 0; i < N_threads; i++ )
threads[i].join();
stop = std::chrono::high_resolution_clock::now();
double time_inc_parallel = std::chrono::duration<double>(stop-start).count() / ( N_count * N_threads );
val = count.getCount();
double time_inc_parallel =
std::chrono::duration<double>( stop - start ).count() / ( N_count * N_threads );
val = count.getCount();
if ( val != N_count * N_threads ) {
char tmp[100];
sprintf( tmp, "Count of %i did not match expected count of %i", val, N_count * N_threads );
@@ -120,12 +121,13 @@ int main( int, char *[] )
// Decrement the counter in parallel
start = std::chrono::high_resolution_clock::now();
for ( int i = 0; i < N_threads; i++ )
threads[i] = std::thread( modify_counter, -N_count, std::ref(count) );
threads[i] = std::thread( modify_counter, -N_count, std::ref( count ) );
for ( int i = 0; i < N_threads; i++ )
threads[i].join();
stop = std::chrono::high_resolution_clock::now();
double time_dec_parallel = std::chrono::duration<double>(stop-start).count() / ( N_count * N_threads );
val = count.getCount();
double time_dec_parallel =
std::chrono::duration<double>( stop - start ).count() / ( N_count * N_threads );
val = count.getCount();
if ( val != 0 ) {
char tmp[100];
sprintf( tmp, "Count of %i did not match expected count of %i", val, 0 );
@@ -147,6 +149,6 @@ int main( int, char *[] )
// Finished
ut.report();
int N_errors = static_cast<int>( ut.NumFailGlobal() );
auto N_errors = static_cast<int>( ut.NumFailGlobal() );
return N_errors;
}

View File

@@ -1,210 +1,221 @@
#include "threadpool/atomic_list.h"
#include "common/UnitTest.h"
#include "common/Utilities.h"
#include <iostream>
#include <stdio.h>
#include <stdlib.h>
#include <string>
#include <vector>
#include <thread>
#include <chrono>
#include <functional>
#include <atomic>
#include <algorithm>
#include <atomic>
#include <chrono>
#include <cstdio>
#include <cstdlib>
#include <functional>
#include <iostream>
#include <string>
#include <thread>
#include <vector>
// Stress routine: repeatedly pull up to five values out of the list (the first entry,
// the first entry via an always-true predicate, and three entries selected against
// random thresholds) and put back every value that was actually obtained.
// A result of -1 is treated as "nothing removed" and is not re-inserted.
static void modify_list( AtomicList<int, 1024> &list )
{
    const int N_count = 50000;
    for ( int pass = 0; pass < N_count; ++pass ) {
        // Braced initialization evaluates the removals strictly left to right
        const int removed[] = {
            list.remove_first(),
            list.remove( []( int ) { return true; } ),
            list.remove( []( int v ) { return v >= ( rand() / 8 ); } ),
            list.remove( []( int v ) { return v >= ( rand() / 4 ); } ),
            list.remove( []( int v ) { return v >= ( rand() / 2 ); } ),
        };
        for ( int value : removed ) {
            if ( value != -1 )
                list.insert( value );
        }
    }
}
static bool check_list( const std::vector<int>& x, AtomicList<int,1024>& list )
static bool check_list( const std::vector<int> &x, AtomicList<int, 1024> &list )
{
bool pass = list.check();
pass = pass && (int) x.size() == list.size();
pass = pass && (int) x.size() == list.size();
if ( pass ) {
for (size_t i=0; i<x.size(); i++)
pass = pass && x[i] == list.remove( [](int) { return true; } );
for ( int i : x )
pass = pass && i == list.remove( []( int ) { return true; } );
}
// Restore the list
for (int i=0; i<list.size(); i++)
for ( int i = 0; i < list.size(); i++ )
list.remove_first();
for (size_t i=0; i<x.size(); i++)
list.insert( x[i] );
for ( int i : x )
list.insert( i );
return pass;
}
// Remove every entry from the list.
// Drains until empty() reports true; a counted loop bounded by list.size() would stop
// about half way because the size shrinks while the index grows.
static inline void clear_list( AtomicList<int, 1024> &list )
{
    while ( !list.empty() )
        list.remove_first();
}
/******************************************************************
* The main program *
******************************************************************/
* The main program *
******************************************************************/
// Test driver for AtomicList: basic sanity checks, single-threaded timing of
// insert/remove, serial and multi-threaded stress tests, and an overflow test.
// Returns the number of failed tests reported by the UnitTest object.
int main( int, char *[] )
{
    UnitTest ut;

    const int N_threads = 8; // Number of threads

    // Create the list
    AtomicList<int, 1024> list( -1 );
    if ( list.size() == 0 && list.check() )
        ut.passes( "Initialize" );
    else
        ut.failure( "Initialize" );

    // Initialize the list with some values
    for ( int i = 0; i < 80; i++ )
        list.insert( rand() );
    list.insert( 2 );
    list.insert( 1 );
    list.insert( rand() );

    // Try to pull off a couple of values
    int v1 = list.remove( []( int a ) { return a == 1; } ); // Find the entry with 1
    int v2 = list.remove( []( int ) { return true; } );     // Get the first entry
    int v3 = list.remove( []( int ) { return false; } );    // Fail to get an entry
    if ( v1 == 1 && v2 == 2 && v3 == -1 && list.size() == 81 && list.check() )
        ut.passes( "Basic sanity test" );
    else
        ut.failure( "Basic sanity test" );

    // Clear the list
    while ( list.remove( []( int ) { return true; } ) != -1 ) {
    }

    // Create a list of known values
    // std::vector<int> data0(512);
    std::vector<int> data0( 5 * N_threads );
    for ( int &value : data0 )
        value = rand();
    auto data = data0;
    std::sort( data.begin(), data.end() );

    // Test the cost to insert
    const int N_it = 20;
    // Make sure the list is empty (drain until empty; a loop counted against
    // list.size() would stop half way since the size shrinks during the loop)
    while ( !list.empty() )
        list.remove( []( int ) { return true; } );
    std::chrono::duration<double> time;
    std::chrono::time_point<std::chrono::high_resolution_clock> start, stop;
    // Average time per item (ns) over all timed iterations
    auto ns_per_item = [&]() { return 1e9 * time.count() / ( N_it * data0.size() ); };
    time = time.zero();
    for ( int it = 0; it < N_it; it++ ) {
        clear_list( list );
        start = std::chrono::high_resolution_clock::now();
        for ( int value : data0 )
            list.insert( value );
        stop = std::chrono::high_resolution_clock::now();
        time += ( stop - start );
    }
    printf( "insert time/item = %0.0f ns\n", ns_per_item() );

    // Test the cost to remove (first)
    time = time.zero();
    for ( int it = 0; it < N_it; it++ ) {
        check_list( data, list );
        start = std::chrono::high_resolution_clock::now();
        for ( size_t i = 0; i < data0.size(); i++ )
            list.remove_first();
        stop = std::chrono::high_resolution_clock::now();
        time += ( stop - start );
    }
    printf( "remove (first) time/item = %0.0f ns\n", ns_per_item() );

    // Test the cost to remove (in order)
    time = time.zero();
    for ( int it = 0; it < N_it; it++ ) {
        check_list( data, list );
        start = std::chrono::high_resolution_clock::now();
        for ( size_t i = 0; i < data0.size(); i++ )
            list.remove( []( int ) { return true; } );
        stop = std::chrono::high_resolution_clock::now();
        time += ( stop - start );
    }
    printf( "remove (ordered) time/item = %0.0f ns\n", ns_per_item() );

    // Test the cost to remove (out of order)
    time = time.zero();
    for ( int it = 0; it < N_it; it++ ) {
        check_list( data, list );
        start = std::chrono::high_resolution_clock::now();
        for ( int tmp : data0 ) {
            list.remove( [tmp]( int v ) { return v == tmp; } );
        }
        stop = std::chrono::high_resolution_clock::now();
        time += ( stop - start );
    }
    printf( "remove (unordered) time/item = %0.0f ns\n", ns_per_item() );

    // Read/write to the list and check the results
    int64_t N0 = list.N_remove();
    check_list( data, list );
    start = std::chrono::high_resolution_clock::now();
    modify_list( list );
    stop               = std::chrono::high_resolution_clock::now();
    double time_serial = std::chrono::duration<double>( stop - start ).count();
    int64_t N1         = list.N_remove();
    bool pass          = check_list( data, list );
    if ( pass )
        ut.passes( "Serial get/insert" );
    else
        ut.failure( "Serial get/insert" );
    printf( "serial time = %0.5f s\n", time_serial );
    printf( "serial time/item = %0.0f ns\n", 1e9 * time_serial / ( N1 - N0 ) );

    // Have multiple threads reading/writing to the list simultaneously
    std::vector<std::thread> threads( N_threads );
    start = std::chrono::high_resolution_clock::now();
    for ( int i = 0; i < N_threads; i++ )
        threads[i] = std::thread( modify_list, std::ref( list ) );
    for ( int i = 0; i < N_threads; i++ )
        threads[i].join();
    stop                 = std::chrono::high_resolution_clock::now();
    double time_parallel = std::chrono::duration<double>( stop - start ).count();
    int64_t N2           = list.N_remove();
    pass                 = check_list( data, list );
    if ( pass )
        ut.passes( "Parallel get/insert" );
    else
        ut.failure( "Parallel get/insert" );
    printf( "parallel time = %0.5f s\n", time_parallel );
    printf( "parallel time/item = %0.0f ns\n", 1e9 * time_parallel / ( N2 - N1 ) );

    // Try to over-fill the list
    while ( !list.empty() )
        list.remove_first();
    for ( int i = 1; i <= list.capacity(); i++ )
        list.insert( i );
    try {
        list.insert( list.capacity() + 1 );
        ut.failure( "List overflow" );
    } catch ( const std::exception & ) {
        // Inserting into a full list is expected to throw a standard exception
        ut.passes( "List overflow" );
    } catch ( ... ) {
        ut.failure( "List overflow (unknown exception)" );
    }

    // Finished
    ut.report();
    auto N_errors = static_cast<int>( ut.NumFailGlobal() );
    return N_errors;
}

View File

@@ -5,15 +5,15 @@
#include "threadpool/thread_pool.h"
#include "common/UnitTest.h"
#include "common/Utilities.h"
#include <math.h>
#include <algorithm>
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <iostream>
#include <mutex>
#include <stdexcept>
#include <stdio.h>
#include <stdlib.h>
#include <string>
#include <vector>
#include <mutex>
#define MAX( x, y ) ( ( x ) > ( y ) ? ( x ) : ( y ) )
@@ -28,8 +28,8 @@
#include "mpi.h"
#endif
#define to_ns(x) std::chrono::duration_cast<std::chrono::nanoseconds>(x).count()
#define to_ms(x) std::chrono::duration_cast<std::chrono::milliseconds>(x).count()
#define to_ns( x ) std::chrono::duration_cast<std::chrono::nanoseconds>( x ).count()
#define to_ms( x ) std::chrono::duration_cast<std::chrono::milliseconds>( x ).count()
// Wrapper functions for mpi
@@ -82,18 +82,17 @@ void waste_cpu( int N )
// Sleep for the given time
// Note: since we may encounter interrupts, we may not sleep for the desired time
// so we need to perform the sleep in a loop
// Sleep for at least N milliseconds (returns immediately when N <= 0).
// The sleep is repeated until the requested time has elapsed, since a single
// sleep call may be cut short.  The remaining time is kept in 64 bits so that
// large requests are not truncated, and a monotonic clock is used so that
// wall-clock adjustments cannot shorten or extend the wait.
void sleep_ms( int64_t N )
{
    using clock        = std::chrono::steady_clock;
    const auto t_start = clock::now();
    int64_t elapsed    = 0;
    while ( elapsed < N ) {
        std::this_thread::sleep_for( std::chrono::milliseconds( N - elapsed ) );
        elapsed =
            std::chrono::duration_cast<std::chrono::milliseconds>( clock::now() - t_start ).count();
    }
}
// Sleep for at least N seconds.
// The conversion to milliseconds is done in 64 bits so that large values of N
// cannot overflow the int multiplication.
void sleep_s( int N ) { sleep_ms( static_cast<int64_t>( 1000 ) * N ); }
// Function to sleep for N seconds then increment a global count
@@ -133,9 +132,9 @@ void print_processor( ThreadPool *tpool )
int processor = ThreadPool::getCurrentProcessor();
char tmp[100];
sprintf( tmp, "%i: Thread,proc = %i,%i\n", rank, thread, processor );
sleep_ms( 10*rank );
sleep_ms( 10 * rank );
print_processor_mutex.lock();
std::cout << tmp;
pout << tmp;
print_processor_mutex.unlock();
sleep_ms( 100 );
}
@@ -161,7 +160,9 @@ int test_member_thread( ThreadPool *tpool )
}
// Functions to test the templates
/******************************************************************
* Test the TPOOL_ADD_WORK macro with variable number of arguments *
******************************************************************/
// Stub functions: each ignores its arguments and returns the number of parameters
// in its own signature
static int myfun0() { return 0; }
static int myfun1( int ) { return 1; }
static int myfun2( int, float ) { return 2; }
@@ -170,60 +171,6 @@ static int myfun4( int, float, double, char ) { return 4; }
// Stub functions (continued): each ignores its arguments and returns the number of
// parameters in its own signature
static int myfun5( int, float, double, char, std::string ) { return 5; }
static int myfun6( int, float, double, char, std::string, int ) { return 6; }
static int myfun7( int, float, double, char, std::string, int, int ) { return 7; }
// Functions to test instantiation of functions with different numbers of arguments.
// The vfunargNN stubs return void and have empty bodies; NN is the number of
// parameters, which cycle through the types int, char, double.
// clang-format off
static void vfunarg00() {}
static void vfunarg01( int ) {}
static void vfunarg02( int, char ) {}
static void vfunarg03( int, char, double ) {}
static void vfunarg04( int, char, double, int ) {}
static void vfunarg05( int, char, double, int, char ) {}
static void vfunarg06( int, char, double, int, char, double ) {}
static void vfunarg07( int, char, double, int, char, double, int ) {}
static void vfunarg08( int, char, double, int, char, double, int, char ) {}
static void vfunarg09( int, char, double, int, char, double, int, char, double ) {}
static void vfunarg10( int, char, double, int, char, double, int, char, double, int ) {}
static void vfunarg11( int, char, double, int, char, double, int, char, double, int, char ) {}
static void vfunarg12( int, char, double, int, char, double, int, char, double, int, char, double ) {}
static void vfunarg13( int, char, double, int, char, double, int, char, double, int, char, double, int ) {}
static void vfunarg14( int, char, double, int, char, double, int, char, double, int, char, double, int, char ) {}
static void vfunarg15( int, char, double, int, char, double, int, char, double, int, char, double, int, char, double ) {}
static void vfunarg16( int, char, double, int, char, double, int, char, double, int, char, double, int, char, double, int ) {}
static void vfunarg17( int, char, double, int, char, double, int, char, double, int, char, double, int, char, double, int, char ) {}
static void vfunarg18( int, char, double, int, char, double, int, char, double, int, char, double, int, char, double, int, char, double ) {}
static void vfunarg19( int, char, double, int, char, double, int, char, double, int, char, double, int, char, double, int, char, double, int ) {}
static void vfunarg20( int, char, double, int, char, double, int, char, double, int, char, double, int, char, double, int, char, double, int, char ) {}
static void vfunarg21( int, char, double, int, char, double, int, char, double, int, char, double, int, char, double, int, char, double, int, char, double ) {}
static void vfunarg22( int, char, double, int, char, double, int, char, double, int, char, double, int, char, double, int, char, double, int, char, double, int ) {}
static void vfunarg23( int, char, double, int, char, double, int, char, double, int, char, double, int, char, double, int, char, double, int, char, double, int, char ) {}
static void vfunarg24( int, char, double, int, char, double, int, char, double, int, char, double, int, char, double, int, char, double, int, char, double, int, char, double ) {}
// Integer-returning routines taking 0 through 24 parameters (the parameter types cycle
// through int, char, double).  Each one ignores its arguments and returns its own
// parameter count.
static auto funarg00() -> int { return 0; }
static auto funarg01( int ) -> int { return 1; }
static auto funarg02( int, char ) -> int { return 2; }
static auto funarg03( int, char, double ) -> int { return 3; }
static auto funarg04( int, char, double, int ) -> int { return 4; }
static auto funarg05( int, char, double, int, char ) -> int { return 5; }
static auto funarg06( int, char, double, int, char, double ) -> int { return 6; }
static auto funarg07( int, char, double, int, char, double, int ) -> int { return 7; }
static auto funarg08( int, char, double, int, char, double, int, char ) -> int { return 8; }
static auto funarg09( int, char, double, int, char, double, int, char, double ) -> int { return 9; }
static auto funarg10( int, char, double, int, char, double, int, char, double, int ) -> int { return 10; }
static auto funarg11( int, char, double, int, char, double, int, char, double, int, char ) -> int { return 11; }
static auto funarg12( int, char, double, int, char, double, int, char, double, int, char, double ) -> int { return 12; }
static auto funarg13( int, char, double, int, char, double, int, char, double, int, char, double, int ) -> int { return 13; }
static auto funarg14( int, char, double, int, char, double, int, char, double, int, char, double, int, char ) -> int { return 14; }
static auto funarg15( int, char, double, int, char, double, int, char, double, int, char, double, int, char, double ) -> int { return 15; }
static auto funarg16( int, char, double, int, char, double, int, char, double, int, char, double, int, char, double, int ) -> int { return 16; }
static auto funarg17( int, char, double, int, char, double, int, char, double, int, char, double, int, char, double, int, char ) -> int { return 17; }
static auto funarg18( int, char, double, int, char, double, int, char, double, int, char, double, int, char, double, int, char, double ) -> int { return 18; }
static auto funarg19( int, char, double, int, char, double, int, char, double, int, char, double, int, char, double, int, char, double, int ) -> int { return 19; }
static auto funarg20( int, char, double, int, char, double, int, char, double, int, char, double, int, char, double, int, char, double, int, char ) -> int { return 20; }
static auto funarg21( int, char, double, int, char, double, int, char, double, int, char, double, int, char, double, int, char, double, int, char, double ) -> int { return 21; }
static auto funarg22( int, char, double, int, char, double, int, char, double, int, char, double, int, char, double, int, char, double, int, char, double, int ) -> int { return 22; }
static auto funarg23( int, char, double, int, char, double, int, char, double, int, char, double, int, char, double, int, char, double, int, char, double, int, char ) -> int { return 23; }
static auto funarg24( int, char, double, int, char, double, int, char, double, int, char, double, int, char, double, int, char, double, int, char, double, int, char, double ) -> int { return 24; }
static int test_function_arguements( ThreadPool *tpool )
{
int N_errors = 0;
@@ -231,88 +178,56 @@ static int test_function_arguements( ThreadPool *tpool )
ThreadPool::thread_id_t id0 = TPOOL_ADD_WORK( tpool, myfun0, ( nullptr ) );
ThreadPool::thread_id_t id1 = TPOOL_ADD_WORK( tpool, myfun1, ( (int) 1 ) );
ThreadPool::thread_id_t id2 = TPOOL_ADD_WORK( tpool, myfun2, ( (int) 1, (float) 2 ) );
ThreadPool::thread_id_t id3 = TPOOL_ADD_WORK( tpool, myfun3, ( (int) 1, (float) 2, (double) 3 ) );
ThreadPool::thread_id_t id4 = TPOOL_ADD_WORK( tpool, myfun4, ( (int) 1, (float) 2, (double) 3, (char) 4 ) );
ThreadPool::thread_id_t id5 = TPOOL_ADD_WORK( tpool, myfun5, ( (int) 1, (float) 2, (double) 3, (char) 4, std::string( "test" ) ) );
ThreadPool::thread_id_t id52= TPOOL_ADD_WORK( tpool, myfun5, ( (int) 1, (float) 2, (double) 3, (char) 4, std::string( "test" ) ), -1 );
ThreadPool::thread_id_t id6 = TPOOL_ADD_WORK( tpool, myfun6, ( (int) 1, (float) 2, (double) 3, (char) 4, std::string( "test" ), (int) 1 ) );
ThreadPool::thread_id_t id7 = TPOOL_ADD_WORK( tpool, myfun7, ( (int) 1, (float) 2, (double) 3, (char) 4, std::string( "test" ), (int) 1, (int) 1 ) );
ThreadPool::thread_id_t id3 =
TPOOL_ADD_WORK( tpool, myfun3, ( (int) 1, (float) 2, (double) 3 ) );
ThreadPool::thread_id_t id4 =
TPOOL_ADD_WORK( tpool, myfun4, ( (int) 1, (float) 2, (double) 3, (char) 4 ) );
ThreadPool::thread_id_t id5 = TPOOL_ADD_WORK(
tpool, myfun5, ( (int) 1, (float) 2, (double) 3, (char) 4, std::string( "test" ) ) );
ThreadPool::thread_id_t id52 = TPOOL_ADD_WORK(
tpool, myfun5, ( (int) 1, (float) 2, (double) 3, (char) 4, std::string( "test" ) ), -1 );
ThreadPool::thread_id_t id6 = TPOOL_ADD_WORK( tpool, myfun6,
( (int) 1, (float) 2, (double) 3, (char) 4, std::string( "test" ), (int) 1 ) );
ThreadPool::thread_id_t id7 = TPOOL_ADD_WORK( tpool, myfun7,
( (int) 1, (float) 2, (double) 3, (char) 4, std::string( "test" ), (int) 1, (int) 1 ) );
tpool->wait_pool_finished();
if ( !tpool->isFinished( id0 ) ) { N_errors++; }
if ( tpool->getFunctionRet<int>( id0 ) != 0 ) { N_errors++; }
if ( tpool->getFunctionRet<int>( id1 ) != 1 ) { N_errors++; }
if ( tpool->getFunctionRet<int>( id2 ) != 2 ) { N_errors++; }
if ( tpool->getFunctionRet<int>( id3 ) != 3 ) { N_errors++; }
if ( tpool->getFunctionRet<int>( id4 ) != 4 ) { N_errors++; }
if ( tpool->getFunctionRet<int>( id5 ) != 5 ) { N_errors++; }
if ( tpool->getFunctionRet<int>( id52 ) != 5 ){ N_errors++; }
if ( tpool->getFunctionRet<int>( id6 ) != 6 ) { N_errors++; }
if ( tpool->getFunctionRet<int>( id7 ) != 7 ) { N_errors++; }
// Test all the different numbers of arguments allowed
TPOOL_ADD_WORK( tpool, vfunarg00, ( nullptr ) );
TPOOL_ADD_WORK( tpool, vfunarg01, ( 1 ) );
TPOOL_ADD_WORK( tpool, vfunarg02, ( 1, 'a' ) );
TPOOL_ADD_WORK( tpool, vfunarg03, ( 1, 'a', 3.0 ) );
TPOOL_ADD_WORK( tpool, vfunarg04, ( 1, 'a', 3.0, 4 ) );
TPOOL_ADD_WORK( tpool, vfunarg05, ( 1, 'a', 3.0, 4, 'e' ) );
TPOOL_ADD_WORK( tpool, vfunarg06, ( 1, 'a', 3.0, 4, 'e', 6.0 ) );
TPOOL_ADD_WORK( tpool, vfunarg07, ( 1, 'a', 3.0, 4, 'e', 6.0, 7 ) );
TPOOL_ADD_WORK( tpool, vfunarg08, ( 1, 'a', 3.0, 4, 'e', 6.0, 7, 'h' ) );
TPOOL_ADD_WORK( tpool, vfunarg09, ( 1, 'a', 3.0, 4, 'e', 6.0, 7, 'h', 9.0 ) );
TPOOL_ADD_WORK( tpool, vfunarg10, ( 1, 'a', 3.0, 4, 'e', 6.0, 7, 'h', 9.0, 10 ) );
TPOOL_ADD_WORK( tpool, vfunarg11, ( 1, 'a', 3.0, 4, 'e', 6.0, 7, 'h', 9.0, 10, 'k' ) );
TPOOL_ADD_WORK( tpool, vfunarg12, ( 1, 'a', 3.0, 4, 'e', 6.0, 7, 'h', 9.0, 10, 'k', 12.0 ) );
TPOOL_ADD_WORK( tpool, vfunarg13, ( 1, 'a', 3.0, 4, 'e', 6.0, 7, 'h', 9.0, 10, 'k', 12.0, 13 ) );
TPOOL_ADD_WORK( tpool, vfunarg14, ( 1, 'a', 3.0, 4, 'e', 6.0, 7, 'h', 9.0, 10, 'k', 12.0, 13, 'n' ) );
TPOOL_ADD_WORK( tpool, vfunarg15, ( 1, 'a', 3.0, 4, 'e', 6.0, 7, 'h', 9.0, 10, 'k', 12.0, 13, 'n', 15.0 ) );
TPOOL_ADD_WORK( tpool, vfunarg16, ( 1, 'a', 3.0, 4, 'e', 6.0, 7, 'h', 9.0, 10, 'k', 12.0, 13, 'n', 15.0, 16 ) );
TPOOL_ADD_WORK( tpool, vfunarg17, ( 1, 'a', 3.0, 4, 'e', 6.0, 7, 'h', 9.0, 10, 'k', 12.0, 13, 'n', 15.0, 16, 'q' ) );
TPOOL_ADD_WORK( tpool, vfunarg18, ( 1, 'a', 3.0, 4, 'e', 6.0, 7, 'h', 9.0, 10, 'k', 12.0, 13, 'n', 15.0, 16, 'q', 18.0 ) );
TPOOL_ADD_WORK( tpool, vfunarg19, ( 1, 'a', 3.0, 4, 'e', 6.0, 7, 'h', 9.0, 10, 'k', 12.0, 13, 'n', 15.0, 16, 'q', 18.0, 19 ) );
TPOOL_ADD_WORK( tpool, vfunarg20, ( 1, 'a', 3.0, 4, 'e', 6.0, 7, 'h', 9.0, 10, 'k', 12.0, 13, 'n', 15.0, 16, 'q', 18.0, 19, 't' ) );
TPOOL_ADD_WORK( tpool, vfunarg21, ( 1, 'a', 3.0, 4, 'e', 6.0, 7, 'h', 9.0, 10, 'k', 12.0, 13, 'n', 15.0, 16, 'q', 18.0, 19, 't', 21.0 ) );
TPOOL_ADD_WORK( tpool, vfunarg22, ( 1, 'a', 3.0, 4, 'e', 6.0, 7, 'h', 9.0, 10, 'k', 12.0, 13, 'n', 15.0, 16, 'q', 18.0, 19, 't', 21.0, 22 ) );
TPOOL_ADD_WORK( tpool, vfunarg23, ( 1, 'a', 3.0, 4, 'e', 6.0, 7, 'h', 9.0, 10, 'k', 12.0, 13, 'n', 15.0, 16, 'q', 18.0, 19, 't', 21.0, 22, 'w' ) );
TPOOL_ADD_WORK( tpool, vfunarg24, ( 1, 'a', 3.0, 4, 'e', 6.0, 7, 'h', 9.0, 10, 'k', 12.0, 13, 'n', 15.0, 16, 'q', 18.0, 19, 't', 21.0, 22, 'w', 24.0 ) );
std::vector<ThreadPool::thread_id_t> ids( 25 );
ids[0] = TPOOL_ADD_WORK( tpool, funarg00, ( nullptr ) );
ids[1] = TPOOL_ADD_WORK( tpool, funarg01, ( 1 ) );
ids[2] = TPOOL_ADD_WORK( tpool, funarg02, ( 1, 'a' ) );
ids[3] = TPOOL_ADD_WORK( tpool, funarg03, ( 1, 'a', 3.0 ) );
ids[4] = TPOOL_ADD_WORK( tpool, funarg04, ( 1, 'a', 3.0, 4 ) );
ids[5] = TPOOL_ADD_WORK( tpool, funarg05, ( 1, 'a', 3.0, 4, 'e' ) );
ids[6] = TPOOL_ADD_WORK( tpool, funarg06, ( 1, 'a', 3.0, 4, 'e', 6.0 ) );
ids[7] = TPOOL_ADD_WORK( tpool, funarg07, ( 1, 'a', 3.0, 4, 'e', 6.0, 7 ) );
ids[8] = TPOOL_ADD_WORK( tpool, funarg08, ( 1, 'a', 3.0, 4, 'e', 6.0, 7, 'h' ) );
ids[9] = TPOOL_ADD_WORK( tpool, funarg09, ( 1, 'a', 3.0, 4, 'e', 6.0, 7, 'h', 9.0 ) );
ids[10] = TPOOL_ADD_WORK( tpool, funarg10, ( 1, 'a', 3.0, 4, 'e', 6.0, 7, 'h', 9.0, 10 ) );
ids[11] = TPOOL_ADD_WORK( tpool, funarg11, ( 1, 'a', 3.0, 4, 'e', 6.0, 7, 'h', 9.0, 10, 'k' ) );
ids[12] = TPOOL_ADD_WORK( tpool, funarg12, ( 1, 'a', 3.0, 4, 'e', 6.0, 7, 'h', 9.0, 10, 'k', 12.0 ) );
ids[13] = TPOOL_ADD_WORK( tpool, funarg13, ( 1, 'a', 3.0, 4, 'e', 6.0, 7, 'h', 9.0, 10, 'k', 12.0, 13 ) );
ids[14] = TPOOL_ADD_WORK( tpool, funarg14, ( 1, 'a', 3.0, 4, 'e', 6.0, 7, 'h', 9.0, 10, 'k', 12.0, 13, 'h' ) );
ids[15] = TPOOL_ADD_WORK( tpool, funarg15, ( 1, 'a', 3.0, 4, 'e', 6.0, 7, 'h', 9.0, 10, 'k', 12.0, 13, 'h', 15.0 ) );
ids[16] = TPOOL_ADD_WORK( tpool, funarg16, ( 1, 'a', 3.0, 4, 'e', 6.0, 7, 'h', 9.0, 10, 'k', 12.0, 13, 'n', 15.0, 16 ) );
ids[17] = TPOOL_ADD_WORK( tpool, funarg17, ( 1, 'a', 3.0, 4, 'e', 6.0, 7, 'h', 9.0, 10, 'k', 12.0, 13, 'n', 15.0, 16, 'q' ) );
ids[18] = TPOOL_ADD_WORK( tpool, funarg18, ( 1, 'a', 3.0, 4, 'e', 6.0, 7, 'h', 9.0, 10, 'k', 12.0, 13, 'n', 15.0, 16, 'q', 18.0 ) );
ids[19] = TPOOL_ADD_WORK( tpool, funarg19, ( 1, 'a', 3.0, 4, 'e', 6.0, 7, 'h', 9.0, 10, 'k', 12.0, 13, 'n', 15.0, 16, 'q', 18.0, 19 ) );
ids[20] = TPOOL_ADD_WORK( tpool, funarg20, ( 1, 'a', 3.0, 4, 'e', 6.0, 7, 'h', 9.0, 10, 'k', 12.0, 13, 'n', 15.0, 16, 'q', 18.0, 19, 't' ) );
ids[21] = TPOOL_ADD_WORK( tpool, funarg21, ( 1, 'a', 3.0, 4, 'e', 6.0, 7, 'h', 9.0, 10, 'k', 12.0, 13, 'n', 15.0, 16, 'q', 18.0, 19, 't', 21.0 ) );
ids[22] = TPOOL_ADD_WORK( tpool, funarg22, ( 1, 'a', 3.0, 4, 'e', 6.0, 7, 'h', 9.0, 10, 'k', 12.0, 13, 'n', 15.0, 16, 'q', 18.0, 19, 't', 21.0, 22 ) );
ids[23] = TPOOL_ADD_WORK( tpool, funarg23, ( 1, 'a', 3.0, 4, 'e', 6.0, 7, 'h', 9.0, 10, 'k', 12.0, 13, 'n', 15.0, 16, 'q', 18.0, 19, 't', 21.0, 22, 'w' ) );
ids[24] = TPOOL_ADD_WORK( tpool, funarg24, ( 1, 'a', 3.0, 4, 'e', 6.0, 7, 'h', 9.0, 10, 'k', 12.0, 13, 'n', 15.0, 16, 'q', 18.0, 19, 't', 21.0, 22, 'w', 24.0 ) );
tpool->wait_all( ids );
for ( size_t i = 0; i < ids.size(); i++ ) {
if ( tpool->getFunctionRet<int>( ids[i] ) != static_cast<int>( i ) )
N_errors++;
if ( !tpool->isFinished( id0 ) ) {
N_errors++;
}
if ( tpool->getFunctionRet<int>( id0 ) != 0 ) {
N_errors++;
}
if ( tpool->getFunctionRet<int>( id1 ) != 1 ) {
N_errors++;
}
if ( tpool->getFunctionRet<int>( id2 ) != 2 ) {
N_errors++;
}
if ( tpool->getFunctionRet<int>( id3 ) != 3 ) {
N_errors++;
}
if ( tpool->getFunctionRet<int>( id4 ) != 4 ) {
N_errors++;
}
if ( tpool->getFunctionRet<int>( id5 ) != 5 ) {
N_errors++;
}
if ( tpool->getFunctionRet<int>( id52 ) != 5 ) {
N_errors++;
}
if ( tpool->getFunctionRet<int>( id6 ) != 6 ) {
N_errors++;
}
if ( tpool->getFunctionRet<int>( id7 ) != 7 ) {
N_errors++;
}
return N_errors;
}
// clang-format on
/******************************************************************
* Examples to derive a user work item *
******************************************************************/
* Examples to derive a user work item *
******************************************************************/
class UserWorkItemVoid : public ThreadPool::WorkItem
{
public:
@@ -323,15 +238,15 @@ public:
NULL_USE( dummy );
}
// User defined run (can do anything)
virtual void run() override
void run() override
{
// Perform the tasks
printf( "Hello work from UserWorkItem (void)" );
}
// Will the routine return a result
virtual bool has_result() const override { return false; }
bool has_result() const override { return false; }
// User defined destructor
virtual ~UserWorkItemVoid() {}
~UserWorkItemVoid() override = default;
};
class UserWorkItemInt : public ThreadPool::WorkItemRet<int>
{
@@ -343,38 +258,31 @@ public:
NULL_USE( dummy );
}
// User defined run (can do anything)
virtual void run() override
void run() override
{
// Perform the tasks
printf( "Hello work from UserWorkItem (int)" );
// Store the results (its type will match the template)
ThreadPool::WorkItemRet<int>::d_result = 1;
}
// Will the routine return a result
virtual bool has_result() const override { return false; }
// User defined destructor
virtual ~UserWorkItemInt() {}
~UserWorkItemInt() override = default;
};
/******************************************************************
* test the time to run N tasks in parallel *
******************************************************************/
// Timing helper: waits for the pool to drain, samples the clock, queues a batch of
// identical work items, waits for the pool to drain again, samples the clock a second
// time and returns the difference as a std::chrono::duration<double> count (seconds).
// NOTE(review): this span carries two signatures (run_parallel and launchAndTime) and two
// variants of most statements, which reads like both revisions of one routine placed
// back to back -- confirm which revision is meant to survive before building.
inline double run_parallel( ThreadPool *tpool, int N_tasks, int N_work )
* test the time to run N tasks in parallel *
******************************************************************/
template<class Ret, class... Args>
inline double launchAndTime( ThreadPool &tpool, int N, Ret ( *routine )( Args... ), Args... args )
{
// Make sure the thread pool is empty
tpool->wait_pool_finished();
// Add the work
std::vector<ThreadPool::thread_id_t> ids;
ids.reserve( N_tasks );
tpool.wait_pool_finished();
// Start of the timed region
auto start = std::chrono::high_resolution_clock::now();
for ( int i = 0; i < N_tasks; i++ )
ids.push_back( TPOOL_ADD_WORK( tpool, waste_cpu, ( N_work ) ) );
// Wait for the thread pool to finish
tpool->wait_pool_finished();
// Compute the time spent running the tasks
// Queue N calls of routine( args... ); the literal 0 is presumably the priority -- TODO confirm
for ( int i = 0; i < N; i++ )
ThreadPool_add_work( &tpool, 0, routine, args... );
tpool.wait_pool_finished();
// End of the timed region
auto stop = std::chrono::high_resolution_clock::now();
return std::chrono::duration<double>(stop-start).count();
return std::chrono::duration<double>( stop - start ).count();
}
@@ -384,8 +292,8 @@ ThreadPool::thread_id_t f2( ThreadPool::thread_id_t a ) { return a; }
/******************************************************************
* Test the basic functionality of the atomics *
******************************************************************/
* Test the basic functionality of the atomics *
******************************************************************/
int test_atomics()
{
using namespace AtomicOperations;
@@ -411,33 +319,35 @@ int test_atomics()
/******************************************************************
* Test FIFO behavior *
******************************************************************/
void test_FIFO( UnitTest& ut, ThreadPool& tpool )
* Test FIFO behavior *
******************************************************************/
void test_FIFO( UnitTest &ut, ThreadPool &tpool )
{
int rank = getRank();
int size = getSize();
for (int r=0; r<size; r++) {
int rank = getRank();
int size = getSize();
const int N = 4000;
for ( int r = 0; r < size; r++ ) {
barrier();
if ( r != rank )
continue;
continue;
std::vector<ThreadPool::thread_id_t> ids;
for (size_t i=0; i<4000; i++)
ids.push_back( TPOOL_ADD_WORK( &tpool, sleep_inc2, ( 0.001 ) ) );
ids.reserve( N );
for ( size_t i = 0; i < N; i++ )
ids.emplace_back( TPOOL_ADD_WORK( &tpool, sleep_inc2, ( 0.001 ) ) );
bool pass = true;
while ( tpool.N_queued() > 0 ) {
int i1=-1, i2=ids.size();
for (size_t i=0; i<ids.size(); i++) {
int i1 = -1, i2 = ids.size();
for ( int i = N - 1; i >= 0; i-- ) {
bool started = ids[i].started();
if ( started )
i1 = std::max<int>(i1,i); // Last index to processing item
i1 = std::max<int>( i1, i ); // Last index to processing item
else
i2 = std::min<int>(i2,i); // First index to queued item
i2 = std::min<int>( i2, i ); // First index to queued item
}
int diff = i1==-1 ? 0:(i2-i1-1);
if ( abs(diff)>4 ) {
printf("%i %i %i\n",i1,i2,diff);
pass = pass && abs(i2-i1-1)<=2;
int diff = i1 == -1 ? 0 : ( i2 - i1 - 1 );
if ( abs( diff ) > 4 ) {
printf( "%i %i %i\n", i1, i2, diff );
pass = pass && abs( i2 - i1 - 1 ) <= 2;
}
}
ids.clear();
@@ -451,8 +361,8 @@ void test_FIFO( UnitTest& ut, ThreadPool& tpool )
/******************************************************************
* The main program *
******************************************************************/
* The main program *
******************************************************************/
#ifdef USE_WINDOWS
int __cdecl main( int argc, char **argv )
{
@@ -510,11 +420,7 @@ int main( int argc, char *argv[] )
// Get the number of processors available
barrier();
int N_procs = 0;
try {
N_procs = ThreadPool::getNumberOfProcessors();
} catch ( ... ) {
}
int N_procs = ThreadPool::getNumberOfProcessors();
if ( N_procs > 0 )
ut.passes( "getNumberOfProcessors" );
else
@@ -524,15 +430,11 @@ int main( int argc, char *argv[] )
// Get the processor affinities for the process
barrier();
std::vector<int> cpus;
try {
cpus = ThreadPool::getProcessAffinity();
printp( "%i cpus for current process: ", (int) cpus.size() );
for ( size_t i = 0; i < cpus.size(); i++ )
printp( "%i ", cpus[i] );
printp( "\n" );
} catch ( ... ) {
}
std::vector<int> cpus = ThreadPool::getProcessAffinity();
printp( "%i cpus for current process: ", (int) cpus.size() );
for ( int cpu : cpus )
printp( "%i ", cpu );
printp( "\n" );
if ( !cpus.empty() ) {
ut.passes( "getProcessAffinity" );
} else {
@@ -559,8 +461,8 @@ int main( int argc, char *argv[] )
cpus = ThreadPool::getProcessAffinity();
std::vector<int> cpus = ThreadPool::getProcessAffinity();
printp( "%i cpus for current process (updated): ", (int) cpus.size() );
for ( size_t i = 0; i < cpus.size(); i++ )
printp( "%i ", cpus[i] );
for ( int cpu : cpus )
printp( "%i ", cpu );
printp( "\n" );
pass = cpus.size() > 1;
} else {
@@ -630,8 +532,8 @@ int main( int argc, char *argv[] )
std::vector<int> procs_thread = tpool.getThreadAffinity( i );
if ( procs_thread != procs ) {
printp( "%i: Initial thread affinity: ", rank );
for ( size_t i = 0; i < procs_thread.size(); i++ )
printp( "%i ", procs_thread[i] );
for ( int i : procs_thread )
printp( "%i ", i );
printp( "\n" );
pass = false;
}
@@ -646,15 +548,15 @@ int main( int argc, char *argv[] )
int N_procs_thread = std::max<int>( (int) cpus.size() / N_threads, 1 );
for ( int i = 0; i < N_threads; i++ ) {
std::vector<int> procs_thread( N_procs_thread, -1 );
for ( int j = 0; j < N_procs_thread; j++ )
for ( int j = 0; j < N_procs_thread; j++ )
procs_thread[j] = procs[( i * N_procs_thread + j ) % procs.size()];
tpool.setThreadAffinity( i, procs_thread );
sleep_ms( 10 ); // Give time for OS to update thread affinities
std::vector<int> procs_thread2 = tpool.getThreadAffinity( i );
if ( procs_thread2 != procs_thread ) {
printp( "%i: Final thread affinity: ", rank );
for ( size_t i = 0; i < procs_thread.size(); i++ )
printp( "%i ", procs_thread[i] );
for ( int i : procs_thread )
printp( "%i ", i );
printp( "\n" );
pass = false;
}
@@ -674,8 +576,8 @@ int main( int argc, char *argv[] )
for ( int i = 0; i < N_threads; i++ ) {
std::vector<int> procs_thread = tpool.getThreadAffinity( i );
printp( "Thread affinity: " );
for ( size_t i = 0; i < procs_thread.size(); i++ )
printp( "%i ", procs_thread[i] );
for ( int i : procs_thread )
printp( "%i ", i );
printp( "\n" );
}
@@ -683,9 +585,7 @@ int main( int argc, char *argv[] )
barrier();
ThreadPool::set_OS_warnings( 1 );
print_processor( &tpool );
for ( int i = 0; i < N_threads; i++ )
TPOOL_ADD_WORK( &tpool, print_processor, ( &tpool ) );
tpool.wait_pool_finished();
launchAndTime( tpool, N_threads, print_processor, &tpool );
// Run some basic tests
barrier();
@@ -694,8 +594,8 @@ int main( int argc, char *argv[] )
for ( int i = 0; i < N_work; i++ )
waste_cpu( data1[i] );
}
auto stop = std::chrono::high_resolution_clock::now();
double time = std::chrono::duration<double>(stop-start).count();
auto stop = std::chrono::high_resolution_clock::now();
double time = std::chrono::duration<double>( stop - start ).count();
printp( "Time for serial cycle = %0.0f us\n", 1e6 * time / N_it );
printp( "Time for serial item = %0.0f ns\n", 1e9 * time / ( N_it * N_work ) );
id = TPOOL_ADD_WORK( &tpool, waste_cpu, ( data1[0] ) );
@@ -728,20 +628,14 @@ int main( int argc, char *argv[] )
tpool.wait_pool_finished();
start = std::chrono::high_resolution_clock::now();
sleep_inc( 1 );
stop = std::chrono::high_resolution_clock::now();
double sleep_serial = std::chrono::duration<double>(stop-start).count();
ids2.clear();
start = std::chrono::high_resolution_clock::now();
for ( int i = 0; i < N_threads; i++ )
ids2.push_back( TPOOL_ADD_WORK( &tpool, sleep_inc, ( 1 ) ) );
tpool.wait_all( N_procs_used, &ids2[0] );
stop = std::chrono::high_resolution_clock::now();
ids2.clear();
double sleep_parallel = std::chrono::duration<double>(stop-start).count();
stop = std::chrono::high_resolution_clock::now();
double sleep_serial = std::chrono::duration<double>( stop - start ).count();
double sleep_parallel = launchAndTime( tpool, N_threads, sleep_inc, 1 );
double sleep_speedup = N_procs_used * sleep_serial / sleep_parallel;
printf( "%i: Speedup on %i sleeping threads: %0.3f\n", rank, N_procs_used, sleep_speedup );
printf( "%i: ts = %0.3f, tp = %0.3f\n", rank, sleep_serial, sleep_parallel );
if ( fabs( sleep_serial - 1.0 ) < 0.05 && fabs( sleep_parallel - 1.0 ) < 0.25 && sleep_speedup>3 )
if ( fabs( sleep_serial - 1.0 ) < 0.05 && fabs( sleep_parallel - 1.0 ) < 0.25 &&
sleep_speedup > 3 )
ut.passes( "Passed thread sleep" );
else
ut.failure( "Failed thread sleep" );
@@ -770,11 +664,11 @@ int main( int argc, char *argv[] )
// Run in serial
start = std::chrono::high_resolution_clock::now();
waste_cpu( N );
stop = std::chrono::high_resolution_clock::now();
double time_serial = std::chrono::duration<double>(stop-start).count();
stop = std::chrono::high_resolution_clock::now();
double time_serial = std::chrono::duration<double>( stop - start ).count();
// Run in parallel
double time_parallel2 = run_parallel( &tpool, N_procs_used, N / 1000 );
double time_parallel = run_parallel( &tpool, N_procs_used, N );
double time_parallel = launchAndTime( tpool, N_procs_used, waste_cpu, N );
double time_parallel2 = launchAndTime( tpool, N_procs_used, waste_cpu, N / 1000 );
double speedup = N_procs_used * time_serial / time_parallel;
printf( "%i: Speedup on %i procs: %0.3f\n", rank, N_procs_used, speedup );
printf( "%i: ts = %0.3f, tp = %0.3f, tp2 = %0.3f\n", rank, time_serial, time_parallel,
@@ -823,8 +717,8 @@ int main( int argc, char *argv[] )
ids.reserve( 5 );
global_sleep_count = 0; // Reset the count before this test
ThreadPool::thread_id_t id0;
auto id1 = TPOOL_ADD_WORK( &tpool, sleep_inc, ( 1 ) );
auto id2 = TPOOL_ADD_WORK( &tpool, sleep_inc, ( 2 ) );
auto id1 = TPOOL_ADD_WORK( &tpool, sleep_inc, ( 1 ) );
auto id2 = TPOOL_ADD_WORK( &tpool, sleep_inc, ( 2 ) );
auto *wait1 = new WorkItemFull<bool, int>( check_inc, 1 );
auto *wait2 = new WorkItemFull<bool, int>( check_inc, 2 );
wait1->add_dependency( id0 );
@@ -842,15 +736,15 @@ int main( int argc, char *argv[] )
tpool.wait_pool_finished();
// Test waiting on more dependencies than in the thread pool (changing priorities)
ids.clear();
for (size_t i=0; i<20; i++)
for ( size_t i = 0; i < 20; i++ )
ids.push_back( TPOOL_ADD_WORK( &tpool, sleep_inc2, ( 0.1 ) ) );
auto *wait3 = new WorkItemFull<void,double>( sleep_inc2, 0 );
auto *wait3 = new WorkItemFull<void, double>( sleep_inc2, 0 );
wait3->add_dependencies( ids );
id = tpool.add_work( wait3, 50 );
tpool.wait( id );
bool pass = true;
for (size_t i=0; i<ids.size(); i++)
pass = pass && ids[i].finished();
for ( auto &id : ids )
pass = pass && id.finished();
ids.clear();
if ( pass )
ut.passes( "Dependencies2" );
@@ -896,21 +790,21 @@ int main( int argc, char *argv[] )
for ( int i = 0; i < N_work; i++ )
delete work[i];
auto t4 = std::chrono::high_resolution_clock::now();
time_create += to_ns(t2-t1);
time_run += to_ns(t3-t2);
time_delete += to_ns(t4-t3);
time_create += to_ns( t2 - t1 );
time_run += to_ns( t3 - t2 );
time_delete += to_ns( t4 - t3 );
if ( ( n + 1 ) % 100 == 0 )
printp( "Cycle %i of %i finished\n", n + 1, N_it );
}
stop = std::chrono::high_resolution_clock::now();
time = std::chrono::duration<double>(stop-start).count();
time = std::chrono::duration<double>( stop - start ).count();
PROFILE_STOP( timer_name );
printp( " time = %0.0f ms\n", 1e3 * time );
printp( " time / cycle = %0.0f us\n", 1e6 * time / N_it );
printp( " average time / item = %0.0f ns\n", 1e9 * time / ( N_it * N_work ) );
printp( " create = %i ns\n", static_cast<int>( time_create / ( N_it * N_work ) ) );
printp( " run = %i ns\n", static_cast<int>( time_run / ( N_it * N_work ) ) );
printp( " delete = %i us\n", static_cast<int>( time_delete / ( N_it * N_work ) ) );
printp( " create = %i ns\n", time_create / ( N_it * N_work ) );
printp( " run = %i ns\n", time_run / ( N_it * N_work ) );
printp( " delete = %i us\n", time_delete / ( N_it * N_work ) );
}
// Test the timing adding a single item
@@ -921,17 +815,17 @@ int main( int argc, char *argv[] )
if ( it == 0 ) {
printp( "Testing timmings (adding a single item to empty tpool):\n" );
timer_name = "Add single item to empty pool";
tpool_ptr = &tpool0;
tpool_ptr = &tpool0;
} else if ( it == 1 ) {
printp( "Testing timmings (adding a single item):\n" );
timer_name = "Add single item to tpool";
tpool_ptr = &tpool;
tpool_ptr = &tpool;
}
PROFILE_START( timer_name );
std::vector<ThreadPool::thread_id_t> ids( N_work );
int64_t time_add = 0;
int64_t time_wait = 0;
start = std::chrono::high_resolution_clock::now();
start = std::chrono::high_resolution_clock::now();
for ( int n = 0; n < N_it; n++ ) {
auto t1 = std::chrono::high_resolution_clock::now();
for ( int i = 0; i < N_work; i++ )
@@ -939,19 +833,19 @@ int main( int argc, char *argv[] )
auto t2 = std::chrono::high_resolution_clock::now();
tpool_ptr->wait_all( N_work, &ids[0] );
auto t3 = std::chrono::high_resolution_clock::now();
time_add += to_ns(t2-t1);
time_wait += to_ns(t3-t2);
time_add += to_ns( t2 - t1 );
time_wait += to_ns( t3 - t2 );
if ( ( n + 1 ) % 100 == 0 )
printp( "Cycle %i of %i finished\n", n + 1, N_it );
}
stop = std::chrono::high_resolution_clock::now();
time = std::chrono::duration<double>(stop-start).count();
time = std::chrono::duration<double>( stop - start ).count();
PROFILE_STOP( timer_name );
printp( " time = %0.0f ms\n", 1e3 * time );
printp( " time / cycle = %0.0f us\n", 1e6 * time / N_it );
printp( " average time / item = %0.0f ns\n", 1e9 * time / ( N_it * N_work ) );
printp( " create and add = %i ns\n", static_cast<int>( time_add / ( N_it * N_work ) ) );
printp( " wait = %i us\n", static_cast<int>( time_wait / ( N_it * N_work ) ) );
printp( " create and add = %i ns\n", time_add / ( N_it * N_work ) );
printp( " wait = %i us\n", time_wait / ( N_it * N_work ) );
}
// Test the timing pre-creating the work items and adding multiple at a time
@@ -962,11 +856,11 @@ int main( int argc, char *argv[] )
if ( it == 0 ) {
printp( "Testing timmings (adding a block of items to empty tpool):\n" );
timer_name = "Add multiple items to empty pool";
tpool_ptr = &tpool0;
tpool_ptr = &tpool0;
} else if ( it == 1 ) {
printp( "Testing timmings (adding a block of items):\n" );
timer_name = "Add multiple items to tpool";
tpool_ptr = &tpool;
tpool_ptr = &tpool;
}
PROFILE_START( timer_name );
int64_t time_create_work = 0;
@@ -978,26 +872,26 @@ int main( int argc, char *argv[] )
auto t1 = std::chrono::high_resolution_clock::now();
for ( int i = 0; i < N_work; i++ )
work[i] = ThreadPool::createWork<void, int>( waste_cpu, data1[i] );
auto t2 = std::chrono::high_resolution_clock::now();
auto t2 = std::chrono::high_resolution_clock::now();
auto ids = tpool_ptr->add_work( work, priority );
auto t3 = std::chrono::high_resolution_clock::now();
auto t3 = std::chrono::high_resolution_clock::now();
tpool_ptr->wait_all( ids );
auto t4 = std::chrono::high_resolution_clock::now();
time_create_work += to_ns(t2-t1);
time_add_work += to_ns(t3-t2);
time_wait_work += to_ns(t4-t3);
time_create_work += to_ns( t2 - t1 );
time_add_work += to_ns( t3 - t2 );
time_wait_work += to_ns( t4 - t3 );
if ( ( n + 1 ) % 100 == 0 )
printp( "Cycle %i of %i finished\n", n + 1, N_it );
}
stop = std::chrono::high_resolution_clock::now();
time = std::chrono::duration<double>(stop-start).count();
time = std::chrono::duration<double>( stop - start ).count();
PROFILE_STOP( timer_name );
printp( " time = %0.0f ms\n", 1e3 * time );
printp( " time / cycle = %0.0f us\n", 1e6 * time / N_it );
printp( " average time / item = %0.0f ns\n", 1e9 * time / ( N_it * N_work ) );
printp( " create = %i ns\n", static_cast<int>( time_create_work / ( N_it * N_work ) ) );
printp( " add = %i ns\n", static_cast<int>( time_add_work / ( N_it * N_work ) ) );
printp( " wait = %i ns\n", static_cast<int>( time_wait_work / ( N_it * N_work ) ) );
printp( " create = %i ns\n", time_create_work / ( N_it * N_work ) );
printp( " add = %i ns\n", time_add_work / ( N_it * N_work ) );
printp( " wait = %i ns\n", time_wait_work / ( N_it * N_work ) );
}
// Run a dependency test that tests a simple case that should keep the thread pool busy
@@ -1035,8 +929,8 @@ int main( int argc, char *argv[] )
barrier();
pass = true;
try {
ThreadPool *tpool = new ThreadPool( MAX_NUM_THREADS - 1 );
if ( tpool->getNumThreads() != MAX_NUM_THREADS - 1 )
ThreadPool *tpool = new ThreadPool( ThreadPool::MAX_NUM_THREADS - 1 );
if ( tpool->getNumThreads() != ThreadPool::MAX_NUM_THREADS - 1 )
pass = false;
if ( !ThreadPool::is_valid( tpool ) )
pass = false;
@@ -1056,14 +950,14 @@ int main( int argc, char *argv[] )
// Print the test results
barrier();
ut.report();
int N_errors = static_cast<int>( ut.NumFailGlobal() );
auto N_errors = static_cast<int>( ut.NumFailGlobal() );
// Shut down MPI
pout << "Shutting down\n";
barrier();
#ifdef USE_TIMER
if ( rank == 0 )
MemoryApp::print( std::cout );
MemoryApp::print( pout );
#endif
#ifdef USE_MPI
MPI_Finalize();

View File

@@ -5,14 +5,14 @@
#include "ProfilerApp.h"
#include <algorithm>
#include <bitset>
#include <chrono>
#include <climits>
#include <cstdio>
#include <cstdlib>
#include <iostream>
#include <stdexcept>
#include <stdio.h>
#include <stdlib.h>
#include <typeinfo>
#include <thread>
#include <chrono>
#include <typeinfo>
#define perr std::cerr
@@ -22,6 +22,15 @@
// OS specific includes / definitions
// clang-format off
#if defined( WIN32 ) || defined( _WIN32 ) || defined( WIN64 ) || defined( _WIN64 )
#define USE_WINDOWS
#elif defined( __APPLE__ )
#define USE_MAC
#elif defined( __linux ) || defined( __unix ) || defined( __posix )
#define USE_LINUX
#else
#error Unknown OS
#endif
#if defined( USE_WINDOWS )
#include <process.h>
#include <windows.h>
@@ -54,41 +63,45 @@
// Set some macros
#if PROFILE_THREADPOOL_PERFORMANCE
#define PROFILE_THREADPOOL_START( X ) PROFILE_START( X, 3 )
#define PROFILE_THREADPOOL_START2( X ) PROFILE_START2( X, 3 )
#define PROFILE_THREADPOOL_STOP( X ) PROFILE_STOP( X, 3 )
#define PROFILE_THREADPOOL_STOP2( X ) PROFILE_STOP2( X, 3 )
#define PROFILE_THREADPOOL_START( X ) PROFILE_START( X, 3 )
#define PROFILE_THREADPOOL_START2( X ) PROFILE_START2( X, 3 )
#define PROFILE_THREADPOOL_STOP( X ) PROFILE_STOP( X, 3 )
#define PROFILE_THREADPOOL_STOP2( X ) PROFILE_STOP2( X, 3 )
#else
#define PROFILE_THREADPOOL_START( X ) \
do { \
} while ( 0 )
#define PROFILE_THREADPOOL_START2( X ) \
do { \
} while ( 0 )
#define PROFILE_THREADPOOL_STOP( X ) \
do { \
} while ( 0 )
#define PROFILE_THREADPOOL_STOP2( X ) \
do { \
} while ( 0 )
#define PROFILE_THREADPOOL_START( X ) \
do { \
} while ( 0 )
#define PROFILE_THREADPOOL_START2( X ) \
do { \
} while ( 0 )
#define PROFILE_THREADPOOL_STOP( X ) \
do { \
} while ( 0 )
#define PROFILE_THREADPOOL_STOP2( X ) \
do { \
} while ( 0 )
#endif
#if MONITOR_THREADPOOL_PERFORMANCE == 1
#define accumulate( x, t1, t2 ) AtomicOperations::atomic_add( &x, \
std::chrono::duration_cast<std::chrono::nanoseconds>(t2-t1).count() );
#define accumulate( x, t1, t2 ) \
AtomicOperations::atomic_add( \
&x, std::chrono::duration_cast<std::chrono::nanoseconds>( t2 - t1 ).count() );
#endif
#if MONITOR_THREADPOOL_PERFORMANCE == 1
static AtomicOperations::int64_atomic total_add_work_time[5] = {0,0,0,0,0};
static AtomicOperations::int64_atomic total_add_work_time[5] = { 0, 0, 0, 0, 0 };
#endif
// Helper functions
// NOTE(review): each of the three helpers below appears twice (differing only in
// formatting), which looks like two revisions of the same lines -- confirm which to keep.
// Declaration only: quicksort over a raw array of N elements (definition not shown here).
template <class T>
void quicksort( int N, T* data );
// Convenience overload: forwards the vector's size (cast to int) and data pointer to the
// raw-array quicksort above.
template <class T>
inline void quicksort( std::vector<T> &x ) { quicksort((int)x.size(),x.data()); }
// Declaration only: find_id takes a count, a pointer to thread ids and one id to look for
// (presumably returns an index into the array -- TODO confirm against the definition).
static inline int find_id( int, const ThreadPool::thread_id_t*, const ThreadPool::thread_id_t& );
template<class T>
void quicksort( int N, T *data );
template<class T>
inline void quicksort( std::vector<T> &x )
{
quicksort( (int) x.size(), x.data() );
}
static inline int find_id( int, const ThreadPool::thread_id_t *, const ThreadPool::thread_id_t & );
// Function to generate a random size_t number (excluding 0 and ~0)
@@ -116,8 +129,8 @@ static size_t rand_size_t()
/******************************************************************
* Run some basic compile-time checks *
******************************************************************/
* Run some basic compile-time checks *
******************************************************************/
#if MAX_NUM_THREADS % 64 != 0
// We use a bit array for d_active and d_cancel
#error MAX_NUM_THREADS must be a multiple of 64
@@ -130,47 +143,52 @@ static size_t rand_size_t()
// We store the indices to the queue list as short ints
#error MAX_QUEUED must < 65535
#endif
// Check the c++ std
#if CXX_STD == 98
#error Thread pool class requires c++11 or newer
#endif
/******************************************************************
* Get/Set a bit *
* Note: these functions are thread-safe *
******************************************************************/
* Get/Set a bit *
* Note: these functions are thread-safe *
******************************************************************/
// Set bit `index` in the bit array x.
//    x is addressed as consecutive 64-bit words: the bit lives in word
//    index/64 at position index%64.
//    The update is performed with a compare-and-swap retry loop so that
//    concurrent changes to other bits of the same word are not lost.
static inline void set_bit( volatile AtomicOperations::int64_atomic *x, size_t index )
{
    const uint64_t mask = uint64_t( 1 ) << ( index % 64 );
    const size_t i      = index / 64;
    bool test           = false;
    while ( !test ) {
        // Read the current word and try to publish it with the bit set;
        // retry until atomic_compare_and_swap reports success
        AtomicOperations::int64_atomic y = x[i];
        test = AtomicOperations::atomic_compare_and_swap( &x[i], y, ( y | mask ) );
    }
}
// Clear bit `index` in the bit array x.
//    x is addressed as consecutive 64-bit words: the bit lives in word
//    index/64 at position index%64.
//    The update is performed with a compare-and-swap retry loop so that
//    concurrent changes to other bits of the same word are not lost.
static inline void unset_bit( volatile AtomicOperations::int64_atomic *x, size_t index )
{
    // Mask with every bit set except the one being cleared
    const uint64_t mask = ~( uint64_t( 1 ) << ( index % 64 ) );
    const size_t i      = index / 64;
    bool test           = false;
    while ( !test ) {
        // Read the current word and try to publish it with the bit cleared;
        // retry until atomic_compare_and_swap reports success
        AtomicOperations::int64_atomic y = x[i];
        test = AtomicOperations::atomic_compare_and_swap( &x[i], y, ( y & mask ) );
    }
}
// Return true if bit `index` is set in the bit array x.
//    x is addressed as consecutive 64-bit words: the bit lives in word
//    index/64 at position index%64.
static inline bool get_bit( const volatile AtomicOperations::int64_atomic *x, size_t index )
{
    const uint64_t mask = uint64_t( 1 ) << ( index % 64 );
    // This is thread-safe since we only care about a single bit
    AtomicOperations::int64_atomic y = x[index / 64];
    return ( y & mask ) != 0;
}
/******************************************************************
* Simple function to check if the parity is odd (true) or even *
******************************************************************/
* Simple function to check if the parity is odd (true) or even *
******************************************************************/
static inline bool is_odd8( size_t x )
{ // This only works for 64-bit integers
x ^= ( x >> 1 );
@@ -181,7 +199,7 @@ static inline bool is_odd8( size_t x )
x ^= ( x >> 32 );
return ( x & 0x01 ) > 0;
}
template <class int_type>
template<class int_type>
static inline int count_bits( int_type x )
{
int count = 0;
@@ -194,8 +212,18 @@ static inline int count_bits( int_type x )
/******************************************************************
* Set the bahvior of OS warnings *
******************************************************************/
* Set the global constants *
******************************************************************/
constexpr int ThreadPool::MAX_NUM_THREADS;
constexpr int ThreadPool::MAX_QUEUED;
constexpr int ThreadPool::MAX_WAIT;
constexpr bool ThreadPool::PROFILE_THREADPOOL_PERFORMANCE;
constexpr bool ThreadPool::MONITOR_THREADPOOL_PERFORMANCE;
/******************************************************************
* Set the behavior of OS warnings *
******************************************************************/
static int global_OS_behavior = 0;
std::mutex OS_warning_mutex;
void ThreadPool::set_OS_warnings( int behavior )
@@ -213,11 +241,14 @@ static void OS_warning( const std::string &message )
}
OS_warning_mutex.unlock();
}
// Install the callback used to report exceptions thrown by work items.
//    When a handler is set, tpool_thread catches exceptions escaping
//    WorkItem::run() and passes a formatted message to the handler; when the
//    handler is empty, run() is called without a try/catch.
//    fun is taken by value, so it is moved into the member instead of copied.
void ThreadPool::setErrorHandler( std::function<void( const std::string & )> fun )
{
    d_errorHandler = std::move( fun );
}
/******************************************************************
* Function to return the number of processors available *
******************************************************************/
* Function to return the number of processors available *
******************************************************************/
int ThreadPool::getNumberOfProcessors()
{
#if defined( USE_LINUX ) || defined( USE_MAC )
@@ -233,8 +264,8 @@ int ThreadPool::getNumberOfProcessors()
/******************************************************************
* Function to return the processor number of the current thread *
******************************************************************/
* Function to return the processor number of the current thread *
******************************************************************/
int ThreadPool::getCurrentProcessor()
{
#if defined( USE_LINUX )
@@ -251,8 +282,8 @@ int ThreadPool::getCurrentProcessor()
/******************************************************************
* Function to get/set the affinity of the current process *
******************************************************************/
* Function to get/set the affinity of the current process *
******************************************************************/
std::vector<int> ThreadPool::getProcessAffinity()
{
std::vector<int> procs;
@@ -325,8 +356,8 @@ void ThreadPool::setProcessAffinity( std::vector<int> procs )
/******************************************************************
* Function to get the thread affinities *
******************************************************************/
* Function to get the thread affinities *
******************************************************************/
#ifdef USE_WINDOWS
DWORD GetThreadAffinityMask( HANDLE thread )
{
@@ -387,7 +418,7 @@ std::vector<int> ThreadPool::getThreadAffinity( int thread ) const
if ( thread >= getNumThreads() )
std::logic_error( "Invalid thread number" );
std::vector<int> procs;
auto handle = const_cast<std::thread&>( d_thread[thread] ).native_handle();
auto handle = const_cast<std::thread &>( d_thread[thread] ).native_handle();
#ifdef USE_LINUX
#ifdef _GNU_SOURCE
cpu_set_t mask;
@@ -423,8 +454,8 @@ std::vector<int> ThreadPool::getThreadAffinity( int thread ) const
/******************************************************************
* Function to set the thread affinity *
******************************************************************/
* Function to set the thread affinity *
******************************************************************/
void ThreadPool::setThreadAffinity( std::vector<int> procs )
{
#ifdef USE_LINUX
@@ -458,7 +489,7 @@ void ThreadPool::setThreadAffinity( int thread, std::vector<int> procs ) const
{
if ( thread >= getNumThreads() )
std::logic_error( "Invalid thread number" );
auto handle = const_cast<std::thread&>( d_thread[thread] ).native_handle();
auto handle = const_cast<std::thread &>( d_thread[thread] ).native_handle();
#ifdef USE_LINUX
#ifdef __USE_GNU
cpu_set_t mask;
@@ -490,15 +521,15 @@ void ThreadPool::setThreadAffinity( int thread, std::vector<int> procs ) const
/******************************************************************
* Function to perform some basic checks before we start *
******************************************************************/
* Function to perform some basic checks before we start *
******************************************************************/
void ThreadPool::check_startup( size_t size0 )
{
// Check the size of the class to make sure that we don't have any
// byte alignment problems between a library implementation and a calling package
size_t size1 = sizeof( ThreadPool );
size_t size2 = ( (size_t) &d_NULL_HEAD ) - ( ( size_t ) this ) + sizeof( size_t );
size_t size3 = ( (size_t) &d_NULL_TAIL ) - ( ( size_t ) this ) + sizeof( size_t );
size_t size2 = ( (size_t) &d_NULL_HEAD ) - ( (size_t) this ) + sizeof( size_t );
size_t size3 = ( (size_t) &d_NULL_TAIL ) - ( (size_t) this ) + sizeof( size_t );
if ( size0 != size1 || size1 < size2 || size1 < size3 )
throw std::logic_error( "Internal data format problem" );
// Check the size of variables
@@ -517,7 +548,7 @@ void ThreadPool::check_startup( size_t size0 )
ThreadPool::thread_id_t id;
if ( id.getPriority() != -128 )
pass = false;
id.reset( 3, 564, NULL );
id.reset( 3, 564, nullptr );
if ( id.getPriority() != 3 || id.getLocalID() != 564 )
pass = false;
if ( count_bits( 0x0 ) != 0 || count_bits( 0x03 ) != 2 )
@@ -530,8 +561,10 @@ void ThreadPool::check_startup( size_t size0 )
if ( is_odd8( ~( (size_t) 0 ) ) || !is_odd8( thread_id_t::maxThreadID ) )
pass = false;
for ( size_t i = 0; i < 1024; i++ ) {
if ( ( count_bits( thread_id_t::maxThreadID - i ) % 2 == 1 ) != is_odd8( thread_id_t::maxThreadID - i ) ) {
printp( "%i %i %s\n", count_bits( thread_id_t::maxThreadID - i ), is_odd8( thread_id_t::maxThreadID - i ) ? 1 : 0,
if ( ( count_bits( thread_id_t::maxThreadID - i ) % 2 == 1 ) !=
is_odd8( thread_id_t::maxThreadID - i ) ) {
printp( "%i %i %s\n", count_bits( thread_id_t::maxThreadID - i ),
is_odd8( thread_id_t::maxThreadID - i ) ? 1 : 0,
std::bitset<64>( thread_id_t::maxThreadID - i ).to_string().c_str() );
pass = false;
}
@@ -550,27 +583,28 @@ void ThreadPool::check_startup( size_t size0 )
/******************************************************************
* Function to initialize the thread pool *
******************************************************************/
* Function to initialize the thread pool *
******************************************************************/
void ThreadPool::initialize( const int N, const char *affinity, int N_procs, const int *procs )
{
// Initialize the header/tail
d_NULL_HEAD = rand_size_t();
d_NULL_TAIL = d_NULL_HEAD;
// Initialize the variables to NULL values
d_id_assign = 0;
d_signal_empty = false;
d_signal_count = 0;
d_N_threads = 0;
d_num_active = 0;
d_N_added = 0;
d_N_started = 0;
d_N_finished = 0;
d_id_assign = 0;
d_signal_empty = false;
d_signal_count = 0;
d_N_threads = 0;
d_num_active = 0;
d_N_added = 0;
d_N_started = 0;
d_N_finished = 0;
d_max_wait_time = 600;
memset( (void *) d_active, 0, MAX_NUM_THREADS / 8 );
memset( (void *) d_cancel, 0, MAX_NUM_THREADS / 8 );
d_wait_last = nullptr;
for ( int i = 0; i < MAX_WAIT; i++ )
d_wait[i] = nullptr;
for ( auto &i : d_wait )
i = nullptr;
// Initialize the id
d_id_assign = thread_id_t::maxThreadID;
// Create the threads
@@ -579,14 +613,14 @@ void ThreadPool::initialize( const int N, const char *affinity, int N_procs, con
/******************************************************************
* This is the destructor *
******************************************************************/
* This is the destructor *
******************************************************************/
ThreadPool::~ThreadPool()
{
if ( !is_valid( this ) ) {
std::cerr << "Thread pool is not valid\n";
std::terminate();
}
DISABLE_WARNINGS
if ( !is_valid( this ) )
throw std::logic_error( "Thread pool is not valid" );
ENABLE_WARNINGS
// Destroy the threads
setNumThreads( 0 );
// Delete all remaining data
@@ -598,16 +632,15 @@ ThreadPool::~ThreadPool()
// Print the performance metrics
printp( "ThreadPool Performance:\n" );
printp( "add_work: %lu us, %lu us, %lu us, %lu us, %lu us\n",
total_add_work_time[0]/1000, total_add_work_time[1]/1000,
total_add_work_time[2]/1000, total_add_work_time[3]/1000,
total_add_work_time[4]/1000 );
total_add_work_time[0] / 1000, total_add_work_time[1] / 1000, total_add_work_time[2] / 1000,
total_add_work_time[3] / 1000, total_add_work_time[4] / 1000 );
#endif
}
/******************************************************************
* Check if the pointer points to a valid thread pool object *
******************************************************************/
* Check if the pointer points to a valid thread pool object *
******************************************************************/
bool ThreadPool::is_valid( const ThreadPool *tpool )
{
if ( tpool == nullptr )
@@ -621,8 +654,8 @@ bool ThreadPool::is_valid( const ThreadPool *tpool )
/******************************************************************
* This function creates the threads in the thread pool *
******************************************************************/
* This function creates the threads in the thread pool *
******************************************************************/
void ThreadPool::setNumThreads(
int num_worker_threads, const char *affinity2, int N_procs, const int *procs )
{
@@ -643,8 +676,8 @@ void ThreadPool::setNumThreads(
int d_N_threads_diff = num_worker_threads - d_N_threads;
if ( d_N_threads_diff > 0 ) {
// Check that no threads are in the process of being deleted
for ( int i = 0; i < MAX_NUM_THREADS / 64; i++ ) {
if ( d_cancel[i] != 0 )
for ( long i : d_cancel ) {
if ( i != 0 )
throw std::logic_error(
"Threads are being created and destroyed at the same time" );
}
@@ -670,11 +703,11 @@ void ThreadPool::setNumThreads(
j++;
}
// Wait for all of the threads to finish initialization
while ( 1 ) {
std::this_thread::sleep_for( std::chrono::milliseconds(25) );
while ( true ) {
std::this_thread::sleep_for( std::chrono::milliseconds( 25 ) );
bool wait = false;
for ( int i = 0; i < MAX_NUM_THREADS / 64; i++ ) {
if ( d_cancel[i] != 0 )
for ( long i : d_cancel ) {
if ( i != 0 )
wait = true;
}
if ( !wait )
@@ -684,7 +717,7 @@ void ThreadPool::setNumThreads(
#if defined( USE_LINUX ) || defined( USE_MAC )
pthread_attr_destroy( &attr );
#endif
std::this_thread::sleep_for( std::chrono::milliseconds(25) );
std::this_thread::sleep_for( std::chrono::milliseconds( 25 ) );
delete[] tmp;
} else if ( d_N_threads_diff < 0 ) {
// Reduce the number of threads
@@ -697,7 +730,7 @@ void ThreadPool::setNumThreads(
set_bit( d_cancel, d_N_threads - 1 + i );
// Wake all threads to process the shutdown
d_wait_work.notify_all();
std::this_thread::sleep_for( std::chrono::milliseconds(25) );
std::this_thread::sleep_for( std::chrono::milliseconds( 25 ) );
// Wait for the threads to close
for ( int i = 0; i > d_N_threads_diff; i-- ) {
d_thread[d_N_threads - 1 + i].join();
@@ -732,13 +765,13 @@ void ThreadPool::setNumThreads(
// We do not have a list of cpus to use, do nothing (OS not supported)
} else if ( affinity == "none" ) {
// We are using the default thread affinities (all threads get all procs of the program)
for ( int i = 0; i < d_N_threads; i++ )
for ( int i = 0; i < d_N_threads; i++ )
t_procs[i] = cpus;
} else if ( affinity == "independent" ) {
// We want to use an independent set of processors for each thread
if ( (int) cpus.size() == d_N_threads ) {
// The number of cpus matches the number of threads
for ( int i = 0; i < d_N_threads; i++ )
for ( int i = 0; i < d_N_threads; i++ )
t_procs[i] = std::vector<int>( 1, cpus[i] );
} else if ( (int) cpus.size() > d_N_threads ) {
// There are more cpus than threads, threads will use more than one processor
@@ -752,7 +785,7 @@ void ThreadPool::setNumThreads(
}
} else {
// There are fewer cpus than threads, threads will share a processor
int N_threads_proc =
auto N_threads_proc =
static_cast<int>( ( cpus.size() + d_N_threads - 1 ) / cpus.size() );
for ( int i = 0; i < d_N_threads; i++ )
t_procs[i].push_back( cpus[i / N_threads_proc] );
@@ -776,10 +809,10 @@ void ThreadPool::setNumThreads(
/******************************************************************
* This is the function that controls the individual thread and *
* allows it to do work. *
* Note: this function is lock free *
******************************************************************/
* This is the function that controls the individual thread and *
* allows it to do work. *
* Note: this function is lock free *
******************************************************************/
void ThreadPool::tpool_thread( int thread_id )
{
bool shutdown = false;
@@ -797,8 +830,8 @@ void ThreadPool::tpool_thread( int thread_id )
try {
std::vector<int> cpus = ThreadPool::getProcessAffinity();
printp( "%i cpus for current thread: ", (int) cpus.size() );
for ( size_t i = 0; i < cpus.size(); i++ )
printp( "%i ", cpus[i] );
for ( int cpu : cpus )
printp( "%i ", cpu );
printp( "\n" );
} catch ( ... ) {
printp( "Unable to get process affinity\n" );
@@ -811,24 +844,39 @@ void ThreadPool::tpool_thread( int thread_id )
// Check if there is work to do
if ( d_queue_list.size() > 0 ) {
// Get next work item to process
auto work_id = d_queue_list.remove( []( const thread_id_t& id ) { return id.ready(); } );
auto work_id =
d_queue_list.remove( []( const thread_id_t &id ) { return id.ready(); } );
if ( work_id.isNull() ) {
std::this_thread::yield();
continue;
}
WorkItem *work = work_id.work( );
WorkItem *work = work_id.work();
AtomicOperations::atomic_increment( &d_N_started );
// Start work here
PROFILE_THREADPOOL_START( "thread working" );
work->d_state = 2;
work->run();
work->d_state = 3;
work->d_state = 2;
if ( d_errorHandler ) {
try {
work->run();
} catch ( std::exception &e ) {
auto msg = Utilities::stringf(
"Error, caught exception in thread %i:\n %s\n", thread_id, e.what() );
d_errorHandler( msg );
} catch ( ... ) {
auto msg = Utilities::stringf(
"Error, caught unknown exception in thread %i\n", thread_id );
d_errorHandler( msg );
}
} else {
work->run();
}
work->d_state = 3;
PROFILE_THREADPOOL_STOP( "thread working" );
AtomicOperations::atomic_increment( &d_N_finished );
// Check if any threads are waiting on the current work item
// This can be done without blocking
for ( int i = 0; i < MAX_WAIT; i++ ) {
const wait_ids_struct *wait = const_cast<const wait_ids_struct *>(d_wait[i]);
for ( auto &i : d_wait ) {
auto wait = AtomicOperations::atomic_get( &i );
if ( wait != nullptr )
wait->id_finished( work_id );
}
@@ -849,7 +897,7 @@ void ThreadPool::tpool_thread( int thread_id )
}
// Wait for work
PROFILE_THREADPOOL_STOP2( "thread active" );
d_wait_work.wait_for(1e-3);
d_wait_work.wait_for( 1e-3 );
PROFILE_THREADPOOL_START2( "thread active" );
AtomicOperations::atomic_increment( &d_num_active );
set_bit( d_active, thread_id );
@@ -865,21 +913,22 @@ void ThreadPool::tpool_thread( int thread_id )
/******************************************************************
* This is the function that adds work to the thread pool *
* Note: this version uses a last in - first out work scheduling. *
******************************************************************/
inline void ThreadPool::add_work( const ThreadPool::thread_id_t& id )
* This is the function that adds work to the thread pool *
* Note: this version uses a last in - first out work scheduling. *
******************************************************************/
inline void ThreadPool::add_work( const ThreadPool::thread_id_t &id )
{
auto work = id.work();
auto work = id.work();
work->d_state = 1;
// Check and change priorities of dependency ids
const int priority = id.getPriority();
for (int i=0; i<work->d_N_ids; i++) {
const auto& id1 = work->d_ids[i];
if ( !id1.started() && id1<id ) {
for ( int i = 0; i < work->d_N_ids; i++ ) {
const auto &id1 = work->d_ids[i];
if ( !id1.started() && id1 < id ) {
// Remove and add the id back with a higher priority
auto id2 = d_queue_list.remove( []( const thread_id_t& a, const thread_id_t& b ) { return a==b; }, id1 );
id2.setPriority( std::max(priority,id2.getPriority()) );
auto id2 = d_queue_list.remove(
[]( const thread_id_t &a, const thread_id_t &b ) { return a == b; }, id1 );
id2.setPriority( std::max( priority, id2.getPriority() ) );
d_queue_list.insert( id2 );
}
}
@@ -894,7 +943,7 @@ void ThreadPool::add_work(
if ( N > block_size ) {
size_t i = 0;
while ( i < N ) {
add_work( std::min(N-i,block_size), &work[i], &priority[i], &ids[i] );
add_work( std::min( N - i, block_size ), &work[i], &priority[i], &ids[i] );
i += block_size;
}
return;
@@ -905,7 +954,7 @@ void ThreadPool::add_work(
#endif
// Create the thread ids (can be done without blocking)
for ( size_t i = 0; i < N; i++ )
ids[i].reset( priority[i], AtomicOperations::atomic_decrement(&d_id_assign), work[i] );
ids[i].reset( priority[i], AtomicOperations::atomic_decrement( &d_id_assign ), work[i] );
#if MONITOR_THREADPOOL_PERFORMANCE
auto t2 = std::chrono::high_resolution_clock::now();
accumulate( total_add_work_time[0], t1, t2 );
@@ -913,23 +962,23 @@ void ThreadPool::add_work(
// If there are no threads, perform the work immediately
if ( d_N_threads < 1 ) {
for ( size_t i = 0; i < N; i++ ) {
work[i]->d_state = 2;
work[i]->d_state = 2;
work[i]->run();
work[i]->d_state = 3;
work[i]->d_state = 3;
}
#if MONITOR_THREADPOOL_PERFORMANCE
auto t5 = std::chrono::high_resolution_clock::now();
accumulate( total_add_work_time[4], t2, t5 );
#endif
#if MONITOR_THREADPOOL_PERFORMANCE
auto t5 = std::chrono::high_resolution_clock::now();
accumulate( total_add_work_time[4], t2, t5 );
#endif
PROFILE_THREADPOOL_STOP2( "add_work" );
return;
}
// Wait for enough room in the queue (doesn't need blocking since it isn't that precise)
if ( N > static_cast<size_t>( MAX_QUEUED - d_queue_list.size() ) ) {
int N_wait = static_cast<int>( N - ( MAX_QUEUED - d_queue_list.size() ) );
auto N_wait = static_cast<int>( N - ( MAX_QUEUED - d_queue_list.size() ) );
while ( N_wait > 0 ) {
d_signal_count = static_cast<unsigned char>( std::min( N_wait, 255 ) );
d_wait_finished.wait_for(1e-4);
d_wait_finished.wait_for( 1e-4 );
N_wait = static_cast<int>( N - ( MAX_QUEUED - d_queue_list.size() ) );
}
}
@@ -965,19 +1014,8 @@ void ThreadPool::add_work(
/******************************************************************
* This function removes a finished work item *
******************************************************************/
// Return the work item associated with id if the id reports that it has
// finished; otherwise return a null pointer
ThreadPool::WorkItem *ThreadPool::getFinishedWorkItem( ThreadPool::thread_id_t id ) const
{
    ThreadPool::WorkItem *item = nullptr;
    if ( id.finished() )
        item = id.work();
    return item;
}
/******************************************************************
* This function waits for some of the work items to finish *
******************************************************************/
* This function waits for some of the work items to finish *
******************************************************************/
static inline void check_finished(
size_t N_work, const ThreadPool::thread_id_t *ids, size_t &N_finished, bool *finished )
{
@@ -1004,8 +1042,8 @@ int ThreadPool::wait_some(
N_finished++;
}
size_t local_id = ids[k].getLocalID();
bool test = local_id == 0 || local_id > thread_id_t::maxThreadID || local_id <= next_id;
test = test && !finished[k];
bool test = local_id == 0 || local_id > thread_id_t::maxThreadID || local_id <= next_id;
test = test && !finished[k];
if ( test )
throw std::logic_error( "Invalid ids for wait" );
}
@@ -1018,7 +1056,7 @@ int ThreadPool::wait_some(
auto tmp = new wait_ids_struct( N_work, ids, N_wait, d_cond_pool, MAX_WAIT, d_wait );
// Wait for the ids
auto t1 = std::chrono::high_resolution_clock::now();
while ( !tmp->wait_for(0.01) ) {
while ( !tmp->wait_for( 0.01 ) ) {
check_wait_time( t1 );
}
// Update the ids that have finished
@@ -1027,33 +1065,35 @@ int ThreadPool::wait_some(
throw std::logic_error( "Internal error: failed to wait" );
// Delete the wait event struct
// Note: we want to maintain the reference in case a thread is still using it
// Note: technically this should be atomic
std::swap(d_wait_last,tmp);
// Note: technically this should be atomic, but it really isn't necessary here
std::swap( d_wait_last, tmp );
delete tmp;
return N_finished;
}
/******************************************************************
* This function waits for all of the threads to finish their work *
******************************************************************/
void ThreadPool::check_wait_time( std::chrono::time_point<std::chrono::high_resolution_clock>& t1 ) const
* This function waits for all of the threads to finish their work *
******************************************************************/
void ThreadPool::check_wait_time(
std::chrono::time_point<std::chrono::high_resolution_clock> &t1 ) const
{
auto t2 = std::chrono::high_resolution_clock::now();
if ( std::chrono::duration_cast<std::chrono::seconds>(t2-t1).count() > MAX_WAIT_TIME_DEBUG ) {
std::cout << "Warning: Maximum wait time in ThreadPool exceeded, threads may be hung\n";
std::cout << "N_active: " << d_num_active << std::endl;
std::cout << "N_queued: " << d_queue_list.size() << std::endl;
std::cout << "N_added: " << d_N_added << std::endl;
std::cout << "N_started: " << d_N_started << std::endl;
std::cout << "N_finished: " << d_N_finished << std::endl;
std::cout << "queue.insert(): " << d_queue_list.N_insert() << std::endl;
std::cout << "queue.remove(): " << d_queue_list.N_remove() << std::endl;
std::cout << "Stack Trace:\n";
auto call_stack = StackTrace::getAllCallStacks( );
if ( std::chrono::duration_cast<std::chrono::seconds>( t2 - t1 ).count() > d_max_wait_time ) {
pout << "Warning: Maximum wait time in ThreadPool exceeded, threads may be hung\n";
pout << "N_active: " << d_num_active << std::endl;
pout << "N_queued: " << d_queue_list.size() << std::endl;
pout << "N_added: " << d_N_added << std::endl;
pout << "N_started: " << d_N_started << std::endl;
pout << "N_finished: " << d_N_finished << std::endl;
pout << "queue.insert(): " << d_queue_list.N_insert() << std::endl;
pout << "queue.remove(): " << d_queue_list.N_remove() << std::endl;
pout << "Stack Trace:\n";
auto call_stack = StackTrace::getAllCallStacks();
StackTrace::cleanupStackTrace( call_stack );
auto text = call_stack.print( " " );
for ( auto& line : text )
std::cout << line << std::endl;
for ( auto &line : text )
pout << line << std::endl;
t1 = std::chrono::high_resolution_clock::now();
}
}
@@ -1068,82 +1108,91 @@ void ThreadPool::wait_pool_finished() const
while ( d_num_active > 0 || d_queue_list.size() > 0 ) {
check_wait_time( t1 );
d_signal_empty = true;
d_wait_finished.wait_for(10e-6);
d_wait_finished.wait_for( 10e-6 );
}
d_signal_empty = false;
}
/******************************************************************
* Member functions of wait_ids_struct *
******************************************************************/
// Construct a wait record for a set of ids and register it in the shared
// wait list.
//    N           - number of entries in ids
//    ids         - ids to wait on
//    N_wait      - number of ids that must finish before the wait is satisfied
//    cv_pool     - pool that supplies the condition variable used for this wait
//    N_wait_list - number of slots in list
//    list        - shared array of wait records; this object is stored in the
//                  first slot found to be null
ThreadPool::wait_ids_struct::wait_ids_struct( size_t N, const ThreadPool::thread_id_t *ids,
    size_t N_wait, AtomicOperations::pool<condition_variable, 128> &cv_pool, int N_wait_list,
    volatile wait_ids_struct **list )
    : d_wait( N_wait ), d_N( 0 ), d_cv_pool( cv_pool ), d_wait_event( cv_pool.get() )
{
    // Keep only the ids that have not finished yet; every id that is already
    // finished reduces the number we still need to wait for (never below 0)
    d_ids = new ThreadPool::thread_id_t[N];
    for ( size_t i = 0; i < N; i++ ) {
        if ( ids[i].finished() )
            d_wait = std::max( d_wait - 1, 0 );
        else
            d_ids[d_N++] = ids[i];
    }
    // find_id (used by id_finished) expects a sorted array
    quicksort( d_N, d_ids );
    d_finished = new bool[d_N];
    memset( (void *) d_finished, 0, d_N );
    // Claim a slot in the wait list: cycle through the slots until a
    // compare-and-swap from nullptr to this succeeds
    int i = 0;
    while (
        !AtomicOperations::atomic_compare_and_swap( (void *volatile *) &list[i], nullptr, this ) ) {
        i = ( i + 1 ) % N_wait_list;
    }
    d_ptr = &list[i];
}
void ThreadPool::wait_ids_struct::id_finished( const ThreadPool::thread_id_t& id ) const
// Release the resources held by the wait record: the condition variable goes
// back to the pool it was taken from and the two arrays allocated in the
// constructor are freed
ThreadPool::wait_ids_struct::~wait_ids_struct()
{
    d_cv_pool.put( d_wait_event );
    delete[] d_ids;
    delete[] d_finished;
}
// Record that the work item with the given id has finished.
//    If id is one of the ids tracked by this wait record it is marked as
//    finished.  Once at least d_wait tracked ids are finished, the record is
//    cleared, its slot in the wait list is released and all waiters on the
//    condition variable are notified.  Ids that are not tracked are ignored.
void ThreadPool::wait_ids_struct::id_finished( const ThreadPool::thread_id_t &id ) const
{
    const int index = find_id( d_N, d_ids, id );
    if ( index < 0 )
        return; // Not one of the ids this record is waiting on
    d_finished[index] = true;
    // Count how many of the tracked ids have finished so far
    int N_finished = 0;
    for ( int i = 0; i < d_N; i++ )
        N_finished += d_finished[i] ? 1 : 0;
    if ( N_finished >= d_wait ) {
        d_N    = 0;
        d_wait = 0;
        // Release the wait-list slot by swapping its current value for nullptr
        AtomicOperations::atomic_compare_and_swap(
            (void *volatile *) d_ptr, (void *) *d_ptr, nullptr );
        d_wait_event->notify_all();
    }
}
// Wait until enough of the tracked ids have finished or the timeout expires.
//    seconds - maximum time to wait, in seconds
//    Returns true if at least d_wait ids finished (or nothing is being
//    tracked), in which case the wait-list slot is released and the record is
//    cleared; returns false if the timeout expired first.
bool ThreadPool::wait_ids_struct::wait_for( double seconds )
{
    // Pick up any ids that finished before a worker thread could report them
    for ( int i = 0; i < d_N; i++ ) {
        if ( d_ids[i].finished() )
            d_finished[i] = true;
    }
    auto t1 = std::chrono::high_resolution_clock::now();
    while ( true ) {
        // Count how many of the tracked ids have finished so far
        int N_finished = 0;
        for ( int i = 0; i < d_N; i++ )
            N_finished += d_finished[i] ? 1 : 0;
        if ( N_finished >= d_wait || d_N == 0 ) {
            *d_ptr = nullptr;
            d_wait = 0;
            d_N    = 0;
            break;
        }
        // Give up once the requested time has elapsed
        auto t2 = std::chrono::high_resolution_clock::now();
        if ( 1e-6 * std::chrono::duration_cast<std::chrono::microseconds>( t2 - t1 ).count() >
             seconds )
            return false;
        // Wait briefly on the condition variable before re-checking
        d_wait_event->wait_for( 1e-5 );
    }
    return true;
}
/******************************************************************
* templated quicksort routine *
******************************************************************/
template <class T>
* templated quicksort routine *
******************************************************************/
template<class T>
void quicksort( int n, T *arr )
{
if ( n <= 1 )
@@ -1154,7 +1203,7 @@ void quicksort( int n, T *arr )
jstack = 0;
l = 0;
ir = n - 1;
while ( 1 ) {
while ( true ) {
if ( ir - l < 7 ) { // Insertion sort when subarray small enough.
for ( j = l + 1; j <= ir; j++ ) {
a = arr[j];
@@ -1231,8 +1280,8 @@ void quicksort( int n, T *arr )
/************************************************************************
* Function to find the id in a sorted vector *
************************************************************************/
* Function to find the id in a sorted vector *
************************************************************************/
inline int find_id( int n, const ThreadPool::thread_id_t *x, const ThreadPool::thread_id_t &id )
{
if ( n == 0 )
@@ -1243,7 +1292,7 @@ inline int find_id( int n, const ThreadPool::thread_id_t *x, const ThreadPool::t
if ( id < x[0] )
return -1;
if ( id == x[n - 1] )
return n-1;
return n - 1;
if ( id > x[n - 1] )
return -1;
// Perform the search
@@ -1264,13 +1313,13 @@ inline int find_id( int n, const ThreadPool::thread_id_t *x, const ThreadPool::t
/************************************************************************
* Function to add dependencies to the work item *
* Note: when expanding the size of d_ids, we need to allocate space for *
* one extra entry for a spinlock. *
************************************************************************/
* Function to add dependencies to the work item *
* Note: when expanding the size of d_ids, we need to allocate space for *
* one extra entry for a spinlock. *
************************************************************************/
void ThreadPool::WorkItem::add_dependencies( size_t N, const ThreadPool::thread_id_t *ids )
{
if ( d_state!=0 ) {
if ( d_state != 0 ) {
// The item has already been added to the threadpool,
// we are not allowed to add dependencies
throw std::logic_error(
@@ -1291,9 +1340,9 @@ void ThreadPool::WorkItem::add_dependencies( size_t N, const ThreadPool::thread_
for ( size_t i = 0; i < d_N_ids; i++ )
const_cast<thread_id_t &>( ids[i] ).swap( tmp[i] );
delete[] tmp;
d_size = N2;
int* lock = reinterpret_cast<int*>(&d_ids[d_size-1]);
*lock = 0;
d_size = N2;
auto *lock = reinterpret_cast<int *>( &d_ids[d_size - 1] );
*lock = 0;
}
const ThreadPool::thread_id_t id0;
for ( size_t i = 0; i < N; i++ ) {

View File

@@ -3,53 +3,25 @@
// PARTICULAR PURPOSE.
#ifndef included_AtomicModelThreadPool
#define included_AtomicModelThreadPool
#include <condition_variable>
#include <iostream>
#include <map>
#include <mutex>
#include <stdarg.h>
#include <stdexcept>
#include <stdio.h>
#include <string.h>
#include <thread>
#include <typeinfo>
#include <vector>
#include <mutex>
#include <thread>
#include <condition_variable>
#include "threadpool/atomic_helpers.h"
#include "threadpool/atomic_list.h"
// Choose the OS
#if defined( WIN32 ) || defined( _WIN32 ) || defined( WIN64 ) || defined( _WIN64 )
// Using windows
#define USE_WINDOWS
#elif defined( __APPLE__ )
// Using MAC
#define USE_MAC
#elif defined( __linux ) || defined( __unix ) || defined( __posix )
// Using linux
#define USE_LINUX
#else
#error Unknown OS
#endif
// Set some definitions
#define MAX_NUM_THREADS 128 // The maximum number of threads (must be a multiple of 64)
#define MAX_QUEUED 1024 // The maximum number of items in the work queue at any moment
#define MAX_WAIT 16 // The maximum number of active waits at any given time
#define MAX_WAIT_TIME_DEBUG 600 // The maximum time in a wait command before printing a warning message
#define PROFILE_THREADPOOL_PERFORMANCE 0 // Add profile timers to the threadpool (default is 0)
#define MONITOR_THREADPOOL_PERFORMANCE 0 // Add detailed performance counters (default is 0)
// Check the c++ std
#if CXX_STD==98
#error Thread pool class requires c++11 or newer
#endif
// clang-format off
/** \class ThreadPool
@@ -75,6 +47,13 @@
*/
class ThreadPool
{
public:
///// Set some global properties
constexpr static int MAX_NUM_THREADS = 128; // The maximum number of threads (must be a multiple of 64)
constexpr static int MAX_QUEUED = 1024; // The maximum number of items in the work queue at any moment
constexpr static int MAX_WAIT = 16; // The maximum number of active waits at any given time
constexpr static bool PROFILE_THREADPOOL_PERFORMANCE = false; // Add profile timers to the threadpool
constexpr static bool MONITOR_THREADPOOL_PERFORMANCE = false; // Add detailed performance counters
public:
///// Member classes
@@ -102,7 +81,7 @@ public:
inline thread_id_t( volatile thread_id_t &&rhs );
inline thread_id_t &operator=( const thread_id_t &rhs ) volatile;
inline thread_id_t &operator=( volatile thread_id_t &&rhs ) volatile;
#ifndef USE_WINDOWS
#if !defined( WIN32 ) && !defined( _WIN32 ) && !defined( WIN64 ) && !defined( _WIN64 )
inline thread_id_t( const thread_id_t &rhs );
inline thread_id_t &operator=( thread_id_t &&rhs );
inline thread_id_t &operator=( const thread_id_t &rhs );
@@ -245,7 +224,7 @@ public:
//! Run the work item
virtual void run() override = 0;
//! Will the routine return a result
virtual bool has_result() const override = 0;
virtual bool has_result() const override final { return !std::is_same<return_type,void>::value; }
//! Return the results
return_type get_results() const { return d_result; }
//! Virtual destructor
@@ -353,10 +332,12 @@ public:
* in the ThreadPool without checking the existing work unless the desired number of
* threads is 0. In this case, the function will wait for all work items to finish
* before deleting the existing work threads.
* Member threads may not call this function.
* @param N The desired number of worker threads
* @param affinity The affinity scheduler to use:
* none - Let the OS handle the affinities (default)
* independent - Give each thread an independent set of processors
* @param procs The processors to use (defaults to the process affinity list)
*/
@@ -368,6 +349,16 @@ public:
}
/*!
* \brief Function to set the maximum wait time
* \details This function sets the maximum time the thread pool will
* wait before warning about a possible hung thread.
* Default is to wait 10 minutes.
* @param time The maximum wait time (in seconds)
*/
inline void setMaxWaitTimeDebug( const int time ) { d_max_wait_time = time; }
/*!
* \brief Function to return the current thread number
* \details This function will return the thread number of current active thread.
@@ -400,16 +391,14 @@ public:
* @param id The id of the work item
*/
template <class return_type>
inline return_type getFunctionRet( const thread_id_t &id ) const;
static inline return_type getFunctionRet( const thread_id_t &id );
/*!
* \brief Function to create a work item
* \details This function creates a work item that can be added to the queue
* @param work Pointer to the work item to add
* Note that the threadpool will automatically destroy the item when
* finished
* @param priority A value indicating the priority of the work item (0-default)
* @param routine Function to call from the thread pool
* @param args Function arguments to pass
*/
template <class Ret, class... Args>
static inline WorkItem* createWork( Ret( *routine )( Args... ), Args... args );
@@ -505,6 +494,7 @@ public:
* If successful it returns the indices of the finished work items (the index in the array ids).
* Note: any thread may call this routine, but they will block until finished.
* For worker threads this may eventually lead to a deadlock.
* @param N_wait Number of work items to wait for
* @param ids Vector of work items to wait for
*/
inline std::vector<int> wait_some( int N_wait, const std::vector<thread_id_t> &ids ) const;
@@ -552,6 +542,69 @@ public:
//! Return the number of items queued
int N_queued( ) const { return d_queue_list.size(); }
//! Set the error handler for threads
void setErrorHandler( std::function<void(const std::string&)> fun );
public: // Static interface
/*!
* \brief Function to return the number of work threads
* \details This function returns the number of threads in the thread pool,
* or 0 if the thread pool is empty or does not exist
* @param tpool Threadpool to add work to (may be null)
*/
// Number of worker threads in tpool; a null pool is reported as having 0 threads
static inline int numThreads( const ThreadPool* tpool )
{
    if ( tpool == nullptr )
        return 0;
    return tpool->getNumThreads();
}
/*!
* \brief Function to add a work item
* \details This function adds a work item to the queue
* Note: any thread may call this routine.
* @param tpool Threadpool to add work to (may be null)
* @param work Pointer to the work item to add
* Note that the threadpool will automatically destroy the item when finished
* @param priority A value indicating the priority of the work item (0-default)
*/
static inline thread_id_t add_work( ThreadPool* tpool, ThreadPool::WorkItem *work, int priority = 0 );
/*!
* \brief Function to add multiple work items
* \details This function adds multiple work items to the queue
* Note: any thread may call this routine.
* @param tpool Threadpool to add work to (may be null)
* @param work Vector of pointers to the work items to add
* Note that the threadpool will automatically destroy the item when finished
* @param priority Vector of values indicating the priority of the work items
*/
static inline std::vector<thread_id_t> add_work( ThreadPool* tpool, const std::vector<ThreadPool::WorkItem *> &work,
const std::vector<int> &priority = std::vector<int>() );
/*!
* \brief Function to wait until all of the given work items have finished their work
* \details This function waits for all of the given work items to finish. It returns 0
* if successful.
* Note: any thread may call this routine, but they will block until finished.
* For worker threads this may eventually lead to a deadlock.
* @param tpool Threadpool containing work (must match call to add_work)
* @param ids Vector of work items to wait for
*/
static inline int wait_all( const ThreadPool* tpool, const std::vector<thread_id_t> &ids );
/*!
* \brief Function to wait until all work items in the thread pool have finished their work
* \details This function will wait until all work has finished.
* Note: member threads may not call this function.
* Only one non-member thread should call this routine at a time.
* @param tpool Threadpool containing work (must match call to add_work)
*/
// Wait for all work in tpool to finish; does nothing when tpool is null
static inline void wait_pool_finished( const ThreadPool* tpool )
{
    if ( tpool != nullptr )
        tpool->wait_pool_finished();
}
private:
typedef AtomicOperations::int32_atomic int32_atomic;
@@ -593,7 +646,7 @@ private:
public:
wait_ids_struct( size_t N, const ThreadPool::thread_id_t *ids, size_t N_wait,
AtomicOperations::pool<condition_variable,128>& cv_pool, int N_wait_list, volatile wait_ids_struct **list );
~wait_ids_struct( ) { d_cv_pool.put( d_wait_event ); delete [] d_finished; delete [] d_ids; }
~wait_ids_struct( );
void id_finished( const ThreadPool::thread_id_t& id ) const;
bool wait_for( double seconds );
private:
@@ -628,7 +681,10 @@ private:
inline void add_work( const ThreadPool::thread_id_t& id );
// Function to get a work item that has finished
// Returns the work item attached to id once id reports finished(), otherwise nullptr.
// Only the static form is declared: keeping the former non-static by-value overload
// next to it would make every call ambiguous.
static inline WorkItem *getFinishedWorkItem( const ThreadPool::thread_id_t& id )
{
    return id.finished() ? id.work() : nullptr;
}
// This function provides a wrapper (needed for the threads)
static inline void create_new_thread( ThreadPool *tpool, int id )
@@ -676,10 +732,13 @@ private:
std::thread::id d_threadId[MAX_NUM_THREADS]; // Unique id for each thread
queue_type d_queue_list; // The work queue
size_t d_NULL_TAIL; // Null data buffer to check memory bounds
int d_max_wait_time; // The maximum time in a wait command before printing a warning message
std::function<void(const std::string&)> d_errorHandler;
};
#include "threadpool/thread_pool.hpp"
// clang-format on
#endif

View File

@@ -23,7 +23,7 @@
*/
#define TPOOL_TUPLE_TO_SEQ( t ) TPOOL_TUPLE_TO_SEQ_##II t
#define TPOOL_TUPLE_TO_SEQ_II( a, ... ) a, ##__VA_ARGS__
#ifdef USE_WINDOWS
#if defined( WIN32 ) || defined( _WIN32 ) || defined( WIN64 ) || defined( _WIN64 )
#define TPOOL_GET_PRIORITY( a, N, c, ... ) N
#define TPOOL_ADD_WORK( TPOOL, FUNCTION, ARGS, ... ) \
ThreadPool_add_work( TPOOL, TPOOL_GET_PRIORITY( 0, __VA_ARGS__, 0, 0 ) + 0, FUNCTION, \
@@ -40,35 +40,35 @@
// \cond HIDDEN_SYMBOLS
// Unpack a tuple and call a function
// Compile-time list of integer indexes
template<int...>
struct index_tuple {
};
// Recursive builder: consumes one type per step and appends the current index I
template<int I, typename IndexTuple, typename... Types>
struct make_indexes_impl;
template<int I, int... Indexes, typename T, typename... Types>
struct make_indexes_impl<I, index_tuple<Indexes...>, T, Types...> {
    typedef typename make_indexes_impl<I + 1, index_tuple<Indexes..., I>, Types...>::type type;
};
// Terminal case: no types left, expose the accumulated index list
template<int I, int... Indexes>
struct make_indexes_impl<I, index_tuple<Indexes...>> {
    typedef index_tuple<Indexes...> type;
};
// make_indexes<Types...>::type is index_tuple<0, 1, ..., sizeof...(Types)-1>
template<typename... Types>
struct make_indexes : make_indexes_impl<0, index_tuple<>, Types...> {
};
// Call pf with the elements of tup expanded into individual arguments.
// tup is taken by rvalue reference, so by-value arguments are moved out of it.
template<class Ret, class... Args, int... Indexes>
inline Ret apply_helper(
    Ret ( *pf )( Args... ), index_tuple<Indexes...>, std::tuple<Args...> &&tup )
{
    return pf( std::forward<Args>( std::get<Indexes>( tup ) )... );
}
// Call pf with the elements of tup as arguments.
// A temporary copy of the tuple is unpacked, so the caller's tuple is left untouched.
template<class Ret, class... Args>
inline Ret apply( Ret ( *pf )( Args... ), const std::tuple<Args...> &tup )
{
    return apply_helper( pf, typename make_indexes<Args...>::type(), std::tuple<Args...>( tup ) );
}
template <class Ret, class... Args>
template<class Ret, class... Args>
inline Ret apply( Ret ( *pf )( Args... ), std::tuple<Args...> &&tup )
{
return apply_helper(
@@ -77,21 +77,21 @@ inline Ret apply( Ret ( *pf )( Args... ), std::tuple<Args...> &&tup )
// Specialization for no return argument
template<>
class ThreadPool::WorkItemRet<void> : public ThreadPool::WorkItem
{
public:
    //! Run the work item (implemented by the concrete work item)
    virtual void run() override = 0;
    //! No-op: a void work item has no result to return
    void get_results() {}
    //! Virtual destructor
    virtual ~WorkItemRet() {}
    //! A void work item never has a result; final so derived classes cannot change it
    virtual bool has_result() const override final { return false; }
};
// Final class for the work item
template <class Ret, class... Args>
template<class Ret, class... Args>
class WorkItemFull;
template <class... Args>
template<class... Args>
class WorkItemFull<void, Args...> : public ThreadPool::WorkItemRet<void>
{
private:
@@ -104,14 +104,10 @@ public:
: ThreadPool::WorkItemRet<void>(), routine( routine2 ), args( ts... )
{
}
virtual void run() override
{
apply( routine, args );
}
virtual bool has_result() const override { return false; }
virtual void run() override { apply( routine, args ); }
virtual ~WorkItemFull() {}
};
template <class Ret, class... Args>
template<class Ret, class... Args>
class WorkItemFull : public ThreadPool::WorkItemRet<Ret>
{
private:
@@ -124,62 +120,60 @@ public:
: ThreadPool::WorkItemRet<Ret>(), routine( routine2 ), args( ts... )
{
}
virtual void run() override
{
this->d_result = apply( routine, args );
}
virtual bool has_result() const override { return true; }
virtual void run() override { this->d_result = apply( routine, args ); }
virtual ~WorkItemFull() {}
};
// Functions to add work to the thread pool
// Create a work item that calls routine( ts... ) and submit it with the given priority.
// Submission goes through the static ThreadPool::add_work( tpool, ... ) overload, which
// accepts a null tpool, instead of dereferencing tpool directly.
template<class Ret, class... Ts>
inline ThreadPool::thread_id_t ThreadPool_add_work(
    ThreadPool *tpool, int priority, Ret ( *routine )( Ts... ), Ts... ts )
{
    auto work = new WorkItemFull<Ret, Ts...>( routine, ts... );
    return ThreadPool::add_work( tpool, work, priority );
}
// Overload for routines that take no arguments.
// The unnamed void* parameter is ignored.
// NOTE(review): presumably a placeholder passed by the TPOOL_ADD_WORK macro when the
// argument list is empty; confirm against the macro definition.
// Submission goes through the static ThreadPool::add_work( tpool, ... ) overload, which
// accepts a null tpool, instead of dereferencing tpool directly.
template<class Ret>
inline ThreadPool::thread_id_t ThreadPool_add_work(
    ThreadPool *tpool, int priority, Ret ( *routine )(), void * )
{
    auto work = new WorkItemFull<Ret>( routine );
    return ThreadPool::add_work( tpool, work, priority );
}
// Allocate a work item that will call routine( args... ) when run.
// The item is heap allocated and returned as a raw pointer; nothing is queued here.
template<class Ret, class... Args>
inline ThreadPool::WorkItem *ThreadPool::createWork( Ret ( *routine )( Args... ), Args... args )
{
    return new WorkItemFull<Ret, Args...>( routine, args... );
}
/******************************************************************
* Function to get the returned function value *
******************************************************************/
template <class T> inline constexpr T zeroConstructor();
template<> inline constexpr bool zeroConstructor<bool>( ) { return false; }
template<> inline constexpr char zeroConstructor<char>( ) { return 0; }
template<> inline constexpr unsigned char zeroConstructor<unsigned char>( ) { return 0; }
template<> inline constexpr int zeroConstructor<int>( ) { return 0; }
template<> inline constexpr unsigned int zeroConstructor<unsigned int>( ) { return 0; }
template<> inline constexpr long zeroConstructor<long>( ) { return 0; }
template<> inline constexpr unsigned long zeroConstructor<unsigned long>( ) { return 0; }
template<> inline constexpr float zeroConstructor<float>( ) { return 0; }
template<> inline constexpr double zeroConstructor<double>( ) { return 0; }
template <class T> inline constexpr T zeroConstructor() { return T(); }
template <class Ret>
inline Ret ThreadPool::getFunctionRet( const ThreadPool::thread_id_t &id ) const
* Function to get the returned function value *
******************************************************************/
// clang-format off
// Return a value-initialized T: false / 0 for the built-in arithmetic types and a
// default-constructed object for class types.  Value-initialization already zeroes the
// fundamental types, so no per-type specializations are needed.
template<class T> inline constexpr T zeroConstructor() { return T(); }
// Return the result stored in the work item attached to id.
// If the item has not finished, or it is not a WorkItemRet<Ret>, a value-initialized
// Ret is returned instead.
template<class Ret>
inline Ret ThreadPool::getFunctionRet( const ThreadPool::thread_id_t &id )
{
    // getFinishedWorkItem yields nullptr for unfinished ids; dynamic_cast yields nullptr
    // when the item does not produce a Ret
    auto work = dynamic_cast<WorkItemRet<Ret> *>( getFinishedWorkItem( id ) );
    return work == nullptr ? zeroConstructor<Ret>() : work->get_results();
}
// clang-format on
/******************************************************************
* Inline functions to wait for the work items to finish *
******************************************************************/
* Inline functions to wait for the work items to finish *
******************************************************************/
inline int ThreadPool::wait( ThreadPool::thread_id_t id ) const
{
bool finished;
@@ -218,7 +212,7 @@ inline int ThreadPool::wait_any( const std::vector<thread_id_t> &ids ) const
}
inline int ThreadPool::wait_all( size_t N_work, const ThreadPool::thread_id_t *ids ) const
{
if ( N_work==0 )
if ( N_work == 0 )
return 0;
auto finished = new bool[N_work];
wait_some( N_work, ids, N_work, finished );
@@ -234,25 +228,32 @@ inline int ThreadPool::wait_all( const std::vector<thread_id_t> &ids ) const
delete[] finished;
return 0;
}
inline std::vector<int> ThreadPool::wait_some( int N_wait, const std::vector<thread_id_t> &ids ) const
// Static form of wait_all: forwards to the pool when one is given.
// No scratch buffer is allocated here; the forwarded call manages its own storage.
inline int ThreadPool::wait_all( const ThreadPool *tpool, const std::vector<thread_id_t> &ids )
{
    if ( tpool )
        return tpool->wait_all( ids );
    // NOTE(review): without a pool this returns the number of ids, while the interface
    // documentation says wait_all returns 0 on success; confirm which value callers expect.
    return static_cast<int>( ids.size() );
}
// Wait for N_wait of the given work items and return the positions (indexes into ids)
// of the items flagged as finished by the pointer-based wait_some overload.
inline std::vector<int> ThreadPool::wait_some(
    int N_wait, const std::vector<thread_id_t> &ids ) const
{
    // One flag per id, filled in by the pointer-based overload
    auto finished  = new bool[ids.size()];
    int N_finished = wait_some( ids.size(), ids.data(), N_wait, finished );
    // Collect the positions of the finished items, in increasing order
    std::vector<int> index( N_finished, -1 );
    for ( size_t i = 0, j = 0; i < ids.size(); i++ ) {
        if ( finished[i] ) {
            index[j] = static_cast<int>( i );
            j++;
        }
    }
    // Release the flag buffer exactly once
    delete[] finished;
    return index;
}
/******************************************************************
* Functions to add work items. *
******************************************************************/
* Functions to add work items. *
******************************************************************/
inline ThreadPool::thread_id_t ThreadPool::add_work( WorkItem *work, int priority )
{
ThreadPool::thread_id_t id;
@@ -280,11 +281,37 @@ inline std::vector<ThreadPool::thread_id_t> ThreadPool::add_work(
delete[] priority2;
return ids;
}
// Static form of add_work: queue the item on tpool, or run it immediately on the
// calling thread when no pool is given.
inline ThreadPool::thread_id_t ThreadPool::add_work(
    ThreadPool *tpool, ThreadPool::WorkItem *work, int priority )
{
    ThreadPool::thread_id_t id;
    if ( tpool == nullptr ) {
        // No pool: attach the item to an id with a random local id and run it inline.
        // NOTE(review): d_state 2 / 3 presumably mean "running" / "finished" - confirm
        // against WorkItem.
        id.reset( priority, std::rand(), work );
        work->d_state = 2;
        work->run();
        work->d_state = 3;
        return id;
    }
    id = tpool->add_work( work, priority );
    return id;
}
// Static form of add_work for multiple items: forward to tpool when one is given,
// otherwise add the items one at a time through the single-item static overload.
// priority may be empty (its default) or shorter than work; missing entries use the
// default priority of 0.
inline std::vector<ThreadPool::thread_id_t> ThreadPool::add_work( ThreadPool *tpool,
    const std::vector<ThreadPool::WorkItem *> &work, const std::vector<int> &priority )
{
    if ( tpool )
        return tpool->add_work( work, priority );
    std::vector<ThreadPool::thread_id_t> ids( work.size() );
    for ( size_t i = 0; i < work.size(); i++ ) {
        // Never index past the end of priority: the default argument is an empty vector
        const int p = i < priority.size() ? priority[i] : 0;
        ids[i]      = add_work( tpool, work[i], p );
    }
    return ids;
}
/******************************************************************
* Class functions for the thread id *
******************************************************************/
* Class functions for the thread id *
******************************************************************/
inline ThreadPool::thread_id_t::thread_id_t()
: d_id( nullThreadID ), d_count( NULL ), d_work( NULL )
{
@@ -326,7 +353,7 @@ inline ThreadPool::thread_id_t::thread_id_t( const volatile ThreadPool::thread_i
if ( d_count != NULL )
AtomicOperations::atomic_increment( d_count );
}
#ifndef USE_WINDOWS
#if !defined( WIN32 ) && !defined( _WIN32 ) && !defined( WIN64 ) && !defined( _WIN64 )
inline ThreadPool::thread_id_t::thread_id_t( const thread_id_t &rhs )
: d_id( rhs.d_id ), d_count( rhs.d_count ), d_work( rhs.d_work )
{
@@ -417,8 +444,8 @@ inline uint64_t ThreadPool::thread_id_t::createId( int priority, uint64_t local_
if ( priority >= 0 )
tmp2 |= 0x80;
uint64_t id = tmp2;
id = ( id << 56 ) + local_id;
return id;
id = ( id << 56 ) + local_id;
return id;
}
inline void ThreadPool::thread_id_t::reset( int priority, uint64_t local_id, void *work )
{
@@ -435,8 +462,8 @@ inline void ThreadPool::thread_id_t::reset( int priority, uint64_t local_id, voi
d_count = nullptr;
d_work = nullptr;
if ( work != nullptr ) {
d_work = work;
d_count = &(reinterpret_cast<WorkItem *>( work )->d_count);
d_work = work;
d_count = &( reinterpret_cast<WorkItem *>( work )->d_count );
*d_count = 1;
}
}
@@ -473,7 +500,7 @@ inline bool ThreadPool::thread_id_t::ready() const
bool ready = true;
if ( !isNull() ) {
auto tmp = work();
for (size_t i=0; i<tmp->d_N_ids; i++)
for ( size_t i = 0; i < tmp->d_N_ids; i++ )
ready = ready && tmp->d_ids[i].finished();
}
return ready;
@@ -481,21 +508,22 @@ inline bool ThreadPool::thread_id_t::ready() const
/******************************************************************
* This function checks if the id is valid *
******************************************************************/
* This function checks if the id is valid *
******************************************************************/
// Check whether id could refer to a work item issued by this pool: it must be
// initialized, have a non-zero local id no larger than thread_id_t::maxThreadID, and
// lie above the next id the pool will assign.
inline bool ThreadPool::isValid( const ThreadPool::thread_id_t &id ) const
{
    static_assert( sizeof( atomic_64 ) == 8, "atomic_64 must be a 64-bit integer" );
    uint64_t local_id = id.getLocalID();
    // NOTE(review): the comparison below implies ids are handed out in decreasing order,
    // so anything above ( d_id_assign - 1 ) has already been issued; confirm.
    uint64_t next_id = d_id_assign - 1;
    return local_id != 0 && id.initialized() && local_id <= thread_id_t::maxThreadID &&
           local_id > next_id;
}
/******************************************************************
* Function to get the thread number *
* (-1 if it is not a member thread) *
******************************************************************/
* Function to get the thread number *
* (-1 if it is not a member thread) *
******************************************************************/
inline int ThreadPool::getThreadNumber() const
{
std::thread::id id = std::this_thread::get_id();