diff --git a/StackTrace/StackTrace.cpp b/StackTrace/StackTrace.cpp
index b288ed75..bcae7a66 100644
--- a/StackTrace/StackTrace.cpp
+++ b/StackTrace/StackTrace.cpp
@@ -1263,7 +1263,7 @@ static int backtrace_thread(
     if ( tid == pthread_self() ) {
         count = ::backtrace( buffer, size );
     } else {
-        // Note: this will get the backtrace, but terminates the thread in the process!!!
+        // Send a signal to the desired thread to get the call stack
         StackTrace_mutex.lock();
         struct sigaction sa;
         sigfillset( &sa.sa_mask );
diff --git a/common/MPI.I b/common/MPI.I
index 6d44858a..47e7789c 100644
--- a/common/MPI.I
+++ b/common/MPI.I
@@ -7,1178 +7,1133 @@
 #include
-
 #define MPI_CLASS MPI
 #define MPI_CLASS_ERROR ERROR
 #define MPI_CLASS_ASSERT ASSERT
 #undef NULL_USE
-#define NULL_USE( variable ) \
-    do { \
-        if ( 0 ) { \
-            auto static t = (char *) &variable; \
-            t++; \
-        } \
-    } while ( 0 )
-
+#define NULL_USE(variable) \
+    do { \
+        if (0) { \
+            auto static t = (char *)&variable; \
+            t++; \
+        } \
+    } while (0)
 namespace Utilities {
-
 // Function to test if a type is a std::pair
-template
-struct is_pair : std::false_type {
-};
-template
-struct is_pair> : std::true_type {
-};
-
+template struct is_pair : std::false_type {};
+template
+struct is_pair> : std::true_type {};
 // Function to test if a type can be passed by MPI
-template
-constexpr typename std::enable_if::value,bool>::type
-    is_mpi_copyable()
-{
+template
+constexpr
+    typename std::enable_if::value, bool>::type
+    is_mpi_copyable() {
     return true;
 }
-template
-constexpr typename std::enable_if::value&&is_pair::value,bool>::type
-    is_mpi_copyable()
-{
-    return is_mpi_copyable() && is_mpi_copyable();
+template
+constexpr typename std::enable_if::value &&
+                                       is_pair::value,
+                                   bool>::type
+is_mpi_copyable() {
+    return is_mpi_copyable() &&
+           is_mpi_copyable();
 }
-template
-constexpr typename std::enable_if::value&&!is_pair::value,bool>::type
-    is_mpi_copyable()
-{
+template
+constexpr typename std::enable_if::value &&
+                                       !is_pair::value,
+                                   bool>::type
+is_mpi_copyable() {
     return false;
 }
-
 /************************************************************************
  * sumReduce                                                            *
  ************************************************************************/
-template
-inline TYPE MPI_CLASS::sumReduce( const TYPE value ) const
-{
-    if ( comm_size > 1 ) {
+template inline TYPE MPI_CLASS::sumReduce(const TYPE value) const {
+    if (comm_size > 1) {
         TYPE tmp = value;
-        call_sumReduce( &tmp, 1 );
+        call_sumReduce(&tmp, 1);
         return tmp;
     } else {
         return value;
     }
 }
-template
-inline void MPI_CLASS::sumReduce( TYPE *x, const int n ) const
-{
-    if ( comm_size > 1 )
-        call_sumReduce( x, n );
+template inline void MPI_CLASS::sumReduce(TYPE *x, int n) const {
+    if (comm_size > 1)
+        call_sumReduce(x, n);
 }
-template
-inline void MPI_CLASS::sumReduce( const TYPE *x, TYPE *y, const int n ) const
-{
-    if ( comm_size > 1 ) {
-        call_sumReduce( x, y, n );
+template
+inline void MPI_CLASS::sumReduce(const TYPE *x, TYPE *y, int n) const {
+    if (comm_size > 1) {
+        call_sumReduce(x, y, n);
     } else {
-        for ( int i = 0; i < n; i++ )
+        for (int i = 0; i < n; i++)
             y[i] = x[i];
     }
 }
-// Define specializations of call_sumReduce(TYPE*, const int)
-#if defined( USE_MPI ) || defined( USE_EXT_MPI )
-template<>
-void MPI_CLASS::call_sumReduce( unsigned char *, const int ) const;
-template<>
-void MPI_CLASS::call_sumReduce( char *, const int ) const;
-template<>
-void MPI_CLASS::call_sumReduce( unsigned int *, const int ) const;
-template<>
-void MPI_CLASS::call_sumReduce( int *, const int ) const;
-template<> -void MPI_CLASS::call_sumReduce( unsigned long int *, const int ) const; -template<> -void MPI_CLASS::call_sumReduce( long int *, const int ) const; -template<> -void MPI_CLASS::call_sumReduce( size_t *, const int ) const; -template<> -void MPI_CLASS::call_sumReduce( float *, const int ) const; -template<> -void MPI_CLASS::call_sumReduce( double *, const int ) const; -template<> -void MPI_CLASS::call_sumReduce>( std::complex *, const int ) const; +// Define specializations of call_sumReduce(TYPE*, int) +#if defined(USE_MPI) || defined(USE_EXT_MPI) +template <> +void MPI_CLASS::call_sumReduce(unsigned char *, int) const; +template <> void MPI_CLASS::call_sumReduce(char *, int) const; +template <> +void MPI_CLASS::call_sumReduce(unsigned int *, int) const; +template <> void MPI_CLASS::call_sumReduce(int *, int) const; +template <> +void MPI_CLASS::call_sumReduce(unsigned long int *, + int) const; +template <> void MPI_CLASS::call_sumReduce(long int *, int) const; +template <> void MPI_CLASS::call_sumReduce(size_t *, int) const; +template <> void MPI_CLASS::call_sumReduce(float *, int) const; +template <> void MPI_CLASS::call_sumReduce(double *, int) const; +template <> +void MPI_CLASS::call_sumReduce>(std::complex *, + int) const; #endif -// Default instantiations of call_sumReduce(TYPE*, const int) -template -void MPI_CLASS::call_sumReduce( TYPE *, const int ) const -{ +// Default instantiations of call_sumReduce(TYPE*, int) +template void MPI_CLASS::call_sumReduce(TYPE *, int) const { char message[200]; - sprintf( message, "Default instantion of sumReduce in parallel is not supported (%s)", - typeid( TYPE ).name() ); - MPI_CLASS_ERROR( message ); + sprintf(message, + "Default instantion of sumReduce in parallel is not supported (%s)", + typeid(TYPE).name()); + MPI_CLASS_ERROR(message); } -// Define specializations of call_sumReduce(const TYPE*, TYPE*, const int) -#if defined( USE_MPI ) || defined( USE_EXT_MPI ) -template<> -void MPI_CLASS::call_sumReduce( - const unsigned char *, unsigned char *, const int ) const; -template<> -void MPI_CLASS::call_sumReduce( const char *, char *, const int ) const; -template<> -void MPI_CLASS::call_sumReduce( - const unsigned int *, unsigned int *, const int ) const; -template<> -void MPI_CLASS::call_sumReduce( const int *, int *, const int ) const; -template<> -void MPI_CLASS::call_sumReduce( - const unsigned long int *, unsigned long int *, const int ) const; -template<> -void MPI_CLASS::call_sumReduce( const long int *, long int *, const int ) const; -template<> -void MPI_CLASS::call_sumReduce( const size_t *, size_t *, const int ) const; -template<> -void MPI_CLASS::call_sumReduce( const float *, float *, const int ) const; -template<> -void MPI_CLASS::call_sumReduce( const double *, double *, const int ) const; -template<> +// Define specializations of call_sumReduce(const TYPE*, TYPE*, int) +#if defined(USE_MPI) || defined(USE_EXT_MPI) +template <> +void MPI_CLASS::call_sumReduce(const unsigned char *, + unsigned char *, int) const; +template <> +void MPI_CLASS::call_sumReduce(const char *, char *, int) const; +template <> +void MPI_CLASS::call_sumReduce(const unsigned int *, + unsigned int *, int) const; +template <> void MPI_CLASS::call_sumReduce(const int *, int *, int) const; +template <> +void MPI_CLASS::call_sumReduce(const unsigned long int *, + unsigned long int *, + int) const; +template <> +void MPI_CLASS::call_sumReduce(const long int *, long int *, + int) const; +template <> +void MPI_CLASS::call_sumReduce(const size_t *, 
size_t *, int) const; +template <> +void MPI_CLASS::call_sumReduce(const float *, float *, int) const; +template <> +void MPI_CLASS::call_sumReduce(const double *, double *, int) const; +template <> void MPI_CLASS::call_sumReduce>( - const std::complex *, std::complex *, const int ) const; + const std::complex *, std::complex *, int) const; #endif -// Default instantiations of call_sumReduce(const TYPE*, TYPE*, const int) -template -void MPI_CLASS::call_sumReduce( const TYPE *x, TYPE *y, const int n ) const -{ - NULL_USE( x ); - NULL_USE( y ); - NULL_USE( n ); +// Default instantiations of call_sumReduce(const TYPE*, TYPE*, int) +template +void MPI_CLASS::call_sumReduce(const TYPE *x, TYPE *y, int n) const { + NULL_USE(x); + NULL_USE(y); + NULL_USE(n); char message[200]; - sprintf( message, "Default instantion of sumReduce in parallel is not supported (%s)", - typeid( TYPE ).name() ); - MPI_CLASS_ERROR( message ); + sprintf(message, + "Default instantion of sumReduce in parallel is not supported (%s)", + typeid(TYPE).name()); + MPI_CLASS_ERROR(message); } - /************************************************************************ * minReduce * ************************************************************************/ -template -inline TYPE MPI_CLASS::minReduce( const TYPE value ) const -{ - if ( comm_size > 1 ) { +template inline TYPE MPI_CLASS::minReduce(const TYPE value) const { + if (comm_size > 1) { TYPE tmp = value; - call_minReduce( &tmp, 1, nullptr ); + call_minReduce(&tmp, 1, nullptr); return tmp; } else { return value; } } -template -inline void MPI_CLASS::minReduce( TYPE *x, const int n, int *rank_of_min ) const -{ - if ( comm_size > 1 ) { - call_minReduce( x, n, rank_of_min ); +template +inline void MPI_CLASS::minReduce(TYPE *x, int n, int *rank_of_min) const { + if (comm_size > 1) { + call_minReduce(x, n, rank_of_min); } else { - if ( rank_of_min != nullptr ) { - for ( int i = 0; i < n; i++ ) + if (rank_of_min != nullptr) { + for (int i = 0; i < n; i++) rank_of_min[i] = 0; } } } -template -inline void MPI_CLASS::minReduce( const TYPE *x, TYPE *y, const int n, int *rank_of_min ) const -{ - if ( comm_size > 1 ) { - call_minReduce( x, y, n, rank_of_min ); +template +inline void MPI_CLASS::minReduce(const TYPE *x, TYPE *y, int n, + int *rank_of_min) const { + if (comm_size > 1) { + call_minReduce(x, y, n, rank_of_min); } else { - for ( int i = 0; i < n; i++ ) { + for (int i = 0; i < n; i++) { y[i] = x[i]; - if ( rank_of_min != nullptr ) + if (rank_of_min != nullptr) rank_of_min[i] = 0; } } } -// Define specializations of call_minReduce(TYPE*, const int, int*) -#if defined( USE_MPI ) || defined( USE_EXT_MPI ) -template<> -void MPI_CLASS::call_minReduce( unsigned char *, const int, int * ) const; -template<> -void MPI_CLASS::call_minReduce( char *, const int, int * ) const; -template<> -void MPI_CLASS::call_minReduce( unsigned int *, const int, int * ) const; -template<> -void MPI_CLASS::call_minReduce( int *, const int, int * ) const; -template<> -void MPI_CLASS::call_minReduce( unsigned long int *, const int, int * ) const; -template<> -void MPI_CLASS::call_minReduce( long int *, const int, int * ) const; -template<> -void MPI_CLASS::call_minReduce( - unsigned long long int *, const int, int * ) const; -template<> -void MPI_CLASS::call_minReduce( long long int *, const int, int * ) const; -template<> -void MPI_CLASS::call_minReduce( size_t *, const int, int * ) const; -template<> -void MPI_CLASS::call_minReduce( float *, const int, int * ) const; -template<> -void 
MPI_CLASS::call_minReduce( double *, const int, int * ) const; +// Define specializations of call_minReduce(TYPE*, int, int*) +#if defined(USE_MPI) || defined(USE_EXT_MPI) +template <> +void MPI_CLASS::call_minReduce(unsigned char *, int, + int *) const; +template <> void MPI_CLASS::call_minReduce(char *, int, int *) const; +template <> +void MPI_CLASS::call_minReduce(unsigned int *, int, int *) const; +template <> void MPI_CLASS::call_minReduce(int *, int, int *) const; +template <> +void MPI_CLASS::call_minReduce(unsigned long int *, int, + int *) const; +template <> +void MPI_CLASS::call_minReduce(long int *, int, int *) const; +template <> +void MPI_CLASS::call_minReduce(unsigned long long int *, + int, int *) const; +template <> +void MPI_CLASS::call_minReduce(long long int *, int, + int *) const; +template <> void MPI_CLASS::call_minReduce(size_t *, int, int *) const; +template <> void MPI_CLASS::call_minReduce(float *, int, int *) const; +template <> void MPI_CLASS::call_minReduce(double *, int, int *) const; #endif -// Default instantiations of call_minReduce(TYPE*, const int, int*) -template -void MPI_CLASS::call_minReduce( TYPE *, const int, int * ) const -{ +// Default instantiations of call_minReduce(TYPE*, int, int*) +template void MPI_CLASS::call_minReduce(TYPE *, int, int *) const { char message[200]; - sprintf( message, "Default instantion of minReduce in parallel is not supported (%s)", - typeid( TYPE ).name() ); - MPI_CLASS_ERROR( message ); + sprintf(message, + "Default instantion of minReduce in parallel is not supported (%s)", + typeid(TYPE).name()); + MPI_CLASS_ERROR(message); } -// Define specializations of call_minReduce(const TYPE*, TYPE*, const int, int*) -#if defined( USE_MPI ) || defined( USE_EXT_MPI ) -template<> -void MPI_CLASS::call_minReduce( - const unsigned char *, unsigned char *, const int, int * ) const; -template<> -void MPI_CLASS::call_minReduce( const char *, char *, const int, int * ) const; -template<> -void MPI_CLASS::call_minReduce( - const unsigned int *, unsigned int *, const int, int * ) const; -template<> -void MPI_CLASS::call_minReduce( const int *, int *, const int, int * ) const; -template<> -void MPI_CLASS::call_minReduce( - const unsigned long int *, unsigned long int *, const int, int * ) const; -template<> -void MPI_CLASS::call_minReduce( const long int *, long int *, const int, int * ) const; -template<> +// Define specializations of call_minReduce(const TYPE*, TYPE*, int, int*) +#if defined(USE_MPI) || defined(USE_EXT_MPI) +template <> +void MPI_CLASS::call_minReduce(const unsigned char *, + unsigned char *, int, + int *) const; +template <> +void MPI_CLASS::call_minReduce(const char *, char *, int, int *) const; +template <> +void MPI_CLASS::call_minReduce(const unsigned int *, + unsigned int *, int, int *) const; +template <> +void MPI_CLASS::call_minReduce(const int *, int *, int, int *) const; +template <> +void MPI_CLASS::call_minReduce(const unsigned long int *, + unsigned long int *, int, + int *) const; +template <> +void MPI_CLASS::call_minReduce(const long int *, long int *, int, + int *) const; +template <> void MPI_CLASS::call_minReduce( - const unsigned long long int *, unsigned long long int *, const int, int * ) const; -template<> -void MPI_CLASS::call_minReduce( - const long long int *, long long int *, const int, int * ) const; -template<> -void MPI_CLASS::call_minReduce( const size_t *, size_t *, const int, int * ) const; -template<> -void MPI_CLASS::call_minReduce( const float *, float *, const int, int * ) const; 
-template<> -void MPI_CLASS::call_minReduce( const double *, double *, const int, int * ) const; + const unsigned long long int *, unsigned long long int *, int, int *) const; +template <> +void MPI_CLASS::call_minReduce(const long long int *, + long long int *, int, + int *) const; +template <> +void MPI_CLASS::call_minReduce(const size_t *, size_t *, int, + int *) const; +template <> +void MPI_CLASS::call_minReduce(const float *, float *, int, int *) const; +template <> +void MPI_CLASS::call_minReduce(const double *, double *, int, + int *) const; #endif -// Default instantiations of call_minReduce(const TYPE*, TYPE*, const int, int*) -template -void MPI_CLASS::call_minReduce( const TYPE *, TYPE *, const int, int * ) const -{ +// Default instantiations of call_minReduce(const TYPE*, TYPE*, int, int*) +template +void MPI_CLASS::call_minReduce(const TYPE *, TYPE *, int, int *) const { char message[200]; - sprintf( message, "Default instantion of minReduce in parallel is not supported (%s)", - typeid( TYPE ).name() ); - MPI_CLASS_ERROR( message ); + sprintf(message, + "Default instantion of minReduce in parallel is not supported (%s)", + typeid(TYPE).name()); + MPI_CLASS_ERROR(message); } - /************************************************************************ * maxReduce * ************************************************************************/ -template -inline TYPE MPI_CLASS::maxReduce( const TYPE value ) const -{ - if ( comm_size > 1 ) { +template inline TYPE MPI_CLASS::maxReduce(const TYPE value) const { + if (comm_size > 1) { TYPE tmp = value; - call_maxReduce( &tmp, 1, nullptr ); + call_maxReduce(&tmp, 1, nullptr); return tmp; } else { return value; } } -template -inline void MPI_CLASS::maxReduce( TYPE *x, const int n, int *rank_of_max ) const -{ - if ( comm_size > 1 ) { - call_maxReduce( x, n, rank_of_max ); +template +inline void MPI_CLASS::maxReduce(TYPE *x, int n, int *rank_of_max) const { + if (comm_size > 1) { + call_maxReduce(x, n, rank_of_max); } else { - if ( rank_of_max != nullptr ) { - for ( int i = 0; i < n; i++ ) + if (rank_of_max != nullptr) { + for (int i = 0; i < n; i++) rank_of_max[i] = 0; } } } -template -inline void MPI_CLASS::maxReduce( const TYPE *x, TYPE *y, const int n, int *rank_of_max ) const -{ - if ( comm_size > 1 ) { - call_maxReduce( x, y, n, rank_of_max ); +template +inline void MPI_CLASS::maxReduce(const TYPE *x, TYPE *y, int n, + int *rank_of_max) const { + if (comm_size > 1) { + call_maxReduce(x, y, n, rank_of_max); } else { - for ( int i = 0; i < n; i++ ) { + for (int i = 0; i < n; i++) { y[i] = x[i]; - if ( rank_of_max != nullptr ) + if (rank_of_max != nullptr) rank_of_max[i] = 0; } } } -// Define specializations of call_maxReduce(TYPE*, const int, int*) -#if defined( USE_MPI ) || defined( USE_EXT_MPI ) -template<> -void MPI_CLASS::call_maxReduce( unsigned char *, const int, int * ) const; -template<> -void MPI_CLASS::call_maxReduce( char *, const int, int * ) const; -template<> -void MPI_CLASS::call_maxReduce( unsigned int *, const int, int * ) const; -template<> -void MPI_CLASS::call_maxReduce( int *, const int, int * ) const; -template<> -void MPI_CLASS::call_maxReduce( unsigned long int *, const int, int * ) const; -template<> -void MPI_CLASS::call_maxReduce( long int *, const int, int * ) const; -template<> -void MPI_CLASS::call_maxReduce( - unsigned long long int *, const int, int * ) const; -template<> -void MPI_CLASS::call_maxReduce( long long int *, const int, int * ) const; -template<> -void MPI_CLASS::call_maxReduce( size_t *, const 
int, int * ) const; -template<> -void MPI_CLASS::call_maxReduce( float *, const int, int * ) const; -template<> -void MPI_CLASS::call_maxReduce( double *, const int, int * ) const; +// Define specializations of call_maxReduce(TYPE*, int, int*) +#if defined(USE_MPI) || defined(USE_EXT_MPI) +template <> +void MPI_CLASS::call_maxReduce(unsigned char *, int, + int *) const; +template <> void MPI_CLASS::call_maxReduce(char *, int, int *) const; +template <> +void MPI_CLASS::call_maxReduce(unsigned int *, int, int *) const; +template <> void MPI_CLASS::call_maxReduce(int *, int, int *) const; +template <> +void MPI_CLASS::call_maxReduce(unsigned long int *, int, + int *) const; +template <> +void MPI_CLASS::call_maxReduce(long int *, int, int *) const; +template <> +void MPI_CLASS::call_maxReduce(unsigned long long int *, + int, int *) const; +template <> +void MPI_CLASS::call_maxReduce(long long int *, int, + int *) const; +template <> void MPI_CLASS::call_maxReduce(size_t *, int, int *) const; +template <> void MPI_CLASS::call_maxReduce(float *, int, int *) const; +template <> void MPI_CLASS::call_maxReduce(double *, int, int *) const; #endif -// Default instantiations of call_maxReduce(TYPE*, const int, int*) -template -void MPI_CLASS::call_maxReduce( TYPE *, const int, int * ) const -{ +// Default instantiations of call_maxReduce(TYPE*, int, int*) +template void MPI_CLASS::call_maxReduce(TYPE *, int, int *) const { char message[200]; - sprintf( message, "Default instantion of maxReduce in parallel is not supported (%s)", - typeid( TYPE ).name() ); - MPI_CLASS_ERROR( message ); + sprintf(message, + "Default instantion of maxReduce in parallel is not supported (%s)", + typeid(TYPE).name()); + MPI_CLASS_ERROR(message); } -// Define specializations of call_maxReduce(const TYPE*, TYPE*, const int, int*) -#if defined( USE_MPI ) || defined( USE_EXT_MPI ) -template<> -void MPI_CLASS::call_maxReduce( - const unsigned char *, unsigned char *, const int, int * ) const; -template<> -void MPI_CLASS::call_maxReduce( const char *, char *, const int, int * ) const; -template<> -void MPI_CLASS::call_maxReduce( - const unsigned int *, unsigned int *, const int, int * ) const; -template<> -void MPI_CLASS::call_maxReduce( const int *, int *, const int, int * ) const; -template<> -void MPI_CLASS::call_maxReduce( - const unsigned long int *, unsigned long int *, const int, int * ) const; -template<> -void MPI_CLASS::call_maxReduce( const long int *, long int *, const int, int * ) const; -template<> +// Define specializations of call_maxReduce(const TYPE*, TYPE*, int, int*) +#if defined(USE_MPI) || defined(USE_EXT_MPI) +template <> +void MPI_CLASS::call_maxReduce(const unsigned char *, + unsigned char *, int, + int *) const; +template <> +void MPI_CLASS::call_maxReduce(const char *, char *, int, int *) const; +template <> +void MPI_CLASS::call_maxReduce(const unsigned int *, + unsigned int *, int, int *) const; +template <> +void MPI_CLASS::call_maxReduce(const int *, int *, int, int *) const; +template <> +void MPI_CLASS::call_maxReduce(const unsigned long int *, + unsigned long int *, int, + int *) const; +template <> +void MPI_CLASS::call_maxReduce(const long int *, long int *, int, + int *) const; +template <> void MPI_CLASS::call_maxReduce( - const unsigned long long int *, unsigned long long int *, const int, int * ) const; -template<> -void MPI_CLASS::call_maxReduce( - const long long int *, long long int *, const int, int * ) const; -template<> -void MPI_CLASS::call_maxReduce( const size_t *, size_t *, 
const int, int * ) const; -template<> -void MPI_CLASS::call_maxReduce( const float *, float *, const int, int * ) const; -template<> -void MPI_CLASS::call_maxReduce( const double *, double *, const int, int * ) const; + const unsigned long long int *, unsigned long long int *, int, int *) const; +template <> +void MPI_CLASS::call_maxReduce(const long long int *, + long long int *, int, + int *) const; +template <> +void MPI_CLASS::call_maxReduce(const size_t *, size_t *, int, + int *) const; +template <> +void MPI_CLASS::call_maxReduce(const float *, float *, int, int *) const; +template <> +void MPI_CLASS::call_maxReduce(const double *, double *, int, + int *) const; #endif -// Default instantiations of call_maxReduce(const TYPE*, TYPE*, const int, int*) -template -void MPI_CLASS::call_maxReduce( const TYPE *, TYPE *, const int, int * ) const -{ +// Default instantiations of call_maxReduce(const TYPE*, TYPE*, int, int*) +template +void MPI_CLASS::call_maxReduce(const TYPE *, TYPE *, int, int *) const { char message[200]; - sprintf( message, "Default instantion of maxReduce in parallel is not supported (%s)", - typeid( TYPE ).name() ); - MPI_CLASS_ERROR( message ); + sprintf(message, + "Default instantion of maxReduce in parallel is not supported (%s)", + typeid(TYPE).name()); + MPI_CLASS_ERROR(message); } - /************************************************************************ * bcast * ************************************************************************/ -// Define specializations of bcast(TYPE*, const int, const int) -#if defined( USE_MPI ) || defined( USE_EXT_MPI ) -template<> -void MPI_CLASS::call_bcast( unsigned char *, const int, const int ) const; -template<> -void MPI_CLASS::call_bcast( char *, const int, const int ) const; -template<> -void MPI_CLASS::call_bcast( unsigned int *, const int, const int ) const; -template<> -void MPI_CLASS::call_bcast( int *, const int, const int ) const; -template<> -void MPI_CLASS::call_bcast( float *, const int, const int ) const; -template<> -void MPI_CLASS::call_bcast( double *, const int, const int ) const; +// Define specializations of bcast(TYPE*, int, int) +#if defined(USE_MPI) || defined(USE_EXT_MPI) +template <> +void MPI_CLASS::call_bcast(unsigned char *, int, int) const; +template <> void MPI_CLASS::call_bcast(char *, int, int) const; +template <> +void MPI_CLASS::call_bcast(unsigned int *, int, int) const; +template <> void MPI_CLASS::call_bcast(int *, int, int) const; +template <> void MPI_CLASS::call_bcast(float *, int, int) const; +template <> void MPI_CLASS::call_bcast(double *, int, int) const; #else -template<> -void MPI_CLASS::call_bcast( char *, const int, const int ) const; +template <> void MPI_CLASS::call_bcast(char *, int, int) const; #endif -// Default instantiations of bcast(TYPE*, const int, const int) -template -void MPI_CLASS::call_bcast( TYPE *x, const int n, const int root ) const -{ - static_assert( is_mpi_copyable(), "Object is not trivially copyable" ); - call_bcast( (char *) x, (int) n * sizeof( TYPE ), root ); +// Default instantiations of bcast(TYPE*, int, int) +template +void MPI_CLASS::call_bcast(TYPE *x, int n, int root) const { + static_assert(is_mpi_copyable(), "Object is not trivially copyable"); + call_bcast((char *)x, (int)n * sizeof(TYPE), root); } // Specialization of bcast for std::string -template<> -inline std::string MPI_CLASS::bcast( const std::string &value, const int root ) const -{ - if ( comm_size == 1 ) +template <> +inline std::string MPI_CLASS::bcast(const std::string &value, + int 
root) const { + if (comm_size == 1) return value; - int length = static_cast( value.size() ); - call_bcast( &length, 1, root ); - if ( length == 0 ) + int length = static_cast(value.size()); + call_bcast(&length, 1, root); + if (length == 0) return std::string(); char *str = new char[length + 1]; - if ( root == comm_rank ) { - for ( int i = 0; i < length; i++ ) + if (root == comm_rank) { + for (int i = 0; i < length; i++) str[i] = value[i]; } - call_bcast( str, length, root ); + call_bcast(str, length, root); str[length] = 0; - std::string result( str ); + std::string result(str); delete[] str; return result; } -template<> -inline void MPI_CLASS::bcast( std::string *, const int, const int ) const -{ - MPI_CLASS_ERROR( "Cannot bcast an array of strings" ); +template <> +inline void MPI_CLASS::bcast(std::string *, int, int) const { + MPI_CLASS_ERROR("Cannot bcast an array of strings"); } // Default implimentation of bcast -template -inline TYPE MPI_CLASS::bcast( const TYPE &value, const int root ) const -{ - if ( root >= comm_size ) - MPI_CLASS_ERROR( "root cannot be >= size in bcast" ); - if ( comm_size > 1 ) { +template +inline TYPE MPI_CLASS::bcast(const TYPE &value, int root) const { + if (root >= comm_size) + MPI_CLASS_ERROR("root cannot be >= size in bcast"); + if (comm_size > 1) { TYPE tmp = value; - call_bcast( &tmp, 1, root ); + call_bcast(&tmp, 1, root); return tmp; } else { return value; } } -template -inline void MPI_CLASS::bcast( TYPE *x, const int n, const int root ) const -{ - if ( root >= comm_size ) - MPI_CLASS_ERROR( "root cannot be >= size in bcast" ); - if ( comm_size > 1 ) - call_bcast( x, n, root ); +template +inline void MPI_CLASS::bcast(TYPE *x, int n, int root) const { + if (root >= comm_size) + MPI_CLASS_ERROR("root cannot be >= size in bcast"); + if (comm_size > 1) + call_bcast(x, n, root); } - /************************************************************************ * send * ************************************************************************/ -// Define specializations of send(const TYPE*, const int, const int, int) -#if defined( USE_MPI ) || defined( USE_EXT_MPI ) -template<> -void MPI_CLASS::send( const char *, const int, const int, int ) const; -template<> -void MPI_CLASS::send( const int *, int, const int, int ) const; -template<> -void MPI_CLASS::send( const float *, const int, const int, int ) const; -template<> -void MPI_CLASS::send( const double *, const int, const int, int ) const; +// Define specializations of send(const TYPE*, int, int, int) +#if defined(USE_MPI) || defined(USE_EXT_MPI) +template <> void MPI_CLASS::send(const char *, int, int, int) const; +template <> void MPI_CLASS::send(const int *, int, int, int) const; +template <> void MPI_CLASS::send(const float *, int, int, int) const; +template <> void MPI_CLASS::send(const double *, int, int, int) const; #else -template<> -void MPI_CLASS::send( const char *, const int, const int, int ) const; +template <> void MPI_CLASS::send(const char *, int, int, int) const; #endif -// Default instantiations of send(const TYPE*, const int, const int, int) -template -inline void MPI_CLASS::send( - const TYPE *buf, const int length, const int recv_proc_number, int tag ) const -{ - static_assert( is_mpi_copyable(), "Object is not trivially copyable" ); - send( (const char *) buf, length * sizeof( TYPE ), recv_proc_number, tag ); +// Default instantiations of send(const TYPE*, int, int, int) +template +inline void MPI_CLASS::send(const TYPE *buf, int length, int recv_proc_number, + int tag) const { + 
static_assert(is_mpi_copyable(), "Object is not trivially copyable"); + send((const char *)buf, length * sizeof(TYPE), recv_proc_number, tag); } - /************************************************************************ * Isend * ************************************************************************/ -// Define specializations of Isend(const TYPE*, const int, const int, const int) -#if defined( USE_MPI ) || defined( USE_EXT_MPI ) -template<> -MPI_Request MPI_CLASS::Isend( const char *, const int, const int, const int ) const; -template<> -MPI_Request MPI_CLASS::Isend( const int *, int, const int, const int ) const; -template<> -MPI_Request MPI_CLASS::Isend( const float *, const int, const int, const int ) const; -template<> -MPI_Request MPI_CLASS::Isend( const double *, const int, const int, const int ) const; +// Define specializations of Isend(const TYPE*, int, int, int) +#if defined(USE_MPI) || defined(USE_EXT_MPI) +template <> +MPI_Request MPI_CLASS::Isend(const char *, int, int, int) const; +template <> MPI_Request MPI_CLASS::Isend(const int *, int, int, int) const; +template <> +MPI_Request MPI_CLASS::Isend(const float *, int, int, int) const; +template <> +MPI_Request MPI_CLASS::Isend(const double *, int, int, int) const; #else -template<> -MPI_Request MPI_CLASS::Isend( const char *, const int, const int, const int ) const; +template <> +MPI_Request MPI_CLASS::Isend(const char *, int, int, int) const; #endif -// Default instantiations of Isend(const TYPE*, const int, const int, const int) -template -inline MPI_Request MPI_CLASS::Isend( - const TYPE *buf, const int length, const int recv_proc_number, const int tag ) const -{ - static_assert( is_mpi_copyable(), "Object is not trivially copyable" ); - return Isend( (const char *) buf, length * sizeof( TYPE ), recv_proc_number, tag ); +// Default instantiations of Isend(const TYPE*, int, int, int) +template +inline MPI_Request MPI_CLASS::Isend(const TYPE *buf, int length, + int recv_proc_number, int tag) const { + static_assert(is_mpi_copyable(), "Object is not trivially copyable"); + return Isend((const char *)buf, length * sizeof(TYPE), + recv_proc_number, tag); } - /************************************************************************ * recv * ************************************************************************/ -// Define specializations of recv(TYPE*, int&, const int, const bool, int) -#if defined( USE_MPI ) || defined( USE_EXT_MPI ) -template<> -void MPI_CLASS::recv( char *, int &, const int, const bool, int ) const; -template<> -void MPI_CLASS::recv( int *, int &, const int, const bool, int ) const; -template<> -void MPI_CLASS::recv( float *, int &, const int, const bool, int ) const; -template<> -void MPI_CLASS::recv( double *, int &, const int, const bool, int ) const; +// Define specializations of recv(TYPE*, int&, int, const bool, int) +#if defined(USE_MPI) || defined(USE_EXT_MPI) +template <> +void MPI_CLASS::recv(char *, int &, int, const bool, int) const; +template <> void MPI_CLASS::recv(int *, int &, int, const bool, int) const; +template <> +void MPI_CLASS::recv(float *, int &, int, const bool, int) const; +template <> +void MPI_CLASS::recv(double *, int &, int, const bool, int) const; #else -template<> -void MPI_CLASS::recv( char *, int &, const int, const bool, int ) const; +template <> +void MPI_CLASS::recv(char *, int &, int, const bool, int) const; #endif -// Default instantiations of recv(TYPE*, int&, const int, const bool, int) -template -inline void MPI_CLASS::recv( - TYPE *buf, int &length, const int 
send_proc_number, const bool get_length, int tag ) const -{ - static_assert( is_mpi_copyable(), "Object is not trivially copyable" ); - int size = length * sizeof( TYPE ); - recv( (char *) buf, size, send_proc_number, get_length, tag ); - if ( get_length ) { - MPI_CLASS_ASSERT( size % sizeof( TYPE ) == 0 ); - length = size / sizeof( TYPE ); +// Default instantiations of recv(TYPE*, int&, int, const bool, int) +template +inline void MPI_CLASS::recv(TYPE *buf, int &length, int send_proc_number, + const bool get_length, int tag) const { + static_assert(is_mpi_copyable(), "Object is not trivially copyable"); + int size = length * sizeof(TYPE); + recv((char *)buf, size, send_proc_number, get_length, tag); + if (get_length) { + MPI_CLASS_ASSERT(size % sizeof(TYPE) == 0); + length = size / sizeof(TYPE); } } - /************************************************************************ * Irecv * ************************************************************************/ -// Define specializations of recv(TYPE*, int&, const int, const bool, int) -#if defined( USE_MPI ) || defined( USE_EXT_MPI ) -template<> -MPI_Request MPI_CLASS::Irecv( char *, const int, const int, const int ) const; -template<> -MPI_Request MPI_CLASS::Irecv( int *, const int, const int, const int ) const; -template<> -MPI_Request MPI_CLASS::Irecv( float *, const int, const int, const int ) const; -template<> -MPI_Request MPI_CLASS::Irecv( double *, const int, const int, const int ) const; +// Define specializations of recv(TYPE*, int&, int, const bool, int) +#if defined(USE_MPI) || defined(USE_EXT_MPI) +template <> MPI_Request MPI_CLASS::Irecv(char *, int, int, int) const; +template <> MPI_Request MPI_CLASS::Irecv(int *, int, int, int) const; +template <> MPI_Request MPI_CLASS::Irecv(float *, int, int, int) const; +template <> MPI_Request MPI_CLASS::Irecv(double *, int, int, int) const; #else -template<> -MPI_Request MPI_CLASS::Irecv( char *, const int, const int, const int ) const; +template <> MPI_Request MPI_CLASS::Irecv(char *, int, int, int) const; #endif -// Default instantiations of recv(TYPE*, int&, const int, const bool, int) -template -inline MPI_Request MPI_CLASS::Irecv( - TYPE *buf, const int length, const int send_proc, const int tag ) const -{ - static_assert( is_mpi_copyable(), "Object is not trivially copyable" ); - return Irecv( (char *) buf, length * sizeof( TYPE ), send_proc, tag ); +// Default instantiations of recv(TYPE*, int&, int, const bool, int) +template +inline MPI_Request MPI_CLASS::Irecv(TYPE *buf, int length, int send_proc, + int tag) const { + static_assert(is_mpi_copyable(), "Object is not trivially copyable"); + return Irecv((char *)buf, length * sizeof(TYPE), send_proc, tag); } - /************************************************************************ * sendrecv * ************************************************************************/ -#if defined( USE_MPI ) || defined( USE_EXT_MPI ) -template<> -void MPI_CLASS::sendrecv( const char*, int, int, int, char*, int, int, int ) const; -template<> -void MPI_CLASS::sendrecv( const int*, int, int, int, int*, int, int, int ) const; -template<> -void MPI_CLASS::sendrecv( const float*, int, int, int, float*, int, int, int ) const; -template<> -void MPI_CLASS::sendrecv( const double*, int, int, int, double*, int, int, int ) const; -template -void MPI_CLASS::sendrecv( const TYPE *sendbuf, int sendcount, int dest, int sendtag, - TYPE *recvbuf, int recvcount, int source, int recvtag ) const -{ - if ( getSize() == 1 ) { - ASSERT( dest == 0 ); - ASSERT( source == 0 
); - ASSERT( sendcount == recvcount ); - ASSERT( sendtag == recvtag ); - memcpy( recvbuf, sendbuf, sendcount * sizeof( TYPE ) ); +#if defined(USE_MPI) || defined(USE_EXT_MPI) +template <> +void MPI_CLASS::sendrecv(const char *, int, int, int, char *, int, int, + int) const; +template <> +void MPI_CLASS::sendrecv(const int *, int, int, int, int *, int, int, + int) const; +template <> +void MPI_CLASS::sendrecv(const float *, int, int, int, float *, int, int, + int) const; +template <> +void MPI_CLASS::sendrecv(const double *, int, int, int, double *, int, + int, int) const; +template +void MPI_CLASS::sendrecv(const TYPE *sendbuf, int sendcount, int dest, + int sendtag, TYPE *recvbuf, int recvcount, int source, + int recvtag) const { + if (getSize() == 1) { + ASSERT(dest == 0); + ASSERT(source == 0); + ASSERT(sendcount == recvcount); + ASSERT(sendtag == recvtag); + memcpy(recvbuf, sendbuf, sendcount * sizeof(TYPE)); } else { - ERROR( "Not implimented for " + std::string( typeid( TYPE ).name() ) ); + ERROR("Not implimented for " + std::string(typeid(TYPE).name())); } } #else -template -void MPI_CLASS::sendrecv( const TYPE *sendbuf, int sendcount, int dest, int sendtag, - TYPE *recvbuf, int recvcount, int source, int recvtag ) const -{ - ASSERT( dest == 0 ); - ASSERT( source == 0 ); - ASSERT( sendcount == recvcount ); - ASSERT( sendtag == recvtag ); - memcpy( recvbuf, sendbuf, sendcount * sizeof( TYPE ) ); +template +void MPI_CLASS::sendrecv(const TYPE *sendbuf, int sendcount, int dest, + int sendtag, TYPE *recvbuf, int recvcount, int source, + int recvtag) const { + ASSERT(dest == 0); + ASSERT(source == 0); + ASSERT(sendcount == recvcount); + ASSERT(sendtag == recvtag); + memcpy(recvbuf, sendbuf, sendcount * sizeof(TYPE)); } #endif - - /************************************************************************ * allGather * ************************************************************************/ -template -std::vector MPI_CLASS::allGather( const TYPE &x ) const -{ - static_assert( is_mpi_copyable(), "Object is not trivially copyable" ); - if ( getSize() <= 1 ) - return std::vector( 1, x ); - std::vector data( getSize() ); - allGather( x, data.data() ); +template +std::vector MPI_CLASS::allGather(const TYPE &x) const { + static_assert(is_mpi_copyable(), "Object is not trivially copyable"); + if (getSize() <= 1) + return std::vector(1, x); + std::vector data(getSize()); + allGather(x, data.data()); return data; } -template -std::vector MPI_CLASS::allGather( const std::vector &x ) const -{ - static_assert( is_mpi_copyable(), "Object is not trivially copyable" ); - if ( getSize() <= 1 ) +template +std::vector MPI_CLASS::allGather(const std::vector &x) const { + static_assert(is_mpi_copyable(), "Object is not trivially copyable"); + if (getSize() <= 1) return x; - std::vector count = allGather( x.size() ); - std::vector disp( getSize(), 0 ); + std::vector count = allGather(x.size()); + std::vector disp(getSize(), 0); size_t N = count[0]; - for ( size_t i = 1; i < count.size(); i++ ) { + for (size_t i = 1; i < count.size(); i++) { disp[i] = disp[i - 1] + count[i - 1]; N += count[i]; } - std::vector data( N ); - allGather( x.data(), x.size(), data.data(), count.data(), disp.data(), true ); + std::vector data(N); + allGather(x.data(), x.size(), data.data(), count.data(), disp.data(), + true); return data; } // Specialization of MPI_CLASS::allGather for std::string -template<> -inline void MPI_CLASS::allGather( const std::string &x_in, std::string *x_out ) const -{ +template <> +inline void 
MPI_CLASS::allGather(const std::string &x_in, + std::string *x_out) const { // Get the bytes recvied per processor - std::vector recv_cnt( comm_size, 0 ); - allGather( (int) x_in.size() + 1, &recv_cnt[0] ); - std::vector recv_disp( comm_size, 0 ); - for ( int i = 1; i < comm_size; i++ ) + std::vector recv_cnt(comm_size, 0); + allGather((int)x_in.size() + 1, &recv_cnt[0]); + std::vector recv_disp(comm_size, 0); + for (int i = 1; i < comm_size; i++) recv_disp[i] = recv_disp[i - 1] + recv_cnt[i - 1]; // Call the vector form of allGather for the char arrays - char *recv_data = new char[recv_disp[comm_size - 1] + recv_cnt[comm_size - 1]]; - allGather( - x_in.c_str(), (int) x_in.size() + 1, recv_data, &recv_cnt[0], &recv_disp[0], true ); - for ( int i = 0; i < comm_size; i++ ) - x_out[i] = std::string( &recv_data[recv_disp[i]] ); + char *recv_data = + new char[recv_disp[comm_size - 1] + recv_cnt[comm_size - 1]]; + allGather(x_in.c_str(), (int)x_in.size() + 1, recv_data, &recv_cnt[0], + &recv_disp[0], true); + for (int i = 0; i < comm_size; i++) + x_out[i] = std::string(&recv_data[recv_disp[i]]); delete[] recv_data; } // Default instantiation of MPI_CLASS::allGather -template -inline void MPI_CLASS::allGather( const TYPE &x_in, TYPE *x_out ) const -{ - static_assert( is_mpi_copyable(), "Object is not trivially copyable" ); - if ( comm_size > 1 ) { +template +inline void MPI_CLASS::allGather(const TYPE &x_in, TYPE *x_out) const { + static_assert(is_mpi_copyable(), "Object is not trivially copyable"); + if (comm_size > 1) { // We can use the vector form of allGather with a char array to ge the data we want - call_allGather( x_in, x_out ); + call_allGather(x_in, x_out); } else { // Single processor case x_out[0] = x_in; } } // Specialization of MPI_CLASS::allGather for std::string -template<> -inline int MPI_CLASS::allGather( - const std::string *, const int, std::string *, int *, int *, bool ) const -{ - MPI_CLASS_ERROR( "Cannot allGather an array of strings" ); +template <> +inline int MPI_CLASS::allGather(const std::string *, int, + std::string *, int *, int *, + bool) const { + MPI_CLASS_ERROR("Cannot allGather an array of strings"); return 0; } // Define specializations of call_allGather(const TYPE, TYPE*) -#if defined( USE_MPI ) || defined( USE_EXT_MPI ) -template<> -void MPI_CLASS::call_allGather( const unsigned char &, unsigned char * ) const; -template<> -void MPI_CLASS::call_allGather( const char &, char * ) const; -template<> -void MPI_CLASS::call_allGather( const unsigned int &, unsigned int * ) const; -template<> -void MPI_CLASS::call_allGather( const int &, int * ) const; -template<> -void MPI_CLASS::call_allGather( - const unsigned long int &, unsigned long int * ) const; -template<> -void MPI_CLASS::call_allGather( const long int &, long int * ) const; -template<> -void MPI_CLASS::call_allGather( const float &, float * ) const; -template<> -void MPI_CLASS::call_allGather( const double &, double * ) const; +#if defined(USE_MPI) || defined(USE_EXT_MPI) +template <> +void MPI_CLASS::call_allGather(const unsigned char &, + unsigned char *) const; +template <> void MPI_CLASS::call_allGather(const char &, char *) const; +template <> +void MPI_CLASS::call_allGather(const unsigned int &, + unsigned int *) const; +template <> void MPI_CLASS::call_allGather(const int &, int *) const; +template <> +void MPI_CLASS::call_allGather(const unsigned long int &, + unsigned long int *) const; +template <> +void MPI_CLASS::call_allGather(const long int &, long int *) const; +template <> void 
MPI_CLASS::call_allGather(const float &, float *) const; +template <> +void MPI_CLASS::call_allGather(const double &, double *) const; #endif // Default instantiation of MPI_CLASS::allGather -template -int MPI_CLASS::allGather( const TYPE *send_data, const int send_cnt, TYPE *recv_data, int *recv_cnt, - int *recv_disp, bool known_recv ) const -{ +template +int MPI_CLASS::allGather(const TYPE *send_data, int send_cnt, TYPE *recv_data, + int *recv_cnt, int *recv_disp, bool known_recv) const { // Check the inputs - if ( known_recv && ( recv_cnt == nullptr || recv_disp == nullptr ) ) - MPI_CLASS_ERROR( "Error calling allGather" ); + if (known_recv && (recv_cnt == nullptr || recv_disp == nullptr)) + MPI_CLASS_ERROR("Error calling allGather"); // Check if we are dealing with a single processor - if ( comm_size == 1 ) { - if ( send_data == nullptr && send_cnt > 0 ) { - MPI_CLASS_ERROR( "send_data is null" ); - } else if ( !known_recv ) { + if (comm_size == 1) { + if (send_data == nullptr && send_cnt > 0) { + MPI_CLASS_ERROR("send_data is null"); + } else if (!known_recv) { // We do not know the recieved sizes - for ( int i = 0; i < send_cnt; i++ ) + for (int i = 0; i < send_cnt; i++) recv_data[i] = send_data[i]; - if ( recv_cnt != nullptr ) + if (recv_cnt != nullptr) recv_cnt[0] = send_cnt; - if ( recv_disp != nullptr ) + if (recv_disp != nullptr) recv_disp[0] = 0; } else { // We know the recieved sizes - for ( int i = 0; i < send_cnt; i++ ) + for (int i = 0; i < send_cnt; i++) recv_data[i + recv_disp[0]] = send_data[i]; } return send_cnt; } // Get the sizes of the recieved data (if necessary) - int *recv_cnt2 = recv_cnt; + int *recv_cnt2 = recv_cnt; int *recv_disp2 = recv_disp; - if ( !known_recv ) { - if ( recv_cnt == nullptr ) + if (!known_recv) { + if (recv_cnt == nullptr) recv_cnt2 = new int[comm_size]; - if ( recv_disp == nullptr ) + if (recv_disp == nullptr) recv_disp2 = new int[comm_size]; - call_allGather( send_cnt, recv_cnt2 ); + call_allGather(send_cnt, recv_cnt2); recv_disp2[0] = 0; - for ( int i = 1; i < comm_size; i++ ) + for (int i = 1; i < comm_size; i++) recv_disp2[i] = recv_disp2[i - 1] + recv_cnt2[i - 1]; } int N_recv = 0; - for ( int i = 0; i < comm_size; i++ ) + for (int i = 0; i < comm_size; i++) N_recv += recv_cnt2[i]; // Send/recv the data - call_allGather( send_data, send_cnt, recv_data, recv_cnt2, recv_disp2 ); + call_allGather(send_data, send_cnt, recv_data, recv_cnt2, recv_disp2); // Delete any temporary memory - if ( recv_cnt == nullptr ) + if (recv_cnt == nullptr) delete[] recv_cnt2; - if ( recv_disp == nullptr ) + if (recv_disp == nullptr) delete[] recv_disp2; return N_recv; } // Default instantiations of call_allGather(const TYPE, TYPE*) -template -void MPI_CLASS::call_allGather( const TYPE &x_in, TYPE *x_out ) const -{ - static_assert( is_mpi_copyable(), "Object is not trivially copyable" ); - allGather( (const char *) &x_in, (int) sizeof( TYPE ), (char *) x_out ); +template +void MPI_CLASS::call_allGather(const TYPE &x_in, TYPE *x_out) const { + static_assert(is_mpi_copyable(), "Object is not trivially copyable"); + allGather((const char *)&x_in, (int)sizeof(TYPE), (char *)x_out); } // Define specializations of call_allGather(const TYPE*, int, TYPE*, int*, int*) -#if defined( USE_MPI ) || defined( USE_EXT_MPI ) -template<> -void MPI_CLASS::call_allGather( - const unsigned char *, int, unsigned char *, int *, int * ) const; -template<> -void MPI_CLASS::call_allGather( const char *, int, char *, int *, int * ) const; -template<> -void MPI_CLASS::call_allGather( - 
const unsigned int *, int, unsigned int *, int *, int * ) const; -template<> -void MPI_CLASS::call_allGather( const int *, int, int *, int *, int * ) const; -template<> -void MPI_CLASS::call_allGather( - const unsigned long int *, int, unsigned long int *, int *, int * ) const; -template<> -void MPI_CLASS::call_allGather( const long int *, int, long int *, int *, int * ) const; -template<> -void MPI_CLASS::call_allGather( const float *, int, float *, int *, int * ) const; -template<> -void MPI_CLASS::call_allGather( const double *, int, double *, int *, int * ) const; +#if defined(USE_MPI) || defined(USE_EXT_MPI) +template <> +void MPI_CLASS::call_allGather(const unsigned char *, int, + unsigned char *, int *, + int *) const; +template <> +void MPI_CLASS::call_allGather(const char *, int, char *, int *, + int *) const; +template <> +void MPI_CLASS::call_allGather(const unsigned int *, int, + unsigned int *, int *, + int *) const; +template <> +void MPI_CLASS::call_allGather(const int *, int, int *, int *, + int *) const; +template <> +void MPI_CLASS::call_allGather(const unsigned long int *, + int, unsigned long int *, + int *, int *) const; +template <> +void MPI_CLASS::call_allGather(const long int *, int, long int *, + int *, int *) const; +template <> +void MPI_CLASS::call_allGather(const float *, int, float *, int *, + int *) const; +template <> +void MPI_CLASS::call_allGather(const double *, int, double *, int *, + int *) const; #else -template<> -void MPI_CLASS::call_allGather( const char *, int, char *, int *, int * ) const; +template <> +void MPI_CLASS::call_allGather(const char *, int, char *, int *, + int *) const; #endif // Default instantiations of int call_allGather(const TYPE*, int, TYPE*, int*) -template -void MPI_CLASS::call_allGather( - const TYPE *x_in, int size_in, TYPE *x_out, int *size_out, int *disp_out ) const -{ +template +void MPI_CLASS::call_allGather(const TYPE *x_in, int size_in, TYPE *x_out, + int *size_out, int *disp_out) const { int *size2 = new int[comm_size]; int *disp2 = new int[comm_size]; - for ( int i = 0; i < comm_size; i++ ) { - size2[i] = size_out[i] * sizeof( TYPE ); - disp2[i] = disp_out[i] * sizeof( TYPE ); + for (int i = 0; i < comm_size; i++) { + size2[i] = size_out[i] * sizeof(TYPE); + disp2[i] = disp_out[i] * sizeof(TYPE); } - static_assert( is_mpi_copyable(), "Object is not trivially copyable" ); - call_allGather( - (const char *) x_in, (int) size_in * sizeof( TYPE ), (char *) x_out, size2, disp2 ); + static_assert(is_mpi_copyable(), "Object is not trivially copyable"); + call_allGather((const char *)x_in, (int)size_in * sizeof(TYPE), + (char *)x_out, size2, disp2); delete[] size2; delete[] disp2; } - /************************************************************************ * setGather * ************************************************************************/ -template -inline void MPI_CLASS::setGather( std::set &set ) const -{ - std::vector send_buf( set.begin(), set.end() ); - std::vector recv_cnt( this->comm_size, 0 ); - this->allGather( (int) send_buf.size(), &recv_cnt[0] ); - std::vector recv_disp( this->comm_size, 0 ); - for ( int i = 1; i < this->comm_size; i++ ) +template +inline void MPI_CLASS::setGather(std::set &set) const { + std::vector send_buf(set.begin(), set.end()); + std::vector recv_cnt(this->comm_size, 0); + this->allGather((int)send_buf.size(), &recv_cnt[0]); + std::vector recv_disp(this->comm_size, 0); + for (int i = 1; i < this->comm_size; i++) recv_disp[i] = recv_disp[i - 1] + recv_cnt[i - 1]; size_t N_recv_tot = 0; 
- for ( int i = 0; i < this->comm_size; i++ ) + for (int i = 0; i < this->comm_size; i++) N_recv_tot += recv_cnt[i]; - if ( N_recv_tot == 0 ) + if (N_recv_tot == 0) return; - std::vector recv_buf( N_recv_tot ); + std::vector recv_buf(N_recv_tot); TYPE *send_data = nullptr; - if ( send_buf.size() > 0 ) { + if (send_buf.size() > 0) { send_data = &send_buf[0]; } TYPE *recv_data = &recv_buf[0]; - static_assert( is_mpi_copyable(), "Object is not trivially copyable" ); - this->allGather( - send_data, (int) send_buf.size(), recv_data, &recv_cnt[0], &recv_disp[0], true ); - for ( size_t i = 0; i < recv_buf.size(); i++ ) - set.insert( recv_buf[i] ); + static_assert(is_mpi_copyable(), "Object is not trivially copyable"); + this->allGather(send_data, (int)send_buf.size(), recv_data, + &recv_cnt[0], &recv_disp[0], true); + for (size_t i = 0; i < recv_buf.size(); i++) + set.insert(recv_buf[i]); } - /************************************************************************ * mapGather * ************************************************************************/ -template -inline void MPI_CLASS::mapGather( std::map &map ) const -{ +template +inline void MPI_CLASS::mapGather(std::map &map) const { std::vector send_id; std::vector send_data; - send_id.reserve( map.size() ); - send_data.reserve( map.size() ); - for ( auto it = map.begin(); it != map.end(); ++it ) { - send_id.push_back( it->first ); - send_data.push_back( it->second ); + send_id.reserve(map.size()); + send_data.reserve(map.size()); + for (auto it = map.begin(); it != map.end(); ++it) { + send_id.push_back(it->first); + send_data.push_back(it->second); } - int send_size = (int) send_id.size(); - std::vector recv_cnt( this->comm_size, 0 ); - this->allGather( send_size, &recv_cnt[0] ); - std::vector recv_disp( this->comm_size, 0 ); - for ( int i = 1; i < this->comm_size; i++ ) + int send_size = (int)send_id.size(); + std::vector recv_cnt(this->comm_size, 0); + this->allGather(send_size, &recv_cnt[0]); + std::vector recv_disp(this->comm_size, 0); + for (int i = 1; i < this->comm_size; i++) recv_disp[i] = recv_disp[i - 1] + recv_cnt[i - 1]; size_t N_recv_tot = 0; - for ( int i = 0; i < this->comm_size; i++ ) + for (int i = 0; i < this->comm_size; i++) N_recv_tot += recv_cnt[i]; - if ( N_recv_tot == 0 ) + if (N_recv_tot == 0) return; - std::vector recv_id( N_recv_tot ); - std::vector recv_data( N_recv_tot ); - KEY *send_data1 = nullptr; + std::vector recv_id(N_recv_tot); + std::vector recv_data(N_recv_tot); + KEY *send_data1 = nullptr; DATA *send_data2 = nullptr; - if ( send_id.size() > 0 ) { + if (send_id.size() > 0) { send_data1 = &send_id[0]; send_data2 = &send_data[0]; } - static_assert( is_mpi_copyable(), "Object is not trivially copyable" ); - this->allGather( send_data1, send_size, &recv_id[0], &recv_cnt[0], &recv_disp[0], true ); - this->allGather( - send_data2, send_size, &recv_data[0], &recv_cnt[0], &recv_disp[0], true ); + static_assert(is_mpi_copyable(), "Object is not trivially copyable"); + this->allGather(send_data1, send_size, &recv_id[0], &recv_cnt[0], + &recv_disp[0], true); + this->allGather(send_data2, send_size, &recv_data[0], &recv_cnt[0], + &recv_disp[0], true); map = std::map(); - for ( size_t i = 0; i < N_recv_tot; i++ ) - map.insert( std::pair( recv_id[i], recv_data[i] ) ); + for (size_t i = 0; i < N_recv_tot; i++) + map.insert(std::pair(recv_id[i], recv_data[i])); } - /************************************************************************ * sumScan * 
************************************************************************/ -template -inline void MPI_CLASS::sumScan( const TYPE *x, TYPE *y, const int n ) const -{ - if ( comm_size > 1 ) { - call_sumScan( x, y, n ); +template +inline void MPI_CLASS::sumScan(const TYPE *x, TYPE *y, int n) const { + if (comm_size > 1) { + call_sumScan(x, y, n); } else { - for ( int i = 0; i < n; i++ ) + for (int i = 0; i < n; i++) y[i] = x[i]; } } // Define specializations of call_sumScan(const TYPE*, TYPE*, int) -#if defined( USE_MPI ) || defined( USE_EXT_MPI ) -template<> -void MPI_CLASS::call_sumScan( const unsigned char *, unsigned char *, int ) const; -template<> -void MPI_CLASS::call_sumScan( const char *, char *, int ) const; -template<> -void MPI_CLASS::call_sumScan( const unsigned int *, unsigned int *, int ) const; -template<> -void MPI_CLASS::call_sumScan( const int *, int *, int ) const; -template<> -void MPI_CLASS::call_sumScan( - const unsigned long int *, unsigned long int *, int ) const; -template<> -void MPI_CLASS::call_sumScan( const long int *, long int *, int ) const; -template<> -void MPI_CLASS::call_sumScan( const size_t *, size_t *, int ) const; -template<> -void MPI_CLASS::call_sumScan( const float *, float *, int ) const; -template<> -void MPI_CLASS::call_sumScan( const double *, double *, int ) const; -template<> -void MPI_CLASS::call_sumScan>( - const std::complex *, std::complex *, int ) const; +#if defined(USE_MPI) || defined(USE_EXT_MPI) +template <> +void MPI_CLASS::call_sumScan(const unsigned char *, + unsigned char *, int) const; +template <> void MPI_CLASS::call_sumScan(const char *, char *, int) const; +template <> +void MPI_CLASS::call_sumScan(const unsigned int *, unsigned int *, + int) const; +template <> void MPI_CLASS::call_sumScan(const int *, int *, int) const; +template <> +void MPI_CLASS::call_sumScan(const unsigned long int *, + unsigned long int *, int) const; +template <> +void MPI_CLASS::call_sumScan(const long int *, long int *, int) const; +template <> +void MPI_CLASS::call_sumScan(const size_t *, size_t *, int) const; +template <> +void MPI_CLASS::call_sumScan(const float *, float *, int) const; +template <> +void MPI_CLASS::call_sumScan(const double *, double *, int) const; +template <> +void MPI_CLASS::call_sumScan>(const std::complex *, + std::complex *, + int) const; #endif // Default instantiations of call_sumScan(const TYPE*, TYPE*, int) -template -void MPI_CLASS::call_sumScan( const TYPE *, TYPE *, int ) const -{ +template +void MPI_CLASS::call_sumScan(const TYPE *, TYPE *, int) const { char message[200]; - sprintf( message, "Default instantion of sumScan in parallel is not supported (%s)", - typeid( TYPE ).name() ); - MPI_CLASS_ERROR( message ); + sprintf(message, + "Default instantion of sumScan in parallel is not supported (%s)", + typeid(TYPE).name()); + MPI_CLASS_ERROR(message); } - /************************************************************************ * minScan * ************************************************************************/ -template -inline void MPI_CLASS::minScan( const TYPE *x, TYPE *y, const int n ) const -{ - if ( comm_size > 1 ) { - call_minScan( x, y, n ); +template +inline void MPI_CLASS::minScan(const TYPE *x, TYPE *y, int n) const { + if (comm_size > 1) { + call_minScan(x, y, n); } else { - for ( int i = 0; i < n; i++ ) + for (int i = 0; i < n; i++) y[i] = x[i]; } } // Define specializations of call_minScan(const TYPE*, TYPE*, int) -#if defined( USE_MPI ) || defined( USE_EXT_MPI ) -template<> -void 
MPI_CLASS::call_minScan( const unsigned char *, unsigned char *, int ) const; -template<> -void MPI_CLASS::call_minScan( const char *, char *, int ) const; -template<> -void MPI_CLASS::call_minScan( const unsigned int *, unsigned int *, int ) const; -template<> -void MPI_CLASS::call_minScan( const int *, int *, int ) const; -template<> -void MPI_CLASS::call_minScan( - const unsigned long int *, unsigned long int *, int ) const; -template<> -void MPI_CLASS::call_minScan( const long int *, long int *, int ) const; -template<> -void MPI_CLASS::call_minScan( const size_t *, size_t *, int ) const; -template<> -void MPI_CLASS::call_minScan( const float *, float *, int ) const; -template<> -void MPI_CLASS::call_minScan( const double *, double *, int ) const; +#if defined(USE_MPI) || defined(USE_EXT_MPI) +template <> +void MPI_CLASS::call_minScan(const unsigned char *, + unsigned char *, int) const; +template <> void MPI_CLASS::call_minScan(const char *, char *, int) const; +template <> +void MPI_CLASS::call_minScan(const unsigned int *, unsigned int *, + int) const; +template <> void MPI_CLASS::call_minScan(const int *, int *, int) const; +template <> +void MPI_CLASS::call_minScan(const unsigned long int *, + unsigned long int *, int) const; +template <> +void MPI_CLASS::call_minScan(const long int *, long int *, int) const; +template <> +void MPI_CLASS::call_minScan(const size_t *, size_t *, int) const; +template <> +void MPI_CLASS::call_minScan(const float *, float *, int) const; +template <> +void MPI_CLASS::call_minScan(const double *, double *, int) const; #endif // Default instantiations of call_minScan(const TYPE*, TYPE*, int) -template -void MPI_CLASS::call_minScan( const TYPE *, TYPE *, int ) const -{ +template +void MPI_CLASS::call_minScan(const TYPE *, TYPE *, int) const { char message[200]; - sprintf( message, "Default instantion of minScan in parallel is not supported (%s)", - typeid( TYPE ).name() ); - MPI_CLASS_ERROR( message ); + sprintf(message, + "Default instantion of minScan in parallel is not supported (%s)", + typeid(TYPE).name()); + MPI_CLASS_ERROR(message); } - /************************************************************************ * maxScan * ************************************************************************/ -template -inline void MPI_CLASS::maxScan( const TYPE *x, TYPE *y, const int n ) const -{ - if ( comm_size > 1 ) { - call_maxScan( x, y, n ); +template +inline void MPI_CLASS::maxScan(const TYPE *x, TYPE *y, int n) const { + if (comm_size > 1) { + call_maxScan(x, y, n); } else { - for ( int i = 0; i < n; i++ ) + for (int i = 0; i < n; i++) y[i] = x[i]; } } // Define specializations of call_maxScan(const TYPE*, TYPE*, int) -#if defined( USE_MPI ) || defined( USE_EXT_MPI ) -template<> -void MPI_CLASS::call_maxScan( const unsigned char *, unsigned char *, int ) const; -template<> -void MPI_CLASS::call_maxScan( const char *, char *, int ) const; -template<> -void MPI_CLASS::call_maxScan( const unsigned int *, unsigned int *, int ) const; -template<> -void MPI_CLASS::call_maxScan( const int *, int *, int ) const; -template<> -void MPI_CLASS::call_maxScan( - const unsigned long int *, unsigned long int *, int ) const; -template<> -void MPI_CLASS::call_maxScan( const long int *, long int *, int ) const; -template<> -void MPI_CLASS::call_maxScan( const size_t *, size_t *, int ) const; -template<> -void MPI_CLASS::call_maxScan( const float *, float *, int ) const; -template<> -void MPI_CLASS::call_maxScan( const double *, double *, int ) const; +#if defined(USE_MPI) 
|| defined(USE_EXT_MPI) +template <> +void MPI_CLASS::call_maxScan(const unsigned char *, + unsigned char *, int) const; +template <> void MPI_CLASS::call_maxScan(const char *, char *, int) const; +template <> +void MPI_CLASS::call_maxScan(const unsigned int *, unsigned int *, + int) const; +template <> void MPI_CLASS::call_maxScan(const int *, int *, int) const; +template <> +void MPI_CLASS::call_maxScan(const unsigned long int *, + unsigned long int *, int) const; +template <> +void MPI_CLASS::call_maxScan(const long int *, long int *, int) const; +template <> +void MPI_CLASS::call_maxScan(const size_t *, size_t *, int) const; +template <> +void MPI_CLASS::call_maxScan(const float *, float *, int) const; +template <> +void MPI_CLASS::call_maxScan(const double *, double *, int) const; #endif // Default instantiations of call_maxScan(const TYPE*, TYPE*, int) -template -void MPI_CLASS::call_maxScan( const TYPE *, TYPE *, int ) const -{ +template +void MPI_CLASS::call_maxScan(const TYPE *, TYPE *, int) const { char message[200]; - sprintf( message, "Default instantion of maxReduce in parallel is not supported (%s)", - typeid( TYPE ).name() ); - MPI_CLASS_ERROR( message ); + sprintf(message, + "Default instantiation of maxScan in parallel is not supported (%s)", + typeid(TYPE).name()); + MPI_CLASS_ERROR(message); } - /************************************************************************ * allToAll * ************************************************************************/ -// Define specializations of allToAll(const int n, const char*, char* ) -#if defined( USE_MPI ) || defined( USE_EXT_MPI ) -template<> -void MPI_CLASS::allToAll( - const int n, const unsigned char *, unsigned char * ) const; -template<> -void MPI_CLASS::allToAll( const int n, const char *, char * ) const; -template<> -void MPI_CLASS::allToAll( const int n, const unsigned int *, unsigned int * ) const; -template<> -void MPI_CLASS::allToAll( const int n, const int *, int * ) const; -template<> -void MPI_CLASS::allToAll( - const int n, const unsigned long int *, unsigned long int * ) const; -template<> -void MPI_CLASS::allToAll( const int n, const long int *, long int * ) const; -template<> -void MPI_CLASS::allToAll( const int n, const float *, float * ) const; -template<> -void MPI_CLASS::allToAll( const int n, const double *, double * ) const; +// Define specializations of allToAll( int n, const char*, char* ) +#if defined(USE_MPI) || defined(USE_EXT_MPI) +template <> +void MPI_CLASS::allToAll(int n, const unsigned char *, + unsigned char *) const; +template <> void MPI_CLASS::allToAll(int n, const char *, char *) const; +template <> +void MPI_CLASS::allToAll(int n, const unsigned int *, + unsigned int *) const; +template <> void MPI_CLASS::allToAll(int n, const int *, int *) const; +template <> +void MPI_CLASS::allToAll(int n, const unsigned long int *, + unsigned long int *) const; +template <> +void MPI_CLASS::allToAll(int n, const long int *, long int *) const; +template <> +void MPI_CLASS::allToAll(int n, const float *, float *) const; +template <> +void MPI_CLASS::allToAll(int n, const double *, double *) const; #endif -// Default instantiations of allToAll(const int n, const char*, char* ) -#if defined( USE_MPI ) || defined( USE_EXT_MPI ) -template -void MPI_CLASS::allToAll( const int n, const TYPE *send_data, TYPE *recv_data ) const -{ - static_assert( is_mpi_copyable(), "Object is not trivially copyable" ); - allToAll( n * sizeof( TYPE ), (char *) send_data, (char *) recv_data ); +// Default instantiations of
allToAll( int n, const char*, char* ) +#if defined(USE_MPI) || defined(USE_EXT_MPI) +template +void MPI_CLASS::allToAll(int n, const TYPE *send_data, TYPE *recv_data) const { + static_assert(is_mpi_copyable(), "Object is not trivially copyable"); + allToAll(n * sizeof(TYPE), (char *)send_data, (char *)recv_data); } #else -template -void MPI_CLASS::allToAll( const int n, const TYPE *send_data, TYPE *recv_data ) const -{ - if ( comm_size != 1 ) - MPI_CLASS_ERROR( "Invalid size for allToAll" ); - for ( int i = 0; i < n; i++ ) +template +void MPI_CLASS::allToAll(int n, const TYPE *send_data, TYPE *recv_data) const { + if (comm_size != 1) + MPI_CLASS_ERROR("Invalid size for allToAll"); + for (int i = 0; i < n; i++) recv_data[i] = send_data[i]; } #endif - /************************************************************************ * allToAll * ************************************************************************/ -template -int MPI_CLASS::allToAll( const TYPE *send_data, const int send_cnt[], const int send_disp[], - TYPE *recv_data, int *recv_cnt, int *recv_disp, bool known_recv ) const -{ +template +int MPI_CLASS::allToAll(const TYPE *send_data, const int send_cnt[], + const int send_disp[], TYPE *recv_data, int *recv_cnt, + int *recv_disp, bool known_recv) const { int N_recieved = 0; - if ( comm_size == 1 ) { + if (comm_size == 1) { // Special case for single-processor communicators - if ( known_recv ) { - if ( recv_cnt[0] != send_cnt[0] && send_cnt[0] > 0 ) - MPI_CLASS_ERROR( "Single processor send/recv are different sizes" ); + if (known_recv) { + if (recv_cnt[0] != send_cnt[0] && send_cnt[0] > 0) + MPI_CLASS_ERROR( + "Single processor send/recv are different sizes"); } else { - if ( recv_cnt != nullptr ) + if (recv_cnt != nullptr) recv_cnt[0] = send_cnt[0]; - if ( recv_disp != nullptr ) + if (recv_disp != nullptr) recv_disp[0] = send_disp[0]; } - for ( int i = 0; i < send_cnt[0]; i++ ) + for (int i = 0; i < send_cnt[0]; i++) recv_data[i + recv_disp[0]] = send_data[i + send_disp[0]]; N_recieved = send_cnt[0]; - } else if ( known_recv ) { + } else if (known_recv) { // The recieve sizes are known - MPI_CLASS_ASSERT( recv_cnt != nullptr && recv_disp != nullptr ); - call_allToAll( send_data, send_cnt, send_disp, recv_data, recv_cnt, recv_disp ); - for ( int i = 0; i < comm_size; i++ ) + MPI_CLASS_ASSERT(recv_cnt != nullptr && recv_disp != nullptr); + call_allToAll(send_data, send_cnt, send_disp, recv_data, recv_cnt, + recv_disp); + for (int i = 0; i < comm_size; i++) N_recieved += recv_cnt[i]; } else { // The recieve sizes are not known, we need to communicate that information first - int *recv_cnt2 = recv_cnt; + int *recv_cnt2 = recv_cnt; int *recv_disp2 = recv_disp; - if ( recv_cnt == nullptr ) + if (recv_cnt == nullptr) recv_cnt2 = new int[comm_size]; - if ( recv_disp == nullptr ) + if (recv_disp == nullptr) recv_disp2 = new int[comm_size]; // Communicate the size we will be recieving from each processor - allToAll( 1, send_cnt, recv_cnt2 ); + allToAll(1, send_cnt, recv_cnt2); recv_disp2[0] = 0; - for ( int i = 1; i < comm_size; i++ ) + for (int i = 1; i < comm_size; i++) recv_disp2[i] = recv_disp2[i - 1] + recv_cnt2[i - 1]; // Send the data - call_allToAll( send_data, send_cnt, send_disp, recv_data, recv_cnt2, recv_disp2 ); - for ( int i = 0; i < comm_size; i++ ) + call_allToAll(send_data, send_cnt, send_disp, recv_data, recv_cnt2, + recv_disp2); + for (int i = 0; i < comm_size; i++) N_recieved += recv_cnt2[i]; - if ( recv_cnt == nullptr ) + if (recv_cnt == nullptr) delete[] recv_cnt2; - if 
( recv_disp == nullptr ) + if (recv_disp == nullptr) delete[] recv_disp2; } return N_recieved; } // Define specializations of call_allToAll -#if defined( USE_MPI ) || defined( USE_EXT_MPI ) -template<> -void MPI_CLASS::call_allToAll( const unsigned char *, const int *, const int *, - unsigned char *, const int *, const int * ) const; -template<> -void MPI_CLASS::call_allToAll( - const char *, const int *, const int *, char *, const int *, const int * ) const; -template<> -void MPI_CLASS::call_allToAll( const unsigned int *, const int *, const int *, - unsigned int *, const int *, const int * ) const; -template<> -void MPI_CLASS::call_allToAll( - const int *, const int *, const int *, int *, const int *, const int * ) const; -template<> -void MPI_CLASS::call_allToAll( const unsigned long int *, const int *, - const int *, unsigned long int *, const int *, const int * ) const; -template<> -void MPI_CLASS::call_allToAll( - const long int *, const int *, const int *, long int *, const int *, const int * ) const; -template<> -void MPI_CLASS::call_allToAll( - const float *, const int *, const int *, float *, const int *, const int * ) const; -template<> -void MPI_CLASS::call_allToAll( - const double *, const int *, const int *, double *, const int *, const int * ) const; +#if defined(USE_MPI) || defined(USE_EXT_MPI) +template <> +void MPI_CLASS::call_allToAll(const unsigned char *, const int *, + const int *, unsigned char *, + const int *, const int *) const; +template <> +void MPI_CLASS::call_allToAll(const char *, const int *, const int *, + char *, const int *, const int *) const; +template <> +void MPI_CLASS::call_allToAll(const unsigned int *, const int *, + const int *, unsigned int *, + const int *, const int *) const; +template <> +void MPI_CLASS::call_allToAll(const int *, const int *, const int *, int *, + const int *, const int *) const; +template <> +void MPI_CLASS::call_allToAll(const unsigned long int *, + const int *, const int *, + unsigned long int *, + const int *, + const int *) const; +template <> +void MPI_CLASS::call_allToAll(const long int *, const int *, + const int *, long int *, const int *, + const int *) const; +template <> +void MPI_CLASS::call_allToAll(const float *, const int *, const int *, + float *, const int *, const int *) const; +template <> +void MPI_CLASS::call_allToAll(const double *, const int *, const int *, + double *, const int *, const int *) const; #else -template<> -void MPI_CLASS::call_allToAll( - const char *, const int *, const int *, char *, const int *, const int * ) const; +template <> +void MPI_CLASS::call_allToAll(const char *, const int *, const int *, + char *, const int *, const int *) const; #endif // Default instantiations of call_allToAll -template -void MPI_CLASS::call_allToAll( const TYPE *send_data, const int send_cnt[], const int send_disp[], - TYPE *recv_data, const int *recv_cnt, const int *recv_disp ) const -{ - int *send_cnt2 = new int[comm_size]; - int *recv_cnt2 = new int[comm_size]; +template +void MPI_CLASS::call_allToAll(const TYPE *send_data, const int send_cnt[], + const int send_disp[], TYPE *recv_data, + const int *recv_cnt, const int *recv_disp) const { + int *send_cnt2 = new int[comm_size]; + int *recv_cnt2 = new int[comm_size]; int *send_disp2 = new int[comm_size]; int *recv_disp2 = new int[comm_size]; - for ( int i = 0; i < comm_size; i++ ) { - send_cnt2[i] = send_cnt[i] * sizeof( TYPE ); - send_disp2[i] = send_disp[i] * sizeof( TYPE ); - recv_cnt2[i] = recv_cnt[i] * sizeof( TYPE ); - recv_disp2[i] = recv_disp[i] 
* sizeof( TYPE ); + for (int i = 0; i < comm_size; i++) { + send_cnt2[i] = send_cnt[i] * sizeof(TYPE); + send_disp2[i] = send_disp[i] * sizeof(TYPE); + recv_cnt2[i] = recv_cnt[i] * sizeof(TYPE); + recv_disp2[i] = recv_disp[i] * sizeof(TYPE); } - static_assert( is_mpi_copyable(), "Object is not trivially copyable" ); - call_allToAll( - (char *) send_data, send_cnt2, send_disp2, (char *) recv_data, recv_cnt2, recv_disp2 ); + static_assert(is_mpi_copyable(), "Object is not trivially copyable"); + call_allToAll((char *)send_data, send_cnt2, send_disp2, + (char *)recv_data, recv_cnt2, recv_disp2); delete[] send_cnt2; delete[] recv_cnt2; delete[] send_disp2; delete[] recv_disp2; } - } // namespace Utilities #endif diff --git a/common/MPI.cpp b/common/MPI.cpp index e75e242e..9eb97bb3 100644 --- a/common/MPI.cpp +++ b/common/MPI.cpp @@ -1115,15 +1115,14 @@ bool MPI_CLASS::anyReduce(const bool value) const { template <> void MPI_CLASS::call_sumReduce(const unsigned char *send, unsigned char *recv, - const int n) const { + int n) const { PROFILE_START("sumReduce1", profile_level); MPI_Allreduce((void *)send, (void *)recv, n, MPI_UNSIGNED_CHAR, MPI_SUM, communicator); PROFILE_STOP("sumReduce1", profile_level); } template <> -void MPI_CLASS::call_sumReduce(unsigned char *x, - const int n) const { +void MPI_CLASS::call_sumReduce(unsigned char *x, int n) const { PROFILE_START("sumReduce2", profile_level); auto send = x; auto recv = new unsigned char[n]; @@ -1136,13 +1135,13 @@ void MPI_CLASS::call_sumReduce(unsigned char *x, // char template <> void MPI_CLASS::call_sumReduce(const char *send, char *recv, - const int n) const { + int n) const { PROFILE_START("sumReduce1", profile_level); MPI_Allreduce((void *)send, (void *)recv, n, MPI_SIGNED_CHAR, MPI_SUM, communicator); PROFILE_STOP("sumReduce1", profile_level); } -template <> void MPI_CLASS::call_sumReduce(char *x, const int n) const { +template <> void MPI_CLASS::call_sumReduce(char *x, int n) const { PROFILE_START("sumReduce2", profile_level); auto send = x; auto recv = new char[n]; @@ -1155,16 +1154,14 @@ template <> void MPI_CLASS::call_sumReduce(char *x, const int n) const { // unsigned int template <> void MPI_CLASS::call_sumReduce(const unsigned int *send, - unsigned int *recv, - const int n) const { + unsigned int *recv, int n) const { PROFILE_START("sumReduce1", profile_level); MPI_Allreduce((void *)send, (void *)recv, n, MPI_UNSIGNED, MPI_SUM, communicator); PROFILE_STOP("sumReduce1", profile_level); } template <> -void MPI_CLASS::call_sumReduce(unsigned int *x, - const int n) const { +void MPI_CLASS::call_sumReduce(unsigned int *x, int n) const { PROFILE_START("sumReduce2", profile_level); auto send = x; auto recv = new unsigned int[n]; @@ -1176,14 +1173,13 @@ void MPI_CLASS::call_sumReduce(unsigned int *x, } // int template <> -void MPI_CLASS::call_sumReduce(const int *send, int *recv, - const int n) const { +void MPI_CLASS::call_sumReduce(const int *send, int *recv, int n) const { PROFILE_START("sumReduce1", profile_level); MPI_Allreduce((void *)send, (void *)recv, n, MPI_INT, MPI_SUM, communicator); PROFILE_STOP("sumReduce1", profile_level); } -template <> void MPI_CLASS::call_sumReduce(int *x, const int n) const { +template <> void MPI_CLASS::call_sumReduce(int *x, int n) const { PROFILE_START("sumReduce2", profile_level); auto send = x; auto recv = new int[n]; @@ -1196,14 +1192,13 @@ template <> void MPI_CLASS::call_sumReduce(int *x, const int n) const { // long int template <> void MPI_CLASS::call_sumReduce(const long int *send, long 
int *recv, - const int n) const { + int n) const { PROFILE_START("sumReduce1", profile_level); MPI_Allreduce((void *)send, (void *)recv, n, MPI_LONG, MPI_SUM, communicator); PROFILE_STOP("sumReduce1", profile_level); } -template <> -void MPI_CLASS::call_sumReduce(long int *x, const int n) const { +template <> void MPI_CLASS::call_sumReduce(long int *x, int n) const { PROFILE_START("sumReduce2", profile_level); auto send = x; auto recv = new long int[n]; @@ -1217,15 +1212,14 @@ void MPI_CLASS::call_sumReduce(long int *x, const int n) const { template <> void MPI_CLASS::call_sumReduce(const unsigned long *send, unsigned long *recv, - const int n) const { + int n) const { PROFILE_START("sumReduce1", profile_level); MPI_Allreduce((void *)send, (void *)recv, n, MPI_UNSIGNED_LONG, MPI_SUM, communicator); PROFILE_STOP("sumReduce1", profile_level); } template <> -void MPI_CLASS::call_sumReduce(unsigned long *x, - const int n) const { +void MPI_CLASS::call_sumReduce(unsigned long *x, int n) const { PROFILE_START("sumReduce2", profile_level); auto send = x; auto recv = new unsigned long int[n]; @@ -1239,15 +1233,14 @@ void MPI_CLASS::call_sumReduce(unsigned long *x, #ifdef USE_WINDOWS template <> void MPI_CLASS::call_sumReduce(const size_t *send, size_t *recv, - const int n) const { + int n) const { MPI_ASSERT(MPI_SIZE_T != 0); PROFILE_START("sumReduce1", profile_level); MPI_Allreduce((void *)send, (void *)recv, n, MPI_SIZE_T, MPI_SUM, communicator); PROFILE_STOP("sumReduce1", profile_level); } -template <> -void MPI_CLASS::call_sumReduce(size_t *x, const int n) const { +template <> void MPI_CLASS::call_sumReduce(size_t *x, int n) const { MPI_ASSERT(MPI_SIZE_T != 0); PROFILE_START("sumReduce2", profile_level); auto send = x; @@ -1263,13 +1256,13 @@ void MPI_CLASS::call_sumReduce(size_t *x, const int n) const { // float template <> void MPI_CLASS::call_sumReduce(const float *send, float *recv, - const int n) const { + int n) const { PROFILE_START("sumReduce1", profile_level); MPI_Allreduce((void *)send, (void *)recv, n, MPI_FLOAT, MPI_SUM, communicator); PROFILE_STOP("sumReduce1", profile_level); } -template <> void MPI_CLASS::call_sumReduce(float *x, const int n) const { +template <> void MPI_CLASS::call_sumReduce(float *x, int n) const { PROFILE_START("sumReduce2", profile_level); auto send = x; auto recv = new float[n]; @@ -1282,14 +1275,13 @@ template <> void MPI_CLASS::call_sumReduce(float *x, const int n) const { // double template <> void MPI_CLASS::call_sumReduce(const double *send, double *recv, - const int n) const { + int n) const { PROFILE_START("sumReduce1", profile_level); MPI_Allreduce((void *)send, (void *)recv, n, MPI_DOUBLE, MPI_SUM, communicator); PROFILE_STOP("sumReduce1", profile_level); } -template <> -void MPI_CLASS::call_sumReduce(double *x, const int n) const { +template <> void MPI_CLASS::call_sumReduce(double *x, int n) const { PROFILE_START("sumReduce2", profile_level); auto send = x; auto recv = new double[n]; @@ -1302,7 +1294,7 @@ void MPI_CLASS::call_sumReduce(double *x, const int n) const { // std::complex template <> void MPI_CLASS::call_sumReduce>( - const std::complex *x, std::complex *y, const int n) const { + const std::complex *x, std::complex *y, int n) const { PROFILE_START("sumReduce1", profile_level); auto send = new double[2 * n]; auto recv = new double[2 * n]; @@ -1320,7 +1312,7 @@ void MPI_CLASS::call_sumReduce>( } template <> void MPI_CLASS::call_sumReduce>(std::complex *x, - const int n) const { + int n) const { PROFILE_START("sumReduce2", 
profile_level); auto send = new double[2 * n]; auto recv = new double[2 * n]; @@ -1345,7 +1337,7 @@ void MPI_CLASS::call_sumReduce>(std::complex *x, // unsigned char template <> void MPI_CLASS::call_minReduce(const unsigned char *send, - unsigned char *recv, const int n, + unsigned char *recv, int n, int *comm_rank_of_min) const { if (comm_rank_of_min == nullptr) { PROFILE_START("minReduce1", profile_level); @@ -1363,7 +1355,7 @@ void MPI_CLASS::call_minReduce(const unsigned char *send, } } template <> -void MPI_CLASS::call_minReduce(unsigned char *x, const int n, +void MPI_CLASS::call_minReduce(unsigned char *x, int n, int *comm_rank_of_min) const { if (comm_rank_of_min == nullptr) { PROFILE_START("minReduce2", profile_level); @@ -1386,7 +1378,7 @@ void MPI_CLASS::call_minReduce(unsigned char *x, const int n, } // char template <> -void MPI_CLASS::call_minReduce(const char *send, char *recv, const int n, +void MPI_CLASS::call_minReduce(const char *send, char *recv, int n, int *comm_rank_of_min) const { if (comm_rank_of_min == nullptr) { PROFILE_START("minReduce1", profile_level); @@ -1404,7 +1396,7 @@ void MPI_CLASS::call_minReduce(const char *send, char *recv, const int n, } } template <> -void MPI_CLASS::call_minReduce(char *x, const int n, +void MPI_CLASS::call_minReduce(char *x, int n, int *comm_rank_of_min) const { if (comm_rank_of_min == nullptr) { PROFILE_START("minReduce2", profile_level); @@ -1428,7 +1420,7 @@ void MPI_CLASS::call_minReduce(char *x, const int n, // unsigned int template <> void MPI_CLASS::call_minReduce(const unsigned int *send, - unsigned int *recv, const int n, + unsigned int *recv, int n, int *comm_rank_of_min) const { if (comm_rank_of_min == nullptr) { PROFILE_START("minReduce1", profile_level); @@ -1446,7 +1438,7 @@ void MPI_CLASS::call_minReduce(const unsigned int *send, } } template <> -void MPI_CLASS::call_minReduce(unsigned int *x, const int n, +void MPI_CLASS::call_minReduce(unsigned int *x, int n, int *comm_rank_of_min) const { if (comm_rank_of_min == nullptr) { PROFILE_START("minReduce2", profile_level); @@ -1469,7 +1461,7 @@ void MPI_CLASS::call_minReduce(unsigned int *x, const int n, } // int template <> -void MPI_CLASS::call_minReduce(const int *x, int *y, const int n, +void MPI_CLASS::call_minReduce(const int *x, int *y, int n, int *comm_rank_of_min) const { PROFILE_START("minReduce1", profile_level); if (comm_rank_of_min == nullptr) { @@ -1492,7 +1484,7 @@ void MPI_CLASS::call_minReduce(const int *x, int *y, const int n, PROFILE_STOP("minReduce1", profile_level); } template <> -void MPI_CLASS::call_minReduce(int *x, const int n, +void MPI_CLASS::call_minReduce(int *x, int n, int *comm_rank_of_min) const { PROFILE_START("minReduce2", profile_level); if (comm_rank_of_min == nullptr) { @@ -1523,7 +1515,7 @@ void MPI_CLASS::call_minReduce(int *x, const int n, template <> void MPI_CLASS::call_minReduce(const unsigned long int *send, unsigned long int *recv, - const int n, + int n, int *comm_rank_of_min) const { if (comm_rank_of_min == nullptr) { PROFILE_START("minReduce1", profile_level); @@ -1541,8 +1533,7 @@ void MPI_CLASS::call_minReduce(const unsigned long int *send, } } template <> -void MPI_CLASS::call_minReduce(unsigned long int *x, - const int n, +void MPI_CLASS::call_minReduce(unsigned long int *x, int n, int *comm_rank_of_min) const { if (comm_rank_of_min == nullptr) { PROFILE_START("minReduce2", profile_level); @@ -1565,8 +1556,7 @@ void MPI_CLASS::call_minReduce(unsigned long int *x, } // long int template <> -void 
MPI_CLASS::call_minReduce(const long int *x, long int *y, - const int n, +void MPI_CLASS::call_minReduce(const long int *x, long int *y, int n, int *comm_rank_of_min) const { PROFILE_START("minReduce1", profile_level); if (comm_rank_of_min == nullptr) { @@ -1589,7 +1579,7 @@ void MPI_CLASS::call_minReduce(const long int *x, long int *y, PROFILE_STOP("minReduce1", profile_level); } template <> -void MPI_CLASS::call_minReduce(long int *x, const int n, +void MPI_CLASS::call_minReduce(long int *x, int n, int *comm_rank_of_min) const { PROFILE_START("minReduce2", profile_level); if (comm_rank_of_min == nullptr) { @@ -1619,8 +1609,8 @@ void MPI_CLASS::call_minReduce(long int *x, const int n, // unsigned long long int template <> void MPI_CLASS::call_minReduce( - const unsigned long long int *send, unsigned long long int *recv, - const int n, int *comm_rank_of_min) const { + const unsigned long long int *send, unsigned long long int *recv, int n, + int *comm_rank_of_min) const { PROFILE_START("minReduce1", profile_level); if (comm_rank_of_min == nullptr) { auto x = new long long int[n]; @@ -1647,7 +1637,7 @@ void MPI_CLASS::call_minReduce( } template <> void MPI_CLASS::call_minReduce( - unsigned long long int *x, const int n, int *comm_rank_of_min) const { + unsigned long long int *x, int n, int *comm_rank_of_min) const { auto recv = new unsigned long long int[n]; call_minReduce(x, recv, n, comm_rank_of_min); for (int i = 0; i < n; i++) @@ -1657,7 +1647,7 @@ void MPI_CLASS::call_minReduce( // long long int template <> void MPI_CLASS::call_minReduce(const long long int *x, - long long int *y, const int n, + long long int *y, int n, int *comm_rank_of_min) const { PROFILE_START("minReduce1", profile_level); if (comm_rank_of_min == nullptr) { @@ -1676,7 +1666,7 @@ void MPI_CLASS::call_minReduce(const long long int *x, PROFILE_STOP("minReduce1", profile_level); } template <> -void MPI_CLASS::call_minReduce(long long int *x, const int n, +void MPI_CLASS::call_minReduce(long long int *x, int n, int *comm_rank_of_min) const { auto recv = new long long int[n]; call_minReduce(x, recv, n, comm_rank_of_min); @@ -1686,7 +1676,7 @@ void MPI_CLASS::call_minReduce(long long int *x, const int n, } // float template <> -void MPI_CLASS::call_minReduce(const float *x, float *y, const int n, +void MPI_CLASS::call_minReduce(const float *x, float *y, int n, int *comm_rank_of_min) const { PROFILE_START("minReduce1", profile_level); if (comm_rank_of_min == nullptr) { @@ -1709,7 +1699,7 @@ void MPI_CLASS::call_minReduce(const float *x, float *y, const int n, PROFILE_STOP("minReduce1", profile_level); } template <> -void MPI_CLASS::call_minReduce(float *x, const int n, +void MPI_CLASS::call_minReduce(float *x, int n, int *comm_rank_of_min) const { PROFILE_START("minReduce2", profile_level); if (comm_rank_of_min == nullptr) { @@ -1738,7 +1728,7 @@ void MPI_CLASS::call_minReduce(float *x, const int n, } // double template <> -void MPI_CLASS::call_minReduce(const double *x, double *y, const int n, +void MPI_CLASS::call_minReduce(const double *x, double *y, int n, int *comm_rank_of_min) const { PROFILE_START("minReduce1", profile_level); if (comm_rank_of_min == nullptr) { @@ -1762,7 +1752,7 @@ void MPI_CLASS::call_minReduce(const double *x, double *y, const int n, PROFILE_STOP("minReduce1", profile_level); } template <> -void MPI_CLASS::call_minReduce(double *x, const int n, +void MPI_CLASS::call_minReduce(double *x, int n, int *comm_rank_of_min) const { PROFILE_START("minReduce2", profile_level); if (comm_rank_of_min == 
nullptr) { @@ -1799,7 +1789,7 @@ void MPI_CLASS::call_minReduce(double *x, const int n, // unsigned char template <> void MPI_CLASS::call_maxReduce(const unsigned char *send, - unsigned char *recv, const int n, + unsigned char *recv, int n, int *comm_rank_of_max) const { if (comm_rank_of_max == nullptr) { PROFILE_START("maxReduce1", profile_level); @@ -1817,7 +1807,7 @@ void MPI_CLASS::call_maxReduce(const unsigned char *send, } } template <> -void MPI_CLASS::call_maxReduce(unsigned char *x, const int n, +void MPI_CLASS::call_maxReduce(unsigned char *x, int n, int *comm_rank_of_max) const { if (comm_rank_of_max == nullptr) { PROFILE_START("maxReduce2", profile_level); @@ -1840,7 +1830,7 @@ void MPI_CLASS::call_maxReduce(unsigned char *x, const int n, } // char template <> -void MPI_CLASS::call_maxReduce(const char *send, char *recv, const int n, +void MPI_CLASS::call_maxReduce(const char *send, char *recv, int n, int *comm_rank_of_max) const { if (comm_rank_of_max == nullptr) { PROFILE_START("maxReduce1", profile_level); @@ -1858,7 +1848,7 @@ void MPI_CLASS::call_maxReduce(const char *send, char *recv, const int n, } } template <> -void MPI_CLASS::call_maxReduce(char *x, const int n, +void MPI_CLASS::call_maxReduce(char *x, int n, int *comm_rank_of_max) const { if (comm_rank_of_max == nullptr) { PROFILE_START("maxReduce2", profile_level); @@ -1882,7 +1872,7 @@ void MPI_CLASS::call_maxReduce(char *x, const int n, // unsigned int template <> void MPI_CLASS::call_maxReduce(const unsigned int *send, - unsigned int *recv, const int n, + unsigned int *recv, int n, int *comm_rank_of_max) const { if (comm_rank_of_max == nullptr) { PROFILE_START("maxReduce1", profile_level); @@ -1900,7 +1890,7 @@ void MPI_CLASS::call_maxReduce(const unsigned int *send, } } template <> -void MPI_CLASS::call_maxReduce(unsigned int *x, const int n, +void MPI_CLASS::call_maxReduce(unsigned int *x, int n, int *comm_rank_of_max) const { if (comm_rank_of_max == nullptr) { PROFILE_START("maxReduce2", profile_level); @@ -1923,7 +1913,7 @@ void MPI_CLASS::call_maxReduce(unsigned int *x, const int n, } // int template <> -void MPI_CLASS::call_maxReduce(const int *x, int *y, const int n, +void MPI_CLASS::call_maxReduce(const int *x, int *y, int n, int *comm_rank_of_max) const { PROFILE_START("maxReduce1", profile_level); if (comm_rank_of_max == nullptr) { @@ -1946,7 +1936,7 @@ void MPI_CLASS::call_maxReduce(const int *x, int *y, const int n, PROFILE_STOP("maxReduce1", profile_level); } template <> -void MPI_CLASS::call_maxReduce(int *x, const int n, +void MPI_CLASS::call_maxReduce(int *x, int n, int *comm_rank_of_max) const { PROFILE_START("maxReduce2", profile_level); if (comm_rank_of_max == nullptr) { @@ -1975,8 +1965,7 @@ void MPI_CLASS::call_maxReduce(int *x, const int n, } // long int template <> -void MPI_CLASS::call_maxReduce(const long int *x, long int *y, - const int n, +void MPI_CLASS::call_maxReduce(const long int *x, long int *y, int n, int *comm_rank_of_max) const { PROFILE_START("maxReduce1", profile_level); if (comm_rank_of_max == nullptr) { @@ -1999,7 +1988,7 @@ void MPI_CLASS::call_maxReduce(const long int *x, long int *y, PROFILE_STOP("maxReduce1", profile_level); } template <> -void MPI_CLASS::call_maxReduce(long int *x, const int n, +void MPI_CLASS::call_maxReduce(long int *x, int n, int *comm_rank_of_max) const { PROFILE_START("maxReduce2", profile_level); if (comm_rank_of_max == nullptr) { @@ -2030,7 +2019,7 @@ void MPI_CLASS::call_maxReduce(long int *x, const int n, template <> void 
MPI_CLASS::call_maxReduce(const unsigned long int *send, unsigned long int *recv, - const int n, + int n, int *comm_rank_of_max) const { if (comm_rank_of_max == nullptr) { PROFILE_START("maxReduce1", profile_level); @@ -2048,8 +2037,7 @@ void MPI_CLASS::call_maxReduce(const unsigned long int *send, } } template <> -void MPI_CLASS::call_maxReduce(unsigned long int *x, - const int n, +void MPI_CLASS::call_maxReduce(unsigned long int *x, int n, int *comm_rank_of_max) const { if (comm_rank_of_max == nullptr) { PROFILE_START("maxReduce2", profile_level); @@ -2073,8 +2061,8 @@ void MPI_CLASS::call_maxReduce(unsigned long int *x, // unsigned long long int template <> void MPI_CLASS::call_maxReduce( - const unsigned long long int *send, unsigned long long int *recv, - const int n, int *comm_rank_of_max) const { + const unsigned long long int *send, unsigned long long int *recv, int n, + int *comm_rank_of_max) const { PROFILE_START("maxReduce1", profile_level); if (comm_rank_of_max == nullptr) { auto x = new long long int[n]; @@ -2101,7 +2089,7 @@ void MPI_CLASS::call_maxReduce( } template <> void MPI_CLASS::call_maxReduce( - unsigned long long int *x, const int n, int *comm_rank_of_max) const { + unsigned long long int *x, int n, int *comm_rank_of_max) const { auto recv = new unsigned long long int[n]; call_maxReduce(x, recv, n, comm_rank_of_max); for (int i = 0; i < n; i++) @@ -2111,7 +2099,7 @@ void MPI_CLASS::call_maxReduce( // long long int template <> void MPI_CLASS::call_maxReduce(const long long int *x, - long long int *y, const int n, + long long int *y, int n, int *comm_rank_of_max) const { PROFILE_START("maxReduce1", profile_level); if (comm_rank_of_max == nullptr) { @@ -2130,7 +2118,7 @@ void MPI_CLASS::call_maxReduce(const long long int *x, PROFILE_STOP("maxReduce1", profile_level); } template <> -void MPI_CLASS::call_maxReduce(long long int *x, const int n, +void MPI_CLASS::call_maxReduce(long long int *x, int n, int *comm_rank_of_max) const { auto recv = new long long int[n]; call_maxReduce(x, recv, n, comm_rank_of_max); @@ -2140,7 +2128,7 @@ void MPI_CLASS::call_maxReduce(long long int *x, const int n, } // float template <> -void MPI_CLASS::call_maxReduce(const float *x, float *y, const int n, +void MPI_CLASS::call_maxReduce(const float *x, float *y, int n, int *comm_rank_of_max) const { PROFILE_START("maxReduce1", profile_level); if (comm_rank_of_max == nullptr) { @@ -2164,7 +2152,7 @@ void MPI_CLASS::call_maxReduce(const float *x, float *y, const int n, PROFILE_STOP("maxReduce1", profile_level); } template <> -void MPI_CLASS::call_maxReduce(float *x, const int n, +void MPI_CLASS::call_maxReduce(float *x, int n, int *comm_rank_of_max) const { PROFILE_START("maxReduce2", profile_level); if (comm_rank_of_max == nullptr) { @@ -2193,7 +2181,7 @@ void MPI_CLASS::call_maxReduce(float *x, const int n, } // double template <> -void MPI_CLASS::call_maxReduce(const double *x, double *y, const int n, +void MPI_CLASS::call_maxReduce(const double *x, double *y, int n, int *comm_rank_of_max) const { PROFILE_START("maxReduce1", profile_level); if (comm_rank_of_max == nullptr) { @@ -2217,7 +2205,7 @@ void MPI_CLASS::call_maxReduce(const double *x, double *y, const int n, PROFILE_STOP("maxReduce1", profile_level); } template <> -void MPI_CLASS::call_maxReduce(double *x, const int n, +void MPI_CLASS::call_maxReduce(double *x, int n, int *comm_rank_of_max) const { PROFILE_START("maxReduce2", profile_level); if (comm_rank_of_max == nullptr) { @@ -2253,51 +2241,46 @@ void 
MPI_CLASS::call_maxReduce(double *x, const int n, #ifdef USE_MPI // char template <> -void MPI_CLASS::call_bcast(unsigned char *x, const int n, - const int root) const { +void MPI_CLASS::call_bcast(unsigned char *x, int n, + int root) const { PROFILE_START("bcast", profile_level); MPI_Bcast(x, n, MPI_UNSIGNED_CHAR, root, communicator); PROFILE_STOP("bcast", profile_level); } -template <> -void MPI_CLASS::call_bcast(char *x, const int n, const int root) const { +template <> void MPI_CLASS::call_bcast(char *x, int n, int root) const { PROFILE_START("bcast", profile_level); MPI_Bcast(x, n, MPI_CHAR, root, communicator); PROFILE_STOP("bcast", profile_level); } // int template <> -void MPI_CLASS::call_bcast(unsigned int *x, const int n, - const int root) const { +void MPI_CLASS::call_bcast(unsigned int *x, int n, + int root) const { PROFILE_START("bcast", profile_level); MPI_Bcast(x, n, MPI_UNSIGNED, root, communicator); PROFILE_STOP("bcast", profile_level); } -template <> -void MPI_CLASS::call_bcast(int *x, const int n, const int root) const { +template <> void MPI_CLASS::call_bcast(int *x, int n, int root) const { PROFILE_START("bcast", profile_level); MPI_Bcast(x, n, MPI_INT, root, communicator); PROFILE_STOP("bcast", profile_level); } // float -template <> -void MPI_CLASS::call_bcast(float *x, const int n, const int root) const { +template <> void MPI_CLASS::call_bcast(float *x, int n, int root) const { PROFILE_START("bcast", profile_level); MPI_Bcast(x, n, MPI_FLOAT, root, communicator); PROFILE_STOP("bcast", profile_level); } // double template <> -void MPI_CLASS::call_bcast(double *x, const int n, - const int root) const { +void MPI_CLASS::call_bcast(double *x, int n, int root) const { PROFILE_START("bcast", profile_level); MPI_Bcast(x, n, MPI_DOUBLE, root, communicator); PROFILE_STOP("bcast", profile_level); } #else // We need a concrete instantiation of bcast(x,n,root); -template <> -void MPI_CLASS::call_bcast(char *, const int, const int) const {} +template <> void MPI_CLASS::call_bcast(char *, int, int) const {} #endif /************************************************************************ @@ -2316,8 +2299,8 @@ void MPI_CLASS::barrier() const { #ifdef USE_MPI // char template <> -void MPI_CLASS::send(const char *buf, const int length, - const int recv_proc_number, int tag) const { +void MPI_CLASS::send(const char *buf, int length, int recv_proc_number, + int tag) const { // Set the tag to 0 if it is < 0 tag = (tag >= 0) ? tag : 0; MPI_INSIST(tag <= d_maxTag, "Maximum tag value exceeded"); @@ -2329,8 +2312,8 @@ void MPI_CLASS::send(const char *buf, const int length, } // int template <> -void MPI_CLASS::send(const int *buf, const int length, - const int recv_proc_number, int tag) const { +void MPI_CLASS::send(const int *buf, int length, int recv_proc_number, + int tag) const { // Set the tag to 0 if it is < 0 tag = (tag >= 0) ? tag : 0; MPI_INSIST(tag <= d_maxTag, "Maximum tag value exceeded"); @@ -2341,8 +2324,8 @@ void MPI_CLASS::send(const int *buf, const int length, } // float template <> -void MPI_CLASS::send(const float *buf, const int length, - const int recv_proc_number, int tag) const { +void MPI_CLASS::send(const float *buf, int length, int recv_proc_number, + int tag) const { // Set the tag to 0 if it is < 0 tag = (tag >= 0) ? 
tag : 0; MPI_INSIST(tag <= d_maxTag, "Maximum tag value exceeded"); @@ -2354,8 +2337,8 @@ void MPI_CLASS::send(const float *buf, const int length, } // double template <> -void MPI_CLASS::send(const double *buf, const int length, - const int recv_proc_number, int tag) const { +void MPI_CLASS::send(const double *buf, int length, + int recv_proc_number, int tag) const { // Set the tag to 0 if it is < 0 tag = (tag >= 0) ? tag : 0; MPI_INSIST(tag <= d_maxTag, "Maximum tag value exceeded"); @@ -2368,8 +2351,7 @@ void MPI_CLASS::send(const double *buf, const int length, #else // We need a concrete instantiation of send for use without MPI template <> -void MPI_CLASS::send(const char *buf, const int length, const int, - int tag) const { +void MPI_CLASS::send(const char *buf, int length, int, int tag) const { MPI_INSIST(tag <= d_maxTag, "Maximum tag value exceeded"); MPI_INSIST(tag >= 0, "tag must be >= 0"); PROFILE_START("send", profile_level); @@ -2391,8 +2373,8 @@ void MPI_CLASS::send(const char *buf, const int length, const int, #ifdef USE_MPI // char template <> -MPI_Request MPI_CLASS::Isend(const char *buf, const int length, - const int recv_proc, const int tag) const { +MPI_Request MPI_CLASS::Isend(const char *buf, int length, int recv_proc, + int tag) const { MPI_INSIST(tag <= d_maxTag, "Maximum tag value exceeded"); MPI_INSIST(tag >= 0, "tag must be >= 0"); MPI_Request request; @@ -2404,8 +2386,8 @@ MPI_Request MPI_CLASS::Isend(const char *buf, const int length, } // int template <> -MPI_Request MPI_CLASS::Isend(const int *buf, const int length, - const int recv_proc, const int tag) const { +MPI_Request MPI_CLASS::Isend(const int *buf, int length, int recv_proc, + int tag) const { MPI_INSIST(tag <= d_maxTag, "Maximum tag value exceeded"); MPI_INSIST(tag >= 0, "tag must be >= 0"); MPI_Request request; @@ -2417,8 +2399,8 @@ MPI_Request MPI_CLASS::Isend(const int *buf, const int length, } // float template <> -MPI_Request MPI_CLASS::Isend(const float *buf, const int length, - const int recv_proc, const int tag) const { +MPI_Request MPI_CLASS::Isend(const float *buf, int length, int recv_proc, + int tag) const { MPI_INSIST(tag <= d_maxTag, "Maximum tag value exceeded"); MPI_INSIST(tag >= 0, "tag must be >= 0"); MPI_Request request; @@ -2430,8 +2412,8 @@ MPI_Request MPI_CLASS::Isend(const float *buf, const int length, } // double template <> -MPI_Request MPI_CLASS::Isend(const double *buf, const int length, - const int recv_proc, const int tag) const { +MPI_Request MPI_CLASS::Isend(const double *buf, int length, + int recv_proc, int tag) const { MPI_INSIST(tag <= d_maxTag, "Maximum tag value exceeded"); MPI_INSIST(tag >= 0, "tag must be >= 0"); MPI_Request request; @@ -2444,8 +2426,8 @@ MPI_Request MPI_CLASS::Isend(const double *buf, const int length, #else // We need a concrete instantiation of send for use without mpi template <> -MPI_Request MPI_CLASS::Isend(const char *buf, const int length, const int, - const int tag) const { +MPI_Request MPI_CLASS::Isend(const char *buf, int length, int, + int tag) const { MPI_INSIST(tag <= d_maxTag, "Maximum tag value exceeded"); MPI_INSIST(tag >= 0, "tag must be >= 0"); PROFILE_START("Isend", profile_level); @@ -2472,8 +2454,8 @@ MPI_Request MPI_CLASS::Isend(const char *buf, const int length, const int, /************************************************************************ * Send byte array to another processor. 
* ************************************************************************/ -void MPI_CLASS::sendBytes(const void *buf, const int number_bytes, - const int recv_proc_number, int tag) const { +void MPI_CLASS::sendBytes(const void *buf, int number_bytes, + int recv_proc_number, int tag) const { MPI_INSIST(tag <= d_maxTag, "Maximum tag value exceeded"); MPI_INSIST(tag >= 0, "tag must be >= 0"); send((const char *)buf, number_bytes, recv_proc_number, tag); @@ -2482,7 +2464,7 @@ void MPI_CLASS::sendBytes(const void *buf, const int number_bytes, /************************************************************************ * Non-blocking send byte array to another processor. * ************************************************************************/ -MPI_Request MPI_CLASS::IsendBytes(const void *buf, const int number_bytes, +MPI_Request MPI_CLASS::IsendBytes(const void *buf, int number_bytes, const int recv_proc, const int tag) const { MPI_INSIST(tag <= d_maxTag, "Maximum tag value exceeded"); MPI_INSIST(tag >= 0, "tag must be >= 0"); @@ -2496,7 +2478,7 @@ MPI_Request MPI_CLASS::IsendBytes(const void *buf, const int number_bytes, #ifdef USE_MPI // char template <> -void MPI_CLASS::recv(char *buf, int &length, const int send_proc_number, +void MPI_CLASS::recv(char *buf, int &length, int send_proc_number, const bool get_length, int tag) const { // Set the tag to 0 if it is < 0 tag = (tag >= 0) ? tag : 0; @@ -2518,7 +2500,7 @@ void MPI_CLASS::recv(char *buf, int &length, const int send_proc_number, } // int template <> -void MPI_CLASS::recv(int *buf, int &length, const int send_proc_number, +void MPI_CLASS::recv(int *buf, int &length, int send_proc_number, const bool get_length, int tag) const { // Set the tag to 0 if it is < 0 tag = (tag >= 0) ? tag : 0; @@ -2540,7 +2522,7 @@ void MPI_CLASS::recv(int *buf, int &length, const int send_proc_number, } // float template <> -void MPI_CLASS::recv(float *buf, int &length, const int send_proc_number, +void MPI_CLASS::recv(float *buf, int &length, int send_proc_number, const bool get_length, int tag) const { // Set the tag to 0 if it is < 0 tag = (tag >= 0) ? tag : 0; @@ -2562,9 +2544,8 @@ void MPI_CLASS::recv(float *buf, int &length, const int send_proc_number, } // double template <> -void MPI_CLASS::recv(double *buf, int &length, - const int send_proc_number, const bool get_length, - int tag) const { +void MPI_CLASS::recv(double *buf, int &length, int send_proc_number, + const bool get_length, int tag) const { // Set the tag to 0 if it is < 0 tag = (tag >= 0) ? 
tag : 0; MPI_INSIST(tag <= d_maxTag, "Maximum tag value exceeded"); @@ -2586,7 +2567,7 @@ void MPI_CLASS::recv(double *buf, int &length, #else // We need a concrete instantiation of recv for use without mpi template <> -void MPI_CLASS::recv(char *buf, int &length, const int, const bool, +void MPI_CLASS::recv(char *buf, int &length, int, const bool, int tag) const { MPI_INSIST(tag <= d_maxTag, "Maximum tag value exceeded"); MPI_INSIST(tag >= 0, "tag must be >= 0"); @@ -2609,8 +2590,8 @@ void MPI_CLASS::recv(char *buf, int &length, const int, const bool, #ifdef USE_MPI // char template <> -MPI_Request MPI_CLASS::Irecv(char *buf, const int length, - const int send_proc, const int tag) const { +MPI_Request MPI_CLASS::Irecv(char *buf, int length, int send_proc, + int tag) const { MPI_INSIST(tag <= d_maxTag, "Maximum tag value exceeded"); MPI_INSIST(tag >= 0, "tag must be >= 0"); MPI_Request request; @@ -2622,8 +2603,8 @@ MPI_Request MPI_CLASS::Irecv(char *buf, const int length, } // int template <> -MPI_Request MPI_CLASS::Irecv(int *buf, const int length, - const int send_proc, const int tag) const { +MPI_Request MPI_CLASS::Irecv(int *buf, int length, int send_proc, + int tag) const { MPI_INSIST(tag <= d_maxTag, "Maximum tag value exceeded"); MPI_INSIST(tag >= 0, "tag must be >= 0"); MPI_Request request; @@ -2635,8 +2616,8 @@ MPI_Request MPI_CLASS::Irecv(int *buf, const int length, } // float template <> -MPI_Request MPI_CLASS::Irecv(float *buf, const int length, - const int send_proc, const int tag) const { +MPI_Request MPI_CLASS::Irecv(float *buf, int length, int send_proc, + int tag) const { MPI_INSIST(tag <= d_maxTag, "Maximum tag value exceeded"); MPI_INSIST(tag >= 0, "tag must be >= 0"); MPI_Request request; @@ -2648,8 +2629,8 @@ MPI_Request MPI_CLASS::Irecv(float *buf, const int length, } // double template <> -MPI_Request MPI_CLASS::Irecv(double *buf, const int length, - const int send_proc, const int tag) const { +MPI_Request MPI_CLASS::Irecv(double *buf, int length, int send_proc, + int tag) const { MPI_INSIST(tag <= d_maxTag, "Maximum tag value exceeded"); MPI_INSIST(tag >= 0, "tag must be >= 0"); MPI_Request request; @@ -2662,8 +2643,7 @@ MPI_Request MPI_CLASS::Irecv(double *buf, const int length, #else // We need a concrete instantiation of irecv for use without mpi template <> -MPI_Request MPI_CLASS::Irecv(char *buf, const int length, const int, - const int tag) const { +MPI_Request MPI_CLASS::Irecv(char *buf, int length, int, int tag) const { MPI_INSIST(tag <= d_maxTag, "Maximum tag value exceeded"); MPI_INSIST(tag >= 0, "tag must be >= 0"); PROFILE_START("Irecv", profile_level); @@ -2690,7 +2670,7 @@ MPI_Request MPI_CLASS::Irecv(char *buf, const int length, const int, /************************************************************************ * Recieve byte array to another processor. * ************************************************************************/ -void MPI_CLASS::recvBytes(void *buf, int &number_bytes, const int send_proc, +void MPI_CLASS::recvBytes(void *buf, int &number_bytes, int send_proc, int tag) const { recv((char *)buf, number_bytes, send_proc, false, tag); } @@ -2698,8 +2678,8 @@ void MPI_CLASS::recvBytes(void *buf, int &number_bytes, const int send_proc, /************************************************************************ * Recieve byte array to another processor. 
* ************************************************************************/ -MPI_Request MPI_CLASS::IrecvBytes(void *buf, const int number_bytes, - const int send_proc, const int tag) const { +MPI_Request MPI_CLASS::IrecvBytes(void *buf, int number_bytes, int send_proc, + int tag) const { MPI_INSIST(tag <= d_maxTag, "Maximum tag value exceeded"); MPI_INSIST(tag >= 0, "tag must be >= 0"); return Irecv((char *)buf, number_bytes, send_proc, tag); @@ -2913,7 +2893,7 @@ void MPI_CLASS::call_allGather(const char *, int, char *, int *, ************************************************************************/ #ifdef USE_MPI template <> -void MPI_CLASS::allToAll(const int n, const unsigned char *send, +void MPI_CLASS::allToAll(int n, const unsigned char *send, unsigned char *recv) const { PROFILE_START("allToAll", profile_level); MPI_Alltoall((void *)send, n, MPI_UNSIGNED_CHAR, (void *)recv, n, @@ -2921,15 +2901,14 @@ void MPI_CLASS::allToAll(const int n, const unsigned char *send, PROFILE_STOP("allToAll", profile_level); } template <> -void MPI_CLASS::allToAll(const int n, const char *send, - char *recv) const { +void MPI_CLASS::allToAll(int n, const char *send, char *recv) const { PROFILE_START("allToAll", profile_level); MPI_Alltoall((void *)send, n, MPI_CHAR, (void *)recv, n, MPI_CHAR, communicator); PROFILE_STOP("allToAll", profile_level); } template <> -void MPI_CLASS::allToAll(const int n, const unsigned int *send, +void MPI_CLASS::allToAll(int n, const unsigned int *send, unsigned int *recv) const { PROFILE_START("allToAll", profile_level); MPI_Alltoall((void *)send, n, MPI_UNSIGNED, (void *)recv, n, MPI_UNSIGNED, @@ -2937,14 +2916,14 @@ void MPI_CLASS::allToAll(const int n, const unsigned int *send, PROFILE_STOP("allToAll", profile_level); } template <> -void MPI_CLASS::allToAll(const int n, const int *send, int *recv) const { +void MPI_CLASS::allToAll(int n, const int *send, int *recv) const { PROFILE_START("allToAll", profile_level); MPI_Alltoall((void *)send, n, MPI_INT, (void *)recv, n, MPI_INT, communicator); PROFILE_STOP("allToAll", profile_level); } template <> -void MPI_CLASS::allToAll(const int n, +void MPI_CLASS::allToAll(int n, const unsigned long int *send, unsigned long int *recv) const { PROFILE_START("allToAll", profile_level); @@ -2953,7 +2932,7 @@ void MPI_CLASS::allToAll(const int n, PROFILE_STOP("allToAll", profile_level); } template <> -void MPI_CLASS::allToAll(const int n, const long int *send, +void MPI_CLASS::allToAll(int n, const long int *send, long int *recv) const { PROFILE_START("allToAll", profile_level); MPI_Alltoall((void *)send, n, MPI_LONG, (void *)recv, n, MPI_LONG, @@ -2961,15 +2940,14 @@ void MPI_CLASS::allToAll(const int n, const long int *send, PROFILE_STOP("allToAll", profile_level); } template <> -void MPI_CLASS::allToAll(const int n, const float *send, - float *recv) const { +void MPI_CLASS::allToAll(int n, const float *send, float *recv) const { PROFILE_START("allToAll", profile_level); MPI_Alltoall((void *)send, n, MPI_FLOAT, (void *)recv, n, MPI_FLOAT, communicator); PROFILE_STOP("allToAll", profile_level); } template <> -void MPI_CLASS::allToAll(const int n, const double *send, +void MPI_CLASS::allToAll(int n, const double *send, double *recv) const { PROFILE_START("allToAll", profile_level); MPI_Alltoall((void *)send, n, MPI_DOUBLE, (void *)recv, n, MPI_DOUBLE, @@ -3713,4 +3691,28 @@ MPI MPI::loadBalance(double local, std::vector work) { return split(0, key[getRank()]); } + + 
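For orientation, the fixed-size allToAll specializations above forward directly to MPI_Alltoall, so every rank contributes and receives exactly n values per peer and both buffers must hold n values per rank. A minimal usage sketch follows; the helper name and the way the rank count is obtained are illustrative assumptions, not part of this change.

#include "common/MPI.h"
#include <vector>

// Illustrative sketch (not from the repository): exchange n doubles with every rank
// through the wrapper above. P is the communicator size, obtained however the
// application tracks it (e.g. a size accessor on the wrapper).
void exampleAllToAll(const Utilities::MPI &comm, int n, int P) {
    std::vector<double> send(n * P), recv(n * P);
    for (int r = 0; r < P; r++)
        for (int i = 0; i < n; i++)
            send[r * n + i] = 100.0 * r + i; // block destined for rank r
    comm.allToAll(n, send.data(), recv.data());
    // recv[r*n .. r*n + n - 1] now holds the block contributed by rank r
}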
+ +/**************************************************************************** + * Function Persistent Communication * + ****************************************************************************/ +template <> +std::shared_ptr<MPI_Request> MPI::Isend_init(const double *buf, int N, int proc, int tag) const +{ + std::shared_ptr<MPI_Request> obj(new MPI_Request, []( MPI_Request *req ) { MPI_Request_free( req ); } ); + MPI_Send_init( buf, N, MPI_DOUBLE, proc, tag, communicator, obj.get() ); + return obj; +} +template<> +std::shared_ptr<MPI_Request> MPI::Irecv_init(double *buf, int N, int proc, int tag) const +{ + std::shared_ptr<MPI_Request> obj(new MPI_Request, []( MPI_Request *req ) { MPI_Request_free( req ); } ); + MPI_Recv_init( buf, N, MPI_DOUBLE, proc, tag, communicator, obj.get() ); + return obj; +} +void MPI::Start( MPI_Request &request ) +{ + MPI_Start( &request ); +} + } // namespace Utilities diff --git a/common/MPI.h index f8849aaf..d249d661 100644 --- a/common/MPI.h +++ b/common/MPI.h @@ -26,6 +26,7 @@ redistribution is prohibited. #include #include #include +#include #include #include #include @@ -173,10 +174,9 @@ public: // Member functions * */ static void - balanceProcesses(const MPI &comm = MPI(MPI_COMM_WORLD), - const int method = 1, + balanceProcesses(const MPI &comm = MPI(MPI_COMM_WORLD), int method = 1, const std::vector &procs = std::vector(), - const int N_min = 1, const int N_max = -1); + int N_min = 1, int N_max = -1); //! Query the level of thread support static ThreadSupport queryThreadSupport(); @@ -420,7 +420,7 @@ public: // Member functions * \param x The input/output array for the reduce * \param n The number of values in the array (must match on all nodes) */ - template void sumReduce(type *x, const int n = 1) const; + template void sumReduce(type *x, int n = 1) const; /** * \brief Sum Reduce @@ -432,7 +432,7 @@ public: // Member functions * \param n The number of values in the array (must match on all nodes) */ template - void sumReduce(const type *x, type *y, const int n = 1) const; + void sumReduce(const type *x, type *y, int n = 1) const; /** * \brief Min Reduce @@ -457,7 +457,7 @@ public: // Member functions * minimum value */ template - void minReduce(type *x, const int n = 1, int *rank_of_min = nullptr) const; + void minReduce(type *x, int n = 1, int *rank_of_min = nullptr) const; /** * \brief Sum Reduce @@ -475,7 +475,7 @@ public: // Member functions * minimum value */ template - void minReduce(const type *x, type *y, const int n = 1, + void minReduce(const type *x, type *y, int n = 1, int *rank_of_min = nullptr) const; /** @@ -501,7 +501,7 @@ public: // Member functions * minimum value */ template - void maxReduce(type *x, const int n = 1, int *rank_of_max = nullptr) const; + void maxReduce(type *x, int n = 1, int *rank_of_max = nullptr) const; /** * \brief Sum Reduce @@ -519,7 +519,7 @@ public: // Member functions * minimum value */ template - void maxReduce(const type *x, type *y, const int n = 1, + void maxReduce(const type *x, type *y, int n = 1, int *rank_of_max = nullptr) const; /** @@ -530,8 +530,7 @@ public: // Member functions * \param y The output array for the scan * \param n The number of values in the array (must match on all nodes) */ - template - void sumScan(const type *x, type *y, const int n = 1) const; + template void sumScan(const type *x, type *y, int n = 1) const; /** * \brief Scan Min Reduce @@ -541,8 +540,7 @@ public: // Member functions * \param y The output array for the scan * \param n The number of values in the array (must match on all nodes) */ - template -
void minScan(const type *x, type *y, const int n = 1) const; + template void minScan(const type *x, type *y, int n = 1) const; /** * \brief Scan Max Reduce @@ -552,8 +550,7 @@ public: // Member functions * \param y The output array for the scan * \param n The number of values in the array (must match on all nodes) */ - template - void maxScan(const type *x, type *y, const int n = 1) const; + template void maxScan(const type *x, type *y, int n = 1) const; /** * \brief Broadcast @@ -561,7 +558,7 @@ public: // Member functions * \param value The input value for the broadcast. * \param root The processor performing the broadcast */ - template type bcast(const type &value, const int root) const; + template type bcast(const type &value, int root) const; /** * \brief Broadcast @@ -570,8 +567,7 @@ public: // Member functions * \param n The number of values in the array (must match on all nodes) * \param root The processor performing the broadcast */ - template - void bcast(type *value, const int n, const int root) const; + template void bcast(type *value, int n, int root) const; /** * Perform a global barrier across all processors. @@ -595,8 +591,7 @@ public: // Member functions * The matching recv must share this tag. */ template - void send(const type *buf, const int length, const int recv, - int tag = 0) const; + void send(const type *buf, int length, int recv, int tag = 0) const; /*! * @brief This function sends an MPI message with an array of bytes @@ -611,8 +606,7 @@ public: // Member functions * to be sent with this message. Default tag is 0. * The matching recv must share this tag. */ - void sendBytes(const void *buf, const int N_bytes, const int recv, - int tag = 0) const; + void sendBytes(const void *buf, int N_bytes, int recv, int tag = 0) const; /*! * @brief This function sends an MPI message with an array @@ -627,8 +621,8 @@ public: // Member functions * to be sent with this message. */ template - MPI_Request Isend(const type *buf, const int length, const int recv_proc, - const int tag) const; + MPI_Request Isend(const type *buf, int length, int recv_proc, + int tag) const; /*! * @brief This function sends an MPI message with an array of bytes @@ -642,8 +636,8 @@ public: // Member functions * @param tag Integer argument specifying an integer tag * to be sent with this message. */ - MPI_Request IsendBytes(const void *buf, const int N_bytes, - const int recv_proc, const int tag) const; + MPI_Request IsendBytes(const void *buf, int N_bytes, int recv_proc, + int tag) const; /*! * @brief This function receives an MPI message with a data @@ -662,7 +656,7 @@ public: // Member functions * by the tag of the incoming message. Default tag is 0. */ template - inline void recv(type *buf, int length, const int send, int tag) const { + inline void recv(type *buf, int length, int send, int tag) const { int length2 = length; recv(buf, length2, send, false, tag); } @@ -687,7 +681,7 @@ public: // Member functions * by the tag of the incoming message. Default tag is 0. */ template - void recv(type *buf, int &length, const int send, const bool get_length, + void recv(type *buf, int &length, int send, const bool get_length, int tag) const; /*! @@ -703,7 +697,7 @@ public: // Member functions * must be matched by the tag of the incoming message. Default * tag is 0. */ - void recvBytes(void *buf, int &N_bytes, const int send, int tag = 0) const; + void recvBytes(void *buf, int &N_bytes, int send, int tag = 0) const; /*! 
* @brief This function receives an MPI message with a data @@ -716,8 +710,7 @@ public: // Member functions * be matched by the tag of the incoming message. */ template - MPI_Request Irecv(type *buf, const int length, const int send_proc, - const int tag) const; + MPI_Request Irecv(type *buf, int length, int send_proc, int tag) const; /*! * @brief This function receives an MPI message with an array of @@ -731,8 +724,8 @@ public: // Member functions * @param tag Integer argument specifying a tag which must * be matched by the tag of the incoming message. */ - MPI_Request IrecvBytes(void *buf, const int N_bytes, const int send_proc, - const int tag) const; + MPI_Request IrecvBytes(void *buf, int N_bytes, int send_proc, + int tag) const; /*! * @brief This function sends and recieves data using a blocking call */ template void sendrecv(const type *sendbuf, int sendcount, int dest, int sendtag, type *recvbuf, int recvcount, int source, int recvtag) const; + /*! + * @brief This function sets up an Isend call (see MPI_Send_init) + * @param buf Pointer to array buffer with length values. + * @param length Number of values in buf that we want to send. + * @param recv_proc Receiving processor number. + * @param tag Tag to send + * @return Returns an MPI_Request. + * Note this returns a shared pointer so the user does not + * need to manually free the request + */ + template + std::shared_ptr<MPI_Request> Isend_init(const type *buf, int length, int recv_proc, + int tag) const; + + /*! + * @brief This function sets up an Irecv call (see MPI_Recv_init) + * @param buf Pointer to array buffer with capacity of length values. + * @param length Maximum number of values that can be stored in buf. + * @param send_proc Processor number of sender. + * @param tag Tag to match + * @return Returns an MPI_Request. + * Note this returns a shared pointer so the user does not + * need to manually free the request + */ + template + std::shared_ptr<MPI_Request> Irecv_init(type *buf, int length, int send_proc, int tag) const; + + /*! + * @brief Start the MPI communication + * @param request Request to start + */ + void Start( MPI_Request &request ); + /*! * Each processor sends every other processor a single value. * @param[in] x Input value for allGather * @param[out] output Output array for allGather @@ -792,7 +818,7 @@ public: // Member functions * and the sizes and displacements will be returned (if desired). */ template - int allGather(const type *send_data, const int send_cnt, type *recv_data, + int allGather(const type *send_data, int send_cnt, type *recv_data, int *recv_cnt = nullptr, int *recv_disp = nullptr, bool known_recv = false) const; @@ -822,7 +848,7 @@ public: // Member functions * @param recv_data Output array of received values (nxN) */ template - void allToAll(const int n, const type *send_data, type *recv_data) const; + void allToAll(int n, const type *send_data, type *recv_data) const; /*! * Each processor sends an array of data to the different processors.
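Because Isend_init and Irecv_init return the persistent request in a std::shared_ptr whose deleter calls MPI_Request_free, a caller only has to start and complete the requests; cleanup happens automatically when the last reference goes away. The sketch below shows the intended start/complete cycle; completing with a plain MPI_Wait is an assumption here, since this change itself only adds Start.

#include "common/MPI.h"
#include <vector>

// Illustrative sketch of the new persistent-communication calls (assumes an MPI build).
void exampleExchange(Utilities::MPI &comm, int neighbor, int nsteps) {
    const int N = 1000;
    std::vector<double> sendbuf(N), recvbuf(N);
    auto send_req = comm.Isend_init(sendbuf.data(), N, neighbor, 0); // persistent send, tag 0
    auto recv_req = comm.Irecv_init(recvbuf.data(), N, neighbor, 0); // persistent recv, tag 0
    for (int step = 0; step < nsteps; step++) {
        // ... fill sendbuf for this step ...
        comm.Start(*recv_req);
        comm.Start(*send_req);
        MPI_Wait(send_req.get(), MPI_STATUS_IGNORE); // completion via plain MPI; assumed
        MPI_Wait(recv_req.get(), MPI_STATUS_IGNORE);
        // ... consume recvbuf ...
    }
    // The requests are freed by the shared_ptr deleters (MPI_Request_free) on scope exit.
}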
@@ -995,23 +1021,20 @@ public: // Member functions MPI loadBalance(double localPerformance, std::vector work); private: // Private helper functions for templated MPI operations; - template void call_sumReduce(type *x, const int n = 1) const; + template void call_sumReduce(type *x, int n = 1) const; template - void call_sumReduce(const type *x, type *y, const int n = 1) const; + void call_sumReduce(const type *x, type *y, int n = 1) const; template - void call_minReduce(type *x, const int n = 1, + void call_minReduce(type *x, int n = 1, int *rank_of_min = nullptr) const; + template + void call_minReduce(const type *x, type *y, int n = 1, int *rank_of_min = nullptr) const; template - void call_minReduce(const type *x, type *y, const int n = 1, - int *rank_of_min = nullptr) const; + void call_maxReduce(type *x, int n = 1, int *rank_of_max = nullptr) const; template - void call_maxReduce(type *x, const int n = 1, + void call_maxReduce(const type *x, type *y, int n = 1, int *rank_of_max = nullptr) const; - template - void call_maxReduce(const type *x, type *y, const int n = 1, - int *rank_of_max = nullptr) const; - template - void call_bcast(type *x, const int n, const int root) const; + template void call_bcast(type *x, int n, int root) const; template void call_allGather(const type &x_in, type *x_out) const; template diff --git a/common/ScaLBL.cpp b/common/ScaLBL.cpp index 96dd891b..bfd1490e 100644 --- a/common/ScaLBL.cpp +++ b/common/ScaLBL.cpp @@ -322,6 +322,48 @@ ScaLBL_Communicator::ScaLBL_Communicator(std::shared_ptr Dm){ CommunicationCount = SendCount+RecvCount; //...................................................................................... + + //................................................................................... + // Set up the persistent communication for D3Q19AA (use tags 130-147) + //...................................................................................
+ req_D3Q19AA.clear(); + req_D3Q19AA.push_back( MPI_COMM_SCALBL.Isend_init( sendbuf_x, 5*sendCount_x, rank_x, 130 ) ); + req_D3Q19AA.push_back( MPI_COMM_SCALBL.Irecv_init( recvbuf_X, 5*recvCount_X, rank_X, 130 ) ); + req_D3Q19AA.push_back( MPI_COMM_SCALBL.Isend_init( sendbuf_X, 5*sendCount_X, rank_X, 131 ) ); + req_D3Q19AA.push_back( MPI_COMM_SCALBL.Irecv_init( recvbuf_x, 5*recvCount_x, rank_x, 131 ) ); + req_D3Q19AA.push_back( MPI_COMM_SCALBL.Isend_init( sendbuf_y, 5*sendCount_y, rank_y, 132 ) ); + req_D3Q19AA.push_back( MPI_COMM_SCALBL.Irecv_init( recvbuf_Y, 5*recvCount_Y, rank_Y, 132 ) ); + req_D3Q19AA.push_back( MPI_COMM_SCALBL.Isend_init( sendbuf_Y, 5*sendCount_Y, rank_Y, 133 ) ); + req_D3Q19AA.push_back( MPI_COMM_SCALBL.Irecv_init( recvbuf_y, 5*recvCount_y, rank_y, 133 ) ); + req_D3Q19AA.push_back( MPI_COMM_SCALBL.Isend_init( sendbuf_z, 5*sendCount_z, rank_z, 134 ) ); + req_D3Q19AA.push_back( MPI_COMM_SCALBL.Irecv_init( recvbuf_Z, 5*recvCount_Z, rank_Z, 134 ) ); + req_D3Q19AA.push_back( MPI_COMM_SCALBL.Isend_init( sendbuf_Z, 5*sendCount_Z, rank_Z, 135 ) ); + req_D3Q19AA.push_back( MPI_COMM_SCALBL.Irecv_init( recvbuf_z, 5*recvCount_z, rank_z, 135 ) ); + req_D3Q19AA.push_back( MPI_COMM_SCALBL.Isend_init( sendbuf_xy, sendCount_xy, rank_xy, 136 ) ); + req_D3Q19AA.push_back( MPI_COMM_SCALBL.Irecv_init( recvbuf_XY, recvCount_XY, rank_XY, 136 ) ); + req_D3Q19AA.push_back( MPI_COMM_SCALBL.Isend_init( sendbuf_XY, sendCount_XY, rank_XY, 137 ) ); + req_D3Q19AA.push_back( MPI_COMM_SCALBL.Irecv_init( recvbuf_xy, recvCount_xy, rank_xy, 137 ) ); + req_D3Q19AA.push_back( MPI_COMM_SCALBL.Isend_init( sendbuf_Xy, sendCount_Xy, rank_Xy, 138 ) ); + req_D3Q19AA.push_back( MPI_COMM_SCALBL.Irecv_init( recvbuf_xY, recvCount_xY, rank_xY, 138 ) ); + req_D3Q19AA.push_back( MPI_COMM_SCALBL.Isend_init( sendbuf_xY, sendCount_xY, rank_xY, 139 ) ); + req_D3Q19AA.push_back( MPI_COMM_SCALBL.Irecv_init( recvbuf_Xy, recvCount_Xy, rank_Xy, 139 ) ); + req_D3Q19AA.push_back( MPI_COMM_SCALBL.Isend_init( sendbuf_xz, sendCount_xz, rank_xz, 140 ) ); + req_D3Q19AA.push_back( MPI_COMM_SCALBL.Irecv_init( recvbuf_XZ, recvCount_XZ, rank_XZ, 140 ) ); + req_D3Q19AA.push_back( MPI_COMM_SCALBL.Isend_init( sendbuf_xZ, sendCount_xZ, rank_xZ, 143 ) ); + req_D3Q19AA.push_back( MPI_COMM_SCALBL.Irecv_init( recvbuf_Xz, recvCount_Xz, rank_Xz, 143 ) ); + req_D3Q19AA.push_back( MPI_COMM_SCALBL.Isend_init( sendbuf_Xz, sendCount_Xz, rank_Xz, 142 ) ); + req_D3Q19AA.push_back( MPI_COMM_SCALBL.Irecv_init( recvbuf_xZ, recvCount_xZ, rank_xZ, 142 ) ); + req_D3Q19AA.push_back( MPI_COMM_SCALBL.Isend_init( sendbuf_XZ, sendCount_XZ, rank_XZ, 141 ) ); + req_D3Q19AA.push_back( MPI_COMM_SCALBL.Irecv_init( recvbuf_xz, recvCount_xz, rank_xz, 141 ) ); + req_D3Q19AA.push_back( MPI_COMM_SCALBL.Isend_init( sendbuf_yz, sendCount_yz, rank_yz, 144 ) ); + req_D3Q19AA.push_back( MPI_COMM_SCALBL.Irecv_init( recvbuf_YZ, recvCount_YZ, rank_YZ, 144 ) ); + req_D3Q19AA.push_back( MPI_COMM_SCALBL.Isend_init( sendbuf_yZ, sendCount_yZ, rank_yZ, 147 ) ); + req_D3Q19AA.push_back( MPI_COMM_SCALBL.Irecv_init( recvbuf_Yz, recvCount_Yz, rank_Yz, 147 ) ); + req_D3Q19AA.push_back( MPI_COMM_SCALBL.Isend_init( sendbuf_Yz, sendCount_Yz, rank_Yz, 146 ) ); + req_D3Q19AA.push_back( MPI_COMM_SCALBL.Irecv_init( recvbuf_yZ, recvCount_yZ, rank_yZ, 146 ) ); + req_D3Q19AA.push_back( MPI_COMM_SCALBL.Isend_init( sendbuf_YZ, sendCount_YZ, rank_YZ, 145 ) ); + req_D3Q19AA.push_back( MPI_COMM_SCALBL.Irecv_init( recvbuf_yz, recvCount_yz, rank_yz, 145 ) ); + } @@ -419,6 +461,22 @@ 
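// ----------------------------------------------------------------------------
// [Editor's sketch, not part of the patch] The Isend_init / Irecv_init calls
// above return std::shared_ptr-wrapped request handles that are created once
// in the constructor and then restarted every timestep. The snippet below is
// a minimal, self-contained illustration of the same persistent-request
// pattern built directly on MPI_Send_init / MPI_Startall / MPI_Waitall; the
// helper names (send_init, run_exchange) and the shared_ptr deleter policy
// are assumptions for illustration, not code taken from this repository.
// ----------------------------------------------------------------------------
#include <mpi.h>
#include <memory>
#include <vector>

// Create one persistent send request; the receive side is analogous with
// MPI_Recv_init. The shared_ptr deleter releases the handle so the caller
// never has to call MPI_Request_free by hand.
static std::shared_ptr<MPI_Request> send_init(const double *buf, int count,
                                              int dest, int tag, MPI_Comm comm) {
    auto req = std::shared_ptr<MPI_Request>(
        new MPI_Request, [](MPI_Request *r) { MPI_Request_free(r); delete r; });
    MPI_Send_init(buf, count, MPI_DOUBLE, dest, tag, comm, req.get());
    return req;
}

// Each timestep: start every persistent request, then wait for completion.
// Persistent handles stay valid after MPI_Waitall, so the same vector is
// reused on the next timestep instead of re-posting Isend/Irecv pairs.
static void run_exchange(std::vector<std::shared_ptr<MPI_Request>> &reqs) {
    std::vector<MPI_Request> raw;
    for (auto &r : reqs)
        raw.push_back(*r);
    MPI_Startall(static_cast<int>(raw.size()), raw.data());
    MPI_Waitall(static_cast<int>(raw.size()), raw.data(), MPI_STATUSES_IGNORE);
}
// ----------------------------------------------------------------------------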
ScaLBL_Communicator::~ScaLBL_Communicator() ScaLBL_FreeDeviceMemory( dvcRecvDist_Yz ); ScaLBL_FreeDeviceMemory( dvcRecvDist_YZ ); } + + +void ScaLBL_Communicator::start( std::vector>& requests ) +{ + for ( auto& req : requests ) + MPI_COMM_SCALBL.Start( *req ); +} +void ScaLBL_Communicator::wait( std::vector>& requests ) +{ + std::vector request2; + for ( auto& req : requests ) + request2.push_back( *req ); + MPI_COMM_SCALBL.waitAll( request2.size(), request2.data() ); +} + + double ScaLBL_Communicator::GetPerformance(int *NeighborList, double *fq, int Np){ /* EACH MPI PROCESS GETS ITS OWN MEASUREMENT*/ /* use MRT kernels to check performance without communication / synchronization */ @@ -1397,8 +1455,6 @@ void ScaLBL_Communicator::SendD3Q19AA(double *dist){ else{ Lock=true; } - // assign tag of 130 to D3Q19 communication - sendtag = recvtag = 130; ScaLBL_DeviceBarrier(); // Pack the distributions //...Packing for x face(2,8,10,12,14)................................ @@ -1473,42 +1529,7 @@ void ScaLBL_Communicator::SendD3Q19AA(double *dist){ //................................................................................... ScaLBL_DeviceBarrier(); - req1[0] = MPI_COMM_SCALBL.Isend(sendbuf_x, 5*sendCount_x,rank_x,sendtag+0); - req2[0] = MPI_COMM_SCALBL.Irecv(recvbuf_X, 5*recvCount_X,rank_X,recvtag+0); - req1[1] = MPI_COMM_SCALBL.Isend(sendbuf_X, 5*sendCount_X,rank_X,sendtag+1); - req2[1] = MPI_COMM_SCALBL.Irecv(recvbuf_x, 5*recvCount_x,rank_x,recvtag+1); - req1[2] = MPI_COMM_SCALBL.Isend(sendbuf_y, 5*sendCount_y,rank_y,sendtag+2); - req2[2] = MPI_COMM_SCALBL.Irecv(recvbuf_Y, 5*recvCount_Y,rank_Y,recvtag+2); - req1[3] = MPI_COMM_SCALBL.Isend(sendbuf_Y, 5*sendCount_Y,rank_Y,sendtag+3); - req2[3] = MPI_COMM_SCALBL.Irecv(recvbuf_y, 5*recvCount_y,rank_y,recvtag+3); - req1[4] = MPI_COMM_SCALBL.Isend(sendbuf_z, 5*sendCount_z,rank_z,sendtag+4); - req2[4] = MPI_COMM_SCALBL.Irecv(recvbuf_Z, 5*recvCount_Z,rank_Z,recvtag+4); - req1[5] = MPI_COMM_SCALBL.Isend(sendbuf_Z, 5*sendCount_Z,rank_Z,sendtag+5); - req2[5] = MPI_COMM_SCALBL.Irecv(recvbuf_z, 5*recvCount_z,rank_z,recvtag+5); - req1[6] = MPI_COMM_SCALBL.Isend(sendbuf_xy, sendCount_xy,rank_xy,sendtag+6); - req2[6] = MPI_COMM_SCALBL.Irecv(recvbuf_XY, recvCount_XY,rank_XY,recvtag+6); - req1[7] = MPI_COMM_SCALBL.Isend(sendbuf_XY, sendCount_XY,rank_XY,sendtag+7); - req2[7] = MPI_COMM_SCALBL.Irecv(recvbuf_xy, recvCount_xy,rank_xy,recvtag+7); - req1[8] = MPI_COMM_SCALBL.Isend(sendbuf_Xy, sendCount_Xy,rank_Xy,sendtag+8); - req2[8] = MPI_COMM_SCALBL.Irecv(recvbuf_xY, recvCount_xY,rank_xY,recvtag+8); - req1[9] = MPI_COMM_SCALBL.Isend(sendbuf_xY, sendCount_xY,rank_xY,sendtag+9); - req2[9] = MPI_COMM_SCALBL.Irecv(recvbuf_Xy, recvCount_Xy,rank_Xy,recvtag+9); - req1[10] = MPI_COMM_SCALBL.Isend(sendbuf_xz, sendCount_xz,rank_xz,sendtag+10); - req2[10] = MPI_COMM_SCALBL.Irecv(recvbuf_XZ, recvCount_XZ,rank_XZ,recvtag+10); - req1[13] = MPI_COMM_SCALBL.Isend(sendbuf_xZ, sendCount_xZ,rank_xZ,sendtag+13); - req2[13] = MPI_COMM_SCALBL.Irecv(recvbuf_Xz, recvCount_Xz,rank_Xz,recvtag+13); - req1[12] = MPI_COMM_SCALBL.Isend(sendbuf_Xz, sendCount_Xz,rank_Xz,sendtag+12); - req2[12] = MPI_COMM_SCALBL.Irecv(recvbuf_xZ, recvCount_xZ,rank_xZ,recvtag+12); - req1[11] = MPI_COMM_SCALBL.Isend(sendbuf_XZ, sendCount_XZ,rank_XZ,sendtag+11); - req2[11] = MPI_COMM_SCALBL.Irecv(recvbuf_xz, recvCount_xz,rank_xz,recvtag+11); - req1[14] = MPI_COMM_SCALBL.Isend(sendbuf_yz, sendCount_yz,rank_yz,sendtag+14); - req2[14] = MPI_COMM_SCALBL.Irecv(recvbuf_YZ, recvCount_YZ,rank_YZ,recvtag+14); - 
req1[17] = MPI_COMM_SCALBL.Isend(sendbuf_yZ, sendCount_yZ,rank_yZ,sendtag+17); - req2[17] = MPI_COMM_SCALBL.Irecv(recvbuf_Yz, recvCount_Yz,rank_Yz,recvtag+17); - req1[16] = MPI_COMM_SCALBL.Isend(sendbuf_Yz, sendCount_Yz,rank_Yz,sendtag+16); - req2[16] = MPI_COMM_SCALBL.Irecv(recvbuf_yZ, recvCount_yZ,rank_yZ,recvtag+16); - req1[15] = MPI_COMM_SCALBL.Isend(sendbuf_YZ, sendCount_YZ,rank_YZ,sendtag+15); - req2[15] = MPI_COMM_SCALBL.Irecv(recvbuf_yz, recvCount_yz,rank_yz,recvtag+15); + start( req_D3Q19AA ); } @@ -1517,8 +1538,7 @@ void ScaLBL_Communicator::RecvD3Q19AA(double *dist){ // NOTE: the center distribution f0 must NOT be at the start of feven, provide offset to start of f2 //................................................................................... // Wait for completion of D3Q19 communication - MPI_COMM_SCALBL.waitAll(18,req1); - MPI_COMM_SCALBL.waitAll(18,req2); + wait( req_D3Q19AA ); ScaLBL_DeviceBarrier(); //................................................................................... @@ -1695,36 +1715,36 @@ void ScaLBL_Communicator::BiSendD3Q7AA(double *Aq, double *Bq){ ScaLBL_D3Q19_Pack(2,dvcSendList_x,0,sendCount_x,sendbuf_x,Aq,N); ScaLBL_D3Q19_Pack(2,dvcSendList_x,sendCount_x,sendCount_x,sendbuf_x,Bq,N); - req1[0] = MPI_COMM_SCALBL.Isend(sendbuf_x, 2*sendCount_x,rank_x,sendtag+0); - req2[0] = MPI_COMM_SCALBL.Irecv(recvbuf_X, 2*recvCount_X,rank_X,recvtag+0); + req1[0] = MPI_COMM_SCALBL.Isend(sendbuf_x, 2*sendCount_x, rank_x,sendtag+0); + req2[0] = MPI_COMM_SCALBL.Irecv(recvbuf_X, 2*recvCount_X, rank_X,recvtag+0); //...Packing for X face(1,7,9,11,13)................................ ScaLBL_D3Q19_Pack(1,dvcSendList_X,0,sendCount_X,sendbuf_X,Aq,N); ScaLBL_D3Q19_Pack(1,dvcSendList_X,sendCount_X,sendCount_X,sendbuf_X,Bq,N); - req1[1] = MPI_COMM_SCALBL.Isend(sendbuf_X, 2*sendCount_X,rank_X,sendtag+1); - req2[1] = MPI_COMM_SCALBL.Irecv(recvbuf_x, 2*recvCount_x,rank_x,recvtag+1); + req1[1] = MPI_COMM_SCALBL.Isend(sendbuf_X, 2*sendCount_X, rank_X,sendtag+1); + req2[1] = MPI_COMM_SCALBL.Irecv(recvbuf_x, 2*recvCount_x, rank_x,recvtag+1); //...Packing for y face(4,8,9,16,18)................................. ScaLBL_D3Q19_Pack(4,dvcSendList_y,0,sendCount_y,sendbuf_y,Aq,N); ScaLBL_D3Q19_Pack(4,dvcSendList_y,sendCount_y,sendCount_y,sendbuf_y,Bq,N); - req1[2] = MPI_COMM_SCALBL.Isend(sendbuf_y, 2*sendCount_y,rank_y,sendtag+2); - req2[2] = MPI_COMM_SCALBL.Irecv(recvbuf_Y, 2*recvCount_Y,rank_Y,recvtag+2); + req1[2] = MPI_COMM_SCALBL.Isend(sendbuf_y, 2*sendCount_y, rank_y,sendtag+2); + req2[2] = MPI_COMM_SCALBL.Irecv(recvbuf_Y, 2*recvCount_Y, rank_Y,recvtag+2); //...Packing for Y face(3,7,10,15,17)................................. ScaLBL_D3Q19_Pack(3,dvcSendList_Y,0,sendCount_Y,sendbuf_Y,Aq,N); ScaLBL_D3Q19_Pack(3,dvcSendList_Y,sendCount_Y,sendCount_Y,sendbuf_Y,Bq,N); - req1[3] = MPI_COMM_SCALBL.Isend(sendbuf_Y, 2*sendCount_Y,rank_Y,sendtag+3); - req2[3] = MPI_COMM_SCALBL.Irecv(recvbuf_y, 2*recvCount_y,rank_y,recvtag+3); + req1[3] = MPI_COMM_SCALBL.Isend(sendbuf_Y, 2*sendCount_Y, rank_Y,sendtag+3); + req2[3] = MPI_COMM_SCALBL.Irecv(recvbuf_y, 2*recvCount_y, rank_y,recvtag+3); //...Packing for z face(6,12,13,16,17)................................ 
ScaLBL_D3Q19_Pack(6,dvcSendList_z,0,sendCount_z,sendbuf_z,Aq,N); ScaLBL_D3Q19_Pack(6,dvcSendList_z,sendCount_z,sendCount_z,sendbuf_z,Bq,N); - req1[4] = MPI_COMM_SCALBL.Isend(sendbuf_z, 2*sendCount_z,rank_z,sendtag+4); - req2[4] = MPI_COMM_SCALBL.Irecv(recvbuf_Z, 2*recvCount_Z,rank_Z,recvtag+4); + req1[4] = MPI_COMM_SCALBL.Isend(sendbuf_z, 2*sendCount_z, rank_z,sendtag+4); + req2[4] = MPI_COMM_SCALBL.Irecv(recvbuf_Z, 2*recvCount_Z, rank_Z,recvtag+4); //...Packing for Z face(5,11,14,15,18)................................ ScaLBL_D3Q19_Pack(5,dvcSendList_Z,0,sendCount_Z,sendbuf_Z,Aq,N); @@ -1732,8 +1752,8 @@ void ScaLBL_Communicator::BiSendD3Q7AA(double *Aq, double *Bq){ //................................................................................... // Send all the distributions - req1[5] = MPI_COMM_SCALBL.Isend(sendbuf_Z, 2*sendCount_Z,rank_Z,sendtag+5); - req2[5] = MPI_COMM_SCALBL.Irecv(recvbuf_z, 2*recvCount_z,rank_z,recvtag+5); + req1[5] = MPI_COMM_SCALBL.Isend(sendbuf_Z, 2*sendCount_Z, rank_Z,sendtag+5); + req2[5] = MPI_COMM_SCALBL.Irecv(recvbuf_z, 2*recvCount_z, rank_z,recvtag+5); } @@ -1810,33 +1830,33 @@ void ScaLBL_Communicator::SendD3Q7AA(double *Aq, int Component){ // Pack the distributions //...Packing for x face(2,8,10,12,14)................................ ScaLBL_D3Q19_Pack(2,dvcSendList_x,0,sendCount_x,sendbuf_x,&Aq[Component*7*N],N); - req1[0] = MPI_COMM_SCALBL.Isend(sendbuf_x, sendCount_x,rank_x,sendtag+0); - req2[0] = MPI_COMM_SCALBL.Irecv(recvbuf_X, recvCount_X,rank_X,recvtag+0); + req1[0] = MPI_COMM_SCALBL.Isend(sendbuf_x, sendCount_x, rank_x,sendtag+0); + req2[0] = MPI_COMM_SCALBL.Irecv(recvbuf_X, recvCount_X, rank_X,recvtag+0); //...Packing for X face(1,7,9,11,13)................................ ScaLBL_D3Q19_Pack(1,dvcSendList_X,0,sendCount_X,sendbuf_X,&Aq[Component*7*N],N); - req1[1] = MPI_COMM_SCALBL.Isend(sendbuf_X, sendCount_X,rank_X,sendtag+1); - req2[1] = MPI_COMM_SCALBL.Irecv(recvbuf_x, recvCount_x,rank_x,recvtag+1); + req1[1] = MPI_COMM_SCALBL.Isend(sendbuf_X, sendCount_X, rank_X,sendtag+1); + req2[1] = MPI_COMM_SCALBL.Irecv(recvbuf_x, recvCount_x, rank_x,recvtag+1); //...Packing for y face(4,8,9,16,18)................................. ScaLBL_D3Q19_Pack(4,dvcSendList_y,0,sendCount_y,sendbuf_y,&Aq[Component*7*N],N); - req1[2] = MPI_COMM_SCALBL.Isend(sendbuf_y, sendCount_y,rank_y,sendtag+2); - req2[2] = MPI_COMM_SCALBL.Irecv(recvbuf_Y, recvCount_Y,rank_Y,recvtag+2); + req1[2] = MPI_COMM_SCALBL.Isend(sendbuf_y, sendCount_y, rank_y,sendtag+2); + req2[2] = MPI_COMM_SCALBL.Irecv(recvbuf_Y, recvCount_Y, rank_Y,recvtag+2); //...Packing for Y face(3,7,10,15,17)................................. ScaLBL_D3Q19_Pack(3,dvcSendList_Y,0,sendCount_Y,sendbuf_Y,&Aq[Component*7*N],N); - req1[3] = MPI_COMM_SCALBL.Isend(sendbuf_Y, sendCount_Y,rank_Y,sendtag+3); - req2[3] = MPI_COMM_SCALBL.Irecv(recvbuf_y, recvCount_y,rank_y,recvtag+3); + req1[3] = MPI_COMM_SCALBL.Isend(sendbuf_Y, sendCount_Y, rank_Y,sendtag+3); + req2[3] = MPI_COMM_SCALBL.Irecv(recvbuf_y, recvCount_y, rank_y,recvtag+3); //...Packing for z face(6,12,13,16,17)................................ 
ScaLBL_D3Q19_Pack(6,dvcSendList_z,0,sendCount_z,sendbuf_z,&Aq[Component*7*N],N); - req1[4] = MPI_COMM_SCALBL.Isend(sendbuf_z, sendCount_z,rank_z,sendtag+4); - req2[4] = MPI_COMM_SCALBL.Irecv(recvbuf_Z, recvCount_Z,rank_Z,recvtag+4); + req1[4] = MPI_COMM_SCALBL.Isend(sendbuf_z, sendCount_z, rank_z,sendtag+4); + req2[4] = MPI_COMM_SCALBL.Irecv(recvbuf_Z, recvCount_Z, rank_Z,recvtag+4); //...Packing for Z face(5,11,14,15,18)................................ ScaLBL_D3Q19_Pack(5,dvcSendList_Z,0,sendCount_Z,sendbuf_Z,&Aq[Component*7*N],N); - req1[5] = MPI_COMM_SCALBL.Isend(sendbuf_Z, sendCount_Z,rank_Z,sendtag+5); - req2[5] = MPI_COMM_SCALBL.Irecv(recvbuf_z, recvCount_z,rank_z,recvtag+5); + req1[5] = MPI_COMM_SCALBL.Isend(sendbuf_Z, sendCount_Z, rank_Z,sendtag+5); + req2[5] = MPI_COMM_SCALBL.Irecv(recvbuf_z, recvCount_z, rank_z,recvtag+5); } @@ -1929,18 +1949,18 @@ void ScaLBL_Communicator::TriSendD3Q7AA(double *Aq, double *Bq, double *Cq){ //................................................................................... // Send all the distributions - req1[0] = MPI_COMM_SCALBL.Isend(sendbuf_x, 3*sendCount_x,rank_x,sendtag+0); - req2[0] = MPI_COMM_SCALBL.Irecv(recvbuf_X, 3*recvCount_X,rank_X,recvtag+0); - req1[1] = MPI_COMM_SCALBL.Isend(sendbuf_X, 3*sendCount_X,rank_X,sendtag+1); - req2[1] = MPI_COMM_SCALBL.Irecv(recvbuf_x, 3*recvCount_x,rank_x,recvtag+1); - req1[2] = MPI_COMM_SCALBL.Isend(sendbuf_y, 3*sendCount_y,rank_y,sendtag+2); - req2[2] = MPI_COMM_SCALBL.Irecv(recvbuf_Y, 3*recvCount_Y,rank_Y,recvtag+2); - req1[3] = MPI_COMM_SCALBL.Isend(sendbuf_Y, 3*sendCount_Y,rank_Y,sendtag+3); - req2[3] = MPI_COMM_SCALBL.Irecv(recvbuf_y, 3*recvCount_y,rank_y,recvtag+3); - req1[4] = MPI_COMM_SCALBL.Isend(sendbuf_z, 3*sendCount_z,rank_z,sendtag+4); - req2[4] = MPI_COMM_SCALBL.Irecv(recvbuf_Z, 3*recvCount_Z,rank_Z,recvtag+4); - req1[5] = MPI_COMM_SCALBL.Isend(sendbuf_Z, 3*sendCount_Z,rank_Z,sendtag+5); - req2[5] = MPI_COMM_SCALBL.Irecv(recvbuf_z, 3*recvCount_z,rank_z,recvtag+5); + req1[0] = MPI_COMM_SCALBL.Isend(sendbuf_x, 3*sendCount_x, rank_x,sendtag+0); + req2[0] = MPI_COMM_SCALBL.Irecv(recvbuf_X, 3*recvCount_X, rank_X,recvtag+0); + req1[1] = MPI_COMM_SCALBL.Isend(sendbuf_X, 3*sendCount_X, rank_X,sendtag+1); + req2[1] = MPI_COMM_SCALBL.Irecv(recvbuf_x, 3*recvCount_x, rank_x,recvtag+1); + req1[2] = MPI_COMM_SCALBL.Isend(sendbuf_y, 3*sendCount_y, rank_y,sendtag+2); + req2[2] = MPI_COMM_SCALBL.Irecv(recvbuf_Y, 3*recvCount_Y, rank_Y,recvtag+2); + req1[3] = MPI_COMM_SCALBL.Isend(sendbuf_Y, 3*sendCount_Y, rank_Y,sendtag+3); + req2[3] = MPI_COMM_SCALBL.Irecv(recvbuf_y, 3*recvCount_y, rank_y,recvtag+3); + req1[4] = MPI_COMM_SCALBL.Isend(sendbuf_z, 3*sendCount_z, rank_z,sendtag+4); + req2[4] = MPI_COMM_SCALBL.Irecv(recvbuf_Z, 3*recvCount_Z, rank_Z,recvtag+4); + req1[5] = MPI_COMM_SCALBL.Isend(sendbuf_Z, 3*sendCount_Z, rank_Z,sendtag+5); + req2[5] = MPI_COMM_SCALBL.Irecv(recvbuf_z, 3*recvCount_z, rank_z,recvtag+5); } @@ -2045,42 +2065,42 @@ void ScaLBL_Communicator::SendHalo(double *data){ // Send / Recv all the phase indcator field values //................................................................................... 
- req1[0] = MPI_COMM_SCALBL.Isend(sendbuf_x, sendCount_x,rank_x,sendtag+0); - req2[0] = MPI_COMM_SCALBL.Irecv(recvbuf_X, recvCount_X,rank_X,recvtag+0); - req1[1] = MPI_COMM_SCALBL.Isend(sendbuf_X, sendCount_X,rank_X,sendtag+1); - req2[1] = MPI_COMM_SCALBL.Irecv(recvbuf_x, recvCount_x,rank_x,recvtag+1); - req1[2] = MPI_COMM_SCALBL.Isend(sendbuf_y, sendCount_y,rank_y,sendtag+2); - req2[2] = MPI_COMM_SCALBL.Irecv(recvbuf_Y, recvCount_Y,rank_Y,recvtag+2); - req1[3] = MPI_COMM_SCALBL.Isend(sendbuf_Y, sendCount_Y,rank_Y,sendtag+3); - req2[3] = MPI_COMM_SCALBL.Irecv(recvbuf_y, recvCount_y,rank_y,recvtag+3); - req1[4] = MPI_COMM_SCALBL.Isend(sendbuf_z, sendCount_z,rank_z,sendtag+4); - req2[4] = MPI_COMM_SCALBL.Irecv(recvbuf_Z, recvCount_Z,rank_Z,recvtag+4); - req1[5] = MPI_COMM_SCALBL.Isend(sendbuf_Z, sendCount_Z,rank_Z,sendtag+5); - req2[5] = MPI_COMM_SCALBL.Irecv(recvbuf_z, recvCount_z,rank_z,recvtag+5); - req1[6] = MPI_COMM_SCALBL.Isend(sendbuf_xy, sendCount_xy,rank_xy,sendtag+6); - req2[6] = MPI_COMM_SCALBL.Irecv(recvbuf_XY, recvCount_XY,rank_XY,recvtag+6); - req1[7] = MPI_COMM_SCALBL.Isend(sendbuf_XY, sendCount_XY,rank_XY,sendtag+7); - req2[7] = MPI_COMM_SCALBL.Irecv(recvbuf_xy, recvCount_xy,rank_xy,recvtag+7); - req1[8] = MPI_COMM_SCALBL.Isend(sendbuf_Xy, sendCount_Xy,rank_Xy,sendtag+8); - req2[8] = MPI_COMM_SCALBL.Irecv(recvbuf_xY, recvCount_xY,rank_xY,recvtag+8); - req1[9] = MPI_COMM_SCALBL.Isend(sendbuf_xY, sendCount_xY,rank_xY,sendtag+9); - req2[9] = MPI_COMM_SCALBL.Irecv(recvbuf_Xy, recvCount_Xy,rank_Xy,recvtag+9); - req1[10] = MPI_COMM_SCALBL.Isend(sendbuf_xz, sendCount_xz,rank_xz,sendtag+10); - req2[10] = MPI_COMM_SCALBL.Irecv(recvbuf_XZ, recvCount_XZ,rank_XZ,recvtag+10); - req1[11] = MPI_COMM_SCALBL.Isend(sendbuf_XZ, sendCount_XZ,rank_XZ,sendtag+11); - req2[11] = MPI_COMM_SCALBL.Irecv(recvbuf_xz, recvCount_xz,rank_xz,recvtag+11); - req1[12] = MPI_COMM_SCALBL.Isend(sendbuf_Xz, sendCount_Xz,rank_Xz,sendtag+12); - req2[12] = MPI_COMM_SCALBL.Irecv(recvbuf_xZ, recvCount_xZ,rank_xZ,recvtag+12); - req1[13] = MPI_COMM_SCALBL.Isend(sendbuf_xZ, sendCount_xZ,rank_xZ,sendtag+13); - req2[13] = MPI_COMM_SCALBL.Irecv(recvbuf_Xz, recvCount_Xz,rank_Xz,recvtag+13); - req1[14] = MPI_COMM_SCALBL.Isend(sendbuf_yz, sendCount_yz,rank_yz,sendtag+14); - req2[14] = MPI_COMM_SCALBL.Irecv(recvbuf_YZ, recvCount_YZ,rank_YZ,recvtag+14); - req1[15] = MPI_COMM_SCALBL.Isend(sendbuf_YZ, sendCount_YZ,rank_YZ,sendtag+15); - req2[15] = MPI_COMM_SCALBL.Irecv(recvbuf_yz, recvCount_yz,rank_yz,recvtag+15); - req1[16] = MPI_COMM_SCALBL.Isend(sendbuf_Yz, sendCount_Yz,rank_Yz,sendtag+16); - req2[16] = MPI_COMM_SCALBL.Irecv(recvbuf_yZ, recvCount_yZ,rank_yZ,recvtag+16); - req1[17] = MPI_COMM_SCALBL.Isend(sendbuf_yZ, sendCount_yZ,rank_yZ,sendtag+17); - req2[17] = MPI_COMM_SCALBL.Irecv(recvbuf_Yz, recvCount_Yz,rank_Yz,recvtag+17); + req1[0] = MPI_COMM_SCALBL.Isend(sendbuf_x, sendCount_x, rank_x,sendtag+0); + req2[0] = MPI_COMM_SCALBL.Irecv(recvbuf_X, recvCount_X, rank_X,recvtag+0); + req1[1] = MPI_COMM_SCALBL.Isend(sendbuf_X, sendCount_X, rank_X,sendtag+1); + req2[1] = MPI_COMM_SCALBL.Irecv(recvbuf_x, recvCount_x, rank_x,recvtag+1); + req1[2] = MPI_COMM_SCALBL.Isend(sendbuf_y, sendCount_y, rank_y,sendtag+2); + req2[2] = MPI_COMM_SCALBL.Irecv(recvbuf_Y, recvCount_Y, rank_Y,recvtag+2); + req1[3] = MPI_COMM_SCALBL.Isend(sendbuf_Y, sendCount_Y, rank_Y,sendtag+3); + req2[3] = MPI_COMM_SCALBL.Irecv(recvbuf_y, recvCount_y, rank_y,recvtag+3); + req1[4] = MPI_COMM_SCALBL.Isend(sendbuf_z, sendCount_z, rank_z,sendtag+4); + req2[4] = 
MPI_COMM_SCALBL.Irecv(recvbuf_Z, recvCount_Z, rank_Z,recvtag+4); + req1[5] = MPI_COMM_SCALBL.Isend(sendbuf_Z, sendCount_Z, rank_Z,sendtag+5); + req2[5] = MPI_COMM_SCALBL.Irecv(recvbuf_z, recvCount_z, rank_z,recvtag+5); + req1[6] = MPI_COMM_SCALBL.Isend(sendbuf_xy, sendCount_xy, rank_xy,sendtag+6); + req2[6] = MPI_COMM_SCALBL.Irecv(recvbuf_XY, recvCount_XY, rank_XY,recvtag+6); + req1[7] = MPI_COMM_SCALBL.Isend(sendbuf_XY, sendCount_XY, rank_XY,sendtag+7); + req2[7] = MPI_COMM_SCALBL.Irecv(recvbuf_xy, recvCount_xy, rank_xy,recvtag+7); + req1[8] = MPI_COMM_SCALBL.Isend(sendbuf_Xy, sendCount_Xy, rank_Xy,sendtag+8); + req2[8] = MPI_COMM_SCALBL.Irecv(recvbuf_xY, recvCount_xY, rank_xY,recvtag+8); + req1[9] = MPI_COMM_SCALBL.Isend(sendbuf_xY, sendCount_xY, rank_xY,sendtag+9); + req2[9] = MPI_COMM_SCALBL.Irecv(recvbuf_Xy, recvCount_Xy, rank_Xy,recvtag+9); + req1[10] = MPI_COMM_SCALBL.Isend(sendbuf_xz, sendCount_xz, rank_xz,sendtag+10); + req2[10] = MPI_COMM_SCALBL.Irecv(recvbuf_XZ, recvCount_XZ, rank_XZ,recvtag+10); + req1[11] = MPI_COMM_SCALBL.Isend(sendbuf_XZ, sendCount_XZ, rank_XZ,sendtag+11); + req2[11] = MPI_COMM_SCALBL.Irecv(recvbuf_xz, recvCount_xz, rank_xz,recvtag+11); + req1[12] = MPI_COMM_SCALBL.Isend(sendbuf_Xz, sendCount_Xz, rank_Xz,sendtag+12); + req2[12] = MPI_COMM_SCALBL.Irecv(recvbuf_xZ, recvCount_xZ, rank_xZ,recvtag+12); + req1[13] = MPI_COMM_SCALBL.Isend(sendbuf_xZ, sendCount_xZ, rank_xZ,sendtag+13); + req2[13] = MPI_COMM_SCALBL.Irecv(recvbuf_Xz, recvCount_Xz, rank_Xz,recvtag+13); + req1[14] = MPI_COMM_SCALBL.Isend(sendbuf_yz, sendCount_yz, rank_yz,sendtag+14); + req2[14] = MPI_COMM_SCALBL.Irecv(recvbuf_YZ, recvCount_YZ, rank_YZ,recvtag+14); + req1[15] = MPI_COMM_SCALBL.Isend(sendbuf_YZ, sendCount_YZ, rank_YZ,sendtag+15); + req2[15] = MPI_COMM_SCALBL.Irecv(recvbuf_yz, recvCount_yz, rank_yz,recvtag+15); + req1[16] = MPI_COMM_SCALBL.Isend(sendbuf_Yz, sendCount_Yz, rank_Yz,sendtag+16); + req2[16] = MPI_COMM_SCALBL.Irecv(recvbuf_yZ, recvCount_yZ, rank_yZ,recvtag+16); + req1[17] = MPI_COMM_SCALBL.Isend(sendbuf_yZ, sendCount_yZ, rank_yZ,sendtag+17); + req2[17] = MPI_COMM_SCALBL.Irecv(recvbuf_Yz, recvCount_Yz, rank_Yz,recvtag+17); //................................................................................... } void ScaLBL_Communicator::RecvHalo(double *data){ diff --git a/common/ScaLBL.h b/common/ScaLBL.h index cb1da09d..c2413dad 100644 --- a/common/ScaLBL.h +++ b/common/ScaLBL.h @@ -799,6 +799,12 @@ private: int *dvcRecvDist_x, *dvcRecvDist_y, *dvcRecvDist_z, *dvcRecvDist_X, *dvcRecvDist_Y, *dvcRecvDist_Z; int *dvcRecvDist_xy, *dvcRecvDist_yz, *dvcRecvDist_xz, *dvcRecvDist_Xy, *dvcRecvDist_Yz, *dvcRecvDist_xZ; int *dvcRecvDist_xY, *dvcRecvDist_yZ, *dvcRecvDist_Xz, *dvcRecvDist_XY, *dvcRecvDist_YZ, *dvcRecvDist_XZ; + // MPI requests for persistent communications + std::vector> req_D3Q19AA; + std::vector> req_BiD3Q19AA; + std::vector> req_TriD3Q19AA; + void start( std::vector>& requests ); + void wait( std::vector>& requests ); //...................................................................................... int *bb_dist; int *bb_interactions;
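// ----------------------------------------------------------------------------
// [Editor's sketch, not part of the patch] Caller-side view of the change:
// with req_D3Q19AA built once in the ScaLBL_Communicator constructor, each
// timestep only restarts and waits on those requests via SendD3Q19AA /
// RecvD3Q19AA. The timestep() wrapper and the "interior work" placeholders
// below are illustrative assumptions, not code from this repository.
// ----------------------------------------------------------------------------
void timestep(ScaLBL_Communicator &ScaLBL_Comm, double *dist) {
    ScaLBL_Comm.SendD3Q19AA(dist);  // pack halo buffers, then start( req_D3Q19AA )
    // ... update interior lattice sites that need no halo data ...
    ScaLBL_Comm.RecvD3Q19AA(dist);  // wait( req_D3Q19AA ), then unpack halos
    // ... update boundary sites that depend on the received halo data ...
}
// ----------------------------------------------------------------------------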