// This file contains the default instantiations for templated operations // Note: Intel compilers need definitions before all default instantions to compile correctly #ifndef included_MPI_I #define included_MPI_I #include "common/Utilities.h" #include #define MPI_CLASS MPI #define MPI_CLASS_ERROR ERROR #define MPI_CLASS_ASSERT ASSERT #undef NULL_USE #define NULL_USE( variable ) \ do { \ if ( 0 ) { \ auto static t = (char *) &variable; \ t++; \ } \ } while ( 0 ) namespace Utilities { // Function to test if a type is a std::pair template struct is_pair : std::false_type { }; template struct is_pair> : std::true_type { }; // Function to test if a type can be passed by MPI template constexpr typename std::enable_if::value,bool>::type is_mpi_copyable() { return true; } template constexpr typename std::enable_if::value&&is_pair::value,bool>::type is_mpi_copyable() { return is_mpi_copyable() && is_mpi_copyable(); } template constexpr typename std::enable_if::value&&!is_pair::value,bool>::type is_mpi_copyable() { return false; } /************************************************************************ * sumReduce * ************************************************************************/ template inline TYPE MPI_CLASS::sumReduce( const TYPE value ) const { if ( comm_size > 1 ) { TYPE tmp = value; call_sumReduce( &tmp, 1 ); return tmp; } else { return value; } } template inline void MPI_CLASS::sumReduce( TYPE *x, const int n ) const { if ( comm_size > 1 ) call_sumReduce( x, n ); } template inline void MPI_CLASS::sumReduce( const TYPE *x, TYPE *y, const int n ) const { if ( comm_size > 1 ) { call_sumReduce( x, y, n ); } else { for ( int i = 0; i < n; i++ ) y[i] = x[i]; } } // Define specializations of call_sumReduce(TYPE*, const int) #if defined( USE_MPI ) || defined( USE_EXT_MPI ) template<> void MPI_CLASS::call_sumReduce( unsigned char *, const int ) const; template<> void MPI_CLASS::call_sumReduce( char *, const int ) const; template<> void MPI_CLASS::call_sumReduce( unsigned int *, const int ) const; template<> void MPI_CLASS::call_sumReduce( int *, const int ) const; template<> void MPI_CLASS::call_sumReduce( unsigned long int *, const int ) const; template<> void MPI_CLASS::call_sumReduce( long int *, const int ) const; template<> void MPI_CLASS::call_sumReduce( size_t *, const int ) const; template<> void MPI_CLASS::call_sumReduce( float *, const int ) const; template<> void MPI_CLASS::call_sumReduce( double *, const int ) const; template<> void MPI_CLASS::call_sumReduce>( std::complex *, const int ) const; #endif // Default instantiations of call_sumReduce(TYPE*, const int) template void MPI_CLASS::call_sumReduce( TYPE *, const int ) const { char message[200]; sprintf( message, "Default instantion of sumReduce in parallel is not supported (%s)", typeid( TYPE ).name() ); MPI_CLASS_ERROR( message ); } // Define specializations of call_sumReduce(const TYPE*, TYPE*, const int) #if defined( USE_MPI ) || defined( USE_EXT_MPI ) template<> void MPI_CLASS::call_sumReduce( const unsigned char *, unsigned char *, const int ) const; template<> void MPI_CLASS::call_sumReduce( const char *, char *, const int ) const; template<> void MPI_CLASS::call_sumReduce( const unsigned int *, unsigned int *, const int ) const; template<> void MPI_CLASS::call_sumReduce( const int *, int *, const int ) const; template<> void MPI_CLASS::call_sumReduce( const unsigned long int *, unsigned long int *, const int ) const; template<> void MPI_CLASS::call_sumReduce( const long int *, long int *, const int ) const; template<> void MPI_CLASS::call_sumReduce( const size_t *, size_t *, const int ) const; template<> void MPI_CLASS::call_sumReduce( const float *, float *, const int ) const; template<> void MPI_CLASS::call_sumReduce( const double *, double *, const int ) const; template<> void MPI_CLASS::call_sumReduce>( const std::complex *, std::complex *, const int ) const; #endif // Default instantiations of call_sumReduce(const TYPE*, TYPE*, const int) template void MPI_CLASS::call_sumReduce( const TYPE *x, TYPE *y, const int n ) const { NULL_USE( x ); NULL_USE( y ); NULL_USE( n ); char message[200]; sprintf( message, "Default instantion of sumReduce in parallel is not supported (%s)", typeid( TYPE ).name() ); MPI_CLASS_ERROR( message ); } /************************************************************************ * minReduce * ************************************************************************/ template inline TYPE MPI_CLASS::minReduce( const TYPE value ) const { if ( comm_size > 1 ) { TYPE tmp = value; call_minReduce( &tmp, 1, nullptr ); return tmp; } else { return value; } } template inline void MPI_CLASS::minReduce( TYPE *x, const int n, int *rank_of_min ) const { if ( comm_size > 1 ) { call_minReduce( x, n, rank_of_min ); } else { if ( rank_of_min != nullptr ) { for ( int i = 0; i < n; i++ ) rank_of_min[i] = 0; } } } template inline void MPI_CLASS::minReduce( const TYPE *x, TYPE *y, const int n, int *rank_of_min ) const { if ( comm_size > 1 ) { call_minReduce( x, y, n, rank_of_min ); } else { for ( int i = 0; i < n; i++ ) { y[i] = x[i]; if ( rank_of_min != nullptr ) rank_of_min[i] = 0; } } } // Define specializations of call_minReduce(TYPE*, const int, int*) #if defined( USE_MPI ) || defined( USE_EXT_MPI ) template<> void MPI_CLASS::call_minReduce( unsigned char *, const int, int * ) const; template<> void MPI_CLASS::call_minReduce( char *, const int, int * ) const; template<> void MPI_CLASS::call_minReduce( unsigned int *, const int, int * ) const; template<> void MPI_CLASS::call_minReduce( int *, const int, int * ) const; template<> void MPI_CLASS::call_minReduce( unsigned long int *, const int, int * ) const; template<> void MPI_CLASS::call_minReduce( long int *, const int, int * ) const; template<> void MPI_CLASS::call_minReduce( unsigned long long int *, const int, int * ) const; template<> void MPI_CLASS::call_minReduce( long long int *, const int, int * ) const; template<> void MPI_CLASS::call_minReduce( size_t *, const int, int * ) const; template<> void MPI_CLASS::call_minReduce( float *, const int, int * ) const; template<> void MPI_CLASS::call_minReduce( double *, const int, int * ) const; #endif // Default instantiations of call_minReduce(TYPE*, const int, int*) template void MPI_CLASS::call_minReduce( TYPE *, const int, int * ) const { char message[200]; sprintf( message, "Default instantion of minReduce in parallel is not supported (%s)", typeid( TYPE ).name() ); MPI_CLASS_ERROR( message ); } // Define specializations of call_minReduce(const TYPE*, TYPE*, const int, int*) #if defined( USE_MPI ) || defined( USE_EXT_MPI ) template<> void MPI_CLASS::call_minReduce( const unsigned char *, unsigned char *, const int, int * ) const; template<> void MPI_CLASS::call_minReduce( const char *, char *, const int, int * ) const; template<> void MPI_CLASS::call_minReduce( const unsigned int *, unsigned int *, const int, int * ) const; template<> void MPI_CLASS::call_minReduce( const int *, int *, const int, int * ) const; template<> void MPI_CLASS::call_minReduce( const unsigned long int *, unsigned long int *, const int, int * ) const; template<> void MPI_CLASS::call_minReduce( const long int *, long int *, const int, int * ) const; template<> void MPI_CLASS::call_minReduce( const unsigned long long int *, unsigned long long int *, const int, int * ) const; template<> void MPI_CLASS::call_minReduce( const long long int *, long long int *, const int, int * ) const; template<> void MPI_CLASS::call_minReduce( const size_t *, size_t *, const int, int * ) const; template<> void MPI_CLASS::call_minReduce( const float *, float *, const int, int * ) const; template<> void MPI_CLASS::call_minReduce( const double *, double *, const int, int * ) const; #endif // Default instantiations of call_minReduce(const TYPE*, TYPE*, const int, int*) template void MPI_CLASS::call_minReduce( const TYPE *, TYPE *, const int, int * ) const { char message[200]; sprintf( message, "Default instantion of minReduce in parallel is not supported (%s)", typeid( TYPE ).name() ); MPI_CLASS_ERROR( message ); } /************************************************************************ * maxReduce * ************************************************************************/ template inline TYPE MPI_CLASS::maxReduce( const TYPE value ) const { if ( comm_size > 1 ) { TYPE tmp = value; call_maxReduce( &tmp, 1, nullptr ); return tmp; } else { return value; } } template inline void MPI_CLASS::maxReduce( TYPE *x, const int n, int *rank_of_max ) const { if ( comm_size > 1 ) { call_maxReduce( x, n, rank_of_max ); } else { if ( rank_of_max != nullptr ) { for ( int i = 0; i < n; i++ ) rank_of_max[i] = 0; } } } template inline void MPI_CLASS::maxReduce( const TYPE *x, TYPE *y, const int n, int *rank_of_max ) const { if ( comm_size > 1 ) { call_maxReduce( x, y, n, rank_of_max ); } else { for ( int i = 0; i < n; i++ ) { y[i] = x[i]; if ( rank_of_max != nullptr ) rank_of_max[i] = 0; } } } // Define specializations of call_maxReduce(TYPE*, const int, int*) #if defined( USE_MPI ) || defined( USE_EXT_MPI ) template<> void MPI_CLASS::call_maxReduce( unsigned char *, const int, int * ) const; template<> void MPI_CLASS::call_maxReduce( char *, const int, int * ) const; template<> void MPI_CLASS::call_maxReduce( unsigned int *, const int, int * ) const; template<> void MPI_CLASS::call_maxReduce( int *, const int, int * ) const; template<> void MPI_CLASS::call_maxReduce( unsigned long int *, const int, int * ) const; template<> void MPI_CLASS::call_maxReduce( long int *, const int, int * ) const; template<> void MPI_CLASS::call_maxReduce( unsigned long long int *, const int, int * ) const; template<> void MPI_CLASS::call_maxReduce( long long int *, const int, int * ) const; template<> void MPI_CLASS::call_maxReduce( size_t *, const int, int * ) const; template<> void MPI_CLASS::call_maxReduce( float *, const int, int * ) const; template<> void MPI_CLASS::call_maxReduce( double *, const int, int * ) const; #endif // Default instantiations of call_maxReduce(TYPE*, const int, int*) template void MPI_CLASS::call_maxReduce( TYPE *, const int, int * ) const { char message[200]; sprintf( message, "Default instantion of maxReduce in parallel is not supported (%s)", typeid( TYPE ).name() ); MPI_CLASS_ERROR( message ); } // Define specializations of call_maxReduce(const TYPE*, TYPE*, const int, int*) #if defined( USE_MPI ) || defined( USE_EXT_MPI ) template<> void MPI_CLASS::call_maxReduce( const unsigned char *, unsigned char *, const int, int * ) const; template<> void MPI_CLASS::call_maxReduce( const char *, char *, const int, int * ) const; template<> void MPI_CLASS::call_maxReduce( const unsigned int *, unsigned int *, const int, int * ) const; template<> void MPI_CLASS::call_maxReduce( const int *, int *, const int, int * ) const; template<> void MPI_CLASS::call_maxReduce( const unsigned long int *, unsigned long int *, const int, int * ) const; template<> void MPI_CLASS::call_maxReduce( const long int *, long int *, const int, int * ) const; template<> void MPI_CLASS::call_maxReduce( const unsigned long long int *, unsigned long long int *, const int, int * ) const; template<> void MPI_CLASS::call_maxReduce( const long long int *, long long int *, const int, int * ) const; template<> void MPI_CLASS::call_maxReduce( const size_t *, size_t *, const int, int * ) const; template<> void MPI_CLASS::call_maxReduce( const float *, float *, const int, int * ) const; template<> void MPI_CLASS::call_maxReduce( const double *, double *, const int, int * ) const; #endif // Default instantiations of call_maxReduce(const TYPE*, TYPE*, const int, int*) template void MPI_CLASS::call_maxReduce( const TYPE *, TYPE *, const int, int * ) const { char message[200]; sprintf( message, "Default instantion of maxReduce in parallel is not supported (%s)", typeid( TYPE ).name() ); MPI_CLASS_ERROR( message ); } /************************************************************************ * bcast * ************************************************************************/ // Define specializations of bcast(TYPE*, const int, const int) #if defined( USE_MPI ) || defined( USE_EXT_MPI ) template<> void MPI_CLASS::call_bcast( unsigned char *, const int, const int ) const; template<> void MPI_CLASS::call_bcast( char *, const int, const int ) const; template<> void MPI_CLASS::call_bcast( unsigned int *, const int, const int ) const; template<> void MPI_CLASS::call_bcast( int *, const int, const int ) const; template<> void MPI_CLASS::call_bcast( float *, const int, const int ) const; template<> void MPI_CLASS::call_bcast( double *, const int, const int ) const; #else template<> void MPI_CLASS::call_bcast( char *, const int, const int ) const; #endif // Default instantiations of bcast(TYPE*, const int, const int) template void MPI_CLASS::call_bcast( TYPE *x, const int n, const int root ) const { static_assert( is_mpi_copyable(), "Object is not trivially copyable" ); call_bcast( (char *) x, (int) n * sizeof( TYPE ), root ); } // Specialization of bcast for std::string template<> inline std::string MPI_CLASS::bcast( const std::string &value, const int root ) const { if ( comm_size == 1 ) return value; int length = static_cast( value.size() ); call_bcast( &length, 1, root ); if ( length == 0 ) return std::string(); char *str = new char[length + 1]; if ( root == comm_rank ) { for ( int i = 0; i < length; i++ ) str[i] = value[i]; } call_bcast( str, length, root ); str[length] = 0; std::string result( str ); delete[] str; return result; } template<> inline void MPI_CLASS::bcast( std::string *, const int, const int ) const { MPI_CLASS_ERROR( "Cannot bcast an array of strings" ); } // Default implimentation of bcast template inline TYPE MPI_CLASS::bcast( const TYPE &value, const int root ) const { if ( root >= comm_size ) MPI_CLASS_ERROR( "root cannot be >= size in bcast" ); if ( comm_size > 1 ) { TYPE tmp = value; call_bcast( &tmp, 1, root ); return tmp; } else { return value; } } template inline void MPI_CLASS::bcast( TYPE *x, const int n, const int root ) const { if ( root >= comm_size ) MPI_CLASS_ERROR( "root cannot be >= size in bcast" ); if ( comm_size > 1 ) call_bcast( x, n, root ); } /************************************************************************ * send * ************************************************************************/ // Define specializations of send(const TYPE*, const int, const int, int) #if defined( USE_MPI ) || defined( USE_EXT_MPI ) template<> void MPI_CLASS::send( const char *, const int, const int, int ) const; template<> void MPI_CLASS::send( const int *, int, const int, int ) const; template<> void MPI_CLASS::send( const float *, const int, const int, int ) const; template<> void MPI_CLASS::send( const double *, const int, const int, int ) const; #else template<> void MPI_CLASS::send( const char *, const int, const int, int ) const; #endif // Default instantiations of send(const TYPE*, const int, const int, int) template inline void MPI_CLASS::send( const TYPE *buf, const int length, const int recv_proc_number, int tag ) const { static_assert( is_mpi_copyable(), "Object is not trivially copyable" ); send( (const char *) buf, length * sizeof( TYPE ), recv_proc_number, tag ); } /************************************************************************ * Isend * ************************************************************************/ // Define specializations of Isend(const TYPE*, const int, const int, const int) #if defined( USE_MPI ) || defined( USE_EXT_MPI ) template<> MPI_Request MPI_CLASS::Isend( const char *, const int, const int, const int ) const; template<> MPI_Request MPI_CLASS::Isend( const int *, int, const int, const int ) const; template<> MPI_Request MPI_CLASS::Isend( const float *, const int, const int, const int ) const; template<> MPI_Request MPI_CLASS::Isend( const double *, const int, const int, const int ) const; #else template<> MPI_Request MPI_CLASS::Isend( const char *, const int, const int, const int ) const; #endif // Default instantiations of Isend(const TYPE*, const int, const int, const int) template inline MPI_Request MPI_CLASS::Isend( const TYPE *buf, const int length, const int recv_proc_number, const int tag ) const { static_assert( is_mpi_copyable(), "Object is not trivially copyable" ); return Isend( (const char *) buf, length * sizeof( TYPE ), recv_proc_number, tag ); } /************************************************************************ * recv * ************************************************************************/ // Define specializations of recv(TYPE*, int&, const int, const bool, int) #if defined( USE_MPI ) || defined( USE_EXT_MPI ) template<> void MPI_CLASS::recv( char *, int &, const int, const bool, int ) const; template<> void MPI_CLASS::recv( int *, int &, const int, const bool, int ) const; template<> void MPI_CLASS::recv( float *, int &, const int, const bool, int ) const; template<> void MPI_CLASS::recv( double *, int &, const int, const bool, int ) const; #else template<> void MPI_CLASS::recv( char *, int &, const int, const bool, int ) const; #endif // Default instantiations of recv(TYPE*, int&, const int, const bool, int) template inline void MPI_CLASS::recv( TYPE *buf, int &length, const int send_proc_number, const bool get_length, int tag ) const { static_assert( is_mpi_copyable(), "Object is not trivially copyable" ); int size = length * sizeof( TYPE ); recv( (char *) buf, size, send_proc_number, get_length, tag ); if ( get_length ) { MPI_CLASS_ASSERT( size % sizeof( TYPE ) == 0 ); length = size / sizeof( TYPE ); } } /************************************************************************ * Irecv * ************************************************************************/ // Define specializations of recv(TYPE*, int&, const int, const bool, int) #if defined( USE_MPI ) || defined( USE_EXT_MPI ) template<> MPI_Request MPI_CLASS::Irecv( char *, const int, const int, const int ) const; template<> MPI_Request MPI_CLASS::Irecv( int *, const int, const int, const int ) const; template<> MPI_Request MPI_CLASS::Irecv( float *, const int, const int, const int ) const; template<> MPI_Request MPI_CLASS::Irecv( double *, const int, const int, const int ) const; #else template<> MPI_Request MPI_CLASS::Irecv( char *, const int, const int, const int ) const; #endif // Default instantiations of recv(TYPE*, int&, const int, const bool, int) template inline MPI_Request MPI_CLASS::Irecv( TYPE *buf, const int length, const int send_proc, const int tag ) const { static_assert( is_mpi_copyable(), "Object is not trivially copyable" ); return Irecv( (char *) buf, length * sizeof( TYPE ), send_proc, tag ); } /************************************************************************ * sendrecv * ************************************************************************/ #if defined( USE_MPI ) || defined( USE_EXT_MPI ) template<> void MPI_CLASS::sendrecv( const char*, int, int, int, char*, int, int, int ) const; template<> void MPI_CLASS::sendrecv( const int*, int, int, int, int*, int, int, int ) const; template<> void MPI_CLASS::sendrecv( const float*, int, int, int, float*, int, int, int ) const; template<> void MPI_CLASS::sendrecv( const double*, int, int, int, double*, int, int, int ) const; template void MPI_CLASS::sendrecv( const TYPE *sendbuf, int sendcount, int dest, int sendtag, TYPE *recvbuf, int recvcount, int source, int recvtag ) const { ERROR( "Not implimented" ); } #else template void MPI_CLASS::sendrecv( const TYPE *sendbuf, int sendcount, int dest, int sendtag, TYPE *recvbuf, int recvcount, int source, int recvtag ) const { ASSERT( dest == 0 ); ASSERT( source == 0 ); ASSERT( sendcount == recvcount ); ASSERT( sendtag == recvtag ); memcpy( recvbuf, sendbuf, sendcount * sizeof( TYPE ) ); } #endif /************************************************************************ * allGather * ************************************************************************/ template std::vector MPI_CLASS::allGather( const TYPE &x ) const { static_assert( is_mpi_copyable(), "Object is not trivially copyable" ); if ( getSize() <= 1 ) return std::vector( 1, x ); std::vector data( getSize() ); allGather( x, data.data() ); return data; } template std::vector MPI_CLASS::allGather( const std::vector &x ) const { static_assert( is_mpi_copyable(), "Object is not trivially copyable" ); if ( getSize() <= 1 ) return x; std::vector count = allGather( x.size() ); std::vector disp( getSize(), 0 ); size_t N = count[0]; for ( size_t i = 1; i < count.size(); i++ ) { disp[i] = disp[i - 1] + count[i - 1]; N += count[i]; } std::vector data( N ); allGather( x.data(), x.size(), data.data(), count.data(), disp.data(), true ); return data; } // Specialization of MPI_CLASS::allGather for std::string template<> inline void MPI_CLASS::allGather( const std::string &x_in, std::string *x_out ) const { // Get the bytes recvied per processor std::vector recv_cnt( comm_size, 0 ); allGather( (int) x_in.size() + 1, &recv_cnt[0] ); std::vector recv_disp( comm_size, 0 ); for ( int i = 1; i < comm_size; i++ ) recv_disp[i] = recv_disp[i - 1] + recv_cnt[i - 1]; // Call the vector form of allGather for the char arrays char *recv_data = new char[recv_disp[comm_size - 1] + recv_cnt[comm_size - 1]]; allGather( x_in.c_str(), (int) x_in.size() + 1, recv_data, &recv_cnt[0], &recv_disp[0], true ); for ( int i = 0; i < comm_size; i++ ) x_out[i] = std::string( &recv_data[recv_disp[i]] ); delete[] recv_data; } // Default instantiation of MPI_CLASS::allGather template inline void MPI_CLASS::allGather( const TYPE &x_in, TYPE *x_out ) const { static_assert( is_mpi_copyable(), "Object is not trivially copyable" ); if ( comm_size > 1 ) { // We can use the vector form of allGather with a char array to ge the data we want call_allGather( x_in, x_out ); } else { // Single processor case x_out[0] = x_in; } } // Specialization of MPI_CLASS::allGather for std::string template<> inline int MPI_CLASS::allGather( const std::string *, const int, std::string *, int *, int *, bool ) const { MPI_CLASS_ERROR( "Cannot allGather an array of strings" ); return 0; } // Define specializations of call_allGather(const TYPE, TYPE*) #if defined( USE_MPI ) || defined( USE_EXT_MPI ) template<> void MPI_CLASS::call_allGather( const unsigned char &, unsigned char * ) const; template<> void MPI_CLASS::call_allGather( const char &, char * ) const; template<> void MPI_CLASS::call_allGather( const unsigned int &, unsigned int * ) const; template<> void MPI_CLASS::call_allGather( const int &, int * ) const; template<> void MPI_CLASS::call_allGather( const unsigned long int &, unsigned long int * ) const; template<> void MPI_CLASS::call_allGather( const long int &, long int * ) const; template<> void MPI_CLASS::call_allGather( const float &, float * ) const; template<> void MPI_CLASS::call_allGather( const double &, double * ) const; #endif // Default instantiation of MPI_CLASS::allGather template int MPI_CLASS::allGather( const TYPE *send_data, const int send_cnt, TYPE *recv_data, int *recv_cnt, int *recv_disp, bool known_recv ) const { // Check the inputs if ( known_recv && ( recv_cnt == nullptr || recv_disp == nullptr ) ) MPI_CLASS_ERROR( "Error calling allGather" ); // Check if we are dealing with a single processor if ( comm_size == 1 ) { if ( send_data == nullptr && send_cnt > 0 ) { MPI_CLASS_ERROR( "send_data is null" ); } else if ( !known_recv ) { // We do not know the recieved sizes for ( int i = 0; i < send_cnt; i++ ) recv_data[i] = send_data[i]; if ( recv_cnt != nullptr ) recv_cnt[0] = send_cnt; if ( recv_disp != nullptr ) recv_disp[0] = 0; } else { // We know the recieved sizes for ( int i = 0; i < send_cnt; i++ ) recv_data[i + recv_disp[0]] = send_data[i]; } return send_cnt; } // Get the sizes of the recieved data (if necessary) int *recv_cnt2 = recv_cnt; int *recv_disp2 = recv_disp; if ( !known_recv ) { if ( recv_cnt == nullptr ) recv_cnt2 = new int[comm_size]; if ( recv_disp == nullptr ) recv_disp2 = new int[comm_size]; call_allGather( send_cnt, recv_cnt2 ); recv_disp2[0] = 0; for ( int i = 1; i < comm_size; i++ ) recv_disp2[i] = recv_disp2[i - 1] + recv_cnt2[i - 1]; } int N_recv = 0; for ( int i = 0; i < comm_size; i++ ) N_recv += recv_cnt2[i]; // Send/recv the data call_allGather( send_data, send_cnt, recv_data, recv_cnt2, recv_disp2 ); // Delete any temporary memory if ( recv_cnt == nullptr ) delete[] recv_cnt2; if ( recv_disp == nullptr ) delete[] recv_disp2; return N_recv; } // Default instantiations of call_allGather(const TYPE, TYPE*) template void MPI_CLASS::call_allGather( const TYPE &x_in, TYPE *x_out ) const { static_assert( is_mpi_copyable(), "Object is not trivially copyable" ); allGather( (const char *) &x_in, (int) sizeof( TYPE ), (char *) x_out ); } // Define specializations of call_allGather(const TYPE*, int, TYPE*, int*, int*) #if defined( USE_MPI ) || defined( USE_EXT_MPI ) template<> void MPI_CLASS::call_allGather( const unsigned char *, int, unsigned char *, int *, int * ) const; template<> void MPI_CLASS::call_allGather( const char *, int, char *, int *, int * ) const; template<> void MPI_CLASS::call_allGather( const unsigned int *, int, unsigned int *, int *, int * ) const; template<> void MPI_CLASS::call_allGather( const int *, int, int *, int *, int * ) const; template<> void MPI_CLASS::call_allGather( const unsigned long int *, int, unsigned long int *, int *, int * ) const; template<> void MPI_CLASS::call_allGather( const long int *, int, long int *, int *, int * ) const; template<> void MPI_CLASS::call_allGather( const float *, int, float *, int *, int * ) const; template<> void MPI_CLASS::call_allGather( const double *, int, double *, int *, int * ) const; #else template<> void MPI_CLASS::call_allGather( const char *, int, char *, int *, int * ) const; #endif // Default instantiations of int call_allGather(const TYPE*, int, TYPE*, int*) template void MPI_CLASS::call_allGather( const TYPE *x_in, int size_in, TYPE *x_out, int *size_out, int *disp_out ) const { int *size2 = new int[comm_size]; int *disp2 = new int[comm_size]; for ( int i = 0; i < comm_size; i++ ) { size2[i] = size_out[i] * sizeof( TYPE ); disp2[i] = disp_out[i] * sizeof( TYPE ); } static_assert( is_mpi_copyable(), "Object is not trivially copyable" ); call_allGather( (const char *) x_in, (int) size_in * sizeof( TYPE ), (char *) x_out, size2, disp2 ); delete[] size2; delete[] disp2; } /************************************************************************ * setGather * ************************************************************************/ template inline void MPI_CLASS::setGather( std::set &set ) const { std::vector send_buf( set.begin(), set.end() ); std::vector recv_cnt( this->comm_size, 0 ); this->allGather( (int) send_buf.size(), &recv_cnt[0] ); std::vector recv_disp( this->comm_size, 0 ); for ( int i = 1; i < this->comm_size; i++ ) recv_disp[i] = recv_disp[i - 1] + recv_cnt[i - 1]; size_t N_recv_tot = 0; for ( int i = 0; i < this->comm_size; i++ ) N_recv_tot += recv_cnt[i]; if ( N_recv_tot == 0 ) return; std::vector recv_buf( N_recv_tot ); TYPE *send_data = nullptr; if ( send_buf.size() > 0 ) { send_data = &send_buf[0]; } TYPE *recv_data = &recv_buf[0]; static_assert( is_mpi_copyable(), "Object is not trivially copyable" ); this->allGather( send_data, (int) send_buf.size(), recv_data, &recv_cnt[0], &recv_disp[0], true ); for ( size_t i = 0; i < recv_buf.size(); i++ ) set.insert( recv_buf[i] ); } /************************************************************************ * mapGather * ************************************************************************/ template inline void MPI_CLASS::mapGather( std::map &map ) const { std::vector send_id; std::vector send_data; send_id.reserve( map.size() ); send_data.reserve( map.size() ); for ( auto it = map.begin(); it != map.end(); ++it ) { send_id.push_back( it->first ); send_data.push_back( it->second ); } int send_size = (int) send_id.size(); std::vector recv_cnt( this->comm_size, 0 ); this->allGather( send_size, &recv_cnt[0] ); std::vector recv_disp( this->comm_size, 0 ); for ( int i = 1; i < this->comm_size; i++ ) recv_disp[i] = recv_disp[i - 1] + recv_cnt[i - 1]; size_t N_recv_tot = 0; for ( int i = 0; i < this->comm_size; i++ ) N_recv_tot += recv_cnt[i]; if ( N_recv_tot == 0 ) return; std::vector recv_id( N_recv_tot ); std::vector recv_data( N_recv_tot ); KEY *send_data1 = nullptr; DATA *send_data2 = nullptr; if ( send_id.size() > 0 ) { send_data1 = &send_id[0]; send_data2 = &send_data[0]; } static_assert( is_mpi_copyable(), "Object is not trivially copyable" ); this->allGather( send_data1, send_size, &recv_id[0], &recv_cnt[0], &recv_disp[0], true ); this->allGather( send_data2, send_size, &recv_data[0], &recv_cnt[0], &recv_disp[0], true ); map = std::map(); for ( size_t i = 0; i < N_recv_tot; i++ ) map.insert( std::pair( recv_id[i], recv_data[i] ) ); } /************************************************************************ * sumScan * ************************************************************************/ template inline void MPI_CLASS::sumScan( const TYPE *x, TYPE *y, const int n ) const { if ( comm_size > 1 ) { call_sumScan( x, y, n ); } else { for ( int i = 0; i < n; i++ ) y[i] = x[i]; } } // Define specializations of call_sumScan(const TYPE*, TYPE*, int) #if defined( USE_MPI ) || defined( USE_EXT_MPI ) template<> void MPI_CLASS::call_sumScan( const unsigned char *, unsigned char *, int ) const; template<> void MPI_CLASS::call_sumScan( const char *, char *, int ) const; template<> void MPI_CLASS::call_sumScan( const unsigned int *, unsigned int *, int ) const; template<> void MPI_CLASS::call_sumScan( const int *, int *, int ) const; template<> void MPI_CLASS::call_sumScan( const unsigned long int *, unsigned long int *, int ) const; template<> void MPI_CLASS::call_sumScan( const long int *, long int *, int ) const; template<> void MPI_CLASS::call_sumScan( const size_t *, size_t *, int ) const; template<> void MPI_CLASS::call_sumScan( const float *, float *, int ) const; template<> void MPI_CLASS::call_sumScan( const double *, double *, int ) const; template<> void MPI_CLASS::call_sumScan>( const std::complex *, std::complex *, int ) const; #endif // Default instantiations of call_sumScan(const TYPE*, TYPE*, int) template void MPI_CLASS::call_sumScan( const TYPE *, TYPE *, int ) const { char message[200]; sprintf( message, "Default instantion of sumScan in parallel is not supported (%s)", typeid( TYPE ).name() ); MPI_CLASS_ERROR( message ); } /************************************************************************ * minScan * ************************************************************************/ template inline void MPI_CLASS::minScan( const TYPE *x, TYPE *y, const int n ) const { if ( comm_size > 1 ) { call_minScan( x, y, n ); } else { for ( int i = 0; i < n; i++ ) y[i] = x[i]; } } // Define specializations of call_minScan(const TYPE*, TYPE*, int) #if defined( USE_MPI ) || defined( USE_EXT_MPI ) template<> void MPI_CLASS::call_minScan( const unsigned char *, unsigned char *, int ) const; template<> void MPI_CLASS::call_minScan( const char *, char *, int ) const; template<> void MPI_CLASS::call_minScan( const unsigned int *, unsigned int *, int ) const; template<> void MPI_CLASS::call_minScan( const int *, int *, int ) const; template<> void MPI_CLASS::call_minScan( const unsigned long int *, unsigned long int *, int ) const; template<> void MPI_CLASS::call_minScan( const long int *, long int *, int ) const; template<> void MPI_CLASS::call_minScan( const size_t *, size_t *, int ) const; template<> void MPI_CLASS::call_minScan( const float *, float *, int ) const; template<> void MPI_CLASS::call_minScan( const double *, double *, int ) const; #endif // Default instantiations of call_minScan(const TYPE*, TYPE*, int) template void MPI_CLASS::call_minScan( const TYPE *, TYPE *, int ) const { char message[200]; sprintf( message, "Default instantion of minScan in parallel is not supported (%s)", typeid( TYPE ).name() ); MPI_CLASS_ERROR( message ); } /************************************************************************ * maxScan * ************************************************************************/ template inline void MPI_CLASS::maxScan( const TYPE *x, TYPE *y, const int n ) const { if ( comm_size > 1 ) { call_maxScan( x, y, n ); } else { for ( int i = 0; i < n; i++ ) y[i] = x[i]; } } // Define specializations of call_maxScan(const TYPE*, TYPE*, int) #if defined( USE_MPI ) || defined( USE_EXT_MPI ) template<> void MPI_CLASS::call_maxScan( const unsigned char *, unsigned char *, int ) const; template<> void MPI_CLASS::call_maxScan( const char *, char *, int ) const; template<> void MPI_CLASS::call_maxScan( const unsigned int *, unsigned int *, int ) const; template<> void MPI_CLASS::call_maxScan( const int *, int *, int ) const; template<> void MPI_CLASS::call_maxScan( const unsigned long int *, unsigned long int *, int ) const; template<> void MPI_CLASS::call_maxScan( const long int *, long int *, int ) const; template<> void MPI_CLASS::call_maxScan( const size_t *, size_t *, int ) const; template<> void MPI_CLASS::call_maxScan( const float *, float *, int ) const; template<> void MPI_CLASS::call_maxScan( const double *, double *, int ) const; #endif // Default instantiations of call_maxScan(const TYPE*, TYPE*, int) template void MPI_CLASS::call_maxScan( const TYPE *, TYPE *, int ) const { char message[200]; sprintf( message, "Default instantion of maxReduce in parallel is not supported (%s)", typeid( TYPE ).name() ); MPI_CLASS_ERROR( message ); } /************************************************************************ * allToAll * ************************************************************************/ // Define specializations of allToAll(const int n, const char*, char* ) #if defined( USE_MPI ) || defined( USE_EXT_MPI ) template<> void MPI_CLASS::allToAll( const int n, const unsigned char *, unsigned char * ) const; template<> void MPI_CLASS::allToAll( const int n, const char *, char * ) const; template<> void MPI_CLASS::allToAll( const int n, const unsigned int *, unsigned int * ) const; template<> void MPI_CLASS::allToAll( const int n, const int *, int * ) const; template<> void MPI_CLASS::allToAll( const int n, const unsigned long int *, unsigned long int * ) const; template<> void MPI_CLASS::allToAll( const int n, const long int *, long int * ) const; template<> void MPI_CLASS::allToAll( const int n, const float *, float * ) const; template<> void MPI_CLASS::allToAll( const int n, const double *, double * ) const; #endif // Default instantiations of allToAll(const int n, const char*, char* ) #if defined( USE_MPI ) || defined( USE_EXT_MPI ) template void MPI_CLASS::allToAll( const int n, const TYPE *send_data, TYPE *recv_data ) const { static_assert( is_mpi_copyable(), "Object is not trivially copyable" ); allToAll( n * sizeof( TYPE ), (char *) send_data, (char *) recv_data ); } #else template void MPI_CLASS::allToAll( const int n, const TYPE *send_data, TYPE *recv_data ) const { if ( comm_size != 1 ) MPI_CLASS_ERROR( "Invalid size for allToAll" ); for ( int i = 0; i < n; i++ ) recv_data[i] = send_data[i]; } #endif /************************************************************************ * allToAll * ************************************************************************/ template int MPI_CLASS::allToAll( const TYPE *send_data, const int send_cnt[], const int send_disp[], TYPE *recv_data, int *recv_cnt, int *recv_disp, bool known_recv ) const { int N_recieved = 0; if ( comm_size == 1 ) { // Special case for single-processor communicators if ( known_recv ) { if ( recv_cnt[0] != send_cnt[0] && send_cnt[0] > 0 ) MPI_CLASS_ERROR( "Single processor send/recv are different sizes" ); } else { if ( recv_cnt != nullptr ) recv_cnt[0] = send_cnt[0]; if ( recv_disp != nullptr ) recv_disp[0] = send_disp[0]; } for ( int i = 0; i < send_cnt[0]; i++ ) recv_data[i + recv_disp[0]] = send_data[i + send_disp[0]]; N_recieved = send_cnt[0]; } else if ( known_recv ) { // The recieve sizes are known MPI_CLASS_ASSERT( recv_cnt != nullptr && recv_disp != nullptr ); call_allToAll( send_data, send_cnt, send_disp, recv_data, recv_cnt, recv_disp ); for ( int i = 0; i < comm_size; i++ ) N_recieved += recv_cnt[i]; } else { // The recieve sizes are not known, we need to communicate that information first int *recv_cnt2 = recv_cnt; int *recv_disp2 = recv_disp; if ( recv_cnt == nullptr ) recv_cnt2 = new int[comm_size]; if ( recv_disp == nullptr ) recv_disp2 = new int[comm_size]; // Communicate the size we will be recieving from each processor allToAll( 1, send_cnt, recv_cnt2 ); recv_disp2[0] = 0; for ( int i = 1; i < comm_size; i++ ) recv_disp2[i] = recv_disp2[i - 1] + recv_cnt2[i - 1]; // Send the data call_allToAll( send_data, send_cnt, send_disp, recv_data, recv_cnt2, recv_disp2 ); for ( int i = 0; i < comm_size; i++ ) N_recieved += recv_cnt2[i]; if ( recv_cnt == nullptr ) delete[] recv_cnt2; if ( recv_disp == nullptr ) delete[] recv_disp2; } return N_recieved; } // Define specializations of call_allToAll #if defined( USE_MPI ) || defined( USE_EXT_MPI ) template<> void MPI_CLASS::call_allToAll( const unsigned char *, const int *, const int *, unsigned char *, const int *, const int * ) const; template<> void MPI_CLASS::call_allToAll( const char *, const int *, const int *, char *, const int *, const int * ) const; template<> void MPI_CLASS::call_allToAll( const unsigned int *, const int *, const int *, unsigned int *, const int *, const int * ) const; template<> void MPI_CLASS::call_allToAll( const int *, const int *, const int *, int *, const int *, const int * ) const; template<> void MPI_CLASS::call_allToAll( const unsigned long int *, const int *, const int *, unsigned long int *, const int *, const int * ) const; template<> void MPI_CLASS::call_allToAll( const long int *, const int *, const int *, long int *, const int *, const int * ) const; template<> void MPI_CLASS::call_allToAll( const float *, const int *, const int *, float *, const int *, const int * ) const; template<> void MPI_CLASS::call_allToAll( const double *, const int *, const int *, double *, const int *, const int * ) const; #else template<> void MPI_CLASS::call_allToAll( const char *, const int *, const int *, char *, const int *, const int * ) const; #endif // Default instantiations of call_allToAll template void MPI_CLASS::call_allToAll( const TYPE *send_data, const int send_cnt[], const int send_disp[], TYPE *recv_data, const int *recv_cnt, const int *recv_disp ) const { int *send_cnt2 = new int[comm_size]; int *recv_cnt2 = new int[comm_size]; int *send_disp2 = new int[comm_size]; int *recv_disp2 = new int[comm_size]; for ( int i = 0; i < comm_size; i++ ) { send_cnt2[i] = send_cnt[i] * sizeof( TYPE ); send_disp2[i] = send_disp[i] * sizeof( TYPE ); recv_cnt2[i] = recv_cnt[i] * sizeof( TYPE ); recv_disp2[i] = recv_disp[i] * sizeof( TYPE ); } static_assert( is_mpi_copyable(), "Object is not trivially copyable" ); call_allToAll( (char *) send_data, send_cnt2, send_disp2, (char *) recv_data, recv_cnt2, recv_disp2 ); delete[] send_cnt2; delete[] recv_cnt2; delete[] send_disp2; delete[] recv_disp2; } } // namespace Utilities #endif