// This file implements a wrapper class for MPI functions
#include "common/MPI.h"
#include "common/Utilities.h"
#include "ProfilerApp.h"
#include "StackTrace/ErrorHandlers.h"
#include "StackTrace/StackTrace.h"

// Include all other headers (standard library headers used by this file)
#include <array>
#include <atomic>
#include <climits>
#include <complex>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <limits>
#include <map>
#include <random>
#include <string>
#include <thread>
#include <vector>

// Include OS specific headers
#undef USE_WINDOWS
#undef USE_LINUX
#undef USE_MAC
#if defined( WIN32 ) || defined( _WIN32 ) || defined( WIN64 ) || defined( _WIN64 )
// We are using windows
#define USE_WINDOWS
#include <process.h>
#include <windows.h>
#define sched_yield() Sleep( 0 )
#elif defined( __APPLE__ )
// Using MAC
#define USE_MAC
#include <sched.h>
#elif defined( __linux ) || defined( __linux__ ) || defined( __unix ) || defined( __posix )
// We are using linux
#define USE_LINUX
#include <sched.h>
#include <unistd.h>
#else
#error Unknown OS
#endif

// Convenience defines
#define MPI_ERROR ERROR
#define MPI_ASSERT ASSERT
#define MPI_INSIST INSIST
#define MPI_WARNING WARNING
#define MPI_CLASS_COMM_NULL MPI_COMM_NULL
#define MPI_CLASS_COMM_SELF MPI_COMM_SELF
#define MPI_CLASS_COMM_WORLD MPI_COMM_WORLD

// Global variable used to create new unique comms (dup and split)
#ifndef USE_MPI
MPI_Comm uniqueGlobalComm = 11;
#endif
#if defined( USE_SAMRAI ) && defined( USE_PETSC ) && !defined( USE_MPI )
int MPI_REQUEST_NULL  = 3;
int MPI_ERR_IN_STATUS = 4;
#endif

namespace Utilities {

// Some special structs to work with MPI
#ifdef USE_MPI
struct IntIntStruct {
    int j;
    int i;
};
struct LongIntStruct {
    long int j;
    int i;
};
struct FloatIntStruct {
    float f;
    int i;
};
struct DoubleIntStruct {
    double d;
    int i;
};
#endif

// Initialize the static member variables
volatile unsigned int MPI_CLASS::N_MPI_Comm_created   = 0;
volatile unsigned int MPI_CLASS::N_MPI_Comm_destroyed = 0;
short MPI_CLASS::profile_level                        = 127;

// Define a type for use with size_t
#ifdef USE_MPI
static MPI_Datatype MPI_SIZE_T = 0x0;
static MPI_Datatype getSizeTDataType()
{
    int size_int, size_long, size_longlong, size_longlong2;
    MPI_Type_size( MPI_UNSIGNED, &size_int );
    MPI_Type_size( MPI_UNSIGNED_LONG, &size_long );
    MPI_Type_size( MPI_UNSIGNED_LONG_LONG, &size_longlong );
    MPI_Type_size( MPI_LONG_LONG_INT, &size_longlong2 );
    if ( sizeof( size_t ) == size_int ) {
        return MPI_UNSIGNED;
    } else if ( sizeof( size_t ) == size_long ) {
        return MPI_UNSIGNED_LONG;
    } else if ( sizeof( size_t ) == size_longlong ) {
        return MPI_UNSIGNED_LONG_LONG;
    } else if ( sizeof( size_t ) == size_longlong2 ) {
        MPI_WARNING( "Using signed long long datatype for size_t in MPI" );
        return MPI_LONG_LONG_INT; // Note: this is not unsigned
    } else {
        MPI_ERROR( "No suitable datatype found" );
    }
    return 0;
}
#endif

// Static data for asynchronous communication without MPI
// Note: these routines may not be thread-safe yet
#ifndef USE_MPI
static const int mpi_max_tag = 0x003FFFFF;
struct Isendrecv_struct {
    const char *data; // Pointer to data
    int status;       // Status: 1-sending, 2-receiving
};
std::map<MPI_Request, Isendrecv_struct> global_isendrecv_list;
static MPI_Request getRequest( MPI_Comm comm, int tag )
{
    MPI_ASSERT( tag >= 0 && tag <= mpi_max_tag );
    // Use hashing function: 2^64*0.5*(sqrt(5)-1)
    uint64_t a    = static_cast<uint64_t>( comm ) * 0x9E3779B97F4A7C15;
    uint64_t b    = static_cast<uint64_t>( tag ) * 0x9E3779B97F4A7C15;
    uint64_t hash = a ^ b;
    MPI_Request request;
    memcpy( &request, &hash, sizeof( MPI_Request ) );
    return request;
}
#endif

// Check the mpi error code
#ifdef USE_MPI
inline void check_MPI( int error )
{
    if ( error != MPI_SUCCESS )
        MPI_ERROR( "Error calling MPI routine" );
}
#endif
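
// Illustrative usage sketch (comment only, not part of the build): check_MPI is
// meant to wrap the integer return code of any raw MPI call so that a failure
// aborts through MPI_ERROR, e.g.
//     int flag = 0;
//     check_MPI( MPI_Initialized( &flag ) );
// MPI_Initialized is a standard MPI routine used here purely as an example.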
/******************************************************************
 * Some helper functions to convert between signed/unsigned types  *
 ******************************************************************/
DISABLE_WARNINGS
static inline constexpr unsigned int offset_int()
{
    return ~static_cast<unsigned int>( std::numeric_limits<int>::min() ) + 1;
}
static inline constexpr unsigned long int offset_long()
{
    return ~static_cast<unsigned long int>( std::numeric_limits<long int>::min() ) + 1;
}
static inline constexpr unsigned long long int offset_long_long()
{
    return ~static_cast<unsigned long long int>( std::numeric_limits<long long int>::min() ) + 1;
}
ENABLE_WARNINGS
static inline unsigned int signed_to_unsigned( int x )
{
    const auto offset = offset_int();
    return ( x >= 0 ) ? static_cast<unsigned int>( x ) + offset :
                        offset - static_cast<unsigned int>( -x );
}
static inline unsigned long int signed_to_unsigned( long int x )
{
    const auto offset = offset_long();
    return ( x >= 0 ) ? static_cast<unsigned long int>( x ) + offset :
                        offset - static_cast<unsigned long int>( -x );
}
static inline unsigned long long int signed_to_unsigned( long long int x )
{
    const auto offset = offset_long_long();
    return ( x >= 0 ) ? static_cast<unsigned long long int>( x ) + offset :
                        offset - static_cast<unsigned long long int>( -x );
}
static inline int unsigned_to_signed( unsigned int x )
{
    const auto offset = offset_int();
    return ( x >= offset ) ? static_cast<int>( x - offset ) : -static_cast<int>( offset - x );
}
static inline long int unsigned_to_signed( unsigned long int x )
{
    const auto offset = offset_long();
    return ( x >= offset ) ? static_cast<long int>( x - offset ) :
                             -static_cast<long int>( offset - x );
}
static inline long long int unsigned_to_signed( unsigned long long int x )
{
    const auto offset = offset_long_long();
    return ( x >= offset ) ? static_cast<long long int>( x - offset ) :
                             -static_cast<long long int>( offset - x );
}


/************************************************************************
 * Get the MPI version                                                   *
 ************************************************************************/
std::array<int, 2> MPI_CLASS::version()
{
#ifdef USE_MPI
    int MPI_version;
    int MPI_subversion;
    MPI_Get_version( &MPI_version, &MPI_subversion );
    return { MPI_version, MPI_subversion };
#else
    return { 0, 0 };
#endif
}
std::string MPI_CLASS::info()
{
#ifdef USE_MPI
#if MPI_VERSION >= 3
    int MPI_version_length = 0;
    char MPI_version_string[MPI_MAX_LIBRARY_VERSION_STRING];
    MPI_Get_library_version( MPI_version_string, &MPI_version_length );
    if ( MPI_version_length > 0 ) {
        std::string MPI_info( MPI_version_string, MPI_version_length );
        size_t pos = MPI_info.find( '\n' );
        while ( pos != std::string::npos ) {
            MPI_info.insert( pos + 1, " " );
            pos = MPI_info.find( '\n', pos + 1 );
        }
        return MPI_info;
    }
#endif
    auto tmp = version();
    return std::to_string( tmp[0] ) + "." + std::to_string( tmp[1] );
#else
    return std::string();
#endif
}


/************************************************************************
 * Functions to get/set the process affinities                          *
 ************************************************************************/
int MPI_CLASS::getNumberOfProcessors() { return std::thread::hardware_concurrency(); }
std::vector<int> MPI_CLASS::getProcessAffinity()
{
    std::vector<int> procs;
#ifdef USE_LINUX
    cpu_set_t mask;
    int error = sched_getaffinity( getpid(), sizeof( cpu_set_t ), &mask );
    if ( error != 0 )
        MPI_ERROR( "Error getting process affinity" );
    for ( int i = 0; i < (int) sizeof( cpu_set_t ) * CHAR_BIT; i++ ) {
        if ( CPU_ISSET( i, &mask ) )
            procs.push_back( i );
    }
#elif defined( USE_MAC )
    // MAC does not support getting or setting the affinity
    printf( "Warning: MAC does not support getting the process affinity\n" );
    procs.clear();
#elif defined( USE_WINDOWS )
    HANDLE hProc = GetCurrentProcess();
    size_t procMask;
    size_t sysMask;
    PDWORD_PTR procMaskPtr = reinterpret_cast<PDWORD_PTR>( &procMask );
    PDWORD_PTR sysMaskPtr  = reinterpret_cast<PDWORD_PTR>( &sysMask );
    GetProcessAffinityMask( hProc, procMaskPtr, sysMaskPtr );
    for ( int i = 0; i < (int) sizeof( size_t ) * CHAR_BIT; i++ ) {
        if ( ( procMask & 0x1 ) != 0 )
            procs.push_back( i );
        procMask >>= 1;
    }
#else
#error Unknown OS
#endif
    return procs;
}
void MPI_CLASS::setProcessAffinity( const std::vector<int> &procs )
{
#ifdef USE_LINUX
    cpu_set_t mask;
    CPU_ZERO( &mask );
    for ( auto cpu : procs )
        CPU_SET( cpu, &mask );
    int error = sched_setaffinity( getpid(), sizeof( cpu_set_t ), &mask );
    if ( error != 0 )
        MPI_ERROR( "Error setting process affinity" );
#elif defined( USE_MAC )
    // MAC does not support getting or setting the affinity
    NULL_USE( procs );
#elif defined( USE_WINDOWS )
    DWORD mask = 0;
    for ( size_t i = 0; i < procs.size(); i++ )
        mask |= ( (DWORD) 1 ) << procs[i];
    HANDLE hProc = GetCurrentProcess();
    SetProcessAffinityMask( hProc, mask );
#else
#error Unknown OS
#endif
}


/************************************************************************
 * Function to check if MPI is active                                   *
 ************************************************************************/
bool MPI_CLASS::MPI_active()
{
#ifdef USE_MPI
    int initialized = 0, finalized = 0;
    MPI_Initialized( &initialized );
    MPI_Finalized( &finalized );
    return initialized != 0 && finalized == 0;
#else
    return true;
#endif
}
MPI_CLASS::ThreadSupport MPI_CLASS::queryThreadSupport()
{
#ifdef USE_MPI
    int provided = 0;
    MPI_Query_thread( &provided );
    if ( provided == MPI_THREAD_SINGLE )
        return ThreadSupport::SINGLE;
    if ( provided == MPI_THREAD_FUNNELED )
        return ThreadSupport::FUNNELED;
    if ( provided == MPI_THREAD_SERIALIZED )
        return ThreadSupport::SERIALIZED;
    if ( provided == MPI_THREAD_MULTIPLE )
        return ThreadSupport::MULTIPLE;
    return ThreadSupport::SINGLE;
#else
    return ThreadSupport::MULTIPLE;
#endif
}


/************************************************************************
 * Function to perform a load balance of the given processes            *
 ************************************************************************/
void MPI_CLASS::balanceProcesses( const MPI_CLASS &globalComm, const int method,
    const std::vector<int> &procs, const int N_min_in, const int N_max_in )
{
    // Build the list of processors to use
    std::vector<int> cpus = procs;
    if ( cpus.empty() ) {
        for ( int i = 0; i < getNumberOfProcessors(); i++ )
            cpus.push_back( i );
    }
    // Handle the "easy cases"
    if ( method == 1 ) {
        // Trivial case where we do not need any communication
        setProcessAffinity( cpus );
        return;
    }
    // Get the sub-communicator for the current node
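    // (all ranks that share a host end up in the same nodeComm; for method 2 each
    // of those ranks then binds to an equal share of the node's CPU list, clamped
    // to the [N_min, N_max] range computed below)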
    MPI_CLASS nodeComm = globalComm.splitByNode();
    int N_min          = std::min<int>( std::max( N_min_in, 1 ), cpus.size() );
    int N_max          = N_max_in;
    if ( N_max == -1 )
        N_max = cpus.size();
    N_max = std::min<int>( N_max, cpus.size() );
    MPI_ASSERT( N_max >= N_min );
    // Perform the load balance within the node
    if ( method == 2 ) {
        int N_proc = cpus.size() / nodeComm.getSize();
        N_proc     = std::max( N_proc, N_min );
        N_proc     = std::min( N_proc, N_max );
        std::vector<int> cpus2( N_proc, -1 );
        for ( int i = 0; i < N_proc; i++ )
            cpus2[i] = cpus[( nodeComm.getRank() * N_proc + i ) % cpus.size()];
        setProcessAffinity( cpus2 );
    } else {
        MPI_ERROR( "Unknown method for load balance" );
    }
}


/************************************************************************
 * Empty constructor                                                     *
 ************************************************************************/
MPI_CLASS::MPI_CLASS()
{
    // Initialize the data members to a default communicator of self
#ifdef USE_MPI
    communicator = MPI_COMM_NULL;
    d_maxTag     = 0x7FFFFFFF;
#else
    communicator = MPI_CLASS_COMM_NULL;
    d_maxTag     = mpi_max_tag;
#endif
    d_ranks       = nullptr;
    d_count       = nullptr;
    d_manage      = false;
    comm_rank     = 0;
    comm_size     = 1;
    d_isNull      = true;
    d_currentTag  = nullptr;
    d_call_abort  = true;
    tmp_alignment = -1;
}


/************************************************************************
 * Empty destructor                                                      *
 ************************************************************************/
MPI_CLASS::~MPI_CLASS() { reset(); }
void MPI_CLASS::reset()
{
    // Decrement the count if used
    int count = -1;
    if ( d_count != nullptr )
        count = --( *d_count );
    if ( count == 0 ) {
        // We are holding the last reference to the MPI_Comm object, we need to free it
        if ( d_manage ) {
#ifdef USE_MPI
            MPI_Comm_set_errhandler( communicator, MPI_ERRORS_ARE_FATAL );
            int err = MPI_Comm_free( &communicator );
            if ( err != MPI_SUCCESS )
                MPI_ERROR( "Problem free'ing MPI_Comm object" );
            communicator = MPI_CLASS_COMM_NULL;
            ++N_MPI_Comm_destroyed;
#endif
        }
        if ( d_ranks != nullptr )
            delete[] d_ranks;
        delete d_count;
    }
    if ( d_currentTag == nullptr ) {
        // No tag index
    } else if ( d_currentTag[1] > 1 ) {
        --( d_currentTag[1] );
    } else {
        delete[] d_currentTag;
    }
    d_manage     = false;
    d_count      = nullptr;
    d_ranks      = nullptr;
    comm_rank    = 0;
    comm_size    = 1;
    d_maxTag     = 0;
    d_isNull     = true;
    d_currentTag = nullptr;
    d_call_abort = true;
}


/************************************************************************
 * Copy constructors                                                     *
 ************************************************************************/
MPI_CLASS::MPI_CLASS( const MPI_CLASS &comm )
    : communicator( comm.communicator ),
      d_isNull( comm.d_isNull ),
      d_manage( comm.d_manage ),
      comm_rank( comm.comm_rank ),
      comm_size( comm.comm_size ),
      d_ranks( comm.d_ranks ),
      d_maxTag( comm.d_maxTag ),
      d_currentTag( comm.d_currentTag )
{
    // Initialize the data members to the existing comm object
    if ( d_currentTag != nullptr )
        ++d_currentTag[1];
    d_call_abort = comm.d_call_abort;
    // Set and increment the count
    d_count = comm.d_count;
    if ( d_count != nullptr )
        ++( *d_count );
    tmp_alignment = -1;
}
MPI_CLASS::MPI_CLASS( MPI_CLASS &&rhs ) : MPI_CLASS()
{
    std::swap( communicator, rhs.communicator );
    std::swap( d_isNull, rhs.d_isNull );
    std::swap( d_manage, rhs.d_manage );
    std::swap( d_call_abort, rhs.d_call_abort );
    std::swap( profile_level, rhs.profile_level );
    std::swap( comm_rank, rhs.comm_rank );
    std::swap( comm_size, rhs.comm_size );
    std::swap( d_ranks, rhs.d_ranks );
    std::swap( d_maxTag, rhs.d_maxTag );
    std::swap( d_currentTag, rhs.d_currentTag );
    std::swap( d_count, rhs.d_count );
    std::swap( tmp_alignment,
rhs.tmp_alignment ); } /************************************************************************ * Assignment operators * ************************************************************************/ MPI_CLASS &MPI_CLASS::operator=( const MPI_CLASS &comm ) { if ( this == &comm ) // protect against invalid self-assignment return *this; // Destroy the previous object this->reset(); // Initialize the data members to the existing object this->communicator = comm.communicator; this->comm_rank = comm.comm_rank; this->comm_size = comm.comm_size; this->d_ranks = comm.d_ranks; this->d_isNull = comm.d_isNull; this->d_manage = comm.d_manage; this->d_maxTag = comm.d_maxTag; this->d_call_abort = comm.d_call_abort; this->d_currentTag = comm.d_currentTag; if ( this->d_currentTag != nullptr ) ++( this->d_currentTag[1] ); // Set and increment the count this->d_count = comm.d_count; if ( this->d_count != nullptr ) ++( *d_count ); this->tmp_alignment = -1; return *this; } MPI_CLASS &MPI_CLASS::operator=( MPI_CLASS &&rhs ) { if ( this == &rhs ) // protect against invalid self-assignment return *this; std::swap( communicator, rhs.communicator ); std::swap( d_isNull, rhs.d_isNull ); std::swap( d_manage, rhs.d_manage ); std::swap( d_call_abort, rhs.d_call_abort ); std::swap( profile_level, rhs.profile_level ); std::swap( comm_rank, rhs.comm_rank ); std::swap( comm_size, rhs.comm_size ); std::swap( d_ranks, rhs.d_ranks ); std::swap( d_maxTag, rhs.d_maxTag ); std::swap( d_currentTag, rhs.d_currentTag ); std::swap( d_count, rhs.d_count ); std::swap( tmp_alignment, rhs.tmp_alignment ); return *this; } /************************************************************************ * Constructor from existing MPI communicator * ************************************************************************/ int d_global_currentTag_world1[2] = { 1, 1 }; int d_global_currentTag_world2[2] = { 1, 1 }; int d_global_currentTag_self[2] = { 1, 1 }; #ifdef USE_MPI std::atomic_int d_global_count_world1 = { 1 }; std::atomic_int d_global_count_world2 = { 1 }; std::atomic_int d_global_count_self = { 1 }; #endif MPI_CLASS::MPI_CLASS( MPI_Comm comm, bool manage ) { d_count = nullptr; d_ranks = nullptr; d_manage = false; tmp_alignment = -1; // Check if we are using our version of comm_world if ( comm == MPI_CLASS_COMM_WORLD ) { communicator = MPI_COMM_WORLD; } else if ( comm == MPI_CLASS_COMM_SELF ) { communicator = MPI_COMM_SELF; } else if ( comm == MPI_CLASS_COMM_NULL ) { communicator = MPI_COMM_NULL; } else { communicator = comm; } #ifdef USE_MPI // We are using MPI, use the MPI communicator to initialize the data if ( communicator != MPI_COMM_NULL ) { // Set the MPI_SIZE_T datatype if it has not been set if ( MPI_SIZE_T == 0x0 ) MPI_SIZE_T = getSizeTDataType(); // Attach the error handler StackTrace::setMPIErrorHandler( communicator ); // Get the communicator properties MPI_Comm_rank( communicator, &comm_rank ); MPI_Comm_size( communicator, &comm_size ); int flag, *val; int ierr = MPI_Comm_get_attr( communicator, MPI_TAG_UB, &val, &flag ); MPI_ASSERT( ierr == MPI_SUCCESS ); if ( flag == 0 ) { d_maxTag = 0x7FFFFFFF; // The tag is not a valid attribute (set to 2^31-1) } else { d_maxTag = *val; if ( d_maxTag < 0 ) { d_maxTag = 0x7FFFFFFF; } // The maximum tag is > a signed int (set to 2^31-1) MPI_INSIST( d_maxTag >= 0x7FFF, "maximum tag size is < MPI standard" ); } } else { comm_rank = 1; comm_size = 0; d_maxTag = 0x7FFFFFFF; } d_isNull = communicator == MPI_COMM_NULL; if ( manage && communicator != MPI_COMM_NULL && communicator != MPI_COMM_SELF && 
communicator != MPI_COMM_WORLD ) d_manage = true; // Create the count (Note: we do not need to worry about thread safety) if ( communicator == MPI_CLASS_COMM_WORLD ) { d_count = &d_global_count_world1; ++( *d_count ); } else if ( communicator == MPI_COMM_WORLD ) { d_count = &d_global_count_world2; ++( *d_count ); } else if ( communicator == MPI_COMM_SELF ) { d_count = &d_global_count_self; ++( *d_count ); } else if ( communicator == MPI_COMM_NULL ) { d_count = nullptr; } else { d_count = new std::atomic_int; *d_count = 1; } if ( d_manage ) ++N_MPI_Comm_created; // Create d_ranks if ( comm_size > 1 ) { d_ranks = new int[comm_size]; d_ranks[0] = -1; } #else // We are not using MPI, intialize based on the communicator NULL_USE( manage ); comm_rank = 0; comm_size = 1; d_maxTag = mpi_max_tag; d_isNull = communicator == MPI_COMM_NULL; if ( d_isNull ) comm_size = 0; #endif if ( communicator == MPI_CLASS_COMM_WORLD ) { d_currentTag = d_global_currentTag_world1; ++( this->d_currentTag[1] ); } else if ( communicator == MPI_COMM_WORLD ) { d_currentTag = d_global_currentTag_world2; ++( this->d_currentTag[1] ); } else if ( communicator == MPI_COMM_SELF ) { d_currentTag = d_global_currentTag_self; ++( this->d_currentTag[1] ); } else if ( communicator == MPI_COMM_NULL ) { d_currentTag = nullptr; } else { d_currentTag = new int[2]; d_currentTag[0] = ( d_maxTag <= 0x10000 ) ? 1 : 0x1FFF; d_currentTag[1] = 1; } d_call_abort = true; } /************************************************************************ * Return the ranks of the communicator in the global comm * ************************************************************************/ std::vector MPI_CLASS::globalRanks() const { // Get my global rank if it has not been set static int myGlobalRank = -1; if ( myGlobalRank == -1 ) { #ifdef USE_MPI if ( MPI_active() ) MPI_Comm_rank( MPI_CLASS_COMM_WORLD, &myGlobalRank ); #else myGlobalRank = 0; #endif } // Check if we are dealing with a serial or null communicator if ( comm_size == 1 ) return std::vector( 1, myGlobalRank ); if ( d_ranks == nullptr || communicator == MPI_COMM_NULL ) return std::vector(); // Fill d_ranks if necessary if ( d_ranks[0] == -1 ) { if ( communicator == MPI_CLASS_COMM_WORLD ) { for ( int i = 0; i < comm_size; i++ ) d_ranks[i] = i; } else { MPI_ASSERT( myGlobalRank != -1 ); this->allGather( myGlobalRank, d_ranks ); } } // Return d_ranks return std::vector( d_ranks, d_ranks + comm_size ); } /************************************************************************ * Generate a random number * ************************************************************************/ size_t MPI_CLASS::rand() const { size_t val = 0; if ( getRank() == 0 ) { static std::random_device rd; static std::mt19937 gen( rd() ); static std::uniform_int_distribution dist; val = dist( gen ); } val = bcast( val, 0 ); return val; } /************************************************************************ * Intersect two communicators * ************************************************************************/ #ifdef USE_MPI static inline void MPI_Group_free2( MPI_Group *group ) { if ( *group != MPI_GROUP_EMPTY ) { // MPICH is fine with free'ing an empty group, OpenMPI crashes MPI_Group_free( group ); } } MPI_CLASS MPI_CLASS::intersect( const MPI_CLASS &comm1, const MPI_CLASS &comm2 ) { MPI_Group group1 = MPI_GROUP_EMPTY, group2 = MPI_GROUP_EMPTY; if ( !comm1.isNull() ) { MPI_Group_free2( &group1 ); MPI_Comm_group( comm1.communicator, &group1 ); } if ( !comm2.isNull() ) { MPI_Group_free2( &group2 ); MPI_Comm_group( 
comm2.communicator, &group2 ); } MPI_Group group12; MPI_Group_intersection( group1, group2, &group12 ); int compare1, compare2; MPI_Group_compare( group1, group12, &compare1 ); MPI_Group_compare( group2, group12, &compare2 ); MPI_CLASS new_comm( MPI_CLASS_COMM_NULL ); int size; MPI_Group_size( group12, &size ); if ( compare1 != MPI_UNEQUAL && size != 0 ) { // The intersection matches comm1 new_comm = comm1; } else if ( compare2 != MPI_UNEQUAL && size != 0 ) { // The intersection matches comm2 new_comm = comm2; } else if ( comm1.isNull() ) { // comm1 is null, we can return safely (comm1 is needed for communication) } else { // The intersection is smaller than comm1 or comm2 // Check if the new comm is nullptr for all processors int max_size = 0; MPI_Allreduce( &size, &max_size, 1, MPI_INT, MPI_MAX, comm1.communicator ); if ( max_size == 0 ) { // We are dealing with completely disjoint sets new_comm = MPI_CLASS( MPI_CLASS_COMM_NULL, false ); } else { // Create the new comm // Note: OpenMPI crashes if the intersection group is EMPTY for any processors // We will set it to SELF for the EMPTY processors, then create a nullptr comm later if ( group12 == MPI_GROUP_EMPTY ) { MPI_Group_free2( &group12 ); MPI_Comm_group( MPI_COMM_SELF, &group12 ); } MPI_Comm new_MPI_comm; MPI_Comm_create( comm1.communicator, group12, &new_MPI_comm ); if ( size > 0 ) { // This is the valid case where we create a new intersection comm new_comm = MPI_CLASS( new_MPI_comm, true ); } else { // We actually want a null comm for this communicator new_comm = MPI_CLASS( MPI_CLASS_COMM_NULL, false ); MPI_Comm_free( &new_MPI_comm ); } } } MPI_Group_free2( &group1 ); MPI_Group_free2( &group2 ); MPI_Group_free2( &group12 ); return new_comm; } #else MPI_CLASS MPI_CLASS::intersect( const MPI_CLASS &comm1, const MPI_CLASS &comm2 ) { if ( comm1.isNull() || comm2.isNull() ) return MPI_CLASS( MPI_CLASS_COMM_NULL, false ); MPI_ASSERT( comm1.comm_size == 1 && comm2.comm_size == 1 ); return comm1; } #endif /************************************************************************ * Split a comm * ************************************************************************/ MPI_CLASS MPI_CLASS::split( int color, int key ) const { if ( d_isNull ) { return MPI_CLASS( MPI_CLASS_COMM_NULL ); } else if ( comm_size == 1 ) { if ( color == -1 ) return MPI_CLASS( MPI_CLASS_COMM_NULL ); return dup(); } MPI_Comm new_MPI_comm = MPI_CLASS_COMM_NULL; #ifdef USE_MPI // USE MPI to split the communicator if ( color == -1 ) { check_MPI( MPI_Comm_split( communicator, MPI_UNDEFINED, key, &new_MPI_comm ) ); } else { check_MPI( MPI_Comm_split( communicator, color, key, &new_MPI_comm ) ); } #endif // Create the new object NULL_USE( key ); MPI_CLASS new_comm( new_MPI_comm, true ); new_comm.d_call_abort = d_call_abort; return new_comm; } MPI_CLASS MPI_CLASS::splitByNode( int key ) const { // Check if we are dealing with a single processor (trivial case) if ( comm_size == 1 ) return this->split( 0, 0 ); // Get the node name std::string name = MPI_CLASS::getNodeName(); // Gather the names from all ranks std::vector list( comm_size ); allGather( name, &list[0] ); // Create the colors std::vector color( comm_size, -1 ); color[0] = 0; for ( int i = 1; i < comm_size; i++ ) { const std::string tmp1 = list[i]; for ( int j = 0; j < i; j++ ) { const std::string tmp2 = list[j]; if ( tmp1 == tmp2 ) { color[i] = color[j]; break; } color[i] = color[i - 1] + 1; } } MPI_CLASS new_comm = this->split( color[comm_rank], key ); return new_comm; } 
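
// Illustrative usage sketch (comment only; assumes an existing communicator
// object named comm): split() and splitByNode() follow the MPI_Comm_split
// conventions, with color == -1 playing the role of MPI_UNDEFINED.
//     MPI_CLASS row  = comm.split( comm.getRank() / 4 );  // groups of 4 ranks
//     MPI_CLASS node = comm.splitByNode();                // ranks on the same host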
/************************************************************************ * Duplicate an exisiting comm object * ************************************************************************/ MPI_CLASS MPI_CLASS::dup() const { if ( d_isNull ) return MPI_CLASS( MPI_CLASS_COMM_NULL ); MPI_Comm new_MPI_comm = communicator; #if defined( USE_MPI ) || defined( USE_PETSC ) // USE MPI to duplicate the communicator MPI_Comm_dup( communicator, &new_MPI_comm ); #else new_MPI_comm = uniqueGlobalComm; uniqueGlobalComm++; #endif // Create the new comm object MPI_CLASS new_comm( new_MPI_comm, true ); new_comm.d_isNull = d_isNull; new_comm.d_call_abort = d_call_abort; return new_comm; } /************************************************************************ * Get the node name * ************************************************************************/ std::string MPI_CLASS::getNodeName() { #ifdef USE_MPI int length; char name[MPI_MAX_PROCESSOR_NAME + 1]; memset( name, 0, MPI_MAX_PROCESSOR_NAME + 1 ); MPI_Get_processor_name( name, &length ); return std::string( name ); #else return "Node0"; #endif } /************************************************************************ * Overload operator == * ************************************************************************/ bool MPI_CLASS::operator==( const MPI_CLASS &comm ) const { return communicator == comm.communicator; } /************************************************************************ * Overload operator != * ************************************************************************/ bool MPI_CLASS::operator!=( const MPI_CLASS &comm ) const { return communicator != comm.communicator; } /************************************************************************ * Overload operator < * ************************************************************************/ bool MPI_CLASS::operator<( const MPI_CLASS &comm ) const { MPI_ASSERT( !this->d_isNull && !comm.d_isNull ); bool flag = true; // First check if either communicator is NULL if ( this->d_isNull ) return false; if ( comm.d_isNull ) flag = false; // Use compare to check if the comms are equal if ( compare( comm ) != 0 ) return false; // Check that the size of the other communicator is > the current communicator size if ( comm_size >= comm.comm_size ) flag = false; // Check the union of the communicator groups // this is < comm iff this group is a subgroup of comm's group #ifdef USE_MPI MPI_Group group1 = MPI_GROUP_EMPTY, group2 = MPI_GROUP_EMPTY, group12 = MPI_GROUP_EMPTY; if ( !d_isNull ) MPI_Comm_group( communicator, &group1 ); if ( !comm.d_isNull ) MPI_Comm_group( comm.communicator, &group2 ); MPI_Group_union( group1, group2, &group12 ); int compare; MPI_Group_compare( group2, group12, &compare ); if ( compare == MPI_UNEQUAL ) flag = false; MPI_Group_free( &group1 ); MPI_Group_free( &group2 ); MPI_Group_free( &group12 ); #endif // Perform a global reduce of the flag (equivalent to all operation) return allReduce( flag ); } /************************************************************************ * Overload operator <= * ************************************************************************/ bool MPI_CLASS::operator<=( const MPI_CLASS &comm ) const { MPI_ASSERT( !this->d_isNull && !comm.d_isNull ); bool flag = true; // First check if either communicator is NULL if ( this->d_isNull ) return false; if ( comm.d_isNull ) flag = false; #ifdef USE_MPI int world_size = 0; MPI_Comm_size( MPI_COMM_WORLD, &world_size ); if ( comm.getSize() == world_size ) return true; if ( getSize() == 1 && !comm.d_isNull ) 
return true; #endif // Use compare to check if the comms are equal if ( compare( comm ) != 0 ) return true; // Check that the size of the other communicator is > the current communicator size // this is <= comm iff this group is a subgroup of comm's group if ( comm_size > comm.comm_size ) flag = false; // Check the unnion of the communicator groups #ifdef USE_MPI MPI_Group group1, group2, group12; MPI_Comm_group( communicator, &group1 ); MPI_Comm_group( comm.communicator, &group2 ); MPI_Group_union( group1, group2, &group12 ); int compare; MPI_Group_compare( group2, group12, &compare ); if ( compare == MPI_UNEQUAL ) flag = false; MPI_Group_free( &group1 ); MPI_Group_free( &group2 ); MPI_Group_free( &group12 ); #endif // Perform a global reduce of the flag (equivalent to all operation) return allReduce( flag ); } /************************************************************************ * Overload operator > * ************************************************************************/ bool MPI_CLASS::operator>( const MPI_CLASS &comm ) const { bool flag = true; // First check if either communicator is NULL if ( this->d_isNull ) return false; if ( comm.d_isNull ) flag = false; // Use compare to check if the comms are equal if ( compare( comm ) != 0 ) return false; // Check that the size of the other communicator is > the current communicator size if ( comm_size <= comm.comm_size ) flag = false; // Check the unnion of the communicator groups // this is > comm iff comm's group is a subgroup of this group #ifdef USE_MPI MPI_Group group1 = MPI_GROUP_EMPTY, group2 = MPI_GROUP_EMPTY, group12 = MPI_GROUP_EMPTY; if ( !d_isNull ) MPI_Comm_group( communicator, &group1 ); if ( !comm.d_isNull ) MPI_Comm_group( comm.communicator, &group2 ); MPI_Group_union( group1, group2, &group12 ); int compare; MPI_Group_compare( group1, group12, &compare ); if ( compare == MPI_UNEQUAL ) flag = false; MPI_Group_free( &group1 ); MPI_Group_free( &group2 ); MPI_Group_free( &group12 ); #endif // Perform a global reduce of the flag (equivalent to all operation) return allReduce( flag ); } /************************************************************************ * Overload operator >= * ************************************************************************/ bool MPI_CLASS::operator>=( const MPI_CLASS &comm ) const { bool flag = true; // First check if either communicator is NULL if ( this->d_isNull ) return false; if ( comm.d_isNull ) flag = false; #ifdef USE_MPI int world_size = 0; MPI_Comm_size( MPI_COMM_WORLD, &world_size ); if ( getSize() == world_size ) return true; if ( comm.getSize() == 1 && !comm.d_isNull ) return true; #endif // Use compare to check if the comms are equal if ( compare( comm ) != 0 ) return true; // Check that the size of the other communicator is > the current communicator size if ( comm_size < comm.comm_size ) flag = false; // Check the unnion of the communicator groups // this is >= comm iff comm's group is a subgroup of this group #ifdef USE_MPI MPI_Group group1 = MPI_GROUP_EMPTY, group2 = MPI_GROUP_EMPTY, group12 = MPI_GROUP_EMPTY; if ( !d_isNull ) MPI_Comm_group( communicator, &group1 ); if ( !comm.d_isNull ) MPI_Comm_group( comm.communicator, &group2 ); MPI_Group_union( group1, group2, &group12 ); int compare; MPI_Group_compare( group1, group12, &compare ); if ( compare == MPI_UNEQUAL ) flag = false; MPI_Group_free( &group1 ); MPI_Group_free( &group2 ); MPI_Group_free( &group12 ); #endif // Perform a global reduce of the flag (equivalent to all operation) return allReduce( flag ); } 
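
// Note on the ordering operators above: they are collective calls. Every rank of
// both communicators must participate, because the local subgroup checks are
// combined with allReduce() before the result is returned.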
/************************************************************************ * Compare two comm objects * ************************************************************************/ int MPI_CLASS::compare( const MPI_CLASS &comm ) const { if ( communicator == comm.communicator ) return 1; #ifdef USE_MPI if ( d_isNull || comm.d_isNull ) return 0; int result; check_MPI( MPI_Comm_compare( communicator, comm.communicator, &result ) ); if ( result == MPI_IDENT ) return 2; else if ( result == MPI_CONGRUENT ) return 3; else if ( result == MPI_SIMILAR ) return 4; else if ( result == MPI_UNEQUAL ) return 0; MPI_ERROR( "Unknown results from comm compare" ); #else if ( comm.communicator == MPI_COMM_NULL || communicator == MPI_COMM_NULL ) return 0; else return 3; #endif return 0; } /************************************************************************ * Abort the program. * ************************************************************************/ void MPI_CLASS::setCallAbortInSerialInsteadOfExit( bool flag ) { d_call_abort = flag; } void MPI_CLASS::abort() const { #ifdef USE_MPI MPI_Comm comm = communicator; if ( comm == MPI_COMM_NULL ) comm = MPI_COMM_WORLD; if ( !MPI_active() ) { // MPI is not availible exit( -1 ); } else if ( comm_size > 1 ) { MPI_Abort( comm, -1 ); } else if ( d_call_abort ) { MPI_Abort( comm, -1 ); } else { exit( -1 ); } #else exit( -1 ); #endif } /************************************************************************ * newTag * ************************************************************************/ int MPI_CLASS::newTag() { #ifdef USE_MPI // Syncronize the processes to ensure all ranks enter this call // Needed so the count will match barrier(); // Return and increment the tag int tag = ( *d_currentTag )++; MPI_INSIST( tag <= d_maxTag, "Maximum number of tags exceeded\n" ); return tag; #else static int globalCurrentTag = 1; return globalCurrentTag++; #endif } /************************************************************************ * allReduce * ************************************************************************/ bool MPI_CLASS::allReduce( const bool value ) const { bool ret = value; if ( comm_size > 1 ) { #ifdef USE_MPI MPI_Allreduce( (void *) &value, (void *) &ret, 1, MPI_UNSIGNED_CHAR, MPI_MIN, communicator ); #else MPI_ERROR( "This shouldn't be possible" ); #endif } return ret; } /************************************************************************ * anyReduce * ************************************************************************/ bool MPI_CLASS::anyReduce( const bool value ) const { bool ret = value; if ( comm_size > 1 ) { #ifdef USE_MPI MPI_Allreduce( (void *) &value, (void *) &ret, 1, MPI_UNSIGNED_CHAR, MPI_MAX, communicator ); #else MPI_ERROR( "This shouldn't be possible" ); #endif } return ret; } /************************************************************************ * call_sumReduce * * Note: these specializations are only called when using MPI. 
* ************************************************************************/ #ifdef USE_MPI // unsigned char template<> void MPI_CLASS::call_sumReduce( const unsigned char *send, unsigned char *recv, const int n ) const { PROFILE_START( "sumReduce1", profile_level ); MPI_Allreduce( (void *) send, (void *) recv, n, MPI_UNSIGNED_CHAR, MPI_SUM, communicator ); PROFILE_STOP( "sumReduce1", profile_level ); } template<> void MPI_CLASS::call_sumReduce( unsigned char *x, const int n ) const { PROFILE_START( "sumReduce2", profile_level ); auto send = x; auto recv = new unsigned char[n]; MPI_Allreduce( send, recv, n, MPI_UNSIGNED_CHAR, MPI_SUM, communicator ); for ( int i = 0; i < n; i++ ) x[i] = recv[i]; delete[] recv; PROFILE_STOP( "sumReduce2", profile_level ); } // char template<> void MPI_CLASS::call_sumReduce( const char *send, char *recv, const int n ) const { PROFILE_START( "sumReduce1", profile_level ); MPI_Allreduce( (void *) send, (void *) recv, n, MPI_SIGNED_CHAR, MPI_SUM, communicator ); PROFILE_STOP( "sumReduce1", profile_level ); } template<> void MPI_CLASS::call_sumReduce( char *x, const int n ) const { PROFILE_START( "sumReduce2", profile_level ); auto send = x; auto recv = new char[n]; MPI_Allreduce( send, recv, n, MPI_SIGNED_CHAR, MPI_SUM, communicator ); for ( int i = 0; i < n; i++ ) x[i] = recv[i]; delete[] recv; PROFILE_STOP( "sumReduce2", profile_level ); } // unsigned int template<> void MPI_CLASS::call_sumReduce( const unsigned int *send, unsigned int *recv, const int n ) const { PROFILE_START( "sumReduce1", profile_level ); MPI_Allreduce( (void *) send, (void *) recv, n, MPI_UNSIGNED, MPI_SUM, communicator ); PROFILE_STOP( "sumReduce1", profile_level ); } template<> void MPI_CLASS::call_sumReduce( unsigned int *x, const int n ) const { PROFILE_START( "sumReduce2", profile_level ); auto send = x; auto recv = new unsigned int[n]; MPI_Allreduce( send, recv, n, MPI_UNSIGNED, MPI_SUM, communicator ); for ( int i = 0; i < n; i++ ) x[i] = recv[i]; delete[] recv; PROFILE_STOP( "sumReduce2", profile_level ); } // int template<> void MPI_CLASS::call_sumReduce( const int *send, int *recv, const int n ) const { PROFILE_START( "sumReduce1", profile_level ); MPI_Allreduce( (void *) send, (void *) recv, n, MPI_INT, MPI_SUM, communicator ); PROFILE_STOP( "sumReduce1", profile_level ); } template<> void MPI_CLASS::call_sumReduce( int *x, const int n ) const { PROFILE_START( "sumReduce2", profile_level ); auto send = x; auto recv = new int[n]; MPI_Allreduce( send, recv, n, MPI_INT, MPI_SUM, communicator ); for ( int i = 0; i < n; i++ ) x[i] = recv[i]; delete[] recv; PROFILE_STOP( "sumReduce2", profile_level ); } // long int template<> void MPI_CLASS::call_sumReduce( const long int *send, long int *recv, const int n ) const { PROFILE_START( "sumReduce1", profile_level ); MPI_Allreduce( (void *) send, (void *) recv, n, MPI_LONG, MPI_SUM, communicator ); PROFILE_STOP( "sumReduce1", profile_level ); } template<> void MPI_CLASS::call_sumReduce( long int *x, const int n ) const { PROFILE_START( "sumReduce2", profile_level ); auto send = x; auto recv = new long int[n]; MPI_Allreduce( send, recv, n, MPI_LONG, MPI_SUM, communicator ); for ( int i = 0; i < n; i++ ) x[i] = recv[i]; delete[] recv; PROFILE_STOP( "sumReduce2", profile_level ); } // unsigned long int template<> void MPI_CLASS::call_sumReduce( const unsigned long *send, unsigned long *recv, const int n ) const { PROFILE_START( "sumReduce1", profile_level ); MPI_Allreduce( (void *) send, (void *) recv, n, MPI_UNSIGNED_LONG, MPI_SUM, 
        communicator );
    PROFILE_STOP( "sumReduce1", profile_level );
}
template<>
void MPI_CLASS::call_sumReduce( unsigned long *x, const int n ) const
{
    PROFILE_START( "sumReduce2", profile_level );
    auto send = x;
    auto recv = new unsigned long int[n];
    MPI_Allreduce( send, recv, n, MPI_UNSIGNED_LONG, MPI_SUM, communicator );
    for ( int i = 0; i < n; i++ )
        x[i] = recv[i];
    delete[] recv;
    PROFILE_STOP( "sumReduce2", profile_level );
}
// size_t
#ifdef USE_WINDOWS
template<>
void MPI_CLASS::call_sumReduce( const size_t *send, size_t *recv, const int n ) const
{
    MPI_ASSERT( MPI_SIZE_T != 0 );
    PROFILE_START( "sumReduce1", profile_level );
    MPI_Allreduce( (void *) send, (void *) recv, n, MPI_SIZE_T, MPI_SUM, communicator );
    PROFILE_STOP( "sumReduce1", profile_level );
}
template<>
void MPI_CLASS::call_sumReduce( size_t *x, const int n ) const
{
    MPI_ASSERT( MPI_SIZE_T != 0 );
    PROFILE_START( "sumReduce2", profile_level );
    auto send = x;
    auto recv = new size_t[n];
    MPI_Allreduce( (void *) send, (void *) recv, n, MPI_SIZE_T, MPI_SUM, communicator );
    for ( int i = 0; i < n; i++ )
        x[i] = recv[i];
    delete[] recv;
    PROFILE_STOP( "sumReduce2", profile_level );
}
#endif
// float
template<>
void MPI_CLASS::call_sumReduce( const float *send, float *recv, const int n ) const
{
    PROFILE_START( "sumReduce1", profile_level );
    MPI_Allreduce( (void *) send, (void *) recv, n, MPI_FLOAT, MPI_SUM, communicator );
    PROFILE_STOP( "sumReduce1", profile_level );
}
template<>
void MPI_CLASS::call_sumReduce( float *x, const int n ) const
{
    PROFILE_START( "sumReduce2", profile_level );
    auto send = x;
    auto recv = new float[n];
    MPI_Allreduce( send, recv, n, MPI_FLOAT, MPI_SUM, communicator );
    for ( int i = 0; i < n; i++ )
        x[i] = recv[i];
    delete[] recv;
    PROFILE_STOP( "sumReduce2", profile_level );
}
// double
template<>
void MPI_CLASS::call_sumReduce( const double *send, double *recv, const int n ) const
{
    PROFILE_START( "sumReduce1", profile_level );
    MPI_Allreduce( (void *) send, (void *) recv, n, MPI_DOUBLE, MPI_SUM, communicator );
    PROFILE_STOP( "sumReduce1", profile_level );
}
template<>
void MPI_CLASS::call_sumReduce( double *x, const int n ) const
{
    PROFILE_START( "sumReduce2", profile_level );
    auto send = x;
    auto recv = new double[n];
    MPI_Allreduce( send, recv, n, MPI_DOUBLE, MPI_SUM, communicator );
    for ( int i = 0; i < n; i++ )
        x[i] = recv[i];
    delete[] recv;
    PROFILE_STOP( "sumReduce2", profile_level );
}
// std::complex<double>
template<>
void MPI_CLASS::call_sumReduce<std::complex<double>>(
    const std::complex<double> *x, std::complex<double> *y, const int n ) const
{
    PROFILE_START( "sumReduce1", profile_level );
    auto send = new double[2 * n];
    auto recv = new double[2 * n];
    for ( int i = 0; i < n; i++ ) {
        send[2 * i + 0] = real( x[i] );
        send[2 * i + 1] = imag( x[i] );
    }
    MPI_Allreduce( (void *) send, (void *) recv, 2 * n, MPI_DOUBLE, MPI_SUM, communicator );
    for ( int i = 0; i < n; i++ )
        y[i] = std::complex<double>( recv[2 * i + 0], recv[2 * i + 1] );
    delete[] send;
    delete[] recv;
    PROFILE_STOP( "sumReduce1", profile_level );
}
template<>
void MPI_CLASS::call_sumReduce<std::complex<double>>( std::complex<double> *x, const int n ) const
{
    PROFILE_START( "sumReduce2", profile_level );
    auto send = new double[2 * n];
    auto recv = new double[2 * n];
    for ( int i = 0; i < n; i++ ) {
        send[2 * i + 0] = real( x[i] );
        send[2 * i + 1] = imag( x[i] );
    }
    MPI_Allreduce( send, recv, 2 * n, MPI_DOUBLE, MPI_SUM, communicator );
    for ( int i = 0; i < n; i++ )
        x[i] = std::complex<double>( recv[2 * i + 0], recv[2 * i + 1] );
    delete[] send;
    delete[] recv;
    PROFILE_STOP( "sumReduce2", profile_level );
}
#endif
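
// Note on the std::complex<double> specializations above: the values are packed
// as interleaved (real, imaginary) doubles and reduced with MPI_SUM on
// MPI_DOUBLE, which gives the correct component-wise complex sum without
// defining a custom MPI datatype or reduction operation.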
/************************************************************************ * call_minReduce * * Note: these specializations are only called when using MPI. * ************************************************************************/ #ifdef USE_MPI // unsigned char template<> void MPI_CLASS::call_minReduce( const unsigned char *send, unsigned char *recv, const int n, int *comm_rank_of_min ) const { if ( comm_rank_of_min == nullptr ) { PROFILE_START( "minReduce1", profile_level ); MPI_Allreduce( (void *) send, (void *) recv, n, MPI_UNSIGNED_CHAR, MPI_MIN, communicator ); PROFILE_STOP( "minReduce1", profile_level ); } else { auto tmp = new int[n]; for ( int i = 0; i < n; i++ ) tmp[i] = send[i]; call_minReduce( tmp, n, comm_rank_of_min ); for ( int i = 0; i < n; i++ ) recv[i] = static_cast( tmp[i] ); delete[] tmp; } } template<> void MPI_CLASS::call_minReduce( unsigned char *x, const int n, int *comm_rank_of_min ) const { if ( comm_rank_of_min == nullptr ) { PROFILE_START( "minReduce2", profile_level ); auto send = x; auto recv = new unsigned char[n]; MPI_Allreduce( send, recv, n, MPI_UNSIGNED_CHAR, MPI_MIN, communicator ); for ( int i = 0; i < n; i++ ) x[i] = recv[i]; delete[] recv; PROFILE_STOP( "minReduce2", profile_level ); } else { auto tmp = new int[n]; for ( int i = 0; i < n; i++ ) tmp[i] = x[i]; call_minReduce( tmp, n, comm_rank_of_min ); for ( int i = 0; i < n; i++ ) x[i] = static_cast( tmp[i] ); delete[] tmp; } } // char template<> void MPI_CLASS::call_minReduce( const char *send, char *recv, const int n, int *comm_rank_of_min ) const { if ( comm_rank_of_min == nullptr ) { PROFILE_START( "minReduce1", profile_level ); MPI_Allreduce( (void *) send, (void *) recv, n, MPI_SIGNED_CHAR, MPI_MIN, communicator ); PROFILE_STOP( "minReduce1", profile_level ); } else { auto tmp = new int[n]; for ( int i = 0; i < n; i++ ) tmp[i] = send[i]; call_minReduce( tmp, n, comm_rank_of_min ); for ( int i = 0; i < n; i++ ) recv[i] = static_cast( tmp[i] ); delete[] tmp; } } template<> void MPI_CLASS::call_minReduce( char *x, const int n, int *comm_rank_of_min ) const { if ( comm_rank_of_min == nullptr ) { PROFILE_START( "minReduce2", profile_level ); auto send = x; auto recv = new char[n]; MPI_Allreduce( send, recv, n, MPI_SIGNED_CHAR, MPI_MIN, communicator ); for ( int i = 0; i < n; i++ ) x[i] = recv[i]; delete[] recv; PROFILE_STOP( "minReduce2", profile_level ); } else { auto tmp = new int[n]; for ( int i = 0; i < n; i++ ) tmp[i] = x[i]; call_minReduce( tmp, n, comm_rank_of_min ); for ( int i = 0; i < n; i++ ) x[i] = static_cast( tmp[i] ); delete[] tmp; } } // unsigned int template<> void MPI_CLASS::call_minReduce( const unsigned int *send, unsigned int *recv, const int n, int *comm_rank_of_min ) const { if ( comm_rank_of_min == nullptr ) { PROFILE_START( "minReduce1", profile_level ); MPI_Allreduce( (void *) send, (void *) recv, n, MPI_UNSIGNED, MPI_MIN, communicator ); PROFILE_STOP( "minReduce1", profile_level ); } else { auto tmp = new int[n]; for ( int i = 0; i < n; i++ ) tmp[i] = unsigned_to_signed( send[i] ); call_minReduce( tmp, n, comm_rank_of_min ); for ( int i = 0; i < n; i++ ) recv[i] = signed_to_unsigned( tmp[i] ); delete[] tmp; } } template<> void MPI_CLASS::call_minReduce( unsigned int *x, const int n, int *comm_rank_of_min ) const { if ( comm_rank_of_min == nullptr ) { PROFILE_START( "minReduce2", profile_level ); auto send = x; auto recv = new unsigned int[n]; MPI_Allreduce( send, recv, n, MPI_UNSIGNED, MPI_MIN, communicator ); for ( int i = 0; i < n; i++ ) x[i] = recv[i]; delete[] recv; 
PROFILE_STOP( "minReduce2", profile_level ); } else { auto tmp = new int[n]; for ( int i = 0; i < n; i++ ) tmp[i] = unsigned_to_signed( x[i] ); call_minReduce( tmp, n, comm_rank_of_min ); for ( int i = 0; i < n; i++ ) x[i] = signed_to_unsigned( tmp[i] ); delete[] tmp; } } // int template<> void MPI_CLASS::call_minReduce( const int *x, int *y, const int n, int *comm_rank_of_min ) const { PROFILE_START( "minReduce1", profile_level ); if ( comm_rank_of_min == nullptr ) { MPI_Allreduce( (void *) x, (void *) y, n, MPI_INT, MPI_MIN, communicator ); } else { auto recv = new IntIntStruct[n]; auto send = new IntIntStruct[n]; for ( int i = 0; i < n; ++i ) { send[i].j = x[i]; send[i].i = comm_rank; } MPI_Allreduce( send, recv, n, MPI_2INT, MPI_MINLOC, communicator ); for ( int i = 0; i < n; ++i ) { y[i] = recv[i].j; comm_rank_of_min[i] = recv[i].i; } delete[] recv; delete[] send; } PROFILE_STOP( "minReduce1", profile_level ); } template<> void MPI_CLASS::call_minReduce( int *x, const int n, int *comm_rank_of_min ) const { PROFILE_START( "minReduce2", profile_level ); if ( comm_rank_of_min == nullptr ) { auto send = x; auto recv = new int[n]; MPI_Allreduce( send, recv, n, MPI_INT, MPI_MIN, communicator ); for ( int i = 0; i < n; i++ ) x[i] = recv[i]; delete[] recv; } else { auto recv = new IntIntStruct[n]; auto send = new IntIntStruct[n]; for ( int i = 0; i < n; ++i ) { send[i].j = x[i]; send[i].i = comm_rank; } MPI_Allreduce( send, recv, n, MPI_2INT, MPI_MINLOC, communicator ); for ( int i = 0; i < n; ++i ) { x[i] = recv[i].j; comm_rank_of_min[i] = recv[i].i; } delete[] recv; delete[] send; } PROFILE_STOP( "minReduce2", profile_level ); } // unsigned long int template<> void MPI_CLASS::call_minReduce( const unsigned long int *send, unsigned long int *recv, const int n, int *comm_rank_of_min ) const { if ( comm_rank_of_min == nullptr ) { PROFILE_START( "minReduce1", profile_level ); MPI_Allreduce( (void *) send, (void *) recv, n, MPI_UNSIGNED_LONG, MPI_MIN, communicator ); PROFILE_STOP( "minReduce1", profile_level ); } else { auto tmp = new long int[n]; for ( int i = 0; i < n; i++ ) tmp[i] = unsigned_to_signed( send[i] ); call_minReduce( tmp, n, comm_rank_of_min ); for ( int i = 0; i < n; i++ ) recv[i] = signed_to_unsigned( tmp[i] ); delete[] tmp; } } template<> void MPI_CLASS::call_minReduce( unsigned long int *x, const int n, int *comm_rank_of_min ) const { if ( comm_rank_of_min == nullptr ) { PROFILE_START( "minReduce2", profile_level ); auto send = x; auto recv = new unsigned long int[n]; MPI_Allreduce( send, recv, n, MPI_UNSIGNED_LONG, MPI_MIN, communicator ); for ( int i = 0; i < n; i++ ) x[i] = recv[i]; delete[] recv; PROFILE_STOP( "minReduce2", profile_level ); } else { auto tmp = new long int[n]; for ( int i = 0; i < n; i++ ) tmp[i] = unsigned_to_signed( x[i] ); call_minReduce( tmp, n, comm_rank_of_min ); for ( int i = 0; i < n; i++ ) x[i] = signed_to_unsigned( tmp[i] ); delete[] tmp; } } // long int template<> void MPI_CLASS::call_minReduce( const long int *x, long int *y, const int n, int *comm_rank_of_min ) const { PROFILE_START( "minReduce1", profile_level ); if ( comm_rank_of_min == nullptr ) { MPI_Allreduce( (void *) x, (void *) y, n, MPI_LONG, MPI_MIN, communicator ); } else { auto recv = new LongIntStruct[n]; auto send = new LongIntStruct[n]; for ( int i = 0; i < n; ++i ) { send[i].j = x[i]; send[i].i = comm_rank; } MPI_Allreduce( send, recv, n, MPI_LONG_INT, MPI_MINLOC, communicator ); for ( int i = 0; i < n; ++i ) { y[i] = recv[i].j; comm_rank_of_min[i] = recv[i].i; } delete[] recv; 
        delete[] send;
    }
    PROFILE_STOP( "minReduce1", profile_level );
}
template<>
void MPI_CLASS::call_minReduce( long int *x, const int n, int *comm_rank_of_min ) const
{
    PROFILE_START( "minReduce2", profile_level );
    if ( comm_rank_of_min == nullptr ) {
        auto send = x;
        auto recv = new long int[n];
        MPI_Allreduce( send, recv, n, MPI_LONG, MPI_MIN, communicator );
        for ( long int i = 0; i < n; i++ )
            x[i] = recv[i];
        delete[] recv;
    } else {
        auto recv = new LongIntStruct[n];
        auto send = new LongIntStruct[n];
        for ( int i = 0; i < n; ++i ) {
            send[i].j = x[i];
            send[i].i = comm_rank;
        }
        MPI_Allreduce( send, recv, n, MPI_LONG_INT, MPI_MINLOC, communicator );
        for ( int i = 0; i < n; ++i ) {
            x[i]                = recv[i].j;
            comm_rank_of_min[i] = recv[i].i;
        }
        delete[] recv;
        delete[] send;
    }
    PROFILE_STOP( "minReduce2", profile_level );
}
// unsigned long long int
template<>
void MPI_CLASS::call_minReduce( const unsigned long long int *send,
    unsigned long long int *recv, const int n, int *comm_rank_of_min ) const
{
    PROFILE_START( "minReduce1", profile_level );
    if ( comm_rank_of_min == nullptr ) {
        auto x = new long long int[n];
        auto y = new long long int[n];
        for ( int i = 0; i < n; i++ )
            x[i] = unsigned_to_signed( send[i] );
        MPI_Allreduce( (void *) x, (void *) y, n, MPI_LONG_LONG_INT, MPI_MIN, communicator );
        for ( int i = 0; i < n; i++ )
            recv[i] = signed_to_unsigned( y[i] );
        delete[] x;
        delete[] y;
    } else {
        printf( "minReduce will use double\n" );
        auto tmp = new double[n];
        for ( int i = 0; i < n; i++ )
            tmp[i] = static_cast<double>( send[i] );
        call_minReduce( tmp, n, comm_rank_of_min );
        for ( int i = 0; i < n; i++ )
            recv[i] = static_cast<unsigned long long int>( tmp[i] );
        delete[] tmp;
    }
    PROFILE_STOP( "minReduce1", profile_level );
}
template<>
void MPI_CLASS::call_minReduce(
    unsigned long long int *x, const int n, int *comm_rank_of_min ) const
{
    auto recv = new unsigned long long int[n];
    call_minReduce( x, recv, n, comm_rank_of_min );
    for ( int i = 0; i < n; i++ )
        x[i] = recv[i];
    delete[] recv;
}
// long long int
template<>
void MPI_CLASS::call_minReduce(
    const long long int *x, long long int *y, const int n, int *comm_rank_of_min ) const
{
    PROFILE_START( "minReduce1", profile_level );
    if ( comm_rank_of_min == nullptr ) {
        MPI_Allreduce( (void *) x, (void *) y, n, MPI_LONG_LONG_INT, MPI_MIN, communicator );
    } else {
        printf( "minReduce will use double\n" );
        auto tmp = new double[n];
        for ( int i = 0; i < n; i++ )
            tmp[i] = static_cast<double>( x[i] );
        call_minReduce( tmp, n, comm_rank_of_min );
        for ( int i = 0; i < n; i++ )
            y[i] = static_cast<long long int>( tmp[i] );
        delete[] tmp;
    }
    PROFILE_STOP( "minReduce1", profile_level );
}
template<>
void MPI_CLASS::call_minReduce( long long int *x, const int n, int *comm_rank_of_min ) const
{
    auto recv = new long long int[n];
    call_minReduce( x, recv, n, comm_rank_of_min );
    for ( int i = 0; i < n; i++ )
        x[i] = recv[i];
    delete[] recv;
}
// float
template<>
void MPI_CLASS::call_minReduce(
    const float *x, float *y, const int n, int *comm_rank_of_min ) const
{
    PROFILE_START( "minReduce1", profile_level );
    if ( comm_rank_of_min == nullptr ) {
        MPI_Allreduce( (void *) x, (void *) y, n, MPI_FLOAT, MPI_MIN, communicator );
    } else {
        auto recv = new FloatIntStruct[n];
        auto send = new FloatIntStruct[n];
        for ( int i = 0; i < n; ++i ) {
            send[i].f = x[i];
            send[i].i = comm_rank;
        }
        MPI_Allreduce( send, recv, n, MPI_FLOAT_INT, MPI_MINLOC, communicator );
        for ( int i = 0; i < n; ++i ) {
            y[i]                = recv[i].f;
            comm_rank_of_min[i] = recv[i].i;
        }
        delete[] recv;
        delete[] send;
    }
    PROFILE_STOP( "minReduce1", profile_level );
}
template<>
void
MPI_CLASS::call_minReduce( float *x, const int n, int *comm_rank_of_min ) const { PROFILE_START( "minReduce2", profile_level ); if ( comm_rank_of_min == nullptr ) { auto send = x; auto recv = new float[n]; MPI_Allreduce( send, recv, n, MPI_FLOAT, MPI_MIN, communicator ); for ( int i = 0; i < n; i++ ) x[i] = recv[i]; delete[] recv; } else { auto recv = new FloatIntStruct[n]; auto send = new FloatIntStruct[n]; for ( int i = 0; i < n; ++i ) { send[i].f = x[i]; send[i].i = comm_rank; } MPI_Allreduce( send, recv, n, MPI_FLOAT_INT, MPI_MINLOC, communicator ); for ( int i = 0; i < n; ++i ) { x[i] = recv[i].f; comm_rank_of_min[i] = recv[i].i; } delete[] recv; delete[] send; } PROFILE_STOP( "minReduce2", profile_level ); } // double template<> void MPI_CLASS::call_minReduce( const double *x, double *y, const int n, int *comm_rank_of_min ) const { PROFILE_START( "minReduce1", profile_level ); if ( comm_rank_of_min == nullptr ) { MPI_Allreduce( (void *) x, (void *) y, n, MPI_DOUBLE, MPI_MIN, communicator ); } else { auto recv = new DoubleIntStruct[n]; auto send = new DoubleIntStruct[n]; for ( int i = 0; i < n; ++i ) { send[i].d = x[i]; send[i].i = comm_rank; } MPI_Allreduce( send, recv, n, MPI_DOUBLE_INT, MPI_MINLOC, communicator ); for ( int i = 0; i < n; ++i ) { y[i] = recv[i].d; comm_rank_of_min[i] = recv[i].i; } delete[] recv; delete[] send; } PROFILE_STOP( "minReduce1", profile_level ); } template<> void MPI_CLASS::call_minReduce( double *x, const int n, int *comm_rank_of_min ) const { PROFILE_START( "minReduce2", profile_level ); if ( comm_rank_of_min == nullptr ) { auto send = x; auto recv = new double[n]; MPI_Allreduce( send, recv, n, MPI_DOUBLE, MPI_MIN, communicator ); for ( int i = 0; i < n; i++ ) x[i] = recv[i]; delete[] recv; } else { auto recv = new DoubleIntStruct[n]; auto send = new DoubleIntStruct[n]; for ( int i = 0; i < n; ++i ) { send[i].d = x[i]; send[i].i = comm_rank; } MPI_Allreduce( send, recv, n, MPI_DOUBLE_INT, MPI_MINLOC, communicator ); for ( int i = 0; i < n; ++i ) { x[i] = recv[i].d; comm_rank_of_min[i] = recv[i].i; } delete[] recv; delete[] send; } PROFILE_STOP( "minReduce2", profile_level ); } #endif /************************************************************************ * call_maxReduce * * Note: these specializations are only called when using MPI. 
* ************************************************************************/ #ifdef USE_MPI // unsigned char template<> void MPI_CLASS::call_maxReduce( const unsigned char *send, unsigned char *recv, const int n, int *comm_rank_of_max ) const { if ( comm_rank_of_max == nullptr ) { PROFILE_START( "maxReduce1", profile_level ); MPI_Allreduce( (void *) send, (void *) recv, n, MPI_UNSIGNED_CHAR, MPI_MAX, communicator ); PROFILE_STOP( "maxReduce1", profile_level ); } else { auto tmp = new int[n]; for ( int i = 0; i < n; i++ ) tmp[i] = send[i]; call_maxReduce( tmp, n, comm_rank_of_max ); for ( int i = 0; i < n; i++ ) recv[i] = static_cast( tmp[i] ); delete[] tmp; } } template<> void MPI_CLASS::call_maxReduce( unsigned char *x, const int n, int *comm_rank_of_max ) const { if ( comm_rank_of_max == nullptr ) { PROFILE_START( "maxReduce2", profile_level ); auto send = x; auto recv = new unsigned char[n]; MPI_Allreduce( send, recv, n, MPI_UNSIGNED_CHAR, MPI_MAX, communicator ); for ( int i = 0; i < n; i++ ) x[i] = recv[i]; delete[] recv; PROFILE_STOP( "maxReduce2", profile_level ); } else { auto tmp = new int[n]; for ( int i = 0; i < n; i++ ) tmp[i] = x[i]; call_maxReduce( tmp, n, comm_rank_of_max ); for ( int i = 0; i < n; i++ ) x[i] = static_cast( tmp[i] ); delete[] tmp; } } // char template<> void MPI_CLASS::call_maxReduce( const char *send, char *recv, const int n, int *comm_rank_of_max ) const { if ( comm_rank_of_max == nullptr ) { PROFILE_START( "maxReduce1", profile_level ); MPI_Allreduce( (void *) send, (void *) recv, n, MPI_SIGNED_CHAR, MPI_MAX, communicator ); PROFILE_STOP( "maxReduce1", profile_level ); } else { auto tmp = new int[n]; for ( int i = 0; i < n; i++ ) tmp[i] = send[i]; call_maxReduce( tmp, n, comm_rank_of_max ); for ( int i = 0; i < n; i++ ) recv[i] = static_cast( tmp[i] ); delete[] tmp; } } template<> void MPI_CLASS::call_maxReduce( char *x, const int n, int *comm_rank_of_max ) const { if ( comm_rank_of_max == nullptr ) { PROFILE_START( "maxReduce2", profile_level ); auto send = x; auto recv = new char[n]; MPI_Allreduce( send, recv, n, MPI_SIGNED_CHAR, MPI_MAX, communicator ); for ( int i = 0; i < n; i++ ) x[i] = recv[i]; delete[] recv; PROFILE_STOP( "maxReduce2", profile_level ); } else { auto tmp = new int[n]; for ( int i = 0; i < n; i++ ) tmp[i] = x[i]; call_maxReduce( tmp, n, comm_rank_of_max ); for ( int i = 0; i < n; i++ ) x[i] = static_cast( tmp[i] ); delete[] tmp; } } // unsigned int template<> void MPI_CLASS::call_maxReduce( const unsigned int *send, unsigned int *recv, const int n, int *comm_rank_of_max ) const { if ( comm_rank_of_max == nullptr ) { PROFILE_START( "maxReduce1", profile_level ); MPI_Allreduce( (void *) send, (void *) recv, n, MPI_UNSIGNED, MPI_MAX, communicator ); PROFILE_STOP( "maxReduce1", profile_level ); } else { auto tmp = new int[n]; for ( int i = 0; i < n; i++ ) tmp[i] = unsigned_to_signed( send[i] ); call_maxReduce( tmp, n, comm_rank_of_max ); for ( int i = 0; i < n; i++ ) recv[i] = signed_to_unsigned( tmp[i] ); delete[] tmp; } } template<> void MPI_CLASS::call_maxReduce( unsigned int *x, const int n, int *comm_rank_of_max ) const { if ( comm_rank_of_max == nullptr ) { PROFILE_START( "maxReduce2", profile_level ); auto send = x; auto recv = new unsigned int[n]; MPI_Allreduce( send, recv, n, MPI_UNSIGNED, MPI_MAX, communicator ); for ( int i = 0; i < n; i++ ) x[i] = recv[i]; delete[] recv; PROFILE_STOP( "maxReduce2", profile_level ); } else { auto tmp = new int[n]; for ( int i = 0; i < n; i++ ) tmp[i] = unsigned_to_signed( x[i] ); 
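        // MPI_MAXLOC operates on signed value/index pairs (e.g. MPI_2INT), so the
        // unsigned values are mapped through the order-preserving shift in
        // unsigned_to_signed, reduced as ints, and mapped back with signed_to_unsigned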
call_maxReduce( tmp, n, comm_rank_of_max ); for ( int i = 0; i < n; i++ ) x[i] = signed_to_unsigned( tmp[i] ); delete[] tmp; } } // int template<> void MPI_CLASS::call_maxReduce( const int *x, int *y, const int n, int *comm_rank_of_max ) const { PROFILE_START( "maxReduce1", profile_level ); if ( comm_rank_of_max == nullptr ) { MPI_Allreduce( (void *) x, (void *) y, n, MPI_INT, MPI_MAX, communicator ); } else { auto recv = new IntIntStruct[n]; auto send = new IntIntStruct[n]; for ( int i = 0; i < n; ++i ) { send[i].j = x[i]; send[i].i = comm_rank; } MPI_Allreduce( send, recv, n, MPI_2INT, MPI_MAXLOC, communicator ); for ( int i = 0; i < n; ++i ) { y[i] = recv[i].j; comm_rank_of_max[i] = recv[i].i; } delete[] recv; delete[] send; } PROFILE_STOP( "maxReduce1", profile_level ); } template<> void MPI_CLASS::call_maxReduce( int *x, const int n, int *comm_rank_of_max ) const { PROFILE_START( "maxReduce2", profile_level ); if ( comm_rank_of_max == nullptr ) { int *send = x; auto recv = new int[n]; MPI_Allreduce( send, recv, n, MPI_INT, MPI_MAX, communicator ); for ( int i = 0; i < n; i++ ) x[i] = recv[i]; delete[] recv; } else { auto recv = new IntIntStruct[n]; auto send = new IntIntStruct[n]; for ( int i = 0; i < n; ++i ) { send[i].j = x[i]; send[i].i = comm_rank; } MPI_Allreduce( send, recv, n, MPI_2INT, MPI_MAXLOC, communicator ); for ( int i = 0; i < n; ++i ) { x[i] = recv[i].j; comm_rank_of_max[i] = recv[i].i; } delete[] recv; delete[] send; } PROFILE_STOP( "maxReduce2", profile_level ); } // long int template<> void MPI_CLASS::call_maxReduce( const long int *x, long int *y, const int n, int *comm_rank_of_max ) const { PROFILE_START( "maxReduce1", profile_level ); if ( comm_rank_of_max == nullptr ) { MPI_Allreduce( (void *) x, (void *) y, n, MPI_LONG, MPI_MAX, communicator ); } else { auto recv = new LongIntStruct[n]; auto send = new LongIntStruct[n]; for ( int i = 0; i < n; ++i ) { send[i].j = x[i]; send[i].i = comm_rank; } MPI_Allreduce( send, recv, n, MPI_LONG_INT, MPI_MAXLOC, communicator ); for ( int i = 0; i < n; ++i ) { y[i] = recv[i].j; comm_rank_of_max[i] = recv[i].i; } delete[] recv; delete[] send; } PROFILE_STOP( "maxReduce1", profile_level ); } template<> void MPI_CLASS::call_maxReduce( long int *x, const int n, int *comm_rank_of_max ) const { PROFILE_START( "maxReduce2", profile_level ); if ( comm_rank_of_max == nullptr ) { auto send = x; auto recv = new long int[n]; MPI_Allreduce( send, recv, n, MPI_LONG, MPI_MAX, communicator ); for ( int i = 0; i < n; i++ ) x[i] = recv[i]; delete[] recv; } else { auto recv = new LongIntStruct[n]; auto send = new LongIntStruct[n]; for ( int i = 0; i < n; ++i ) { send[i].j = x[i]; send[i].i = comm_rank; } MPI_Allreduce( send, recv, n, MPI_LONG_INT, MPI_MAXLOC, communicator ); for ( int i = 0; i < n; ++i ) { x[i] = recv[i].j; comm_rank_of_max[i] = recv[i].i; } delete[] recv; delete[] send; } PROFILE_STOP( "maxReduce2", profile_level ); } // unsigned long int template<> void MPI_CLASS::call_maxReduce( const unsigned long int *send, unsigned long int *recv, const int n, int *comm_rank_of_max ) const { if ( comm_rank_of_max == nullptr ) { PROFILE_START( "maxReduce1", profile_level ); MPI_Allreduce( (void *) send, (void *) recv, n, MPI_UNSIGNED_LONG, MPI_MAX, communicator ); PROFILE_STOP( "maxReduce1", profile_level ); } else { auto tmp = new long int[n]; for ( int i = 0; i < n; i++ ) tmp[i] = unsigned_to_signed( send[i] ); call_maxReduce( tmp, n, comm_rank_of_max ); for ( int i = 0; i < n; i++ ) recv[i] = signed_to_unsigned( tmp[i] ); delete[] tmp; 
} } template<> void MPI_CLASS::call_maxReduce( unsigned long int *x, const int n, int *comm_rank_of_max ) const { if ( comm_rank_of_max == nullptr ) { PROFILE_START( "maxReduce2", profile_level ); auto send = x; auto recv = new unsigned long int[n]; MPI_Allreduce( send, recv, n, MPI_UNSIGNED_LONG, MPI_MAX, communicator ); for ( int i = 0; i < n; i++ ) x[i] = recv[i]; delete[] recv; PROFILE_STOP( "maxReduce2", profile_level ); } else { auto tmp = new long int[n]; for ( int i = 0; i < n; i++ ) tmp[i] = unsigned_to_signed( x[i] ); call_maxReduce( tmp, n, comm_rank_of_max ); for ( int i = 0; i < n; i++ ) x[i] = signed_to_unsigned( tmp[i] ); delete[] tmp; } } // unsigned long long int template<> void MPI_CLASS::call_maxReduce( const unsigned long long int *send, unsigned long long int *recv, const int n, int *comm_rank_of_max ) const { PROFILE_START( "maxReduce1", profile_level ); if ( comm_rank_of_max == nullptr ) { auto x = new long long int[n]; auto y = new long long int[n]; for ( int i = 0; i < n; i++ ) x[i] = unsigned_to_signed( send[i] ); MPI_Allreduce( (void *) x, (void *) y, n, MPI_LONG_LONG_INT, MPI_MAX, communicator ); for ( int i = 0; i < n; i++ ) recv[i] = signed_to_unsigned( y[i] ); delete[] x; delete[] y; } else { printf( "maxReduce will use double\n" ); auto tmp = new double[n]; for ( int i = 0; i < n; i++ ) tmp[i] = static_cast<double>( send[i] ); call_maxReduce( tmp, n, comm_rank_of_max ); for ( int i = 0; i < n; i++ ) recv[i] = static_cast<unsigned long long int>( tmp[i] ); delete[] tmp; } PROFILE_STOP( "maxReduce1", profile_level ); } template<> void MPI_CLASS::call_maxReduce( unsigned long long int *x, const int n, int *comm_rank_of_max ) const { auto recv = new unsigned long long int[n]; call_maxReduce( x, recv, n, comm_rank_of_max ); for ( int i = 0; i < n; i++ ) x[i] = recv[i]; delete[] recv; } // long long int template<> void MPI_CLASS::call_maxReduce( const long long int *x, long long int *y, const int n, int *comm_rank_of_max ) const { PROFILE_START( "maxReduce1", profile_level ); if ( comm_rank_of_max == nullptr ) { MPI_Allreduce( (void *) x, (void *) y, n, MPI_LONG_LONG_INT, MPI_MAX, communicator ); } else { printf( "maxReduce will use double\n" ); auto tmp = new double[n]; for ( int i = 0; i < n; i++ ) tmp[i] = static_cast<double>( x[i] ); call_maxReduce( tmp, n, comm_rank_of_max ); for ( int i = 0; i < n; i++ ) y[i] = static_cast<long long int>( tmp[i] ); delete[] tmp; } PROFILE_STOP( "maxReduce1", profile_level ); } template<> void MPI_CLASS::call_maxReduce( long long int *x, const int n, int *comm_rank_of_max ) const { auto recv = new long long int[n]; call_maxReduce( x, recv, n, comm_rank_of_max ); for ( int i = 0; i < n; i++ ) x[i] = recv[i]; delete[] recv; } // float template<> void MPI_CLASS::call_maxReduce( const float *x, float *y, const int n, int *comm_rank_of_max ) const { PROFILE_START( "maxReduce1", profile_level ); if ( comm_rank_of_max == nullptr ) { MPI_Allreduce( (void *) x, (void *) y, n, MPI_FLOAT, MPI_MAX, communicator ); } else { auto recv = new FloatIntStruct[n]; auto send = new FloatIntStruct[n]; for ( int i = 0; i < n; ++i ) { send[i].f = x[i]; send[i].i = comm_rank; } MPI_Allreduce( send, recv, n, MPI_FLOAT_INT, MPI_MAXLOC, communicator ); for ( int i = 0; i < n; ++i ) { y[i] = recv[i].f; comm_rank_of_max[i] = recv[i].i; } delete[] recv; delete[] send; } PROFILE_STOP( "maxReduce1", profile_level ); } template<> void MPI_CLASS::call_maxReduce( float *x, const int n, int *comm_rank_of_max ) const { PROFILE_START( "maxReduce2", profile_level ); if ( comm_rank_of_max ==
nullptr ) { auto send = x; auto recv = new float[n]; MPI_Allreduce( send, recv, n, MPI_FLOAT, MPI_MAX, communicator ); for ( int i = 0; i < n; i++ ) x[i] = recv[i]; delete[] recv; } else { auto recv = new FloatIntStruct[n]; auto send = new FloatIntStruct[n]; for ( int i = 0; i < n; ++i ) { send[i].f = x[i]; send[i].i = comm_rank; } MPI_Allreduce( send, recv, n, MPI_FLOAT_INT, MPI_MAXLOC, communicator ); for ( int i = 0; i < n; ++i ) { x[i] = recv[i].f; comm_rank_of_max[i] = recv[i].i; } delete[] recv; delete[] send; } PROFILE_STOP( "maxReduce2", profile_level ); } // double template<> void MPI_CLASS::call_maxReduce( const double *x, double *y, const int n, int *comm_rank_of_max ) const { PROFILE_START( "maxReduce1", profile_level ); if ( comm_rank_of_max == nullptr ) { MPI_Allreduce( (void *) x, (void *) y, n, MPI_DOUBLE, MPI_MAX, communicator ); } else { auto recv = new DoubleIntStruct[n]; auto send = new DoubleIntStruct[n]; for ( int i = 0; i < n; ++i ) { send[i].d = x[i]; send[i].i = comm_rank; } MPI_Allreduce( send, recv, n, MPI_DOUBLE_INT, MPI_MAXLOC, communicator ); for ( int i = 0; i < n; ++i ) { y[i] = recv[i].d; comm_rank_of_max[i] = recv[i].i; } delete[] recv; delete[] send; } PROFILE_STOP( "maxReduce1", profile_level ); } template<> void MPI_CLASS::call_maxReduce( double *x, const int n, int *comm_rank_of_max ) const { PROFILE_START( "maxReduce2", profile_level ); if ( comm_rank_of_max == nullptr ) { auto send = x; auto recv = new double[n]; MPI_Allreduce( send, recv, n, MPI_DOUBLE, MPI_MAX, communicator ); for ( int i = 0; i < n; i++ ) x[i] = recv[i]; delete[] recv; } else { auto recv = new DoubleIntStruct[n]; auto send = new DoubleIntStruct[n]; for ( int i = 0; i < n; ++i ) { send[i].d = x[i]; send[i].i = comm_rank; } MPI_Allreduce( send, recv, n, MPI_DOUBLE_INT, MPI_MAXLOC, communicator ); for ( int i = 0; i < n; ++i ) { x[i] = recv[i].d; comm_rank_of_max[i] = recv[i].i; } delete[] recv; delete[] send; } PROFILE_STOP( "maxReduce2", profile_level ); } #endif /************************************************************************ * bcast * * Note: these specializations are only called when using MPI. 
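*
* Example (illustrative sketch): each specialization forwards directly to
* MPI_Bcast with the matching datatype, so broadcasting four doubles from
* rank 0 reduces to:
*
*     double data[4] = { 1.0, 2.0, 3.0, 4.0 };   // only meaningful on the root
*     MPI_Bcast( data, 4, MPI_DOUBLE, 0, communicator );
*     // every rank now holds the root's values
*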
* ************************************************************************/ #ifdef USE_MPI // char template<> void MPI_CLASS::call_bcast( unsigned char *x, const int n, const int root ) const { PROFILE_START( "bcast", profile_level ); MPI_Bcast( x, n, MPI_UNSIGNED_CHAR, root, communicator ); PROFILE_STOP( "bcast", profile_level ); } template<> void MPI_CLASS::call_bcast( char *x, const int n, const int root ) const { PROFILE_START( "bcast", profile_level ); MPI_Bcast( x, n, MPI_CHAR, root, communicator ); PROFILE_STOP( "bcast", profile_level ); } // int template<> void MPI_CLASS::call_bcast( unsigned int *x, const int n, const int root ) const { PROFILE_START( "bcast", profile_level ); MPI_Bcast( x, n, MPI_UNSIGNED, root, communicator ); PROFILE_STOP( "bcast", profile_level ); } template<> void MPI_CLASS::call_bcast( int *x, const int n, const int root ) const { PROFILE_START( "bcast", profile_level ); MPI_Bcast( x, n, MPI_INT, root, communicator ); PROFILE_STOP( "bcast", profile_level ); } // float template<> void MPI_CLASS::call_bcast( float *x, const int n, const int root ) const { PROFILE_START( "bcast", profile_level ); MPI_Bcast( x, n, MPI_FLOAT, root, communicator ); PROFILE_STOP( "bcast", profile_level ); } // double template<> void MPI_CLASS::call_bcast( double *x, const int n, const int root ) const { PROFILE_START( "bcast", profile_level ); MPI_Bcast( x, n, MPI_DOUBLE, root, communicator ); PROFILE_STOP( "bcast", profile_level ); } #else // We need a concrete instantiation of bcast(x,n,root); template<> void MPI_CLASS::call_bcast( char *, const int, const int ) const { } #endif /************************************************************************ * Perform a global barrier across all processors. * ************************************************************************/ void MPI_CLASS::barrier() const { #ifdef USE_MPI MPI_Barrier( communicator ); #endif } /************************************************************************ * Send data array to another processor. * * Note: these specializations are only called when using MPI. * ************************************************************************/ #ifdef USE_MPI // char template<> void MPI_CLASS::send( const char *buf, const int length, const int recv_proc_number, int tag ) const { // Set the tag to 0 if it is < 0 tag = ( tag >= 0 ) ? tag : 0; MPI_INSIST( tag <= d_maxTag, "Maximum tag value exceeded" ); // Send the data PROFILE_START( "send", profile_level ); MPI_Send( (void *) buf, length, MPI_CHAR, recv_proc_number, tag, communicator ); PROFILE_STOP( "send", profile_level ); } // int template<> void MPI_CLASS::send( const int *buf, const int length, const int recv_proc_number, int tag ) const { // Set the tag to 0 if it is < 0 tag = ( tag >= 0 ) ? tag : 0; MPI_INSIST( tag <= d_maxTag, "Maximum tag value exceeded" ); // Send the data PROFILE_START( "send", profile_level ); MPI_Send( (void *) buf, length, MPI_INT, recv_proc_number, tag, communicator ); PROFILE_STOP( "send", profile_level ); } // float template<> void MPI_CLASS::send( const float *buf, const int length, const int recv_proc_number, int tag ) const { // Set the tag to 0 if it is < 0 tag = ( tag >= 0 ) ? 
tag : 0; MPI_INSIST( tag <= d_maxTag, "Maximum tag value exceeded" ); // Send the data PROFILE_START( "send", profile_level ); MPI_Send( (void *) buf, length, MPI_FLOAT, recv_proc_number, tag, communicator ); PROFILE_STOP( "send", profile_level ); } // double template<> void MPI_CLASS::send( const double *buf, const int length, const int recv_proc_number, int tag ) const { // Set the tag to 0 if it is < 0 tag = ( tag >= 0 ) ? tag : 0; MPI_INSIST( tag <= d_maxTag, "Maximum tag value exceeded" ); // Send the data PROFILE_START( "send", profile_level ); MPI_Send( (void *) buf, length, MPI_DOUBLE, recv_proc_number, tag, communicator ); PROFILE_STOP( "send", profile_level ); } #else // We need a concrete instantiation of send for use without MPI template<> void MPI_CLASS::send( const char *buf, const int length, const int, int tag ) const { MPI_INSIST( tag <= d_maxTag, "Maximum tag value exceeded" ); MPI_INSIST( tag >= 0, "tag must be >= 0" ); PROFILE_START( "send", profile_level ); auto id = getRequest( communicator, tag ); auto it = global_isendrecv_list.find( id ); MPI_INSIST( it != global_isendrecv_list.end(), "send must be paired with a previous call to irecv in serial" ); MPI_ASSERT( it->second.status == 2 ); memcpy( (char *) it->second.data, buf, length ); global_isendrecv_list.erase( it ); PROFILE_STOP( "send", profile_level ); } #endif /************************************************************************ * Non-blocking send data array to another processor. * * Note: these specializations are only called when using MPI. * ************************************************************************/ #ifdef USE_MPI // char template<> MPI_Request MPI_CLASS::Isend( const char *buf, const int length, const int recv_proc, const int tag ) const { MPI_INSIST( tag <= d_maxTag, "Maximum tag value exceeded" ); MPI_INSIST( tag >= 0, "tag must be >= 0" ); MPI_Request request; PROFILE_START( "Isend", profile_level ); MPI_Isend( (void *) buf, length, MPI_CHAR, recv_proc, tag, communicator, &request ); PROFILE_STOP( "Isend", profile_level ); return request; } // int template<> MPI_Request MPI_CLASS::Isend( const int *buf, const int length, const int recv_proc, const int tag ) const { MPI_INSIST( tag <= d_maxTag, "Maximum tag value exceeded" ); MPI_INSIST( tag >= 0, "tag must be >= 0" ); MPI_Request request; PROFILE_START( "Isend", profile_level ); MPI_Isend( (void *) buf, length, MPI_INT, recv_proc, tag, communicator, &request ); PROFILE_STOP( "Isend", profile_level ); return request; } // float template<> MPI_Request MPI_CLASS::Isend( const float *buf, const int length, const int recv_proc, const int tag ) const { MPI_INSIST( tag <= d_maxTag, "Maximum tag value exceeded" ); MPI_INSIST( tag >= 0, "tag must be >= 0" ); MPI_Request request; PROFILE_START( "Isend", profile_level ); MPI_Isend( (void *) buf, length, MPI_FLOAT, recv_proc, tag, communicator, &request ); PROFILE_STOP( "Isend", profile_level ); return request; } // double template<> MPI_Request MPI_CLASS::Isend( const double *buf, const int length, const int recv_proc, const int tag ) const { MPI_INSIST( tag <= d_maxTag, "Maximum tag value exceeded" ); MPI_INSIST( tag >= 0, "tag must be >= 0" ); MPI_Request request; PROFILE_START( "Isend", profile_level ); MPI_Isend( (void *) buf, length, MPI_DOUBLE, recv_proc, tag, communicator, &request ); PROFILE_STOP( "Isend", profile_level ); return request; } #else // We need a concrete instantiation of send for use without mpi template<> MPI_Request MPI_CLASS::Isend( const char *buf, const int length,
const int, const int tag ) const { MPI_INSIST( tag <= d_maxTag, "Maximum tag value exceeded" ); MPI_INSIST( tag >= 0, "tag must be >= 0" ); PROFILE_START( "Isend", profile_level ); auto id = getRequest( communicator, tag ); auto it = global_isendrecv_list.find( id ); if ( it == global_isendrecv_list.end() ) { // We are calling isend first Isendrecv_struct data; data.data = buf; data.status = 1; global_isendrecv_list.insert( std::pair( id, data ) ); } else { // We called irecv first MPI_ASSERT( it->second.status == 2 ); memcpy( (char *) it->second.data, buf, length ); global_isendrecv_list.erase( it ); } PROFILE_STOP( "Isend", profile_level ); return id; } #endif /************************************************************************ * Send byte array to another processor. * ************************************************************************/ void MPI_CLASS::sendBytes( const void *buf, const int number_bytes, const int recv_proc_number, int tag ) const { MPI_INSIST( tag <= d_maxTag, "Maximum tag value exceeded" ); MPI_INSIST( tag >= 0, "tag must be >= 0" ); send( (const char *) buf, number_bytes, recv_proc_number, tag ); } /************************************************************************ * Non-blocking send byte array to another processor. * ************************************************************************/ MPI_Request MPI_CLASS::IsendBytes( const void *buf, const int number_bytes, const int recv_proc, const int tag ) const { MPI_INSIST( tag <= d_maxTag, "Maximum tag value exceeded" ); MPI_INSIST( tag >= 0, "tag must be >= 0" ); return Isend( (const char *) buf, number_bytes, recv_proc, tag ); } /************************************************************************ * Recieve data array to another processor. * * Note: these specializations are only called when using MPI. * ************************************************************************/ #ifdef USE_MPI // char template<> void MPI_CLASS::recv( char *buf, int &length, const int send_proc_number, const bool get_length, int tag ) const { // Set the tag to 0 if it is < 0 tag = ( tag >= 0 ) ? tag : 0; MPI_INSIST( tag <= d_maxTag, "Maximum tag value exceeded" ); PROFILE_START( "recv", profile_level ); // Get the recieve length if necessary if ( get_length ) { int bytes = this->probe( send_proc_number, tag ); int recv_length = bytes / sizeof( char ); MPI_INSIST( length >= recv_length, "Recived length is larger than allocated array" ); length = recv_length; } // Send the data MPI_Status status; MPI_Recv( (void *) buf, length, MPI_CHAR, send_proc_number, tag, communicator, &status ); PROFILE_STOP( "recv", profile_level ); } // int template<> void MPI_CLASS::recv( int *buf, int &length, const int send_proc_number, const bool get_length, int tag ) const { // Set the tag to 0 if it is < 0 tag = ( tag >= 0 ) ? tag : 0; MPI_INSIST( tag <= d_maxTag, "Maximum tag value exceeded" ); PROFILE_START( "recv", profile_level ); // Get the recieve length if necessary if ( get_length ) { int bytes = this->probe( send_proc_number, tag ); int recv_length = bytes / sizeof( int ); MPI_INSIST( length >= recv_length, "Recived length is larger than allocated array" ); length = recv_length; } // Send the data MPI_Status status; MPI_Recv( (void *) buf, length, MPI_INT, send_proc_number, tag, communicator, &status ); PROFILE_STOP( "recv", profile_level ); } // float template<> void MPI_CLASS::recv( float *buf, int &length, const int send_proc_number, const bool get_length, int tag ) const { // Set the tag to 0 if it is < 0 tag = ( tag >= 0 ) ? 
tag : 0; MPI_INSIST( tag <= d_maxTag, "Maximum tag value exceeded" ); PROFILE_START( "recv", profile_level ); // Get the recieve length if necessary if ( get_length ) { int bytes = this->probe( send_proc_number, tag ); int recv_length = bytes / sizeof( float ); MPI_INSIST( length >= recv_length, "Recived length is larger than allocated array" ); length = recv_length; } // Send the data MPI_Status status; MPI_Recv( (void *) buf, length, MPI_FLOAT, send_proc_number, tag, communicator, &status ); PROFILE_STOP( "recv", profile_level ); } // double template<> void MPI_CLASS::recv( double *buf, int &length, const int send_proc_number, const bool get_length, int tag ) const { // Set the tag to 0 if it is < 0 tag = ( tag >= 0 ) ? tag : 0; MPI_INSIST( tag <= d_maxTag, "Maximum tag value exceeded" ); PROFILE_START( "recv", profile_level ); // Get the recieve length if necessary if ( get_length ) { int bytes = this->probe( send_proc_number, tag ); int recv_length = bytes / sizeof( double ); MPI_INSIST( length >= recv_length, "Recived length is larger than allocated array" ); length = recv_length; } // Send the data MPI_Status status; MPI_Recv( (void *) buf, length, MPI_DOUBLE, send_proc_number, tag, communicator, &status ); PROFILE_STOP( "recv", profile_level ); } #else // We need a concrete instantiation of recv for use without mpi template<> void MPI_CLASS::recv( char *buf, int &length, const int, const bool, int tag ) const { MPI_INSIST( tag <= d_maxTag, "Maximum tag value exceeded" ); MPI_INSIST( tag >= 0, "tag must be >= 0" ); PROFILE_START( "recv", profile_level ); auto id = getRequest( communicator, tag ); auto it = global_isendrecv_list.find( id ); MPI_INSIST( it != global_isendrecv_list.end(), "recv must be paired with a previous call to isend in serial" ); MPI_ASSERT( it->second.status == 1 ); memcpy( buf, it->second.data, length ); global_isendrecv_list.erase( it ); PROFILE_STOP( "recv", profile_level ); } #endif /************************************************************************ * Non-blocking recieve data array to another processor. * * Note: these specializations are only called when using MPI. 
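*
* Example (illustrative sketch; "comm", "neighbor" and "tag" are assumed
* names): a request returned by Irecv must later be completed with
* wait/waitAll, and posting the receive before the matching Isend avoids
* unexpected-message buffering:
*
*     double sendbuf[8] = { 0 }, recvbuf[8];
*     MPI_Request req[2];
*     req[0] = comm.Irecv( recvbuf, 8, neighbor, tag );   // post the receive first
*     req[1] = comm.Isend( sendbuf, 8, neighbor, tag );
*     comm.waitAll( 2, req );                             // complete both requests
*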
* ************************************************************************/ #ifdef USE_MPI // char template<> MPI_Request MPI_CLASS::Irecv( char *buf, const int length, const int send_proc, const int tag ) const { MPI_INSIST( tag <= d_maxTag, "Maximum tag value exceeded" ); MPI_INSIST( tag >= 0, "tag must be >= 0" ); MPI_Request request; PROFILE_START( "Irecv", profile_level ); MPI_Irecv( (void *) buf, length, MPI_CHAR, send_proc, tag, communicator, &request ); PROFILE_STOP( "Irecv", profile_level ); return request; } // int template<> MPI_Request MPI_CLASS::Irecv( int *buf, const int length, const int send_proc, const int tag ) const { MPI_INSIST( tag <= d_maxTag, "Maximum tag value exceeded" ); MPI_INSIST( tag >= 0, "tag must be >= 0" ); MPI_Request request; PROFILE_START( "Irecv", profile_level ); MPI_Irecv( (void *) buf, length, MPI_INT, send_proc, tag, communicator, &request ); PROFILE_STOP( "Irecv", profile_level ); return request; } // float template<> MPI_Request MPI_CLASS::Irecv( float *buf, const int length, const int send_proc, const int tag ) const { MPI_INSIST( tag <= d_maxTag, "Maximum tag value exceeded" ); MPI_INSIST( tag >= 0, "tag must be >= 0" ); MPI_Request request; PROFILE_START( "Irecv", profile_level ); MPI_Irecv( (void *) buf, length, MPI_FLOAT, send_proc, tag, communicator, &request ); PROFILE_STOP( "Irecv", profile_level ); return request; } // double template<> MPI_Request MPI_CLASS::Irecv( double *buf, const int length, const int send_proc, const int tag ) const { MPI_INSIST( tag <= d_maxTag, "Maximum tag value exceeded" ); MPI_INSIST( tag >= 0, "tag must be >= 0" ); MPI_Request request; PROFILE_START( "Irecv", profile_level ); MPI_Irecv( (void *) buf, length, MPI_DOUBLE, send_proc, tag, communicator, &request ); PROFILE_STOP( "Irecv", profile_level ); return request; } #else // We need a concrete instantiation of irecv for use without mpi template<> MPI_Request MPI_CLASS::Irecv( char *buf, const int length, const int, const int tag ) const { MPI_INSIST( tag <= d_maxTag, "Maximum tag value exceeded" ); MPI_INSIST( tag >= 0, "tag must be >= 0" ); PROFILE_START( "Irecv", profile_level ); auto id = getRequest( communicator, tag ); auto it = global_isendrecv_list.find( id ); if ( it == global_isendrecv_list.end() ) { // We are calling Irecv first Isendrecv_struct data; data.data = buf; data.status = 2; global_isendrecv_list.insert( std::pair( id, data ) ); } else { // We called Isend first MPI_ASSERT( it->second.status == 1 ); memcpy( buf, it->second.data, length ); global_isendrecv_list.erase( it ); } PROFILE_STOP( "Irecv", profile_level ); return id; } #endif /************************************************************************ * Recieve byte array to another processor. * ************************************************************************/ void MPI_CLASS::recvBytes( void *buf, int &number_bytes, const int send_proc, int tag ) const { recv( (char *) buf, number_bytes, send_proc, false, tag ); } /************************************************************************ * Recieve byte array to another processor. 
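*
* Example (illustrative sketch; "comm", "src", "dst" and "tag" are assumed
* names): the byte variants forward to the char specializations, which is
* convenient for exchanging raw, already-packed buffers:
*
*     char out[16] = "hello", in[16];
*     MPI_Request r1 = comm.IrecvBytes( in, sizeof( in ), src, tag );
*     MPI_Request r2 = comm.IsendBytes( out, sizeof( out ), dst, tag );
*     comm.wait( r1 );
*     comm.wait( r2 );
*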
* ************************************************************************/ MPI_Request MPI_CLASS::IrecvBytes( void *buf, const int number_bytes, const int send_proc, const int tag ) const { MPI_INSIST( tag <= d_maxTag, "Maximum tag value exceeded" ); MPI_INSIST( tag >= 0, "tag must be >= 0" ); return Irecv( (char *) buf, number_bytes, send_proc, tag ); } /************************************************************************ * sendrecv * ************************************************************************/ #if defined( USE_MPI ) || defined( USE_EXT_MPI ) template<> void MPI_CLASS::sendrecv( const char* sendbuf, int sendcount, int dest, int sendtag, char* recvbuf, int recvcount, int source, int recvtag ) const { PROFILE_START( "sendrecv", profile_level ); MPI_Sendrecv( sendbuf, sendcount, MPI_CHAR, dest, sendtag, recvbuf, recvcount, MPI_CHAR, source, recvtag, communicator, MPI_STATUS_IGNORE ); PROFILE_STOP( "sendrecv", profile_level ); } template<> void MPI_CLASS::sendrecv( const int* sendbuf, int sendcount, int dest, int sendtag, int* recvbuf, int recvcount, int source, int recvtag ) const { PROFILE_START( "sendrecv", profile_level ); MPI_Sendrecv( sendbuf, sendcount, MPI_INT, dest, sendtag, recvbuf, recvcount, MPI_INT, source, recvtag, communicator, MPI_STATUS_IGNORE ); PROFILE_STOP( "sendrecv", profile_level ); } template<> void MPI_CLASS::sendrecv( const float* sendbuf, int sendcount, int dest, int sendtag, float* recvbuf, int recvcount, int source, int recvtag ) const { PROFILE_START( "sendrecv", profile_level ); MPI_Sendrecv( sendbuf, sendcount, MPI_FLOAT, dest, sendtag, recvbuf, recvcount, MPI_FLOAT, source, recvtag, communicator, MPI_STATUS_IGNORE ); PROFILE_STOP( "sendrecv", profile_level ); } template<> void MPI_CLASS::sendrecv( const double* sendbuf, int sendcount, int dest, int sendtag, double* recvbuf, int recvcount, int source, int recvtag ) const { PROFILE_START( "sendrecv", profile_level ); MPI_Sendrecv( sendbuf, sendcount, MPI_DOUBLE, dest, sendtag, recvbuf, recvcount, MPI_DOUBLE, source, recvtag, communicator, MPI_STATUS_IGNORE ); PROFILE_STOP( "sendrecv", profile_level ); } #endif /************************************************************************ * allGather * * Note: these specializations are only called when using MPI. 
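*
* Example (illustrative sketch): the scalar overload gathers one element from
* every rank into an array indexed by rank; the underlying call is simply:
*
*     int mine = comm_rank;                   // hypothetical per-rank value
*     std::vector<int> all( comm_size );
*     MPI_Allgather( &mine, 1, MPI_INT, all.data(), 1, MPI_INT, communicator );
*     // all[r] now holds the value contributed by rank r
*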
* ************************************************************************/ #ifdef USE_MPI // unsigned char template<> void MPI_CLASS::call_allGather( const unsigned char &x_in, unsigned char *x_out ) const { PROFILE_START( "allGather", profile_level ); MPI_Allgather( (void *) &x_in, 1, MPI_UNSIGNED_CHAR, (void *) x_out, 1, MPI_UNSIGNED_CHAR, communicator ); PROFILE_STOP( "allGather", profile_level ); } template<> void MPI_CLASS::call_allGather( const unsigned char *x_in, int size_in, unsigned char *x_out, int *size_out, int *disp_out ) const { PROFILE_START( "allGatherv", profile_level ); MPI_Allgatherv( (void *) x_in, size_in, MPI_CHAR, (void *) x_out, size_out, disp_out, MPI_CHAR, communicator ); PROFILE_STOP( "allGatherv", profile_level ); } // char template<> void MPI_CLASS::call_allGather( const char &x_in, char *x_out ) const { PROFILE_START( "allGather", profile_level ); MPI_Allgather( (void *) &x_in, 1, MPI_CHAR, (void *) x_out, 1, MPI_CHAR, communicator ); PROFILE_STOP( "allGather", profile_level ); } template<> void MPI_CLASS::call_allGather( const char *x_in, int size_in, char *x_out, int *size_out, int *disp_out ) const { PROFILE_START( "allGatherv", profile_level ); MPI_Allgatherv( (void *) x_in, size_in, MPI_CHAR, (void *) x_out, size_out, disp_out, MPI_CHAR, communicator ); PROFILE_STOP( "allGatherv", profile_level ); } // unsigned int template<> void MPI_CLASS::call_allGather( const unsigned int &x_in, unsigned int *x_out ) const { PROFILE_START( "allGather", profile_level ); MPI_Allgather( (void *) &x_in, 1, MPI_UNSIGNED, (void *) x_out, 1, MPI_UNSIGNED, communicator ); PROFILE_STOP( "allGather", profile_level ); } template<> void MPI_CLASS::call_allGather( const unsigned int *x_in, int size_in, unsigned int *x_out, int *size_out, int *disp_out ) const { PROFILE_START( "allGatherv", profile_level ); MPI_Allgatherv( (void *) x_in, size_in, MPI_UNSIGNED, (void *) x_out, size_out, disp_out, MPI_UNSIGNED, communicator ); PROFILE_STOP( "allGatherv", profile_level ); } // int template<> void MPI_CLASS::call_allGather( const int &x_in, int *x_out ) const { PROFILE_START( "allGather", profile_level ); MPI_Allgather( (void *) &x_in, 1, MPI_INT, (void *) x_out, 1, MPI_INT, communicator ); PROFILE_STOP( "allGather", profile_level ); } template<> void MPI_CLASS::call_allGather( const int *x_in, int size_in, int *x_out, int *size_out, int *disp_out ) const { PROFILE_START( "allGatherv", profile_level ); MPI_Allgatherv( (void *) x_in, size_in, MPI_INT, (void *) x_out, size_out, disp_out, MPI_INT, communicator ); PROFILE_STOP( "allGatherv", profile_level ); } // unsigned long int template<> void MPI_CLASS::call_allGather( const unsigned long int &x_in, unsigned long int *x_out ) const { PROFILE_START( "allGather", profile_level ); MPI_Allgather( (void *) &x_in, 1, MPI_UNSIGNED_LONG, (void *) x_out, 1, MPI_UNSIGNED_LONG, communicator ); PROFILE_STOP( "allGather", profile_level ); } template<> void MPI_CLASS::call_allGather( const unsigned long int *x_in, int size_in, unsigned long int *x_out, int *size_out, int *disp_out ) const { PROFILE_START( "allGatherv", profile_level ); MPI_Allgatherv( (void *) x_in, size_in, MPI_UNSIGNED_LONG, (void *) x_out, size_out, disp_out, MPI_UNSIGNED_LONG, communicator ); PROFILE_STOP( "allGatherv", profile_level ); } // long int template<> void MPI_CLASS::call_allGather( const long int &x_in, long int *x_out ) const { PROFILE_START( "allGather", profile_level ); MPI_Allgather( (void *) &x_in, 1, MPI_LONG, (void *) x_out, 1, MPI_LONG, communicator ); 
PROFILE_STOP( "allGather", profile_level ); } template<> void MPI_CLASS::call_allGather( const long int *x_in, int size_in, long int *x_out, int *size_out, int *disp_out ) const { PROFILE_START( "allGatherv", profile_level ); MPI_Allgatherv( (void *) x_in, size_in, MPI_LONG, (void *) x_out, size_out, disp_out, MPI_LONG, communicator ); PROFILE_STOP( "allGatherv", profile_level ); } // float template<> void MPI_CLASS::call_allGather( const float &x_in, float *x_out ) const { PROFILE_START( "allGather", profile_level ); MPI_Allgather( (void *) &x_in, 1, MPI_FLOAT, (void *) x_out, 1, MPI_FLOAT, communicator ); PROFILE_STOP( "allGather", profile_level ); } template<> void MPI_CLASS::call_allGather( const float *x_in, int size_in, float *x_out, int *size_out, int *disp_out ) const { PROFILE_START( "allGatherv", profile_level ); MPI_Allgatherv( (void *) x_in, size_in, MPI_FLOAT, (void *) x_out, size_out, disp_out, MPI_FLOAT, communicator ); PROFILE_STOP( "allGatherv", profile_level ); } // double template<> void MPI_CLASS::call_allGather( const double &x_in, double *x_out ) const { PROFILE_START( "allGather", profile_level ); MPI_Allgather( (void *) &x_in, 1, MPI_DOUBLE, (void *) x_out, 1, MPI_DOUBLE, communicator ); PROFILE_STOP( "allGather", profile_level ); } template<> void MPI_CLASS::call_allGather( const double *x_in, int size_in, double *x_out, int *size_out, int *disp_out ) const { PROFILE_START( "allGatherv", profile_level ); MPI_Allgatherv( (void *) x_in, size_in, MPI_DOUBLE, (void *) x_out, size_out, disp_out, MPI_DOUBLE, communicator ); PROFILE_STOP( "allGatherv", profile_level ); } #else // We need a concrete instantiation of call_allGather(x_in,size_in,x_out,size_out) template<> void MPI_CLASS::call_allGather( const char *, int, char *, int *, int * ) const { MPI_ERROR( "Internal error in communicator (allGather) " ); } #endif /************************************************************************ * allToAll * * Note: these specializations are only called when using MPI. 
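*
* Example (illustrative sketch; "comm" is an assumed wrapper instance): the
* fixed-size form sends n entries to every rank and receives n entries from
* every rank, so both buffers hold n*comm_size elements:
*
*     std::vector<double> send( n * comm_size ), recv( n * comm_size );
*     // send[ r*n .. r*n + n - 1 ] is the block destined for rank r
*     comm.allToAll( n, send.data(), recv.data() );
*     // recv[ r*n .. r*n + n - 1 ] is the block received from rank r
*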
* ************************************************************************/ #ifdef USE_MPI template<> void MPI_CLASS::allToAll( const int n, const unsigned char *send, unsigned char *recv ) const { PROFILE_START( "allToAll", profile_level ); MPI_Alltoall( (void *) send, n, MPI_UNSIGNED_CHAR, (void *) recv, n, MPI_UNSIGNED_CHAR, communicator ); PROFILE_STOP( "allToAll", profile_level ); } template<> void MPI_CLASS::allToAll( const int n, const char *send, char *recv ) const { PROFILE_START( "allToAll", profile_level ); MPI_Alltoall( (void *) send, n, MPI_CHAR, (void *) recv, n, MPI_CHAR, communicator ); PROFILE_STOP( "allToAll", profile_level ); } template<> void MPI_CLASS::allToAll( const int n, const unsigned int *send, unsigned int *recv ) const { PROFILE_START( "allToAll", profile_level ); MPI_Alltoall( (void *) send, n, MPI_UNSIGNED, (void *) recv, n, MPI_UNSIGNED, communicator ); PROFILE_STOP( "allToAll", profile_level ); } template<> void MPI_CLASS::allToAll( const int n, const int *send, int *recv ) const { PROFILE_START( "allToAll", profile_level ); MPI_Alltoall( (void *) send, n, MPI_INT, (void *) recv, n, MPI_INT, communicator ); PROFILE_STOP( "allToAll", profile_level ); } template<> void MPI_CLASS::allToAll( const int n, const unsigned long int *send, unsigned long int *recv ) const { PROFILE_START( "allToAll", profile_level ); MPI_Alltoall( (void *) send, n, MPI_UNSIGNED_LONG, (void *) recv, n, MPI_UNSIGNED_LONG, communicator ); PROFILE_STOP( "allToAll", profile_level ); } template<> void MPI_CLASS::allToAll( const int n, const long int *send, long int *recv ) const { PROFILE_START( "allToAll", profile_level ); MPI_Alltoall( (void *) send, n, MPI_LONG, (void *) recv, n, MPI_LONG, communicator ); PROFILE_STOP( "allToAll", profile_level ); } template<> void MPI_CLASS::allToAll( const int n, const float *send, float *recv ) const { PROFILE_START( "allToAll", profile_level ); MPI_Alltoall( (void *) send, n, MPI_FLOAT, (void *) recv, n, MPI_FLOAT, communicator ); PROFILE_STOP( "allToAll", profile_level ); } template<> void MPI_CLASS::allToAll( const int n, const double *send, double *recv ) const { PROFILE_START( "allToAll", profile_level ); MPI_Alltoall( (void *) send, n, MPI_DOUBLE, (void *) recv, n, MPI_DOUBLE, communicator ); PROFILE_STOP( "allToAll", profile_level ); } #endif /************************************************************************ * call_allToAll * * Note: these specializations are only called when using MPI. 
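*
* Example (illustrative sketch): the variable-size form takes per-rank counts
* and element displacements into the packed send/recv buffers; the
* displacements are typically a running sum of the counts (recv_cnt is
* usually obtained by exchanging send_cnt first):
*
*     std::vector<int> send_cnt( comm_size ), send_disp( comm_size );
*     // ... fill send_cnt[r] with the number of entries destined for rank r ...
*     send_disp[0] = 0;
*     for ( int r = 1; r < comm_size; r++ )
*         send_disp[r] = send_disp[r - 1] + send_cnt[r - 1];
*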
* ************************************************************************/ #ifdef USE_MPI // unsigned char template<> void MPI_CLASS::call_allToAll( const unsigned char *send_data, const int send_cnt[], const int send_disp[], unsigned char *recv_data, const int *recv_cnt, const int *recv_disp ) const { PROFILE_START( "allToAllv", profile_level ); MPI_Alltoallv( (void *) send_data, (int *) send_cnt, (int *) send_disp, MPI_UNSIGNED_CHAR, (void *) recv_data, (int *) recv_cnt, (int *) recv_disp, MPI_UNSIGNED_CHAR, communicator ); PROFILE_STOP( "allToAllv", profile_level ); } // char template<> void MPI_CLASS::call_allToAll( const char *send_data, const int send_cnt[], const int send_disp[], char *recv_data, const int *recv_cnt, const int *recv_disp ) const { PROFILE_START( "allToAllv", profile_level ); MPI_Alltoallv( (void *) send_data, (int *) send_cnt, (int *) send_disp, MPI_CHAR, (void *) recv_data, (int *) recv_cnt, (int *) recv_disp, MPI_CHAR, communicator ); PROFILE_STOP( "allToAllv", profile_level ); } // unsigned int template<> void MPI_CLASS::call_allToAll( const unsigned int *send_data, const int send_cnt[], const int send_disp[], unsigned int *recv_data, const int *recv_cnt, const int *recv_disp ) const { PROFILE_START( "allToAllv", profile_level ); MPI_Alltoallv( (void *) send_data, (int *) send_cnt, (int *) send_disp, MPI_UNSIGNED, (void *) recv_data, (int *) recv_cnt, (int *) recv_disp, MPI_UNSIGNED, communicator ); PROFILE_STOP( "allToAllv", profile_level ); } // int template<> void MPI_CLASS::call_allToAll( const int *send_data, const int send_cnt[], const int send_disp[], int *recv_data, const int *recv_cnt, const int *recv_disp ) const { PROFILE_START( "allToAllv", profile_level ); MPI_Alltoallv( (void *) send_data, (int *) send_cnt, (int *) send_disp, MPI_INT, (void *) recv_data, (int *) recv_cnt, (int *) recv_disp, MPI_INT, communicator ); PROFILE_STOP( "allToAllv", profile_level ); } // unsigned long int template<> void MPI_CLASS::call_allToAll( const unsigned long int *send_data, const int send_cnt[], const int send_disp[], unsigned long int *recv_data, const int *recv_cnt, const int *recv_disp ) const { PROFILE_START( "allToAllv", profile_level ); MPI_Alltoallv( (void *) send_data, (int *) send_cnt, (int *) send_disp, MPI_UNSIGNED_LONG, (void *) recv_data, (int *) recv_cnt, (int *) recv_disp, MPI_UNSIGNED_LONG, communicator ); PROFILE_STOP( "allToAllv", profile_level ); } // long int template<> void MPI_CLASS::call_allToAll( const long int *send_data, const int send_cnt[], const int send_disp[], long int *recv_data, const int *recv_cnt, const int *recv_disp ) const { PROFILE_START( "allToAllv", profile_level ); MPI_Alltoallv( (void *) send_data, (int *) send_cnt, (int *) send_disp, MPI_LONG, (void *) recv_data, (int *) recv_cnt, (int *) recv_disp, MPI_LONG, communicator ); PROFILE_STOP( "allToAllv", profile_level ); } // float template<> void MPI_CLASS::call_allToAll( const float *send_data, const int send_cnt[], const int send_disp[], float *recv_data, const int *recv_cnt, const int *recv_disp ) const { PROFILE_START( "allToAllv", profile_level ); MPI_Alltoallv( (void *) send_data, (int *) send_cnt, (int *) send_disp, MPI_FLOAT, (void *) recv_data, (int *) recv_cnt, (int *) recv_disp, MPI_FLOAT, communicator ); PROFILE_STOP( "allToAllv", profile_level ); } // double template<> void MPI_CLASS::call_allToAll( const double *send_data, const int send_cnt[], const int send_disp[], double *recv_data, const int *recv_cnt, const int *recv_disp ) const { PROFILE_START( 
"allToAllv", profile_level ); MPI_Alltoallv( (void *) send_data, (int *) send_cnt, (int *) send_disp, MPI_DOUBLE, (void *) recv_data, (int *) recv_cnt, (int *) recv_disp, MPI_DOUBLE, communicator ); PROFILE_STOP( "allToAllv", profile_level ); } #else // Default instatiation of unsigned char template<> void MPI_CLASS::call_allToAll( const char *, const int[], const int[], char *, const int *, const int * ) const { MPI_ERROR( "Should not reach this point" ); } #endif /************************************************************************ * call_sumScan * * Note: these specializations are only called when using MPI. * ************************************************************************/ #ifdef USE_MPI // unsigned char template<> void MPI_CLASS::call_sumScan( const unsigned char *send, unsigned char *recv, int n ) const { PROFILE_START( "sumScan", profile_level ); MPI_Scan( (void *) send, (void *) recv, n, MPI_UNSIGNED_CHAR, MPI_SUM, communicator ); PROFILE_STOP( "sumScan", profile_level ); } // char template<> void MPI_CLASS::call_sumScan( const char *send, char *recv, int n ) const { PROFILE_START( "sumScan", profile_level ); MPI_Scan( (void *) send, (void *) recv, n, MPI_SIGNED_CHAR, MPI_SUM, communicator ); PROFILE_STOP( "sumScan", profile_level ); } // unsigned int template<> void MPI_CLASS::call_sumScan( const unsigned int *send, unsigned int *recv, int n ) const { PROFILE_START( "sumScan", profile_level ); MPI_Scan( (void *) send, (void *) recv, n, MPI_UNSIGNED, MPI_SUM, communicator ); PROFILE_STOP( "sumScan", profile_level ); } // int template<> void MPI_CLASS::call_sumScan( const int *send, int *recv, int n ) const { PROFILE_START( "sumScan", profile_level ); MPI_Scan( (void *) send, (void *) recv, n, MPI_INT, MPI_SUM, communicator ); PROFILE_STOP( "sumScan", profile_level ); } // long int template<> void MPI_CLASS::call_sumScan( const long int *send, long int *recv, int n ) const { PROFILE_START( "sumScan", profile_level ); MPI_Scan( (void *) send, (void *) recv, n, MPI_LONG, MPI_SUM, communicator ); PROFILE_STOP( "sumScan", profile_level ); } // unsigned long int template<> void MPI_CLASS::call_sumScan( const unsigned long *send, unsigned long *recv, int n ) const { PROFILE_START( "sumScan", profile_level ); MPI_Scan( (void *) send, (void *) recv, n, MPI_UNSIGNED_LONG, MPI_SUM, communicator ); PROFILE_STOP( "sumScan", profile_level ); } // size_t #ifdef USE_WINDOWS template<> void MPI_CLASS::call_sumScan( const size_t *send, size_t *recv, int n ) const { MPI_ASSERT( MPI_SIZE_T != 0 ); PROFILE_START( "sumScan", profile_level ); MPI_Scan( (void *) send, (void *) recv, n, MPI_SIZE_T, MPI_SUM, communicator ); PROFILE_STOP( "sumScan", profile_level ); } #endif // float template<> void MPI_CLASS::call_sumScan( const float *send, float *recv, int n ) const { PROFILE_START( "sumScan", profile_level ); MPI_Scan( (void *) send, (void *) recv, n, MPI_FLOAT, MPI_SUM, communicator ); PROFILE_STOP( "sumScan", profile_level ); } // double template<> void MPI_CLASS::call_sumScan( const double *send, double *recv, int n ) const { PROFILE_START( "sumScan", profile_level ); MPI_Scan( (void *) send, (void *) recv, n, MPI_DOUBLE, MPI_SUM, communicator ); PROFILE_STOP( "sumScan", profile_level ); } // std::complex template<> void MPI_CLASS::call_sumScan>( const std::complex *x, std::complex *y, int n ) const { auto send = new double[2 * n]; auto recv = new double[2 * n]; for ( int i = 0; i < n; i++ ) { send[2 * i + 0] = real( x[i] ); send[2 * i + 1] = imag( x[i] ); } MPI_Scan( (void *) send, (void 
*) recv, 2 * n, MPI_DOUBLE, MPI_SUM, communicator ); for ( int i = 0; i < n; i++ ) y[i] = std::complex( recv[2 * i + 0], recv[2 * i + 1] ); delete[] send; delete[] recv; } #endif /************************************************************************ * call_minScan * * Note: these specializations are only called when using MPI. * ************************************************************************/ #ifdef USE_MPI // unsigned char template<> void MPI_CLASS::call_minScan( const unsigned char *send, unsigned char *recv, int n ) const { PROFILE_START( "minScan", profile_level ); MPI_Scan( (void *) send, (void *) recv, n, MPI_UNSIGNED_CHAR, MPI_MIN, communicator ); PROFILE_STOP( "minScan", profile_level ); } // char template<> void MPI_CLASS::call_minScan( const char *send, char *recv, int n ) const { PROFILE_START( "minScan", profile_level ); MPI_Scan( (void *) send, (void *) recv, n, MPI_SIGNED_CHAR, MPI_MIN, communicator ); PROFILE_STOP( "minScan", profile_level ); } // unsigned int template<> void MPI_CLASS::call_minScan( const unsigned int *send, unsigned int *recv, int n ) const { PROFILE_START( "minScan", profile_level ); MPI_Scan( (void *) send, (void *) recv, n, MPI_UNSIGNED, MPI_MIN, communicator ); PROFILE_STOP( "minScan", profile_level ); } // int template<> void MPI_CLASS::call_minScan( const int *send, int *recv, int n ) const { PROFILE_START( "minScan", profile_level ); MPI_Scan( (void *) send, (void *) recv, n, MPI_INT, MPI_MIN, communicator ); PROFILE_STOP( "minScan", profile_level ); } // unsigned long int template<> void MPI_CLASS::call_minScan( const unsigned long int *send, unsigned long int *recv, int n ) const { PROFILE_START( "minScan", profile_level ); MPI_Scan( (void *) send, (void *) recv, n, MPI_UNSIGNED_LONG, MPI_MIN, communicator ); PROFILE_STOP( "minScan", profile_level ); } // long int template<> void MPI_CLASS::call_minScan( const long int *send, long int *recv, int n ) const { PROFILE_START( "minScan", profile_level ); MPI_Scan( (void *) send, (void *) recv, n, MPI_LONG, MPI_MIN, communicator ); PROFILE_STOP( "minScan", profile_level ); } // size_t #ifdef USE_WINDOWS template<> void MPI_CLASS::call_minScan( const size_t *send, size_t *recv, int n ) const { MPI_ASSERT( MPI_SIZE_T != 0 ); PROFILE_START( "minScan", profile_level ); MPI_Scan( (void *) send, (void *) recv, n, MPI_SIZE_T, MPI_MIN, communicator ); PROFILE_STOP( "minScan", profile_level ); } #endif // float template<> void MPI_CLASS::call_minScan( const float *send, float *recv, int n ) const { PROFILE_START( "minScan", profile_level ); MPI_Scan( (void *) send, (void *) recv, n, MPI_FLOAT, MPI_MIN, communicator ); PROFILE_STOP( "minScan", profile_level ); } // double template<> void MPI_CLASS::call_minScan( const double *send, double *recv, int n ) const { PROFILE_START( "minScan", profile_level ); MPI_Scan( (void *) send, (void *) recv, n, MPI_DOUBLE, MPI_MIN, communicator ); PROFILE_STOP( "minScan", profile_level ); } #endif /************************************************************************ * call_maxScan * * Note: these specializations are only called when using MPI. 
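*
* Example (illustrative sketch): MPI_Scan performs an inclusive prefix
* reduction, so rank r receives the reduction over ranks 0..r (here with
* MPI_MAX). For instance, with MPI_SUM and a contribution of 1 per rank:
*
*     int x = 1, y = 0;
*     MPI_Scan( &x, &y, 1, MPI_INT, MPI_SUM, communicator );
*     // y == comm_rank + 1 on every rank
*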
* ************************************************************************/ #ifdef USE_MPI // unsigned char template<> void MPI_CLASS::call_maxScan( const unsigned char *send, unsigned char *recv, int n ) const { PROFILE_START( "maxScan", profile_level ); MPI_Scan( (void *) send, (void *) recv, n, MPI_UNSIGNED_CHAR, MPI_MAX, communicator ); PROFILE_STOP( "maxScan", profile_level ); } // char template<> void MPI_CLASS::call_maxScan( const char *send, char *recv, int n ) const { PROFILE_START( "maxScan", profile_level ); MPI_Scan( (void *) send, (void *) recv, n, MPI_SIGNED_CHAR, MPI_MAX, communicator ); PROFILE_STOP( "maxScan", profile_level ); } // unsigned int template<> void MPI_CLASS::call_maxScan( const unsigned int *send, unsigned int *recv, int n ) const { PROFILE_START( "maxScan", profile_level ); MPI_Scan( (void *) send, (void *) recv, n, MPI_UNSIGNED, MPI_MAX, communicator ); PROFILE_STOP( "maxScan", profile_level ); } // int template<> void MPI_CLASS::call_maxScan( const int *send, int *recv, int n ) const { PROFILE_START( "maxScan", profile_level ); MPI_Scan( (void *) send, (void *) recv, n, MPI_INT, MPI_MAX, communicator ); PROFILE_STOP( "maxScan", profile_level ); } // long int template<> void MPI_CLASS::call_maxScan( const long int *send, long int *recv, int n ) const { PROFILE_START( "maxScan", profile_level ); MPI_Scan( (void *) send, (void *) recv, n, MPI_LONG, MPI_MAX, communicator ); PROFILE_STOP( "maxScan", profile_level ); } // unsigned long int template<> void MPI_CLASS::call_maxScan( const unsigned long int *send, unsigned long int *recv, int n ) const { PROFILE_START( "maxScan", profile_level ); MPI_Scan( (void *) send, (void *) recv, n, MPI_UNSIGNED_LONG, MPI_MAX, communicator ); PROFILE_STOP( "maxScan", profile_level ); } // size_t #ifdef USE_WINDOWS template<> void MPI_CLASS::call_maxScan( const size_t *send, size_t *recv, int n ) const { MPI_ASSERT( MPI_SIZE_T != 0 ); PROFILE_START( "maxScan", profile_level ); MPI_Scan( (void *) send, (void *) recv, n, MPI_SIZE_T, MPI_MAX, communicator ); PROFILE_STOP( "maxScan", profile_level ); } #endif // float template<> void MPI_CLASS::call_maxScan( const float *send, float *recv, int n ) const { PROFILE_START( "maxScan", profile_level ); MPI_Scan( (void *) send, (void *) recv, n, MPI_FLOAT, MPI_MAX, communicator ); PROFILE_STOP( "maxScan", profile_level ); } // double template<> void MPI_CLASS::call_maxScan( const double *send, double *recv, int n ) const { PROFILE_START( "maxScan", profile_level ); MPI_Scan( (void *) send, (void *) recv, n, MPI_DOUBLE, MPI_MAX, communicator ); PROFILE_STOP( "maxScan", profile_level ); } #endif /************************************************************************ * Communicate ranks for communication * ************************************************************************/ std::vector<int> MPI_CLASS::commRanks( const std::vector<int> &ranks ) const { #ifdef USE_MPI // Get a byte array with the ranks to communicate auto data1 = new char[comm_size]; auto data2 = new char[comm_size]; memset( data1, 0, comm_size ); memset( data2, 0, comm_size ); for ( auto &rank : ranks ) data1[rank] = 1; MPI_Alltoall( data1, 1, MPI_CHAR, data2, 1, MPI_CHAR, communicator ); int N = 0; for ( int i = 0; i < comm_size; i++ ) N += data2[i]; std::vector<int> ranks_out; ranks_out.reserve( N ); for ( int i = 0; i < comm_size; i++ ) { if ( data2[i] ) ranks_out.push_back( i ); } delete[] data1; delete[] data2; return ranks_out; #else return ranks; #endif } /************************************************************************
* Wait functions * ************************************************************************/ #ifdef USE_MPI void MPI_CLASS::wait( MPI_Request request ) { PROFILE_START( "wait", profile_level ); MPI_Status status; int flag = 0; int err = MPI_Test( &request, &flag, &status ); MPI_ASSERT( err == MPI_SUCCESS ); // Check that the first call is valid while ( !flag ) { // Put the current thread to sleep to allow other threads to run sched_yield(); // Check if the request has finished MPI_Test( &request, &flag, &status ); } PROFILE_STOP( "wait", profile_level ); } int MPI_CLASS::waitAny( int count, MPI_Request *request ) { if ( count == 0 ) return -1; PROFILE_START( "waitAny", profile_level ); int index = -1; int flag = 0; auto status = new MPI_Status[count]; int err = MPI_Testany( count, request, &index, &flag, status ); MPI_ASSERT( err == MPI_SUCCESS ); // Check that the first call is valid while ( !flag ) { // Put the current thread to sleep to allow other threads to run sched_yield(); // Check if the request has finished MPI_Testany( count, request, &index, &flag, status ); } MPI_ASSERT( index >= 0 ); // Check that the index is valid delete[] status; PROFILE_STOP( "waitAny", profile_level ); return index; } void MPI_CLASS::waitAll( int count, MPI_Request *request ) { if ( count == 0 ) return; PROFILE_START( "waitAll", profile_level ); int flag = 0; auto status = new MPI_Status[count]; int err = MPI_Testall( count, request, &flag, status ); MPI_ASSERT( err == MPI_SUCCESS ); // Check that the first call is valid while ( !flag ) { // Put the current thread to sleep to allow other threads to run sched_yield(); // Check if the request has finished MPI_Testall( count, request, &flag, status ); } PROFILE_STOP( "waitAll", profile_level ); delete[] status; } std::vector MPI_CLASS::waitSome( int count, MPI_Request *request ) { if ( count == 0 ) return std::vector(); PROFILE_START( "waitSome", profile_level ); std::vector indicies( count, -1 ); auto *status = new MPI_Status[count]; int outcount = 0; int err = MPI_Testsome( count, request, &outcount, &indicies[0], status ); MPI_ASSERT( err == MPI_SUCCESS ); // Check that the first call is valid MPI_ASSERT( outcount != MPI_UNDEFINED ); // Check that the first call is valid while ( outcount == 0 ) { // Put the current thread to sleep to allow other threads to run sched_yield(); // Check if the request has finished MPI_Testsome( count, request, &outcount, &indicies[0], status ); } indicies.resize( outcount ); delete[] status; PROFILE_STOP( "waitSome", profile_level ); return indicies; } #else void MPI_CLASS::wait( MPI_Request request ) { PROFILE_START( "wait", profile_level ); while ( 1 ) { // Check if the request is in our list if ( global_isendrecv_list.find( request ) == global_isendrecv_list.end() ) break; // Put the current thread to sleep to allow other threads to run sched_yield(); } PROFILE_STOP( "wait", profile_level ); } int MPI_CLASS::waitAny( int count, MPI_Request *request ) { if ( count == 0 ) return -1; PROFILE_START( "waitAny", profile_level ); int index = 0; while ( 1 ) { // Check if the request is in our list bool found_any = false; for ( int i = 0; i < count; i++ ) { if ( global_isendrecv_list.find( request[i] ) == global_isendrecv_list.end() ) { found_any = true; index = i; } } if ( found_any ) break; // Put the current thread to sleep to allow other threads to run sched_yield(); } PROFILE_STOP( "waitAny", profile_level ); return index; } void MPI_CLASS::waitAll( int count, MPI_Request *request ) { if ( count == 0 ) return; PROFILE_START( 
"waitAll", profile_level ); while ( 1 ) { // Check if the request is in our list bool found_all = true; for ( int i = 0; i < count; i++ ) { if ( global_isendrecv_list.find( request[i] ) != global_isendrecv_list.end() ) found_all = false; } if ( found_all ) break; // Put the current thread to sleep to allow other threads to run sched_yield(); } PROFILE_STOP( "waitAll", profile_level ); } std::vector MPI_CLASS::waitSome( int count, MPI_Request *request ) { if ( count == 0 ) return std::vector(); PROFILE_START( "waitSome", profile_level ); std::vector indicies; while ( 1 ) { // Check if the request is in our list for ( int i = 0; i < count; i++ ) { if ( global_isendrecv_list.find( request[i] ) == global_isendrecv_list.end() ) indicies.push_back( i ); } if ( !indicies.empty() ) break; // Put the current thread to sleep to allow other threads to run sched_yield(); } PROFILE_STOP( "waitSome", profile_level ); return indicies; } #endif /************************************************************************ * Probe functions * ************************************************************************/ #ifdef USE_MPI int MPI_CLASS::Iprobe( int source, int tag ) const { MPI_INSIST( tag <= d_maxTag, "Maximum tag value exceeded" ); MPI_INSIST( tag >= 0, "tag must be >= 0" ); MPI_Status status; int flag = 0; MPI_Iprobe( source, tag, communicator, &flag, &status ); if ( flag == 0 ) return -1; int count; MPI_Get_count( &status, MPI_BYTE, &count ); MPI_ASSERT( count >= 0 ); return count; } int MPI_CLASS::probe( int source, int tag ) const { MPI_INSIST( tag <= d_maxTag, "Maximum tag value exceeded" ); MPI_INSIST( tag >= 0, "tag must be >= 0" ); MPI_Status status; MPI_Probe( source, tag, communicator, &status ); int count; MPI_Get_count( &status, MPI_BYTE, &count ); MPI_ASSERT( count >= 0 ); return count; } #else int MPI_CLASS::Iprobe( int, int ) const { MPI_ERROR( "Not implimented for serial codes (Iprobe)" ); return 0; } int MPI_CLASS::probe( int, int ) const { MPI_ERROR( "Not implimented for serial codes (probe)" ); return 0; } #endif /************************************************************************ * Timer functions * ************************************************************************/ #ifdef USE_MPI double MPI_CLASS::time() { return MPI_Wtime(); } double MPI_CLASS::tick() { return MPI_Wtick(); } #else double MPI_CLASS::time() { auto t = std::chrono::system_clock::now(); auto ns = std::chrono::duration_cast( t.time_since_epoch() ); return 1e-9 * ns.count(); } double MPI_CLASS::tick() { auto period = std::chrono::system_clock::period(); return static_cast( period.num ) / static_cast( period.den ); } #endif /************************************************************************ * Serialize a block of code across MPI processes * ************************************************************************/ void MPI_CLASS::serializeStart() { #ifdef USE_MPI using namespace std::chrono_literals; if ( comm_rank == 0 ) { // Start rank 0 immediately } else { // Wait for a message from the previous rank MPI_Request request; MPI_Status status; int flag = false, buf = 0; MPI_Irecv( &buf, 1, MPI_INT, comm_rank - 1, 5627, MPI_COMM_WORLD, &request ); while ( !flag ) { MPI_Test( &request, &flag, &status ); std::this_thread::sleep_for( 50ms ); } } #endif } void MPI_CLASS::serializeStop() { #ifdef USE_MPI using namespace std::chrono_literals; if ( comm_rank < comm_size - 1 ) { // Send flag to next rank MPI_Send( &comm_rank, 1, MPI_INT, comm_rank + 1, 5627, MPI_COMM_WORLD ); // Wait for final finished flag int 
flag = false, buf = 0; MPI_Request request; MPI_Status status; MPI_Irecv( &buf, 1, MPI_INT, comm_size - 1, 5627, MPI_COMM_WORLD, &request ); while ( !flag ) { MPI_Test( &request, &flag, &status ); std::this_thread::sleep_for( 50ms ); } } else { // Send final flag to all ranks for ( int i = 0; i < comm_size - 1; i++ ) MPI_Send( &comm_rank, 1, MPI_INT, i, 5627, MPI_COMM_WORLD ); } #endif } /**************************************************************************** * Function to start/stop MPI * ****************************************************************************/ #ifdef USE_EXT_MPI static bool called_MPI_Init = false; #endif bool MPI_CLASS::MPI_Active() { #ifdef USE_EXT_MPI int MPI_initialized, MPI_finalized; MPI_Initialized( &MPI_initialized ); MPI_Finalized( &MPI_finalized ); return MPI_initialized != 0 && MPI_finalized == 0; #else return false; #endif } void MPI_CLASS::start_MPI( int argc, char *argv[], int profile_level ) { changeProfileLevel( profile_level ); NULL_USE( argc ); NULL_USE( argv ); #ifdef USE_EXT_MPI if ( MPI_Active() ) { called_MPI_Init = false; } else { int provided; int result = MPI_Init_thread( &argc, &argv, MPI_THREAD_MULTIPLE, &provided ); if ( result != MPI_SUCCESS ) MPI_ERROR( "Unable to initialize MPI" ); if ( provided < MPI_THREAD_MULTIPLE ) std::cerr << "Warning: Failed to start MPI with MPI_THREAD_MULTIPLE\n"; called_MPI_Init = true; } #endif } void MPI_CLASS::stop_MPI() { #ifdef USE_EXT_MPI int finalized; MPI_Finalized( &finalized ); if ( called_MPI_Init && !finalized ) { MPI_Barrier( MPI_COMM_WORLD ); MPI_Finalize(); called_MPI_Init = false; } #endif } } // namespace Utilities
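/************************************************************************
* Example driver (illustrative sketch, not part of the original source).
* start_MPI/stop_MPI and serializeStart/serializeStop are defined above;
* the wrapper type name (written here via the MPI_CLASS macro), its
* constructor taking a communicator, and getRank() are assumptions.
*
*     int main( int argc, char *argv[] )
*     {
*         Utilities::MPI_CLASS::start_MPI( argc, argv, 0 );   // initialize MPI if needed
*         {
*             Utilities::MPI_CLASS comm( MPI_COMM_WORLD );    // assumed constructor
*             comm.serializeStart();                  // ranks run this block in order
*             printf( "Hello from rank %d\n", comm.getRank() );   // getRank() assumed
*             comm.serializeStop();
*         }
*         Utilities::MPI_CLASS::stop_MPI();   // finalize MPI if start_MPI initialized it
*         return 0;
*     }
************************************************************************/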