// This file implements a wrapper class for MPI functions
#include "common/MPI.h"
#include "common/Utilities.h"
#include "ProfilerApp.h"
#include "StackTrace/ErrorHandlers.h"
#include "StackTrace/StackTrace.h"
// Include all other headers
#include <algorithm>
#include <chrono>
#include <climits>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <limits>
#include <random>
#include <stdexcept>
#include <thread>
#include <typeinfo>
// Include OS specific headers
#undef USE_WINDOWS
#undef USE_LINUX
#undef USE_MAC
#if defined( WIN32 ) || defined( _WIN32 ) || defined( WIN64 ) || defined( _WIN64 )
// We are using windows
#define USE_WINDOWS
#include <process.h>
#include <windows.h>
#define sched_yield() Sleep( 0 )
#elif defined( __APPLE__ )
// Using MAC
#define USE_MAC
#include <sched.h>
#elif defined( __linux ) || defined( __linux__ ) || defined( __unix ) || defined( __posix )
// We are using linux
#define USE_LINUX
#include <sched.h>
#include <unistd.h>
#else
#error Unknown OS
#endif
// Convenience defines
#define MPI_ERROR ERROR
#define MPI_ASSERT ASSERT
#define MPI_INSIST INSIST
#define MPI_WARNING WARNING
#define MPI_CLASS_COMM_NULL MPI_COMM_NULL
#define MPI_CLASS_COMM_SELF MPI_COMM_SELF
#define MPI_CLASS_COMM_WORLD MPI_COMM_WORLD
// Global variable to track create new unique comms (dup and split)
#ifndef USE_MPI
MPI_Comm uniqueGlobalComm = 11;
#endif
#if defined( USE_SAMRAI ) && defined( USE_PETSC ) && !defined( USE_MPI )
int MPI_REQUEST_NULL = 3;
int MPI_ERR_IN_STATUS = 4;
#endif
namespace Utilities {
// Some special structs to work with MPI
#ifdef USE_MPI
struct IntIntStruct {
int j;
int i;
};
struct LongIntStruct {
long int j;
int i;
};
struct FloatIntStruct {
float f;
int i;
};
struct DoubleIntStruct {
double d;
int i;
};
#endif
// Initialize the static member variables
volatile unsigned int MPI_CLASS::N_MPI_Comm_created = 0;
volatile unsigned int MPI_CLASS::N_MPI_Comm_destroyed = 0;
short MPI_CLASS::profile_level = 127;
// Define a type for use with size_t
#ifdef USE_MPI
static MPI_Datatype MPI_SIZE_T = 0x0;
static MPI_Datatype getSizeTDataType()
{
int size_int, size_long, size_longlong, size_longlong2;
MPI_Type_size( MPI_UNSIGNED, &size_int );
MPI_Type_size( MPI_UNSIGNED_LONG, &size_long );
MPI_Type_size( MPI_UNSIGNED_LONG_LONG, &size_longlong );
MPI_Type_size( MPI_LONG_LONG_INT, &size_longlong2 );
if ( sizeof( size_t ) == size_int ) {
return MPI_UNSIGNED;
} else if ( sizeof( size_t ) == size_long ) {
return MPI_UNSIGNED_LONG;
} else if ( sizeof( size_t ) == size_longlong ) {
return MPI_UNSIGNED_LONG_LONG;
} else if ( sizeof( size_t ) == size_longlong2 ) {
MPI_WARNING( "Using signed long long datatype for size_t in MPI" );
return MPI_LONG_LONG_INT; // Note: this is not unsigned
} else {
MPI_ERROR( "No suitable datatype found" );
}
return 0;
}
#endif
// Static data for asynchronous communication without MPI
// Note: these routines may not be thread-safe yet
#ifndef USE_MPI
static const int mpi_max_tag = 0x003FFFFF;
struct Isendrecv_struct {
const char *data; // Pointer to data
int status; // Status: 1-sending, 2-receiving
};
std::map<MPI_Request, Isendrecv_struct> global_isendrecv_list;
static MPI_Request getRequest( MPI_Comm comm, int tag )
{
MPI_ASSERT( tag >= 0 && tag <= mpi_max_tag );
// Use hashing function: 2^64*0.5*(sqrt(5)-1)
uint64_t a = static_cast<uint8_t>( comm ) * 0x9E3779B97F4A7C15;
uint64_t b = static_cast<uint8_t>( tag ) * 0x9E3779B97F4A7C15;
uint64_t hash = a ^ b;
MPI_Request request;
memcpy( &request, &hash, sizeof( MPI_Request ) );
return request;
}
#endif
// Check the mpi error code
#ifdef USE_MPI
inline void check_MPI( int error )
{
if ( error != MPI_SUCCESS )
MPI_ERROR( "Error calling MPI routine" );
}
#endif
/******************************************************************
* Some helper functions to convert between signed/unsigned types *
******************************************************************/
DISABLE_WARNINGS
static inline constexpr unsigned int offset_int()
{
return ~static_cast<unsigned int>( std::numeric_limits<int>::min() ) + 1;
}
static inline constexpr unsigned long int offset_long()
{
return ~static_cast<unsigned long int>( std::numeric_limits<long int>::min() ) + 1;
}
static inline constexpr unsigned long long int offset_long_long()
{
return ~static_cast<unsigned long long int>( std::numeric_limits<long long int>::min() ) + 1;
}
ENABLE_WARNINGS
static inline unsigned int signed_to_unsigned( int x )
{
const auto offset = offset_int();
return ( x >= 0 ) ? static_cast<unsigned int>( x ) + offset :
offset - static_cast<unsigned int>( -x );
}
static inline unsigned long int signed_to_unsigned( long int x )
{
const auto offset = offset_long();
return ( x >= 0 ) ? static_cast<unsigned long int>( x ) + offset :
offset - static_cast<unsigned long int>( -x );
}
static inline unsigned long long int signed_to_unsigned( long long int x )
{
const auto offset = offset_long_long();
return ( x >= 0 ) ? static_cast<unsigned long long int>( x ) + offset :
offset - static_cast<unsigned long long int>( -x );
}
static inline int unsigned_to_signed( unsigned int x )
{
const auto offset = offset_int();
return ( x >= offset ) ? static_cast<int>( x - offset ) : -static_cast<int>( offset - x );
}
static inline long int unsigned_to_signed( unsigned long int x )
{
const auto offset = offset_long();
return ( x >= offset ) ? static_cast<long int>( x - offset ) :
-static_cast<long int>( offset - x );
}
static inline long long int unsigned_to_signed( unsigned long long int x )
{
const auto offset = offset_long_long();
return ( x >= offset ) ? static_cast<long long int>( x - offset ) :
-static_cast<long long int>( offset - x );
}
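/* Worked example (illustrative, not part of the original source): the helpers above map
 * signed values to unsigned values (and back) while preserving order, so the unsigned
 * reductions below can reuse the signed MINLOC/MAXLOC code paths. With the int offset
 * of 2^31:
 *     signed_to_unsigned( -5 ) == 0x80000000u - 5 == 0x7FFFFFFBu
 *     signed_to_unsigned(  0 ) == 0x80000000u
 *     signed_to_unsigned(  7 ) == 0x80000000u + 7 == 0x80000007u
 *     unsigned_to_signed( signed_to_unsigned( x ) ) == x for any int x
 */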
/************************************************************************
* Get the MPI version *
************************************************************************/
std::array<int, 2> MPI_CLASS::version()
{
#ifdef USE_MPI
int MPI_version;
int MPI_subversion;
MPI_Get_version( &MPI_version, &MPI_subversion );
return { MPI_version, MPI_subversion };
#else
return { 0, 0 };
#endif
}
std::string MPI_CLASS::info()
{
#ifdef USE_MPI
#if MPI_VERSION >= 3
int MPI_version_length = 0;
char MPI_version_string[MPI_MAX_LIBRARY_VERSION_STRING];
MPI_Get_library_version( MPI_version_string, &MPI_version_length );
if ( MPI_version_length > 0 ) {
std::string MPI_info( MPI_version_string, MPI_version_length );
size_t pos = MPI_info.find( '\n' );
while ( pos != std::string::npos ) {
MPI_info.insert( pos + 1, " " );
pos = MPI_info.find( '\n', pos + 1 );
}
return MPI_info;
}
#endif
auto tmp = version();
return std::to_string( tmp[0] ) + "." + std::to_string( tmp[1] );
#else
return std::string();
#endif
}
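/* Usage sketch (illustrative, not part of the original source):
 *     auto ver = MPI_CLASS::version();      // e.g. { 3, 1 } for an MPI 3.1 library
 *     std::string txt = MPI_CLASS::info();  // full library version string when MPI >= 3
 *     printf( "MPI %i.%i\n%s\n", ver[0], ver[1], txt.c_str() );
 */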
/************************************************************************
* Functions to get/set the process affinities *
************************************************************************/
int MPI_CLASS::getNumberOfProcessors() { return std::thread::hardware_concurrency(); }
std::vector<int> MPI_CLASS::getProcessAffinity()
{
std::vector<int> procs;
#ifdef USE_LINUX
cpu_set_t mask;
int error = sched_getaffinity( getpid(), sizeof( cpu_set_t ), &mask );
if ( error != 0 )
MPI_ERROR( "Error getting process affinity" );
for ( int i = 0; i < (int) sizeof( cpu_set_t ) * CHAR_BIT; i++ ) {
if ( CPU_ISSET( i, &mask ) )
procs.push_back( i );
}
#elif defined( USE_MAC )
// MAC does not support getting or setting the affinity
printf( "Warning: MAC does not support getting the process affinity\n" );
procs.clear();
#elif defined( USE_WINDOWS )
HANDLE hProc = GetCurrentProcess();
size_t procMask;
size_t sysMask;
PDWORD_PTR procMaskPtr = reinterpret_cast<PDWORD_PTR>( &procMask );
PDWORD_PTR sysMaskPtr = reinterpret_cast<PDWORD_PTR>( &sysMask );
GetProcessAffinityMask( hProc, procMaskPtr, sysMaskPtr );
for ( int i = 0; i < (int) sizeof( size_t ) * CHAR_BIT; i++ ) {
if ( ( procMask & 0x1 ) != 0 )
procs.push_back( i );
procMask >>= 1;
}
#else
#error Unknown OS
#endif
return procs;
}
void MPI_CLASS::setProcessAffinity( const std::vector<int> &procs )
{
#ifdef USE_LINUX
cpu_set_t mask;
CPU_ZERO( &mask );
for ( auto cpu : procs )
CPU_SET( cpu, &mask );
int error = sched_setaffinity( getpid(), sizeof( cpu_set_t ), &mask );
if ( error != 0 )
MPI_ERROR( "Error setting process affinity" );
#elif defined( USE_MAC )
// MAC does not support getting or setting the affinity
NULL_USE( procs );
#elif defined( USE_WINDOWS )
DWORD mask = 0;
for ( size_t i = 0; i < procs.size(); i++ )
mask |= ( (DWORD) 1 ) << procs[i];
HANDLE hProc = GetCurrentProcess();
SetProcessAffinityMask( hProc, mask );
#else
#error Unknown OS
#endif
}
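/* Usage sketch (illustrative, not part of the original source): pin the calling process
 * to the first half of the available processors (a no-op on Mac, as noted above):
 *     int N = MPI_CLASS::getNumberOfProcessors();
 *     std::vector<int> cpus;
 *     for ( int i = 0; i < N / 2; i++ )
 *         cpus.push_back( i );
 *     MPI_CLASS::setProcessAffinity( cpus );
 *     auto check = MPI_CLASS::getProcessAffinity();  // should match cpus on Linux/Windows
 */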
/************************************************************************
* Function to check if MPI is active *
************************************************************************/
bool MPI_CLASS::MPI_active()
{
#ifdef USE_MPI
int initialized = 0, finalized = 0;
MPI_Initialized( &initialized );
MPI_Finalized( &finalized );
return initialized != 0 && finalized == 0;
#else
return true;
#endif
}
MPI_CLASS::ThreadSupport MPI_CLASS::queryThreadSupport()
{
#ifdef USE_MPI
int provided = 0;
MPI_Query_thread( &provided );
if ( provided == MPI_THREAD_SINGLE )
return ThreadSupport::SINGLE;
if ( provided == MPI_THREAD_FUNNELED )
return ThreadSupport::FUNNELED;
if ( provided == MPI_THREAD_SERIALIZED )
return ThreadSupport::SERIALIZED;
if ( provided == MPI_THREAD_MULTIPLE )
return ThreadSupport::MULTIPLE;
return ThreadSupport::SINGLE;
#else
return ThreadSupport::MULTIPLE;
#endif
}
/************************************************************************
* Function to perform a load balance of the given processes *
************************************************************************/
void MPI_CLASS::balanceProcesses( const MPI_CLASS &globalComm, const int method,
const std::vector<int> &procs, const int N_min_in, const int N_max_in )
{
// Build the list of processors to use
std::vector<int> cpus = procs;
if ( cpus.empty() ) {
for ( int i = 0; i < getNumberOfProcessors(); i++ )
cpus.push_back( i );
}
// Handle the "easy cases"
if ( method == 1 ) {
// Trivial case where we do not need any communication
setProcessAffinity( cpus );
return;
}
// Get the sub-communicator for the current node
MPI_CLASS nodeComm = globalComm.splitByNode();
int N_min = std::min<int>( std::max<int>( N_min_in, 1 ), cpus.size() );
int N_max = N_max_in;
if ( N_max == -1 )
N_max = cpus.size();
N_max = std::min<int>( N_max, cpus.size() );
MPI_ASSERT( N_max >= N_min );
// Perform the load balance within the node
if ( method == 2 ) {
int N_proc = cpus.size() / nodeComm.getSize();
N_proc = std::max<int>( N_proc, N_min );
N_proc = std::min<int>( N_proc, N_max );
std::vector<int> cpus2( N_proc, -1 );
for ( int i = 0; i < N_proc; i++ )
cpus2[i] = cpus[( nodeComm.getRank() * N_proc + i ) % cpus.size()];
setProcessAffinity( cpus2 );
} else {
MPI_ERROR( "Unknown method for load balance" );
}
}
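/* Usage sketch (illustrative, not part of the original source): method 1 binds every rank
 * to the full processor list without any communication; method 2 divides each node's
 * processors evenly among the ranks sharing that node:
 *     MPI_CLASS world( MPI_COMM_WORLD );
 *     MPI_CLASS::balanceProcesses( world, 2, std::vector<int>(), 1, -1 );
 */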
/************************************************************************
* Empty constructor *
************************************************************************/
MPI_CLASS::MPI_CLASS()
{
// Initialize the data members to a default (null) communicator
#ifdef USE_MPI
communicator = MPI_COMM_NULL;
d_maxTag = 0x7FFFFFFF;
#else
communicator = MPI_CLASS_COMM_NULL;
d_maxTag = mpi_max_tag;
#endif
d_ranks = nullptr;
d_count = nullptr;
d_manage = false;
comm_rank = 0;
comm_size = 1;
d_isNull = true;
d_currentTag = nullptr;
d_call_abort = true;
tmp_alignment = -1;
}
/************************************************************************
* Destructor *
************************************************************************/
MPI_CLASS::~MPI_CLASS() { reset(); }
void MPI_CLASS::reset()
{
// Decrement the count if used
int count = -1;
if ( d_count != nullptr )
count = --( *d_count );
if ( count == 0 ) {
// We are holding the last reference to the MPI_Comm object; we need to free it
if ( d_manage ) {
#ifdef USE_MPI
MPI_Comm_set_errhandler( communicator, MPI_ERRORS_ARE_FATAL );
int err = MPI_Comm_free( &communicator );
if ( err != MPI_SUCCESS )
MPI_ERROR( "Problem free'ing MPI_Comm object" );
communicator = MPI_CLASS_COMM_NULL;
++N_MPI_Comm_destroyed;
#endif
}
if ( d_ranks != nullptr )
delete[] d_ranks;
delete d_count;
}
if ( d_currentTag == nullptr ) {
// No tag index
} else if ( d_currentTag[1] > 1 ) {
--( d_currentTag[1] );
} else {
delete[] d_currentTag;
}
d_manage = false;
d_count = nullptr;
d_ranks = nullptr;
comm_rank = 0;
comm_size = 1;
d_maxTag = 0;
d_isNull = true;
d_currentTag = nullptr;
d_call_abort = true;
}
/************************************************************************
* Copy constructors *
************************************************************************/
MPI_CLASS::MPI_CLASS( const MPI_CLASS &comm )
: communicator( comm.communicator ),
d_isNull( comm.d_isNull ),
d_manage( comm.d_manage ),
comm_rank( comm.comm_rank ),
comm_size( comm.comm_size ),
d_ranks( comm.d_ranks ),
d_maxTag( comm.d_maxTag ),
d_currentTag( comm.d_currentTag )
{
// Initialize the data members to the existing comm object
if ( d_currentTag != nullptr )
++d_currentTag[1];
d_call_abort = comm.d_call_abort;
// Set and increment the count
d_count = comm.d_count;
if ( d_count != nullptr )
++( *d_count );
tmp_alignment = -1;
}
MPI_CLASS::MPI_CLASS( MPI_CLASS &&rhs ) : MPI_CLASS()
{
std::swap( communicator, rhs.communicator );
std::swap( d_isNull, rhs.d_isNull );
std::swap( d_manage, rhs.d_manage );
std::swap( d_call_abort, rhs.d_call_abort );
std::swap( profile_level, rhs.profile_level );
std::swap( comm_rank, rhs.comm_rank );
std::swap( comm_size, rhs.comm_size );
std::swap( d_ranks, rhs.d_ranks );
std::swap( d_maxTag, rhs.d_maxTag );
std::swap( d_currentTag, rhs.d_currentTag );
std::swap( d_count, rhs.d_count );
std::swap( tmp_alignment, rhs.tmp_alignment );
}
/************************************************************************
* Assignment operators *
************************************************************************/
MPI_CLASS &MPI_CLASS::operator=( const MPI_CLASS &comm )
{
if ( this == &comm ) // protect against invalid self-assignment
return *this;
// Destroy the previous object
this->reset();
// Initialize the data members to the existing object
this->communicator = comm.communicator;
this->comm_rank = comm.comm_rank;
this->comm_size = comm.comm_size;
this->d_ranks = comm.d_ranks;
this->d_isNull = comm.d_isNull;
this->d_manage = comm.d_manage;
this->d_maxTag = comm.d_maxTag;
this->d_call_abort = comm.d_call_abort;
this->d_currentTag = comm.d_currentTag;
if ( this->d_currentTag != nullptr )
++( this->d_currentTag[1] );
// Set and increment the count
this->d_count = comm.d_count;
if ( this->d_count != nullptr )
++( *d_count );
this->tmp_alignment = -1;
return *this;
}
MPI_CLASS &MPI_CLASS::operator=( MPI_CLASS &&rhs )
{
if ( this == &rhs ) // protect against invalid self-assignment
return *this;
std::swap( communicator, rhs.communicator );
std::swap( d_isNull, rhs.d_isNull );
std::swap( d_manage, rhs.d_manage );
std::swap( d_call_abort, rhs.d_call_abort );
std::swap( profile_level, rhs.profile_level );
std::swap( comm_rank, rhs.comm_rank );
std::swap( comm_size, rhs.comm_size );
std::swap( d_ranks, rhs.d_ranks );
std::swap( d_maxTag, rhs.d_maxTag );
std::swap( d_currentTag, rhs.d_currentTag );
std::swap( d_count, rhs.d_count );
std::swap( tmp_alignment, rhs.tmp_alignment );
return *this;
}
/************************************************************************
* Constructor from existing MPI communicator *
************************************************************************/
int d_global_currentTag_world1[2] = { 1, 1 };
int d_global_currentTag_world2[2] = { 1, 1 };
int d_global_currentTag_self[2] = { 1, 1 };
#ifdef USE_MPI
std::atomic_int d_global_count_world1 = { 1 };
std::atomic_int d_global_count_world2 = { 1 };
std::atomic_int d_global_count_self = { 1 };
#endif
MPI_CLASS::MPI_CLASS( MPI_Comm comm, bool manage )
{
d_count = nullptr;
d_ranks = nullptr;
d_manage = false;
tmp_alignment = -1;
// Check if we are using our version of comm_world
if ( comm == MPI_CLASS_COMM_WORLD ) {
communicator = MPI_COMM_WORLD;
} else if ( comm == MPI_CLASS_COMM_SELF ) {
communicator = MPI_COMM_SELF;
} else if ( comm == MPI_CLASS_COMM_NULL ) {
communicator = MPI_COMM_NULL;
} else {
communicator = comm;
}
#ifdef USE_MPI
// We are using MPI, use the MPI communicator to initialize the data
if ( communicator != MPI_COMM_NULL ) {
// Set the MPI_SIZE_T datatype if it has not been set
if ( MPI_SIZE_T == 0x0 )
MPI_SIZE_T = getSizeTDataType();
// Attach the error handler
StackTrace::setMPIErrorHandler( communicator );
// Get the communicator properties
MPI_Comm_rank( communicator, &comm_rank );
MPI_Comm_size( communicator, &comm_size );
int flag, *val;
int ierr = MPI_Comm_get_attr( communicator, MPI_TAG_UB, &val, &flag );
MPI_ASSERT( ierr == MPI_SUCCESS );
if ( flag == 0 ) {
d_maxTag = 0x7FFFFFFF; // The tag is not a valid attribute (set to 2^31-1)
} else {
d_maxTag = *val;
if ( d_maxTag < 0 )
d_maxTag = 0x7FFFFFFF; // The maximum tag exceeds a signed int (set to 2^31-1)
MPI_INSIST( d_maxTag >= 0x7FFF, "maximum tag size is < MPI standard" );
}
} else {
comm_rank = 1;
comm_size = 0;
d_maxTag = 0x7FFFFFFF;
}
d_isNull = communicator == MPI_COMM_NULL;
if ( manage && communicator != MPI_COMM_NULL && communicator != MPI_COMM_SELF &&
communicator != MPI_COMM_WORLD )
d_manage = true;
// Create the count (Note: we do not need to worry about thread safety)
if ( communicator == MPI_CLASS_COMM_WORLD ) {
d_count = &d_global_count_world1;
++( *d_count );
} else if ( communicator == MPI_COMM_WORLD ) {
d_count = &d_global_count_world2;
++( *d_count );
} else if ( communicator == MPI_COMM_SELF ) {
d_count = &d_global_count_self;
++( *d_count );
} else if ( communicator == MPI_COMM_NULL ) {
d_count = nullptr;
} else {
d_count = new std::atomic_int;
*d_count = 1;
}
if ( d_manage )
++N_MPI_Comm_created;
// Create d_ranks
if ( comm_size > 1 ) {
d_ranks = new int[comm_size];
d_ranks[0] = -1;
}
#else
// We are not using MPI, initialize based on the communicator
NULL_USE( manage );
comm_rank = 0;
comm_size = 1;
d_maxTag = mpi_max_tag;
d_isNull = communicator == MPI_COMM_NULL;
if ( d_isNull )
comm_size = 0;
#endif
if ( communicator == MPI_CLASS_COMM_WORLD ) {
d_currentTag = d_global_currentTag_world1;
++( this->d_currentTag[1] );
} else if ( communicator == MPI_COMM_WORLD ) {
d_currentTag = d_global_currentTag_world2;
++( this->d_currentTag[1] );
} else if ( communicator == MPI_COMM_SELF ) {
d_currentTag = d_global_currentTag_self;
++( this->d_currentTag[1] );
} else if ( communicator == MPI_COMM_NULL ) {
d_currentTag = nullptr;
} else {
d_currentTag = new int[2];
d_currentTag[0] = ( d_maxTag <= 0x10000 ) ? 1 : 0x1FFF;
d_currentTag[1] = 1;
}
d_call_abort = true;
}
/************************************************************************
* Return the ranks of the communicator in the global comm *
************************************************************************/
std::vector<int> MPI_CLASS::globalRanks() const
{
// Get my global rank if it has not been set
static int myGlobalRank = -1;
if ( myGlobalRank == -1 ) {
#ifdef USE_MPI
if ( MPI_active() )
MPI_Comm_rank( MPI_CLASS_COMM_WORLD, &myGlobalRank );
#else
myGlobalRank = 0;
#endif
}
// Check if we are dealing with a serial or null communicator
if ( comm_size == 1 )
return std::vector<int>( 1, myGlobalRank );
if ( d_ranks == nullptr || communicator == MPI_COMM_NULL )
return std::vector<int>();
// Fill d_ranks if necessary
if ( d_ranks[0] == -1 ) {
if ( communicator == MPI_CLASS_COMM_WORLD ) {
for ( int i = 0; i < comm_size; i++ )
d_ranks[i] = i;
} else {
MPI_ASSERT( myGlobalRank != -1 );
this->allGather( myGlobalRank, d_ranks );
}
}
// Return d_ranks
return std::vector<int>( d_ranks, d_ranks + comm_size );
}
/************************************************************************
* Generate a random number *
************************************************************************/
size_t MPI_CLASS::rand() const
{
size_t val = 0;
if ( getRank() == 0 ) {
static std::random_device rd;
static std::mt19937 gen( rd() );
static std::uniform_int_distribution<size_t> dist;
val = dist( gen );
}
val = bcast( val, 0 );
return val;
}
/************************************************************************
* Intersect two communicators *
************************************************************************/
#ifdef USE_MPI
static inline void MPI_Group_free2( MPI_Group *group )
{
if ( *group != MPI_GROUP_EMPTY ) {
// MPICH is fine with freeing an empty group; OpenMPI crashes
MPI_Group_free( group );
}
}
MPI_CLASS MPI_CLASS::intersect( const MPI_CLASS &comm1, const MPI_CLASS &comm2 )
{
MPI_Group group1 = MPI_GROUP_EMPTY, group2 = MPI_GROUP_EMPTY;
if ( !comm1.isNull() ) {
MPI_Group_free2( &group1 );
MPI_Comm_group( comm1.communicator, &group1 );
}
if ( !comm2.isNull() ) {
MPI_Group_free2( &group2 );
MPI_Comm_group( comm2.communicator, &group2 );
}
MPI_Group group12;
MPI_Group_intersection( group1, group2, &group12 );
int compare1, compare2;
MPI_Group_compare( group1, group12, &compare1 );
MPI_Group_compare( group2, group12, &compare2 );
MPI_CLASS new_comm( MPI_CLASS_COMM_NULL );
int size;
MPI_Group_size( group12, &size );
if ( compare1 != MPI_UNEQUAL && size != 0 ) {
// The intersection matches comm1
new_comm = comm1;
} else if ( compare2 != MPI_UNEQUAL && size != 0 ) {
// The intersection matches comm2
new_comm = comm2;
} else if ( comm1.isNull() ) {
// comm1 is null, we can return safely (comm1 is needed for communication)
} else {
// The intersection is smaller than comm1 or comm2
// Check if the new comm is null for all processors
int max_size = 0;
MPI_Allreduce( &size, &max_size, 1, MPI_INT, MPI_MAX, comm1.communicator );
if ( max_size == 0 ) {
// We are dealing with completely disjoint sets
new_comm = MPI_CLASS( MPI_CLASS_COMM_NULL, false );
} else {
// Create the new comm
// Note: OpenMPI crashes if the intersection group is EMPTY for any processors
// We will set it to SELF for the EMPTY processors, then create a null comm later
if ( group12 == MPI_GROUP_EMPTY ) {
MPI_Group_free2( &group12 );
MPI_Comm_group( MPI_COMM_SELF, &group12 );
}
MPI_Comm new_MPI_comm;
MPI_Comm_create( comm1.communicator, group12, &new_MPI_comm );
if ( size > 0 ) {
// This is the valid case where we create a new intersection comm
new_comm = MPI_CLASS( new_MPI_comm, true );
} else {
// We actually want a null comm for this communicator
new_comm = MPI_CLASS( MPI_CLASS_COMM_NULL, false );
MPI_Comm_free( &new_MPI_comm );
}
}
}
MPI_Group_free2( &group1 );
MPI_Group_free2( &group2 );
MPI_Group_free2( &group12 );
return new_comm;
}
#else
MPI_CLASS MPI_CLASS::intersect( const MPI_CLASS &comm1, const MPI_CLASS &comm2 )
{
if ( comm1.isNull() || comm2.isNull() )
return MPI_CLASS( MPI_CLASS_COMM_NULL, false );
MPI_ASSERT( comm1.comm_size == 1 && comm2.comm_size == 1 );
return comm1;
}
#endif
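/* Usage sketch (illustrative, not part of the original source): the intersection contains
 * the ranks present in both communicators; completely disjoint communicators yield a
 * null comm on every rank:
 *     MPI_CLASS world( MPI_COMM_WORLD );
 *     MPI_CLASS sub = world.split( world.getRank() % 2, 0 );   // even/odd sub-communicators
 *     MPI_CLASS common = MPI_CLASS::intersect( world, sub );   // matches sub on every rank
 */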
/************************************************************************
* Split a comm *
************************************************************************/
MPI_CLASS MPI_CLASS::split( int color, int key ) const
{
if ( d_isNull ) {
return MPI_CLASS( MPI_CLASS_COMM_NULL );
} else if ( comm_size == 1 ) {
if ( color == -1 )
return MPI_CLASS( MPI_CLASS_COMM_NULL );
return dup();
}
MPI_Comm new_MPI_comm = MPI_CLASS_COMM_NULL;
#ifdef USE_MPI
// USE MPI to split the communicator
if ( color == -1 ) {
check_MPI( MPI_Comm_split( communicator, MPI_UNDEFINED, key, &new_MPI_comm ) );
} else {
check_MPI( MPI_Comm_split( communicator, color, key, &new_MPI_comm ) );
}
#endif
// Create the new object
NULL_USE( key );
MPI_CLASS new_comm( new_MPI_comm, true );
new_comm.d_call_abort = d_call_abort;
return new_comm;
}
MPI_CLASS MPI_CLASS::splitByNode( int key ) const
{
// Check if we are dealing with a single processor (trivial case)
if ( comm_size == 1 )
return this->split( 0, 0 );
// Get the node name
std::string name = MPI_CLASS::getNodeName();
// Gather the names from all ranks
std::vector<std::string> list( comm_size );
allGather( name, &list[0] );
// Create the colors (ranks that share a node name share a color)
std::vector<int> color( comm_size, -1 );
int N_colors = 0;
for ( int i = 0; i < comm_size; i++ ) {
for ( int j = 0; j < i; j++ ) {
if ( list[i] == list[j] ) {
color[i] = color[j];
break;
}
}
if ( color[i] == -1 )
color[i] = N_colors++;
}
MPI_CLASS new_comm = this->split( color[comm_rank], key );
return new_comm;
}
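/* Usage sketch (illustrative, not part of the original source):
 *     MPI_CLASS world( MPI_COMM_WORLD );
 *     MPI_CLASS node = world.splitByNode( 0 );   // ranks that share a node name
 *     MPI_CLASS half = world.split( world.getRank() < world.getSize() / 2 ? 0 : 1, 0 );
 *     MPI_CLASS none = world.split( -1, 0 );     // color == -1 yields a null communicator
 */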
/************************************************************************
* Duplicate an existing comm object *
************************************************************************/
MPI_CLASS MPI_CLASS::dup() const
{
if ( d_isNull )
return MPI_CLASS( MPI_CLASS_COMM_NULL );
MPI_Comm new_MPI_comm = communicator;
#if defined( USE_MPI ) || defined( USE_PETSC )
// USE MPI to duplicate the communicator
MPI_Comm_dup( communicator, &new_MPI_comm );
#else
new_MPI_comm = uniqueGlobalComm;
uniqueGlobalComm++;
#endif
// Create the new comm object
MPI_CLASS new_comm( new_MPI_comm, true );
new_comm.d_isNull = d_isNull;
new_comm.d_call_abort = d_call_abort;
return new_comm;
}
/************************************************************************
* Get the node name *
************************************************************************/
std::string MPI_CLASS::getNodeName()
{
#ifdef USE_MPI
int length;
char name[MPI_MAX_PROCESSOR_NAME + 1];
memset( name, 0, MPI_MAX_PROCESSOR_NAME + 1 );
MPI_Get_processor_name( name, &length );
return std::string( name );
#else
return "Node0";
#endif
}
/************************************************************************
* Overload operator == *
************************************************************************/
bool MPI_CLASS::operator==( const MPI_CLASS &comm ) const
{
return communicator == comm.communicator;
}
/************************************************************************
* Overload operator != *
************************************************************************/
bool MPI_CLASS::operator!=( const MPI_CLASS &comm ) const
{
return communicator != comm.communicator;
}
/************************************************************************
* Overload operator < *
************************************************************************/
bool MPI_CLASS::operator<( const MPI_CLASS &comm ) const
{
MPI_ASSERT( !this->d_isNull && !comm.d_isNull );
bool flag = true;
// First check if either communicator is NULL
if ( this->d_isNull )
return false;
if ( comm.d_isNull )
flag = false;
// Use compare to check if the comms are equal
if ( compare( comm ) != 0 )
return false;
// Check that the size of the other communicator is > the current communicator size
if ( comm_size >= comm.comm_size )
flag = false;
// Check the union of the communicator groups
// this is < comm iff this group is a subgroup of comm's group
#ifdef USE_MPI
MPI_Group group1 = MPI_GROUP_EMPTY, group2 = MPI_GROUP_EMPTY, group12 = MPI_GROUP_EMPTY;
if ( !d_isNull )
MPI_Comm_group( communicator, &group1 );
if ( !comm.d_isNull )
MPI_Comm_group( comm.communicator, &group2 );
MPI_Group_union( group1, group2, &group12 );
int compare;
MPI_Group_compare( group2, group12, &compare );
if ( compare == MPI_UNEQUAL )
flag = false;
MPI_Group_free( &group1 );
MPI_Group_free( &group2 );
MPI_Group_free( &group12 );
#endif
// Perform a global reduce of the flag (equivalent to all operation)
return allReduce( flag );
}
/************************************************************************
* Overload operator <= *
************************************************************************/
bool MPI_CLASS::operator<=( const MPI_CLASS &comm ) const
{
MPI_ASSERT( !this->d_isNull && !comm.d_isNull );
bool flag = true;
// First check if either communicator is NULL
if ( this->d_isNull )
return false;
if ( comm.d_isNull )
flag = false;
#ifdef USE_MPI
int world_size = 0;
MPI_Comm_size( MPI_COMM_WORLD, &world_size );
if ( comm.getSize() == world_size )
return true;
if ( getSize() == 1 && !comm.d_isNull )
return true;
#endif
// Use compare to check if the comms are equal
if ( compare( comm ) != 0 )
return true;
// Check that the size of the other communicator is > the current communicator size
// this is <= comm iff this group is a subgroup of comm's group
if ( comm_size > comm.comm_size )
flag = false;
// Check the union of the communicator groups
#ifdef USE_MPI
MPI_Group group1, group2, group12;
MPI_Comm_group( communicator, &group1 );
MPI_Comm_group( comm.communicator, &group2 );
MPI_Group_union( group1, group2, &group12 );
int compare;
MPI_Group_compare( group2, group12, &compare );
if ( compare == MPI_UNEQUAL )
flag = false;
MPI_Group_free( &group1 );
MPI_Group_free( &group2 );
MPI_Group_free( &group12 );
#endif
// Perform a global reduce of the flag (equivalent to all operation)
return allReduce( flag );
}
/************************************************************************
* Overload operator > *
************************************************************************/
bool MPI_CLASS::operator>( const MPI_CLASS &comm ) const
{
bool flag = true;
// First check if either communicator is NULL
if ( this->d_isNull )
return false;
if ( comm.d_isNull )
flag = false;
// Use compare to check if the comms are equal
if ( compare( comm ) != 0 )
return false;
// Check that the size of the other communicator is > the current communicator size
if ( comm_size <= comm.comm_size )
flag = false;
// Check the union of the communicator groups
// this is > comm iff comm's group is a subgroup of this group
#ifdef USE_MPI
MPI_Group group1 = MPI_GROUP_EMPTY, group2 = MPI_GROUP_EMPTY, group12 = MPI_GROUP_EMPTY;
if ( !d_isNull )
MPI_Comm_group( communicator, &group1 );
if ( !comm.d_isNull )
MPI_Comm_group( comm.communicator, &group2 );
MPI_Group_union( group1, group2, &group12 );
int compare;
MPI_Group_compare( group1, group12, &compare );
if ( compare == MPI_UNEQUAL )
flag = false;
MPI_Group_free( &group1 );
MPI_Group_free( &group2 );
MPI_Group_free( &group12 );
#endif
// Perform a global reduce of the flag (equivalent to all operation)
return allReduce( flag );
}
/************************************************************************
* Overload operator >= *
************************************************************************/
bool MPI_CLASS::operator>=( const MPI_CLASS &comm ) const
{
bool flag = true;
// First check if either communicator is NULL
if ( this->d_isNull )
return false;
if ( comm.d_isNull )
flag = false;
#ifdef USE_MPI
int world_size = 0;
MPI_Comm_size( MPI_COMM_WORLD, &world_size );
if ( getSize() == world_size )
return true;
if ( comm.getSize() == 1 && !comm.d_isNull )
return true;
#endif
// Use compare to check if the comms are equal
if ( compare( comm ) != 0 )
return true;
// Check that the size of the other communicator is > the current communicator size
if ( comm_size < comm.comm_size )
flag = false;
// Check the union of the communicator groups
// this is >= comm iff comm's group is a subgroup of this group
#ifdef USE_MPI
MPI_Group group1 = MPI_GROUP_EMPTY, group2 = MPI_GROUP_EMPTY, group12 = MPI_GROUP_EMPTY;
if ( !d_isNull )
MPI_Comm_group( communicator, &group1 );
if ( !comm.d_isNull )
MPI_Comm_group( comm.communicator, &group2 );
MPI_Group_union( group1, group2, &group12 );
int compare;
MPI_Group_compare( group1, group12, &compare );
if ( compare == MPI_UNEQUAL )
flag = false;
MPI_Group_free( &group1 );
MPI_Group_free( &group2 );
MPI_Group_free( &group12 );
#endif
// Perform a global reduce of the flag (equivalent to all operation)
return allReduce( flag );
}
/************************************************************************
* Compare two comm objects *
************************************************************************/
int MPI_CLASS::compare( const MPI_CLASS &comm ) const
{
if ( communicator == comm.communicator )
return 1;
#ifdef USE_MPI
if ( d_isNull || comm.d_isNull )
return 0;
int result;
check_MPI( MPI_Comm_compare( communicator, comm.communicator, &result ) );
if ( result == MPI_IDENT )
return 2;
else if ( result == MPI_CONGRUENT )
return 3;
else if ( result == MPI_SIMILAR )
return 4;
else if ( result == MPI_UNEQUAL )
return 0;
MPI_ERROR( "Unknown results from comm compare" );
#else
if ( comm.communicator == MPI_COMM_NULL || communicator == MPI_COMM_NULL )
return 0;
else
return 3;
#endif
return 0;
}
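/* Usage sketch (illustrative, not part of the original source): compare() returns 1 if the
 * two objects wrap the same MPI_Comm handle, 2 for MPI_IDENT, 3 for MPI_CONGRUENT (same
 * ranks in the same order, different context), 4 for MPI_SIMILAR (same ranks, different
 * order), and 0 if the communicators are unequal or either is null:
 *     MPI_CLASS a( MPI_COMM_WORLD );
 *     MPI_CLASS b = a.dup();
 *     int c1 = a.compare( a );   // 1: same handle
 *     int c2 = a.compare( b );   // 3: congruent duplicate
 */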
/************************************************************************
* Abort the program. *
************************************************************************/
void MPI_CLASS::setCallAbortInSerialInsteadOfExit( bool flag ) { d_call_abort = flag; }
void MPI_CLASS::abort() const
{
#ifdef USE_MPI
MPI_Comm comm = communicator;
if ( comm == MPI_COMM_NULL )
comm = MPI_COMM_WORLD;
if ( !MPI_active() ) {
// MPI is not available
exit( -1 );
} else if ( comm_size > 1 ) {
MPI_Abort( comm, -1 );
} else if ( d_call_abort ) {
MPI_Abort( comm, -1 );
} else {
exit( -1 );
}
#else
exit( -1 );
#endif
}
/************************************************************************
* newTag *
************************************************************************/
int MPI_CLASS::newTag()
{
#ifdef USE_MPI
// Synchronize the processes to ensure all ranks enter this call
// Needed so the count will match
barrier();
// Return and increment the tag
int tag = ( *d_currentTag )++;
MPI_INSIST( tag <= d_maxTag, "Maximum number of tags exceeded\n" );
return tag;
#else
static int globalCurrentTag = 1;
return globalCurrentTag++;
#endif
}
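/* Usage sketch (illustrative, not part of the original source): newTag() is collective, so
 * every rank receives the same fresh tag and can use it for matched point-to-point traffic
 * without colliding with other tags on the communicator:
 *     MPI_CLASS comm( MPI_COMM_WORLD );
 *     int tag = comm.newTag();   // same value on every rank of comm
 */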
/************************************************************************
* allReduce *
************************************************************************/
bool MPI_CLASS::allReduce( const bool value ) const
{
bool ret = value;
if ( comm_size > 1 ) {
#ifdef USE_MPI
MPI_Allreduce(
(void *) &value, (void *) &ret, 1, MPI_UNSIGNED_CHAR, MPI_MIN, communicator );
#else
MPI_ERROR( "This shouldn't be possible" );
#endif
}
return ret;
}
/************************************************************************
* anyReduce *
************************************************************************/
bool MPI_CLASS::anyReduce( const bool value ) const
{
bool ret = value;
if ( comm_size > 1 ) {
#ifdef USE_MPI
MPI_Allreduce(
(void *) &value, (void *) &ret, 1, MPI_UNSIGNED_CHAR, MPI_MAX, communicator );
#else
MPI_ERROR( "This shouldn't be possible" );
#endif
}
return ret;
}
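/* Usage sketch (illustrative, not part of the original source): the boolean reductions
 * implement "true on all ranks" (MPI_MIN) and "true on any rank" (MPI_MAX):
 *     MPI_CLASS comm( MPI_COMM_WORLD );
 *     bool ok = localCheckPassed();        // hypothetical per-rank flag
 *     bool every = comm.allReduce( ok );   // true only if ok is true on every rank
 *     bool some = comm.anyReduce( ok );    // true if ok is true on at least one rank
 */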
/************************************************************************
* call_sumReduce *
* Note: these specializations are only called when using MPI. *
************************************************************************/
#ifdef USE_MPI
// unsigned char
template<>
void MPI_CLASS::call_sumReduce<unsigned char>(
const unsigned char *send, unsigned char *recv, const int n ) const
{
PROFILE_START( "sumReduce1<unsigned char>", profile_level );
MPI_Allreduce( (void *) send, (void *) recv, n, MPI_UNSIGNED_CHAR, MPI_SUM, communicator );
PROFILE_STOP( "sumReduce1<unsigned char>", profile_level );
}
template<>
void MPI_CLASS::call_sumReduce<unsigned char>( unsigned char *x, const int n ) const
{
PROFILE_START( "sumReduce2<unsigned char>", profile_level );
auto send = x;
auto recv = new unsigned char[n];
MPI_Allreduce( send, recv, n, MPI_UNSIGNED_CHAR, MPI_SUM, communicator );
for ( int i = 0; i < n; i++ )
x[i] = recv[i];
delete[] recv;
PROFILE_STOP( "sumReduce2<unsigned char>", profile_level );
}
// char
template<>
void MPI_CLASS::call_sumReduce<char>( const char *send, char *recv, const int n ) const
{
PROFILE_START( "sumReduce1<char>", profile_level );
MPI_Allreduce( (void *) send, (void *) recv, n, MPI_SIGNED_CHAR, MPI_SUM, communicator );
PROFILE_STOP( "sumReduce1<char>", profile_level );
}
template<>
void MPI_CLASS::call_sumReduce<char>( char *x, const int n ) const
{
PROFILE_START( "sumReduce2<char>", profile_level );
auto send = x;
auto recv = new char[n];
MPI_Allreduce( send, recv, n, MPI_SIGNED_CHAR, MPI_SUM, communicator );
for ( int i = 0; i < n; i++ )
x[i] = recv[i];
delete[] recv;
PROFILE_STOP( "sumReduce2<char>", profile_level );
}
// unsigned int
template<>
void MPI_CLASS::call_sumReduce<unsigned int>(
const unsigned int *send, unsigned int *recv, const int n ) const
{
PROFILE_START( "sumReduce1<unsigned int>", profile_level );
MPI_Allreduce( (void *) send, (void *) recv, n, MPI_UNSIGNED, MPI_SUM, communicator );
PROFILE_STOP( "sumReduce1<unsigned int>", profile_level );
}
template<>
void MPI_CLASS::call_sumReduce<unsigned int>( unsigned int *x, const int n ) const
{
PROFILE_START( "sumReduce2<unsigned int>", profile_level );
auto send = x;
auto recv = new unsigned int[n];
MPI_Allreduce( send, recv, n, MPI_UNSIGNED, MPI_SUM, communicator );
for ( int i = 0; i < n; i++ )
x[i] = recv[i];
delete[] recv;
PROFILE_STOP( "sumReduce2<unsigned int>", profile_level );
}
// int
template<>
void MPI_CLASS::call_sumReduce<int>( const int *send, int *recv, const int n ) const
{
PROFILE_START( "sumReduce1<int>", profile_level );
MPI_Allreduce( (void *) send, (void *) recv, n, MPI_INT, MPI_SUM, communicator );
PROFILE_STOP( "sumReduce1<int>", profile_level );
}
template<>
void MPI_CLASS::call_sumReduce<int>( int *x, const int n ) const
{
PROFILE_START( "sumReduce2<int>", profile_level );
auto send = x;
auto recv = new int[n];
MPI_Allreduce( send, recv, n, MPI_INT, MPI_SUM, communicator );
for ( int i = 0; i < n; i++ )
x[i] = recv[i];
delete[] recv;
PROFILE_STOP( "sumReduce2<int>", profile_level );
}
// long int
template<>
void MPI_CLASS::call_sumReduce<long int>( const long int *send, long int *recv, const int n ) const
{
PROFILE_START( "sumReduce1<long int>", profile_level );
MPI_Allreduce( (void *) send, (void *) recv, n, MPI_LONG, MPI_SUM, communicator );
PROFILE_STOP( "sumReduce1<long int>", profile_level );
}
template<>
void MPI_CLASS::call_sumReduce<long int>( long int *x, const int n ) const
{
PROFILE_START( "sumReduce2<long int>", profile_level );
auto send = x;
auto recv = new long int[n];
MPI_Allreduce( send, recv, n, MPI_LONG, MPI_SUM, communicator );
for ( int i = 0; i < n; i++ )
x[i] = recv[i];
delete[] recv;
PROFILE_STOP( "sumReduce2<long int>", profile_level );
}
// unsigned long int
template<>
void MPI_CLASS::call_sumReduce<unsigned long>(
const unsigned long *send, unsigned long *recv, const int n ) const
{
PROFILE_START( "sumReduce1<unsigned long>", profile_level );
MPI_Allreduce( (void *) send, (void *) recv, n, MPI_UNSIGNED_LONG, MPI_SUM, communicator );
PROFILE_STOP( "sumReduce1<unsigned long>", profile_level );
}
template<>
void MPI_CLASS::call_sumReduce<unsigned long>( unsigned long *x, const int n ) const
{
PROFILE_START( "sumReduce2<unsigned long>", profile_level );
auto send = x;
auto recv = new unsigned long int[n];
MPI_Allreduce( send, recv, n, MPI_UNSIGNED_LONG, MPI_SUM, communicator );
for ( int i = 0; i < n; i++ )
x[i] = recv[i];
delete[] recv;
PROFILE_STOP( "sumReduce2<unsigned long>", profile_level );
}
// size_t
#ifdef USE_WINDOWS
template<>
void MPI_CLASS::call_sumReduce<size_t>( const size_t *send, size_t *recv, const int n ) const
{
MPI_ASSERT( MPI_SIZE_T != 0 );
PROFILE_START( "sumReduce1<size_t>", profile_level );
MPI_Allreduce( (void *) send, (void *) recv, n, MPI_SIZE_T, MPI_SUM, communicator );
PROFILE_STOP( "sumReduce1<size_t>", profile_level );
}
template<>
void MPI_CLASS::call_sumReduce<size_t>( size_t *x, const int n ) const
{
MPI_ASSERT( MPI_SIZE_T != 0 );
PROFILE_START( "sumReduce2<size_t>", profile_level );
auto send = x;
auto recv = new size_t[n];
MPI_Allreduce( (void *) send, (void *) recv, n, MPI_SIZE_T, MPI_SUM, communicator );
for ( int i = 0; i < n; i++ )
x[i] = recv[i];
delete[] recv;
PROFILE_STOP( "sumReduce2<size_t>", profile_level );
}
#endif
// float
template<>
void MPI_CLASS::call_sumReduce<float>( const float *send, float *recv, const int n ) const
{
PROFILE_START( "sumReduce1<float>", profile_level );
MPI_Allreduce( (void *) send, (void *) recv, n, MPI_FLOAT, MPI_SUM, communicator );
PROFILE_STOP( "sumReduce1<float>", profile_level );
}
template<>
void MPI_CLASS::call_sumReduce<float>( float *x, const int n ) const
{
PROFILE_START( "sumReduce2<float>", profile_level );
auto send = x;
auto recv = new float[n];
MPI_Allreduce( send, recv, n, MPI_FLOAT, MPI_SUM, communicator );
for ( int i = 0; i < n; i++ )
x[i] = recv[i];
delete[] recv;
PROFILE_STOP( "sumReduce2<float>", profile_level );
}
// double
template<>
void MPI_CLASS::call_sumReduce<double>( const double *send, double *recv, const int n ) const
{
PROFILE_START( "sumReduce1<double>", profile_level );
MPI_Allreduce( (void *) send, (void *) recv, n, MPI_DOUBLE, MPI_SUM, communicator );
PROFILE_STOP( "sumReduce1<double>", profile_level );
}
template<>
void MPI_CLASS::call_sumReduce<double>( double *x, const int n ) const
{
PROFILE_START( "sumReduce2<double>", profile_level );
auto send = x;
auto recv = new double[n];
MPI_Allreduce( send, recv, n, MPI_DOUBLE, MPI_SUM, communicator );
for ( int i = 0; i < n; i++ )
x[i] = recv[i];
delete[] recv;
PROFILE_STOP( "sumReduce2<double>", profile_level );
}
// std::complex<double>
template<>
void MPI_CLASS::call_sumReduce<std::complex<double>>(
const std::complex<double> *x, std::complex<double> *y, const int n ) const
{
PROFILE_START( "sumReduce1<complex double>", profile_level );
auto send = new double[2 * n];
auto recv = new double[2 * n];
for ( int i = 0; i < n; i++ ) {
send[2 * i + 0] = real( x[i] );
send[2 * i + 1] = imag( x[i] );
}
MPI_Allreduce( (void *) send, (void *) recv, 2 * n, MPI_DOUBLE, MPI_SUM, communicator );
for ( int i = 0; i < n; i++ )
y[i] = std::complex<double>( recv[2 * i + 0], recv[2 * i + 1] );
delete[] send;
delete[] recv;
PROFILE_STOP( "sumReduce1<complex double>", profile_level );
}
template<>
void MPI_CLASS::call_sumReduce<std::complex<double>>( std::complex<double> *x, const int n ) const
{
PROFILE_START( "sumReduce2<complex double>", profile_level );
auto send = new double[2 * n];
auto recv = new double[2 * n];
for ( int i = 0; i < n; i++ ) {
send[2 * i + 0] = real( x[i] );
send[2 * i + 1] = imag( x[i] );
}
MPI_Allreduce( send, recv, 2 * n, MPI_DOUBLE, MPI_SUM, communicator );
for ( int i = 0; i < n; i++ )
x[i] = std::complex<double>( recv[2 * i + 0], recv[2 * i + 1] );
delete[] send;
delete[] recv;
PROFILE_STOP( "sumReduce2<complex double>", profile_level );
}
#endif
/************************************************************************
* call_minReduce *
* Note: these specializations are only called when using MPI. *
************************************************************************/
#ifdef USE_MPI
// unsigned char
template<>
void MPI_CLASS::call_minReduce<unsigned char>(
const unsigned char *send, unsigned char *recv, const int n, int *comm_rank_of_min ) const
{
if ( comm_rank_of_min == nullptr ) {
PROFILE_START( "minReduce1<unsigned char>", profile_level );
MPI_Allreduce( (void *) send, (void *) recv, n, MPI_UNSIGNED_CHAR, MPI_MIN, communicator );
PROFILE_STOP( "minReduce1<unsigned char>", profile_level );
} else {
auto tmp = new int[n];
for ( int i = 0; i < n; i++ )
tmp[i] = send[i];
call_minReduce<int>( tmp, n, comm_rank_of_min );
for ( int i = 0; i < n; i++ )
recv[i] = static_cast<unsigned char>( tmp[i] );
delete[] tmp;
}
}
template<>
void MPI_CLASS::call_minReduce<unsigned char>(
unsigned char *x, const int n, int *comm_rank_of_min ) const
{
if ( comm_rank_of_min == nullptr ) {
PROFILE_START( "minReduce2<unsigned char>", profile_level );
auto send = x;
auto recv = new unsigned char[n];
MPI_Allreduce( send, recv, n, MPI_UNSIGNED_CHAR, MPI_MIN, communicator );
for ( int i = 0; i < n; i++ )
x[i] = recv[i];
delete[] recv;
PROFILE_STOP( "minReduce2<unsigned char>", profile_level );
} else {
auto tmp = new int[n];
for ( int i = 0; i < n; i++ )
tmp[i] = x[i];
call_minReduce<int>( tmp, n, comm_rank_of_min );
for ( int i = 0; i < n; i++ )
x[i] = static_cast<unsigned char>( tmp[i] );
delete[] tmp;
}
}
// char
template<>
void MPI_CLASS::call_minReduce<char>(
const char *send, char *recv, const int n, int *comm_rank_of_min ) const
{
if ( comm_rank_of_min == nullptr ) {
PROFILE_START( "minReduce1<char>", profile_level );
MPI_Allreduce( (void *) send, (void *) recv, n, MPI_SIGNED_CHAR, MPI_MIN, communicator );
PROFILE_STOP( "minReduce1<char>", profile_level );
} else {
auto tmp = new int[n];
for ( int i = 0; i < n; i++ )
tmp[i] = send[i];
call_minReduce<int>( tmp, n, comm_rank_of_min );
for ( int i = 0; i < n; i++ )
recv[i] = static_cast<char>( tmp[i] );
delete[] tmp;
}
}
template<>
void MPI_CLASS::call_minReduce<char>( char *x, const int n, int *comm_rank_of_min ) const
{
if ( comm_rank_of_min == nullptr ) {
PROFILE_START( "minReduce2<char>", profile_level );
auto send = x;
auto recv = new char[n];
MPI_Allreduce( send, recv, n, MPI_SIGNED_CHAR, MPI_MIN, communicator );
for ( int i = 0; i < n; i++ )
x[i] = recv[i];
delete[] recv;
PROFILE_STOP( "minReduce2<char>", profile_level );
} else {
auto tmp = new int[n];
for ( int i = 0; i < n; i++ )
tmp[i] = x[i];
call_minReduce<int>( tmp, n, comm_rank_of_min );
for ( int i = 0; i < n; i++ )
x[i] = static_cast<char>( tmp[i] );
delete[] tmp;
}
}
// unsigned int
template<>
void MPI_CLASS::call_minReduce<unsigned int>(
const unsigned int *send, unsigned int *recv, const int n, int *comm_rank_of_min ) const
{
if ( comm_rank_of_min == nullptr ) {
PROFILE_START( "minReduce1<unsigned int>", profile_level );
MPI_Allreduce( (void *) send, (void *) recv, n, MPI_UNSIGNED, MPI_MIN, communicator );
PROFILE_STOP( "minReduce1<unsigned int>", profile_level );
} else {
auto tmp = new int[n];
for ( int i = 0; i < n; i++ )
tmp[i] = unsigned_to_signed( send[i] );
call_minReduce<int>( tmp, n, comm_rank_of_min );
for ( int i = 0; i < n; i++ )
recv[i] = signed_to_unsigned( tmp[i] );
delete[] tmp;
}
}
template<>
void MPI_CLASS::call_minReduce<unsigned int>(
unsigned int *x, const int n, int *comm_rank_of_min ) const
{
if ( comm_rank_of_min == nullptr ) {
PROFILE_START( "minReduce2<unsigned int>", profile_level );
auto send = x;
auto recv = new unsigned int[n];
MPI_Allreduce( send, recv, n, MPI_UNSIGNED, MPI_MIN, communicator );
for ( int i = 0; i < n; i++ )
x[i] = recv[i];
delete[] recv;
PROFILE_STOP( "minReduce2<unsigned int>", profile_level );
} else {
auto tmp = new int[n];
for ( int i = 0; i < n; i++ )
tmp[i] = unsigned_to_signed( x[i] );
call_minReduce<int>( tmp, n, comm_rank_of_min );
for ( int i = 0; i < n; i++ )
x[i] = signed_to_unsigned( tmp[i] );
delete[] tmp;
}
}
// int
template<>
void MPI_CLASS::call_minReduce<int>(
const int *x, int *y, const int n, int *comm_rank_of_min ) const
{
PROFILE_START( "minReduce1<int>", profile_level );
if ( comm_rank_of_min == nullptr ) {
MPI_Allreduce( (void *) x, (void *) y, n, MPI_INT, MPI_MIN, communicator );
} else {
auto recv = new IntIntStruct[n];
auto send = new IntIntStruct[n];
for ( int i = 0; i < n; ++i ) {
send[i].j = x[i];
send[i].i = comm_rank;
}
MPI_Allreduce( send, recv, n, MPI_2INT, MPI_MINLOC, communicator );
for ( int i = 0; i < n; ++i ) {
y[i] = recv[i].j;
comm_rank_of_min[i] = recv[i].i;
}
delete[] recv;
delete[] send;
}
PROFILE_STOP( "minReduce1<int>", profile_level );
}
template<>
void MPI_CLASS::call_minReduce<int>( int *x, const int n, int *comm_rank_of_min ) const
{
PROFILE_START( "minReduce2<int>", profile_level );
if ( comm_rank_of_min == nullptr ) {
auto send = x;
auto recv = new int[n];
MPI_Allreduce( send, recv, n, MPI_INT, MPI_MIN, communicator );
for ( int i = 0; i < n; i++ )
x[i] = recv[i];
delete[] recv;
} else {
auto recv = new IntIntStruct[n];
auto send = new IntIntStruct[n];
for ( int i = 0; i < n; ++i ) {
send[i].j = x[i];
send[i].i = comm_rank;
}
MPI_Allreduce( send, recv, n, MPI_2INT, MPI_MINLOC, communicator );
for ( int i = 0; i < n; ++i ) {
x[i] = recv[i].j;
comm_rank_of_min[i] = recv[i].i;
}
delete[] recv;
delete[] send;
}
PROFILE_STOP( "minReduce2<int>", profile_level );
}
// unsigned long int
template<>
void MPI_CLASS::call_minReduce<unsigned long int>( const unsigned long int *send,
unsigned long int *recv, const int n, int *comm_rank_of_min ) const
{
if ( comm_rank_of_min == nullptr ) {
PROFILE_START( "minReduce1<unsigned long>", profile_level );
MPI_Allreduce( (void *) send, (void *) recv, n, MPI_UNSIGNED_LONG, MPI_MIN, communicator );
PROFILE_STOP( "minReduce1<unsigned long>", profile_level );
} else {
auto tmp = new long int[n];
for ( int i = 0; i < n; i++ )
tmp[i] = unsigned_to_signed( send[i] );
call_minReduce<long int>( tmp, n, comm_rank_of_min );
for ( int i = 0; i < n; i++ )
recv[i] = signed_to_unsigned( tmp[i] );
delete[] tmp;
}
}
template<>
void MPI_CLASS::call_minReduce<unsigned long int>(
unsigned long int *x, const int n, int *comm_rank_of_min ) const
{
if ( comm_rank_of_min == nullptr ) {
PROFILE_START( "minReduce2<unsigned long>", profile_level );
auto send = x;
auto recv = new unsigned long int[n];
MPI_Allreduce( send, recv, n, MPI_UNSIGNED_LONG, MPI_MIN, communicator );
for ( int i = 0; i < n; i++ )
x[i] = recv[i];
delete[] recv;
PROFILE_STOP( "minReduce2<unsigned long>", profile_level );
} else {
auto tmp = new long int[n];
for ( int i = 0; i < n; i++ )
tmp[i] = unsigned_to_signed( x[i] );
call_minReduce<long int>( tmp, n, comm_rank_of_min );
for ( int i = 0; i < n; i++ )
x[i] = signed_to_unsigned( tmp[i] );
delete[] tmp;
}
}
// long int
template<>
void MPI_CLASS::call_minReduce<long int>(
const long int *x, long int *y, const int n, int *comm_rank_of_min ) const
{
PROFILE_START( "minReduce1<long int>", profile_level );
if ( comm_rank_of_min == nullptr ) {
MPI_Allreduce( (void *) x, (void *) y, n, MPI_LONG, MPI_MIN, communicator );
} else {
auto recv = new LongIntStruct[n];
auto send = new LongIntStruct[n];
for ( int i = 0; i < n; ++i ) {
send[i].j = x[i];
send[i].i = comm_rank;
}
MPI_Allreduce( send, recv, n, MPI_LONG_INT, MPI_MINLOC, communicator );
for ( int i = 0; i < n; ++i ) {
y[i] = recv[i].j;
comm_rank_of_min[i] = recv[i].i;
}
delete[] recv;
delete[] send;
}
PROFILE_STOP( "minReduce1<long int>", profile_level );
}
template<>
void MPI_CLASS::call_minReduce<long int>( long int *x, const int n, int *comm_rank_of_min ) const
{
PROFILE_START( "minReduce2<long int>", profile_level );
if ( comm_rank_of_min == nullptr ) {
auto send = x;
auto recv = new long int[n];
MPI_Allreduce( send, recv, n, MPI_LONG, MPI_MIN, communicator );
for ( long int i = 0; i < n; i++ )
x[i] = recv[i];
delete[] recv;
} else {
auto recv = new LongIntStruct[n];
auto send = new LongIntStruct[n];
for ( int i = 0; i < n; ++i ) {
send[i].j = x[i];
send[i].i = comm_rank;
}
MPI_Allreduce( send, recv, n, MPI_LONG_INT, MPI_MINLOC, communicator );
for ( int i = 0; i < n; ++i ) {
x[i] = recv[i].j;
comm_rank_of_min[i] = recv[i].i;
}
delete[] recv;
delete[] send;
}
PROFILE_STOP( "minReduce2<long int>", profile_level );
}
// unsigned long long int
template<>
void MPI_CLASS::call_minReduce<unsigned long long int>( const unsigned long long int *send,
unsigned long long int *recv, const int n, int *comm_rank_of_min ) const
{
PROFILE_START( "minReduce1<long int>", profile_level );
if ( comm_rank_of_min == nullptr ) {
auto x = new long long int[n];
auto y = new long long int[n];
for ( int i = 0; i < n; i++ )
x[i] = unsigned_to_signed( send[i] );
MPI_Allreduce( (void *) x, (void *) y, n, MPI_LONG_LONG_INT, MPI_MIN, communicator );
for ( int i = 0; i < n; i++ )
recv[i] = signed_to_unsigned( y[i] );
delete[] x;
delete[] y;
} else {
printf( "minReduce<long long int> will use double\n" );
auto tmp = new double[n];
for ( int i = 0; i < n; i++ )
tmp[i] = static_cast<double>( send[i] );
call_minReduce<double>( tmp, n, comm_rank_of_min );
for ( int i = 0; i < n; i++ )
recv[i] = static_cast<long long int>( tmp[i] );
delete[] tmp;
}
PROFILE_STOP( "minReduce1<long int>", profile_level );
}
template<>
void MPI_CLASS::call_minReduce<unsigned long long int>(
unsigned long long int *x, const int n, int *comm_rank_of_min ) const
{
auto recv = new unsigned long long int[n];
call_minReduce<unsigned long long int>( x, recv, n, comm_rank_of_min );
for ( int i = 0; i < n; i++ )
x[i] = recv[i];
delete[] recv;
}
// long long int
template<>
void MPI_CLASS::call_minReduce<long long int>(
const long long int *x, long long int *y, const int n, int *comm_rank_of_min ) const
{
PROFILE_START( "minReduce1<long int>", profile_level );
if ( comm_rank_of_min == nullptr ) {
MPI_Allreduce( (void *) x, (void *) y, n, MPI_LONG_LONG_INT, MPI_MIN, communicator );
} else {
printf( "minReduce<long long int> will use double\n" );
auto tmp = new double[n];
for ( int i = 0; i < n; i++ )
tmp[i] = static_cast<double>( x[i] );
call_minReduce<double>( tmp, n, comm_rank_of_min );
for ( int i = 0; i < n; i++ )
y[i] = static_cast<long long int>( tmp[i] );
delete[] tmp;
}
PROFILE_STOP( "minReduce1<long int>", profile_level );
}
template<>
void MPI_CLASS::call_minReduce<long long int>(
long long int *x, const int n, int *comm_rank_of_min ) const
{
auto recv = new long long int[n];
call_minReduce<long long int>( x, recv, n, comm_rank_of_min );
for ( int i = 0; i < n; i++ )
x[i] = recv[i];
delete[] recv;
}
// float
template<>
void MPI_CLASS::call_minReduce<float>(
const float *x, float *y, const int n, int *comm_rank_of_min ) const
{
PROFILE_START( "minReduce1<float>", profile_level );
if ( comm_rank_of_min == nullptr ) {
MPI_Allreduce( (void *) x, (void *) y, n, MPI_FLOAT, MPI_MIN, communicator );
} else {
auto recv = new FloatIntStruct[n];
auto send = new FloatIntStruct[n];
for ( int i = 0; i < n; ++i ) {
send[i].f = x[i];
send[i].i = comm_rank;
}
MPI_Allreduce( send, recv, n, MPI_FLOAT_INT, MPI_MINLOC, communicator );
for ( int i = 0; i < n; ++i ) {
y[i] = recv[i].f;
comm_rank_of_min[i] = recv[i].i;
}
delete[] recv;
delete[] send;
}
PROFILE_STOP( "minReduce1<float>", profile_level );
}
template<>
void MPI_CLASS::call_minReduce<float>( float *x, const int n, int *comm_rank_of_min ) const
{
PROFILE_START( "minReduce2<float>", profile_level );
if ( comm_rank_of_min == nullptr ) {
auto send = x;
auto recv = new float[n];
MPI_Allreduce( send, recv, n, MPI_FLOAT, MPI_MIN, communicator );
for ( int i = 0; i < n; i++ )
x[i] = recv[i];
delete[] recv;
} else {
auto recv = new FloatIntStruct[n];
auto send = new FloatIntStruct[n];
for ( int i = 0; i < n; ++i ) {
send[i].f = x[i];
send[i].i = comm_rank;
}
MPI_Allreduce( send, recv, n, MPI_FLOAT_INT, MPI_MINLOC, communicator );
for ( int i = 0; i < n; ++i ) {
x[i] = recv[i].f;
comm_rank_of_min[i] = recv[i].i;
}
delete[] recv;
delete[] send;
}
PROFILE_STOP( "minReduce2<float>", profile_level );
}
// double
template<>
void MPI_CLASS::call_minReduce<double>(
const double *x, double *y, const int n, int *comm_rank_of_min ) const
{
PROFILE_START( "minReduce1<double>", profile_level );
if ( comm_rank_of_min == nullptr ) {
MPI_Allreduce( (void *) x, (void *) y, n, MPI_DOUBLE, MPI_MIN, communicator );
} else {
auto recv = new DoubleIntStruct[n];
auto send = new DoubleIntStruct[n];
for ( int i = 0; i < n; ++i ) {
send[i].d = x[i];
send[i].i = comm_rank;
}
MPI_Allreduce( send, recv, n, MPI_DOUBLE_INT, MPI_MINLOC, communicator );
for ( int i = 0; i < n; ++i ) {
y[i] = recv[i].d;
comm_rank_of_min[i] = recv[i].i;
}
delete[] recv;
delete[] send;
}
PROFILE_STOP( "minReduce1<double>", profile_level );
}
template<>
void MPI_CLASS::call_minReduce<double>( double *x, const int n, int *comm_rank_of_min ) const
{
PROFILE_START( "minReduce2<double>", profile_level );
if ( comm_rank_of_min == nullptr ) {
auto send = x;
auto recv = new double[n];
MPI_Allreduce( send, recv, n, MPI_DOUBLE, MPI_MIN, communicator );
for ( int i = 0; i < n; i++ )
x[i] = recv[i];
delete[] recv;
} else {
auto recv = new DoubleIntStruct[n];
auto send = new DoubleIntStruct[n];
for ( int i = 0; i < n; ++i ) {
send[i].d = x[i];
send[i].i = comm_rank;
}
MPI_Allreduce( send, recv, n, MPI_DOUBLE_INT, MPI_MINLOC, communicator );
for ( int i = 0; i < n; ++i ) {
x[i] = recv[i].d;
comm_rank_of_min[i] = recv[i].i;
}
delete[] recv;
delete[] send;
}
PROFILE_STOP( "minReduce2<double>", profile_level );
}
#endif
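/* Illustrative sketch of the MPI_MINLOC pattern used by the specializations above
 * (standard MPI, not part of the original source): the value and the owning rank are
 * packed into a struct matching MPI_DOUBLE_INT and reduced together:
 *     struct { double d; int i; } in = { localValue, comm_rank }, out;   // localValue is hypothetical
 *     MPI_Allreduce( &in, &out, 1, MPI_DOUBLE_INT, MPI_MINLOC, communicator );
 *     // out.d holds the global minimum, out.i the rank that owns it
 */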
/************************************************************************
* call_maxReduce *
* Note: these specializations are only called when using MPI. *
************************************************************************/
#ifdef USE_MPI
// unsigned char
template<>
void MPI_CLASS::call_maxReduce<unsigned char>(
const unsigned char *send, unsigned char *recv, const int n, int *comm_rank_of_max ) const
{
if ( comm_rank_of_max == nullptr ) {
PROFILE_START( "maxReduce1<unsigned char>", profile_level );
MPI_Allreduce( (void *) send, (void *) recv, n, MPI_UNSIGNED_CHAR, MPI_MAX, communicator );
PROFILE_STOP( "maxReduce1<unsigned char>", profile_level );
} else {
auto tmp = new int[n];
for ( int i = 0; i < n; i++ )
tmp[i] = send[i];
call_maxReduce<int>( tmp, n, comm_rank_of_max );
for ( int i = 0; i < n; i++ )
recv[i] = static_cast<unsigned char>( tmp[i] );
delete[] tmp;
}
}
template<>
void MPI_CLASS::call_maxReduce<unsigned char>(
unsigned char *x, const int n, int *comm_rank_of_max ) const
{
if ( comm_rank_of_max == nullptr ) {
PROFILE_START( "maxReduce2<unsigned char>", profile_level );
auto send = x;
auto recv = new unsigned char[n];
MPI_Allreduce( send, recv, n, MPI_UNSIGNED_CHAR, MPI_MAX, communicator );
for ( int i = 0; i < n; i++ )
x[i] = recv[i];
delete[] recv;
PROFILE_STOP( "maxReduce2<unsigned char>", profile_level );
} else {
auto tmp = new int[n];
for ( int i = 0; i < n; i++ )
tmp[i] = x[i];
call_maxReduce<int>( tmp, n, comm_rank_of_max );
for ( int i = 0; i < n; i++ )
x[i] = static_cast<unsigned char>( tmp[i] );
delete[] tmp;
}
}
// char
template<>
void MPI_CLASS::call_maxReduce<char>(
const char *send, char *recv, const int n, int *comm_rank_of_max ) const
{
if ( comm_rank_of_max == nullptr ) {
PROFILE_START( "maxReduce1<char>", profile_level );
MPI_Allreduce( (void *) send, (void *) recv, n, MPI_SIGNED_CHAR, MPI_MAX, communicator );
PROFILE_STOP( "maxReduce1<char>", profile_level );
} else {
auto tmp = new int[n];
for ( int i = 0; i < n; i++ )
tmp[i] = send[i];
call_maxReduce<int>( tmp, n, comm_rank_of_max );
for ( int i = 0; i < n; i++ )
recv[i] = static_cast<char>( tmp[i] );
delete[] tmp;
}
}
template<>
void MPI_CLASS::call_maxReduce<char>( char *x, const int n, int *comm_rank_of_max ) const
{
if ( comm_rank_of_max == nullptr ) {
PROFILE_START( "maxReduce2<char>", profile_level );
auto send = x;
auto recv = new char[n];
MPI_Allreduce( send, recv, n, MPI_SIGNED_CHAR, MPI_MAX, communicator );
for ( int i = 0; i < n; i++ )
x[i] = recv[i];
delete[] recv;
PROFILE_STOP( "maxReduce2<char>", profile_level );
} else {
auto tmp = new int[n];
for ( int i = 0; i < n; i++ )
tmp[i] = x[i];
call_maxReduce<int>( tmp, n, comm_rank_of_max );
for ( int i = 0; i < n; i++ )
x[i] = static_cast<char>( tmp[i] );
delete[] tmp;
}
}
// unsigned int
template<>
void MPI_CLASS::call_maxReduce<unsigned int>(
const unsigned int *send, unsigned int *recv, const int n, int *comm_rank_of_max ) const
{
if ( comm_rank_of_max == nullptr ) {
PROFILE_START( "maxReduce1<unsigned int>", profile_level );
MPI_Allreduce( (void *) send, (void *) recv, n, MPI_UNSIGNED, MPI_MAX, communicator );
PROFILE_STOP( "maxReduce1<unsigned int>", profile_level );
} else {
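        // MPI_MAXLOC has no unsigned pair type, so route through the int version.  This
        // assumes unsigned_to_signed/signed_to_unsigned provide an order-preserving mapping
        // between unsigned int and int, so the maximum (and its owning rank) is preserved.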
auto tmp = new int[n];
for ( int i = 0; i < n; i++ )
tmp[i] = unsigned_to_signed( send[i] );
call_maxReduce<int>( tmp, n, comm_rank_of_max );
for ( int i = 0; i < n; i++ )
recv[i] = signed_to_unsigned( tmp[i] );
delete[] tmp;
}
}
template<>
void MPI_CLASS::call_maxReduce<unsigned int>(
unsigned int *x, const int n, int *comm_rank_of_max ) const
{
if ( comm_rank_of_max == nullptr ) {
PROFILE_START( "maxReduce2<unsigned int>", profile_level );
auto send = x;
auto recv = new unsigned int[n];
MPI_Allreduce( send, recv, n, MPI_UNSIGNED, MPI_MAX, communicator );
for ( int i = 0; i < n; i++ )
x[i] = recv[i];
delete[] recv;
PROFILE_STOP( "maxReduce2<unsigned int>", profile_level );
} else {
auto tmp = new int[n];
for ( int i = 0; i < n; i++ )
tmp[i] = unsigned_to_signed( x[i] );
call_maxReduce<int>( tmp, n, comm_rank_of_max );
for ( int i = 0; i < n; i++ )
x[i] = signed_to_unsigned( tmp[i] );
delete[] tmp;
}
}
// int
template<>
void MPI_CLASS::call_maxReduce<int>(
const int *x, int *y, const int n, int *comm_rank_of_max ) const
{
PROFILE_START( "maxReduce1<int>", profile_level );
if ( comm_rank_of_max == nullptr ) {
MPI_Allreduce( (void *) x, (void *) y, n, MPI_INT, MPI_MAX, communicator );
} else {
auto recv = new IntIntStruct[n];
auto send = new IntIntStruct[n];
for ( int i = 0; i < n; ++i ) {
send[i].j = x[i];
send[i].i = comm_rank;
}
MPI_Allreduce( send, recv, n, MPI_2INT, MPI_MAXLOC, communicator );
for ( int i = 0; i < n; ++i ) {
y[i] = recv[i].j;
comm_rank_of_max[i] = recv[i].i;
}
delete[] recv;
delete[] send;
}
PROFILE_STOP( "maxReduce1<int>", profile_level );
}
template<>
void MPI_CLASS::call_maxReduce<int>( int *x, const int n, int *comm_rank_of_max ) const
{
PROFILE_START( "maxReduce2<int>", profile_level );
if ( comm_rank_of_max == nullptr ) {
int *send = x;
auto recv = new int[n];
MPI_Allreduce( send, recv, n, MPI_INT, MPI_MAX, communicator );
for ( int i = 0; i < n; i++ )
x[i] = recv[i];
delete[] recv;
} else {
auto recv = new IntIntStruct[n];
auto send = new IntIntStruct[n];
for ( int i = 0; i < n; ++i ) {
send[i].j = x[i];
send[i].i = comm_rank;
}
MPI_Allreduce( send, recv, n, MPI_2INT, MPI_MAXLOC, communicator );
for ( int i = 0; i < n; ++i ) {
x[i] = recv[i].j;
comm_rank_of_max[i] = recv[i].i;
}
delete[] recv;
delete[] send;
}
PROFILE_STOP( "maxReduce2<int>", profile_level );
}
// long int
template<>
void MPI_CLASS::call_maxReduce<long int>(
const long int *x, long int *y, const int n, int *comm_rank_of_max ) const
{
    PROFILE_START( "maxReduce1<long int>", profile_level );
if ( comm_rank_of_max == nullptr ) {
MPI_Allreduce( (void *) x, (void *) y, n, MPI_LONG, MPI_MAX, communicator );
} else {
auto recv = new LongIntStruct[n];
auto send = new LongIntStruct[n];
for ( int i = 0; i < n; ++i ) {
send[i].j = x[i];
send[i].i = comm_rank;
}
MPI_Allreduce( send, recv, n, MPI_LONG_INT, MPI_MAXLOC, communicator );
for ( int i = 0; i < n; ++i ) {
y[i] = recv[i].j;
comm_rank_of_max[i] = recv[i].i;
}
delete[] recv;
delete[] send;
}
    PROFILE_STOP( "maxReduce1<long int>", profile_level );
}
template<>
void MPI_CLASS::call_maxReduce<long int>( long int *x, const int n, int *comm_rank_of_max ) const
{
    PROFILE_START( "maxReduce2<long int>", profile_level );
if ( comm_rank_of_max == nullptr ) {
auto send = x;
auto recv = new long int[n];
MPI_Allreduce( send, recv, n, MPI_LONG, MPI_MAX, communicator );
for ( int i = 0; i < n; i++ )
x[i] = recv[i];
delete[] recv;
} else {
auto recv = new LongIntStruct[n];
auto send = new LongIntStruct[n];
for ( int i = 0; i < n; ++i ) {
send[i].j = x[i];
send[i].i = comm_rank;
}
MPI_Allreduce( send, recv, n, MPI_LONG_INT, MPI_MAXLOC, communicator );
for ( int i = 0; i < n; ++i ) {
x[i] = recv[i].j;
comm_rank_of_max[i] = recv[i].i;
}
delete[] recv;
delete[] send;
}
    PROFILE_STOP( "maxReduce2<long int>", profile_level );
}
// unsigned long int
template<>
void MPI_CLASS::call_maxReduce<unsigned long int>( const unsigned long int *send,
unsigned long int *recv, const int n, int *comm_rank_of_max ) const
{
if ( comm_rank_of_max == nullptr ) {
PROFILE_START( "maxReduce1<unsigned long>", profile_level );
MPI_Allreduce( (void *) send, (void *) recv, n, MPI_UNSIGNED_LONG, MPI_MAX, communicator );
PROFILE_STOP( "maxReduce1<unsigned long>", profile_level );
} else {
auto tmp = new long int[n];
for ( int i = 0; i < n; i++ )
tmp[i] = unsigned_to_signed( send[i] );
call_maxReduce<long int>( tmp, n, comm_rank_of_max );
for ( int i = 0; i < n; i++ )
recv[i] = signed_to_unsigned( tmp[i] );
delete[] tmp;
}
}
template<>
void MPI_CLASS::call_maxReduce<unsigned long int>(
unsigned long int *x, const int n, int *comm_rank_of_max ) const
{
if ( comm_rank_of_max == nullptr ) {
PROFILE_START( "maxReduce2<unsigned long>", profile_level );
auto send = x;
auto recv = new unsigned long int[n];
MPI_Allreduce( send, recv, n, MPI_UNSIGNED_LONG, MPI_MAX, communicator );
for ( int i = 0; i < n; i++ )
x[i] = recv[i];
delete[] recv;
PROFILE_STOP( "maxReduce2<unsigned long>", profile_level );
} else {
auto tmp = new long int[n];
for ( int i = 0; i < n; i++ )
tmp[i] = unsigned_to_signed( x[i] );
call_maxReduce<long int>( tmp, n, comm_rank_of_max );
for ( int i = 0; i < n; i++ )
x[i] = signed_to_unsigned( tmp[i] );
delete[] tmp;
}
}
// unsigned long long int
template<>
void MPI_CLASS::call_maxReduce<unsigned long long int>( const unsigned long long int *send,
unsigned long long int *recv, const int n, int *comm_rank_of_max ) const
{
    PROFILE_START( "maxReduce1<unsigned long long>", profile_level );
if ( comm_rank_of_max == nullptr ) {
auto x = new long long int[n];
auto y = new long long int[n];
for ( int i = 0; i < n; i++ )
x[i] = unsigned_to_signed( send[i] );
MPI_Allreduce( (void *) x, (void *) y, n, MPI_LONG_LONG_INT, MPI_MAX, communicator );
for ( int i = 0; i < n; i++ )
recv[i] = signed_to_unsigned( y[i] );
delete[] x;
delete[] y;
} else {
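        // Fall back to a double reduction to recover the owning rank; values above 2^53
        // cannot be represented exactly in a double, so the result may lose precision.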
        printf( "maxReduce<unsigned long long int> will use double\n" );
auto tmp = new double[n];
for ( int i = 0; i < n; i++ )
tmp[i] = static_cast<double>( send[i] );
call_maxReduce<double>( tmp, n, comm_rank_of_max );
for ( int i = 0; i < n; i++ )
            recv[i] = static_cast<unsigned long long int>( tmp[i] );
delete[] tmp;
}
    PROFILE_STOP( "maxReduce1<unsigned long long>", profile_level );
}
template<>
void MPI_CLASS::call_maxReduce<unsigned long long int>(
unsigned long long int *x, const int n, int *comm_rank_of_max ) const
{
auto recv = new unsigned long long int[n];
call_maxReduce<unsigned long long int>( x, recv, n, comm_rank_of_max );
for ( int i = 0; i < n; i++ )
x[i] = recv[i];
delete[] recv;
}
// long long int
template<>
void MPI_CLASS::call_maxReduce<long long int>(
const long long int *x, long long int *y, const int n, int *comm_rank_of_max ) const
{
    PROFILE_START( "maxReduce1<long long int>", profile_level );
if ( comm_rank_of_max == nullptr ) {
MPI_Allreduce( (void *) x, (void *) y, n, MPI_LONG_LONG_INT, MPI_MAX, communicator );
} else {
printf( "maxReduce<long long int> will use double\n" );
auto tmp = new double[n];
for ( int i = 0; i < n; i++ )
tmp[i] = static_cast<double>( x[i] );
call_maxReduce<double>( tmp, n, comm_rank_of_max );
for ( int i = 0; i < n; i++ )
y[i] = static_cast<long long int>( tmp[i] );
delete[] tmp;
}
    PROFILE_STOP( "maxReduce1<long long int>", profile_level );
}
template<>
void MPI_CLASS::call_maxReduce<long long int>(
long long int *x, const int n, int *comm_rank_of_max ) const
{
auto recv = new long long int[n];
call_maxReduce<long long int>( x, recv, n, comm_rank_of_max );
for ( int i = 0; i < n; i++ )
        x[i] = recv[i];
delete[] recv;
}
// float
template<>
void MPI_CLASS::call_maxReduce<float>(
const float *x, float *y, const int n, int *comm_rank_of_max ) const
{
PROFILE_START( "maxReduce1<float>", profile_level );
if ( comm_rank_of_max == nullptr ) {
MPI_Allreduce( (void *) x, (void *) y, n, MPI_FLOAT, MPI_MAX, communicator );
} else {
auto recv = new FloatIntStruct[n];
auto send = new FloatIntStruct[n];
for ( int i = 0; i < n; ++i ) {
send[i].f = x[i];
send[i].i = comm_rank;
}
MPI_Allreduce( send, recv, n, MPI_FLOAT_INT, MPI_MAXLOC, communicator );
for ( int i = 0; i < n; ++i ) {
y[i] = recv[i].f;
comm_rank_of_max[i] = recv[i].i;
}
delete[] recv;
delete[] send;
}
PROFILE_STOP( "maxReduce1<float>", profile_level );
}
template<>
void MPI_CLASS::call_maxReduce<float>( float *x, const int n, int *comm_rank_of_max ) const
{
PROFILE_START( "maxReduce2<float>", profile_level );
if ( comm_rank_of_max == nullptr ) {
auto send = x;
auto recv = new float[n];
MPI_Allreduce( send, recv, n, MPI_FLOAT, MPI_MAX, communicator );
for ( int i = 0; i < n; i++ )
x[i] = recv[i];
delete[] recv;
} else {
auto recv = new FloatIntStruct[n];
auto send = new FloatIntStruct[n];
for ( int i = 0; i < n; ++i ) {
send[i].f = x[i];
send[i].i = comm_rank;
}
MPI_Allreduce( send, recv, n, MPI_FLOAT_INT, MPI_MAXLOC, communicator );
for ( int i = 0; i < n; ++i ) {
x[i] = recv[i].f;
comm_rank_of_max[i] = recv[i].i;
}
delete[] recv;
delete[] send;
}
PROFILE_STOP( "maxReduce2<float>", profile_level );
}
// double
template<>
void MPI_CLASS::call_maxReduce<double>(
const double *x, double *y, const int n, int *comm_rank_of_max ) const
{
PROFILE_START( "maxReduce1<double>", profile_level );
if ( comm_rank_of_max == nullptr ) {
MPI_Allreduce( (void *) x, (void *) y, n, MPI_DOUBLE, MPI_MAX, communicator );
} else {
auto recv = new DoubleIntStruct[n];
auto send = new DoubleIntStruct[n];
for ( int i = 0; i < n; ++i ) {
send[i].d = x[i];
send[i].i = comm_rank;
}
MPI_Allreduce( send, recv, n, MPI_DOUBLE_INT, MPI_MAXLOC, communicator );
for ( int i = 0; i < n; ++i ) {
y[i] = recv[i].d;
comm_rank_of_max[i] = recv[i].i;
}
delete[] recv;
delete[] send;
}
PROFILE_STOP( "maxReduce1<double>", profile_level );
}
template<>
void MPI_CLASS::call_maxReduce<double>( double *x, const int n, int *comm_rank_of_max ) const
{
PROFILE_START( "maxReduce2<double>", profile_level );
if ( comm_rank_of_max == nullptr ) {
auto send = x;
auto recv = new double[n];
MPI_Allreduce( send, recv, n, MPI_DOUBLE, MPI_MAX, communicator );
for ( int i = 0; i < n; i++ )
x[i] = recv[i];
delete[] recv;
} else {
auto recv = new DoubleIntStruct[n];
auto send = new DoubleIntStruct[n];
for ( int i = 0; i < n; ++i ) {
send[i].d = x[i];
send[i].i = comm_rank;
}
MPI_Allreduce( send, recv, n, MPI_DOUBLE_INT, MPI_MAXLOC, communicator );
for ( int i = 0; i < n; ++i ) {
x[i] = recv[i].d;
comm_rank_of_max[i] = recv[i].i;
}
delete[] recv;
delete[] send;
}
PROFILE_STOP( "maxReduce2<double>", profile_level );
}
#endif
/************************************************************************
* bcast *
* Note: these specializations are only called when using MPI. *
************************************************************************/
#ifdef USE_MPI
// char
template<>
void MPI_CLASS::call_bcast<unsigned char>( unsigned char *x, const int n, const int root ) const
{
PROFILE_START( "bcast<unsigned char>", profile_level );
MPI_Bcast( x, n, MPI_UNSIGNED_CHAR, root, communicator );
PROFILE_STOP( "bcast<unsigned char>", profile_level );
}
template<>
void MPI_CLASS::call_bcast<char>( char *x, const int n, const int root ) const
{
PROFILE_START( "bcast<char>", profile_level );
MPI_Bcast( x, n, MPI_CHAR, root, communicator );
PROFILE_STOP( "bcast<char>", profile_level );
}
// int
template<>
void MPI_CLASS::call_bcast<unsigned int>( unsigned int *x, const int n, const int root ) const
{
PROFILE_START( "bcast<unsigned int>", profile_level );
MPI_Bcast( x, n, MPI_UNSIGNED, root, communicator );
PROFILE_STOP( "bcast<unsigned int>", profile_level );
}
template<>
void MPI_CLASS::call_bcast<int>( int *x, const int n, const int root ) const
{
PROFILE_START( "bcast<int>", profile_level );
MPI_Bcast( x, n, MPI_INT, root, communicator );
PROFILE_STOP( "bcast<int>", profile_level );
}
// float
template<>
void MPI_CLASS::call_bcast<float>( float *x, const int n, const int root ) const
{
PROFILE_START( "bcast<float>", profile_level );
MPI_Bcast( x, n, MPI_FLOAT, root, communicator );
PROFILE_STOP( "bcast<float>", profile_level );
}
// double
template<>
void MPI_CLASS::call_bcast<double>( double *x, const int n, const int root ) const
{
PROFILE_START( "bcast<double>", profile_level );
MPI_Bcast( x, n, MPI_DOUBLE, root, communicator );
PROFILE_STOP( "bcast<double>", profile_level );
}
#else
// We need a concrete instantiation of bcast<char>(x,n,root);
template<>
void MPI_CLASS::call_bcast<char>( char *, const int, const int ) const
{
}
#endif
/************************************************************************
* Perform a global barrier across all processors. *
************************************************************************/
void MPI_CLASS::barrier() const
{
#ifdef USE_MPI
MPI_Barrier( communicator );
#endif
}
/************************************************************************
* Send data array to another processor. *
* Note: these specializations are only called when using MPI. *
************************************************************************/
#ifdef USE_MPI
// char
template<>
void MPI_CLASS::send<char>(
const char *buf, const int length, const int recv_proc_number, int tag ) const
{
// Set the tag to 0 if it is < 0
tag = ( tag >= 0 ) ? tag : 0;
MPI_INSIST( tag <= d_maxTag, "Maximum tag value exceeded" );
// Send the data
PROFILE_START( "send<char>", profile_level );
MPI_Send( (void *) buf, length, MPI_CHAR, recv_proc_number, tag, communicator );
PROFILE_STOP( "send<char>", profile_level );
}
// int
template<>
void MPI_CLASS::send<int>(
const int *buf, const int length, const int recv_proc_number, int tag ) const
{
// Set the tag to 0 if it is < 0
tag = ( tag >= 0 ) ? tag : 0;
MPI_INSIST( tag <= d_maxTag, "Maximum tag value exceeded" );
// Send the data
PROFILE_START( "send<int>", profile_level );
MPI_Send( (void *) buf, length, MPI_INT, recv_proc_number, tag, communicator );
PROFILE_STOP( "send<int>", profile_level );
}
// float
template<>
void MPI_CLASS::send<float>(
const float *buf, const int length, const int recv_proc_number, int tag ) const
{
// Set the tag to 0 if it is < 0
tag = ( tag >= 0 ) ? tag : 0;
MPI_INSIST( tag <= d_maxTag, "Maximum tag value exceeded" );
// Send the data
PROFILE_START( "send<float>", profile_level );
MPI_Send( (void *) buf, length, MPI_FLOAT, recv_proc_number, tag, communicator );
PROFILE_STOP( "send<float>", profile_level );
}
// double
template<>
void MPI_CLASS::send<double>(
const double *buf, const int length, const int recv_proc_number, int tag ) const
{
// Set the tag to 0 if it is < 0
tag = ( tag >= 0 ) ? tag : 0;
MPI_INSIST( tag <= d_maxTag, "Maximum tag value exceeded" );
// Send the data
PROFILE_START( "send<double>", profile_level );
MPI_Send( (void *) buf, length, MPI_DOUBLE, recv_proc_number, tag, communicator );
PROFILE_STOP( "send<double>", profile_level );
}
#else
// We need a concrete instantiation of send for use without MPI
template<>
void MPI_CLASS::send<char>( const char *buf, const int length, const int, int tag ) const
{
MPI_INSIST( tag <= d_maxTag, "Maximum tag value exceeded" );
MPI_INSIST( tag >= 0, "tag must be >= 0" );
PROFILE_START( "send<char>", profile_level );
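    // Without MPI, a send can only complete a matching Irecv that was posted earlier with the
    // same communicator/tag; look it up by the hashed request id and copy the data directly
    // into the receiver's buffer.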
auto id = getRequest( communicator, tag );
auto it = global_isendrecv_list.find( id );
    MPI_INSIST( it != global_isendrecv_list.end(),
        "send must be paired with a previous call to irecv in serial" );
    MPI_ASSERT( it->second.status == 2 );
    memcpy( (char *) it->second.data, buf, length );
    global_isendrecv_list.erase( it );
    PROFILE_STOP( "send<char>", profile_level );
}
#endif
/************************************************************************
* Non-blocking send data array to another processor. *
* Note: these specializations are only called when using MPI. *
************************************************************************/
#ifdef USE_MPI
// char
template<>
MPI_Request MPI_CLASS::Isend<char>(
const char *buf, const int length, const int recv_proc, const int tag ) const
{
MPI_INSIST( tag <= d_maxTag, "Maximum tag value exceeded" );
MPI_INSIST( tag >= 0, "tag must be >= 0" );
MPI_Request request;
PROFILE_START( "Isend<char>", profile_level );
MPI_Isend( (void *) buf, length, MPI_CHAR, recv_proc, tag, communicator, &request );
PROFILE_STOP( "Isend<char>", profile_level );
return request;
}
// int
template<>
MPI_Request MPI_CLASS::Isend<int>(
const int *buf, const int length, const int recv_proc, const int tag ) const
{
MPI_INSIST( tag <= d_maxTag, "Maximum tag value exceeded" );
MPI_INSIST( tag >= 0, "tag must be >= 0" );
MPI_Request request;
PROFILE_START( "Isend<int>", profile_level );
MPI_Isend( (void *) buf, length, MPI_INT, recv_proc, tag, communicator, &request );
PROFILE_STOP( "Isend<int>", profile_level );
return request;
}
// float
template<>
MPI_Request MPI_CLASS::Isend<float>(
const float *buf, const int length, const int recv_proc, const int tag ) const
{
MPI_INSIST( tag <= d_maxTag, "Maximum tag value exceeded" );
MPI_INSIST( tag >= 0, "tag must be >= 0" );
MPI_Request request;
PROFILE_START( "Isend<float>", profile_level );
MPI_Isend( (void *) buf, length, MPI_FLOAT, recv_proc, tag, communicator, &request );
PROFILE_STOP( "Isend<float>", profile_level );
return request;
}
// double
template<>
MPI_Request MPI_CLASS::Isend<double>(
const double *buf, const int length, const int recv_proc, const int tag ) const
{
MPI_INSIST( tag <= d_maxTag, "Maximum tag value exceeded" );
MPI_INSIST( tag >= 0, "tag must be >= 0" );
MPI_Request request;
PROFILE_START( "Isend<double>", profile_level );
MPI_Isend( (void *) buf, length, MPI_DOUBLE, recv_proc, tag, communicator, &request );
PROFILE_STOP( "Isend<double>", profile_level );
return request;
}
#else
// We need a concrete instantiation of send for use without mpi
template<>
MPI_Request MPI_CLASS::Isend<char>(
const char *buf, const int length, const int, const int tag ) const
{
MPI_INSIST( tag <= d_maxTag, "Maximum tag value exceeded" );
MPI_INSIST( tag >= 0, "tag must be >= 0" );
PROFILE_START( "Isend<char>", profile_level );
auto id = getRequest( communicator, tag );
auto it = global_isendrecv_list.find( id );
if ( it == global_isendrecv_list.end() ) {
// We are calling isend first
Isendrecv_struct data;
data.data = buf;
data.status = 1;
global_isendrecv_list.insert( std::pair<MPI_Request, Isendrecv_struct>( id, data ) );
} else {
// We called irecv first
MPI_ASSERT( it->second.status == 2 );
memcpy( (char *) it->second.data, buf, length );
global_isendrecv_list.erase( it );
}
PROFILE_STOP( "Isend<char>", profile_level );
return id;
}
#endif
/************************************************************************
* Send byte array to another processor. *
************************************************************************/
void MPI_CLASS::sendBytes(
const void *buf, const int number_bytes, const int recv_proc_number, int tag ) const
{
MPI_INSIST( tag <= d_maxTag, "Maximum tag value exceeded" );
MPI_INSIST( tag >= 0, "tag must be >= 0" );
send<char>( (const char *) buf, number_bytes, recv_proc_number, tag );
}
/************************************************************************
* Non-blocking send byte array to another processor. *
************************************************************************/
MPI_Request MPI_CLASS::IsendBytes(
const void *buf, const int number_bytes, const int recv_proc, const int tag ) const
{
MPI_INSIST( tag <= d_maxTag, "Maximum tag value exceeded" );
MPI_INSIST( tag >= 0, "tag must be >= 0" );
return Isend<char>( (const char *) buf, number_bytes, recv_proc, tag );
}
/************************************************************************
 * Receive data array from another processor.                           *
* Note: these specializations are only called when using MPI. *
************************************************************************/
#ifdef USE_MPI
// char
template<>
void MPI_CLASS::recv<char>(
char *buf, int &length, const int send_proc_number, const bool get_length, int tag ) const
{
// Set the tag to 0 if it is < 0
tag = ( tag >= 0 ) ? tag : 0;
MPI_INSIST( tag <= d_maxTag, "Maximum tag value exceeded" );
PROFILE_START( "recv<char>", profile_level );
    // Get the receive length if necessary
if ( get_length ) {
int bytes = this->probe( send_proc_number, tag );
int recv_length = bytes / sizeof( char );
        MPI_INSIST( length >= recv_length, "Received length is larger than allocated array" );
length = recv_length;
}
    // Receive the data
MPI_Status status;
MPI_Recv( (void *) buf, length, MPI_CHAR, send_proc_number, tag, communicator, &status );
PROFILE_STOP( "recv<char>", profile_level );
}
// int
template<>
void MPI_CLASS::recv<int>(
int *buf, int &length, const int send_proc_number, const bool get_length, int tag ) const
{
// Set the tag to 0 if it is < 0
tag = ( tag >= 0 ) ? tag : 0;
MPI_INSIST( tag <= d_maxTag, "Maximum tag value exceeded" );
PROFILE_START( "recv<int>", profile_level );
    // Get the receive length if necessary
if ( get_length ) {
int bytes = this->probe( send_proc_number, tag );
int recv_length = bytes / sizeof( int );
        MPI_INSIST( length >= recv_length, "Received length is larger than allocated array" );
length = recv_length;
}
    // Receive the data
MPI_Status status;
MPI_Recv( (void *) buf, length, MPI_INT, send_proc_number, tag, communicator, &status );
PROFILE_STOP( "recv<int>", profile_level );
}
// float
template<>
void MPI_CLASS::recv<float>(
float *buf, int &length, const int send_proc_number, const bool get_length, int tag ) const
{
// Set the tag to 0 if it is < 0
tag = ( tag >= 0 ) ? tag : 0;
MPI_INSIST( tag <= d_maxTag, "Maximum tag value exceeded" );
PROFILE_START( "recv<float>", profile_level );
    // Get the receive length if necessary
if ( get_length ) {
int bytes = this->probe( send_proc_number, tag );
int recv_length = bytes / sizeof( float );
        MPI_INSIST( length >= recv_length, "Received length is larger than allocated array" );
length = recv_length;
}
    // Receive the data
MPI_Status status;
MPI_Recv( (void *) buf, length, MPI_FLOAT, send_proc_number, tag, communicator, &status );
PROFILE_STOP( "recv<float>", profile_level );
}
// double
template<>
void MPI_CLASS::recv<double>(
double *buf, int &length, const int send_proc_number, const bool get_length, int tag ) const
{
// Set the tag to 0 if it is < 0
tag = ( tag >= 0 ) ? tag : 0;
MPI_INSIST( tag <= d_maxTag, "Maximum tag value exceeded" );
PROFILE_START( "recv<double>", profile_level );
    // Get the receive length if necessary
if ( get_length ) {
int bytes = this->probe( send_proc_number, tag );
int recv_length = bytes / sizeof( double );
        MPI_INSIST( length >= recv_length, "Received length is larger than allocated array" );
length = recv_length;
}
    // Receive the data
MPI_Status status;
MPI_Recv( (void *) buf, length, MPI_DOUBLE, send_proc_number, tag, communicator, &status );
PROFILE_STOP( "recv<double>", profile_level );
}
#else
// We need a concrete instantiation of recv for use without mpi
template<>
void MPI_CLASS::recv<char>( char *buf, int &length, const int, const bool, int tag ) const
{
MPI_INSIST( tag <= d_maxTag, "Maximum tag value exceeded" );
MPI_INSIST( tag >= 0, "tag must be >= 0" );
PROFILE_START( "recv<char>", profile_level );
auto id = getRequest( communicator, tag );
auto it = global_isendrecv_list.find( id );
MPI_INSIST( it != global_isendrecv_list.end(),
"recv must be paired with a previous call to isend in serial" );
MPI_ASSERT( it->second.status == 1 );
memcpy( buf, it->second.data, length );
global_isendrecv_list.erase( it );
PROFILE_STOP( "recv<char>", profile_level );
}
#endif
/************************************************************************
 * Non-blocking receive data array from another processor.              *
* Note: these specializations are only called when using MPI. *
************************************************************************/
#ifdef USE_MPI
// char
template<>
MPI_Request MPI_CLASS::Irecv<char>(
char *buf, const int length, const int send_proc, const int tag ) const
{
MPI_INSIST( tag <= d_maxTag, "Maximum tag value exceeded" );
MPI_INSIST( tag >= 0, "tag must be >= 0" );
MPI_Request request;
PROFILE_START( "Irecv<char>", profile_level );
MPI_Irecv( (void *) buf, length, MPI_CHAR, send_proc, tag, communicator, &request );
PROFILE_STOP( "Irecv<char>", profile_level );
return request;
}
// int
template<>
MPI_Request MPI_CLASS::Irecv<int>(
int *buf, const int length, const int send_proc, const int tag ) const
{
MPI_INSIST( tag <= d_maxTag, "Maximum tag value exceeded" );
MPI_INSIST( tag >= 0, "tag must be >= 0" );
MPI_Request request;
PROFILE_START( "Irecv<int>", profile_level );
MPI_Irecv( (void *) buf, length, MPI_INT, send_proc, tag, communicator, &request );
PROFILE_STOP( "Irecv<int>", profile_level );
return request;
}
// float
template<>
MPI_Request MPI_CLASS::Irecv<float>(
float *buf, const int length, const int send_proc, const int tag ) const
{
MPI_INSIST( tag <= d_maxTag, "Maximum tag value exceeded" );
MPI_INSIST( tag >= 0, "tag must be >= 0" );
MPI_Request request;
PROFILE_START( "Irecv<float>", profile_level );
MPI_Irecv( (void *) buf, length, MPI_FLOAT, send_proc, tag, communicator, &request );
PROFILE_STOP( "Irecv<float>", profile_level );
return request;
}
// double
template<>
MPI_Request MPI_CLASS::Irecv<double>(
double *buf, const int length, const int send_proc, const int tag ) const
{
MPI_INSIST( tag <= d_maxTag, "Maximum tag value exceeded" );
MPI_INSIST( tag >= 0, "tag must be >= 0" );
MPI_Request request;
PROFILE_START( "Irecv<double>", profile_level );
MPI_Irecv( (void *) buf, length, MPI_DOUBLE, send_proc, tag, communicator, &request );
PROFILE_STOP( "Irecv<double>", profile_level );
return request;
}
#else
// We need a concrete instantiation of irecv for use without mpi
template<>
MPI_Request MPI_CLASS::Irecv<char>( char *buf, const int length, const int, const int tag ) const
{
MPI_INSIST( tag <= d_maxTag, "Maximum tag value exceeded" );
MPI_INSIST( tag >= 0, "tag must be >= 0" );
PROFILE_START( "Irecv<char>", profile_level );
auto id = getRequest( communicator, tag );
auto it = global_isendrecv_list.find( id );
if ( it == global_isendrecv_list.end() ) {
// We are calling Irecv first
Isendrecv_struct data;
data.data = buf;
data.status = 2;
global_isendrecv_list.insert( std::pair<MPI_Request, Isendrecv_struct>( id, data ) );
} else {
// We called Isend first
MPI_ASSERT( it->second.status == 1 );
memcpy( buf, it->second.data, length );
global_isendrecv_list.erase( it );
}
PROFILE_STOP( "Irecv<char>", profile_level );
return id;
}
#endif
/************************************************************************
 * Receive byte array from another processor.                           *
************************************************************************/
void MPI_CLASS::recvBytes( void *buf, int &number_bytes, const int send_proc, int tag ) const
{
recv<char>( (char *) buf, number_bytes, send_proc, false, tag );
}
/************************************************************************
 * Non-blocking receive byte array from another processor.              *
************************************************************************/
MPI_Request MPI_CLASS::IrecvBytes(
void *buf, const int number_bytes, const int send_proc, const int tag ) const
{
MPI_INSIST( tag <= d_maxTag, "Maximum tag value exceeded" );
MPI_INSIST( tag >= 0, "tag must be >= 0" );
return Irecv<char>( (char *) buf, number_bytes, send_proc, tag );
}
/************************************************************************
* sendrecv *
************************************************************************/
#if defined( USE_MPI ) || defined( USE_EXT_MPI )
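// Each specialization forwards to MPI_Sendrecv, which performs the send and the matching
// receive in a single call (avoiding the ordering/deadlock issues of separate blocking
// send/recv calls) and discards the status via MPI_STATUS_IGNORE.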
template<>
void MPI_CLASS::sendrecv<char>( const char* sendbuf, int sendcount, int dest, int sendtag,
char* recvbuf, int recvcount, int source, int recvtag ) const
{
PROFILE_START( "sendrecv<char>", profile_level );
MPI_Sendrecv( sendbuf, sendcount, MPI_CHAR, dest, sendtag,
recvbuf, recvcount, MPI_CHAR, source, recvtag,
communicator, MPI_STATUS_IGNORE );
PROFILE_STOP( "sendrecv<char>", profile_level );
}
template<>
void MPI_CLASS::sendrecv<int>( const int* sendbuf, int sendcount, int dest, int sendtag,
int* recvbuf, int recvcount, int source, int recvtag ) const
{
PROFILE_START( "sendrecv<int>", profile_level );
MPI_Sendrecv( sendbuf, sendcount, MPI_INT, dest, sendtag,
recvbuf, recvcount, MPI_INT, source, recvtag,
communicator, MPI_STATUS_IGNORE );
PROFILE_STOP( "sendrecv<int>", profile_level );
}
template<>
void MPI_CLASS::sendrecv<float>( const float* sendbuf, int sendcount, int dest, int sendtag,
float* recvbuf, int recvcount, int source, int recvtag ) const
{
PROFILE_START( "sendrecv<float>", profile_level );
MPI_Sendrecv( sendbuf, sendcount, MPI_FLOAT, dest, sendtag,
recvbuf, recvcount, MPI_FLOAT, source, recvtag,
communicator, MPI_STATUS_IGNORE );
PROFILE_STOP( "sendrecv<float>", profile_level );
}
template<>
void MPI_CLASS::sendrecv<double>( const double* sendbuf, int sendcount, int dest, int sendtag,
double* recvbuf, int recvcount, int source, int recvtag ) const
{
PROFILE_START( "sendrecv<double>", profile_level );
MPI_Sendrecv( sendbuf, sendcount, MPI_DOUBLE, dest, sendtag,
recvbuf, recvcount, MPI_DOUBLE, source, recvtag,
communicator, MPI_STATUS_IGNORE );
PROFILE_STOP( "sendrecv<double>", profile_level );
}
#endif
/************************************************************************
* allGather *
* Note: these specializations are only called when using MPI. *
************************************************************************/
#ifdef USE_MPI
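// The scalar forms gather one value from every rank into x_out (length comm_size); the
// vector forms use MPI_Allgatherv, where the caller supplies the per-rank receive counts
// (size_out) and displacements (disp_out).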
// unsigned char
template<>
void MPI_CLASS::call_allGather<unsigned char>(
const unsigned char &x_in, unsigned char *x_out ) const
{
PROFILE_START( "allGather<unsigned char>", profile_level );
MPI_Allgather(
(void *) &x_in, 1, MPI_UNSIGNED_CHAR, (void *) x_out, 1, MPI_UNSIGNED_CHAR, communicator );
PROFILE_STOP( "allGather<unsigned char>", profile_level );
}
template<>
void MPI_CLASS::call_allGather<unsigned char>( const unsigned char *x_in, int size_in,
unsigned char *x_out, int *size_out, int *disp_out ) const
{
PROFILE_START( "allGatherv<unsigned char>", profile_level );
MPI_Allgatherv( (void *) x_in, size_in, MPI_CHAR, (void *) x_out, size_out, disp_out, MPI_CHAR,
communicator );
PROFILE_STOP( "allGatherv<unsigned char>", profile_level );
}
// char
template<>
void MPI_CLASS::call_allGather<char>( const char &x_in, char *x_out ) const
{
PROFILE_START( "allGather<char>", profile_level );
MPI_Allgather( (void *) &x_in, 1, MPI_CHAR, (void *) x_out, 1, MPI_CHAR, communicator );
PROFILE_STOP( "allGather<char>", profile_level );
}
template<>
void MPI_CLASS::call_allGather<char>(
const char *x_in, int size_in, char *x_out, int *size_out, int *disp_out ) const
{
PROFILE_START( "allGatherv<char>", profile_level );
MPI_Allgatherv( (void *) x_in, size_in, MPI_CHAR, (void *) x_out, size_out, disp_out, MPI_CHAR,
communicator );
PROFILE_STOP( "allGatherv<char>", profile_level );
}
// unsigned int
template<>
void MPI_CLASS::call_allGather<unsigned int>( const unsigned int &x_in, unsigned int *x_out ) const
{
PROFILE_START( "allGather<unsigned int>", profile_level );
MPI_Allgather( (void *) &x_in, 1, MPI_UNSIGNED, (void *) x_out, 1, MPI_UNSIGNED, communicator );
PROFILE_STOP( "allGather<unsigned int>", profile_level );
}
template<>
void MPI_CLASS::call_allGather<unsigned int>(
const unsigned int *x_in, int size_in, unsigned int *x_out, int *size_out, int *disp_out ) const
{
PROFILE_START( "allGatherv<unsigned int>", profile_level );
MPI_Allgatherv( (void *) x_in, size_in, MPI_UNSIGNED, (void *) x_out, size_out, disp_out,
MPI_UNSIGNED, communicator );
PROFILE_STOP( "allGatherv<unsigned int>", profile_level );
}
// int
template<>
void MPI_CLASS::call_allGather<int>( const int &x_in, int *x_out ) const
{
PROFILE_START( "allGather<int>", profile_level );
MPI_Allgather( (void *) &x_in, 1, MPI_INT, (void *) x_out, 1, MPI_INT, communicator );
PROFILE_STOP( "allGather<int>", profile_level );
}
template<>
void MPI_CLASS::call_allGather<int>(
const int *x_in, int size_in, int *x_out, int *size_out, int *disp_out ) const
{
PROFILE_START( "allGatherv<int>", profile_level );
MPI_Allgatherv( (void *) x_in, size_in, MPI_INT, (void *) x_out, size_out, disp_out, MPI_INT,
communicator );
PROFILE_STOP( "allGatherv<int>", profile_level );
}
// unsigned long int
template<>
void MPI_CLASS::call_allGather<unsigned long int>(
const unsigned long int &x_in, unsigned long int *x_out ) const
{
PROFILE_START( "allGather<unsigned long>", profile_level );
MPI_Allgather(
(void *) &x_in, 1, MPI_UNSIGNED_LONG, (void *) x_out, 1, MPI_UNSIGNED_LONG, communicator );
PROFILE_STOP( "allGather<unsigned long>", profile_level );
}
template<>
void MPI_CLASS::call_allGather<unsigned long int>( const unsigned long int *x_in, int size_in,
unsigned long int *x_out, int *size_out, int *disp_out ) const
{
PROFILE_START( "allGatherv<unsigned long>", profile_level );
MPI_Allgatherv( (void *) x_in, size_in, MPI_UNSIGNED_LONG, (void *) x_out, size_out, disp_out,
MPI_UNSIGNED_LONG, communicator );
PROFILE_STOP( "allGatherv<unsigned long>", profile_level );
}
// long int
template<>
void MPI_CLASS::call_allGather<long int>( const long int &x_in, long int *x_out ) const
{
PROFILE_START( "allGather<long int>", profile_level );
MPI_Allgather( (void *) &x_in, 1, MPI_LONG, (void *) x_out, 1, MPI_LONG, communicator );
PROFILE_STOP( "allGather<long int>", profile_level );
}
template<>
void MPI_CLASS::call_allGather<long int>(
const long int *x_in, int size_in, long int *x_out, int *size_out, int *disp_out ) const
{
PROFILE_START( "allGatherv<long int>", profile_level );
MPI_Allgatherv( (void *) x_in, size_in, MPI_LONG, (void *) x_out, size_out, disp_out, MPI_LONG,
communicator );
PROFILE_STOP( "allGatherv<long int>", profile_level );
}
// float
template<>
void MPI_CLASS::call_allGather<float>( const float &x_in, float *x_out ) const
{
PROFILE_START( "allGather<float>", profile_level );
MPI_Allgather( (void *) &x_in, 1, MPI_FLOAT, (void *) x_out, 1, MPI_FLOAT, communicator );
PROFILE_STOP( "allGather<float>", profile_level );
}
template<>
void MPI_CLASS::call_allGather<float>(
const float *x_in, int size_in, float *x_out, int *size_out, int *disp_out ) const
{
PROFILE_START( "allGatherv<float>", profile_level );
MPI_Allgatherv( (void *) x_in, size_in, MPI_FLOAT, (void *) x_out, size_out, disp_out,
MPI_FLOAT, communicator );
PROFILE_STOP( "allGatherv<float>", profile_level );
}
// double
template<>
void MPI_CLASS::call_allGather<double>( const double &x_in, double *x_out ) const
{
PROFILE_START( "allGather<double>", profile_level );
MPI_Allgather( (void *) &x_in, 1, MPI_DOUBLE, (void *) x_out, 1, MPI_DOUBLE, communicator );
PROFILE_STOP( "allGather<double>", profile_level );
}
template<>
void MPI_CLASS::call_allGather<double>(
const double *x_in, int size_in, double *x_out, int *size_out, int *disp_out ) const
{
PROFILE_START( "allGatherv<double>", profile_level );
MPI_Allgatherv( (void *) x_in, size_in, MPI_DOUBLE, (void *) x_out, size_out, disp_out,
MPI_DOUBLE, communicator );
PROFILE_STOP( "allGatherv<double>", profile_level );
}
#else
// We need a concrete instantiation of call_allGather<char>(x_in,size_in,x_out,size_out)
template<>
void MPI_CLASS::call_allGather<char>( const char *, int, char *, int *, int * ) const
{
MPI_ERROR( "Internal error in communicator (allGather) " );
}
#endif
/************************************************************************
* allToAll *
* Note: these specializations are only called when using MPI. *
************************************************************************/
#ifdef USE_MPI
template<>
void MPI_CLASS::allToAll<unsigned char>(
const int n, const unsigned char *send, unsigned char *recv ) const
{
PROFILE_START( "allToAll<unsigned char>", profile_level );
MPI_Alltoall(
(void *) send, n, MPI_UNSIGNED_CHAR, (void *) recv, n, MPI_UNSIGNED_CHAR, communicator );
PROFILE_STOP( "allToAll<unsigned char>", profile_level );
}
template<>
void MPI_CLASS::allToAll<char>( const int n, const char *send, char *recv ) const
{
PROFILE_START( "allToAll<char>", profile_level );
MPI_Alltoall( (void *) send, n, MPI_CHAR, (void *) recv, n, MPI_CHAR, communicator );
PROFILE_STOP( "allToAll<char>", profile_level );
}
template<>
void MPI_CLASS::allToAll<unsigned int>(
const int n, const unsigned int *send, unsigned int *recv ) const
{
PROFILE_START( "allToAll<unsigned int>", profile_level );
MPI_Alltoall( (void *) send, n, MPI_UNSIGNED, (void *) recv, n, MPI_UNSIGNED, communicator );
PROFILE_STOP( "allToAll<unsigned int>", profile_level );
}
template<>
void MPI_CLASS::allToAll<int>( const int n, const int *send, int *recv ) const
{
PROFILE_START( "allToAll<int>", profile_level );
MPI_Alltoall( (void *) send, n, MPI_INT, (void *) recv, n, MPI_INT, communicator );
PROFILE_STOP( "allToAll<int>", profile_level );
}
template<>
void MPI_CLASS::allToAll<unsigned long int>(
const int n, const unsigned long int *send, unsigned long int *recv ) const
{
PROFILE_START( "allToAll<unsigned long>", profile_level );
MPI_Alltoall(
(void *) send, n, MPI_UNSIGNED_LONG, (void *) recv, n, MPI_UNSIGNED_LONG, communicator );
PROFILE_STOP( "allToAll<unsigned long>", profile_level );
}
template<>
void MPI_CLASS::allToAll<long int>( const int n, const long int *send, long int *recv ) const
{
PROFILE_START( "allToAll<long int>", profile_level );
MPI_Alltoall( (void *) send, n, MPI_LONG, (void *) recv, n, MPI_LONG, communicator );
PROFILE_STOP( "allToAll<long int>", profile_level );
}
template<>
void MPI_CLASS::allToAll<float>( const int n, const float *send, float *recv ) const
{
PROFILE_START( "allToAll<float>", profile_level );
MPI_Alltoall( (void *) send, n, MPI_FLOAT, (void *) recv, n, MPI_FLOAT, communicator );
PROFILE_STOP( "allToAll<float>", profile_level );
}
template<>
void MPI_CLASS::allToAll<double>( const int n, const double *send, double *recv ) const
{
PROFILE_START( "allToAll<double>", profile_level );
MPI_Alltoall( (void *) send, n, MPI_DOUBLE, (void *) recv, n, MPI_DOUBLE, communicator );
PROFILE_STOP( "allToAll<double>", profile_level );
}
#endif
/************************************************************************
* call_allToAll *
* Note: these specializations are only called when using MPI. *
************************************************************************/
#ifdef USE_MPI
// unsigned char
template<>
void MPI_CLASS::call_allToAll<unsigned char>( const unsigned char *send_data, const int send_cnt[],
const int send_disp[], unsigned char *recv_data, const int *recv_cnt,
const int *recv_disp ) const
{
PROFILE_START( "allToAllv<unsigned char>", profile_level );
MPI_Alltoallv( (void *) send_data, (int *) send_cnt, (int *) send_disp, MPI_UNSIGNED_CHAR,
(void *) recv_data, (int *) recv_cnt, (int *) recv_disp, MPI_UNSIGNED_CHAR, communicator );
PROFILE_STOP( "allToAllv<unsigned char>", profile_level );
}
// char
template<>
void MPI_CLASS::call_allToAll<char>( const char *send_data, const int send_cnt[],
const int send_disp[], char *recv_data, const int *recv_cnt, const int *recv_disp ) const
{
PROFILE_START( "allToAllv<char>", profile_level );
MPI_Alltoallv( (void *) send_data, (int *) send_cnt, (int *) send_disp, MPI_CHAR,
(void *) recv_data, (int *) recv_cnt, (int *) recv_disp, MPI_CHAR, communicator );
PROFILE_STOP( "allToAllv<char>", profile_level );
}
// unsigned int
template<>
void MPI_CLASS::call_allToAll<unsigned int>( const unsigned int *send_data, const int send_cnt[],
const int send_disp[], unsigned int *recv_data, const int *recv_cnt,
const int *recv_disp ) const
{
PROFILE_START( "allToAllv<unsigned int>", profile_level );
MPI_Alltoallv( (void *) send_data, (int *) send_cnt, (int *) send_disp, MPI_UNSIGNED,
(void *) recv_data, (int *) recv_cnt, (int *) recv_disp, MPI_UNSIGNED, communicator );
PROFILE_STOP( "allToAllv<unsigned int>", profile_level );
}
// int
template<>
void MPI_CLASS::call_allToAll<int>( const int *send_data, const int send_cnt[],
const int send_disp[], int *recv_data, const int *recv_cnt, const int *recv_disp ) const
{
PROFILE_START( "allToAllv<int>", profile_level );
MPI_Alltoallv( (void *) send_data, (int *) send_cnt, (int *) send_disp, MPI_INT,
(void *) recv_data, (int *) recv_cnt, (int *) recv_disp, MPI_INT, communicator );
PROFILE_STOP( "allToAllv<int>", profile_level );
}
// unsigned long int
template<>
void MPI_CLASS::call_allToAll<unsigned long int>( const unsigned long int *send_data,
const int send_cnt[], const int send_disp[], unsigned long int *recv_data, const int *recv_cnt,
const int *recv_disp ) const
{
PROFILE_START( "allToAllv<unsigned long>", profile_level );
MPI_Alltoallv( (void *) send_data, (int *) send_cnt, (int *) send_disp, MPI_UNSIGNED_LONG,
(void *) recv_data, (int *) recv_cnt, (int *) recv_disp, MPI_UNSIGNED_LONG, communicator );
PROFILE_STOP( "allToAllv<unsigned long>", profile_level );
}
// long int
template<>
void MPI_CLASS::call_allToAll<long int>( const long int *send_data, const int send_cnt[],
const int send_disp[], long int *recv_data, const int *recv_cnt, const int *recv_disp ) const
{
PROFILE_START( "allToAllv<long int>", profile_level );
MPI_Alltoallv( (void *) send_data, (int *) send_cnt, (int *) send_disp, MPI_LONG,
(void *) recv_data, (int *) recv_cnt, (int *) recv_disp, MPI_LONG, communicator );
PROFILE_STOP( "allToAllv<long int>", profile_level );
}
// float
template<>
void MPI_CLASS::call_allToAll<float>( const float *send_data, const int send_cnt[],
const int send_disp[], float *recv_data, const int *recv_cnt, const int *recv_disp ) const
{
PROFILE_START( "allToAllv<float>", profile_level );
MPI_Alltoallv( (void *) send_data, (int *) send_cnt, (int *) send_disp, MPI_FLOAT,
(void *) recv_data, (int *) recv_cnt, (int *) recv_disp, MPI_FLOAT, communicator );
PROFILE_STOP( "allToAllv<float>", profile_level );
}
// double
template<>
void MPI_CLASS::call_allToAll<double>( const double *send_data, const int send_cnt[],
const int send_disp[], double *recv_data, const int *recv_cnt, const int *recv_disp ) const
{
PROFILE_START( "allToAllv<double>", profile_level );
MPI_Alltoallv( (void *) send_data, (int *) send_cnt, (int *) send_disp, MPI_DOUBLE,
(void *) recv_data, (int *) recv_cnt, (int *) recv_disp, MPI_DOUBLE, communicator );
PROFILE_STOP( "allToAllv<double>", profile_level );
}
#else
// Default instantiation for char
template<>
void MPI_CLASS::call_allToAll<char>(
const char *, const int[], const int[], char *, const int *, const int * ) const
{
MPI_ERROR( "Should not reach this point" );
}
#endif
/************************************************************************
* call_sumScan *
* Note: these specializations are only called when using MPI. *
************************************************************************/
#ifdef USE_MPI
// unsigned char
template<>
void MPI_CLASS::call_sumScan<unsigned char>(
const unsigned char *send, unsigned char *recv, int n ) const
{
PROFILE_START( "sumScan<unsigned char>", profile_level );
MPI_Scan( (void *) send, (void *) recv, n, MPI_UNSIGNED_CHAR, MPI_SUM, communicator );
PROFILE_STOP( "sumScan<unsigned char>", profile_level );
}
// char
template<>
void MPI_CLASS::call_sumScan<char>( const char *send, char *recv, int n ) const
{
PROFILE_START( "sumScan<char>", profile_level );
MPI_Scan( (void *) send, (void *) recv, n, MPI_SIGNED_CHAR, MPI_SUM, communicator );
PROFILE_STOP( "sumScan<char>", profile_level );
}
// unsigned int
template<>
void MPI_CLASS::call_sumScan<unsigned int>(
const unsigned int *send, unsigned int *recv, int n ) const
{
PROFILE_START( "sumScan<unsigned int>", profile_level );
MPI_Scan( (void *) send, (void *) recv, n, MPI_UNSIGNED, MPI_SUM, communicator );
PROFILE_STOP( "sumScan<unsigned int>", profile_level );
}
// int
template<>
void MPI_CLASS::call_sumScan<int>( const int *send, int *recv, int n ) const
{
PROFILE_START( "sumScan<int>", profile_level );
MPI_Scan( (void *) send, (void *) recv, n, MPI_INT, MPI_SUM, communicator );
PROFILE_STOP( "sumScan<int>", profile_level );
}
// long int
template<>
void MPI_CLASS::call_sumScan<long int>( const long int *send, long int *recv, int n ) const
{
PROFILE_START( "sumScan<long int>", profile_level );
MPI_Scan( (void *) send, (void *) recv, n, MPI_LONG, MPI_SUM, communicator );
PROFILE_STOP( "sumScan<long int>", profile_level );
}
// unsigned long int
template<>
void MPI_CLASS::call_sumScan<unsigned long>(
const unsigned long *send, unsigned long *recv, int n ) const
{
PROFILE_START( "sumScan<unsigned long>", profile_level );
MPI_Scan( (void *) send, (void *) recv, n, MPI_UNSIGNED_LONG, MPI_SUM, communicator );
PROFILE_STOP( "sumScan<unsigned long>", profile_level );
}
// size_t
#ifdef USE_WINDOWS
template<>
void MPI_CLASS::call_sumScan<size_t>( const size_t *send, size_t *recv, int n ) const
{
MPI_ASSERT( MPI_SIZE_T != 0 );
PROFILE_START( "sumScan<size_t>", profile_level );
MPI_Scan( (void *) send, (void *) recv, n, MPI_SIZE_T, MPI_SUM, communicator );
PROFILE_STOP( "sumScan<size_t>", profile_level );
}
#endif
// float
template<>
void MPI_CLASS::call_sumScan<float>( const float *send, float *recv, int n ) const
{
PROFILE_START( "sumScan<float>", profile_level );
MPI_Scan( (void *) send, (void *) recv, n, MPI_FLOAT, MPI_SUM, communicator );
PROFILE_STOP( "sumScan<float>", profile_level );
}
// double
template<>
void MPI_CLASS::call_sumScan<double>( const double *send, double *recv, int n ) const
{
PROFILE_START( "sumScan<double>", profile_level );
MPI_Scan( (void *) send, (void *) recv, n, MPI_DOUBLE, MPI_SUM, communicator );
PROFILE_STOP( "sumScan<double>", profile_level );
}
// std::complex<double>
template<>
void MPI_CLASS::call_sumScan<std::complex<double>>(
const std::complex<double> *x, std::complex<double> *y, int n ) const
{
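    // No complex datatype/reduction is used here; pack each value as an interleaved
    // (real, imag) pair of doubles, since summing the parts independently is equivalent
    // to summing the complex values.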
auto send = new double[2 * n];
auto recv = new double[2 * n];
for ( int i = 0; i < n; i++ ) {
send[2 * i + 0] = real( x[i] );
send[2 * i + 1] = imag( x[i] );
}
MPI_Scan( (void *) send, (void *) recv, 2 * n, MPI_DOUBLE, MPI_SUM, communicator );
for ( int i = 0; i < n; i++ )
y[i] = std::complex<double>( recv[2 * i + 0], recv[2 * i + 1] );
delete[] send;
delete[] recv;
}
#endif
/************************************************************************
* call_minScan *
* Note: these specializations are only called when using MPI. *
************************************************************************/
#ifdef USE_MPI
// unsigned char
template<>
void MPI_CLASS::call_minScan<unsigned char>(
const unsigned char *send, unsigned char *recv, int n ) const
{
PROFILE_START( "minScan<unsigned char>", profile_level );
MPI_Scan( (void *) send, (void *) recv, n, MPI_UNSIGNED_CHAR, MPI_MIN, communicator );
PROFILE_STOP( "minScan<unsigned char>", profile_level );
}
// char
template<>
void MPI_CLASS::call_minScan<char>( const char *send, char *recv, int n ) const
{
PROFILE_START( "minScan<char>", profile_level );
MPI_Scan( (void *) send, (void *) recv, n, MPI_SIGNED_CHAR, MPI_MIN, communicator );
PROFILE_STOP( "minScan<char>", profile_level );
}
// unsigned int
template<>
void MPI_CLASS::call_minScan<unsigned int>(
const unsigned int *send, unsigned int *recv, int n ) const
{
PROFILE_START( "minScan<unsigned int>", profile_level );
MPI_Scan( (void *) send, (void *) recv, n, MPI_UNSIGNED, MPI_MIN, communicator );
PROFILE_STOP( "minScan<unsigned int>", profile_level );
}
// int
template<>
void MPI_CLASS::call_minScan<int>( const int *send, int *recv, int n ) const
{
PROFILE_START( "minScan<int>", profile_level );
MPI_Scan( (void *) send, (void *) recv, n, MPI_INT, MPI_MIN, communicator );
PROFILE_STOP( "minScan<int>", profile_level );
}
// unsigned long int
template<>
void MPI_CLASS::call_minScan<unsigned long int>(
const unsigned long int *send, unsigned long int *recv, int n ) const
{
PROFILE_START( "minScan<unsigned long>", profile_level );
MPI_Scan( (void *) send, (void *) recv, n, MPI_UNSIGNED_LONG, MPI_MIN, communicator );
PROFILE_STOP( "minScan<unsigned long>", profile_level );
}
// long int
template<>
void MPI_CLASS::call_minScan<long int>( const long int *send, long int *recv, int n ) const
{
PROFILE_START( "minScan<long int>", profile_level );
MPI_Scan( (void *) send, (void *) recv, n, MPI_LONG, MPI_MIN, communicator );
PROFILE_STOP( "minScan<long int>", profile_level );
}
// size_t
#ifdef USE_WINDOWS
template<>
void MPI_CLASS::call_minScan<size_t>( const size_t *send, size_t *recv, int n ) const
{
MPI_ASSERT( MPI_SIZE_T != 0 );
PROFILE_START( "minScan<size_t>", profile_level );
MPI_Scan( (void *) send, (void *) recv, n, MPI_SIZE_T, MPI_MIN, communicator );
PROFILE_STOP( "minScan<size_t>", profile_level );
}
#endif
// float
template<>
void MPI_CLASS::call_minScan<float>( const float *send, float *recv, int n ) const
{
PROFILE_START( "minScan<float>", profile_level );
MPI_Scan( (void *) send, (void *) recv, n, MPI_FLOAT, MPI_MIN, communicator );
PROFILE_STOP( "minScan<float>", profile_level );
}
// double
template<>
void MPI_CLASS::call_minScan<double>( const double *send, double *recv, int n ) const
{
PROFILE_START( "minScan<double>", profile_level );
MPI_Scan( (void *) send, (void *) recv, n, MPI_DOUBLE, MPI_MIN, communicator );
PROFILE_STOP( "minScan<double>", profile_level );
}
#endif
/************************************************************************
* call_maxScan *
* Note: these specializations are only called when using MPI. *
************************************************************************/
#ifdef USE_MPI
// unsigned char
template<>
void MPI_CLASS::call_maxScan<unsigned char>(
const unsigned char *send, unsigned char *recv, int n ) const
{
PROFILE_START( "maxScan<unsigned char>", profile_level );
MPI_Scan( (void *) send, (void *) recv, n, MPI_UNSIGNED_CHAR, MPI_MAX, communicator );
PROFILE_STOP( "maxScan<unsigned char>", profile_level );
}
// char
template<>
void MPI_CLASS::call_maxScan<char>( const char *send, char *recv, int n ) const
{
PROFILE_START( "maxScan<char>", profile_level );
MPI_Scan( (void *) send, (void *) recv, n, MPI_SIGNED_CHAR, MPI_MAX, communicator );
PROFILE_STOP( "maxScan<char>", profile_level );
}
// unsigned int
template<>
void MPI_CLASS::call_maxScan<unsigned int>(
const unsigned int *send, unsigned int *recv, int n ) const
{
PROFILE_START( "maxScan<unsigned int>", profile_level );
MPI_Scan( (void *) send, (void *) recv, n, MPI_UNSIGNED, MPI_MAX, communicator );
PROFILE_STOP( "maxScan<unsigned int>", profile_level );
}
// int
template<>
void MPI_CLASS::call_maxScan<int>( const int *send, int *recv, int n ) const
{
PROFILE_START( "maxScan<int>", profile_level );
MPI_Scan( (void *) send, (void *) recv, n, MPI_INT, MPI_MAX, communicator );
PROFILE_STOP( "maxScan<int>", profile_level );
}
// long int
template<>
void MPI_CLASS::call_maxScan<long int>( const long int *send, long int *recv, int n ) const
{
PROFILE_START( "maxScan<long int>", profile_level );
MPI_Scan( (void *) send, (void *) recv, n, MPI_LONG, MPI_MAX, communicator );
PROFILE_STOP( "maxScan<long int>", profile_level );
}
// unsigned long int
template<>
void MPI_CLASS::call_maxScan<unsigned long int>(
const unsigned long int *send, unsigned long int *recv, int n ) const
{
PROFILE_START( "maxScan<unsigned long>", profile_level );
MPI_Scan( (void *) send, (void *) recv, n, MPI_UNSIGNED_LONG, MPI_MAX, communicator );
PROFILE_STOP( "maxScan<unsigned long>", profile_level );
}
// size_t
#ifdef USE_WINDOWS
template<>
void MPI_CLASS::call_maxScan<size_t>( const size_t *send, size_t *recv, int n ) const
{
MPI_ASSERT( MPI_SIZE_T != 0 );
PROFILE_START( "maxScan<size_t>", profile_level );
MPI_Scan( (void *) send, (void *) recv, n, MPI_SIZE_T, MPI_MAX, communicator );
PROFILE_STOP( "maxScan<size_t>", profile_level );
}
#endif
// float
template<>
void MPI_CLASS::call_maxScan<float>( const float *send, float *recv, int n ) const
{
PROFILE_START( "maxScan<float>", profile_level );
    MPI_Scan( (void *) send, (void *) recv, n, MPI_FLOAT, MPI_MAX, communicator );
PROFILE_STOP( "maxScan<float>", profile_level );
}
// double
template<>
void MPI_CLASS::call_maxScan<double>( const double *send, double *recv, int n ) const
{
PROFILE_START( "maxScan<double>", profile_level );
MPI_Scan( (void *) send, (void *) recv, n, MPI_DOUBLE, MPI_MAX, communicator );
PROFILE_STOP( "maxScan<double>", profile_level );
}
#endif
/************************************************************************
* Communicate ranks for communication *
************************************************************************/
std::vector<int> MPI_CLASS::commRanks( const std::vector<int> &ranks ) const
{
#ifdef USE_MPI
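    // Each rank marks (with a one-byte flag) the ranks it wants to communicate with; the
    // one-element all-to-all transposes this flag matrix, so the result lists the ranks
    // that want to communicate with this rank.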
// Get a byte array with the ranks to communicate
auto data1 = new char[comm_size];
auto data2 = new char[comm_size];
memset( data1, 0, comm_size );
memset( data2, 0, comm_size );
for ( auto &rank : ranks )
data1[rank] = 1;
MPI_Alltoall( data1, 1, MPI_CHAR, data2, 1, MPI_CHAR, communicator );
int N = 0;
for ( int i = 0; i < comm_size; i++ )
N += data2[i];
std::vector<int> ranks_out;
ranks_out.reserve( N );
for ( int i = 0; i < comm_size; i++ ) {
if ( data2[i] )
ranks_out.push_back( i );
}
delete[] data1;
delete[] data2;
return ranks_out;
#else
return ranks;
#endif
}
/************************************************************************
* Wait functions *
************************************************************************/
#ifdef USE_MPI
void MPI_CLASS::wait( MPI_Request request )
{
PROFILE_START( "wait", profile_level );
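    // Poll with MPI_Test instead of blocking in MPI_Wait so the thread yields the CPU
    // between checks and other threads can continue to make progress.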
MPI_Status status;
int flag = 0;
int err = MPI_Test( &request, &flag, &status );
MPI_ASSERT( err == MPI_SUCCESS ); // Check that the first call is valid
while ( !flag ) {
// Put the current thread to sleep to allow other threads to run
sched_yield();
// Check if the request has finished
MPI_Test( &request, &flag, &status );
}
PROFILE_STOP( "wait", profile_level );
}
int MPI_CLASS::waitAny( int count, MPI_Request *request )
{
if ( count == 0 )
return -1;
PROFILE_START( "waitAny", profile_level );
int index = -1;
int flag = 0;
auto status = new MPI_Status[count];
int err = MPI_Testany( count, request, &index, &flag, status );
MPI_ASSERT( err == MPI_SUCCESS ); // Check that the first call is valid
while ( !flag ) {
// Put the current thread to sleep to allow other threads to run
sched_yield();
// Check if the request has finished
MPI_Testany( count, request, &index, &flag, status );
}
MPI_ASSERT( index >= 0 ); // Check that the index is valid
delete[] status;
PROFILE_STOP( "waitAny", profile_level );
return index;
}
void MPI_CLASS::waitAll( int count, MPI_Request *request )
{
if ( count == 0 )
return;
PROFILE_START( "waitAll", profile_level );
int flag = 0;
auto status = new MPI_Status[count];
int err = MPI_Testall( count, request, &flag, status );
MPI_ASSERT( err == MPI_SUCCESS ); // Check that the first call is valid
while ( !flag ) {
// Put the current thread to sleep to allow other threads to run
sched_yield();
// Check if the request has finished
MPI_Testall( count, request, &flag, status );
}
PROFILE_STOP( "waitAll", profile_level );
delete[] status;
}
std::vector<int> MPI_CLASS::waitSome( int count, MPI_Request *request )
{
if ( count == 0 )
return std::vector<int>();
PROFILE_START( "waitSome", profile_level );
std::vector<int> indices( count, -1 );
auto *status = new MPI_Status[count];
int outcount = 0;
int err = MPI_Testsome( count, request, &outcount, &indices[0], status );
MPI_ASSERT( err == MPI_SUCCESS );        // Check that the first call is valid
MPI_ASSERT( outcount != MPI_UNDEFINED ); // Check that the output count is valid
while ( outcount == 0 ) {
// Put the current thread to sleep to allow other threads to run
sched_yield();
// Check if any of the requests have finished
MPI_Testsome( count, request, &outcount, &indices[0], status );
}
indices.resize( outcount );
delete[] status;
PROFILE_STOP( "waitSome", profile_level );
return indices;
}
#else
void MPI_CLASS::wait( MPI_Request request )
{
PROFILE_START( "wait", profile_level );
while ( 1 ) {
// Check if the request is in our list
if ( global_isendrecv_list.find( request ) == global_isendrecv_list.end() )
break;
// Put the current thread to sleep to allow other threads to run
sched_yield();
}
PROFILE_STOP( "wait", profile_level );
}
int MPI_CLASS::waitAny( int count, MPI_Request *request )
{
if ( count == 0 )
return -1;
PROFILE_START( "waitAny", profile_level );
int index = 0;
while ( 1 ) {
// Check if the request is in our list
bool found_any = false;
for ( int i = 0; i < count; i++ ) {
if ( global_isendrecv_list.find( request[i] ) == global_isendrecv_list.end() ) {
found_any = true;
index = i;
}
}
if ( found_any )
break;
// Put the current thread to sleep to allow other threads to run
sched_yield();
}
PROFILE_STOP( "waitAny", profile_level );
return index;
}
void MPI_CLASS::waitAll( int count, MPI_Request *request )
{
if ( count == 0 )
return;
PROFILE_START( "waitAll", profile_level );
while ( 1 ) {
// Check if the request is in our list
bool found_all = true;
for ( int i = 0; i < count; i++ ) {
if ( global_isendrecv_list.find( request[i] ) != global_isendrecv_list.end() )
found_all = false;
}
if ( found_all )
break;
// Put the current thread to sleep to allow other threads to run
sched_yield();
}
PROFILE_STOP( "waitAll", profile_level );
}
std::vector<int> MPI_CLASS::waitSome( int count, MPI_Request *request )
{
if ( count == 0 )
return std::vector<int>();
PROFILE_START( "waitSome", profile_level );
std::vector<int> indices;
while ( 1 ) {
// Check which requests are no longer in our list (i.e. have finished)
for ( int i = 0; i < count; i++ ) {
if ( global_isendrecv_list.find( request[i] ) == global_isendrecv_list.end() )
indices.push_back( i );
}
if ( !indices.empty() )
break;
// Put the current thread to sleep to allow other threads to run
sched_yield();
}
PROFILE_STOP( "waitSome", profile_level );
return indices;
}
#endif
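// Example (illustrative sketch, not part of the original file): the wait routines above poll
// with MPI_Test* and sched_yield() rather than blocking inside MPI, so other threads can make
// progress while a request completes.  Assuming the wrapper exposes Isend()/Irecv() members
// that return MPI_Request (the comm object, buffers, ranks, and tag below are hypothetical):
//
//     MPI_Request req[2];
//     req[0] = comm.Isend( sendBuf, N, dst, tag );
//     req[1] = comm.Irecv( recvBuf, N, src, tag );
//     comm.waitAll( 2, req );    // returns once both requests have completed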
/************************************************************************
* Probe functions *
************************************************************************/
#ifdef USE_MPI
int MPI_CLASS::Iprobe( int source, int tag ) const
{
MPI_INSIST( tag <= d_maxTag, "Maximum tag value exceeded" );
MPI_INSIST( tag >= 0, "tag must be >= 0" );
MPI_Status status;
int flag = 0;
MPI_Iprobe( source, tag, communicator, &flag, &status );
if ( flag == 0 )
return -1;
int count;
MPI_Get_count( &status, MPI_BYTE, &count );
MPI_ASSERT( count >= 0 );
return count;
}
int MPI_CLASS::probe( int source, int tag ) const
{
MPI_INSIST( tag <= d_maxTag, "Maximum tag value exceeded" );
MPI_INSIST( tag >= 0, "tag must be >= 0" );
MPI_Status status;
MPI_Probe( source, tag, communicator, &status );
int count;
MPI_Get_count( &status, MPI_BYTE, &count );
MPI_ASSERT( count >= 0 );
return count;
}
#else
int MPI_CLASS::Iprobe( int, int ) const
{
MPI_ERROR( "Not implimented for serial codes (Iprobe)" );
return 0;
}
int MPI_CLASS::probe( int, int ) const
{
MPI_ERROR( "Not implimented for serial codes (probe)" );
return 0;
}
#endif
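// Example (illustrative sketch, not part of the original file): probe()/Iprobe() report the
// size in bytes of a pending message, which allows a receive buffer to be sized before the
// receive is posted.  The comm object, source rank, and tag are hypothetical.
//
//     int bytes = comm.probe( src, tag );     // blocks until a message is pending
//     std::vector<char> buffer( bytes );
//     // ... post the matching receive into buffer.data() ...
//
//     int pending = comm.Iprobe( src, tag );  // returns -1 if no message is pending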
/************************************************************************
* Timer functions *
************************************************************************/
#ifdef USE_MPI
double MPI_CLASS::time() { return MPI_Wtime(); }
double MPI_CLASS::tick() { return MPI_Wtick(); }
#else
double MPI_CLASS::time()
{
auto t = std::chrono::system_clock::now();
auto ns = std::chrono::duration_cast<std::chrono::nanoseconds>( t.time_since_epoch() );
return 1e-9 * ns.count();
}
double MPI_CLASS::tick()
{
auto period = std::chrono::system_clock::period();
return static_cast<double>( period.num ) / static_cast<double>( period.den );
}
#endif
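// Example (illustrative sketch, not part of the original file): time() returns wall-clock
// seconds (MPI_Wtime() with MPI, the C++ system clock otherwise) and tick() returns the timer
// resolution, so a region can be timed portably (comm is a hypothetical MPI_CLASS instance):
//
//     double start   = comm.time();
//     // ... work to be timed ...
//     double elapsed = comm.time() - start;   // seconds, resolution ~ comm.tick()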
/************************************************************************
* Serialize a block of code across MPI processes *
************************************************************************/
void MPI_CLASS::serializeStart()
{
#ifdef USE_MPI
using namespace std::chrono_literals;
if ( comm_rank == 0 ) {
// Start rank 0 immediately
} else {
// Wait for a message from the previous rank
MPI_Request request;
MPI_Status status;
int flag = false, buf = 0;
MPI_Irecv( &buf, 1, MPI_INT, comm_rank - 1, 5627, MPI_COMM_WORLD, &request );
while ( !flag ) {
MPI_Test( &request, &flag, &status );
std::this_thread::sleep_for( 50ms );
}
}
#endif
}
void MPI_CLASS::serializeStop()
{
#ifdef USE_MPI
using namespace std::chrono_literals;
if ( comm_rank < comm_size - 1 ) {
// Send flag to next rank
MPI_Send( &comm_rank, 1, MPI_INT, comm_rank + 1, 5627, MPI_COMM_WORLD );
// Wait for final finished flag
int flag = false, buf = 0;
MPI_Request request;
MPI_Status status;
MPI_Irecv( &buf, 1, MPI_INT, comm_size - 1, 5627, MPI_COMM_WORLD, &request );
while ( !flag ) {
MPI_Test( &request, &flag, &status );
std::this_thread::sleep_for( 50ms );
}
} else {
// Send final flag to all ranks
for ( int i = 0; i < comm_size - 1; i++ )
MPI_Send( &comm_rank, 1, MPI_INT, i, 5627, MPI_COMM_WORLD );
}
#endif
}
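// Example (illustrative sketch, not part of the original file): serializeStart() and
// serializeStop() force a region to execute one rank at a time in rank order, which is mainly
// useful for producing ordered debug output (comm is a hypothetical MPI_CLASS instance on
// MPI_COMM_WORLD):
//
//     comm.serializeStart();
//     std::cout << "per-rank diagnostic output" << std::endl;  // runs in rank order
//     comm.serializeStop();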
/****************************************************************************
* Function to start/stop MPI *
****************************************************************************/
#ifdef USE_EXT_MPI
static bool called_MPI_Init = false;
#endif
bool MPI_CLASS::MPI_Active()
{
#ifdef USE_EXT_MPI
int MPI_initialized, MPI_finalized;
MPI_Initialized( &MPI_initialized );
MPI_Finalized( &MPI_finalized );
return MPI_initialized != 0 && MPI_finalized == 0;
#else
return false;
#endif
}
void MPI_CLASS::start_MPI( int argc, char *argv[], int profile_level )
{
changeProfileLevel( profile_level );
NULL_USE( argc );
NULL_USE( argv );
#ifdef USE_EXT_MPI
if ( MPI_Active() ) {
called_MPI_Init = false;
} else {
int provided;
int result = MPI_Init_thread( &argc, &argv, MPI_THREAD_MULTIPLE, &provided );
if ( result != MPI_SUCCESS )
MPI_ERROR( "Unable to initialize MPI" );
if ( provided < MPI_THREAD_MULTIPLE )
std::cerr << "Warning: Failed to start MPI with MPI_THREAD_MULTIPLE\n";
called_MPI_Init = true;
}
#endif
}
void MPI_CLASS::stop_MPI()
{
#ifdef USE_EXT_MPI
int finalized;
MPI_Finalized( &finalized );
if ( called_MPI_Init && !finalized ) {
MPI_Barrier( MPI_COMM_WORLD );
MPI_Finalize();
called_MPI_Init = false; // MPI is no longer active once we have finalized it
}
#endif
}
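// Example (illustrative sketch, not part of the original file): start_MPI() and stop_MPI()
// bracket an application so MPI is initialized (requesting MPI_THREAD_MULTIPLE) only when it
// is not already active, and finalized only if this class started it.  They are assumed to be
// static members; the explicit profile level 0 matches the signature above.
//
//     int main( int argc, char *argv[] )
//     {
//         Utilities::MPI_CLASS::start_MPI( argc, argv, 0 );
//         // ... application code using the MPI wrapper ...
//         Utilities::MPI_CLASS::stop_MPI();
//         return 0;
//     }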
} // namespace Utilities